CompilerTemplateAsm-armv5te-vfp.S revision bd0472480c6e876198fe19c4ffa22350c0ce57da
1/*
2 * This file was generated automatically by gen-template.py for 'armv5te-vfp'.
3 *
4 * --> DO NOT EDIT <--
5 */
6
7/* File: armv5te/header.S */
8/*
9 * Copyright (C) 2008 The Android Open Source Project
10 *
11 * Licensed under the Apache License, Version 2.0 (the "License");
12 * you may not use this file except in compliance with the License.
13 * You may obtain a copy of the License at
14 *
15 *      http://www.apache.org/licenses/LICENSE-2.0
16 *
17 * Unless required by applicable law or agreed to in writing, software
18 * distributed under the License is distributed on an "AS IS" BASIS,
19 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 * See the License for the specific language governing permissions and
21 * limitations under the License.
22 */
23
24#if defined(WITH_JIT)
25
26/*
27 * ARMv5 definitions and declarations.
28 */
29
30/*
31ARM EABI general notes:
32
33r0-r3 hold first 4 args to a method; they are not preserved across method calls
34r4-r8 are available for general use
35r9 is given special treatment in some situations, but not for us
36r10 (sl) seems to be generally available
37r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
38r12 (ip) is scratch -- not preserved across method calls
39r13 (sp) should be managed carefully in case a signal arrives
40r14 (lr) must be preserved
41r15 (pc) can be tinkered with directly
42
43r0 holds returns of <= 4 bytes
44r0-r1 hold returns of 8 bytes, low word in r0
45
46Callee must save/restore r4+ (except r12) if it modifies them.
47
48Stack is "full descending".  Only the arguments that don't fit in the first 4
49registers are placed on the stack.  "sp" points at the first stacked argument
50(i.e. the 5th arg).
51
52VFP: single-precision results in s0, double-precision results in d0.
53
54In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
5564-bit quantities (long long, double) must be 64-bit aligned.
56*/
57
58/*
59JIT and ARM notes:
60
61The following registers have fixed assignments:
62
63  reg nick      purpose
64  r5  rFP       interpreted frame pointer, used for accessing locals and args
65  r6  rGLUE     MterpGlue pointer
66
67The following registers have fixed assignments in mterp but are scratch
68registers in compiled code
69
70  reg nick      purpose
71  r4  rPC       interpreted program counter, used for fetching instructions
72  r7  rINST     first 16-bit code unit of current instruction
73  r8  rIBASE    interpreted instruction base pointer, used for computed goto
74
75Macros are provided for common operations.  Each macro MUST emit only
76one instruction to make instruction-counting easier.  They MUST NOT alter
77unspecified registers or condition codes.
78*/
79
80/*
 * Single-purpose registers, given names for clarity.
 *
 * Per the "JIT and ARM notes" earlier in this file: rFP and rGLUE keep their
 * fixed assignments in compiled code, whereas rPC, rINST and rIBASE are fixed
 * only in mterp and are scratch registers in compiled code.
 */
81#define rPC     r4
82#define rFP     r5
83#define rGLUE   r6
84#define rINST   r7
85#define rIBASE  r8
86
87/*
88 * Given a frame pointer, find the stack save area.
89 *
90 * In C this is "((StackSaveArea*)(_fp) -1)".
 *
 *   _reg   - register that receives the save-area address
 *   _fpreg - register holding the frame pointer (only _reg is written)
 *
 * Expands to a single SUB of sizeofStackSaveArea, i.e. the save area is the
 * sizeofStackSaveArea bytes immediately below the frame pointer.  The SUB
 * does not set condition flags.
91 */
92#define SAVEAREA_FROM_FP(_reg, _fpreg) \
93    sub     _reg, _fpreg, #sizeofStackSaveArea
94
/*
 * Store rPC into the currentPc field of the stack save area that belongs to
 * the frame addressed by rFP (the save area sits sizeofStackSaveArea bytes
 * below rFP).  Expands to a single STR; no register or flag is modified.
 */
95#define EXPORT_PC() \
96    str     rPC, [rFP, #(-sizeofStackSaveArea + offStackSaveArea_currentPc)]
97
98/*
99 * This is a #include, not a %include, because we want the C pre-processor
100 * to expand the macros into assembler assignment statements.
101 */
102#include "../../../mterp/common/asm-constants.h"
103
104
105/* File: armv5te-vfp/platform.S */
106/*
107 * ===========================================================================
108 *  CPU-version-specific defines and utility
109 * ===========================================================================
110 */
111
112/*
113 * Macro for "MOV LR,PC / LDR PC,xxx", which is not allowed pre-ARMv5.
114 * Jump to subroutine.
115 *
 *   source - memory operand from which the target address is loaded into pc
 *
116 * May modify IP and LR.
 * (The expansion shown here writes only lr and pc; ip is not touched by
 * these two instructions.)
 *
 * NOTE(review): assumes ARM state, where reading pc yields the address of
 * the current instruction + 8, so lr ends up pointing at the instruction
 * that follows the "ldr pc" -- confirm against the ARM ARM.
117 */
118.macro  LDR_PC_LR source
119    mov     lr, pc                      @ lr<- return address
120    ldr     pc, \source                 @ pc<- target loaded from \source
121.endm
122
123
/*
 * dvmCompilerTemplateStart is an exported, function-typed label placed in
 * .text immediately ahead of the first template, so it marks the beginning
 * of the template code that follows in this file.
 */
124    .global dvmCompilerTemplateStart
125    .type   dvmCompilerTemplateStart, %function
126    .text
127
128dvmCompilerTemplateStart:
129
130/* ------------------------------ */
131    .balign 4
132    .global dvmCompiler_TEMPLATE_CMP_LONG
133dvmCompiler_TEMPLATE_CMP_LONG:
134/* File: armv5te/TEMPLATE_CMP_LONG.S */
135    /*
136     * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
137     * register based on the results of the comparison.
     *
     * On entry:
     *     r1:r0 = first operand (vBB), high word in r1
     *     r3:r2 = second operand (vCC), high word in r3
     * On exit:
     *     r0 = -1 if vBB < vCC, 0 if they are equal, 1 if vBB > vCC
     * Returns with "bx lr".  Only r0 and the condition flags are written.
138     *
139     * We load the full values with LDM, but in practice many values could
140     * be resolved by only looking at the high word.  This could be made
141     * faster or slower by splitting the LDM into a pair of LDRs.
     * NOTE(review): no LDM appears in this template -- the operands arrive
     * in r0-r3 -- so the paragraph above appears to describe a different
     * variant of this code.
142     *
143     * If we just wanted to set condition flags, we could do this:
144     *  subs    ip, r0, r2
145     *  sbcs    ip, r1, r3
146     *  subeqs  ip, r0, r2
147     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
148     * integer value, which we can do with 2 conditional mov/mvn instructions
149     * (set 1, set -1; if they're equal we already have 0 in ip), giving
150     * us a constant 5-cycle path plus a branch at the end to the
151     * instruction epilogue code.  The multi-compare approach below needs
152     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
153     * in the worst case (the 64-bit values are equal).
154     */
155    /* cmp-long vAA, vBB, vCC */
156    cmp     r1, r3                      @ compare (vBB+1, vCC+1)
157    blt     .LTEMPLATE_CMP_LONG_less            @ signed compare on high part
158    bgt     .LTEMPLATE_CMP_LONG_greater         @ high word of vBB is larger
159    subs    r0, r0, r2                  @ r0<- r0 - r2
160    bxeq     lr                         @ low words equal too: return r0 = 0
161    bhi     .LTEMPLATE_CMP_LONG_greater         @ unsigned compare on low part
                                        @ otherwise fall through to "less"
162.LTEMPLATE_CMP_LONG_less:
163    mvn     r0, #0                      @ r0<- -1
164    bx      lr
165.LTEMPLATE_CMP_LONG_greater:
166    mov     r0, #1                      @ r0<- 1
167    bx      lr
168
169
170/* ------------------------------ */
171    .balign 4
172    .global dvmCompiler_TEMPLATE_RETURN
173dvmCompiler_TEMPLATE_RETURN:
174/* File: armv5te/TEMPLATE_RETURN.S */
175    /*
176     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
177     * If the stored value in returnAddr
178     * is non-zero, the caller is compiled by the JIT thus return to the
179     * address in the code cache following the invoke instruction. Otherwise
180     * return to the special dvmJitToInterpNoChain entry point.
     *
     * On entry:
     *     rFP   = frame being unwound
     *     rGLUE = glue pointer
     *
     * Summary of the paths below:
     *   - The method recorded for the previous frame is null (break frame):
     *     rPC/rFP are stored to the glue and control leaves through the
     *     .LdvmMterpStdBail literal (or, under WITH_SELF_VERIFICATION,
     *     through "blxeq lr").
     *   - Otherwise glue->method, glue->methodClassDex and self->curFrame
     *     are updated for the caller's frame and rPC is advanced by 6 bytes.
     *     If returnAddr is non-zero and suspendCount is zero, control goes
     *     to the chaining cell via r9; otherwise it goes to the address
     *     loaded from .LdvmJitToInterpNoChain.
     *
     * Under WITH_SELF_VERIFICATION r9 is forced to 0, so the chaining cell
     * is never used.
181     */
182    SAVEAREA_FROM_FP(r0, rFP)           @ r0<- saveArea (old)
183    ldr     r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
184    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
185    ldr     rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc
186#if !defined(WITH_SELF_VERIFICATION)
187    ldr     r9,  [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret
188#else
189    mov     r9, #0                      @ disable chaining
190#endif
191    ldr     r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)]
192                                        @ r2<- method we're returning to
193    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
194    cmp     r2, #0                      @ break frame?
195#if !defined(WITH_SELF_VERIFICATION)
196    beq     1f                          @ bail to interpreter
197#else
198    blxeq   lr                          @ punt to interpreter and compare state
199#endif
200    ldr     r1, .LdvmJitToInterpNoChain @ defined in footer.S
201    mov     rFP, r10                    @ publish new FP
202    ldrne   r10, [r2, #offMethod_clazz] @ r10<- method->clazz
203    ldr     r8, [r8]                    @ r8<- suspendCount
204
205    str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
206    ldr     r0, [r10, #offClassObject_pDvmDex] @ r0<- method->clazz->pDvmDex
207    str     rFP, [r3, #offThread_curFrame] @ self->curFrame = fp
208    add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
209    str     r0, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = r0
210    cmp     r8, #0                      @ check the suspendCount
211    movne   r9, #0                      @ clear the chaining cell address
212    str     r9, [r3, #offThread_inJitCodeCache] @ in code cache or not
213    cmp     r9, #0                      @ chaining cell exists?
214    blxne   r9                          @ jump to the chaining cell
215#if defined(WITH_JIT_TUNING)
216    mov     r0, #kCallsiteInterpreted
217#endif
218    mov     pc, r1                      @ callsite is interpreted
2191:
220    stmia   rGLUE, {rPC, rFP}           @ SAVE_PC_FP_TO_GLUE()
221    ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
222    mov     r1, #0                      @ changeInterp = false
223    mov     r0, rGLUE                   @ Expecting rGLUE in r0
224    blx     r2                          @ exit the interpreter
225
226/* ------------------------------ */
227    .balign 4
228    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT
229dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
230/* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
231    /*
232     * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
233     * into rPC then jump to dvmJitToInterpNoChain to dispatch the
234     * runtime-resolved callee.
     *
     * NOTE(review): the final jump below goes through the
     * .LdvmJitToInterpTraceSelectNoChain literal, not dvmJitToInterpNoChain
     * as the sentence above says.
     *
     * Early exits through "bx lr":
     *   - the new frame would extend below interpStackEnd (stack overflow)
     *   - suspendCount is non-zero
     *   - under WITH_SELF_VERIFICATION, the callee has ACC_NATIVE set
     * Without WITH_SELF_VERIFICATION a native callee branches to
     * .LinvokeNative instead.
235     */
236    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
237    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
238    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
239    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
240    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
241    add     r3, r1, #1  @ Thumb addr is odd
242    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
243    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
244    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
245    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
246    ldr     r8, [r8]                    @ r8<- suspendCount (int)
247    cmp     r10, r9                     @ bottom < interpStackEnd?
248    bxlo    lr                          @ return to raise stack overflow excep.
249    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
250    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
251    ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
252    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
253    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
254    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
255
256
257    @ set up newSaveArea
258    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
259    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
260    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
261    cmp     r8, #0                      @ suspendCount != 0
262    bxne    lr                          @ bail to the interpreter
263    tst     r10, #ACC_NATIVE            @ is the callee native?
264#if !defined(WITH_SELF_VERIFICATION)
265    bne     .LinvokeNative
266#else
267    bxne    lr                          @ bail to the interpreter
268#endif
269
270    ldr     r10, .LdvmJitToInterpTraceSelectNoChain
271    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
272    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
273
274    @ Update "glue" values for the new method
275    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
276    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
277    mov     rFP, r1                         @ fp = newFp
278    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
279
280    @ Start executing the callee
281#if defined(WITH_JIT_TUNING)
282    mov     r0, #kInlineCacheMiss
283#endif
284    mov     pc, r10                         @ dvmJitToInterpTraceSelectNoChain
285
286/* ------------------------------ */
287    .balign 4
288    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN
289dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
290/* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S */
291    /*
292     * For monomorphic callsite, setup the Dalvik frame and return to the
293     * Thumb code through the link register to transfer control to the callee
294     * method through a dedicated chaining cell.
     *
     * Two return addresses are used:
     *   - lr      : normal exit, taken after the new frame and the glue
     *               have been set up
     *   - lr + 2  : held in r12; taken when the new frame would extend
     *               below interpStackEnd or when suspendCount is non-zero
     *
     * .LinvokeChain is also the branch target of
     * TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN when its prediction matches.
295     */
296    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
297    @ methodToCall is guaranteed to be non-native
298.LinvokeChain:
299    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
300    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
301    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
302    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
303    add     r3, r1, #1  @ Thumb addr is odd
304    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
305    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
306    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
307    add     r12, lr, #2                 @ setup the punt-to-interp address
308    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
309    ldr     r8, [r8]                    @ r8<- suspendCount (int)
310    cmp     r10, r9                     @ bottom < interpStackEnd?
311    bxlo    r12                         @ return to raise stack overflow excep.
312    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
313    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
314    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
315    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
316    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
317
318
319    @ set up newSaveArea
320    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
321    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
322    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
323    cmp     r8, #0                      @ suspendCount != 0
324    bxne    r12                         @ bail to the interpreter
325
326    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
327    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
328
329    @ Update "glue" values for the new method
330    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
331    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
332    mov     rFP, r1                         @ fp = newFp
333    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
334
335    bx      lr                              @ return to the callee-chaining cell
336
337
338
339/* ------------------------------ */
340    .balign 4
341    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
342dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
343/* File: armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
344    /*
345     * For polymorphic callsite, check whether the cached class pointer matches
346     * the current one. If so setup the Dalvik frame and return to the
347     * Thumb code through the link register to transfer control to the callee
348     * method through a dedicated chaining cell.
349     *
350     * The predicted chaining cell is declared in ArmLIR.h with the
351     * following layout:
352     *
353     *  typedef struct PredictedChainingCell {
354     *      u4 branch;
355     *      const ClassObject *clazz;
356     *      const Method *method;
357     *      u4 counter;
358     *  } PredictedChainingCell;
359     *
     * The literal offsets 4, 8 and 12 used below address the clazz, method
     * and counter fields of that layout.
     *
360     * Upon returning to the callsite:
361     *    - lr  : to branch to the chaining cell
362     *    - lr+2: to punt to the interpreter
363     *    - lr+4: to fully resolve the callee and may rechain.
364     *            r3 <- class
365     *            r9 <- counter
     *
     * On a class match this template does not return itself; it branches to
     * .LinvokeChain with r0 = predicted method.  On a mismatch the counter
     * in the cell is decremented and control returns to lr+4.
366     */
367    @ r0 = this, r1 = returnCell, r2 = predictedChainCell, rPC = dalvikCallsite
368    ldr     r3, [r0, #offObject_clazz]  @ r3 <- this->class
369    ldr     r8, [r2, #4]    @ r8 <- predictedChainCell->clazz
370    ldr     r0, [r2, #8]    @ r0 <- predictedChainCell->method
371    ldr     r9, [r2, #12]   @ r9 <- predictedChainCell->counter
372    cmp     r3, r8          @ predicted class == actual class?
373#if defined(WITH_JIT_TUNING)
374    ldr     r7, .LdvmICHitCount         @ r7<- pointer from .LdvmICHitCount
375    ldreq   r10, [r7, #0]               @ on a hit: r10<- current hit count
376    add     r10, r10, #1                @ unconditional; does not alter flags
377    streq   r10, [r7, #0]               @ on a hit: store incremented count
378#endif
379    beq     .LinvokeChain   @ predicted chain is valid
380    ldr     r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
381    sub     r1, r9, #1      @ count--
382    str     r1, [r2, #12]   @ write back to PredictedChainingCell->counter
383    add     lr, lr, #4      @ return to fully-resolve landing pad
384    /*
385     * r1 <- count (already decremented)
386     * r2 <- &predictedChainCell
387     * r3 <- this->class
388     * r4 <- dPC
389     * r7 <- this->class->vtable
390     */
391    bx      lr
392
393/* ------------------------------ */
394    .balign 4
395    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE
396dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
397/* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
    /*
     * Set up a Dalvik frame for a native method, call method->nativeFunc,
     * then restore the caller's state and continue either at the return
     * chaining cell or in the interpreter.
     *
     * Early exits through "bx lr":
     *   - the new save area would lie below interpStackEnd
     *   - suspendCount is non-zero
     *   - always, under WITH_SELF_VERIFICATION (the native call below is
     *     then never reached)
     *
     * Arguments at the "blx r8" call:
     *     r0 = newFP, r1 = &glue->retval, r2 = methodToCall,
     *     r3 = glue->self (loaded earlier and not modified since)
     * r9 (self) and r10 (new save area) are read again after the call
     * returns.
     *
     * If self->exception is non-null after the call, control goes to
     * .LhandleException with r0 = the caller's saved currentPc.
     */
398    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
399    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
400    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
401    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
402    add     r3, r1, #1  @ Thumb addr is odd
403    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
404    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
405    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
406    ldr     r8, [r8]                    @ r8<- suspendCount (int)
407    cmp     r10, r9                     @ bottom < interpStackEnd?
408    bxlo    lr                          @ return to raise stack overflow excep.
409    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
410    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
411    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
412    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
413
414
415    @ set up newSaveArea
416    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
417    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
418    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
419    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
420    cmp     r8, #0                      @ suspendCount != 0
421    ldr     r8, [r0, #offMethod_nativeFunc] @ r8<- method->nativeFunc
                                        @ (flags from the cmp above are kept)
422#if !defined(WITH_SELF_VERIFICATION)
423    bxne    lr                          @ bail to the interpreter
424#else
425    bx      lr                          @ bail to interpreter unconditionally
426#endif
427
428    @ go ahead and transfer control to the native code
429    ldr     r9, [r3, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
430    mov     r2, #0
431    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
432    str     r2, [r3, #offThread_inJitCodeCache] @ not in the jit code cache
433    str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
434                                        @ newFp->localRefCookie=top
435    mov     r9, r3                      @ r9<- glue->self (preserve)
436    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
437
438    mov     r2, r0                      @ r2<- methodToCall
439    mov     r0, r1                      @ r0<- newFP
440    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
441
442    blx     r8                          @ off to the native code
443
444    @ native return; r9=self, r10=newSaveArea
445    @ equivalent to dvmPopJniLocals
446    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
447    ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
448    ldr     r1, [r9, #offThread_exception] @ check for exception
449    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
450    cmp     r1, #0                      @ null?
451    str     r0, [r9, #offThread_jniLocal_topCookie] @ new top <- old top
452    ldr     r0, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
453
454    @ r0 = dalvikCallsitePC
455    bne     .LhandleException           @ exception is non-null: handle it
456
457    str     r2, [r9, #offThread_inJitCodeCache] @ set the mode properly
458    cmp     r2, #0                      @ return chaining cell still exists?
459    bxne    r2                          @ yes - go ahead
460
461    @ continue executing the next instruction through the interpreter
462    ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
463    add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
464#if defined(WITH_JIT_TUNING)
465    mov     r0, #kCallsiteInterpreted
466#endif
467    mov     pc, r1
468
469
470
471
472/* ------------------------------ */
473    .balign 4
474    .global dvmCompiler_TEMPLATE_MUL_LONG
475dvmCompiler_TEMPLATE_MUL_LONG:
476/* File: armv5te/TEMPLATE_MUL_LONG.S */
477    /*
478     * Signed 64-bit integer multiply.
479     *
480     * For JIT: op1 in r0/r1, op2 in r2/r3, return in r0/r1
481     *
482     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
483     *        WX
484     *      x YZ
485     *  --------
486     *     ZW ZX
487     *  YW YX
488     *
489     * The low word of the result holds ZX, the high word holds
490     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
491     * it doesn't fit in the low 64 bits.
492     *
493     * Unlike most ARM math operations, multiply instructions have
494     * restrictions on using the same register more than once (Rd and Rm
495     * cannot be the same).
496     */
497    /* mul-long vAA, vBB, vCC */
498    mul     ip, r2, r1                  @  ip<- ZxW
499    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
500    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
501    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
502    mov     r0,r9
503    mov     r1,r10
504    bx      lr
505
506/* ------------------------------ */
507    .balign 4
508    .global dvmCompiler_TEMPLATE_SHL_LONG
509dvmCompiler_TEMPLATE_SHL_LONG:
510/* File: armv5te/TEMPLATE_SHL_LONG.S */
511    /*
512     * Long integer shift.  This is different from the generic 32/64-bit
513     * binary operations because vAA/vBB are 64-bit but vCC (the shift
514     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
515     * 6 bits.
     *
     * On entry:  r1:r0 = value to shift (high word in r1), r2 = distance
     * On exit:   r1:r0 = value << (distance & 63)
     * Also writes r2 (masked distance), r3, ip and the condition flags.
     *
     * NOTE(review): the results for distances of 0 and of 32 or more depend
     * on how ARM handles register-specified shift amounts outside 1..31 --
     * confirm against the ARM ARM.
516     */
517    /* shl-long vAA, vBB, vCC */
518    and     r2, r2, #63                 @ r2<- r2 & 0x3f
519    mov     r1, r1, asl r2              @  r1<- r1 << r2
520    rsb     r3, r2, #32                 @  r3<- 32 - r2
521    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >>> (32-r2))
522    subs    ip, r2, #32                 @  ip<- r2 - 32
523    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
524    mov     r0, r0, asl r2              @  r0<- r0 << r2
525    bx      lr
526
527/* ------------------------------ */
528    .balign 4
529    .global dvmCompiler_TEMPLATE_SHR_LONG
530dvmCompiler_TEMPLATE_SHR_LONG:
531/* File: armv5te/TEMPLATE_SHR_LONG.S */
532    /*
533     * Long integer shift.  This is different from the generic 32/64-bit
534     * binary operations because vAA/vBB are 64-bit but vCC (the shift
535     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
536     * 6 bits.
     *
     * On entry:  r1:r0 = value to shift (high word in r1), r2 = distance
     * On exit:   r1:r0 = value >> (distance & 63), arithmetic (the high
     *            word is shifted with ASR)
     * Also writes r2 (masked distance), r3, ip and the condition flags.
     *
     * NOTE(review): the results for distances of 0 and of 32 or more depend
     * on how ARM handles register-specified shift amounts outside 1..31 --
     * confirm against the ARM ARM.
537     */
538    /* shr-long vAA, vBB, vCC */
539    and     r2, r2, #63                 @ r2<- r2 & 0x3f
540    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
541    rsb     r3, r2, #32                 @  r3<- 32 - r2
542    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
543    subs    ip, r2, #32                 @  ip<- r2 - 32
544    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
545    mov     r1, r1, asr r2              @  r1<- r1 >> r2
546    bx      lr
547
548
549/* ------------------------------ */
550    .balign 4
551    .global dvmCompiler_TEMPLATE_USHR_LONG
552dvmCompiler_TEMPLATE_USHR_LONG:
553/* File: armv5te/TEMPLATE_USHR_LONG.S */
554    /*
555     * Long integer shift.  This is different from the generic 32/64-bit
556     * binary operations because vAA/vBB are 64-bit but vCC (the shift
557     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
558     * 6 bits.
     *
     * On entry:  r1:r0 = value to shift (high word in r1), r2 = distance
     * On exit:   r1:r0 = value >>> (distance & 63), logical (the high word
     *            is shifted with LSR)
     * Also writes r2 (masked distance), r3, ip and the condition flags.
     *
     * NOTE(review): the results for distances of 0 and of 32 or more depend
     * on how ARM handles register-specified shift amounts outside 1..31 --
     * confirm against the ARM ARM.
559     */
560    /* ushr-long vAA, vBB, vCC */
561    and     r2, r2, #63                 @ r2<- r2 & 0x3f
562    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
563    rsb     r3, r2, #32                 @  r3<- 32 - r2
564    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
565    subs    ip, r2, #32                 @  ip<- r2 - 32
566    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
567    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
568    bx      lr
569
570
571/* ------------------------------ */
572    .balign 4
573    .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP
574dvmCompiler_TEMPLATE_ADD_FLOAT_VFP:
575/* File: armv5te-vfp/TEMPLATE_ADD_FLOAT_VFP.S */
576/* File: armv5te-vfp/fbinop.S */
577    /*
578     * Generic 32-bit floating point operation.  Provide an "instr" line that
579     * specifies an instruction that performs s2 = s0 op s1.
     * This instance: "fadds" (single-precision add).
580     *
581     * On entry:
582     *     r0 = target dalvik register address
583     *     r1 = op1 address
584     *     r2 = op2 address
     *
     * The operands are read from memory and the result is written to [r0];
     * s0, s1 and s2 are overwritten.
585     */
586     flds    s0,[r1]                    @ s0<- op1
587     flds    s1,[r2]                    @ s1<- op2
588     fadds   s2, s0, s1                 @ s2<- s0 + s1
589     fsts    s2,[r0]                    @ *r0<- s2
590     bx      lr
591
592
593/* ------------------------------ */
594    .balign 4
595    .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP
596dvmCompiler_TEMPLATE_SUB_FLOAT_VFP:
597/* File: armv5te-vfp/TEMPLATE_SUB_FLOAT_VFP.S */
598/* File: armv5te-vfp/fbinop.S */
599    /*
600     * Generic 32-bit floating point operation.  Provide an "instr" line that
601     * specifies an instruction that performs s2 = s0 op s1.
     * This instance: "fsubs" (single-precision subtract, op1 - op2).
602     *
603     * On entry:
604     *     r0 = target dalvik register address
605     *     r1 = op1 address
606     *     r2 = op2 address
     *
     * The operands are read from memory and the result is written to [r0];
     * s0, s1 and s2 are overwritten.
607     */
608     flds    s0,[r1]                    @ s0<- op1
609     flds    s1,[r2]                    @ s1<- op2
610     fsubs   s2, s0, s1                 @ s2<- s0 - s1
611     fsts    s2,[r0]                    @ *r0<- s2
612     bx      lr
613
614
615/* ------------------------------ */
616    .balign 4
617    .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP
618dvmCompiler_TEMPLATE_MUL_FLOAT_VFP:
619/* File: armv5te-vfp/TEMPLATE_MUL_FLOAT_VFP.S */
620/* File: armv5te-vfp/fbinop.S */
621    /*
622     * Generic 32-bit floating point operation.  Provide an "instr" line that
623     * specifies an instruction that performs s2 = s0 op s1.
     * This instance: "fmuls" (single-precision multiply).
624     *
625     * On entry:
626     *     r0 = target dalvik register address
627     *     r1 = op1 address
628     *     r2 = op2 address
     *
     * The operands are read from memory and the result is written to [r0];
     * s0, s1 and s2 are overwritten.
629     */
630     flds    s0,[r1]                    @ s0<- op1
631     flds    s1,[r2]                    @ s1<- op2
632     fmuls   s2, s0, s1                 @ s2<- s0 * s1
633     fsts    s2,[r0]                    @ *r0<- s2
634     bx      lr
635
636
637/* ------------------------------ */
638    .balign 4
639    .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP
640dvmCompiler_TEMPLATE_DIV_FLOAT_VFP:
641/* File: armv5te-vfp/TEMPLATE_DIV_FLOAT_VFP.S */
642/* File: armv5te-vfp/fbinop.S */
643    /*
644     * Generic 32-bit floating point operation.  Provide an "instr" line that
645     * specifies an instruction that performs s2 = s0 op s1.
     * This instance: "fdivs" (single-precision divide, op1 / op2).
646     *
647     * On entry:
648     *     r0 = target dalvik register address
649     *     r1 = op1 address
650     *     r2 = op2 address
     *
     * The operands are read from memory and the result is written to [r0];
     * s0, s1 and s2 are overwritten.
651     */
652     flds    s0,[r1]                    @ s0<- op1
653     flds    s1,[r2]                    @ s1<- op2
654     fdivs   s2, s0, s1                 @ s2<- s0 / s1
655     fsts    s2,[r0]                    @ *r0<- s2
656     bx      lr
657
658
659/* ------------------------------ */
660    .balign 4
661    .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP
662dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP:
663/* File: armv5te-vfp/TEMPLATE_ADD_DOUBLE_VFP.S */
664/* File: armv5te-vfp/fbinopWide.S */
665    /*
666     * Generic 64-bit floating point operation.  Provide an "instr" line that
667     * specifies an instruction that performs d2 = d0 op d1.
     * This instance: "faddd" (double-precision add).
668     *
669     * On entry:
670     *     r0 = target dalvik register address
671     *     r1 = op1 address
672     *     r2 = op2 address
     *
     * The operands are read from memory and the result is written to [r0];
     * d0, d1 and d2 are overwritten.
673     */
674     fldd    d0,[r1]                    @ d0<- op1
675     fldd    d1,[r2]                    @ d1<- op2
676     faddd   d2, d0, d1                 @ d2<- d0 + d1
677     fstd    d2,[r0]                    @ *r0<- d2
678     bx      lr
679
680
681/* ------------------------------ */
682    .balign 4
683    .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP
684dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP:
685/* File: armv5te-vfp/TEMPLATE_SUB_DOUBLE_VFP.S */
686/* File: armv5te-vfp/fbinopWide.S */
687    /*
688     * Generic 64-bit floating point operation.  Provide an "instr" line that
689     * specifies an instruction that performs d2 = d0 op d1.
     * This instance: "fsubd" (double-precision subtract, op1 - op2).
690     *
691     * On entry:
692     *     r0 = target dalvik register address
693     *     r1 = op1 address
694     *     r2 = op2 address
     *
     * The operands are read from memory and the result is written to [r0];
     * d0, d1 and d2 are overwritten.
695     */
696     fldd    d0,[r1]                    @ d0<- op1
697     fldd    d1,[r2]                    @ d1<- op2
698     fsubd   d2, d0, d1                 @ d2<- d0 - d1
699     fstd    d2,[r0]                    @ *r0<- d2
700     bx      lr
701
702
703/* ------------------------------ */
704    .balign 4
705    .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP
706dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP:
707/* File: armv5te-vfp/TEMPLATE_MUL_DOUBLE_VFP.S */
708/* File: armv5te-vfp/fbinopWide.S */
709    /*
710     * Generic 64-bit floating point operation.  Provide an "instr" line that
711     * specifies an instruction that performs d2 = d0 op d1.
     * This instance: "fmuld" (double-precision multiply).
712     *
713     * On entry:
714     *     r0 = target dalvik register address
715     *     r1 = op1 address
716     *     r2 = op2 address
     *
     * The operands are read from memory and the result is written to [r0];
     * d0, d1 and d2 are overwritten.
717     */
718     fldd    d0,[r1]                    @ d0<- op1
719     fldd    d1,[r2]                    @ d1<- op2
720     fmuld   d2, d0, d1                 @ d2<- d0 * d1
721     fstd    d2,[r0]                    @ *r0<- d2
722     bx      lr
723
724
725/* ------------------------------ */
726    .balign 4
727    .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP
728dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP:
729/* File: armv5te-vfp/TEMPLATE_DIV_DOUBLE_VFP.S */
730/* File: armv5te-vfp/fbinopWide.S */
731    /*
732     * Generic 64-bit floating point operation.  Provide an "instr" line that
733     * specifies an instruction that performs d2 = d0 op d1.
     * This instance: "fdivd" (double-precision divide, op1 / op2).
734     *
735     * On entry:
736     *     r0 = target dalvik register address
737     *     r1 = op1 address
738     *     r2 = op2 address
     *
     * The operands are read from memory and the result is written to [r0];
     * d0, d1 and d2 are overwritten.
739     */
740     fldd    d0,[r1]                    @ d0<- op1
741     fldd    d1,[r2]                    @ d1<- op2
742     fdivd   d2, d0, d1                 @ d2<- d0 / d1
743     fstd    d2,[r0]                    @ *r0<- d2
744     bx      lr
745
746
747/* ------------------------------ */
748    .balign 4
749    .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP
750dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP:
751/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */
752/* File: armv5te-vfp/funopNarrower.S */
753    /*
754     * Generic 64bit-to-32bit floating point unary operation.  Provide an
755     * "instr" line that specifies an instruction that performs "s0 = op d0".
756     *
757     * For: double-to-int, double-to-float
     * This instance: double-to-float, using "fcvtsd".
758     *
759     * On entry:
760     *     r0 = target dalvik register address
761     *     r1 = src dalvik register address
     *
     * A 64-bit value is read from [r1] and a 32-bit result is written to
     * [r0]; d0 and s0 are overwritten.
762     */
763    /* unop vA, vB */
764    fldd    d0, [r1]                    @ d0<- vB
765    fcvtsd  s0, d0                              @ s0<- op d0
766    fsts    s0, [r0]                    @ vA<- s0
767    bx      lr
768
769
770/* ------------------------------ */
771    .balign 4
772    .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP
773dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP:
774/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_INT_VFP.S */
775/* File: armv5te-vfp/funopNarrower.S */
776    /*
777     * Generic 64bit-to-32bit floating point unary operation.  Provide an
778     * "instr" line that specifies an instruction that performs "s0 = op d0".
779     *
780     * For: double-to-int, double-to-float
     * This instance: double-to-int, using "ftosizd".
     * NOTE(review): the rounding and out-of-range/NaN behaviour comes
     * entirely from ftosizd -- confirm it against the VFP documentation.
781     *
782     * On entry:
783     *     r0 = target dalvik register address
784     *     r1 = src dalvik register address
     *
     * A 64-bit value is read from [r1] and a 32-bit result is written to
     * [r0]; d0 and s0 are overwritten.
785     */
786    /* unop vA, vB */
787    fldd    d0, [r1]                    @ d0<- vB
788    ftosizd  s0, d0                              @ s0<- op d0
789    fsts    s0, [r0]                    @ vA<- s0
790    bx      lr
791
792
793/* ------------------------------ */
794    .balign 4
795    .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP
796dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP:
797/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */
798/* File: armv5te-vfp/funopWider.S */
799    /*
800     * Generic 32bit-to-64bit floating point unary operation.  Provide an
801     * "instr" line that specifies an instruction that performs "d0 = op s0".
802     * This instance uses fcvtds (single to double precision conversion).
803     * For: int-to-double, float-to-double
804     *
805     * On entry:
806     *     r0 = target dalvik register address
807     *     r1 = src dalvik register address
808     */
809    /* unop vA, vB */
810    flds    s0, [r1]                    @ s0<- vB (32-bit source)
811    fcvtds  d0, s0                              @ d0<- (double) s0
812    fstd    d0, [r0]                    @ vA<- d0 (64-bit result)
813    bx      lr                          @ return to caller
814
815
816/* ------------------------------ */
817    .balign 4
818    .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP
819dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP:
820/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_INT_VFP.S */
821/* File: armv5te-vfp/funop.S */
822    /*
823     * Generic 32bit-to-32bit floating point unary operation.  Provide an
824     * "instr" line that specifies an instruction that performs "s1 = op s0".
825     * This instance uses ftosizs (single to signed int, round toward zero).
826     * For: float-to-int, int-to-float
827     *
828     * On entry:
829     *     r0 = target dalvik register address
830     *     r1 = src dalvik register address
831     */
832    /* unop vA, vB */
833    flds    s0, [r1]                    @ s0<- vB
834    ftosizs s1, s0                              @ s1<- (int) s0
835    fsts    s1, [r0]                    @ vA<- s1 (32-bit integer bits)
836    bx      lr                          @ return to caller
837
838
839/* ------------------------------ */
840    .balign 4
841    .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP
842dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP:
843/* File: armv5te-vfp/TEMPLATE_INT_TO_DOUBLE_VFP.S */
844/* File: armv5te-vfp/funopWider.S */
845    /*
846     * Generic 32bit-to-64bit floating point unary operation.  Provide an
847     * "instr" line that specifies an instruction that performs "d0 = op s0".
848     * This instance uses fsitod (signed int to double precision).
849     * For: int-to-double, float-to-double
850     *
851     * On entry:
852     *     r0 = target dalvik register address
853     *     r1 = src dalvik register address
854     */
855    /* unop vA, vB */
856    flds    s0, [r1]                    @ s0<- vB (32-bit integer bits)
857    fsitod  d0, s0                              @ d0<- (double) s0
858    fstd    d0, [r0]                    @ vA<- d0 (64-bit result)
859    bx      lr                          @ return to caller
860
861
862/* ------------------------------ */
863    .balign 4
864    .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP
865dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP:
866/* File: armv5te-vfp/TEMPLATE_INT_TO_FLOAT_VFP.S */
867/* File: armv5te-vfp/funop.S */
868    /*
869     * Generic 32bit-to-32bit floating point unary operation.  Provide an
870     * "instr" line that specifies an instruction that performs "s1 = op s0".
871     * This instance uses fsitos (signed int to single precision).
872     * For: float-to-int, int-to-float
873     *
874     * On entry:
875     *     r0 = target dalvik register address
876     *     r1 = src dalvik register address
877     */
878    /* unop vA, vB */
879    flds    s0, [r1]                    @ s0<- vB (32-bit integer bits)
880    fsitos  s1, s0                              @ s1<- (float) s0
881    fsts    s1, [r0]                    @ vA<- s1
882    bx      lr                          @ return to caller
883
884
885/* ------------------------------ */
886    .balign 4
887    .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP
888dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP:
889/* File: armv5te-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S */
890    /*
891     * Compare two double-precision values.  Leaves 0, 1, or -1 in r0
892     * based on the results of the comparison.  When neither "less than"
893     * nor "equal" holds (including unordered operands) the default 1 stays.
894     * int compare(x, y) {
895     *     if (x == y) {
896     *         return 0;
897     *     } else if (x < y) {
898     *         return -1;
899     *     } else if (x > y) {
900     *         return 1;
901     *     } else {                  (unordered)
902     *         return 1;
903     *     }
904     * }
905     *
906     * On entry:
907     *    r0 = &op1 [vBB]
908     *    r1 = &op2 [vCC]
909     */
910    /* op vAA, vBB, vCC */
911    fldd    d0, [r0]                    @ d0<- vBB
912    fldd    d1, [r1]                    @ d1<- vCC
913    fcmpd  d0, d1                       @ compare (vBB, vCC)
914    mov     r0, #1                      @ r0<- 1 (default)
915    fmstat                              @ export status flags
916    mvnmi   r0, #0                      @ (less than) r0<- -1
917    moveq   r0, #0                      @ (equal) r0<- 0
918    bx      lr                          @ return with result in r0
919
920
921/* ------------------------------ */
922    .balign 4
923    .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP
924dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP:
925/* File: armv5te-vfp/TEMPLATE_CMPL_DOUBLE_VFP.S */
926    /*
927     * Compare two double-precision values.  Leaves 0, 1, or -1 in r0
928     * based on the results of the comparison.  When neither "greater than"
929     * nor "equal" holds (including unordered operands) the default -1 stays.
930     * int compare(x, y) {
931     *     if (x == y) {
932     *         return 0;
933     *     } else if (x > y) {
934     *         return 1;
935     *     } else if (x < y) {
936     *         return -1;
937     *     } else {                  (unordered)
938     *         return -1;
939     *     }
940     * }
941     * On entry:
942     *    r0 = &op1 [vBB]
943     *    r1 = &op2 [vCC]
944     */
945    /* op vAA, vBB, vCC */
946    fldd    d0, [r0]                    @ d0<- vBB
947    fldd    d1, [r1]                    @ d1<- vCC
948    fcmped  d0, d1                      @ compare (vBB, vCC)
949    mvn     r0, #0                      @ r0<- -1 (default)
950    fmstat                              @ export status flags
951    movgt   r0, #1                      @ (greater than) r0<- 1
952    moveq   r0, #0                      @ (equal) r0<- 0
953    bx      lr                          @ return with result in r0
954
955/* ------------------------------ */
956    .balign 4
957    .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP
958dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP:
959/* File: armv5te-vfp/TEMPLATE_CMPG_FLOAT_VFP.S */
960    /*
961     * Compare two single-precision values.  Leaves 0, 1, or -1 in r0
962     * based on the results of the comparison.  When neither "less than"
963     * nor "equal" holds (including unordered operands) the default 1 stays.
964     * int compare(x, y) {
965     *     if (x == y) {
966     *         return 0;
967     *     } else if (x < y) {
968     *         return -1;
969     *     } else if (x > y) {
970     *         return 1;
971     *     } else {                  (unordered)
972     *         return 1;
973     *     }
974     * }
975     * On entry:
976     *    r0 = &op1 [vBB]
977     *    r1 = &op2 [vCC]
978     */
979    /* op vAA, vBB, vCC */
980    flds    s0, [r0]                    @ s0<- vBB
981    flds    s1, [r1]                    @ s1<- vCC
982    fcmps  s0, s1                      @ compare (vBB, vCC)
983    mov     r0, #1                      @ r0<- 1 (default)
984    fmstat                              @ export status flags
985    mvnmi   r0, #0                      @ (less than) r0<- -1
986    moveq   r0, #0                      @ (equal) r0<- 0
987    bx      lr                          @ return with result in r0
988
989/* ------------------------------ */
990    .balign 4
991    .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP
992dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP:
993/* File: armv5te-vfp/TEMPLATE_CMPL_FLOAT_VFP.S */
994    /*
995     * Compare two single-precision values.  Leaves 0, 1, or -1 in r0
996     * based on the results of the comparison.  When neither "greater than"
997     * nor "equal" holds (including unordered operands) the default -1 stays.
998     * int compare(x, y) {
999     *     if (x == y) {
1000     *         return 0;
1001     *     } else if (x > y) {
1002     *         return 1;
1003     *     } else if (x < y) {
1004     *         return -1;
1005     *     } else {                  (unordered)
1006     *         return -1;
1007     *     }
1008     * }
1009     * On entry:
1010     *    r0 = &op1 [vBB]
1011     *    r1 = &op2 [vCC]
1012     */
1013    /* op vAA, vBB, vCC */
1014    flds    s0, [r0]                    @ s0<- vBB
1015    flds    s1, [r1]                    @ s1<- vCC
1016    fcmps  s0, s1                      @ compare (vBB, vCC)
1017    mvn     r0, #0                      @ r0<- -1 (default)
1018    fmstat                              @ export status flags
1019    movgt   r0, #1                      @ (greater than) r0<- 1
1020    moveq   r0, #0                      @ (equal) r0<- 0
1021    bx      lr                          @ return with result in r0
1022
1023/* ------------------------------ */
1024    .balign 4
1025    .global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP
1026dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP:
1027/* File: armv5te-vfp/TEMPLATE_SQRT_DOUBLE_VFP.S */
1028    /*
1029     * 64-bit floating point vfp sqrt operation.
1030     * If the result is a NaN (it does not compare equal to itself), tail
1031     * call the sqrt library routine with the original operand in r0,r1.
1032     *
1033     * On entry:
1034     *     r2 src addr of op1
1035     * On exit:
1036     *     r0,r1 = res
1037     */
1038    fldd    d0, [r2]             @ d0<- operand
1039    fsqrtd  d1, d0               @ d1<- sqrt(d0)
1040    fcmpd   d1, d1               @ self-compare: equal unless d1 is a NaN
1041    fmstat                       @ export status flags
1042    fmrrd   r0, r1, d1           @ r0,r1<- result (flags are not modified)
1043    bxeq    lr   @ Result OK - return
1044    ldr     r2, .Lsqrt           @ r2<- address of sqrt
1045    fmrrd   r0, r1, d0   @ reload orig operand
1046    bx      r2   @ tail call to sqrt library routine
1047
1048.Lsqrt:
1049    .word   sqrt                 @ literal: address of the sqrt routine
1050
1051/* ------------------------------ */
1052    .balign 4
1053    .global dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON
1054dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON:
1055/* File: armv5te/TEMPLATE_THROW_EXCEPTION_COMMON.S */
1056    /*
1057     * Throw an exception from JIT'ed code by branching to .LhandleException.
1058     * On entry:
1059     *    r0    Dalvik PC that raises the exception (passed through unchanged)
1060     */
1061    b       .LhandleException           @ plain branch; lr is not updated
1062
1063/* ------------------------------ */
1064    .balign 4
1065    .global dvmCompiler_TEMPLATE_MEM_OP_DECODE
1066dvmCompiler_TEMPLATE_MEM_OP_DECODE:
1067/* File: armv5te-vfp/TEMPLATE_MEM_OP_DECODE.S */
1068#if defined(WITH_SELF_VERIFICATION)
1069    /*
1070     * This handler encapsulates heap memory ops for selfVerification mode.
1071     * (Without WITH_SELF_VERIFICATION the label is emitted with no body.)
1072     * The call to the handler is inserted prior to a heap memory operation.
1073     * This handler then calls a function to decode the memory op, and process
1074     * it accordingly. Afterwards, the handler changes the return address to
1075     * skip the memory op so it never gets executed.
1076     */
1077    vpush   {d0-d15}                    @ save out all fp registers
1078    push    {r0-r12,lr}                 @ save out all registers
1079    mov     r0, lr                      @ arg0 <- link register
1080    mov     r1, sp                      @ arg1 <- stack pointer (saved r0-r12,lr)
1081    ldr     r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S
1082    blx     r2                          @ decode and handle the mem op
1083    pop     {r0-r12,lr}                 @ restore all registers; lr is reloaded from the stack
1084    vpop    {d0-d15}                    @ restore all fp registers
1085    bx      lr                          @ return to compiled code
1086#endif
1087
1088/* ------------------------------ */
1089    .balign 4
1090    .global dvmCompiler_TEMPLATE_STRING_COMPARETO
1091dvmCompiler_TEMPLATE_STRING_COMPARETO:
1092/* File: armv5te/TEMPLATE_STRING_COMPARETO.S */
1093    /*
1094     * String's compareTo.
1095     *
1096     * Requires r0/r1 to have been previously checked for null.  Will
1097     * return negative if this's string is < comp, 0 if they are the
1098     * same and positive if >.
1099     *
1100     * IMPORTANT NOTE:
1101     *
1102     * This code relies on hard-coded offsets for string objects, and must be
1103     * kept in sync with definitions in UtfString.h.  See asm-constants.h
1104     *
1105     * On entry:
1106     *    r0:   this object pointer
1107     *    r1:   comp object pointer
1108     * On exit: result in r0.  Writes r1-r4, r7-r12 (lr too on the memcmp16 path).
1109     */
1110
1111    mov    r2, r0         @ this to r2, opening up r0 for return value
1112    subs   r0, r2, r1     @ Same?
1113    bxeq   lr             @ same object: return 0 (r0 is already 0)
1114
1115    ldr    r4, [r2, #STRING_FIELDOFF_OFFSET]    @ r4<- this offset
1116    ldr    r9, [r1, #STRING_FIELDOFF_OFFSET]    @ r9<- comp offset
1117    ldr    r7, [r2, #STRING_FIELDOFF_COUNT]     @ r7<- this count
1118    ldr    r10, [r1, #STRING_FIELDOFF_COUNT]    @ r10<- comp count
1119    ldr    r2, [r2, #STRING_FIELDOFF_VALUE]     @ r2<- this value
1120    ldr    r1, [r1, #STRING_FIELDOFF_VALUE]     @ r1<- comp value
1121
1122    /*
1123     * At this point, we have:
1124     *    value:  r2/r1
1125     *    offset: r4/r9
1126     *    count:  r7/r10
1127     * We're going to compute
1128     *    r11 <- countDiff
1129     *    r10 <- minCount
1130     */
1131     subs  r11, r7, r10   @ r11<- this count - comp count
1132     movls r10, r7        @ this count lower or same (unsigned): r10<- this count
1133
1134     /* Now, build pointers to the string data */
1135     add   r2, r2, r4, lsl #1   @ r2<- this value + 2 * offset
1136     add   r1, r1, r9, lsl #1   @ r1<- comp value + 2 * offset
1137     /*
1138      * Note: data pointers point to previous element so we can use pre-index
1139      * mode with base writeback.
1140      */
1141     add   r2, #16-2   @ offset to contents[-1] (16 is the hard-coded data start)
1142     add   r1, #16-2   @ offset to contents[-1] (16 is the hard-coded data start)
1143
1144     /*
1145      * At this point we have:
1146      *   r2: *this string data
1147      *   r1: *comp string data
1148      *   r10: iteration count for comparison
1149      *   r11: value to return if the first part of the string is equal
1150      *   r0: reserved for result
1151      *   r3, r4, r7, r8, r9, r12 available for loading string data
1152      */
1153
1154    subs  r10, #2               @ at least two elements to compare?
1155    blt   do_remainder2         @ no: handle zero or one element
1156
1157      /*
1158       * Unroll the first two checks so we can quickly catch early mismatch
1159       * on long strings (but preserve incoming alignment)
1160       */
1161
1162    ldrh  r3, [r2, #2]!         @ r3<- this[0]
1163    ldrh  r4, [r1, #2]!         @ r4<- comp[0]
1164    ldrh  r7, [r2, #2]!         @ r7<- this[1]
1165    ldrh  r8, [r1, #2]!         @ r8<- comp[1]
1166    subs  r0, r3, r4            @ r0<- difference of the first pair
1167    subeqs  r0, r7, r8          @ first pair equal: r0<- difference of second pair
1168    bxne  lr                    @ mismatch: return the difference
1169    cmp   r10, #28              @ more than 28 elements still to compare?
1170    bgt   do_memcmp16           @ yes: use the long string path
1171    subs  r10, #3               @ at least three elements left?
1172    blt   do_remainder          @ no: finish one element at a time
1173
1174loopback_triple:
1175    ldrh  r3, [r2, #2]!         @ load three elements from each string
1176    ldrh  r4, [r1, #2]!
1177    ldrh  r7, [r2, #2]!
1178    ldrh  r8, [r1, #2]!
1179    ldrh  r9, [r2, #2]!
1180    ldrh  r12,[r1, #2]!
1181    subs  r0, r3, r4            @ r0<- first difference in this group of three
1182    subeqs  r0, r7, r8
1183    subeqs  r0, r9, r12
1184    bxne  lr                    @ mismatch: return the difference
1185    subs  r10, #3               @ three more elements consumed
1186    bge   loopback_triple       @ loop while at least three remained
1187
1188do_remainder:
1189    adds  r10, #3               @ undo the bias: r10<- elements left (0..2)
1190    beq   returnDiff            @ none left: result is countDiff
1191
1192loopback_single:
1193    ldrh  r3, [r2, #2]!         @ r3<- next element of this
1194    ldrh  r4, [r1, #2]!         @ r4<- next element of comp
1195    subs  r0, r3, r4            @ r0<- difference
1196    bxne  lr                    @ mismatch: return the difference
1197    subs  r10, #1               @ one more element consumed
1198    bne     loopback_single     @ loop until the count reaches zero
1199
1200returnDiff:
1201    mov   r0, r11               @ common prefix equal: return countDiff
1202    bx    lr
1203
1204do_remainder2:
1205    adds  r10, #2               @ undo the bias: r10<- elements left (0 or 1)
1206    bne   loopback_single       @ one element: compare it
1207    mov   r0, r11               @ nothing to compare: return countDiff
1208    bx    lr
1209
1210    /* Long string case */
1211do_memcmp16:
1212    mov   r4, lr                @ r4<- return address (lr is reused for the call)
1213    ldr   lr, .Lmemcmp16        @ lr<- address of __memcmp16
1214    mov   r7, r11               @ r7<- countDiff, kept across the call
1215    add   r0, r2, #2            @ r0<- next uncompared element of this
1216    add   r1, r1, #2            @ r1<- next uncompared element of comp
1217    mov   r2, r10               @ r2<- number of elements left to compare
1218    blx   lr                    @ call __memcmp16(r0, r1, r2)
1219    cmp   r0, #0                @ any difference reported?
1220    bxne  r4                    @ yes: return it
1221    mov   r0, r7                @ no: return countDiff
1222    bx    r4
1223
1224.Lmemcmp16:
1225    .word __memcmp16            @ literal: address of __memcmp16
1226
1227
1228/* ------------------------------ */
1229    .balign 4
1230    .global dvmCompiler_TEMPLATE_STRING_INDEXOF
1231dvmCompiler_TEMPLATE_STRING_INDEXOF:
1232/* File: armv5te/TEMPLATE_STRING_INDEXOF.S */
1233    /*
1234     * String's indexOf.
1235     *
1236     * Requires r0 to have been previously checked for null.  Will
1237     * return index of match of r1 in r0, or -1 if there is no match.
1238     *
1239     * IMPORTANT NOTE:
1240     *
1241     * This code relies on hard-coded offsets for string objects, and must be
1242     * kept in sync with definitions in UtfString.h.  See asm-constants.h
1243     *
1244     * On entry:
1245     *    r0:   string object pointer
1246     *    r1:   char to match
1247     *    r2:   Starting offset in string data
1248     */
1249
1250    ldr    r7, [r0, #STRING_FIELDOFF_OFFSET]    @ r7<- offset
1251    ldr    r8, [r0, #STRING_FIELDOFF_COUNT]     @ r8<- count
1252    ldr    r0, [r0, #STRING_FIELDOFF_VALUE]     @ r0<- value
1253
1254    /*
1255     * At this point, we have:
1256     *    r0: value field of the string object
1257     *    r1: char to match
1258     *    r2: starting offset
1259     *    r7: offset
1260     *    r8: string length
1261     */
1262
1263     /* Build pointer to start of string data */
1264     add   r0, #16              @ skip the hard-coded 16 bytes before the data
1265     add   r0, r0, r7, lsl #1   @ add 2 * offset
1266
1267     /* Save a copy of starting data in r7 */
1268     mov   r7, r0
1269
1270     /* Clamp start to [0..count] */
1271     cmp   r2, #0
1272     movlt r2, #0               @ negative start becomes 0
1273     cmp   r2, r8
1274     movgt r2, r8               @ start past the end becomes count
1275
1276     /* Build pointer to start of data to compare and pre-bias */
1277     add   r0, r0, r2, lsl #1   @ r0<- address of element [start]
1278     sub   r0, #2               @ back up one element for the pre-indexed loads
1279
1280     /* Compute iteration count */
1281     sub   r8, r2               @ r8<- count - start
1282
1283     /*
1284      * At this point we have:
1285      *   r0: start of data to test
1286      *   r1: char to compare
1287      *   r8: iteration count
1288      *   r7: original start of string
1289      *   r3, r4, r9, r10, r11, r12 available for loading string data
1290      */
1291
1292    subs  r8, #4                @ at least four elements to test?
1293    blt   indexof_remainder     @ no: test one element at a time
1294
1295indexof_loop4:
1296    ldrh  r3, [r0, #2]!         @ load four consecutive elements
1297    ldrh  r4, [r0, #2]!
1298    ldrh  r10, [r0, #2]!
1299    ldrh  r11, [r0, #2]!        @ r0 now points at the fourth element
1300    cmp   r3, r1
1301    beq   match_0               @ first of the four matched
1302    cmp   r4, r1
1303    beq   match_1               @ second matched
1304    cmp   r10, r1
1305    beq   match_2               @ third matched
1306    cmp   r11, r1
1307    beq   match_3               @ fourth matched
1308    subs  r8, #4                @ four more elements consumed
1309    bge   indexof_loop4         @ loop while at least four remained
1310
1311indexof_remainder:
1312    adds    r8, #4              @ undo the bias: r8<- elements left (0..3)
1313    beq     indexof_nomatch     @ none left
1314
1315indexof_loop1:
1316    ldrh  r3, [r0, #2]!         @ r3<- next element; r0 points at it
1317    cmp   r3, r1
1318    beq   match_3               @ r0 is at the match, so no adjustment is needed
1319    subs  r8, #1                @ one more element consumed
1320    bne   indexof_loop1         @ loop until the count reaches zero
1321
1322indexof_nomatch:
1323    mov   r0, #-1               @ no match: return -1
1324    bx    lr
1325
1326match_0:
1327    sub   r0, #6                @ r0 is three elements past the match
1328    sub   r0, r7                @ byte offset from the start of the string data
1329    asr   r0, r0, #1            @ bytes to element index
1330    bx    lr
1331match_1:
1332    sub   r0, #4                @ r0 is two elements past the match
1333    sub   r0, r7                @ byte offset from the start of the string data
1334    asr   r0, r0, #1            @ bytes to element index
1335    bx    lr
1336match_2:
1337    sub   r0, #2                @ r0 is one element past the match
1338    sub   r0, r7                @ byte offset from the start of the string data
1339    asr   r0, r0, #1            @ bytes to element index
1340    bx    lr
1341match_3:
1342    sub   r0, r7                @ r0 is at the match: byte offset from start
1343    asr   r0, r0, #1            @ bytes to element index
1344    bx    lr
1345
1346
1347/* ------------------------------ */
1348    .balign 4
1349    .global dvmCompiler_TEMPLATE_INTERPRET
1350dvmCompiler_TEMPLATE_INTERPRET:
1351/* File: armv5te/TEMPLATE_INTERPRET.S */
1352    /*
1353     * This handler transfers control to the interpreter without performing
1354     * any lookups.  It may be called either as part of a normal chaining
1355     * operation, or from the transition code in header.S.  We distinguish
1356     * the two cases by looking at the link register.  If called from a
1357     * translation chain, lr + 3 is the address of the chaining Dalvik PC.
1358     * On entry:
1359     *    lr - if NULL:
1360     *        r1 - the Dalvik PC to begin interpretation.
1361     *    else
1362     *        [lr, #3] contains Dalvik PC to begin interpretation
1363     *    rGLUE - pointer to interpState
1364     *    rFP - Dalvik frame pointer
1365     */
1366    cmp     lr, #0                       @ called with a NULL lr?
1367    ldrne   r1,[lr, #3]                  @ no: r1<- Dalvik PC stored at lr + 3
1368    ldr     r2, .LinterpPunt             @ r2<- address of dvmJitToInterpPunt
1369    mov     r0, r1                       @ set Dalvik PC
1370    bx      r2
1371    @ doesn't return
1372
1373.LinterpPunt:
1374    .word   dvmJitToInterpPunt           @ literal: address of dvmJitToInterpPunt
1375
1376/* ------------------------------ */
1377    .balign 4
1378    .global dvmCompiler_TEMPLATE_MONITOR_ENTER
1379dvmCompiler_TEMPLATE_MONITOR_ENTER:
1380/* File: armv5te/TEMPLATE_MONITOR_ENTER.S */
1381    /*
1382     * Call out to the runtime to lock an object.  Because this thread
1383     * may have been suspended in THREAD_MONITOR state and the Jit's
1384     * translation cache subsequently cleared, we cannot return directly.
1385     * Instead, unconditionally transition to the interpreter to resume.
1386     *
1387     * On entry:
1388     *    r0 - self pointer
1389     *    r1 - the object (which has already been null-checked by the caller)
1390     *    r4 - the Dalvik PC of the following instruction.
1391     */
1392    ldr     r2, .LdvmLockObject          @ r2<- address of dvmLockObject
1393    mov     r3, #0                       @ Record that we're not returning
1394    str     r3, [r0, #offThread_inJitCodeCache]
1395    blx     r2                           @ dvmLockObject(self, obj)
1396    @ refresh Jit's on/off status
1397    ldr     r0, [rGLUE, #offGlue_ppJitProfTable]
1398    ldr     r0, [r0]                     @ r0<- current value behind ppJitProfTable
1399    ldr     r2, .LdvmJitToInterpNoChain  @ r2<- address of dvmJitToInterpNoChain
1400    str     r0, [rGLUE, #offGlue_pJitProfTable]
1401    @ Bail to interpreter - no chain [note - r4 still contains rPC]
1402#if defined(WITH_JIT_TUNING)
1403    mov     r0, #kHeavyweightMonitor
1404#endif
1405    bx      r2                           @ does not return here
1406
1407
1408/* ------------------------------ */
1409    .balign 4
1410    .global dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG
1411dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
1412/* File: armv5te/TEMPLATE_MONITOR_ENTER_DEBUG.S */
1413    /*
1414     * To support deadlock prediction, this version of MONITOR_ENTER
1415     * will always call the heavyweight dvmLockObject, check for an
1416     * exception and then bail out to the interpreter.
1417     *
1418     * On entry:
1419     *    r0 - self pointer
1420     *    r1 - the object (which has already been null-checked by the caller)
1421     *    r4 - the Dalvik PC of the following instruction.
1422     * A pending exception is routed to .LhandleException with r0 = r4 - 2.
1423     */
1424    ldr     r2, .LdvmLockObject          @ r2<- address of dvmLockObject
1425    mov     r3, #0                       @ Record that we're not returning
1426    str     r3, [r0, #offThread_inJitCodeCache]
1427    blx     r2             @ dvmLockObject(self, obj)
1428    @ refresh Jit's on/off status & test for exception
1429    ldr     r0, [rGLUE, #offGlue_ppJitProfTable]
1430    ldr     r1, [rGLUE, #offGlue_self]   @ r1<- glue->self
1431    ldr     r0, [r0]                     @ r0<- current value behind ppJitProfTable
1432    ldr     r1, [r1, #offThread_exception] @ r1<- self->exception
1433    str     r0, [rGLUE, #offGlue_pJitProfTable]
1434    cmp     r1, #0                       @ exception pending?
1435    beq     1f                           @ no: bail to the interpreter
1436    @ .LhandleException is a code label, not a literal word, so branch to it
1437    sub     r0, r4, #2     @ roll dPC back to this monitor instruction
1438    b       .LhandleException            @ r0 = faulting Dalvik PC
14391:
1440    @ Bail to interpreter - no chain [note - r4 still contains rPC]
1441#if defined(WITH_JIT_TUNING)
1442    mov     r0, #kHeavyweightMonitor
1443#endif
1444    ldr     pc, .LdvmJitToInterpNoChain  @ jump through the literal; does not return
1445
1446    .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
1447/* File: armv5te/footer.S */
1448/*
1449 * ===========================================================================
1450 *  Common subroutines and data
1451 * ===========================================================================
1452 */
1453
1454    .text
1455    .align  2
1456.LinvokeNative:
1457    @ Prep for the native call
1458    @ r1 = newFP, r0 = methodToCall
1459    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
1460    mov     r2, #0
1461    ldr     r9, [r3, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
1462    str     r2, [r3, #offThread_inJitCodeCache] @ not in jit code cache
1463    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
1464    str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
1465                                        @ newFp->localRefCookie=top
1466    mov     r9, r3                      @ r9<- glue->self (preserve)
1467    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
1468
1469    mov     r2, r0                      @ r2<- methodToCall
1470    mov     r0, r1                      @ r0<- newFP
1471    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
1472
1473    LDR_PC_LR "[r2, #offMethod_nativeFunc]"
1474
1475    @ Refresh Jit's on/off status
1476    ldr     r3, [rGLUE, #offGlue_ppJitProfTable]
1477
1478    @ native return; r9=self, r10=newSaveArea
1479    @ equivalent to dvmPopJniLocals
1480    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
1481    ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
1482    ldr     r1, [r9, #offThread_exception] @ check for exception
1483    ldr     r3, [r3]    @ r3 <- pointer to Jit profile table
1484    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
1485    cmp     r1, #0                      @ null?
1486    str     r0, [r9, #offThread_jniLocal_topCookie] @ new top <- old top
1487    ldr     r0, [r10, #offStackSaveArea_savedPc] @ reload rPC
1488    str     r3, [rGLUE, #offGlue_pJitProfTable]  @ cache current JitProfTable
1489
1490    @ r0 = dalvikCallsitePC
1491    bne     .LhandleException           @ not null - handle exception
1492
1493    str     r2, [r9, #offThread_inJitCodeCache] @ set the new mode
1494    cmp     r2, #0                      @ return chaining cell still exists?
1495    bxne    r2                          @ yes - go ahead
1496
1497    @ continue executing the next instruction through the interpreter
1498    ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
1499    add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
1500#if defined(WITH_JIT_TUNING)
1501    mov     r0, #kCallsiteInterpreted
1502#endif
1503    mov     pc, r1                      @ jump to dvmJitToInterpTraceSelectNoChain
1504
1505/*
1506 * Common exception exit: leave JIT mode and enter dvmMterpCommonExceptionThrown.
1507 * On entry: r0 = faulting Dalvik PC (copied to rPC below).
1508 */
1509.LhandleException:
1510#if defined(WITH_SELF_VERIFICATION)
1511    ldr     pc, .LdeadFood @ should not see this under self-verification mode
1512.LdeadFood:
1513    .word   0xdeadf00d      @ value loaded into pc by the instruction above
1514#endif
1515    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
1516    mov     r2, #0
1517    str     r2, [r3, #offThread_inJitCodeCache] @ in interpreter land
1518    ldr     r1, .LdvmMterpCommonExceptionThrown @ PIC way of getting &func
1519    ldr     rIBASE, .LdvmAsmInstructionStart    @ same as above
1520    mov     rPC, r0                 @ reload the faulting Dalvik address
1521    mov     pc, r1                  @ branch to dvmMterpCommonExceptionThrown
1522
1523    .align  2
1524.LdvmAsmInstructionStart:
1525    .word   dvmAsmInstructionStart          @ loaded into rIBASE by .LhandleException
1526.LdvmJitToInterpTraceSelectNoChain:
1527    .word   dvmJitToInterpTraceSelectNoChain @ interpreter re-entry used by .LinvokeNative
1528.LdvmJitToInterpNoChain:
1529    .word   dvmJitToInterpNoChain           @ interpreter re-entry used by the MONITOR_ENTER templates
1530.LdvmMterpStdBail:
1531    .word   dvmMterpStdBail                 @ NOTE(review): no reference visible in this part of the file
1532.LdvmMterpCommonExceptionThrown:
1533    .word   dvmMterpCommonExceptionThrown   @ branch target of .LhandleException
1534.LdvmLockObject:
1535    .word   dvmLockObject                   @ called by the MONITOR_ENTER templates
1536#if defined(WITH_JIT_TUNING)
1537.LdvmICHitCount:
1538    .word   gDvmICHitCount                  @ NOTE(review): no reference visible in this part of the file
1539#endif
1540#if defined(WITH_SELF_VERIFICATION)
1541.LdvmSelfVerificationMemOpDecode:
1542    .word   dvmSelfVerificationMemOpDecode  @ called by TEMPLATE_MEM_OP_DECODE
1543#endif
1544.L__aeabi_cdcmple:
1545    .word   __aeabi_cdcmple                 @ NOTE(review): no reference visible in this part of the file
1546.L__aeabi_cfcmple:
1547    .word   __aeabi_cfcmple                 @ NOTE(review): no reference visible in this part of the file
1548
1549    .global dmvCompilerTemplateEnd      @ exported symbol; spelled "dmv", keep as is
1550dmvCompilerTemplateEnd:                 @ address just past the last footer literal
1551
1552#endif /* WITH_JIT */
1553
1554