CompilerTemplateAsm-armv7-a-neon.S revision 7365493ad8d360c1dcf9cd8b6eee62747af01cae
1/*
2 * This file was generated automatically by gen-template.py for 'armv7-a-neon'.
3 *
4 * --> DO NOT EDIT <--
5 */
6
7/* File: armv5te/header.S */
8/*
9 * Copyright (C) 2008 The Android Open Source Project
10 *
11 * Licensed under the Apache License, Version 2.0 (the "License");
12 * you may not use this file except in compliance with the License.
13 * You may obtain a copy of the License at
14 *
15 *      http://www.apache.org/licenses/LICENSE-2.0
16 *
17 * Unless required by applicable law or agreed to in writing, software
18 * distributed under the License is distributed on an "AS IS" BASIS,
19 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 * See the License for the specific language governing permissions and
21 * limitations under the License.
22 */
23
24#if defined(WITH_JIT)
25
26/*
27 * ARMv5 definitions and declarations.
28 */
29
30/*
31ARM EABI general notes:
32
33r0-r3 hold first 4 args to a method; they are not preserved across method calls
34r4-r8 are available for general use
35r9 is given special treatment in some situations, but not for us
36r10 (sl) seems to be generally available
37r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
38r12 (ip) is scratch -- not preserved across method calls
39r13 (sp) should be managed carefully in case a signal arrives
40r14 (lr) must be preserved
41r15 (pc) can be tinkered with directly
42
43r0 holds returns of <= 4 bytes
44r0-r1 hold returns of 8 bytes, low word in r0
45
46Callee must save/restore r4+ (except r12) if it modifies them.
47
48Stack is "full descending".  Only the arguments that don't fit in the first 4
49registers are placed on the stack.  "sp" points at the first stacked argument
50(i.e. the 5th arg).
51
52VFP: single-precision results in s0, double-precision results in d0.
53
54In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
5564-bit quantities (long long, double) must be 64-bit aligned.
56*/
57
58/*
59JIT and ARM notes:
60
61The following registers have fixed assignments:
62
63  reg nick      purpose
64  r5  rFP       interpreted frame pointer, used for accessing locals and args
65  r6  rGLUE     MterpGlue pointer
66
67The following registers have fixed assignments in mterp but are scratch
68registers in compiled code
69
70  reg nick      purpose
71  r4  rPC       interpreted program counter, used for fetching instructions
72  r7  rINST     first 16-bit code unit of current instruction
73  r8  rIBASE    interpreted instruction base pointer, used for computed goto
74
75Macros are provided for common operations.  Each macro MUST emit only
76one instruction to make instruction-counting easier.  They MUST NOT alter
77unspecified registers or condition codes.
78*/
79
80/* single-purpose registers, given names for clarity */
81#define rPC     r4      /* interpreted program counter in mterp; scratch in compiled code */
82#define rFP     r5      /* interpreted frame pointer: base for locals and args */
83#define rGLUE   r6      /* MterpGlue pointer */
84#define rINST   r7      /* first 16-bit code unit of current instruction in mterp; scratch in compiled code */
85#define rIBASE  r8      /* interpreted instruction base pointer in mterp; scratch in compiled code */
86
87/*
88 * Given a frame pointer, find the stack save area.
89 *
90 * In C this is "((StackSaveArea*)(_fp) -1)".
91 */
92#define SAVEAREA_FROM_FP(_reg, _fpreg) \
93    sub     _reg, _fpreg, #sizeofStackSaveArea
94
/*
 * Store rPC into the currentPc field of the stack save area that sits
 * sizeofStackSaveArea bytes below the frame addressed by rFP.
 * Emits a single STR; no register other than memory is modified.
 */
95#define EXPORT_PC() \
96    str     rPC, [rFP, #(-sizeofStackSaveArea + offStackSaveArea_currentPc)]
97
98/*
99 * This is a #include, not a %include, because we want the C pre-processor
100 * to expand the macros into assembler assignment statements.
101 */
102#include "../../../mterp/common/asm-constants.h"
103
104
105/* File: armv5te-vfp/platform.S */
106/*
107 * ===========================================================================
108 *  CPU-version-specific defines and utility
109 * ===========================================================================
110 */
111
112/*
113 * Macro for "MOV LR,PC / LDR PC,xxx", which is not allowed pre-ARMv5.
114 * Jump to subroutine.
 *
 * "source" is the memory operand holding the address to jump to.
 * The two instructions must stay adjacent and in this order: the MOV
 * captures the return address from PC, and the LDR into PC performs the
 * jump.  (Assumes ARM state, where reading PC yields the address of the
 * current instruction plus 8, i.e. the instruction after the LDR.)
115 *
116 * May modify IP and LR.
 * (The expansion below writes LR and PC only; IP is not touched here.)
117 */
118.macro  LDR_PC_LR source
119    mov     lr, pc                  @ lr<- return address (instruction after the ldr)
120    ldr     pc, \source
121.endm
122
123
/*
 * Exported, function-typed label placed in .text immediately ahead of the
 * first template.  NOTE(review): presumably consumers use it as the base
 * address of the template region -- confirm against the code that
 * references dvmCompilerTemplateStart.
 */
123    .global dvmCompilerTemplateStart
124    .type   dvmCompilerTemplateStart, %function
125    .text
126
127
128dvmCompilerTemplateStart:
129
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_CMP_LONG
dvmCompiler_TEMPLATE_CMP_LONG:
/* File: armv5te/TEMPLATE_CMP_LONG.S */
    /*
     * cmp-long vAA, vBB, vCC
     *
     * Three-way signed comparison of two 64-bit values.
     *
     * On entry:
     *     r1:r0 = vBB (high:low word)
     *     r3:r2 = vCC (high:low word)
     * On exit:
     *     r0 = -1 if vBB < vCC, 0 if equal, 1 if vBB > vCC
     *
     * The high words are compared as signed quantities.  Only when they
     * are equal do the low words decide, and those compare as unsigned.
     *
     * A flag-only alternative (subs / sbcs / subeqs) would leave
     * { <0, 0, >0 } in a scratch register, but the result has to be exactly
     * -1, 0 or 1, so the multi-compare form is used: it finishes after two
     * or three instructions plus a branch whenever the high words differ.
     */
    cmp     r1, r3                      @ signed compare of the high words
    bgt     .LTEMPLATE_CMP_LONG_greater
    blt     .LTEMPLATE_CMP_LONG_less
    subs    r0, r0, r2                  @ high words equal: r0<- low(vBB) - low(vCC)
    bxeq    lr                          @ identical values: r0 already holds 0
    blo     .LTEMPLATE_CMP_LONG_less    @ borrow => low(vBB) < low(vCC), unsigned
.LTEMPLATE_CMP_LONG_greater:
    mov     r0, #1                      @ r0<- 1
    bx      lr
.LTEMPLATE_CMP_LONG_less:
    mvn     r0, #0                      @ r0<- -1
    bx      lr
168
169
170/* ------------------------------ */
171    .balign 4
172    .global dvmCompiler_TEMPLATE_RETURN
173dvmCompiler_TEMPLATE_RETURN:
174/* File: armv5te/TEMPLATE_RETURN.S */
175    /*
176     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
177     * If the stored value in returnAddr
178     * is non-zero, the caller is compiled by the JIT thus return to the
179     * address in the code cache following the invoke instruction. Otherwise
180     * return to the special dvmJitToInterpNoChain entry point.
     *
     * Other exits taken by the code below:
     *   - The frame being returned to has a null method (break frame):
     *     rPC and rFP are stored at the start of the glue struct and
     *     dvmMterpStdBail is called with r0 = rGLUE, r1 = 0.  When
     *     WITH_SELF_VERIFICATION is defined the template branches through
     *     lr instead.
     *   - suspendCount is non-zero: the chaining-cell address in r9 is
     *     cleared, so control leaves through dvmJitToInterpNoChain.
     *
     * When WITH_SELF_VERIFICATION is defined r9 is forced to 0, so the
     * chaining cell is never used.  rPC is advanced by 6 bytes past the
     * saved PC before leaving.
     *
     * The instruction order matters: the flags from "cmp r2, #0" are still
     * consumed by the conditional ldrne several instructions later.
181     */
182    SAVEAREA_FROM_FP(r0, rFP)           @ r0<- saveArea (old)
183    ldr     r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
184    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
185    ldr     rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc
186#if !defined(WITH_SELF_VERIFICATION)
187    ldr     r9,  [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret
188#else
189    mov     r9, #0                      @ disable chaining
190#endif
191    ldr     r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)]
192                                        @ r2<- method we're returning to
193    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
194    cmp     r2, #0                      @ break frame?
195#if !defined(WITH_SELF_VERIFICATION)
196    beq     1f                          @ bail to interpreter
197#else
198    blxeq   lr                          @ punt to interpreter and compare state
199#endif
200    ldr     r1, .LdvmJitToInterpNoChain @ defined in footer.S
201    mov     rFP, r10                    @ publish new FP
202    ldrne   r10, [r2, #offMethod_clazz] @ r10<- method->clazz (flags from cmp r2, #0)
203    ldr     r8, [r8]                    @ r8<- suspendCount
204
205    str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
206    ldr     r0, [r10, #offClassObject_pDvmDex] @ r0<- method->clazz->pDvmDex
207    str     rFP, [r3, #offThread_curFrame] @ self->curFrame = fp
208    add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
209    str     r0, [rGLUE, #offGlue_methodClassDex]
210    cmp     r8, #0                      @ check the suspendCount
211    movne   r9, #0                      @ clear the chaining cell address
212    str     r9, [r3, #offThread_inJitCodeCache] @ in code cache or not
213    cmp     r9, #0                      @ chaining cell exists?
214    blxne   r9                          @ jump to the chaining cell
215#if defined(WITH_JIT_TUNING)
216    mov     r0, #kCallsiteInterpreted
217#endif
218    mov     pc, r1                      @ callsite is interpreted
2191:
220    stmia   rGLUE, {rPC, rFP}           @ SAVE_PC_FP_TO_GLUE()
221    ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
222    mov     r1, #0                      @ changeInterp = false
223    mov     r0, rGLUE                   @ Expecting rGLUE in r0
224    blx     r2                          @ exit the interpreter
225
226/* ------------------------------ */
227    .balign 4
228    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT
229dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
230/* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
231    /*
232     * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
233     * into rPC then jump to dvmJitToInterpTraceSelectNoChain to dispatch the
234     * runtime-resolved callee.
     *
     * Early exits through lr ("bxlo lr" / "bxne lr"):
     *   - the new frame bottom would fall below interpStackEnd;
     *   - suspendCount is non-zero.
     * A callee with ACC_NATIVE set is handed to .LinvokeNative, or, when
     * WITH_SELF_VERIFICATION is defined, also exits through lr.
     *
     * The new save area is filled in before the suspendCount test, so those
     * stores have already happened when the suspend exit is taken.
235     */
236    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
237    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
238    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
239    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
240    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
241    add     r3, r1, #1  @ Thumb addr is odd
242    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
243    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
244    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
245    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
246    ldr     r8, [r8]                    @ r8<- suspendCount (int)
247    cmp     r10, r9                     @ bottom < interpStackEnd?
248    bxlo    lr                          @ return to raise stack overflow excep.
249    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
250    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
251    ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
252    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
253    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
254    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
255
256
257    @ set up newSaveArea
258    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
259    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
260    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
261    cmp     r8, #0                      @ suspendCount != 0
262    bxne    lr                          @ bail to the interpreter
263    tst     r10, #ACC_NATIVE            @ is the callee a native method?
264#if !defined(WITH_SELF_VERIFICATION)
265    bne     .LinvokeNative
266#else
267    bxne    lr                          @ bail to the interpreter
268#endif
269
270    ldr     r10, .LdvmJitToInterpTraceSelectNoChain
271    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
272    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
273
274    @ Update "glue" values for the new method
275    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
276    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
277    mov     rFP, r1                         @ fp = newFp
278    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
279
280    @ Start executing the callee
281#if defined(WITH_JIT_TUNING)
282    mov     r0, #kInlineCacheMiss
283#endif
284    mov     pc, r10                         @ dvmJitToInterpTraceSelectNoChain
285
286/* ------------------------------ */
287    .balign 4
288    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN
289dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
290/* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S */
291    /*
292     * For monomorphic callsite, setup the Dalvik frame and return to the
293     * Thumb code through the link register to transfer control to the callee
294     * method through a dedicated chaining cell.
     *
     * Return addresses used below:
     *   - lr   : normal exit, taken once the frame and glue are updated;
     *   - lr+2 : kept in r12 and used when the new frame bottom would fall
     *            below interpStackEnd or when suspendCount is non-zero.
     *
     * .LinvokeChain is also entered from
     * TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN when its class check succeeds.
295     */
296    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
297    @ methodToCall is guaranteed to be non-native
298.LinvokeChain:
299    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
300    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
301    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
302    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
303    add     r3, r1, #1  @ Thumb addr is odd
304    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
305    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
306    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
307    add     r12, lr, #2                 @ setup the punt-to-interp address
308    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
309    ldr     r8, [r8]                    @ r8<- suspendCount (int)
310    cmp     r10, r9                     @ bottom < interpStackEnd?
311    bxlo    r12                         @ return to raise stack overflow excep.
312    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
313    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
314    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
315    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
316    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
317
318
319    @ set up newSaveArea
320    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
321    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
322    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
323    cmp     r8, #0                      @ suspendCount != 0
324    bxne    r12                         @ bail to the interpreter
325
326    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
327    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
328
329    @ Update "glue" values for the new method
330    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
331    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
332    mov     rFP, r1                         @ fp = newFp
333    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
334
335    bx      lr                              @ return to the callee-chaining cell
336
337
338
339/* ------------------------------ */
340    .balign 4
341    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
342dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
343/* File: armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
344    /*
345     * For polymorphic callsite, check whether the cached class pointer matches
346     * the current one. If so setup the Dalvik frame and return to the
347     * Thumb code through the link register to transfer control to the callee
348     * method through a dedicated chaining cell.
349     *
350     * The predicted chaining cell is declared in ArmLIR.h with the
351     * following layout:
352     *
353     *  typedef struct PredictedChainingCell {
354     *      u4 branch;
355     *      const ClassObject *clazz;
356     *      const Method *method;
357     *      u4 counter;
358     *  } PredictedChainingCell;
359     *
360     * Upon returning to the callsite:
361     *    - lr  : to branch to the chaining cell
362     *    - lr+2: to punt to the interpreter
363     *    - lr+4: to fully resolve the callee and may rechain.
364     *            r3 <- class
365     *            r9 <- counter
     *
     * On the lr+4 path r1 holds the rechain count handed back to the
     * callsite: 0 when the cell's clazz field is still null, otherwise the
     * shared rechainCount minus one (also written back to the glue struct).
     * r9 keeps the value loaded from glue->icRechainCount before that
     * decrement.
     *
     * The cell fields are read at hard-coded offsets 4 (clazz) and
     * 8 (method), matching the struct layout above.
366     */
367    @ r0 = this, r1 = returnCell, r2 = predictedChainCell, rPC = dalvikCallsite
368    ldr     r3, [r0, #offObject_clazz]  @ r3 <- this->class
369    ldr     r8, [r2, #4]    @ r8 <- predictedChainCell->clazz
370    ldr     r0, [r2, #8]    @ r0 <- predictedChainCell->method
371    ldr     r9, [rGLUE, #offGlue_icRechainCount]   @ r9 <- shared rechainCount
372    cmp     r3, r8          @ predicted class == actual class?
373#if defined(WITH_JIT_TUNING)
374    ldr     r7, .LdvmICHitCount
375    ldreq   r10, [r7, #0]
376    add     r10, r10, #1    @ unconditional, but only stored back on a hit (streq)
377    streq   r10, [r7, #0]
378#endif
379    beq     .LinvokeChain   @ predicted chain is valid
380    ldr     r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
381    cmp     r8, #0          @ initialized class or not
382    moveq   r1, #0
383    subne   r1, r9, #1      @ count--
384    strne   r1, [rGLUE, #offGlue_icRechainCount]   @ write back to InterpState
385    add     lr, lr, #4      @ return to fully-resolve landing pad
386    /*
387     * r1 <- count
388     * r2 <- &predictedChainCell
389     * r3 <- this->class
390     * r4 <- dPC
391     * r7 <- this->class->vtable
392     */
393    bx      lr
394
395/* ------------------------------ */
396    .balign 4
397    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE
398dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
399/* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
    /*
     * Build a Dalvik frame for a native callee, call method->nativeFunc and
     * resume after it returns.
     *
     * Early exits through lr:
     *   - the new save area would fall below interpStackEnd;
     *   - suspendCount is non-zero;
     *   - always, when WITH_SELF_VERIFICATION is defined (the native call
     *     below is then never reached).
     *
     * The native function is entered with r0 = newFP, r1 = &glue->retval,
     * r2 = methodToCall; r3 still holds glue->self from the earlier load.
     * r9 (self) and r10 (new save area) are read again after the call, so
     * they are expected to survive it.
     *
     * After the call: the JNI local-ref cookie and self->curFrame are
     * restored; a pending exception goes to .LhandleException with
     * r0 = dalvikCallsitePC; otherwise control goes to the saved return
     * chaining cell when it is non-zero, else to
     * dvmJitToInterpTraceSelectNoChain with rPC advanced by 6 bytes.
     */
400    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
401    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
402    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
403    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
404    add     r3, r1, #1  @ Thumb addr is odd
405    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
406    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
407    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
408    ldr     r8, [r8]                    @ r8<- suspendCount (int)
409    cmp     r10, r9                     @ bottom < interpStackEnd?
410    bxlo    lr                          @ return to raise stack overflow excep.
411    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
412    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
413    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
414    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
415
416
417    @ set up newSaveArea
418    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
419    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
420    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
421    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
422    cmp     r8, #0                      @ suspendCount != 0
423    ldr     r8, [r0, #offMethod_nativeFunc] @ r8<- method->nativeFunc (flags untouched)
424#if !defined(WITH_SELF_VERIFICATION)
425    bxne    lr                          @ bail to the interpreter
426#else
427    bx      lr                          @ bail to interpreter unconditionally
428#endif
429
430    @ go ahead and transfer control to the native code
431    ldr     r9, [r3, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
432    mov     r2, #0
433    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
434    str     r2, [r3, #offThread_inJitCodeCache] @ not in the jit code cache
435    str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
436                                        @ newFp->localRefCookie=top
437    mov     r9, r3                      @ r9<- glue->self (preserve)
438    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
439
440    mov     r2, r0                      @ r2<- methodToCall
441    mov     r0, r1                      @ r0<- newFP
442    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
443
444    blx     r8                          @ off to the native code
445
446    @ native return; r9=self, r10=newSaveArea
447    @ equivalent to dvmPopJniLocals
448    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
449    ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
450    ldr     r1, [r9, #offThread_exception] @ check for exception
451    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
452    cmp     r1, #0                      @ null?
453    str     r0, [r9, #offThread_jniLocal_topCookie] @ new top <- old top
454    ldr     r0, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
455
456    @ r0 = dalvikCallsitePC
457    bne     .LhandleException           @ exception pending: handle it
458
459    str     r2, [r9, #offThread_inJitCodeCache] @ set the mode properly
460    cmp     r2, #0                      @ return chaining cell still exists?
461    bxne    r2                          @ yes - go ahead
462
463    @ continue executing the next instruction through the interpreter
464    ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
465    add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
466#if defined(WITH_JIT_TUNING)
467    mov     r0, #kCallsiteInterpreted
468#endif
469    mov     pc, r1
470
471
472
473
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MUL_LONG
dvmCompiler_TEMPLATE_MUL_LONG:
/* File: armv5te/TEMPLATE_MUL_LONG.S */
    /*
     * mul-long vAA, vBB, vCC
     *
     * 64-bit integer multiply, keeping the low 64 bits of the product.
     *
     * On entry:  r1:r0 = op1 (W:X), r3:r2 = op2 (Y:Z), high:low words
     * On exit:   r1:r0 = product
     * Scratch:   r2, r9, r10, r12
     *
     * Schoolbook decomposition of WX * YZ:
     *     low  word = low32(Z*X)
     *     high word = high32(Z*X) + low32(Z*W) + low32(Y*X)
     * The Y*W term is dropped because it lies entirely above bit 63.
     *
     * Destination and first source register are kept distinct in every
     * multiply, as required by older ARM cores (Rd must differ from Rm).
     */
    umull   r9, r10, r2, r0             @ r10:r9<- Z*X (full 64-bit product)
    mul     r12, r2, r1                 @ r12<- low32(Z*W)
    mla     r2, r0, r3, r12             @ r2<- low32(X*Y) + low32(Z*W)
    add     r10, r10, r2                @ r10<- high32(Z*X) + cross terms
    mov     r1, r10                     @ high word of the result
    mov     r0, r9                      @ low word of the result
    bx      lr
507
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SHL_LONG
dvmCompiler_TEMPLATE_SHL_LONG:
/* File: armv5te/TEMPLATE_SHL_LONG.S */
    /*
     * shl-long vAA, vBB, vCC
     *
     * 64-bit left shift by a 32-bit distance; only the low 6 bits of the
     * distance are used, as Dalvik requires.
     *
     * On entry:  r1:r0 = value (high:low), r2 = shift distance
     * On exit:   r1:r0 = value << (r2 & 63)
     * Scratch:   r2, r3, r12, condition flags
     */
    and     r2, r2, #63                 @ r2<- distance & 0x3f
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    mov     r1, r1, lsl r2              @ r1<- high << r2
    subs    r12, r2, #32                @ r12<- r2 - 32; PL when r2 >= 32
    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (low >> (32 - r2))
    movpl   r1, r0, lsl r12             @ r2 >= 32: r1<- low << (r2 - 32)
    mov     r0, r0, lsl r2              @ r0<- low << r2
    bx      lr
528
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SHR_LONG
dvmCompiler_TEMPLATE_SHR_LONG:
/* File: armv5te/TEMPLATE_SHR_LONG.S */
    /*
     * shr-long vAA, vBB, vCC
     *
     * 64-bit arithmetic (sign-propagating) right shift by a 32-bit
     * distance; only the low 6 bits of the distance are used, as Dalvik
     * requires.
     *
     * On entry:  r1:r0 = value (high:low), r2 = shift distance
     * On exit:   r1:r0 = value >> (r2 & 63)
     * Scratch:   r2, r3, r12, condition flags
     */
    and     r2, r2, #63                 @ r2<- distance & 0x3f
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    mov     r0, r0, lsr r2              @ r0<- low >>> r2
    subs    r12, r2, #32                @ r12<- r2 - 32; PL when r2 >= 32
    orr     r0, r0, r1, lsl r3          @ r0<- r0 | (high << (32 - r2))
    movpl   r0, r1, asr r12             @ r2 >= 32: r0<- high >> (r2 - 32)
    mov     r1, r1, asr r2              @ r1<- high >> r2 (sign fill)
    bx      lr
549
550
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_USHR_LONG
dvmCompiler_TEMPLATE_USHR_LONG:
/* File: armv5te/TEMPLATE_USHR_LONG.S */
    /*
     * ushr-long vAA, vBB, vCC
     *
     * 64-bit logical (zero-filling) right shift by a 32-bit distance; only
     * the low 6 bits of the distance are used, as Dalvik requires.
     *
     * On entry:  r1:r0 = value (high:low), r2 = shift distance
     * On exit:   r1:r0 = value >>> (r2 & 63)
     * Scratch:   r2, r3, r12, condition flags
     */
    and     r2, r2, #63                 @ r2<- distance & 0x3f
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    mov     r0, r0, lsr r2              @ r0<- low >>> r2
    subs    r12, r2, #32                @ r12<- r2 - 32; PL when r2 >= 32
    orr     r0, r0, r1, lsl r3          @ r0<- r0 | (high << (32 - r2))
    movpl   r0, r1, lsr r12             @ r2 >= 32: r0<- high >>> (r2 - 32)
    mov     r1, r1, lsr r2              @ r1<- high >>> r2
    bx      lr
571
572
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP
dvmCompiler_TEMPLATE_ADD_FLOAT_VFP:
/* File: armv5te-vfp/TEMPLATE_ADD_FLOAT_VFP.S */
/* File: armv5te-vfp/fbinop.S */
    /*
     * Single-precision add: *r0 = *r1 + *r2.
     *
     * On entry:
     *     r0 = address of the destination Dalvik register
     *     r1 = address of the first operand
     *     r2 = address of the second operand
     *
     * Scratch: s0, s1, s2.  Written with the unified (UAL) VFP mnemonics,
     * which are alternate spellings of flds / fadds / fsts.
     */
    vldr    s0, [r1]                    @ s0<- op1
    vldr    s1, [r2]                    @ s1<- op2
    vadd.f32 s2, s0, s1                 @ s2<- s0 + s1
    vstr    s2, [r0]                    @ store the result
    bx      lr
593
594
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP
dvmCompiler_TEMPLATE_SUB_FLOAT_VFP:
/* File: armv5te-vfp/TEMPLATE_SUB_FLOAT_VFP.S */
/* File: armv5te-vfp/fbinop.S */
    /*
     * Single-precision subtract: *r0 = *r1 - *r2.
     *
     * On entry:
     *     r0 = address of the destination Dalvik register
     *     r1 = address of the first operand
     *     r2 = address of the second operand
     *
     * Scratch: s0, s1, s2.  Written with the unified (UAL) VFP mnemonics,
     * which are alternate spellings of flds / fsubs / fsts.
     */
    vldr    s0, [r1]                    @ s0<- op1
    vldr    s1, [r2]                    @ s1<- op2
    vsub.f32 s2, s0, s1                 @ s2<- s0 - s1
    vstr    s2, [r0]                    @ store the result
    bx      lr
615
616
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP
dvmCompiler_TEMPLATE_MUL_FLOAT_VFP:
/* File: armv5te-vfp/TEMPLATE_MUL_FLOAT_VFP.S */
/* File: armv5te-vfp/fbinop.S */
    /*
     * Single-precision multiply: *r0 = *r1 * *r2.
     *
     * On entry:
     *     r0 = address of the destination Dalvik register
     *     r1 = address of the first operand
     *     r2 = address of the second operand
     *
     * Scratch: s0, s1, s2.  Written with the unified (UAL) VFP mnemonics,
     * which are alternate spellings of flds / fmuls / fsts.
     */
    vldr    s0, [r1]                    @ s0<- op1
    vldr    s1, [r2]                    @ s1<- op2
    vmul.f32 s2, s0, s1                 @ s2<- s0 * s1
    vstr    s2, [r0]                    @ store the result
    bx      lr
637
638
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP
dvmCompiler_TEMPLATE_DIV_FLOAT_VFP:
/* File: armv5te-vfp/TEMPLATE_DIV_FLOAT_VFP.S */
/* File: armv5te-vfp/fbinop.S */
    /*
     * Single-precision divide: *r0 = *r1 / *r2.
     *
     * On entry:
     *     r0 = address of the destination Dalvik register
     *     r1 = address of the dividend
     *     r2 = address of the divisor
     *
     * Scratch: s0, s1, s2.  Written with the unified (UAL) VFP mnemonics,
     * which are alternate spellings of flds / fdivs / fsts.
     */
    vldr    s0, [r1]                    @ s0<- op1
    vldr    s1, [r2]                    @ s1<- op2
    vdiv.f32 s2, s0, s1                 @ s2<- s0 / s1
    vstr    s2, [r0]                    @ store the result
    bx      lr
659
660
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP
dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP:
/* File: armv5te-vfp/TEMPLATE_ADD_DOUBLE_VFP.S */
/* File: armv5te-vfp/fbinopWide.S */
    /*
     * Double-precision add: *r0 = *r1 + *r2 (64-bit operands).
     *
     * On entry:
     *     r0 = address of the destination Dalvik register pair
     *     r1 = address of the first operand
     *     r2 = address of the second operand
     *
     * Scratch: d0, d1, d2.  Written with the unified (UAL) VFP mnemonics,
     * which are alternate spellings of fldd / faddd / fstd.
     */
    vldr    d0, [r1]                    @ d0<- op1
    vldr    d1, [r2]                    @ d1<- op2
    vadd.f64 d2, d0, d1                 @ d2<- d0 + d1
    vstr    d2, [r0]                    @ store the result
    bx      lr
681
682
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP
dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP:
/* File: armv5te-vfp/TEMPLATE_SUB_DOUBLE_VFP.S */
/* File: armv5te-vfp/fbinopWide.S */
    /*
     * Double-precision subtract: *r0 = *r1 - *r2 (64-bit operands).
     *
     * On entry:
     *     r0 = address of the destination Dalvik register pair
     *     r1 = address of the first operand
     *     r2 = address of the second operand
     *
     * Scratch: d0, d1, d2.  Written with the unified (UAL) VFP mnemonics,
     * which are alternate spellings of fldd / fsubd / fstd.
     */
    vldr    d0, [r1]                    @ d0<- op1
    vldr    d1, [r2]                    @ d1<- op2
    vsub.f64 d2, d0, d1                 @ d2<- d0 - d1
    vstr    d2, [r0]                    @ store the result
    bx      lr
703
704
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP
dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP:
/* File: armv5te-vfp/TEMPLATE_MUL_DOUBLE_VFP.S */
/* File: armv5te-vfp/fbinopWide.S */
    /*
     * Double-precision multiply: *r0 = *r1 * *r2 (64-bit operands).
     *
     * On entry:
     *     r0 = address of the destination Dalvik register pair
     *     r1 = address of the first operand
     *     r2 = address of the second operand
     *
     * Scratch: d0, d1, d2.  Written with the unified (UAL) VFP mnemonics,
     * which are alternate spellings of fldd / fmuld / fstd.
     */
    vldr    d0, [r1]                    @ d0<- op1
    vldr    d1, [r2]                    @ d1<- op2
    vmul.f64 d2, d0, d1                 @ d2<- d0 * d1
    vstr    d2, [r0]                    @ store the result
    bx      lr
725
726
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP
dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP:
/* File: armv5te-vfp/TEMPLATE_DIV_DOUBLE_VFP.S */
/* File: armv5te-vfp/fbinopWide.S */
    /*
     * Double-precision divide: *r0 = *r1 / *r2 (64-bit operands).
     *
     * On entry:
     *     r0 = address of the destination Dalvik register pair
     *     r1 = address of the dividend
     *     r2 = address of the divisor
     *
     * Scratch: d0, d1, d2.  Written with the unified (UAL) VFP mnemonics,
     * which are alternate spellings of fldd / fdivd / fstd.
     */
    vldr    d0, [r1]                    @ d0<- op1
    vldr    d1, [r2]                    @ d1<- op2
    vdiv.f64 d2, d0, d1                 @ d2<- d0 / d1
    vstr    d2, [r0]                    @ store the result
    bx      lr
747
748
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP
dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP:
/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */
/* File: armv5te-vfp/funopNarrower.S */
    /*
     * double-to-float vA, vB
     *
     * Narrow a 64-bit double to a 32-bit float.
     *
     * On entry:
     *     r0 = address of the destination Dalvik register
     *     r1 = address of the source Dalvik register pair
     *
     * Scratch: d0 (s0 overlaps its low half).  Written with the unified
     * (UAL) VFP mnemonics, alternate spellings of fldd / fcvtsd / fsts.
     */
    vldr    d0, [r1]                    @ d0<- vB
    vcvt.f32.f64 s0, d0                 @ s0<- (float) d0
    vstr    s0, [r0]                    @ vA<- s0
    bx      lr
770
771
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP
dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP:
/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_INT_VFP.S */
/* File: armv5te-vfp/funopNarrower.S */
    /*
     * double-to-int vA, vB
     *
     * Convert a 64-bit double to a signed 32-bit integer.
     *
     * On entry:
     *     r0 = address of the destination Dalvik register
     *     r1 = address of the source Dalvik register pair
     *
     * Scratch: d0 (s0 overlaps its low half).  Written with the unified
     * (UAL) VFP mnemonics; vcvt.s32.f64 is the UAL spelling of ftosizd,
     * the round-toward-zero form of the conversion.
     */
    vldr    d0, [r1]                    @ d0<- vB
    vcvt.s32.f64 s0, d0                 @ s0<- (int) d0, rounding toward zero
    vstr    s0, [r0]                    @ vA<- s0
    bx      lr
793
794
795/* ------------------------------ */
796    .balign 4
797    .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP
798dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP:
799/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */
800/* File: armv5te-vfp/funopWider.S */
801    /*
802     * Generic 32bit-to-64bit floating point unary operation.  Provide an
803     * "instr" line that specifies an instruction that performs "d0 = op s0".
804     *
805     * For: int-to-double, float-to-double (this instance: fcvtds)
806     *
807     * On entry:
808     *     r0 = target dalvik register address
809     *     r1 = src dalvik register address
810     */
811    /* unop vA, vB */
812    flds    s0, [r1]                    @ s0<- vB (32-bit source)
813    fcvtds  d0, s0                              @ d0<- (double) s0
814    fstd    d0, [r0]                    @ vA<- d0 (64-bit result)
815    bx      lr                          @ return
816
817
818/* ------------------------------ */
819    .balign 4
820    .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP
821dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP:
822/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_INT_VFP.S */
823/* File: armv5te-vfp/funop.S */
824    /*
825     * Generic 32bit-to-32bit floating point unary operation.  Provide an
826     * "instr" line that specifies an instruction that performs "s1 = op s0".
827     *
828     * For: float-to-int, int-to-float (this instance: ftosizs)
829     *
830     * On entry:
831     *     r0 = target dalvik register address
832     *     r1 = src dalvik register address
833     */
834    /* unop vA, vB */
835    flds    s0, [r1]                    @ s0<- vB
836    ftosizs s1, s0                              @ s1<- (signed int) s0, rounding toward zero
837    fsts    s1, [r0]                    @ vA<- s1
838    bx      lr                          @ return
839
840
841/* ------------------------------ */
842    .balign 4
843    .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP
844dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP:
845/* File: armv5te-vfp/TEMPLATE_INT_TO_DOUBLE_VFP.S */
846/* File: armv5te-vfp/funopWider.S */
847    /*
848     * Generic 32bit-to-64bit floating point unary operation.  Provide an
849     * "instr" line that specifies an instruction that performs "d0 = op s0".
850     *
851     * For: int-to-double, float-to-double (this instance: fsitod)
852     *
853     * On entry:
854     *     r0 = target dalvik register address
855     *     r1 = src dalvik register address
856     */
857    /* unop vA, vB */
858    flds    s0, [r1]                    @ s0<- vB (raw 32-bit integer bits)
859    fsitod  d0, s0                              @ d0<- (double) signed int in s0
860    fstd    d0, [r0]                    @ vA<- d0 (64-bit result)
861    bx      lr                          @ return
862
863
864/* ------------------------------ */
865    .balign 4
866    .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP
867dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP:
868/* File: armv5te-vfp/TEMPLATE_INT_TO_FLOAT_VFP.S */
869/* File: armv5te-vfp/funop.S */
870    /*
871     * Generic 32bit-to-32bit floating point unary operation.  Provide an
872     * "instr" line that specifies an instruction that performs "s1 = op s0".
873     *
874     * For: float-to-int, int-to-float (this instance: fsitos)
875     *
876     * On entry:
877     *     r0 = target dalvik register address
878     *     r1 = src dalvik register address
879     */
880    /* unop vA, vB */
881    flds    s0, [r1]                    @ s0<- vB (raw 32-bit integer bits)
882    fsitos  s1, s0                              @ s1<- (float) signed int in s0
883    fsts    s1, [r0]                    @ vA<- s1
884    bx      lr                          @ return
885
886
887/* ------------------------------ */
888    .balign 4
889    .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP
890dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP:
891/* File: armv5te-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S */
892    /*
893     * Compare two double-precision values.  Leaves 0, 1, or -1 in r0
894     * based on the results of the comparison ("G" flavor: default is 1).
895     *
896     * int compare(x, y) {
897     *     if (x == y) {
898     *         return 0;
899     *     } else if (x < y) {
900     *         return -1;
901     *     } else if (x > y) {
902     *         return 1;
903     *     } else {
904     *         return 1;    (no ordering holds, e.g. a NaN operand)
905     *     }
906     * }
907     *
908     * On entry:
909     *    r0 = &op1 [vBB]
910     *    r1 = &op2 [vCC]
911     */
912    /* op vAA, vBB, vCC */
913    fldd    d0, [r0]                    @ d0<- vBB
914    fldd    d1, [r1]                    @ d1<- vCC
915    fcmpd  d0, d1                       @ compare (vBB, vCC)
916    mov     r0, #1                      @ r0<- 1 (default: greater or unordered)
917    fmstat                              @ export status flags
918    mvnmi   r0, #0                      @ (less than) r0<- -1
919    moveq   r0, #0                      @ (equal) r0<- 0
920    bx      lr                          @ return with result in r0
921
922
923/* ------------------------------ */
924    .balign 4
925    .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP
926dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP:
927/* File: armv5te-vfp/TEMPLATE_CMPL_DOUBLE_VFP.S */
928    /*
929     * Compare two double-precision values.  Leaves 0, 1, or -1 in r0
930     * based on the results of the comparison ("L" flavor: default is -1).
931     *
932     * int compare(x, y) {
933     *     if (x == y) {
934     *         return 0;
935     *     } else if (x > y) {
936     *         return 1;
937     *     } else if (x < y) {
938     *         return -1;
939     *     } else {
940     *         return -1;   (no ordering holds, e.g. a NaN operand)
941     *     }
942     * }
943     * On entry:
944     *    r0 = &op1 [vBB]
945     *    r1 = &op2 [vCC]
946     */
947    /* op vAA, vBB, vCC */
948    fldd    d0, [r0]                    @ d0<- vBB
949    fldd    d1, [r1]                    @ d1<- vCC
950    fcmped  d0, d1                      @ compare (vBB, vCC); NOTE(review): E variant here, sibling compare templates use fcmpd/fcmps - confirm intended
951    mvn     r0, #0                      @ r0<- -1 (default: less or unordered)
952    fmstat                              @ export status flags
953    movgt   r0, #1                      @ (greater than) r0<- 1
954    moveq   r0, #0                      @ (equal) r0<- 0
955    bx      lr                          @ return with result in r0
956
957/* ------------------------------ */
958    .balign 4
959    .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP
960dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP:
961/* File: armv5te-vfp/TEMPLATE_CMPG_FLOAT_VFP.S */
962    /*
963     * Compare two single-precision values.  Leaves 0, 1, or -1 in r0
964     * based on the results of the comparison ("G" flavor: default is 1).
965     *
966     * int compare(x, y) {
967     *     if (x == y) {
968     *         return 0;
969     *     } else if (x < y) {
970     *         return -1;
971     *     } else if (x > y) {
972     *         return 1;
973     *     } else {
974     *         return 1;    (no ordering holds, e.g. a NaN operand)
975     *     }
976     * }
977     * On entry:
978     *    r0 = &op1 [vBB]
979     *    r1 = &op2 [vCC]
980     */
981    /* op vAA, vBB, vCC */
982    flds    s0, [r0]                    @ s0<- vBB
983    flds    s1, [r1]                    @ s1<- vCC
984    fcmps  s0, s1                      @ compare (vBB, vCC)
985    mov     r0, #1                      @ r0<- 1 (default: greater or unordered)
986    fmstat                              @ export status flags
987    mvnmi   r0, #0                      @ (less than) r0<- -1
988    moveq   r0, #0                      @ (equal) r0<- 0
989    bx      lr                          @ return with result in r0
990
991/* ------------------------------ */
992    .balign 4
993    .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP
994dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP:
995/* File: armv5te-vfp/TEMPLATE_CMPL_FLOAT_VFP.S */
996    /*
997     * Compare two single-precision values.  Leaves 0, 1, or -1 in r0
998     * based on the results of the comparison ("L" flavor: default is -1).
999     *
1000     * int compare(x, y) {
1001     *     if (x == y) {
1002     *         return 0;
1003     *     } else if (x > y) {
1004     *         return 1;
1005     *     } else if (x < y) {
1006     *         return -1;
1007     *     } else {
1008     *         return -1;   (no ordering holds, e.g. a NaN operand)
1009     *     }
1010     * }
1011     * On entry:
1012     *    r0 = &op1 [vBB]
1013     *    r1 = &op2 [vCC]
1014     */
1015    /* op vAA, vBB, vCC */
1016    flds    s0, [r0]                    @ s0<- vBB
1017    flds    s1, [r1]                    @ s1<- vCC
1018    fcmps  s0, s1                      @ compare (vBB, vCC)
1019    mvn     r0, #0                      @ r0<- -1 (default: less or unordered)
1020    fmstat                              @ export status flags
1021    movgt   r0, #1                      @ (greater than) r0<- 1
1022    moveq   r0, #0                      @ (equal) r0<- 0
1023    bx      lr                          @ return with result in r0
1024
1025/* ------------------------------ */
1026    .balign 4
1027    .global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP
1028dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP:
1029/* File: armv5te-vfp/TEMPLATE_SQRT_DOUBLE_VFP.S */
1030    /*
1031     * 64-bit floating point vfp sqrt operation.
1032     * If the result is a NaN, bail out to library code (sqrt) to do
1033     * the right thing; the library routine returns directly to our caller.
1034     *
1035     * On entry:
1036     *     r2 src addr of op1
1037     * On exit:
1038     *     r0,r1 = res
1039     */
1040    fldd    d0, [r2]             @ d0<- op1
1041    fsqrtd  d1, d0               @ d1<- sqrt(d0)
1042    fcmpd   d1, d1               @ self-compare: equal unless d1 is a NaN
1043    fmstat                       @ export status flags
1044    fmrrd   r0, r1, d1           @ r0/r1<- d1 (candidate result)
1045    bxeq    lr   @ Result OK - return
1046    ldr     r2, .Lsqrt           @ r2<- address of library sqrt
1047    fmrrd   r0, r1, d0   @ reload orig operand
1048    bx      r2   @ tail call to sqrt library routine
1049
1050.Lsqrt:
1051    .word   sqrt
1052
1053/* ------------------------------ */
1054    .balign 4
1055    .global dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON
1056dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON:
1057/* File: armv5te/TEMPLATE_THROW_EXCEPTION_COMMON.S */
1058    /*
1059     * Throw an exception from JIT'ed code by handing off to .LhandleException.
1060     * On entry:
1061     *    r0    Dalvik PC that raises the exception
1062     */
1063    b       .LhandleException       @ r0 is passed through unchanged; does not return here
1064
1065/* ------------------------------ */
1066    .balign 4
1067    .global dvmCompiler_TEMPLATE_MEM_OP_DECODE
1068dvmCompiler_TEMPLATE_MEM_OP_DECODE:
1069/* File: armv5te-vfp/TEMPLATE_MEM_OP_DECODE.S */
1070#if defined(WITH_SELF_VERIFICATION)
1071    /*
1072     * This handler encapsulates heap memory ops for selfVerification mode.
1073     * Without WITH_SELF_VERIFICATION the template body is empty.
1074     *
1075     * The call to the handler is inserted prior to a heap memory operation.
1076     * This handler then calls a function to decode the memory op, and process
1077     * it accordingly. Afterwards, the return address is changed to skip the
1078     * memory op so it never gets executed (NOTE(review): presumably done by the decode function via the saved lr at arg1 - confirm).
1079     */
1080    vpush   {d0-d15}                    @ save out all fp registers
1081    push    {r0-r12,lr}                 @ save out all registers
1082    mov     r0, lr                      @ arg0 <- link register
1083    mov     r1, sp                      @ arg1 <- stack pointer (base of the saved r0-r12,lr block)
1084    ldr     r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S
1085    blx     r2                          @ decode and handle the mem op
1086    pop     {r0-r12,lr}                 @ restore all registers (lr comes from the saved slot)
1087    vpop    {d0-d15}                    @ restore all fp registers
1088    bx      lr                          @ return to compiled code
1089#endif /* WITH_SELF_VERIFICATION */
1089
1090/* ------------------------------ */
1091    .balign 4
1092    .global dvmCompiler_TEMPLATE_STRING_COMPARETO
1093dvmCompiler_TEMPLATE_STRING_COMPARETO:
1094/* File: armv5te/TEMPLATE_STRING_COMPARETO.S */
1095    /*
1096     * String's compareTo.
1097     *
1098     * Requires r0/r1 to have been previously checked for null.  Will
1099     * return negative if this string is < comp, 0 if they are the
1100     * same and positive if >.
1101     *
1102     * IMPORTANT NOTE:
1103     *
1104     * This code relies on hard-coded offsets for string objects, and must be
1105     * kept in sync with definitions in UtfString.h.  See asm-constants.h
1106     *
1107     * On entry:
1108     *    r0:   this object pointer
1109     *    r1:   comp object pointer
1110     * On exit:
1111     *    r0:   comparison result (r1-r4 and r7-r12 are overwritten)
1112
1113    mov    r2, r0         @ this to r2, opening up r0 for return value
1114    subs   r0, r2, r1     @ Same?
1115    bxeq   lr             @ same object - return 0
1116
1117    ldr    r4, [r2, #STRING_FIELDOFF_OFFSET]   @ r4<- this.offset
1118    ldr    r9, [r1, #STRING_FIELDOFF_OFFSET]   @ r9<- comp.offset
1119    ldr    r7, [r2, #STRING_FIELDOFF_COUNT]    @ r7<- this.count
1120    ldr    r10, [r1, #STRING_FIELDOFF_COUNT]   @ r10<- comp.count
1121    ldr    r2, [r2, #STRING_FIELDOFF_VALUE]    @ r2<- this.value
1122    ldr    r1, [r1, #STRING_FIELDOFF_VALUE]    @ r1<- comp.value
1123
1124    /*
1125     * At this point, we have:
1126     *    value:  r2/r1
1127     *    offset: r4/r9
1128     *    count:  r7/r10
1129     * We're going to compute
1130     *    r11 <- countDiff  (this.count - comp.count)
1131     *    r10 <- minCount   (the smaller of the two counts)
1132     */
1133     subs  r11, r7, r10
1134     movls r10, r7
1135
1136     /* Now, build pointers to the string data */
1137     add   r2, r2, r4, lsl #1   @ advance by offset 16-bit chars
1138     add   r1, r1, r9, lsl #1
1139     /*
1140      * Note: data pointers point to previous element so we can use pre-index
1141      * mode with base writeback.
1142      */
1143     add   r2, #16-2   @ offset to contents[-1]
1144     add   r1, #16-2   @ offset to contents[-1]
1145
1146     /*
1147      * At this point we have:
1148      *   r2: *this string data
1149      *   r1: *comp string data
1150      *   r10: iteration count for comparison
1151      *   r11: value to return if the first part of the string is equal
1152      *   r0: reserved for result
1153      *   r3, r4, r7, r8, r9, r12 available for loading string data
1154      */
1155
1156    subs  r10, #2
1157    blt   do_remainder2       @ fewer than 2 chars to compare
1158
1159      /*
1160       * Unroll the first two checks so we can quickly catch early mismatch
1161       * on long strings (but preserve incoming alignment)
1162       */
1163
1164    ldrh  r3, [r2, #2]!
1165    ldrh  r4, [r1, #2]!
1166    ldrh  r7, [r2, #2]!
1167    ldrh  r8, [r1, #2]!
1168    subs  r0, r3, r4          @ diff of first char pair
1169    subeqs  r0, r7, r8        @ if equal, diff of second char pair
1170    bxne  lr                  @ mismatch - return the char difference
1171    cmp   r10, #28
1172    bgt   do_memcmp16         @ more than 28 chars left - use __memcmp16
1173    subs  r10, #3
1174    blt   do_remainder        @ fewer than 3 chars left
1175
1176loopback_triple:
1177    ldrh  r3, [r2, #2]!
1178    ldrh  r4, [r1, #2]!
1179    ldrh  r7, [r2, #2]!
1180    ldrh  r8, [r1, #2]!
1181    ldrh  r9, [r2, #2]!
1182    ldrh  r12,[r1, #2]!
1183    subs  r0, r3, r4
1184    subeqs  r0, r7, r8
1185    subeqs  r0, r9, r12
1186    bxne  lr                  @ first non-zero pair difference is the result
1187    subs  r10, #3
1188    bge   loopback_triple     @ at least 3 more chars to compare
1189
1190do_remainder:
1191    adds  r10, #3             @ undo the bias; r10<- chars left (0..2)
1192    beq   returnDiff
1193
1194loopback_single:
1195    ldrh  r3, [r2, #2]!
1196    ldrh  r4, [r1, #2]!
1197    subs  r0, r3, r4
1198    bxne  lr                  @ mismatch - return the char difference
1199    subs  r10, #1
1200    bne     loopback_single
1201
1202returnDiff:
1203    mov   r0, r11             @ common prefix equal - return countDiff
1204    bx    lr
1205
1206do_remainder2:
1207    adds  r10, #2             @ undo the bias; r10<- chars left (0..1)
1208    bne   loopback_single
1209    mov   r0, r11             @ nothing to compare - return countDiff
1210    bx    lr
1211
1212    /* Long string case */
1213do_memcmp16:
1214    mov   r4, lr              @ keep our return address across the call
1215    ldr   lr, .Lmemcmp16      @ lr<- address of __memcmp16
1216    mov   r7, r11             @ keep countDiff across the call
1217    add   r0, r2, #2          @ arg0<- next unread char of this
1218    add   r1, r1, #2          @ arg1<- next unread char of comp
1219    mov   r2, r10             @ arg2<- number of chars left to compare
1220    blx   lr
1221    cmp   r0, #0
1222    bxne  r4                  @ non-zero result from __memcmp16 - return it
1223    mov   r0, r7              @ otherwise return countDiff
1224    bx    r4
1225
1226.Lmemcmp16:
1227    .word __memcmp16
1228
1229
1230/* ------------------------------ */
1231    .balign 4
1232    .global dvmCompiler_TEMPLATE_STRING_INDEXOF
1233dvmCompiler_TEMPLATE_STRING_INDEXOF:
1234/* File: armv5te/TEMPLATE_STRING_INDEXOF.S */
1235    /*
1236     * String's indexOf.
1237     *
1238     * Requires r0 to have been previously checked for null.  Will
1239     * return index of match of r1 in r0, or -1 if there is no match.
1240     *
1241     * IMPORTANT NOTE:
1242     *
1243     * This code relies on hard-coded offsets for string objects, and must be
1244     * kept in sync with definitions in UtfString.h  See asm-constants.h
1245     *
1246     * On entry:
1247     *    r0:   string object pointer
1248     *    r1:   char to match
1249     *    r2:   Starting offset in string data
1250     */
1251
1252    ldr    r7, [r0, #STRING_FIELDOFF_OFFSET]
1253    ldr    r8, [r0, #STRING_FIELDOFF_COUNT]
1254    ldr    r0, [r0, #STRING_FIELDOFF_VALUE]
1255
1256    /*
1257     * At this point, we have:
1258     *    r0: the string's value pointer (STRING_FIELDOFF_VALUE)
1259     *    r1: char to match
1260     *    r2: starting offset
1261     *    r7: offset
1262     *    r8: string length
1263     */
1264
1265     /* Build pointer to start of string data */
1266     add   r0, #16
1267     add   r0, r0, r7, lsl #1
1268
1269     /* Save a copy of starting data in r7 */
1270     mov   r7, r0
1271
1272     /* Clamp start to [0..count] */
1273     cmp   r2, #0
1274     movlt r2, #0
1275     cmp   r2, r8
1276     movgt r2, r8
1277
1278     /* Build pointer to start of data to compare and pre-bias */
1279     add   r0, r0, r2, lsl #1
1280     sub   r0, #2          @ bias by one char for the pre-indexed loads below
1281
1282     /* Compute iteration count */
1283     sub   r8, r2
1284
1285     /*
1286      * At this point we have:
1287      *   r0: start of data to test
1288      *   r1: char to compare
1289      *   r8: iteration count
1290      *   r7: original start of string
1291      *   r3, r4, r9, r10, r11, r12 available for loading string data
1292      */
1293
1294    subs  r8, #4
1295    blt   indexof_remainder   @ fewer than 4 chars to scan
1296
1297indexof_loop4:
1298    ldrh  r3, [r0, #2]!
1299    ldrh  r4, [r0, #2]!
1300    ldrh  r10, [r0, #2]!
1301    ldrh  r11, [r0, #2]!      @ r0 now points at the 4th char of this group
1302    cmp   r3, r1
1303    beq   match_0
1304    cmp   r4, r1
1305    beq   match_1
1306    cmp   r10, r1
1307    beq   match_2
1308    cmp   r11, r1
1309    beq   match_3
1310    subs  r8, #4
1311    bge   indexof_loop4       @ at least 4 more chars to scan
1312
1313indexof_remainder:
1314    adds    r8, #4            @ undo the bias; r8<- chars left (0..3)
1315    beq     indexof_nomatch
1316
1317indexof_loop1:
1318    ldrh  r3, [r0, #2]!
1319    cmp   r3, r1
1320    beq   match_3             @ r0 already points at the matching char
1321    subs  r8, #1
1322    bne   indexof_loop1
1323
1324indexof_nomatch:
1325    mov   r0, #-1
1326    bx    lr
1327
1328match_0:
1329    sub   r0, #6              @ back up 3 chars to the 1st char of the group
1330    sub   r0, r7              @ byte distance from start of string data
1331    asr   r0, r0, #1          @ bytes -> char index
1332    bx    lr
1333match_1:
1334    sub   r0, #4              @ back up 2 chars to the 2nd char of the group
1335    sub   r0, r7
1336    asr   r0, r0, #1
1337    bx    lr
1338match_2:
1339    sub   r0, #2              @ back up 1 char to the 3rd char of the group
1340    sub   r0, r7
1341    asr   r0, r0, #1
1342    bx    lr
1343match_3:
1344    sub   r0, r7              @ r0 points at the match; byte distance from start
1345    asr   r0, r0, #1          @ bytes -> char index
1346    bx    lr
1347
1348
1349/* ------------------------------ */
1350    .balign 4
1351    .global dvmCompiler_TEMPLATE_INTERPRET
1352dvmCompiler_TEMPLATE_INTERPRET:
1353/* File: armv5te/TEMPLATE_INTERPRET.S */
1354    /*
1355     * This handler transfers control to the interpreter without performing
1356     * any lookups.  It may be called either as part of a normal chaining
1357     * operation, or from the transition code in header.S.  We distinguish
1358     * the two cases by looking at the link register.  If called from a
1359     * translation chain, it will point to the chaining Dalvik PC -3.
1360     * On entry:
1361     *    lr - if NULL:
1362     *        r1 - the Dalvik PC to begin interpretation.
1363     *    else
1364     *        [lr, #3] contains Dalvik PC to begin interpretation
1365     *    rGLUE - pointer to interpState
1366     *    rFP - Dalvik frame pointer
1367     */
1368    cmp     lr, #0                       @ lr == NULL means r1 already holds the Dalvik PC
1369    ldrne   r1,[lr, #3]                  @ otherwise fetch the Dalvik PC stored at [lr, #3]
1370    ldr     r2, .LinterpPunt             @ r2<- &dvmJitToInterpPunt
1371    mov     r0, r1                       @ set Dalvik PC
1372    bx      r2
1373    @ doesn't return
1374
1375.LinterpPunt:
1376    .word   dvmJitToInterpPunt
1377
1378/* ------------------------------ */
1379    .balign 4
1380    .global dvmCompiler_TEMPLATE_MONITOR_ENTER
1381dvmCompiler_TEMPLATE_MONITOR_ENTER:
1382/* File: armv5te/TEMPLATE_MONITOR_ENTER.S */
1383    /*
1384     * Call out to the runtime to lock an object.  Because this thread
1385     * may have been suspended in THREAD_MONITOR state and the Jit's
1386     * translation cache subsequently cleared, we cannot return directly.
1387     * Instead, unconditionally transition to the interpreter to resume.
1388     *
1389     * On entry:
1390     *    r0 - self pointer
1391     *    r1 - the object (which has already been null-checked by the caller)
1392     *    r4 - the Dalvik PC of the following instruction.
1393     */
1394    ldr     r2, .LdvmLockObject          @ r2<- &dvmLockObject
1395    mov     r3, #0                       @ Record that we're not returning
1396    str     r3, [r0, #offThread_inJitCodeCache]
1397    blx     r2                           @ dvmLockObject(self, obj)
1398    @ refresh Jit's on/off status
1399    ldr     r0, [rGLUE, #offGlue_ppJitProfTable]
1400    ldr     r0, [r0]                     @ r0<- current value behind ppJitProfTable
1401    ldr     r2, .LdvmJitToInterpNoChain  @ r2<- &dvmJitToInterpNoChain
1402    str     r0, [rGLUE, #offGlue_pJitProfTable]  @ cache it in the glue struct
1403    @ Bail to interpreter - no chain [note - r4 still contains rPC]
1404#if defined(WITH_JIT_TUNING)
1405    mov     r0, #kHeavyweightMonitor
1406#endif
1407    bx      r2
1408
1409
1410/* ------------------------------ */
1411    .balign 4
1412    .global dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG
1413dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
1414/* File: armv5te/TEMPLATE_MONITOR_ENTER_DEBUG.S */
1415    /*
1416     * To support deadlock prediction, this version of MONITOR_ENTER
1417     * will always call the heavyweight dvmLockObject, check for an
1418     * exception and then bail out to the interpreter.
1419     *
1420     * On entry:
1421     *    r0 - self pointer
1422     *    r1 - the object (which has already been null-checked by the caller)
1423     *    r4 - the Dalvik PC of the following instruction.
1424     *
1425     */
1426    ldr     r2, .LdvmLockObject          @ r2<- &dvmLockObject
1427    mov     r3, #0                       @ Record that we're not returning
1428    str     r3, [r0, #offThread_inJitCodeCache]
1429    blx     r2             @ dvmLockObject(self, obj)
1430    @ refresh Jit's on/off status & test for exception
1431    ldr     r0, [rGLUE, #offGlue_ppJitProfTable]
1432    ldr     r1, [rGLUE, #offGlue_self]
1433    ldr     r0, [r0]                     @ r0<- current value behind ppJitProfTable
1434    ldr     r1, [r1, #offThread_exception] @ r1<- self->exception
1435    str     r0, [rGLUE, #offGlue_pJitProfTable]
1436    cmp     r1, #0                       @ exception pending?
1437    beq     1f                           @ no - bail to the interpreter
1438    @ .LhandleException is a code label, not a literal word, so branch to it directly
1439    sub     r0, r4, #2     @ roll dPC back to this monitor instruction
1440    b       .LhandleException            @ r0 = faulting Dalvik PC; does not return
14411:
1442    @ Bail to interpreter - no chain [note - r4 still contains rPC]
1443#if defined(WITH_JIT_TUNING)
1444    mov     r0, #kHeavyweightMonitor
1445#endif
1446    ldr     pc, .LdvmJitToInterpNoChain
1447
1448    .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
1449/* File: armv5te/footer.S */
1450/*
1451 * ===========================================================================
1452 *  Common subroutines and data
1453 * ===========================================================================
1454 */
1455
1456    .text
1457    .align  2
1458.LinvokeNative:
1459    @ Prep for the native call
1460    @ r1 = newFP, r0 = methodToCall
1461    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
1462    mov     r2, #0
1463    ldr     r9, [r3, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
1464    str     r2, [r3, #offThread_inJitCodeCache] @ not in jit code cache
1465    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
1466    str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
1467                                        @ newFp->localRefCookie=top
1468    mov     r9, r3                      @ r9<- glue->self (preserve)
1469    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
1470
1471    mov     r2, r0                      @ r2<- methodToCall
1472    mov     r0, r1                      @ r0<- newFP
1473    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
1474
1475    LDR_PC_LR "[r2, #offMethod_nativeFunc]"
1476
1477    @ Refresh Jit's on/off status
1478    ldr     r3, [rGLUE, #offGlue_ppJitProfTable]
1479
1480    @ native return; r9=self, r10=newSaveArea
1481    @ equivalent to dvmPopJniLocals
1482    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
1483    ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
1484    ldr     r1, [r9, #offThread_exception] @ check for exception
1485    ldr     r3, [r3]    @ r3 <- pointer to Jit profile table
1486    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
1487    cmp     r1, #0                      @ null?
1488    str     r0, [r9, #offThread_jniLocal_topCookie] @ new top <- old top
1489    ldr     r0, [r10, #offStackSaveArea_savedPc] @ reload rPC
1490    str     r3, [rGLUE, #offGlue_pJitProfTable]  @ cache current JitProfTable
1491
1492    @ r0 = dalvikCallsitePC
1493    bne     .LhandleException           @ exception pending (non-null) - handle it
1494
1495    str     r2, [r9, #offThread_inJitCodeCache] @ set the new mode
1496    cmp     r2, #0                      @ return chaining cell still exists?
1497    bxne    r2                          @ yes - go ahead
1498
1499    @ continue executing the next instruction through the interpreter
1500    ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
1501    add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
1502#if defined(WITH_JIT_TUNING)
1503    mov     r0, #kCallsiteInterpreted
1504#endif
1505    mov     pc, r1
1506
1507/*
1508 * Common exception exit: leave JIT'ed code and hand the faulting Dalvik PC
1509 * to dvmMterpCommonExceptionThrown.  On entry: r0  Faulting Dalvik PC
1510 */
1511.LhandleException:
1512#if defined(WITH_SELF_VERIFICATION)
1513    ldr     pc, .LdeadFood @ should not see this under self-verification mode
1514.LdeadFood:
1515    .word   0xdeadf00d     @ deliberately bogus jump target loaded into pc above
1516#endif
1517    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
1518    mov     r2, #0
1519    str     r2, [r3, #offThread_inJitCodeCache] @ in interpreter land
1520    ldr     r1, .LdvmMterpCommonExceptionThrown @ PIC way of getting &func
1521    ldr     rIBASE, .LdvmAsmInstructionStart    @ same as above
1522    mov     rPC, r0                 @ reload the faulting Dalvik address
1523    mov     pc, r1                  @ branch to dvmMterpCommonExceptionThrown
1524
1525    .align  2
1526.LdvmAsmInstructionStart:
1527    .word   dvmAsmInstructionStart              @ loaded into rIBASE by .LhandleException
1528.LdvmJitToInterpTraceSelectNoChain:
1529    .word   dvmJitToInterpTraceSelectNoChain    @ interpreter re-entry used by .LinvokeNative
1530.LdvmJitToInterpNoChain:
1531    .word   dvmJitToInterpNoChain               @ interpreter re-entry used by the MONITOR_ENTER templates
1532.LdvmMterpStdBail:
1533    .word   dvmMterpStdBail
1534.LdvmMterpCommonExceptionThrown:
1535    .word   dvmMterpCommonExceptionThrown       @ jump target of .LhandleException
1536.LdvmLockObject:
1537    .word   dvmLockObject                       @ called by the MONITOR_ENTER templates
1538#if defined(WITH_JIT_TUNING)
1539.LdvmICHitCount:
1540    .word   gDvmICHitCount
1541#endif
1542#if defined(WITH_SELF_VERIFICATION)
1543.LdvmSelfVerificationMemOpDecode:
1544    .word   dvmSelfVerificationMemOpDecode      @ called by TEMPLATE_MEM_OP_DECODE
1545#endif
1546.L__aeabi_cdcmple:
1547    .word   __aeabi_cdcmple
1548.L__aeabi_cfcmple:
1549    .word   __aeabi_cfcmple
1550
1551    .global dmvCompilerTemplateEnd
1552dmvCompilerTemplateEnd:                         @ NOTE(review): exported name is spelled "dmv", not "dvm" - keep in sync with its users
1553
1554#endif /* WITH_JIT */
1555