CompilerTemplateAsm-armv7-a-neon.S revision 7a2697d327936e20ef5484f7819e2e4bf91c891f
1/*
2 * This file was generated automatically by gen-template.py for 'armv7-a-neon'.
3 *
4 * --> DO NOT EDIT <--
5 */
6
7/* File: armv5te/header.S */
8/*
9 * Copyright (C) 2008 The Android Open Source Project
10 *
11 * Licensed under the Apache License, Version 2.0 (the "License");
12 * you may not use this file except in compliance with the License.
13 * You may obtain a copy of the License at
14 *
15 *      http://www.apache.org/licenses/LICENSE-2.0
16 *
17 * Unless required by applicable law or agreed to in writing, software
18 * distributed under the License is distributed on an "AS IS" BASIS,
19 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 * See the License for the specific language governing permissions and
21 * limitations under the License.
22 */
23
24#if defined(WITH_JIT)
25
26/*
27 * ARMv5 definitions and declarations.
28 */
29
30/*
31ARM EABI general notes:
32
33r0-r3 hold first 4 args to a method; they are not preserved across method calls
34r4-r8 are available for general use
35r9 is given special treatment in some situations, but not for us
36r10 (sl) seems to be generally available
37r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
38r12 (ip) is scratch -- not preserved across method calls
39r13 (sp) should be managed carefully in case a signal arrives
40r14 (lr) must be preserved
41r15 (pc) can be tinkered with directly
42
43r0 holds returns of <= 4 bytes
44r0-r1 hold returns of 8 bytes, low word in r0
45
46Callee must save/restore r4+ (except r12) if it modifies them.
47
48Stack is "full descending".  Only the arguments that don't fit in the first 4
49registers are placed on the stack.  "sp" points at the first stacked argument
50(i.e. the 5th arg).
51
52VFP: single-precision results in s0, double-precision results in d0.
53
54In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
5564-bit quantities (long long, double) must be 64-bit aligned.
56*/
57
58/*
59JIT and ARM notes:
60
61The following registers have fixed assignments:
62
63  reg nick      purpose
64  r5  rFP       interpreted frame pointer, used for accessing locals and args
65  r6  rGLUE     MterpGlue pointer
66
67The following registers have fixed assignments in mterp but are scratch
68registers in compiled code
69
70  reg nick      purpose
71  r4  rPC       interpreted program counter, used for fetching instructions
72  r7  rINST     first 16-bit code unit of current instruction
73  r8  rIBASE    interpreted instruction base pointer, used for computed goto
74
75Macros are provided for common operations.  Each macro MUST emit only
76one instruction to make instruction-counting easier.  They MUST NOT alter
77unspecified registers or condition codes.
78*/
79
80/* single-purpose registers, given names for clarity (see "JIT and ARM notes") */
81#define rPC     r4      /* interpreted program counter; scratch in compiled code */
82#define rFP     r5      /* interpreted frame pointer, used for locals and args */
83#define rGLUE   r6      /* MterpGlue pointer */
84#define rINST   r7      /* first 16-bit code unit of current instruction; scratch in compiled code */
85#define rIBASE  r8      /* interpreted instruction base pointer; scratch in compiled code */
86
87/*
88 * Given a frame pointer in _fpreg, compute the address of its stack save
89 * area into _reg.  Expands to a single SUB, so no condition flags change.
90 * In C this is "((StackSaveArea*)(_fp) -1)".
91 */
92#define SAVEAREA_FROM_FP(_reg, _fpreg) \
93    sub     _reg, _fpreg, #sizeofStackSaveArea
94
95#define EXPORT_PC() \
96    str     rPC, [rFP, #(-sizeofStackSaveArea + offStackSaveArea_currentPc)]  /* store rPC into the currentPc slot of the save area below rFP */
97
98/*
99 * This is a #include, not a %include, because we want the C pre-processor
100 * to expand the macros into assembler assignment statements.
101 */
102#include "../../../mterp/common/asm-constants.h"
103
104/* File: armv5te-vfp/platform.S */
105/*
106 * ===========================================================================
107 *  CPU-version-specific defines and utility
108 * ===========================================================================
109 */
110
111/*
112 * Macro for "MOV LR,PC / LDR PC,xxx", which is not allowed pre-ARMv5.
113 * Jump to the subroutine whose address is loaded from the "source" operand.
114 * Expands to two instructions.
115 * May modify IP and LR (the expansion below writes only LR and PC).
116 */
117.macro  LDR_PC_LR source
118    mov     lr, pc                      @ lr<- address of the insn after the ldr (ARM-state pc reads 8 ahead)
119    ldr     pc, \source                 @ pc<- word loaded from source: jump to the subroutine
120.endm
121
122
123    .global dvmCompilerTemplateStart     @ export the label defined below
124    .type   dvmCompilerTemplateStart, %function
125    .text                                @ the templates that follow are emitted into .text
126
127dvmCompilerTemplateStart:                @ sits immediately before the first template
128
129/* ------------------------------ */
130    .balign 4
131    .global dvmCompiler_TEMPLATE_CMP_LONG
132dvmCompiler_TEMPLATE_CMP_LONG:
133/* File: armv5te/TEMPLATE_CMP_LONG.S */
134    /*
135     * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
136     * register based on the results of the comparison.
137     *
138     * On entry r1:r0 holds vBB (high:low) and r3:r2 holds vCC (high:low);
139     * nothing is loaded from memory here.  On return (via lr) r0 holds
140     * -1 if vBB < vCC, 0 if they are equal, and 1 if vBB > vCC.
141     *
142     * If we just wanted to set condition flags, we could do this:
143     *  subs    ip, r0, r2
144     *  sbcs    ip, r1, r3
145     *  subeqs  ip, r0, r2
146     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
147     * integer value, which we can do with 2 conditional mov/mvn instructions
148     * (set 1, set -1; if they're equal we already have 0 in ip), giving
149     * us a constant 5-cycle path plus a branch at the end to the
150     * instruction epilogue code.  The multi-compare approach below needs
151     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
152     * in the worst case (the 64-bit values are equal).
153     */
154    /* cmp-long vAA, vBB, vCC */
155    cmp     r1, r3                      @ compare high words (vBB+1, vCC+1)
156    blt     .LTEMPLATE_CMP_LONG_less            @ signed compare on high part
157    bgt     .LTEMPLATE_CMP_LONG_greater
158    subs    r0, r0, r2                  @ high words equal: r0<- r0 - r2, flags set
159    bxeq     lr                         @ low words equal too: return with r0 = 0
160    bhi     .LTEMPLATE_CMP_LONG_greater         @ unsigned compare on low part
161.LTEMPLATE_CMP_LONG_less:                @ also reached by fall-through (low part lower)
162    mvn     r0, #0                      @ r0<- -1
163    bx      lr
164.LTEMPLATE_CMP_LONG_greater:
165    mov     r0, #1                      @ r0<- 1
166    bx      lr
167
168/* ------------------------------ */
169    .balign 4
170    .global dvmCompiler_TEMPLATE_RETURN
171dvmCompiler_TEMPLATE_RETURN:
172/* File: armv5te/TEMPLATE_RETURN.S */
173    /*
174     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
175     * If the stored value in returnAddr is non-zero (and suspendCount is 0),
176     * the caller is compiled by the JIT, so return to the address in the code
177     * cache following the invoke instruction.  Otherwise jump to the entry
178     * point loaded from .LdvmJitToInterpNoChainNoProfile.
179     */
180    SAVEAREA_FROM_FP(r0, rFP)           @ r0<- saveArea (old)
181    ldr     r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
182    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
183    ldr     rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc
184#if !defined(WITH_SELF_VERIFICATION)
185    ldr     r9,  [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret
186#else
187    mov     r9, #0                      @ disable chaining
188#endif
189    ldr     r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)]
190                                        @ r2<- method we're returning to
191    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
192    cmp     r2, #0                      @ break frame? (null method)
193#if !defined(WITH_SELF_VERIFICATION)
194    beq     1f                          @ bail to interpreter
195#else
196    blxeq   lr                          @ punt to interpreter and compare state
197#endif
198    ldr     r1, .LdvmJitToInterpNoChainNoProfile @ defined in footer.S
199    mov     rFP, r10                    @ publish new FP
200    ldrne   r10, [r2, #offMethod_clazz] @ r10<- method->clazz (flags still from cmp r2, #0)
201    ldr     r8, [r8]                    @ r8<- suspendCount
202
203    str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
204    ldr     r0, [r10, #offClassObject_pDvmDex] @ r0<- method->clazz->pDvmDex
205    str     rFP, [r3, #offThread_curFrame] @ self->curFrame = fp
206    add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
207    str     r0, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = pDvmDex
208    cmp     r8, #0                      @ check the suspendCount
209    movne   r9, #0                      @ clear the chaining cell address
210    str     r9, [r3, #offThread_inJitCodeCache] @ in code cache or not
211    cmp     r9, #0                      @ chaining cell exists?
212    blxne   r9                          @ jump to the chaining cell
213#if defined(WITH_JIT_TUNING)
214    mov     r0, #kCallsiteInterpreted   @ r0<- kCallsiteInterpreted (tuning builds only)
215#endif
216    mov     pc, r1                      @ callsite is interpreted
2171:                                      @ break frame: leave through dvmMterpStdBail
218    stmia   rGLUE, {rPC, rFP}           @ SAVE_PC_FP_TO_GLUE()
219    ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
220    mov     r1, #0                      @ changeInterp = false
221    mov     r0, rGLUE                   @ Expecting rGLUE in r0
222    blx     r2                          @ exit the interpreter
223
224/* ------------------------------ */
225    .balign 4
226    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT
227dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
228/* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
229    /*
230     * For polymorphic callsites - setup the Dalvik frame, load the callee's
231     * insns into rPC, then jump via .LdvmJitToInterpTraceSelectNoChain to
232     * dispatch the runtime-resolved callee (bails to lr on overflow/suspend).
233     */
234    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
235    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
236    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
237    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
238    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
239    add     r3, r1, #1  @ r3<- returnCell + 1 (Thumb addr is odd)
240    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
241    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
242    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
243    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
244    ldr     r8, [r8]                    @ r8<- suspendCount (int)
245    cmp     r10, r9                     @ bottom < interpStackEnd?
246    bxlo    lr                          @ return to raise stack overflow excep.
247    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
248    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
249    ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
250    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
251    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
252    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
253
254
255    @ set up newSaveArea
256    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
257    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
258    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
259    cmp     r8, #0                      @ suspendCount != 0
260    bxne    lr                          @ bail to the interpreter
261    tst     r10, #ACC_NATIVE            @ is the callee a native method?
262#if !defined(WITH_SELF_VERIFICATION)
263    bne     .LinvokeNative              @ yes: take the native-invoke path
264#else
265    bxne    lr                          @ bail to the interpreter
266#endif
267
268    ldr     r10, .LdvmJitToInterpTraceSelectNoChain
269    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
270    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
271
272    @ Update "glue" values for the new method
273    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
274    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
275    mov     rFP, r1                         @ fp = newFp
276    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
277
278    @ Start executing the callee
279#if defined(WITH_JIT_TUNING)
280    mov     r0, #kInlineCacheMiss
281#endif
282    mov     pc, r10                         @ dvmJitToInterpTraceSelectNoChain
283
284/* ------------------------------ */
285    .balign 4
286    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN
287dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
288/* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S */
289    /*
290     * For monomorphic callsite, setup the Dalvik frame and return to the
291     * Thumb code through the link register to transfer control to the callee
292     * method through a dedicated chaining cell (lr + 2 is the punt-to-interp exit).
293     */
294    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
295    @ methodToCall is guaranteed to be non-native
296.LinvokeChain:                              @ also entered from the predicted-chain template
297    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
298    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
299    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
300    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
301    add     r3, r1, #1  @ r3<- returnCell + 1 (Thumb addr is odd)
302    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
303    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
304    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
305    add     r12, lr, #2                 @ setup the punt-to-interp address
306    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
307    ldr     r8, [r8]                    @ r8<- suspendCount (int)
308    cmp     r10, r9                     @ bottom < interpStackEnd?
309    bxlo    r12                         @ return to raise stack overflow excep.
310    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
311    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
312    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
313    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
314    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
315
316
317    @ set up newSaveArea
318    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
319    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
320    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
321    cmp     r8, #0                      @ suspendCount != 0
322    bxne    r12                         @ bail to the interpreter
323
324    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
325    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
326
327    @ Update "glue" values for the new method
328    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
329    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
330    mov     rFP, r1                         @ fp = newFp
331    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
332
333    bx      lr                              @ return to the callee-chaining cell
334
335/* ------------------------------ */
336    .balign 4
337    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
338dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
339/* File: armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
340    /*
341     * For polymorphic callsite, check whether the cached class pointer matches
342     * the current one. If so setup the Dalvik frame and return to the
343     * Thumb code through the link register to transfer control to the callee
344     * method through a dedicated chaining cell.
345     *
346     * The predicted chaining cell is declared in ArmLIR.h with the
347     * following layout:
348     *
349     *  typedef struct PredictedChainingCell {
350     *      u4 branch;
351     *      const ClassObject *clazz;
352     *      const Method *method;
353     *      u4 counter;
354     *  } PredictedChainingCell;
355     *
356     * Upon returning to the callsite:
357     *    - lr  : to branch to the chaining cell
358     *    - lr+2: to punt to the interpreter
359     *    - lr+4: to fully resolve the callee and may rechain.
360     *            r3 <- class
361     *            r1 <- count (r9 keeps the value loaded from icRechainCount)
362     */
363    @ r0 = this, r1 = returnCell, r2 = predictedChainCell, rPC = dalvikCallsite
364    ldr     r3, [r0, #offObject_clazz]  @ r3 <- this->class
365    ldr     r8, [r2, #4]    @ r8 <- predictedChainCell->clazz
366    ldr     r0, [r2, #8]    @ r0 <- predictedChainCell->method
367    ldr     r9, [rGLUE, #offGlue_icRechainCount]   @ r9 <- shared rechainCount
368    cmp     r3, r8          @ predicted class == actual class?
369#if defined(WITH_JIT_TUNING)
370    ldr     r7, .LdvmICHitCount
371    ldreq   r10, [r7, #0]
372    add     r10, r10, #1    @ unconditional, but only stored back on a hit
373    streq   r10, [r7, #0]
374#endif
375    beq     .LinvokeChain   @ predicted chain is valid
376    ldr     r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
377    cmp     r8, #0          @ initialized class or not
378    moveq   r1, #0          @ cell clazz is null: r1 <- 0
379    subne   r1, r9, #1      @ count--
380    strne   r1, [rGLUE, #offGlue_icRechainCount]   @ write back to InterpState
381    add     lr, lr, #4      @ return to fully-resolve landing pad
382    /*
383     * r1 <- count
384     * r2 <- &predictedChainCell
385     * r3 <- this->class
386     * r4 <- dPC
387     * r7 <- this->class->vtable
388     */
389    bx      lr
390
391/* ------------------------------ */
392    .balign 4
393    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE
394dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
395/* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S -- build the callee frame, then call method->nativeFunc */
396    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
397    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
398    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
399    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
400    add     r3, r1, #1  @ r3<- returnCell + 1 (Thumb addr is odd)
401    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
402    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
403    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
404    ldr     r8, [r8]                    @ r8<- suspendCount (int)
405    cmp     r10, r9                     @ bottom < interpStackEnd?
406    bxlo    lr                          @ return to raise stack overflow excep.
407    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
408    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
409    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
410    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
411
412
413    @ set up newSaveArea
414    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
415    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
416    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
417    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
418    cmp     r8, #0                      @ suspendCount != 0
419    ldr     r8, [r0, #offMethod_nativeFunc] @ r8<- method->nativeFunc
420#if !defined(WITH_SELF_VERIFICATION)
421    bxne    lr                          @ bail to the interpreter
422#else
423    bx      lr                          @ bail to interpreter unconditionally
424#endif
425
426    @ go ahead and transfer control to the native code
427    ldr     r9, [r3, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
428    mov     r2, #0
429    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
430    str     r2, [r3, #offThread_inJitCodeCache] @ not in the jit code cache
431    str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
432                                        @ newFp->localRefCookie=top
433    mov     r9, r3                      @ r9<- glue->self (preserve)
434    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
435
436    mov     r2, r0                      @ r2<- methodToCall
437    mov     r0, r1                      @ r0<- newFP
438    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
439
440    blx     r8                          @ off to the native code
441
442    @ native return; r9=self, r10=newSaveArea
443    @ equivalent to dvmPopJniLocals
444    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
445    ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
446    ldr     r1, [r9, #offThread_exception] @ check for exception
447    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
448    cmp     r1, #0                      @ null?
449    str     r0, [r9, #offThread_jniLocal_topCookie] @ new top <- old top
450    ldr     r0, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
451
452    @ r0 = dalvikCallsitePC
453    bne     .LhandleException           @ exception is non-null: handle it
454
455    str     r2, [r9, #offThread_inJitCodeCache] @ set the mode properly
456    cmp     r2, #0                      @ return chaining cell still exists?
457    bxne    r2                          @ yes - go ahead
458
459    @ continue executing the next instruction through the interpreter
460    ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
461    add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
462#if defined(WITH_JIT_TUNING)
463    mov     r0, #kCallsiteInterpreted
464#endif
465    mov     pc, r1
466
467/* ------------------------------ */
468    .balign 4
469    .global dvmCompiler_TEMPLATE_MUL_LONG
470dvmCompiler_TEMPLATE_MUL_LONG:
471/* File: armv5te/TEMPLATE_MUL_LONG.S */
472    /*
473     * Signed 64-bit integer multiply.
474     *
475     * For JIT: op1 in r0/r1, op2 in r2/r3, return in r0/r1 (writes r2, r9, r10, ip)
476     *
477     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
478     *        WX
479     *      x YZ
480     *  --------
481     *     ZW ZX
482     *  YW YX
483     *
484     * The low word of the result holds ZX, the high word holds
485     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
486     * it doesn't fit in the low 64 bits.
487     *
488     * Unlike most ARM math operations, multiply instructions have
489     * restrictions on using the same register more than once (Rd and Rm
490     * cannot be the same).
491     */
492    /* mul-long vAA, vBB, vCC */
493    mul     ip, r2, r1                  @  ip<- ZxW
494    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
495    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
496    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
497    mov     r0,r9                       @  r0<- low word of the result
498    mov     r1,r10                      @  r1<- high word of the result
499    bx      lr
500
501/* ------------------------------ */
502    .balign 4
503    .global dvmCompiler_TEMPLATE_SHL_LONG
504dvmCompiler_TEMPLATE_SHL_LONG:
505/* File: armv5te/TEMPLATE_SHL_LONG.S */
506    /*
507     * Long integer shift.  This is different from the generic 32/64-bit
508     * binary operations because vAA/vBB are 64-bit but vCC (the shift
509     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
510     * 6 bits.  In: r1:r0 = value, r2 = distance.  Out: r1:r0 (writes r2, r3, ip).
511     */
512    /* shl-long vAA, vBB, vCC */
513    and     r2, r2, #63                 @ r2<- r2 & 0x3f
514    mov     r1, r1, asl r2              @  r1<- r1 << r2
515    rsb     r3, r2, #32                 @  r3<- 32 - r2
516    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >>> (32-r2))
517    subs    ip, r2, #32                 @  ip<- r2 - 32
518    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
519    mov     r0, r0, asl r2              @  r0<- r0 << r2
520    bx      lr
521
522/* ------------------------------ */
523    .balign 4
524    .global dvmCompiler_TEMPLATE_SHR_LONG
525dvmCompiler_TEMPLATE_SHR_LONG:
526/* File: armv5te/TEMPLATE_SHR_LONG.S */
527    /*
528     * Long integer shift.  This is different from the generic 32/64-bit
529     * binary operations because vAA/vBB are 64-bit but vCC (the shift
530     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
531     * 6 bits.  In: r1:r0 = value, r2 = distance.  Out: r1:r0 (writes r2, r3, ip).
532     */
533    /* shr-long vAA, vBB, vCC */
534    and     r2, r2, #63                 @ r2<- r2 & 0x3f
535    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
536    rsb     r3, r2, #32                 @  r3<- 32 - r2
537    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
538    subs    ip, r2, #32                 @  ip<- r2 - 32
539    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
540    mov     r1, r1, asr r2              @  r1<- r1 >> r2
541    bx      lr
542
543/* ------------------------------ */
544    .balign 4
545    .global dvmCompiler_TEMPLATE_USHR_LONG
546dvmCompiler_TEMPLATE_USHR_LONG:
547/* File: armv5te/TEMPLATE_USHR_LONG.S */
548    /*
549     * Long integer shift.  This is different from the generic 32/64-bit
550     * binary operations because vAA/vBB are 64-bit but vCC (the shift
551     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
552     * 6 bits.  In: r1:r0 = value, r2 = distance.  Out: r1:r0 (writes r2, r3, ip).
553     */
554    /* ushr-long vAA, vBB, vCC */
555    and     r2, r2, #63                 @ r2<- r2 & 0x3f
556    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
557    rsb     r3, r2, #32                 @  r3<- 32 - r2
558    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
559    subs    ip, r2, #32                 @  ip<- r2 - 32
560    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
561    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
562    bx      lr
563
564/* ------------------------------ */
565    .balign 4
566    .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP
567dvmCompiler_TEMPLATE_ADD_FLOAT_VFP:
568/* File: armv5te-vfp/TEMPLATE_ADD_FLOAT_VFP.S */
569/* File: armv5te-vfp/fbinop.S */
570    /*
571     * Generic 32-bit floating point operation.  Provide an "instr" line that
572     * specifies an instruction that performs s2 = s0 op s1.
573     * In this expansion the instruction is fadds (single-precision add).
574     * On entry:
575     *     r0 = target dalvik register address
576     *     r1 = op1 address
577     *     r2 = op2 address
578     */
579     flds    s0,[r1]                    @ s0<- op1
580     flds    s1,[r2]                    @ s1<- op2
581     fadds   s2, s0, s1                 @ s2<- s0 + s1
582     fsts    s2,[r0]                    @ store the result at the target address
583     bx      lr
584
585
586/* ------------------------------ */
587    .balign 4
588    .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP
589dvmCompiler_TEMPLATE_SUB_FLOAT_VFP:
590/* File: armv5te-vfp/TEMPLATE_SUB_FLOAT_VFP.S */
591/* File: armv5te-vfp/fbinop.S */
592    /*
593     * Generic 32-bit floating point operation.  Provide an "instr" line that
594     * specifies an instruction that performs s2 = s0 op s1.
595     * In this expansion the instruction is fsubs (single-precision subtract).
596     * On entry:
597     *     r0 = target dalvik register address
598     *     r1 = op1 address
599     *     r2 = op2 address
600     */
601     flds    s0,[r1]                    @ s0<- op1
602     flds    s1,[r2]                    @ s1<- op2
603     fsubs   s2, s0, s1                 @ s2<- s0 - s1
604     fsts    s2,[r0]                    @ store the result at the target address
605     bx      lr
606
607
608/* ------------------------------ */
609    .balign 4
610    .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP
611dvmCompiler_TEMPLATE_MUL_FLOAT_VFP:
612/* File: armv5te-vfp/TEMPLATE_MUL_FLOAT_VFP.S */
613/* File: armv5te-vfp/fbinop.S */
614    /*
615     * Generic 32-bit floating point operation.  Provide an "instr" line that
616     * specifies an instruction that performs s2 = s0 op s1.
617     * In this expansion the instruction is fmuls (single-precision multiply).
618     * On entry:
619     *     r0 = target dalvik register address
620     *     r1 = op1 address
621     *     r2 = op2 address
622     */
623     flds    s0,[r1]                    @ s0<- op1
624     flds    s1,[r2]                    @ s1<- op2
625     fmuls   s2, s0, s1                 @ s2<- s0 * s1
626     fsts    s2,[r0]                    @ store the result at the target address
627     bx      lr
628
629
630/* ------------------------------ */
631    .balign 4
632    .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP
633dvmCompiler_TEMPLATE_DIV_FLOAT_VFP:
634/* File: armv5te-vfp/TEMPLATE_DIV_FLOAT_VFP.S */
635/* File: armv5te-vfp/fbinop.S */
636    /*
637     * Generic 32-bit floating point operation.  Provide an "instr" line that
638     * specifies an instruction that performs s2 = s0 op s1.
639     * In this expansion the instruction is fdivs (single-precision divide).
640     * On entry:
641     *     r0 = target dalvik register address
642     *     r1 = op1 address
643     *     r2 = op2 address
644     */
645     flds    s0,[r1]                    @ s0<- op1
646     flds    s1,[r2]                    @ s1<- op2
647     fdivs   s2, s0, s1                 @ s2<- s0 / s1
648     fsts    s2,[r0]                    @ store the result at the target address
649     bx      lr
650
651
652/* ------------------------------ */
653    .balign 4
654    .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP
655dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP:
656/* File: armv5te-vfp/TEMPLATE_ADD_DOUBLE_VFP.S */
657/* File: armv5te-vfp/fbinopWide.S */
658    /*
659     * Generic 64-bit floating point operation.  Provide an "instr" line that
660     * specifies an instruction that performs d2 = d0 op d1.
661     * In this expansion the instruction is faddd (double-precision add).
662     * On entry:
663     *     r0 = target dalvik register address
664     *     r1 = op1 address
665     *     r2 = op2 address
666     */
667     fldd    d0,[r1]                    @ d0<- op1
668     fldd    d1,[r2]                    @ d1<- op2
669     faddd   d2, d0, d1                 @ d2<- d0 + d1
670     fstd    d2,[r0]                    @ store the result at the target address
671     bx      lr
672
673
674/* ------------------------------ */
675    .balign 4
676    .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP
677dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP:
678/* File: armv5te-vfp/TEMPLATE_SUB_DOUBLE_VFP.S */
679/* File: armv5te-vfp/fbinopWide.S */
680    /*
681     * Generic 64-bit floating point operation.  Provide an "instr" line that
682     * specifies an instruction that performs d2 = d0 op d1.
683     * In this expansion the instruction is fsubd (double-precision subtract).
684     * On entry:
685     *     r0 = target dalvik register address
686     *     r1 = op1 address
687     *     r2 = op2 address
688     */
689     fldd    d0,[r1]                    @ d0<- op1
690     fldd    d1,[r2]                    @ d1<- op2
691     fsubd   d2, d0, d1                 @ d2<- d0 - d1
692     fstd    d2,[r0]                    @ store the result at the target address
693     bx      lr
694
695
696/* ------------------------------ */
697    .balign 4
698    .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP
699dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP:
700/* File: armv5te-vfp/TEMPLATE_MUL_DOUBLE_VFP.S */
701/* File: armv5te-vfp/fbinopWide.S */
702    /*
703     * Generic 64-bit floating point operation.  Provide an "instr" line that
704     * specifies an instruction that performs d2 = d0 op d1.
705     * In this expansion the instruction is fmuld (double-precision multiply).
706     * On entry:
707     *     r0 = target dalvik register address
708     *     r1 = op1 address
709     *     r2 = op2 address
710     */
711     fldd    d0,[r1]                    @ d0<- op1
712     fldd    d1,[r2]                    @ d1<- op2
713     fmuld   d2, d0, d1                 @ d2<- d0 * d1
714     fstd    d2,[r0]                    @ store the result at the target address
715     bx      lr
716
717
718/* ------------------------------ */
719    .balign 4
720    .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP
721dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP:
722/* File: armv5te-vfp/TEMPLATE_DIV_DOUBLE_VFP.S */
723/* File: armv5te-vfp/fbinopWide.S */
724    /*
725     * Generic 64-bit floating point operation.  Provide an "instr" line that
726     * specifies an instruction that performs d2 = d0 op d1.
727     * In this expansion the instruction is fdivd (double-precision divide).
728     * On entry:
729     *     r0 = target dalvik register address
730     *     r1 = op1 address
731     *     r2 = op2 address
732     */
733     fldd    d0,[r1]                    @ d0<- op1
734     fldd    d1,[r2]                    @ d1<- op2
735     fdivd   d2, d0, d1                 @ d2<- d0 / d1
736     fstd    d2,[r0]                    @ store the result at the target address
737     bx      lr
738
739
740/* ------------------------------ */
741    .balign 4
742    .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP
743dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP:
744/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */
745/* File: armv5te-vfp/funopNarrower.S */
746    /*
747     * Generic 64bit-to-32bit floating point unary operation.  Provide an
748     * "instr" line that specifies an instruction that performs "s0 = op d0".
749     * In this expansion the instruction is fcvtsd (double to single).
750     * For: double-to-int, double-to-float
751     *
752     * On entry:
753     *     r0 = target dalvik register address
754     *     r1 = src dalvik register address
755     */
756    /* unop vA, vB */
757    fldd    d0, [r1]                    @ d0<- vB
758    fcvtsd  s0, d0                              @ s0<- (float) d0
759    fsts    s0, [r0]                    @ vA<- s0
760    bx      lr
761
762
763/* ------------------------------ */
764    .balign 4
765    .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP
766dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP:
767/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_INT_VFP.S */
768/* File: armv5te-vfp/funopNarrower.S */
769    /*
770     * Generic 64bit-to-32bit floating point unary operation.  Provide an
771     * "instr" line that specifies an instruction that performs "s0 = op d0".
772     * In this expansion the instruction is ftosizd (double to signed int).
773     * For: double-to-int, double-to-float
774     *
775     * On entry:
776     *     r0 = target dalvik register address
777     *     r1 = src dalvik register address
778     */
779    /* unop vA, vB */
780    fldd    d0, [r1]                    @ d0<- vB
781    ftosizd  s0, d0                              @ s0<- (int) d0, rounding toward zero
782    fsts    s0, [r0]                    @ vA<- s0 (integer bit pattern)
783    bx      lr
784
785
786/* ------------------------------ */
787    .balign 4
788    .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP
789dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP:
790/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */
791/* File: armv5te-vfp/funopWider.S */
792    /*
793     * float-to-double: widen the 32-bit value at [r1] to double precision
794     * and store the 64-bit result at [r0].
795     *
796     * Instance of the generic 32bit-to-64bit unary template ("d0 = op s0"),
797     * which is shared with int-to-double.
798     * On entry:
799     *     r0 = address of the target Dalvik register (8 bytes are written)
800     *     r1 = address of the source Dalvik register
801     */
802    /* unop vA, vB */
803    vldr            s0, [r1]            @ s0<- vB
804    vcvt.f64.f32    d0, s0              @ d0<- (double) s0
805    vstr            d0, [r0]            @ vA<- d0
806    bx              lr
807
808
809/* ------------------------------ */
810    .balign 4
811    .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP
812dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP:
813/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_INT_VFP.S */
814/* File: armv5te-vfp/funop.S */
815    /*
816     * float-to-int: convert the 32-bit float at [r1] to a signed 32-bit
817     * integer, rounding toward zero, and store it at [r0].
818     *
819     * Instance of the generic 32bit-to-32bit unary template ("s1 = op s0"),
820     * which is shared with int-to-float.
821     * On entry:
822     *     r0 = address of the target Dalvik register
823     *     r1 = address of the source Dalvik register
824     */
825    /* unop vA, vB */
826    vldr            s0, [r1]            @ s0<- vB
827    vcvt.s32.f32    s1, s0              @ s1<- (int) s0, round toward zero
828    vstr            s1, [r0]            @ vA<- s1
829    bx              lr
830
831
832/* ------------------------------ */
833    .balign 4
834    .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP
835dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP:
836/* File: armv5te-vfp/TEMPLATE_INT_TO_DOUBLE_VFP.S */
837/* File: armv5te-vfp/funopWider.S */
838    /*
839     * int-to-double: convert the signed 32-bit integer at [r1] to double
840     * precision and store the 64-bit result at [r0].
841     *
842     * Instance of the generic 32bit-to-64bit unary template ("d0 = op s0"),
843     * which is shared with float-to-double.
844     * On entry:
845     *     r0 = address of the target Dalvik register (8 bytes are written)
846     *     r1 = address of the source Dalvik register
847     */
848    /* unop vA, vB */
849    vldr            s0, [r1]            @ s0<- vB (raw integer bits)
850    vcvt.f64.s32    d0, s0              @ d0<- (double) s0
851    vstr            d0, [r0]            @ vA<- d0
852    bx              lr
853
854
855/* ------------------------------ */
856    .balign 4
857    .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP
858dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP:
859/* File: armv5te-vfp/TEMPLATE_INT_TO_FLOAT_VFP.S */
860/* File: armv5te-vfp/funop.S */
861    /*
862     * int-to-float: convert the signed 32-bit integer at [r1] to single
863     * precision and store the result at [r0].
864     *
865     * Instance of the generic 32bit-to-32bit unary template ("s1 = op s0"),
866     * which is shared with float-to-int.
867     * On entry:
868     *     r0 = address of the target Dalvik register
869     *     r1 = address of the source Dalvik register
870     */
871    /* unop vA, vB */
872    vldr            s0, [r1]            @ s0<- vB (raw integer bits)
873    vcvt.f32.s32    s1, s0              @ s1<- (float) s0
874    vstr            s1, [r0]            @ vA<- s1
875    bx              lr
876
877
878/* ------------------------------ */
879    .balign 4
880    .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP
881dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP:
882/* File: armv5te-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S */
883    /*
884     * cmpg-double: three-way compare of two doubles, result left in r0.
885     * Any outcome that is neither "equal" nor "less than" -- which
886     * includes the unordered (NaN operand) case -- yields 1.
887     *
888     * int compare(x, y) {
889     *     if (x == y) {
890     *         return 0;
891     *     }
892     *     if (x < y) {
893     *         return -1;
894     *     }
895     *     return 1;
896     * }
897     *
898     * On entry:
899     *    r0 = &op1 [vBB]
900     *    r1 = &op2 [vCC]
901     * On exit: r0 = -1, 0 or 1
902     */
903    /* op vAA, vBB, vCC */
904    vldr        d0, [r0]                @ d0<- vBB
905    vldr        d1, [r1]                @ d1<- vCC
906    vcmp.f64    d0, d1                  @ compare (vBB, vCC)
907    mov         r0, #1                  @ r0<- 1 (default)
908    vmrs        APSR_nzcv, FPSCR        @ copy VFP status flags to the APSR
909    mvnmi       r0, #0                  @ (less than) r0<- -1
910    moveq       r0, #0                  @ (equal) r0<- 0
911    bx          lr
912
913/* ------------------------------ */
914    .balign 4
915    .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP
916dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP:
917/* File: armv5te-vfp/TEMPLATE_CMPL_DOUBLE_VFP.S */
918    /*
919     * cmpl-double: three-way compare of two doubles, result left in r0
920     * (-1, 0 or 1).  Any outcome that is neither "equal" nor "greater
921     * than" -- which includes the unordered (NaN) case -- yields -1.
922     *
923     * int compare(x, y) {
924     *     if (x == y) {
925     *         return 0;
926     *     }
927     *     if (x > y) {
928     *         return 1;
929     *     }
930     *     return -1;
931     * }
932     *
933     * On entry:
934     *    r0 = &op1 [vBB]
935     *    r1 = &op2 [vCC]
936     */
937    /* op vAA, vBB, vCC */
938    vldr        d0, [r0]                @ d0<- vBB
939    vldr        d1, [r1]                @ d1<- vCC
940    vcmpe.f64   d0, d1                  @ compare (vBB, vCC)
941    mvn         r0, #0                  @ r0<- -1 (default)
942    vmrs        APSR_nzcv, FPSCR        @ copy VFP status flags to the APSR
943    movgt       r0, #1                  @ (greater than) r0<- 1
944    moveq       r0, #0                  @ (equal) r0<- 0
945    bx          lr
946
947/* ------------------------------ */
948    .balign 4
949    .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP
950dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP:
951/* File: armv5te-vfp/TEMPLATE_CMPG_FLOAT_VFP.S */
952    /*
953     * cmpg-float: three-way compare of two floats, result left in r0
954     * (-1, 0 or 1).  Any outcome that is neither "equal" nor "less
955     * than" -- which includes the unordered (NaN) case -- yields 1.
956     *
957     * int compare(x, y) {
958     *     if (x == y) {
959     *         return 0;
960     *     }
961     *     if (x < y) {
962     *         return -1;
963     *     }
964     *     return 1;
965     * }
966     *
967     * On entry:
968     *    r0 = &op1 [vBB]
969     *    r1 = &op2 [vCC]
970     */
971    /* op vAA, vBB, vCC */
972    vldr        s0, [r0]                @ s0<- vBB
973    vldr        s1, [r1]                @ s1<- vCC
974    vcmp.f32    s0, s1                  @ compare (vBB, vCC)
975    mov         r0, #1                  @ r0<- 1 (default)
976    vmrs        APSR_nzcv, FPSCR        @ copy VFP status flags to the APSR
977    mvnmi       r0, #0                  @ (less than) r0<- -1
978    moveq       r0, #0                  @ (equal) r0<- 0
979    bx          lr
980
981/* ------------------------------ */
982    .balign 4
983    .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP
984dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP:
985/* File: armv5te-vfp/TEMPLATE_CMPL_FLOAT_VFP.S */
986    /*
987     * cmpl-float: three-way compare of two floats, result left in r0
988     * (-1, 0 or 1).  Any outcome that is neither "equal" nor "greater
989     * than" -- which includes the unordered (NaN) case -- yields -1.
990     *
991     * int compare(x, y) {
992     *     if (x == y) {
993     *         return 0;
994     *     }
995     *     if (x > y) {
996     *         return 1;
997     *     }
998     *     return -1;
999     * }
1000     *
1001     * On entry:
1002     *    r0 = &op1 [vBB]
1003     *    r1 = &op2 [vCC]
1004     */
1005    /* op vAA, vBB, vCC */
1006    vldr        s0, [r0]                @ s0<- vBB
1007    vldr        s1, [r1]                @ s1<- vCC
1008    vcmp.f32    s0, s1                  @ compare (vBB, vCC)
1009    mvn         r0, #0                  @ r0<- -1 (default)
1010    vmrs        APSR_nzcv, FPSCR        @ copy VFP status flags to the APSR
1011    movgt       r0, #1                  @ (greater than) r0<- 1
1012    moveq       r0, #0                  @ (equal) r0<- 0
1013    bx          lr
1014
1015/* ------------------------------ */
1016    .balign 4
1017    .global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP
1018dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP:
1019/* File: armv5te-vfp/TEMPLATE_SQRT_DOUBLE_VFP.S */
1020    /*
1021     * Double-precision square root using the VFP unit.
1022     * A NaN result is not returned directly: the original operand is
1023     * handed to the library sqrt routine, which produces the final answer.
1024     *
1025     * On entry:
1026     *     r2 = address of the source operand
1027     * On exit:
1028     *     r0,r1 = result
1029     */
1030    vldr        d0, [r2]                @ d0<- operand
1031    vsqrt.f64   d1, d0                  @ d1<- sqrt(d0)
1032    vcmp.f64    d1, d1                  @ a NaN never compares equal to itself
1033    vmrs        APSR_nzcv, FPSCR        @ copy VFP status flags to the APSR
1034    vmov        r0, r1, d1              @ r0/r1<- VFP result
1035    bxeq        lr                      @ result is not a NaN - return it
1036    ldr         r2, .Lsqrt              @ r2<- &sqrt
1037    vmov        r0, r1, d0              @ r0/r1<- original operand
1038    bx          r2                      @ tail call to sqrt library routine
1039
1040.Lsqrt:
1041    .word   sqrt
1042
1043/* ------------------------------ */
1044    .balign 4
1045    .global dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON
1046dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON:
1047/* File: armv5te/TEMPLATE_THROW_EXCEPTION_COMMON.S */
1048    /*
1049     * Throw an exception from JIT'ed code.  This is a one-instruction
1050     * trampoline to the shared .LhandleException routine.  On entry:
1051     *    r0    Dalvik PC that raises the exception
1052     */
1053    b       .LhandleException       @ plain branch (no link); r0 is passed through
1054
1055/* ------------------------------ */
1056    .balign 4
1057    .global dvmCompiler_TEMPLATE_MEM_OP_DECODE
1058dvmCompiler_TEMPLATE_MEM_OP_DECODE:
1059/* File: armv5te-vfp/TEMPLATE_MEM_OP_DECODE.S */
1060#if defined(WITH_SELF_VERIFICATION)
1061    /*
1062     * Self-verification wrapper around heap memory operations.
1063     *
1064     * Compiled code calls this handler immediately before a heap memory op.
1065     * Every core and VFP register is spilled, the decoder routine is called
1066     * with (lr, sp) so it can decode and process the op, and the registers
1067     * are reloaded.  The return address is changed so the op is skipped.
1068     */
1069    vstmdb  sp!, {d0-d15}               @ spill all fp registers
1070    stmfd   sp!, {r0-r12,lr}            @ spill all core registers
1071    ldr     r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S
1072    mov     r1, sp                      @ arg1 <- stack pointer (register dump)
1073    mov     r0, lr                      @ arg0 <- link register
1074    blx     r2                          @ decode and handle the mem op
1075    ldmfd   sp!, {r0-r12,lr}            @ reload all core registers
1076    vldmia  sp!, {d0-d15}               @ reload all fp registers
1077    bx      lr                          @ return to compiled code
1078#endif
1079
1080/* ------------------------------ */
1081    .balign 4
1082    .global dvmCompiler_TEMPLATE_STRING_COMPARETO
1083dvmCompiler_TEMPLATE_STRING_COMPARETO:
1084/* File: armv5te/TEMPLATE_STRING_COMPARETO.S */
1085    /*
1086     * String's compareTo.
1087     *
1088     * Requires r0/r1 to have been previously checked for null.  Returns
1089     * a negative value if this string is < comp, 0 if they are the
1090     * same, and a positive value if >.
1091     *
1092     * IMPORTANT NOTE:
1093     *
1094     * This code relies on hard-coded offsets for string objects, and must be
1095     * kept in sync with definitions in UtfString.h.  See asm-constants.h
1096     *
1097     * On entry:
1098     *    r0:   this object pointer
1099     *    r1:   comp object pointer
1100     * On exit:  r0 = result; r1-r4 and r7-r12 are overwritten, not restored
1101     */
1102
1103    mov    r2, r0         @ this to r2, opening up r0 for return value
1104    subs   r0, r2, r1     @ Same?
1105    bxeq   lr             @ same object: return 0
1106
1107    ldr    r4, [r2, #STRING_FIELDOFF_OFFSET]
1108    ldr    r9, [r1, #STRING_FIELDOFF_OFFSET]
1109    ldr    r7, [r2, #STRING_FIELDOFF_COUNT]
1110    ldr    r10, [r1, #STRING_FIELDOFF_COUNT]
1111    ldr    r2, [r2, #STRING_FIELDOFF_VALUE]
1112    ldr    r1, [r1, #STRING_FIELDOFF_VALUE]
1113
1114    /*
1115     * At this point, we have (this/comp):
1116     *    value:  r2/r1
1117     *    offset: r4/r9
1118     *    count:  r7/r10
1119     * We're going to compute
1120     *    r11 <- countDiff (this.count - comp.count)
1121     *    r10 <- minCount  (the smaller of the two counts)
1122     */
1123     subs  r11, r7, r10
1124     movls r10, r7        @ this.count <= comp.count (unsigned): use it
1125
1126     /* Now, build pointers to the string data (offsets are in 16-bit chars) */
1127     add   r2, r2, r4, lsl #1
1128     add   r1, r1, r9, lsl #1
1129     /*
1130      * Note: data pointers point to the previous element so we can use
1131      * pre-index mode with base writeback.  16 is the hard-coded contents offset.
1132      */
1133     add   r2, #16-2   @ offset to contents[-1]
1134     add   r1, #16-2   @ offset to contents[-1]
1135
1136     /*
1137      * At this point we have:
1138      *   r2: *this string data
1139      *   r1: *comp string data
1140      *   r10: iteration count for comparison
1141      *   r11: value to return if the first part of the string is equal
1142      *   r0: reserved for result
1143      *   r3, r4, r7, r8, r9, r12 available for loading string data
1144      */
1145
1146    subs  r10, #2         @ r10<- minCount - 2
1147    blt   do_remainder2   @ fewer than two chars to compare
1148
1149      /*
1150       * Unroll the first two checks so we can quickly catch early mismatch
1151       * on long strings (but preserve incoming alignment)
1152       */
1153
1154    ldrh  r3, [r2, #2]!
1155    ldrh  r4, [r1, #2]!
1156    ldrh  r7, [r2, #2]!
1157    ldrh  r8, [r1, #2]!
1158    subs  r0, r3, r4      @ difference of first chars
1159    subeqs  r0, r7, r8    @ first chars equal: difference of second chars
1160    bxne  lr              @ mismatch: return the difference
1161    cmp   r10, #28        @ more than 28 chars still to compare?
1162    bgt   do_memcmp16     @ yes: let __memcmp16 do the rest
1163    subs  r10, #3
1164    blt   do_remainder    @ fewer than three chars left
1165
1166loopback_triple:          @ compare three chars per iteration
1167    ldrh  r3, [r2, #2]!
1168    ldrh  r4, [r1, #2]!
1169    ldrh  r7, [r2, #2]!
1170    ldrh  r8, [r1, #2]!
1171    ldrh  r9, [r2, #2]!
1172    ldrh  r12,[r1, #2]!
1173    subs  r0, r3, r4
1174    subeqs  r0, r7, r8
1175    subeqs  r0, r9, r12
1176    bxne  lr              @ first differing pair decides the result
1177    subs  r10, #3
1178    bge   loopback_triple
1179
1180do_remainder:
1181    adds  r10, #3         @ undo the bias: r10<- chars left (0..2)
1182    beq   returnDiff
1183
1184loopback_single:          @ compare one char per iteration
1185    ldrh  r3, [r2, #2]!
1186    ldrh  r4, [r1, #2]!
1187    subs  r0, r3, r4
1188    bxne  lr
1189    subs  r10, #1
1190    bne     loopback_single
1191
1192returnDiff:               @ common prefix is equal: result is countDiff
1193    mov   r0, r11
1194    bx    lr
1195
1196do_remainder2:            @ reached when minCount < 2
1197    adds  r10, #2         @ undo the bias: r10<- minCount (0 or 1)
1198    bne   loopback_single
1199    mov   r0, r11
1200    bx    lr
1201
1202    /* Long string case: r4/r7 are callee-saved, so they survive the call */
1203do_memcmp16:
1204    mov   r4, lr          @ keep our return address in r4
1205    ldr   lr, .Lmemcmp16
1206    mov   r7, r11         @ keep countDiff in r7
1207    add   r0, r2, #2      @ r0<- next unread char of this (undo pre-index bias)
1208    add   r1, r1, #2      @ r1<- next unread char of comp
1209    mov   r2, r10         @ r2<- remaining count (presumably in 16-bit units)
1210    blx   lr              @ call __memcmp16
1211    cmp   r0, #0
1212    bxne  r4              @ non-zero: return __memcmp16's result as-is
1213    mov   r0, r7          @ equal: result is countDiff
1214    bx    r4
1215
1216.Lmemcmp16:
1217    .word __memcmp16
1218
1219/* ------------------------------ */
1220    .balign 4
1221    .global dvmCompiler_TEMPLATE_STRING_INDEXOF
1222dvmCompiler_TEMPLATE_STRING_INDEXOF:
1223/* File: armv5te/TEMPLATE_STRING_INDEXOF.S */
1224    /*
1225     * String's indexOf.
1226     *
1227     * Requires r0 to have been previously checked for null.  Returns in r0
1228     * the index of the first match of r1 at or after the start, or -1 if none.
1229     *
1230     * IMPORTANT NOTE:
1231     *
1232     * This code relies on hard-coded offsets for string objects, and must be
1233     * kept in sync with definitions in UtfString.h.  See asm-constants.h
1234     * Overwrites r2-r4, r7, r8, r10 and r11 without restoring them.
1235     * On entry:
1236     *    r0:   string object pointer
1237     *    r1:   char to match
1238     *    r2:   Starting offset in string data
1239     */
1240
1241    ldr    r7, [r0, #STRING_FIELDOFF_OFFSET]
1242    ldr    r8, [r0, #STRING_FIELDOFF_COUNT]
1243    ldr    r0, [r0, #STRING_FIELDOFF_VALUE]
1244
1245    /*
1246     * At this point, we have:
1247     *    r0: char array (the string's value field)
1248     *    r1: char to match
1249     *    r2: starting offset
1250     *    r7: offset
1251     *    r8: string length
1252     */
1253
1254     /* Build pointer to start of string data (16 = hard-coded contents offset) */
1255     add   r0, #16
1256     add   r0, r0, r7, lsl #1
1257
1258     /* Save a copy of starting data in r7 */
1259     mov   r7, r0
1260
1261     /* Clamp start to [0..count] */
1262     cmp   r2, #0
1263     movlt r2, #0
1264     cmp   r2, r8
1265     movgt r2, r8
1266
1267     /* Build pointer to start of data to compare and pre-bias */
1268     add   r0, r0, r2, lsl #1
1269     sub   r0, #2          @ point one char early for pre-indexed loads
1270
1271     /* Compute iteration count */
1272     sub   r8, r2
1273
1274     /*
1275      * At this point we have:
1276      *   r0: start of data to test
1277      *   r1: char to compare
1278      *   r8: iteration count
1279      *   r7: original start of string
1280      *   r3, r4, r9, r10, r11, r12 available for loading string data
1281      */
1282
1283    subs  r8, #4
1284    blt   indexof_remainder   @ fewer than four chars to scan
1285
1286indexof_loop4:            @ scan four chars per iteration
1287    ldrh  r3, [r0, #2]!
1288    ldrh  r4, [r0, #2]!
1289    ldrh  r10, [r0, #2]!
1290    ldrh  r11, [r0, #2]!
1291    cmp   r3, r1
1292    beq   match_0
1293    cmp   r4, r1
1294    beq   match_1
1295    cmp   r10, r1
1296    beq   match_2
1297    cmp   r11, r1
1298    beq   match_3
1299    subs  r8, #4
1300    bge   indexof_loop4
1301
1302indexof_remainder:
1303    adds    r8, #4        @ undo the bias: r8<- chars left (0..3)
1304    beq     indexof_nomatch
1305
1306indexof_loop1:            @ scan one char per iteration
1307    ldrh  r3, [r0, #2]!
1308    cmp   r3, r1
1309    beq   match_3         @ r0 already points at the matching char
1310    subs  r8, #1
1311    bne   indexof_loop1
1312
1313indexof_nomatch:
1314    mov   r0, #-1
1315    bx    lr
1316
1317match_0:                  @ match was the 1st of the 4 chars just loaded
1318    sub   r0, #6          @ back r0 up to the matching char
1319    sub   r0, r7          @ byte distance from start of string data
1320    asr   r0, r0, #1      @ bytes -> char index
1321    bx    lr
1322match_1:                  @ match was the 2nd of the 4 chars
1323    sub   r0, #4
1324    sub   r0, r7
1325    asr   r0, r0, #1
1326    bx    lr
1327match_2:                  @ match was the 3rd of the 4 chars
1328    sub   r0, #2
1329    sub   r0, r7
1330    asr   r0, r0, #1
1331    bx    lr
1332match_3:                  @ r0 points at the matching char
1333    sub   r0, r7
1334    asr   r0, r0, #1
1335    bx    lr
1336
1337/* ------------------------------ */
1338    .balign 4
1339    .global dvmCompiler_TEMPLATE_INTERPRET
1340dvmCompiler_TEMPLATE_INTERPRET:
1341/* File: armv5te/TEMPLATE_INTERPRET.S */
1342    /*
1343     * Hand control to the interpreter without performing any lookups.
1344     * This handler is reached either as part of a normal chaining
1345     * operation or from the transition code in header.S; the link
1346     * register distinguishes the two cases.  When called from a
1347     * translation chain, lr points to the chaining Dalvik PC -3.
1348     * On entry:
1349     *    lr - if NULL:
1350     *        r1 - the Dalvik PC to begin interpretation.
1351     *    else
1352     *        [lr, #3] contains Dalvik PC to begin interpretation
1353     *    rGLUE - pointer to interpState
1354     *    rFP - Dalvik frame pointer
1355     */
1356    ldr     r2, .LinterpPunt             @ r2<- &dvmJitToInterpPunt
1357    cmp     lr, #0                       @ called from a translation chain?
1358    ldrne   r1, [lr, #3]                 @ yes: fetch the Dalvik PC via lr
1359    mov     r0, r1                       @ r0<- Dalvik PC to interpret
1360    bx      r2
1361    @ control does not come back here
1362
1363.LinterpPunt:
1364    .word   dvmJitToInterpPunt
1365
1366/* ------------------------------ */
1367    .balign 4
1368    .global dvmCompiler_TEMPLATE_MONITOR_ENTER
1369dvmCompiler_TEMPLATE_MONITOR_ENTER:
1370/* File: armv5te/TEMPLATE_MONITOR_ENTER.S */
1371    /*
1372     * Lock an object by calling out to the runtime.  The thread may be
1373     * suspended in THREAD_MONITOR state while the Jit's translation
1374     * cache is cleared, so returning to compiled code is not safe;
1375     * execution always resumes in the interpreter instead.
1376     *
1377     * On entry:
1378     *    r0 - self pointer
1379     *    r1 - the object (which has already been null-checked by the caller)
1380     *    r4 - the Dalvik PC of the following instruction.
1381     */
1382    mov     r3, #0                       @ Record that we're not returning
1383    str     r3, [r0, #offThread_inJitCodeCache]
1384    ldr     r2, .LdvmLockObject
1385    blx     r2                           @ dvmLockObject(self, obj)
1386    @ re-read the Jit profile table pointer (Jit on/off status)
1387    ldr     r2, .LdvmJitToInterpNoChain
1388    ldr     r0, [rGLUE, #offGlue_ppJitProfTable]
1389    ldr     r0, [r0]
1390    str     r0, [rGLUE, #offGlue_pJitProfTable]
1391    @ Bail to interpreter - no chain [note - r4 still contains rPC]
1392#if defined(WITH_JIT_TUNING)
1393    mov     r0, #kHeavyweightMonitor
1394#endif
1395    bx      r2
1396
1397/* ------------------------------ */
1398    .balign 4
1399    .global dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG
1400dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
1401/* File: armv5te/TEMPLATE_MONITOR_ENTER_DEBUG.S */
1402    /*
1403     * To support deadlock prediction, this version of MONITOR_ENTER
1404     * will always call the heavyweight dvmLockObject, check for an
1405     * exception and then bail out to the interpreter.
1406     *
1407     * On entry:
1408     *    r0 - self pointer
1409     *    r1 - the object (which has already been null-checked by the caller)
1410     *    r4 - the Dalvik PC of the following instruction.
1411     * Never returns to the caller: exits via .LhandleException if an
1412     * exception is pending, otherwise via dvmJitToInterpNoChain. */
1413    ldr     r2, .LdvmLockObject
1414    mov     r3, #0                       @ Record that we're not returning
1415    str     r3, [r0, #offThread_inJitCodeCache]
1416    blx     r2             @ dvmLockObject(self, obj)
1417    @ refresh Jit's on/off status & test for exception
1418    ldr     r0, [rGLUE, #offGlue_ppJitProfTable]
1419    ldr     r1, [rGLUE, #offGlue_self]
1420    ldr     r0, [r0]
1421    ldr     r1, [r1, #offThread_exception]
1422    str     r0, [rGLUE, #offGlue_pJitProfTable]
1423    cmp     r1, #0         @ exception pending?
1424    beq     1f             @ no - resume in the interpreter
1425    @ .LhandleException is a code label, not a literal-pool word: branch to it
1426    sub     r0, r4, #2     @ roll dPC back to this monitor instruction
1427    b       .LhandleException
14281:
1429    @ Bail to interpreter - no chain [note - r4 still contains rPC]
1430#if defined(WITH_JIT_TUNING)
1431    mov     r0, #kHeavyweightMonitor
1432#endif
1433    ldr     pc, .LdvmJitToInterpNoChain
1434
1435    .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
1436/* File: armv5te/footer.S */
1437/*
1438 * ===========================================================================
1439 *  Common subroutines and data
1440 * ===========================================================================
1441 */
1442
1443    .text
1444    .align  2
1445.LinvokeNative:
1446    @ Call a native method from JIT'ed code, then undo the frame bookkeeping.
1447    @ On entry: r1 = newFP, r0 = methodToCall
1448    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
1449    mov     r2, #0
1450    ldr     r9, [r3, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
1451    str     r2, [r3, #offThread_inJitCodeCache] @ not in jit code cache
1452    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
1453    str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
1454                                        @ newFp->localRefCookie=top
1455    mov     r9, r3                      @ r9<- glue->self (preserve)
1456    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
1457
1458    mov     r2, r0                      @ r2<- methodToCall
1459    mov     r0, r1                      @ r0<- newFP
1460    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
1461    @ call through method->nativeFunc (LDR_PC_LR is defined outside this excerpt)
1462    LDR_PC_LR "[r2, #offMethod_nativeFunc]"
1463
1464    @ Refresh Jit's on/off status
1465    ldr     r3, [rGLUE, #offGlue_ppJitProfTable]
1466
1467    @ native return; r9=self, r10=newSaveArea
1468    @ equivalent to dvmPopJniLocals
1469    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
1470    ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
1471    ldr     r1, [r9, #offThread_exception] @ check for exception
1472    ldr     r3, [r3]    @ r3 <- pointer to Jit profile table
1473    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
1474    cmp     r1, #0                      @ null?
1475    str     r0, [r9, #offThread_jniLocal_topCookie] @ new top <- old top
1476    ldr     r0, [r10, #offStackSaveArea_savedPc] @ reload rPC
1477    str     r3, [rGLUE, #offGlue_pJitProfTable]  @ cache current JitProfTable
1478
1479    @ r0 = dalvikCallsitePC
1480    bne     .LhandleException           @ exception pending - handle it
1481
1482    str     r2, [r9, #offThread_inJitCodeCache] @ set the new mode
1483    cmp     r2, #0                      @ return chaining cell still exists?
1484    bxne    r2                          @ yes - go ahead
1485
1486    @ continue executing the next instruction through the interpreter
1487    ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
1488    add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
1489#if defined(WITH_JIT_TUNING)
1490    mov     r0, #kCallsiteInterpreted
1491#endif
1492    mov     pc, r1                      @ jump to dvmJitToInterpTraceSelectNoChain
1493
1494/*
1495 * Common exception exit: mark the thread as outside the JIT code cache and
1496 * jump to dvmMterpCommonExceptionThrown.  On entry: r0 = faulting Dalvik PC
1497 */
1498.LhandleException:
1499#if defined(WITH_SELF_VERIFICATION)
1500    ldr     pc, .LdeadFood @ should not see this under self-verification mode
1501.LdeadFood:
1502    .word   0xdeadf00d     @ marker value loaded into pc by the line above
1503#endif
1504    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
1505    mov     r2, #0
1506    str     r2, [r3, #offThread_inJitCodeCache] @ in interpreter land
1507    ldr     r1, .LdvmMterpCommonExceptionThrown @ PIC way of getting &func
1508    ldr     rIBASE, .LdvmAsmInstructionStart    @ same as above
1509    mov     rPC, r0                 @ reload the faulting Dalvik address
1510    mov     pc, r1                  @ branch to dvmMterpCommonExceptionThrown
1511
1512    .align  2                           @ word-align the address literals below
1513.LdvmAsmInstructionStart:               @ loaded into rIBASE by .LhandleException
1514    .word   dvmAsmInstructionStart
1515.LdvmJitToInterpNoChainNoProfile:
1516    .word   dvmJitToInterpNoChainNoProfile
1517.LdvmJitToInterpTraceSelectNoChain:     @ used by .LinvokeNative
1518    .word   dvmJitToInterpTraceSelectNoChain
1519.LdvmJitToInterpNoChain:                @ used by the MONITOR_ENTER templates
1520    .word   dvmJitToInterpNoChain
1521.LdvmMterpStdBail:
1522    .word   dvmMterpStdBail
1523.LdvmMterpCommonExceptionThrown:        @ used by .LhandleException
1524    .word   dvmMterpCommonExceptionThrown
1525.LdvmLockObject:                        @ used by the MONITOR_ENTER templates
1526    .word   dvmLockObject
1527#if defined(WITH_JIT_TUNING)
1528.LdvmICHitCount:
1529    .word   gDvmICHitCount
1530#endif
1531#if defined(WITH_SELF_VERIFICATION)
1532.LdvmSelfVerificationMemOpDecode:       @ used by TEMPLATE_MEM_OP_DECODE
1533    .word   dvmSelfVerificationMemOpDecode
1534#endif
1535.L__aeabi_cdcmple:
1536    .word   __aeabi_cdcmple
1537.L__aeabi_cfcmple:
1538    .word   __aeabi_cfcmple
1539
1540    .global dmvCompilerTemplateEnd      @ NOTE(review): spelled "dmv"; exported name, check users before renaming
1541dmvCompilerTemplateEnd:                 @ marks the end of the template code and data
1542
1543#endif /* WITH_JIT */
1544
1545