CompilerTemplateAsm-armv7-a.S revision 97319a8a234e9fe1cf90ca39aa6eca37d729afd5
1/*
2 * This file was generated automatically by gen-template.py for 'armv7-a'.
3 *
4 * --> DO NOT EDIT <--
5 */
6
7/* File: armv5te/header.S */
8/*
9 * Copyright (C) 2008 The Android Open Source Project
10 *
11 * Licensed under the Apache License, Version 2.0 (the "License");
12 * you may not use this file except in compliance with the License.
13 * You may obtain a copy of the License at
14 *
15 *      http://www.apache.org/licenses/LICENSE-2.0
16 *
17 * Unless required by applicable law or agreed to in writing, software
18 * distributed under the License is distributed on an "AS IS" BASIS,
19 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 * See the License for the specific language governing permissions and
21 * limitations under the License.
22 */
23
24#if defined(WITH_JIT)
25
26/*
27 * ARMv5 definitions and declarations.
28 */
29
30/*
31ARM EABI general notes:
32
33r0-r3 hold first 4 args to a method; they are not preserved across method calls
34r4-r8 are available for general use
35r9 is given special treatment in some situations, but not for us
36r10 (sl) seems to be generally available
37r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
38r12 (ip) is scratch -- not preserved across method calls
39r13 (sp) should be managed carefully in case a signal arrives
40r14 (lr) must be preserved
41r15 (pc) can be tinkered with directly
42
43r0 holds returns of <= 4 bytes
44r0-r1 hold returns of 8 bytes, low word in r0
45
46Callee must save/restore r4+ (except r12) if it modifies them.
47
48Stack is "full descending".  Only the arguments that don't fit in the first 4
49registers are placed on the stack.  "sp" points at the first stacked argument
50(i.e. the 5th arg).
51
52VFP: single-precision results in s0, double-precision results in d0.
53
54In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
5564-bit quantities (long long, double) must be 64-bit aligned.
56*/
57
58/*
59JIT and ARM notes:
60
61The following registers have fixed assignments:
62
63  reg nick      purpose
64  r5  rFP       interpreted frame pointer, used for accessing locals and args
65  r6  rGLUE     MterpGlue pointer
66
67The following registers have fixed assignments in mterp but are scratch
68registers in compiled code
69
70  reg nick      purpose
71  r4  rPC       interpreted program counter, used for fetching instructions
72  r7  rINST     first 16-bit code unit of current instruction
73  r8  rIBASE    interpreted instruction base pointer, used for computed goto
74
75Macros are provided for common operations.  Each macro MUST emit only
76one instruction to make instruction-counting easier.  They MUST NOT alter
77unspecified registers or condition codes.
78*/
79
80/*
 * Register aliases.  Per the "JIT and ARM notes" earlier in this header,
 * rFP and rGLUE keep their fixed assignments in compiled code, whereas
 * rPC, rINST and rIBASE are fixed only in mterp and are scratch registers
 * in compiled code.
 */
81#define rPC     r4
82#define rFP     r5
83#define rGLUE   r6
84#define rINST   r7
85#define rIBASE  r8
86
87/*
88 * Given a frame pointer, find the stack save area.
89 *
90 * In C this is "((StackSaveArea*)(_fp) -1)".
 *
 * _reg receives the save-area address; _fpreg is only read.  The macro
 * expands to a single SUB without the "s" suffix, so condition flags are
 * not altered.  sizeofStackSaveArea is not defined in this file; it is
 * presumably supplied by the asm-constants.h include below.
91 */
92#define SAVEAREA_FROM_FP(_reg, _fpreg) \
93    sub     _reg, _fpreg, #sizeofStackSaveArea
94
95/*
96 * This is a #include, not a %include, because we want the C pre-processor
97 * to expand the macros into assembler assignment statements.
98 */
99#include "../../../mterp/common/asm-constants.h"
100
101
102/* File: armv5te/platform.S */
103/*
104 * ===========================================================================
105 *  CPU-version-specific defines
106 * ===========================================================================
107 */
108
109/*
110 * Macro for "MOV LR,PC / LDR PC,xxx", which is not allowed pre-ARMv5.
111 * Jump to subroutine.
112 *
113 * May modify IP and LR.
 *
 * As written here the expansion writes only LR (and PC); IP is not touched
 * by this variant.  "source" is the memory operand holding the target
 * address.  In ARM state a read of pc yields the address of the current
 * instruction plus 8, so lr ends up pointing at the instruction that
 * follows the ldr, i.e. the return point.  Note that this macro expands to
 * two instructions.
114 */
115.macro  LDR_PC_LR source
116    mov     lr, pc
117    ldr     pc, \source
118.endm
119
120
/*
 * Exported symbol marking the start of the template code in .text.  The
 * individual dvmCompiler_TEMPLATE_* entry points follow, each one aligned
 * with ".balign 4".
 */
121    .global dvmCompilerTemplateStart
122    .type   dvmCompilerTemplateStart, %function
123    .text
124
125dvmCompilerTemplateStart:
126
127/* ------------------------------ */
128    .balign 4
129    .global dvmCompiler_TEMPLATE_CMP_LONG
130dvmCompiler_TEMPLATE_CMP_LONG:
131/* File: armv5te/TEMPLATE_CMP_LONG.S */
132    /*
133     * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
134     * register based on the results of the comparison.
135     *
     * Register usage in this template (as used by the code below):
     *     r1:r0 = vBB (high:low), r3:r2 = vCC (high:low)
     *     r0    = result (-1, 0 or 1) on return
     * The template returns with "bx lr" and changes the condition flags.
     *
136     * We load the full values with LDM, but in practice many values could
137     * be resolved by only looking at the high word.  This could be made
138     * faster or slower by splitting the LDM into a pair of LDRs.
     * (No LDM appears in this template; the operands are already in
     * registers on entry.)
139     *
140     * If we just wanted to set condition flags, we could do this:
141     *  subs    ip, r0, r2
142     *  sbcs    ip, r1, r3
143     *  subeqs  ip, r0, r2
144     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
145     * integer value, which we can do with 2 conditional mov/mvn instructions
146     * (set 1, set -1; if they're equal we already have 0 in ip), giving
147     * us a constant 5-cycle path plus a branch at the end to the
148     * instruction epilogue code.  The multi-compare approach below needs
149     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
150     * in the worst case (the 64-bit values are equal).
151     */
152    /* cmp-long vAA, vBB, vCC */
153    cmp     r1, r3                      @ compare (vBB+1, vCC+1)
154    blt     .LTEMPLATE_CMP_LONG_less            @ signed compare on high part
155    bgt     .LTEMPLATE_CMP_LONG_greater
156    subs    r0, r0, r2                  @ r0<- r0 - r2 (high words are equal here)
157    bxeq     lr                         @ low words equal too: r0 is already 0
158    bhi     .LTEMPLATE_CMP_LONG_greater         @ unsigned compare on low part
                                        @ otherwise fall through to "less"
159.LTEMPLATE_CMP_LONG_less:
160    mvn     r0, #0                      @ r0<- -1
161    bx      lr
162.LTEMPLATE_CMP_LONG_greater:
163    mov     r0, #1                      @ r0<- 1
164    bx      lr
165
166
167/* ------------------------------ */
168    .balign 4
169    .global dvmCompiler_TEMPLATE_RETURN
170dvmCompiler_TEMPLATE_RETURN:
171/* File: armv5te/TEMPLATE_RETURN.S */
172    /*
173     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
174     * If the stored value in returnAddr
175     * is non-zero, the caller is compiled by the JIT thus return to the
176     * address in the code cache following the invoke instruction. Otherwise
177     * return to the special dvmJitToInterpNoChain entry point.
     *
     * Additional exits visible in the code below:
     *   - If the method recorded in the caller's save area is null (a
     *     "break frame"), rPC/rFP are stored to the glue structure and
     *     control goes to dvmMterpStdBail (label 1).  When
     *     WITH_SELF_VERIFICATION is defined the template does "blxeq lr"
     *     instead.
     *   - A non-zero suspendCount clears the chaining-cell address, forcing
     *     the dvmJitToInterpNoChain path.
     *   - When WITH_SELF_VERIFICATION is defined, chaining is always
     *     disabled (r9 = 0).
     *
     * Scratch registers written: r0-r3, r8, r9, r10, plus rPC and rFP, which
     * are updated to describe the caller's frame.
178     */
179    SAVEAREA_FROM_FP(r0, rFP)           @ r0<- saveArea (old)
180    ldr     r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
181    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
182    ldr     rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc
183#if !defined(WITH_SELF_VERIFICATION)
184    ldr     r9,  [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret
185#else
186    mov     r9, #0                      @ disable chaining
187#endif
188    ldr     r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)]
189                                        @ r2<- method we're returning to
190    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
191    cmp     r2, #0                      @ break frame?
192#if !defined(WITH_SELF_VERIFICATION)
193    beq     1f                          @ bail to interpreter
194#else
195    blxeq   lr                          @ punt to interpreter and compare state
196#endif
197    ldr     r0, .LdvmJitToInterpNoChain @ defined in footer.S
198    mov     rFP, r10                    @ publish new FP
199    ldrne   r10, [r2, #offMethod_clazz] @ r10<- method->clazz (flags still from cmp r2, #0)
200    ldr     r8, [r8]                    @ r8<- suspendCount
201
202    str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
203    ldr     r1, [r10, #offClassObject_pDvmDex] @ r1<- method->clazz->pDvmDex
204    str     rFP, [r3, #offThread_curFrame] @ self->curFrame = fp
205    add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
206    str     r1, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = pDvmDex
207    cmp     r8, #0                      @ check the suspendCount
208    movne   r9, #0                      @ clear the chaining cell address
209    cmp     r9, #0                      @ chaining cell exists?
210    blxne   r9                          @ jump to the chaining cell
211    mov     pc, r0                      @ callsite is interpreted
2121:
213    stmia   rGLUE, {rPC, rFP}           @ SAVE_PC_FP_TO_GLUE()
214    ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
215    mov     r1, #0                      @ changeInterp = false
216    mov     r0, rGLUE                   @ Expecting rGLUE in r0
217    blx     r2                          @ exit the interpreter
218
219/* ------------------------------ */
220    .balign 4
221    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT
222dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
223/* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
224    /*
225     * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
226     * into rPC then jump to dvmJitToInterpNoChain to dispatch the
227     * runtime-resolved callee.
     *
     * On entry:
     *     r0  = methodToCall
     *     r1  = returnCell
     *     rPC = dalvikCallsite
     *     lr  = address to return to when the invoke cannot be completed here
     *
     * Returns through lr (without switching frames) when the new frame would
     * run below interpStackEnd, or when suspendCount is non-zero.  Native
     * callees branch to .LinvokeNative, or return through lr when
     * WITH_SELF_VERIFICATION is defined.
     *
     * The stack-limit check compares two addresses, so it uses the unsigned
     * condition (LO); a signed condition (LT) gives the wrong answer when
     * the two addresses lie on opposite sides of 0x80000000.
     *
     * Scratch registers written: r1-r3, r7-r10; rPC and rFP are switched to
     * the callee.
228     */
229    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
230    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
231    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
232    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
233    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
234    add     r3, r1, #1  @ Thumb addr is odd
235    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
236    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
237    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
238    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
239    ldr     r8, [r8]                    @ r8<- suspendCount (int)
240    cmp     r10, r9                     @ bottom < interpStackEnd? (unsigned)
241    bxlo    lr                          @ return to raise stack overflow excep.
242    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
243    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
244    ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
245    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
246    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
247    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
248
249
250    @ set up newSaveArea
251    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
252    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
253    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
254    cmp     r8, #0                      @ suspendCount != 0
255    bxne    lr                          @ bail to the interpreter
256    tst     r10, #ACC_NATIVE
257#if !defined(WITH_SELF_VERIFICATION)
258    bne     .LinvokeNative
259#else
260    bxne    lr                          @ bail to the interpreter
261#endif
262
263    ldr     r10, .LdvmJitToInterpNoChain
264    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
265    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
266
267    @ Update "glue" values for the new method
268    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
269    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
270    mov     rFP, r1                         @ fp = newFp
271    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
272
273    @ Start executing the callee
274    mov     pc, r10                         @ dvmJitToInterpNoChain
275
276/* ------------------------------ */
277    .balign 4
278    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN
279dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
280/* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S */
281    /*
282     * For monomorphic callsite, setup the Dalvik frame and return to the
283     * Thumb code through the link register to transfer control to the callee
284     * method through a dedicated chaining cell.
     *
     * On entry:
     *     r0  = methodToCall (non-native)
     *     r1  = returnCell
     *     rPC = dalvikCallsite
     *     lr  = callee chaining cell; lr+2 = punt-to-interpreter address
     *
     * Returns to lr+2 (held in r12) when the new frame would run below
     * interpStackEnd or when suspendCount is non-zero; otherwise returns to
     * lr with rFP/rPC switched to the callee.
     *
     * The stack-limit check compares two addresses, so it uses the unsigned
     * condition (LO); a signed condition (LT) gives the wrong answer when
     * the two addresses lie on opposite sides of 0x80000000.
     *
     * .LinvokeChain is also the branch target of the predicted-chain
     * template when its class prediction hits.
     *
     * Scratch registers written: r1-r3, r7-r10, r12.
285     */
286    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
287    @ methodToCall is guaranteed to be non-native
288.LinvokeChain:
289    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
290    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
291    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
292    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
293    add     r3, r1, #1  @ Thumb addr is odd
294    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
295    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
296    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
297    add     r12, lr, #2                 @ setup the punt-to-interp address
298    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
299    ldr     r8, [r8]                    @ r8<- suspendCount (int)
300    cmp     r10, r9                     @ bottom < interpStackEnd? (unsigned)
301    bxlo    r12                         @ return to raise stack overflow excep.
302    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
303    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
304    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
305    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
306    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
307
308
309    @ set up newSaveArea
310    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
311    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
312    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
313    cmp     r8, #0                      @ suspendCount != 0
314    bxne    r12                         @ bail to the interpreter
315
316    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
317    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
318
319    @ Update "glue" values for the new method
320    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
321    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
322    mov     rFP, r1                         @ fp = newFp
323    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
324
325    bx      lr                              @ return to the callee-chaining cell
326
327
328
329/* ------------------------------ */
330    .balign 4
331    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
332dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
333/* File: armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
334    /*
335     * For polymorphic callsite, check whether the cached class pointer matches
336     * the current one. If so setup the Dalvik frame and return to the
337     * Thumb code through the link register to transfer control to the callee
338     * method through a dedicated chaining cell.
339     *
340     * The predicted chaining cell is declared in ArmLIR.h with the
341     * following layout:
342     *
343     *  typedef struct PredictedChainingCell {
344     *      u4 branch;
345     *      const ClassObject *clazz;
346     *      const Method *method;
347     *      u4 counter;
348     *  } PredictedChainingCell;
349     *
     * The literal offsets #4, #8 and #12 used below are the clazz, method
     * and counter fields of that layout.
     *
350     * Upon returning to the callsite:
351     *    - lr  : to branch to the chaining cell
352     *    - lr+2: to punt to the interpreter
353     *    - lr+4: to fully resolve the callee and may rechain.
354     *            r3 <- class
355     *            r9 <- counter
     *
     * On a prediction hit the code branches to .LinvokeChain with
     * r0 = predictedChainCell->method and r1 = returnCell still intact.
     * On a miss, r9 keeps the counter value that was loaded, r1 holds the
     * decremented value (also written back to the cell), and r0 holds
     * predictedChainCell->method rather than "this".
356     */
357    @ r0 = this, r1 = returnCell, r2 = predictedChainCell, rPC = dalvikCallsite
358    ldr     r3, [r0, #offObject_clazz]  @ r3 <- this->class
359    ldr     r8, [r2, #4]    @ r8 <- predictedChainCell->clazz
360    ldr     r0, [r2, #8]    @ r0 <- predictedChainCell->method
361    ldr     r9, [r2, #12]   @ r9 <- predictedChainCell->counter
362    cmp     r3, r8          @ predicted class == actual class?
363    beq     .LinvokeChain   @ predicted chain is valid
364    ldr     r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
365    sub     r1, r9, #1      @ count-- (r1 no longer holds returnCell)
366    str     r1, [r2, #12]   @ write back to PredictedChainingCell->counter
367    add     lr, lr, #4      @ return to fully-resolve landing pad
368    /*
369     * r1 <- count
370     * r2 <- &predictedChainCell
371     * r3 <- this->class
372     * r4 <- dPC
373     * r7 <- this->class->vtable
374     */
375    bx      lr
376
377/* ------------------------------ */
378    .balign 4
379    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE
380dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
381/* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
    /*
     * Invoke a native method: build the new Dalvik frame, call
     * method->nativeFunc(newFp, &glue->retval, methodToCall), then restore
     * the JNI local-reference state and self->curFrame and continue at the
     * saved return address, or at .LhandleException if an exception is
     * pending.  The fourth argument register (r3) holds glue->self at the
     * time of the call.
     *
     * Returns through lr without calling the native code when the new save
     * area would lie below interpStackEnd or when suspendCount is non-zero;
     * with WITH_SELF_VERIFICATION defined it always returns through lr.
     *
     * The stack-limit check compares two addresses, so it uses the unsigned
     * condition (LO); a signed condition (LT) gives the wrong answer when
     * the two addresses lie on opposite sides of 0x80000000.
     *
     * r9 (self) and r10 (new save area) are relied upon to survive the
     * native call.
     */
382    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
383    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
384    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
385    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
386    add     r3, r1, #1  @ Thumb addr is odd
387    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
388    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
389    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
390    ldr     r8, [r8]                    @ r8<- suspendCount (int)
391    cmp     r10, r9                     @ new save area < interpStackEnd? (unsigned)
392    bxlo    lr                          @ return to raise stack overflow excep.
393    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
394    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
395    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
396    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
397
398
399    @ set up newSaveArea
400    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
401    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
402    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
403    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
404    cmp     r8, #0                      @ suspendCount != 0
405    ldr     r8, [r0, #offMethod_nativeFunc] @ r8<- method->nativeFunc
406#if !defined(WITH_SELF_VERIFICATION)
407    bxne    lr                          @ bail to the interpreter
408#else
409    bx      lr                          @ bail to interpreter unconditionally
410#endif
411
412    @ go ahead and transfer control to the native code
413    ldr     r9, [r3, #offThread_jniLocal_nextEntry] @ r9<- thread->refNext
414    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
415    str     r9, [r1, #(offStackSaveArea_localRefTop - sizeofStackSaveArea)]
416                                        @ newFp->localRefTop=refNext
417    mov     r9, r3                      @ r9<- glue->self (preserve)
418    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
419
420    mov     r2, r0                      @ r2<- methodToCall
421    mov     r0, r1                      @ r0<- newFP
422    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
423
424    blx     r8                          @ off to the native code
425
426    @ native return; r9=self, r10=newSaveArea
427    @ equivalent to dvmPopJniLocals
428    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
429    ldr     r0, [r10, #offStackSaveArea_localRefTop] @ r0<- newSave->localRefTop
430    ldr     r1, [r9, #offThread_exception] @ check for exception
431    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
432    cmp     r1, #0                      @ null?
433    str     r0, [r9, #offThread_jniLocal_nextEntry] @ self->refNext<- r0
434    bne     .LhandleException             @ no, handle exception
435    bx      r2
436
437
438/* ------------------------------ */
439    .balign 4
440    .global dvmCompiler_TEMPLATE_MUL_LONG
441dvmCompiler_TEMPLATE_MUL_LONG:
442/* File: armv5te/TEMPLATE_MUL_LONG.S */
443    /*
444     * Signed 64-bit integer multiply.
445     *
446     * For JIT: op1 in r0/r1, op2 in r2/r3, return in r0/r1
447     *
448     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
449     *        WX
450     *      x YZ
451     *  --------
452     *     ZW ZX
453     *  YW YX
454     *
455     * The low word of the result holds ZX, the high word holds
456     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
457     * it doesn't fit in the low 64 bits.
458     *
459     * Unlike most ARM math operations, multiply instructions have
460     * restrictions on using the same register more than once (Rd and Rm
461     * cannot be the same).
     *
     * Besides the r0/r1 result, this template overwrites ip, r2, r9 and
     * r10.  The product is built in r9/r10 and copied to r0/r1 at the end.
462     */
463    /* mul-long vAA, vBB, vCC */
464    mul     ip, r2, r1                  @  ip<- ZxW
465    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
466    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
467    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
468    mov     r0,r9                       @  r0<- low word of result
469    mov     r1,r10                      @  r1<- high word of result
470    bx      lr
471
472/* ------------------------------ */
473    .balign 4
474    .global dvmCompiler_TEMPLATE_SHL_LONG
475dvmCompiler_TEMPLATE_SHL_LONG:
476/* File: armv5te/TEMPLATE_SHL_LONG.S */
477    /*
478     * Long integer shift.  This is different from the generic 32/64-bit
479     * binary operations because vAA/vBB are 64-bit but vCC (the shift
480     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
481     * 6 bits.
     *
     * Register usage in this template (from the code below):
     *     r1:r0 = 64-bit value (high:low), shifted left in place
     *     r2    = shift distance (masked to 0..63 here)
     * r3, ip and the condition flags are overwritten.  For distances of 32
     * or more the conditional move replaces the high word with the low
     * word shifted by (distance - 32).
482     */
483    /* shl-long vAA, vBB, vCC */
484    and     r2, r2, #63                 @ r2<- r2 & 0x3f
485    mov     r1, r1, asl r2              @  r1<- r1 << r2
486    rsb     r3, r2, #32                 @  r3<- 32 - r2
487    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >>> (32-r2))
488    subs    ip, r2, #32                 @  ip<- r2 - 32
489    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
490    mov     r0, r0, asl r2              @  r0<- r0 << r2
491    bx      lr
492
493/* ------------------------------ */
494    .balign 4
495    .global dvmCompiler_TEMPLATE_SHR_LONG
496dvmCompiler_TEMPLATE_SHR_LONG:
497/* File: armv5te/TEMPLATE_SHR_LONG.S */
498    /*
499     * Long integer shift.  This is different from the generic 32/64-bit
500     * binary operations because vAA/vBB are 64-bit but vCC (the shift
501     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
502     * 6 bits.
     *
     * Register usage in this template (from the code below):
     *     r1:r0 = 64-bit value (high:low), shifted right in place; the
     *             high word uses an arithmetic shift (ASR)
     *     r2    = shift distance (masked to 0..63 here)
     * r3, ip and the condition flags are overwritten.  For distances of 32
     * or more the conditional move replaces the low word with the high
     * word shifted by (distance - 32).
503     */
504    /* shr-long vAA, vBB, vCC */
505    and     r2, r2, #63                 @ r2<- r2 & 0x3f
506    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
507    rsb     r3, r2, #32                 @  r3<- 32 - r2
508    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
509    subs    ip, r2, #32                 @  ip<- r2 - 32
510    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
511    mov     r1, r1, asr r2              @  r1<- r1 >> r2
512    bx      lr
513
514
515/* ------------------------------ */
516    .balign 4
517    .global dvmCompiler_TEMPLATE_USHR_LONG
518dvmCompiler_TEMPLATE_USHR_LONG:
519/* File: armv5te/TEMPLATE_USHR_LONG.S */
520    /*
521     * Long integer shift.  This is different from the generic 32/64-bit
522     * binary operations because vAA/vBB are 64-bit but vCC (the shift
523     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
524     * 6 bits.
     *
     * Register usage in this template (from the code below):
     *     r1:r0 = 64-bit value (high:low), shifted right in place; both
     *             words use a logical shift (LSR)
     *     r2    = shift distance (masked to 0..63 here)
     * r3, ip and the condition flags are overwritten.  For distances of 32
     * or more the conditional move replaces the low word with the high
     * word shifted by (distance - 32).
525     */
526    /* ushr-long vAA, vBB, vCC */
527    and     r2, r2, #63                 @ r2<- r2 & 0x3f
528    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
529    rsb     r3, r2, #32                 @  r3<- 32 - r2
530    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
531    subs    ip, r2, #32                 @  ip<- r2 - 32
532    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
533    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
534    bx      lr
535
536
537/* ------------------------------ */
538    .balign 4
539    .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP
540dvmCompiler_TEMPLATE_ADD_FLOAT_VFP:
541/* File: armv5te-vfp/TEMPLATE_ADD_FLOAT_VFP.S */
542/* File: armv5te-vfp/fbinop.S */
543    /*
544     * Generic 32-bit floating point operation.  Provide an "instr" line that
545     * specifies an instruction that performs s2 = s0 op s1.
546     *
     * This expansion: single-precision add, [r0] = [r1] + [r2].
     *
547     * On entry:
548     *     r0 = target dalvik register address
549     *     r1 = op1 address
550     *     r2 = op2 address
     *
     * Overwrites s0, s1 and s2; returns with "bx lr".
551     */
552     flds    s0,[r1]                    @ s0<- op1
553     flds    s1,[r2]                    @ s1<- op2
554     fadds   s2, s0, s1                 @ s2<- s0 + s1
555     fsts    s2,[r0]                    @ target<- s2
556     bx      lr
557
558
559/* ------------------------------ */
560    .balign 4
561    .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP
562dvmCompiler_TEMPLATE_SUB_FLOAT_VFP:
563/* File: armv5te-vfp/TEMPLATE_SUB_FLOAT_VFP.S */
564/* File: armv5te-vfp/fbinop.S */
565    /*
566     * Generic 32-bit floating point operation.  Provide an "instr" line that
567     * specifies an instruction that performs s2 = s0 op s1.
568     *
     * This expansion: single-precision subtract, [r0] = [r1] - [r2].
     *
569     * On entry:
570     *     r0 = target dalvik register address
571     *     r1 = op1 address
572     *     r2 = op2 address
     *
     * Overwrites s0, s1 and s2; returns with "bx lr".
573     */
574     flds    s0,[r1]                    @ s0<- op1
575     flds    s1,[r2]                    @ s1<- op2
576     fsubs   s2, s0, s1                 @ s2<- s0 - s1
577     fsts    s2,[r0]                    @ target<- s2
578     bx      lr
579
580
581/* ------------------------------ */
582    .balign 4
583    .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP
584dvmCompiler_TEMPLATE_MUL_FLOAT_VFP:
585/* File: armv5te-vfp/TEMPLATE_MUL_FLOAT_VFP.S */
586/* File: armv5te-vfp/fbinop.S */
587    /*
588     * Generic 32-bit floating point operation.  Provide an "instr" line that
589     * specifies an instruction that performs s2 = s0 op s1.
590     *
     * This expansion: single-precision multiply, [r0] = [r1] * [r2].
     *
591     * On entry:
592     *     r0 = target dalvik register address
593     *     r1 = op1 address
594     *     r2 = op2 address
     *
     * Overwrites s0, s1 and s2; returns with "bx lr".
595     */
596     flds    s0,[r1]                    @ s0<- op1
597     flds    s1,[r2]                    @ s1<- op2
598     fmuls   s2, s0, s1                 @ s2<- s0 * s1
599     fsts    s2,[r0]                    @ target<- s2
600     bx      lr
601
602
603/* ------------------------------ */
604    .balign 4
605    .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP
606dvmCompiler_TEMPLATE_DIV_FLOAT_VFP:
607/* File: armv5te-vfp/TEMPLATE_DIV_FLOAT_VFP.S */
608/* File: armv5te-vfp/fbinop.S */
609    /*
610     * Generic 32-bit floating point operation.  Provide an "instr" line that
611     * specifies an instruction that performs s2 = s0 op s1.
612     *
     * This expansion: single-precision divide, [r0] = [r1] / [r2].
     *
613     * On entry:
614     *     r0 = target dalvik register address
615     *     r1 = op1 address
616     *     r2 = op2 address
     *
     * Overwrites s0, s1 and s2; returns with "bx lr".
617     */
618     flds    s0,[r1]                    @ s0<- op1
619     flds    s1,[r2]                    @ s1<- op2
620     fdivs   s2, s0, s1                 @ s2<- s0 / s1
621     fsts    s2,[r0]                    @ target<- s2
622     bx      lr
623
624
625/* ------------------------------ */
626    .balign 4
627    .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP
628dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP:
629/* File: armv5te-vfp/TEMPLATE_ADD_DOUBLE_VFP.S */
630/* File: armv5te-vfp/fbinopWide.S */
631    /*
632     * Generic 64-bit floating point operation.  Provide an "instr" line that
633     * specifies an instruction that performs d2 = d0 op d1.
634     *
     * This expansion: double-precision add, [r0] = [r1] + [r2].
     *
635     * On entry:
636     *     r0 = target dalvik register address
637     *     r1 = op1 address
638     *     r2 = op2 address
     *
     * Overwrites d0, d1 and d2; returns with "bx lr".
639     */
640     fldd    d0,[r1]                    @ d0<- op1
641     fldd    d1,[r2]                    @ d1<- op2
642     faddd   d2, d0, d1                 @ d2<- d0 + d1
643     fstd    d2,[r0]                    @ target<- d2
644     bx      lr
645
646
647/* ------------------------------ */
648    .balign 4
649    .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP
650dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP:
651/* File: armv5te-vfp/TEMPLATE_SUB_DOUBLE_VFP.S */
652/* File: armv5te-vfp/fbinopWide.S */
653    /*
654     * Generic 64-bit floating point operation.  Provide an "instr" line that
655     * specifies an instruction that performs d2 = d0 op d1.
656     *
     * This expansion: double-precision subtract, [r0] = [r1] - [r2].
     *
657     * On entry:
658     *     r0 = target dalvik register address
659     *     r1 = op1 address
660     *     r2 = op2 address
     *
     * Overwrites d0, d1 and d2; returns with "bx lr".
661     */
662     fldd    d0,[r1]                    @ d0<- op1
663     fldd    d1,[r2]                    @ d1<- op2
664     fsubd   d2, d0, d1                 @ d2<- d0 - d1
665     fstd    d2,[r0]                    @ target<- d2
666     bx      lr
667
668
669/* ------------------------------ */
670    .balign 4
671    .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP
672dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP:
673/* File: armv5te-vfp/TEMPLATE_MUL_DOUBLE_VFP.S */
674/* File: armv5te-vfp/fbinopWide.S */
675    /*
676     * Generic 64-bit floating point operation.  Provide an "instr" line that
677     * specifies an instruction that performs d2 = d0 op d1.
678     *
     * This expansion: double-precision multiply, [r0] = [r1] * [r2].
     *
679     * On entry:
680     *     r0 = target dalvik register address
681     *     r1 = op1 address
682     *     r2 = op2 address
     *
     * Overwrites d0, d1 and d2; returns with "bx lr".
683     */
684     fldd    d0,[r1]                    @ d0<- op1
685     fldd    d1,[r2]                    @ d1<- op2
686     fmuld   d2, d0, d1                 @ d2<- d0 * d1
687     fstd    d2,[r0]                    @ target<- d2
688     bx      lr
689
690
691/* ------------------------------ */
692    .balign 4
693    .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP
694dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP:
695/* File: armv5te-vfp/TEMPLATE_DIV_DOUBLE_VFP.S */
696/* File: armv5te-vfp/fbinopWide.S */
697    /*
698     * Generic 64-bit floating point operation.  Provide an "instr" line that
699     * specifies an instruction that performs d2 = d0 op d1.
700     *
     * This expansion: double-precision divide, [r0] = [r1] / [r2].
     *
701     * On entry:
702     *     r0 = target dalvik register address
703     *     r1 = op1 address
704     *     r2 = op2 address
     *
     * Overwrites d0, d1 and d2; returns with "bx lr".
705     */
706     fldd    d0,[r1]                    @ d0<- op1
707     fldd    d1,[r2]                    @ d1<- op2
708     fdivd   d2, d0, d1                 @ d2<- d0 / d1
709     fstd    d2,[r0]                    @ target<- d2
710     bx      lr
711
712
713/* ------------------------------ */
714    .balign 4
715    .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP
716dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP:
717/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */
718/* File: armv5te-vfp/funopNarrower.S */
719    /*
720     * Generic 64bit-to-32bit floating point unary operation.  Provide an
721     * "instr" line that specifies an instruction that performs "s0 = op d0".
722     *
723     * For: double-to-int, double-to-float
724     *
     * This expansion: double-to-float, using fcvtsd.
     *
725     * On entry:
726     *     r0 = target dalvik register address
727     *     r1 = src dalvik register address
     *
     * Overwrites d0 (and therefore s0); returns with "bx lr".
728     */
729    /* unop vA, vB */
730    fldd    d0, [r1]                    @ d0<- vB
731    fcvtsd  s0, d0                              @ s0<- (float) d0
732    fsts    s0, [r0]                    @ vA<- s0
733    bx      lr
734
735
736/* ------------------------------ */
737    .balign 4
738    .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP
739dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP:
740/* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_INT_VFP.S */
741/* File: armv5te-vfp/funopNarrower.S */
742    /*
743     * Generic 64bit-to-32bit floating point unary operation.  Provide an
744     * "instr" line that specifies an instruction that performs "s0 = op d0".
745     *
746     * For: double-to-int, double-to-float
747     *
     * This expansion: double-to-int, using ftosizd (convert to signed
     * integer, rounding toward zero).
     *
748     * On entry:
749     *     r0 = target dalvik register address
750     *     r1 = src dalvik register address
     *
     * Overwrites d0 (and therefore s0); returns with "bx lr".
751     */
752    /* unop vA, vB */
753    fldd    d0, [r1]                    @ d0<- vB
754    ftosizd  s0, d0                              @ s0<- (int) d0
755    fsts    s0, [r0]                    @ vA<- s0
756    bx      lr
757
758
759/* ------------------------------ */
760    .balign 4
761    .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP
762dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP:
763/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */
764/* File: armv5te-vfp/funopWider.S */
765    /*
766     * Generic 32bit-to-64bit floating point unary operation.  Provide an
767     * "instr" line that specifies an instruction that performs "d0 = op s0".
768     *
769     * For: int-to-double, float-to-double
770     *
     * This expansion: float-to-double, using fcvtds.
     *
771     * On entry:
772     *     r0 = target dalvik register address
773     *     r1 = src dalvik register address
     *
     * Overwrites s0 and d0; returns with "bx lr".
774     */
775    /* unop vA, vB */
776    flds    s0, [r1]                    @ s0<- vB
777    fcvtds  d0, s0                              @ d0<- (double) s0
778    fstd    d0, [r0]                    @ vA<- d0
779    bx      lr
780
781
782/* ------------------------------ */
783    .balign 4
784    .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP
785dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP:
786/* File: armv5te-vfp/TEMPLATE_FLOAT_TO_INT_VFP.S */
787/* File: armv5te-vfp/funop.S */
788    /*
789     * Generic 32bit-to-32bit floating point unary operation.  Provide an
790     * "instr" line that specifies an instruction that performs "s1 = op s0".
791     *
792     * For: float-to-int, int-to-float
793     *
        * This expansion is float-to-int: "instr" is ftosizs.
        *
794     * On entry:
795     *     r0 = target dalvik register address
796     *     r1 = src dalvik register address
        * On exit:
        *     [r0] holds the 32-bit result; s0 and s1 are overwritten
797     */
798    /* unop vA, vB */
799    flds    s0, [r1]                    @ s0<- vB (32-bit source)
800    ftosizs s1, s0                              @ s1<- (int) s0, rounding toward zero
801    fsts    s1, [r0]                    @ vA<- s1 (32-bit result)
802    bx      lr                          @ return to caller
803
804
805/* ------------------------------ */
806    .balign 4
807    .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP
808dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP:
809/* File: armv5te-vfp/TEMPLATE_INT_TO_DOUBLE_VFP.S */
810/* File: armv5te-vfp/funopWider.S */
811    /*
812     * Generic 32bit-to-64bit floating point unary operation.  Provide an
813     * "instr" line that specifies an instruction that performs "d0 = op s0".
814     *
815     * For: int-to-double, float-to-double
816     *
        * This expansion is int-to-double: "instr" is fsitod.  The integer is
        * moved through s0 as raw bits before being converted.
        *
817     * On entry:
818     *     r0 = target dalvik register address
819     *     r1 = src dalvik register address
        * On exit:
        *     [r0] holds the 64-bit result; d0 (and thus s0) is overwritten
820     */
821    /* unop vA, vB */
822    flds    s0, [r1]                    @ s0<- vB (raw 32-bit integer bits)
823    fsitod  d0, s0                              @ d0<- (double) signed int in s0
824    fstd    d0, [r0]                    @ vA<- d0 (64-bit result)
825    bx      lr                          @ return to caller
826
827
828/* ------------------------------ */
829    .balign 4
830    .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP
831dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP:
832/* File: armv5te-vfp/TEMPLATE_INT_TO_FLOAT_VFP.S */
833/* File: armv5te-vfp/funop.S */
834    /*
835     * Generic 32bit-to-32bit floating point unary operation.  Provide an
836     * "instr" line that specifies an instruction that performs "s1 = op s0".
837     *
838     * For: float-to-int, int-to-float
839     *
        * This expansion is int-to-float: "instr" is fsitos.  The integer is
        * moved through s0 as raw bits before being converted.
        *
840     * On entry:
841     *     r0 = target dalvik register address
842     *     r1 = src dalvik register address
        * On exit:
        *     [r0] holds the 32-bit result; s0 and s1 are overwritten
843     */
844    /* unop vA, vB */
845    flds    s0, [r1]                    @ s0<- vB (raw 32-bit integer bits)
846    fsitos  s1, s0                              @ s1<- (float) signed int in s0
847    fsts    s1, [r0]                    @ vA<- s1 (32-bit result)
848    bx      lr                          @ return to caller
849
850
851/* ------------------------------ */
852    .balign 4
853    .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP
854dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP:
855/* File: armv5te-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S */
856    /*
857     * Compare two floating-point values.  Puts 0, 1, or -1 into the
858     * destination register based on the results of the comparison.
        * In this template the result is left in r0.  The final "else" in
        * the pseudo-code below is the unordered case (an operand is NaN),
        * for which the "G" variant yields 1.
859     *
860     * int compare(x, y) {
861     *     if (x == y) {
862     *         return 0;
863     *     } else if (x < y) {
864     *         return -1;
865     *     } else if (x > y) {
866     *         return 1;
867     *     } else {
868     *         return 1;
869     *     }
870     * }
871     *
872     * On entry:
873     *    r0 = &op1 [vBB]
874     *    r1 = &op2 [vCC]
        * On exit:
        *    r0 = -1, 0 or 1; d0, d1 and the condition flags are overwritten
875     */
876    /* op vAA, vBB, vCC */
877    fldd    d0, [r0]                    @ d0<- vBB
878    fldd    d1, [r1]                    @ d1<- vCC
879    fcmped  d0, d1                      @ compare (vBB, vCC)
880    mov     r0, #1                      @ r0<- 1 (default: greater or NaN); mov leaves flags alone
881    fmstat                              @ export status flags (FPSCR -> CPSR)
882    mvnmi   r0, #0                      @ (less than) r0<- -1
883    moveq   r0, #0                      @ (equal) r0<- 0
884    bx      lr                          @ return with result in r0
885
886
887/* ------------------------------ */
888    .balign 4
889    .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP
890dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP:
891/* File: armv5te-vfp/TEMPLATE_CMPL_DOUBLE_VFP.S */
892    /*
893     * Compare two floating-point values.  Puts 0, 1, or -1 into the
894     * destination register based on the results of the comparison.
        * In this template the result is left in r0.  The final "else" in
        * the pseudo-code below is the unordered case (an operand is NaN),
        * for which the "L" variant yields -1.
895     *
896     * int compare(x, y) {
897     *     if (x == y) {
898     *         return 0;
899     *     } else if (x > y) {
900     *         return 1;
901     *     } else if (x < y) {
902     *         return -1;
903     *     } else {
904     *         return -1;
905     *     }
906     * }
907     * On entry:
908     *    r0 = &op1 [vBB]
909     *    r1 = &op2 [vCC]
        * On exit:
        *    r0 = -1, 0 or 1; d0, d1 and the condition flags are overwritten
910     */
911    /* op vAA, vBB, vCC */
912    fldd    d0, [r0]                    @ d0<- vBB
913    fldd    d1, [r1]                    @ d1<- vCC
914    fcmped  d0, d1                      @ compare (vBB, vCC)
915    mvn     r0, #0                      @ r0<- -1 (default: less or NaN); mvn leaves flags alone
916    fmstat                              @ export status flags (FPSCR -> CPSR)
917    movgt   r0, #1                      @ (greater than) r0<- 1
918    moveq   r0, #0                      @ (equal) r0<- 0
919    bx      lr                          @ return with result in r0
920
921/* ------------------------------ */
922    .balign 4
923    .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP
924dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP:
925/* File: armv5te-vfp/TEMPLATE_CMPG_FLOAT_VFP.S */
926    /*
927     * Compare two floating-point values.  Puts 0, 1, or -1 into the
928     * destination register based on the results of the comparison.
        * In this template the result is left in r0.  The final "else" in
        * the pseudo-code below is the unordered case (an operand is NaN),
        * for which the "G" variant yields 1.
929     *
930     * int compare(x, y) {
931     *     if (x == y) {
932     *         return 0;
933     *     } else if (x < y) {
934     *         return -1;
935     *     } else if (x > y) {
936     *         return 1;
937     *     } else {
938     *         return 1;
939     *     }
940     * }
941     * On entry:
942     *    r0 = &op1 [vBB]
943     *    r1 = &op2 [vCC]
        * On exit:
        *    r0 = -1, 0 or 1; s0, s1 and the condition flags are overwritten
944     */
945    /* op vAA, vBB, vCC */
946    flds    s0, [r0]                    @ s0<- vBB
947    flds    s1, [r1]                    @ s1<- vCC
948    fcmpes  s0, s1                      @ compare (vBB, vCC)
949    mov     r0, #1                      @ r0<- 1 (default: greater or NaN); mov leaves flags alone
950    fmstat                              @ export status flags (FPSCR -> CPSR)
951    mvnmi   r0, #0                      @ (less than) r0<- -1
952    moveq   r0, #0                      @ (equal) r0<- 0
953    bx      lr                          @ return with result in r0
954
955/* ------------------------------ */
956    .balign 4
957    .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP
958dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP:
959/* File: armv5te-vfp/TEMPLATE_CMPL_FLOAT_VFP.S */
960    /*
961     * Compare two floating-point values.  Puts 0, 1, or -1 into the
962     * destination register based on the results of the comparison.
        * In this template the result is left in r0.  The final "else" in
        * the pseudo-code below is the unordered case (an operand is NaN),
        * for which the "L" variant yields -1.
963     *
964     * int compare(x, y) {
965     *     if (x == y) {
966     *         return 0;
967     *     } else if (x > y) {
968     *         return 1;
969     *     } else if (x < y) {
970     *         return -1;
971     *     } else {
972     *         return -1;
973     *     }
974     * }
975     * On entry:
976     *    r0 = &op1 [vBB]
977     *    r1 = &op2 [vCC]
        * On exit:
        *    r0 = -1, 0 or 1; s0, s1 and the condition flags are overwritten
978     */
979    /* op vAA, vBB, vCC */
980    flds    s0, [r0]                    @ s0<- vBB
981    flds    s1, [r1]                    @ s1<- vCC
982    fcmpes  s0, s1                      @ compare (vBB, vCC)
983    mvn     r0, #0                      @ r0<- -1 (default: less or NaN); mvn leaves flags alone
984    fmstat                              @ export status flags (FPSCR -> CPSR)
985    movgt   r0, #1                      @ (greater than) r0<- 1
986    moveq   r0, #0                      @ (equal) r0<- 0
987    bx      lr                          @ return with result in r0
988
989/* ------------------------------ */
990    .balign 4
991    .global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP
992dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP:
993/* File: armv5te-vfp/TEMPLATE_SQRT_DOUBLE_VFP.S */
994    /*
995     * 64-bit floating point vfp sqrt operation.
996     * If the result is a NaN, bail out to library code to do
997     * the right thing.
         *
         * The NaN test compares the result with itself: the compare reports
         * "equal" for every value except NaN.  On the fallback path the
         * original operand is passed in r0/r1 and lr is left untouched, so
         * the library routine returns directly to this template's caller.
998     *
999     * On entry:
1000     *     r2 src addr of op1
1001     * On exit:
1002     *     r0,r1 = res
         *     r2, d0, d1 and the condition flags are overwritten
1003     */
1004    fldd    d0, [r2]             @ d0<- operand
1005    fsqrtd  d1, d0               @ d1<- sqrt(d0)
1006    fcmpd   d1, d1               @ result compared with itself: unequal only for NaN
1007    fmstat                       @ export status flags (FPSCR -> CPSR)
1008    fmrrd   r0, r1, d1           @ r0/r1<- result (flags are not changed)
1009    bxeq    lr   @ Result OK - return
1010    ldr     r2, .Lsqrt           @ r2<- address of sqrt (from the literal below)
1011    fmrrd   r0, r1, d0   @ reload orig operand
1012    bx      r2   @ tail call to sqrt library routine
1013
1014.Lsqrt:
1015    .word   sqrt                 @ literal: address of the library sqrt routine
1016
1017    .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart  @ symbol size = bytes from dvmCompilerTemplateStart (defined earlier in the file) to here
1018/* File: armv5te/footer.S */
1019/*
1020 * ===========================================================================
1021 *  Common subroutines and data
1022 * ===========================================================================
1023 */
1024
1025    .text
1026    .align  2
1027.LinvokeNative:
1028    @ Prep for the native call
1029    @ r1 = newFP, r0 = methodToCall
        @ NOTE(review): rGLUE, rFP, SAVEAREA_FROM_FP, LDR_PC_LR and the off* constants
        @ are defined outside this section; the comments below follow their names.
1030    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
1031    ldr     r9, [r3, #offThread_jniLocal_nextEntry] @ r9<- thread->refNext
1032    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
1033    str     r9, [r1, #(offStackSaveArea_localRefTop - sizeofStackSaveArea)]
1034                                        @ newFp->localRefTop=refNext
1035    mov     r9, r3                      @ r9<- glue->self (preserve)
1036    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
1037
        @ set up the native call arguments: r0=newFP, r1=&retval, r2=methodToCall
1038    mov     r2, r0                      @ r2<- methodToCall
1039    mov     r0, r1                      @ r0<- newFP
1040    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
1041
        @ presumably calls methodToCall->nativeFunc and returns here -- confirm against the macro
1042    LDR_PC_LR "[r2, #offMethod_nativeFunc]"
1043
1044    @ native return; r9=self, r10=newSaveArea
1045    @ equivalent to dvmPopJniLocals
1046    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
1047    ldr     r0, [r10, #offStackSaveArea_localRefTop] @ r0<- newSave->localRefTop
1048    ldr     r1, [r9, #offThread_exception] @ check for exception
1049    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
1050    cmp     r1, #0                      @ null?
1051    str     r0, [r9, #offThread_jniLocal_nextEntry] @ self->refNext<- r0 (str keeps the cmp flags)
1052    bne     .LhandleException             @ no, handle exception
1053    bx      r2                          @ no exception: jump to the saved return address
1054
1055/* NOTE - this path can be exercised if the JIT threshold is set to 5 */
    /*
     * Reached from .LinvokeNative when self->exception is non-null after the
     * native call.  r10 still holds the new stack save area.  Control is
     * transferred with a plain jump, so this path does not return here.
     */
1056.LhandleException:
1057    ldr     r0, .LdvmMterpCommonExceptionThrown @ PIC way of getting &func
1058    ldr     rIBASE, .LdvmAsmInstructionStart    @ same as above
1059    ldr     rPC, [r10, #offStackSaveArea_savedPc] @ reload rPC
1060    mov     pc, r0                  @ branch to dvmMterpCommonExceptionThrown
1061
    /*
     * Literal words holding the addresses of external symbols, loaded with
     * pc-relative ldr.  Only .LdvmAsmInstructionStart and
     * .LdvmMterpCommonExceptionThrown are used by the code directly above;
     * the others are presumably used by templates earlier in the file.
     */
1062    .align  2
1063.LdvmAsmInstructionStart:
1064    .word   dvmAsmInstructionStart          @ loaded into rIBASE by .LhandleException
1065.LdvmJitToInterpNoChain:
1066    .word   dvmJitToInterpNoChain
1067.LdvmMterpStdBail:
1068    .word   dvmMterpStdBail
1069.LdvmMterpCommonExceptionThrown:
1070    .word   dvmMterpCommonExceptionThrown   @ jump target of .LhandleException
1071.L__aeabi_cdcmple:
1072    .word   __aeabi_cdcmple
1073.L__aeabi_cfcmple:
1074    .word   __aeabi_cfcmple
1075
    /*
     * Exported label marking the end of the template code and data.
     * NOTE(review): the name is spelled "dmv", unlike the "dvm" prefix used
     * elsewhere; it is an exported symbol, so keep it in sync with whatever
     * references it outside this file.
     */
1076    .global dmvCompilerTemplateEnd
1077dmvCompilerTemplateEnd:
1078
1079#endif /* WITH_JIT */
1080
1081