CompilerTemplateAsm-armv5te.S revision ba4fc8bfc1bccae048403bd1cea3b869dca61dd7
1/*
2 * This file was generated automatically by gen-template.py for 'armv5te'.
3 *
4 * --> DO NOT EDIT <--
5 */
6
7/* File: armv5te/header.S */
8/*
9 * Copyright (C) 2008 The Android Open Source Project
10 *
11 * Licensed under the Apache License, Version 2.0 (the "License");
12 * you may not use this file except in compliance with the License.
13 * You may obtain a copy of the License at
14 *
15 *      http://www.apache.org/licenses/LICENSE-2.0
16 *
17 * Unless required by applicable law or agreed to in writing, software
18 * distributed under the License is distributed on an "AS IS" BASIS,
19 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 * See the License for the specific language governing permissions and
21 * limitations under the License.
22 */
23
24#if defined(WITH_JIT)
25
26/*
27 * ARMv5 definitions and declarations.
28 */
29
30/*
31ARM EABI general notes:
32
33r0-r3 hold first 4 args to a method; they are not preserved across method calls
34r4-r8 are available for general use
35r9 is given special treatment in some situations, but not for us
36r10 (sl) seems to be generally available
37r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
38r12 (ip) is scratch -- not preserved across method calls
39r13 (sp) should be managed carefully in case a signal arrives
40r14 (lr) must be preserved
41r15 (pc) can be tinkered with directly
42
43r0 holds returns of <= 4 bytes
44r0-r1 hold returns of 8 bytes, low word in r0
45
46Callee must save/restore r4+ (except r12) if it modifies them.
47
48Stack is "full descending".  Only the arguments that don't fit in the first 4
49registers are placed on the stack.  "sp" points at the first stacked argument
50(i.e. the 5th arg).
51
52VFP: single-precision results in s0, double-precision results in d0.
53
54In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
5564-bit quantities (long long, double) must be 64-bit aligned.
56*/
57
58/*
59JIT and ARM notes:
60
61The following registers have fixed assignments:
62
63  reg nick      purpose
64  r5  rFP       interpreted frame pointer, used for accessing locals and args
65  r6  rGLUE     MterpGlue pointer
66
67The following registers have fixed assignments in mterp but are scratch
68registers in compiled code
69
70  reg nick      purpose
71  r4  rPC       interpreted program counter, used for fetching instructions
72  r7  rIBASE    interpreted instruction base pointer, used for computed goto
73  r8  rINST     first 16-bit code unit of current instruction
74
75Macros are provided for common operations.  Each macro MUST emit only
76one instruction to make instruction-counting easier.  They MUST NOT alter
77unspecified registers or condition codes.
78*/
79
/* single-purpose registers, given names for clarity */
#define rPC     r4
#define rFP     r5
#define rGLUE   r6
#define rIBASE  r7
#define rINST   r8

/*
 * Given a frame pointer, find the stack save area.
 *
 * In C this is "((StackSaveArea*)(_fp) -1)".
 * Emits exactly one instruction; does not alter flags.
 */
#define SAVEAREA_FROM_FP(_reg, _fpreg) \
    sub     _reg, _fpreg, #sizeofStackSaveArea

/*
 * This is a #include, not a %include, because we want the C pre-processor
 * to expand the macros into assembler assignment statements.
 * asm-constants.h defines the off*/sizeof* symbols (e.g. offGlue_self,
 * sizeofStackSaveArea) used throughout this file.
 */
#include "../../../mterp/common/asm-constants.h"
100
101
/* File: armv5te/platform.S */
/*
 * ===========================================================================
 *  CPU-version-specific defines
 * ===========================================================================
 */

/*
 * Macro for "MOV LR,PC / LDR PC,xxx", which is not allowed pre-ARMv5.
 * Jump to subroutine.
 *
 * May modify IP and LR.
 *
 * Note: in ARM state, reading pc yields the address of the current
 * instruction + 8, so "mov lr, pc" makes lr point at the instruction
 * immediately after the following "ldr pc, ..." — i.e. the return address.
 */
.macro  LDR_PC_LR source
    mov     lr, pc
    ldr     pc, \source
.endm
119
120
    /* Start of the compiler template region; the JIT copies code between
     * this label and dmvCompilerTemplateEnd into the translation cache. */
    .global dvmCompilerTemplateStart
    .type   dvmCompilerTemplateStart, %function
    .text

dvmCompilerTemplateStart:
126
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_CMP_LONG
dvmCompiler_TEMPLATE_CMP_LONG:
/* File: armv5te/TEMPLATE_CMP_LONG.S */
    /*
     * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
     * register based on the results of the comparison.
     *
     * In:  r0/r1 = vBB/vBB+1 (first operand lo/hi),
     *      r2/r3 = vCC/vCC+1 (second operand lo/hi)
     * Out: r0 = -1, 0, or 1.  Clobbers flags.
     *
     * We load the full values with LDM, but in practice many values could
     * be resolved by only looking at the high word.  This could be made
     * faster or slower by splitting the LDM into a pair of LDRs.
     *
     * If we just wanted to set condition flags, we could do this:
     *  subs    ip, r0, r2
     *  sbcs    ip, r1, r3
     *  subeqs  ip, r0, r2
     * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
     * integer value, which we can do with 2 conditional mov/mvn instructions
     * (set 1, set -1; if they're equal we already have 0 in ip), giving
     * us a constant 5-cycle path plus a branch at the end to the
     * instruction epilogue code.  The multi-compare approach below needs
     * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
     * in the worst case (the 64-bit values are equal).
     */
    /* cmp-long vAA, vBB, vCC */
    cmp     r1, r3                      @ compare (vBB+1, vCC+1)
    blt     .LTEMPLATE_CMP_LONG_less            @ signed compare on high part
    bgt     .LTEMPLATE_CMP_LONG_greater
    subs    r0, r0, r2                  @ r0<- r0 - r2
    bxeq     lr                         @ low words equal too: r0 is 0, return it
    bhi     .LTEMPLATE_CMP_LONG_greater         @ unsigned compare on low part
.LTEMPLATE_CMP_LONG_less:
    mvn     r0, #0                      @ r0<- -1
    bx      lr
.LTEMPLATE_CMP_LONG_greater:
    mov     r0, #1                      @ r0<- 1
    bx      lr
166
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_RETURN
dvmCompiler_TEMPLATE_RETURN:
/* File: armv5te/TEMPLATE_RETURN.S */
    /*
     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
     * If the stored value in returnAddr
     * is non-zero, the caller is compiled by the JIT thus return to the
     * address in the code cache following the invoke instruction. Otherwise
     * return to the special dvmJitToInterpNoChain entry point.
     *
     * Register usage below: r0 = old save area, then interp entry point;
     * r10 = previous frame, then caller's clazz; r8 = &suspendCount, then
     * its value; r9 = chaining-cell return address (0 if none).
     */
    SAVEAREA_FROM_FP(r0, rFP)           @ r0<- saveArea (old)
    ldr     r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
    ldr     rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc
    ldr     r9,  [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret
    ldr     r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)]
                                        @ r2<- method we're returning to
    ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
    cmp     r2, #0                      @ break frame?
    beq     1f                          @ bail to interpreter
    ldr     r0, .LdvmJitToInterpNoChain @ defined in footer.S
    mov     rFP, r10                    @ publish new FP
    @ NE is guaranteed here (the EQ case branched to 1f above, and the
    @ intervening ldr/mov do not touch flags), so load unconditionally.
    ldr     r10, [r2, #offMethod_clazz] @ r10<- method->clazz
    ldr     r8, [r8]                    @ r8<- suspendCount

    str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
    ldr     r1, [r10, #offClassObject_pDvmDex] @ r1<- method->clazz->pDvmDex
    str     rFP, [r3, #offThread_curFrame] @ self->curFrame = fp
    add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
    str     r1, [rGLUE, #offGlue_methodClassDex]
    cmp     r8, #0                      @ check the suspendCount
    movne   r9, #0                      @ suspend pending: clear the chaining cell address
    cmp     r9, #0                      @ chaining cell exists?
    blxne   r9                          @ jump to the chaining cell
    mov     pc, r0                      @ callsite is interpreted
1:
    stmia   rGLUE, {rPC, rFP}           @ SAVE_PC_FP_TO_GLUE()
    ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
    mov     r1, #0                      @ changeInterp = false
    mov     r0, rGLUE                   @ Expecting rGLUE in r0
    blx     r2                          @ exit the interpreter
210
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT
dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
/* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
    /*
     * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
     * into rPC then jump to dvmJitToInterpNoChain to dispatch the
     * runtime-resolved callee.
     *
     * Bails back to the caller via "bx lr" on stack overflow or a pending
     * suspend request; punts native methods to .LinvokeNative.
     */
    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
    add     r3, r1, #1  @ Thumb addr is odd
    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
    ldr     r8, [r8]                    @ r8<- suspendCount (int)
    cmp     r10, r9                     @ bottom < interpStackEnd?
    bxlt    lr                          @ return to raise stack overflow excep.
    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
    ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns


    @ set up newSaveArea
    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
    cmp     r8, #0                      @ suspendCount != 0
    bxne    lr                          @ bail to the interpreter
    tst     r10, #ACC_NATIVE
    bne     .LinvokeNative
    /*
     * If we want to punt to the interpreter for native call, swap the bne with
     * the following
     * bxne    lr
     */


    ldr     r10, .LdvmJitToInterpNoChain
    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self

    @ Update "glue" values for the new method
    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
    mov     rFP, r1                         @ fp = newFp
    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp

    @ Start executing the callee
    mov     pc, r10                         @ dvmJitToInterpNoChain
269
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN
dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
/* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S */
    /*
     * For monomorphic callsite, setup the Dalvik frame and return to the
     * Thumb code through the link register to transfer control to the callee
     * method through a dedicated chaining cell.
     *
     * Unlike the NO_OPT variant, bail-outs go through r12 = lr + 2, which
     * skips past the chaining-cell branch at the callsite and lands on the
     * punt-to-interpreter path instead.
     */
    @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
    ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
    ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
    ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
    ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
    add     r3, r1, #1  @ Thumb addr is odd
    SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
    sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
    SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
    add     r12, lr, #2                 @ setup the punt-to-interp address
    sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
    ldr     r8, [r8]                    @ r8<- suspendCount (int)
    cmp     r10, r9                     @ bottom < interpStackEnd?
    bxlt    r12                         @ return to raise stack overflow excep.
    @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
    ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
    ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
    str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
    str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
    ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns


    @ set up newSaveArea
    str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
    str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
    str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
    cmp     r8, #0                      @ suspendCount != 0
    bxne    r12                         @ bail to the interpreter
    tst     r10, #ACC_NATIVE
    bne     .LinvokeNative
    /*
     * If we want to punt to the interpreter for native call, swap the bne with
     * the following
     * bxne    r12
     */


    ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
    ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self

    @ Update "glue" values for the new method
    str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
    str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
    mov     rFP, r1                         @ fp = newFp
    str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp

    bx      lr                              @ return to the callee-chaining cell
328
329
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_CMPG_DOUBLE
dvmCompiler_TEMPLATE_CMPG_DOUBLE:
/* File: armv5te/TEMPLATE_CMPG_DOUBLE.S */
/* File: armv5te/TEMPLATE_CMPL_DOUBLE.S */
    /*
     * For the JIT: incoming arguments are pointers to the arguments in r0/r1
     *              result in r0
     *
     * Compare two floating-point values.  Puts 0, 1, or -1 into the
     * destination register based on the results of the comparison.
     *
     * Provide a "naninst" instruction that puts 1 or -1 into the result
     * register (r0 in this template) depending on what value we'd like to
     * return when one of the operands is NaN.  cmpg returns 1 on NaN.
     *
     * See OP_CMPL_FLOAT for an explanation.
     *
     * For: cmpl-double, cmpg-double
     */
    /* op vAA, vBB, vCC */
    mov     r4, lr                      @ save return address
    mov     r9, r0                      @ save copy of &arg1
    mov     r10, r1                     @ save copy of &arg2
    ldmia   r9, {r0-r1}                 @ r0/r1<- vBB/vBB+1
    ldmia   r10, {r2-r3}                @ r2/r3<- vCC/vCC+1
    LDR_PC_LR ".L__aeabi_cdcmple"       @ PIC way of "bl __aeabi_cdcmple"
    bhi     .LTEMPLATE_CMPG_DOUBLE_gt_or_nan       @ C set and Z clear, disambiguate
    mvncc   r0, #0                      @ (less than) r0<- -1
    moveq   r0, #0                      @ (equal) r0<- 0, trumps less than
    bx      r4

    @ Test for NaN with a second comparison.  EABI forbids testing bit
    @ patterns, and we can't represent 0x7fc00000 in immediate form, so
    @ make the library call.
.LTEMPLATE_CMPG_DOUBLE_gt_or_nan:
    ldmia   r10, {r0-r1}                @ reverse order
    ldmia   r9, {r2-r3}
    LDR_PC_LR ".L__aeabi_cdcmple"       @ r0<- Z set if eq, C clear if <
    movcc   r0, #1                      @ (greater than) r0<- 1
    bxcc    r4
    mov     r0, #1                            @ r0<- 1 for NaN (cmpg)
    bx      r4
374
375
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_CMPL_DOUBLE
dvmCompiler_TEMPLATE_CMPL_DOUBLE:
/* File: armv5te/TEMPLATE_CMPL_DOUBLE.S */
    /*
     * For the JIT: incoming arguments are pointers to the arguments in r0/r1
     *              result in r0
     *
     * Compare two floating-point values.  Puts 0, 1, or -1 into the
     * destination register based on the results of the comparison.
     *
     * Provide a "naninst" instruction that puts 1 or -1 into the result
     * register (r0 in this template) depending on what value we'd like to
     * return when one of the operands is NaN.  cmpl returns -1 on NaN.
     *
     * See OP_CMPL_FLOAT for an explanation.
     *
     * For: cmpl-double, cmpg-double
     */
    /* op vAA, vBB, vCC */
    mov     r4, lr                      @ save return address
    mov     r9, r0                      @ save copy of &arg1
    mov     r10, r1                     @ save copy of &arg2
    ldmia   r9, {r0-r1}                 @ r0/r1<- vBB/vBB+1
    ldmia   r10, {r2-r3}                @ r2/r3<- vCC/vCC+1
    LDR_PC_LR ".L__aeabi_cdcmple"       @ PIC way of "bl __aeabi_cdcmple"
    bhi     .LTEMPLATE_CMPL_DOUBLE_gt_or_nan       @ C set and Z clear, disambiguate
    mvncc   r0, #0                      @ (less than) r0<- -1
    moveq   r0, #0                      @ (equal) r0<- 0, trumps less than
    bx      r4

    @ Test for NaN with a second comparison.  EABI forbids testing bit
    @ patterns, and we can't represent 0x7fc00000 in immediate form, so
    @ make the library call.
.LTEMPLATE_CMPL_DOUBLE_gt_or_nan:
    ldmia   r10, {r0-r1}                @ reverse order
    ldmia   r9, {r2-r3}
    LDR_PC_LR ".L__aeabi_cdcmple"       @ r0<- Z set if eq, C clear if <
    movcc   r0, #1                      @ (greater than) r0<- 1
    bxcc    r4
    mvn     r0, #0                            @ r0<- -1 for NaN (cmpl)
    bx      r4
419
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_CMPG_FLOAT
dvmCompiler_TEMPLATE_CMPG_FLOAT:
/* File: armv5te/TEMPLATE_CMPG_FLOAT.S */
/* File: armv5te/TEMPLATE_CMPL_FLOAT.S */
    /*
     * For the JIT: incoming arguments in r0, r1
     *              result in r0
     *
     * Compare two floating-point values.  Puts 0, 1, or -1 into the
     * destination register based on the results of the comparison.
     *
     * Provide a "naninst" instruction that puts 1 or -1 into the result
     * register (r0 in this template) depending on what value we'd like to
     * return when one of the operands is NaN.  cmpg returns 1 on NaN.
     *
     * The operation we're implementing is:
     *   if (x == y)
     *     return 0;
     *   else if (x < y)
     *     return -1;
     *   else if (x > y)
     *     return 1;
     *   else
     *     return {-1,1};  // one or both operands was NaN
     *
     * The straightforward implementation requires 3 calls to functions
     * that return a result in r0.  We can do it with two calls if our
     * EABI library supports __aeabi_cfcmple (only one if we want to check
     * for NaN directly):
     *   check x <= y
     *     if <, return -1
     *     if ==, return 0
     *   check y <= x
     *     if <, return 1
     *   return {-1,1}
     *
     * for: cmpl-float, cmpg-float
     */
    /* op vAA, vBB, vCC */
    mov     r4, lr                      @ save return address
    mov     r9, r0                      @ Save copies - we may need to redo
    mov     r10, r1
    LDR_PC_LR ".L__aeabi_cfcmple"       @ cmp <=: C clear if <, Z set if eq
    bhi     .LTEMPLATE_CMPG_FLOAT_gt_or_nan       @ C set and Z clear, disambiguate
    mvncc   r0, #0                      @ (less than) r0<- -1
    moveq   r0, #0                      @ (equal) r0<- 0, trumps less than
    bx      r4
    @ Test for NaN with a second comparison.  EABI forbids testing bit
    @ patterns, and we can't represent 0x7fc00000 in immediate form, so
    @ make the library call.
.LTEMPLATE_CMPG_FLOAT_gt_or_nan:
    mov     r1, r9                      @ reverse order
    mov     r0, r10
    LDR_PC_LR ".L__aeabi_cfcmple"       @ r0<- Z set if eq, C clear if <
    movcc   r0, #1                      @ (greater than) r0<- 1
    bxcc    r4
    mov     r0, #1                            @ r0<- 1 for NaN (cmpg)
    bx      r4
481
482
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_CMPL_FLOAT
dvmCompiler_TEMPLATE_CMPL_FLOAT:
/* File: armv5te/TEMPLATE_CMPL_FLOAT.S */
    /*
     * For the JIT: incoming arguments in r0, r1
     *              result in r0
     *
     * Compare two floating-point values.  Puts 0, 1, or -1 into the
     * destination register based on the results of the comparison.
     *
     * Provide a "naninst" instruction that puts 1 or -1 into the result
     * register (r0 in this template) depending on what value we'd like to
     * return when one of the operands is NaN.  cmpl returns -1 on NaN.
     *
     * The operation we're implementing is:
     *   if (x == y)
     *     return 0;
     *   else if (x < y)
     *     return -1;
     *   else if (x > y)
     *     return 1;
     *   else
     *     return {-1,1};  // one or both operands was NaN
     *
     * The straightforward implementation requires 3 calls to functions
     * that return a result in r0.  We can do it with two calls if our
     * EABI library supports __aeabi_cfcmple (only one if we want to check
     * for NaN directly):
     *   check x <= y
     *     if <, return -1
     *     if ==, return 0
     *   check y <= x
     *     if <, return 1
     *   return {-1,1}
     *
     * for: cmpl-float, cmpg-float
     */
    /* op vAA, vBB, vCC */
    mov     r4, lr                      @ save return address
    mov     r9, r0                      @ Save copies - we may need to redo
    mov     r10, r1
    LDR_PC_LR ".L__aeabi_cfcmple"       @ cmp <=: C clear if <, Z set if eq
    bhi     .LTEMPLATE_CMPL_FLOAT_gt_or_nan       @ C set and Z clear, disambiguate
    mvncc   r0, #0                      @ (less than) r0<- -1
    moveq   r0, #0                      @ (equal) r0<- 0, trumps less than
    bx      r4
    @ Test for NaN with a second comparison.  EABI forbids testing bit
    @ patterns, and we can't represent 0x7fc00000 in immediate form, so
    @ make the library call.
.LTEMPLATE_CMPL_FLOAT_gt_or_nan:
    mov     r1, r9                      @ reverse order
    mov     r0, r10
    LDR_PC_LR ".L__aeabi_cfcmple"       @ r0<- Z set if eq, C clear if <
    movcc   r0, #1                      @ (greater than) r0<- 1
    bxcc    r4
    mvn     r0, #0                            @ r0<- -1 for NaN (cmpl)
    bx      r4
543
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MUL_LONG
dvmCompiler_TEMPLATE_MUL_LONG:
/* File: armv5te/TEMPLATE_MUL_LONG.S */
    /*
     * Signed 64-bit integer multiply.
     *
     * For JIT: op1 in r0/r1, op2 in r2/r3, return in r0/r1
     *
     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
     *        WX
     *      x YZ
     *  --------
     *     ZW ZX
     *  YW YX
     *
     * The low word of the result holds ZX, the high word holds
     * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
     * it doesn't fit in the low 64 bits.
     *
     * Unlike most ARM math operations, multiply instructions have
     * restrictions on using the same register more than once (Rd and Rm
     * cannot be the same).
     *
     * Clobbers r2, r9, r10, ip.
     */
    /* mul-long vAA, vBB, vCC */
    mul     ip, r2, r1                  @  ip<- ZxW
    umull   r9, r10, r2, r0             @  r9/r10 <- ZxX (low 64 bits)
    mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
    add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
    mov     r0,r9                       @  r0<- result low word
    mov     r1,r10                      @  r1<- result high word
    bx      lr
577
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SHL_LONG
dvmCompiler_TEMPLATE_SHL_LONG:
/* File: armv5te/TEMPLATE_SHL_LONG.S */
    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     *
     * In: r0/r1 = value (lo/hi), r2 = shift count.  Out: r0/r1.
     * Relies on ARM "register shift by >= 32 yields 0" semantics for the
     * orr below when r2 is 0 (r3 = 32, so the lsr contributes nothing).
     */
    /* shl-long vAA, vBB, vCC */
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    mov     r1, r1, asl r2              @  r1<- r1 << r2
    rsb     r3, r2, #32                 @  r3<- 32 - r2
    orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
    subs    ip, r2, #32                 @  ip<- r2 - 32
    movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
    mov     r0, r0, asl r2              @  r0<- r0 << r2
    bx      lr
598
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SHR_LONG
dvmCompiler_TEMPLATE_SHR_LONG:
/* File: armv5te/TEMPLATE_SHR_LONG.S */
    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     *
     * In: r0/r1 = value (lo/hi), r2 = shift count.  Out: r0/r1.
     * Arithmetic (sign-propagating) shift right.
     */
    /* shr-long vAA, vBB, vCC */
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    mov     r0, r0, lsr r2              @  r0<- r0 >> r2
    rsb     r3, r2, #32                 @  r3<- 32 - r2
    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
    subs    ip, r2, #32                 @  ip<- r2 - 32
    movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
    mov     r1, r1, asr r2              @  r1<- r1 >> r2
    bx      lr
620
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_USHR_LONG
dvmCompiler_TEMPLATE_USHR_LONG:
/* File: armv5te/TEMPLATE_USHR_LONG.S */
    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     *
     * In: r0/r1 = value (lo/hi), r2 = shift count.  Out: r0/r1.
     * Logical (zero-filling) shift right.
     */
    /* ushr-long vAA, vBB, vCC */
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
    rsb     r3, r2, #32                 @  r3<- 32 - r2
    orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
    subs    ip, r2, #32                 @  ip<- r2 - 32
    movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
    mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
    bx      lr
641
642
    .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
/* File: armv5te/footer.S */
/*
 * ===========================================================================
 *  Common subroutines and data
 * ===========================================================================
 */

    .text
    .align  2
.LinvokeNative:
    @ Common path for dispatching a native method: set up the frame and
    @ JNI local-reference bookkeeping, call the native function, then
    @ either return through the chaining cell or handle a pending exception.
    @ Prep for the native call
    @ r1 = newFP, r0 = methodToCall
    ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
    ldr     r9, [r3, #offThread_jniLocal_nextEntry] @ r9<- thread->refNext
    str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
    str     r9, [r1, #(offStackSaveArea_localRefTop - sizeofStackSaveArea)]
                                        @ newFp->localRefTop=refNext
    mov     r9, r3                      @ r9<- glue->self (preserve)
    SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area

    mov     r2, r0                      @ r2<- methodToCall
    mov     r0, r1                      @ r0<- newFP
    add     r1, rGLUE, #offGlue_retval  @ r1<- &retval

    LDR_PC_LR "[r2, #offMethod_nativeFunc]"

    @ native return; r9=self, r10=newSaveArea
    @ equivalent to dvmPopJniLocals
    ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
    ldr     r0, [r10, #offStackSaveArea_localRefTop] @ r0<- newSave->localRefTop
    ldr     r1, [r9, #offThread_exception] @ check for exception
    str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
    cmp     r1, #0                      @ null?
    str     r0, [r9, #offThread_jniLocal_nextEntry] @ self->refNext<- r0
    bne     .LhandleException             @ exception pending: handle it
    bx      r2                          @ no exception: return via chaining cell

/* FIXME - untested */
.LhandleException:
    ldr     rIBASE, .LdvmAsmInstructionStart
    ldr     rPC, [r10, #offStackSaveArea_savedPc] @ reload rPC
    b       dvmMterpCommonExceptionThrown

    @ Literal pool: absolute addresses loaded PC-relatively by the
    @ templates above (PIC-friendly alternative to bl/ldr immediates).
    .align  2
.LdvmAsmInstructionStart:
    .word   dvmAsmInstructionStart
.LdvmJitToInterpNoChain:
    .word   dvmJitToInterpNoChain
.LdvmMterpStdBail:
    .word   dvmMterpStdBail
.L__aeabi_cdcmple:
    .word   __aeabi_cdcmple
.L__aeabi_cfcmple:
    .word   __aeabi_cfcmple
698
    /* End of the template region.  NOTE(review): the symbol name transposes
     * "dvm" to "dmv"; it is kept as-is because external code may reference
     * this exact name — confirm against callers before renaming. */
    .global dmvCompilerTemplateEnd
dmvCompilerTemplateEnd:

#endif /* WITH_JIT */
703
704