1/*
2 * This file was generated automatically by gen-template.py for 'mips'.
3 *
4 * --> DO NOT EDIT <--
5 */
6
7/* File: mips/header.S */
8/*
9 * Copyright (C) 2008 The Android Open Source Project
10 *
11 * Licensed under the Apache License, Version 2.0 (the "License");
12 * you may not use this file except in compliance with the License.
13 * You may obtain a copy of the License at
14 *
15 *      http://www.apache.org/licenses/LICENSE-2.0
16 *
17 * Unless required by applicable law or agreed to in writing, software
18 * distributed under the License is distributed on an "AS IS" BASIS,
19 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 * See the License for the specific language governing permissions and
21 * limitations under the License.
22 */
23
24#if defined(WITH_JIT)
25
26/*
27 * This is a #include, not a %include, because we want the C pre-processor
28 * to expand the macros into assembler assignment statements.
29 */
30#include "../../../mterp/common/asm-constants.h"
31#include "../../../mterp/common/mips-defines.h"
32#include "../../../mterp/common/jit-config.h"
33#include <asm/regdef.h>
34#include <asm/fpregdef.h>
35
/* Derive the FP configuration from the toolchain: HARD_FLOAT when the
   compiler targets hard-float, SOFT_FLOAT otherwise. */
#ifdef	__mips_hard_float
#define		HARD_FLOAT
#else
#define		SOFT_FLOAT
#endif
41
42/* MIPS definitions and declarations
43
44   reg	nick		purpose
45   s0	rPC		interpreted program counter, used for fetching instructions
46   s1	rFP		interpreted frame pointer, used for accessing locals and args
47   s2	rSELF		pointer to thread
48   s3	rIBASE		interpreted instruction base pointer, used for computed goto
49   s4	rINST		first 16-bit code unit of current instruction
50*/
51
/* register offsets */
/*
 * Numeric indices for the MIPS register files: 0-31 are the general-purpose
 * registers, and r_F0..r_F31 follow at 32-63 so GPRs and FPRs share one
 * flat index space.
 */
#define r_ZERO      0
#define r_AT        1
#define r_V0        2
#define r_V1        3
#define r_A0        4
#define r_A1        5
#define r_A2        6
#define r_A3        7
#define r_T0        8
#define r_T1        9
#define r_T2        10
#define r_T3        11
#define r_T4        12
#define r_T5        13
#define r_T6        14
#define r_T7        15
#define r_S0        16
#define r_S1        17
#define r_S2        18
#define r_S3        19
#define r_S4        20
#define r_S5        21
#define r_S6        22
#define r_S7        23
#define r_T8        24
#define r_T9        25
#define r_K0        26
#define r_K1        27
#define r_GP        28
#define r_SP        29
#define r_FP        30
#define r_RA        31
/* floating-point registers, offset by 32 */
#define r_F0        32
#define r_F1        33
#define r_F2        34
#define r_F3        35
#define r_F4        36
#define r_F5        37
#define r_F6        38
#define r_F7        39
#define r_F8        40
#define r_F9        41
#define r_F10       42
#define r_F11       43
#define r_F12       44
#define r_F13       45
#define r_F14       46
#define r_F15       47
#define r_F16       48
#define r_F17       49
#define r_F18       50
#define r_F19       51
#define r_F20       52
#define r_F21       53
#define r_F22       54
#define r_F23       55
#define r_F24       56
#define r_F25       57
#define r_F26       58
#define r_F27       59
#define r_F28       60
#define r_F29       61
#define r_F30       62
#define r_F31       63
117
/* single-purpose registers, given names for clarity (all callee-saved,
   so they survive calls out to C helpers) */
#define rPC	s0	/* interpreted program counter */
#define rFP	s1	/* interpreted frame pointer */
#define rSELF	s2	/* pointer to current Thread */
#define rIBASE	s3	/* interpreted instruction base pointer */
#define rINST	s4	/* first 16-bit code unit of current instruction */
#define rOBJ	s5	/* callee-saved scratch used by the templates */
#define rBIX	s6	/* callee-saved scratch used by the templates */
#define rTEMP	s7	/* callee-saved scratch; JAL/JALR use it to preserve ra */

/*
 * The two words of a 64-bit (long) argument or result travel in a register
 * pair whose order depends on endianness, so the rARGn / rRESULTn aliases
 * swap a0/a1, a2/a3 and v0/v1 between little- and big-endian builds.  The
 * templates always use the aliases, never the raw pair.
 */

#ifdef HAVE_LITTLE_ENDIAN
#define rARG0     a0
#define rARG1     a1
#define rARG2     a2
#define rARG3     a3
#define rRESULT0  v0
#define rRESULT1  v1
#else
#define rARG0     a1
#define rARG1     a0
#define rARG2     a3
#define rARG3     a2
#define rRESULT0  v1
#define rRESULT1  v0
#endif
148
149
/* save/restore the PC and/or FP from the thread struct */
#define LOAD_PC_FROM_SELF()	lw	rPC, offThread_pc(rSELF)
#define SAVE_PC_TO_SELF()	sw	rPC, offThread_pc(rSELF)
#define LOAD_FP_FROM_SELF()	lw	rFP, offThread_curFrame(rSELF)
#define SAVE_FP_TO_SELF()	sw	rFP, offThread_curFrame(rSELF)

/* publish rPC into the current frame's StackSaveArea (which sits just
   below rFP) */
#define EXPORT_PC() \
	sw	rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)

/* rd <- pointer to the StackSaveArea below frame pointer _fpreg */
#define SAVEAREA_FROM_FP(rd, _fpreg) \
	subu	rd, _fpreg, sizeofStackSaveArea

/* fetch the current 16-bit code unit into rINST */
#define FETCH_INST()			lhu	rINST, (rPC)

/* fetch the code unit _count units ahead, then advance rPC past it */
#define FETCH_ADVANCE_INST(_count)	lhu     rINST, (_count*2)(rPC); \
					addu	rPC, rPC, (_count * 2)

/* advance rPC by rd bytes, then fetch the code unit it points at */
#define FETCH_ADVANCE_INST_RB(rd)	addu	rPC, rPC, rd;	\
					lhu     rINST, (rPC)

/* fetch the code unit _count units ahead of rPC (zero-/sign-extended) */
#define FETCH(rd, _count)		lhu	rd, (_count * 2)(rPC)
#define FETCH_S(rd, _count)		lh	rd, (_count * 2)(rPC)

/* fetch one byte of a code unit; byte selection is endian-corrected */
#ifdef HAVE_LITTLE_ENDIAN

#define FETCH_B(rd, _count)            lbu     rd, (_count * 2)(rPC)
#define FETCH_C(rd, _count)            lbu     rd, (_count * 2 + 1)(rPC)

#else

#define FETCH_B(rd, _count)            lbu     rd, (_count * 2 + 1)(rPC)
#define FETCH_C(rd, _count)            lbu     rd, (_count * 2)(rPC)

#endif

/* extract the opcode (low byte) from rINST */
#define GET_INST_OPCODE(rd)		and	rd, rINST, 0xFF

/*
 * NOTE(review): the shift amount -1000 below is gen-template.py's default
 * for an unset "handler-size-bits" option and is not a legal MIPS shift
 * amount, so GOTO_OPCODE (and SET_VREG_GOTO further down) cannot assemble
 * a working dispatch as written.  They appear unused by the compiler
 * templates in this file -- confirm before referencing them.
 */
#define GOTO_OPCODE(rd)			sll  rd, rd, -1000;	\
					addu rd, rIBASE, rd;	\
					jr  rd


#define LOAD(rd, rbase)			lw  rd, 0(rbase)
#define LOAD_F(rd, rbase)		l.s rd, (rbase)
#define STORE(rd, rbase)		sw  rd, 0(rbase)
#define STORE_F(rd, rbase)		s.s rd, (rbase)

/* rd <- vreg[rix]; Dalvik virtual registers are 4-byte slots off rFP
   (clobbers AT via LOAD_eas2) */
#define GET_VREG(rd, rix)		LOAD_eas2(rd,rFP,rix)

#define GET_VREG_F(rd, rix)		EAS2(AT, rFP, rix);		\
					.set noat;  l.s rd, (AT); .set at

/* vreg[rix] <- rd (clobbers AT via STORE_eas2) */
#define SET_VREG(rd, rix)		STORE_eas2(rd, rFP, rix)

/* store rd into vreg[rix] and jump to handler dst; the sw rides the jr
   delay slot (noreorder).  See the NOTE(review) on GOTO_OPCODE above
   about the -1000 shift amount.  Clobbers t8. */
#define SET_VREG_GOTO(rd, rix, dst)	.set noreorder;		\
					sll  dst, dst, -1000;	\
					addu dst, rIBASE, dst;			\
					sll  t8, rix, 2;	\
					addu t8, t8, rFP;	\
					jr  dst;		\
					sw  rd, 0(t8);		\
					.set reorder

#define SET_VREG_F(rd, rix)		EAS2(AT, rFP, rix);		\
					.set noat;  s.s	rd, (AT); .set at
215
216
/*
 * Opcode field extraction from rINST (the current 16-bit Dalvik code unit):
 *   GET_OPA(rd)  - rd <- bits 15:8  (vAA, 8-bit register form)
 *   GET_OPA4(rd) - rd <- bits 11:8  (vA, 4-bit register form)
 *   GET_OPB(rd)  - rd <- bits 15:12 (vB, 4-bit register form)
 */
#define GET_OPA(rd)			srl     rd, rINST, 8
#ifndef		MIPS32R2
#define GET_OPA4(rd)			GET_OPA(rd);  and  rd, 0xf
#else
/* bug fix: extract from rINST, not from the uninitialized destination rd
   (must match the semantics of the non-R2 path above) */
#define GET_OPA4(rd)			ext	rd, rINST, 8, 4
#endif
#define GET_OPB(rd)			srl     rd, rINST, 12

/* rd <- self-><off> field (offsets come from asm-constants.h) */
#define LOAD_rSELF_OFF(rd,off)		lw    rd, offThread_##off##(rSELF)

#define LOAD_rSELF_method(rd)		LOAD_rSELF_OFF(rd, method)
#define LOAD_rSELF_methodClassDex(rd)	LOAD_rSELF_OFF(rd, methodClassDex)
#define LOAD_rSELF_interpStackEnd(rd)	LOAD_rSELF_OFF(rd, interpStackEnd)
#define LOAD_rSELF_retval(rd)		LOAD_rSELF_OFF(rd, retval)
#define LOAD_rSELF_pActiveProfilers(rd)	LOAD_rSELF_OFF(rd, pActiveProfilers)
#define LOAD_rSELF_bailPtr(rd)		LOAD_rSELF_OFF(rd, bailPtr)

#define GET_JIT_PROF_TABLE(rd)		LOAD_rSELF_OFF(rd,pJitProfTable)
#define GET_JIT_THRESHOLD(rd)		LOAD_rSELF_OFF(rd,jitThreshold)
236
237/*
238 * Form an Effective Address rd = rbase + roff<<n;
239 * Uses reg AT
240 */
241#define EASN(rd,rbase,roff,rshift)	.set noat;		\
242					sll  AT, roff, rshift;	\
243					addu rd, rbase, AT;	\
244					.set at
245
246#define EAS1(rd,rbase,roff)		EASN(rd,rbase,roff,1)
247#define EAS2(rd,rbase,roff)		EASN(rd,rbase,roff,2)
248#define EAS3(rd,rbase,roff)		EASN(rd,rbase,roff,3)
249#define EAS4(rd,rbase,roff)		EASN(rd,rbase,roff,4)
250
251/*
252 * Form an Effective Shift Right rd = rbase + roff>>n;
253 * Uses reg AT
254 */
255#define ESRN(rd,rbase,roff,rshift)	.set noat;		\
256					srl  AT, roff, rshift;	\
257					addu rd, rbase, AT;	\
258					.set at
259
260#define LOAD_eas2(rd,rbase,roff)	EAS2(AT, rbase, roff);  \
261					.set noat;  lw  rd, 0(AT); .set at
262
263#define STORE_eas2(rd,rbase,roff)	EAS2(AT, rbase, roff);  \
264					.set noat;  sw  rd, 0(AT); .set at
265
/* simple base+offset word / unsigned-halfword accessors */
#define LOAD_RB_OFF(rd,rbase,off)	lw	rd, off(rbase)
#define LOADu2_RB_OFF(rd,rbase,off)	lhu	rd, off(rbase)
#define STORE_RB_OFF(rd,rbase,off)	sw	rd, off(rbase)

/*
 * 64-bit load/store as an rlo/rhi register pair.  Memory layout is fixed,
 * so the word each register maps to swaps between little- and big-endian
 * builds; the STORE/LOAD pairs below must stay mirror images.
 */
#ifdef HAVE_LITTLE_ENDIAN

#define STORE64_off(rlo,rhi,rbase,off)	        sw	rlo, off(rbase);	\
					        sw	rhi, (off+4)(rbase)
#define LOAD64_off(rlo,rhi,rbase,off)	        lw	rlo, off(rbase);	\
					        lw	rhi, (off+4)(rbase)

#define STORE64_off_F(rlo,rhi,rbase,off)	s.s	rlo, off(rbase);	\
						s.s	rhi, (off+4)(rbase)
#define LOAD64_off_F(rlo,rhi,rbase,off)		l.s	rlo, off(rbase);	\
						l.s	rhi, (off+4)(rbase)
#else

#define STORE64_off(rlo,rhi,rbase,off)	        sw	rlo, (off+4)(rbase);	\
					        sw	rhi, (off)(rbase)
#define LOAD64_off(rlo,rhi,rbase,off)	        lw	rlo, (off+4)(rbase);	\
					        lw	rhi, (off)(rbase)
#define STORE64_off_F(rlo,rhi,rbase,off)	s.s	rlo, (off+4)(rbase);	\
						s.s	rhi, (off)(rbase)
#define LOAD64_off_F(rlo,rhi,rbase,off)		l.s	rlo, (off+4)(rbase);	\
						l.s	rhi, (off)(rbase)
#endif

/* offset-0 conveniences for the pair accessors above */
#define STORE64(rlo,rhi,rbase)		STORE64_off(rlo,rhi,rbase,0)
#define LOAD64(rlo,rhi,rbase)		LOAD64_off(rlo,rhi,rbase,0)

#define STORE64_F(rlo,rhi,rbase)	STORE64_off_F(rlo,rhi,rbase,0)
#define LOAD64_F(rlo,rhi,rbase)		LOAD64_off_F(rlo,rhi,rbase,0)

/* fixed lo/hi word stores (memory order, not endian-swapped) */
#define STORE64_lo(rd,rbase)		sw	rd, 0(rbase)
#define STORE64_hi(rd,rbase)		sw	rd, 4(rbase)
301
302
/* typed field accessors: rd <- rbase->field, using the off* constants
   generated into asm-constants.h */
#define LOAD_offThread_exception(rd,rbase)		LOAD_RB_OFF(rd,rbase,offThread_exception)
#define LOAD_base_offArrayObject_length(rd,rbase)	LOAD_RB_OFF(rd,rbase,offArrayObject_length)
#define LOAD_base_offClassObject_accessFlags(rd,rbase)	LOAD_RB_OFF(rd,rbase,offClassObject_accessFlags)
#define LOAD_base_offClassObject_descriptor(rd,rbase)   LOAD_RB_OFF(rd,rbase,offClassObject_descriptor)
#define LOAD_base_offClassObject_super(rd,rbase)	LOAD_RB_OFF(rd,rbase,offClassObject_super)

#define LOAD_base_offClassObject_vtable(rd,rbase)	LOAD_RB_OFF(rd,rbase,offClassObject_vtable)
#define LOAD_base_offClassObject_vtableCount(rd,rbase)	LOAD_RB_OFF(rd,rbase,offClassObject_vtableCount)
#define LOAD_base_offDvmDex_pResClasses(rd,rbase)	LOAD_RB_OFF(rd,rbase,offDvmDex_pResClasses)
#define LOAD_base_offDvmDex_pResFields(rd,rbase)	LOAD_RB_OFF(rd,rbase,offDvmDex_pResFields)

#define LOAD_base_offDvmDex_pResMethods(rd,rbase)	LOAD_RB_OFF(rd,rbase,offDvmDex_pResMethods)
#define LOAD_base_offDvmDex_pResStrings(rd,rbase)	LOAD_RB_OFF(rd,rbase,offDvmDex_pResStrings)
#define LOAD_base_offInstField_byteOffset(rd,rbase)	LOAD_RB_OFF(rd,rbase,offInstField_byteOffset)
#define LOAD_base_offStaticField_value(rd,rbase)	LOAD_RB_OFF(rd,rbase,offStaticField_value)
#define LOAD_base_offMethod_clazz(rd,rbase)		LOAD_RB_OFF(rd,rbase,offMethod_clazz)

#define LOAD_base_offMethod_name(rd,rbase)		LOAD_RB_OFF(rd,rbase,offMethod_name)
#define LOAD_base_offObject_clazz(rd,rbase)		LOAD_RB_OFF(rd,rbase,offObject_clazz)

/* unsigned 16-bit field load */
#define LOADu2_offMethod_methodIndex(rd,rbase)		LOADu2_RB_OFF(rd,rbase,offMethod_methodIndex)


#define STORE_offThread_exception(rd,rbase)		STORE_RB_OFF(rd,rbase,offThread_exception)
327
328
/* sp-relative spill/fill and frame grow/shrink helpers */
#define	STACK_STORE(rd,off)	sw   rd, off(sp)
#define	STACK_LOAD(rd,off)	lw   rd, off(sp)
#define CREATE_STACK(n)	 	subu sp, sp, n
#define DELETE_STACK(n)	 	addu sp, sp, n

#define SAVE_RA(offset)	 	STACK_STORE(ra, offset)
#define LOAD_RA(offset)	 	STACK_LOAD(ra, offset)

#define LOAD_ADDR(dest,addr)	la   dest, addr
#define LOAD_IMM(dest, imm)	li   dest, imm
#define MOVE_REG(dest,src)	move dest, src
#define	RETURN			jr   ra
/* fixed frame size used by every STACK_* helper below */
#define	STACK_SIZE		128

#define STACK_OFFSET_ARG04	16
#define STACK_OFFSET_GP		84
#define STACK_OFFSET_rFP	112

/* This directive will make sure all subsequent jal restore gp at a known offset */
        .cprestore STACK_OFFSET_GP

/* call while preserving ra in rTEMP (clobbers rTEMP/s7; do not use when
   rTEMP holds a live value -- save it to scratch first) */
#define JAL(func)		move rTEMP, ra;				\
				jal  func;				\
				move ra, rTEMP

#define JALR(reg)		move rTEMP, ra;				\
				jalr ra, reg;				\
				move ra, rTEMP

#define BAL(n)			bal  n
359
/*
 * Frame save/restore helpers built on STACK_STORE/STACK_LOAD.  Layout in
 * the STACK_SIZE (128) byte frame: ra at sp+124, fp at +120, s0 at +116,
 * s1 at STACK_OFFSET_rFP (112), s2..s7 at +108..+88, gp at
 * STACK_OFFSET_GP (84).  Each STACK_LOAD_* must mirror its STACK_STORE_*.
 */
#define	STACK_STORE_RA()  	CREATE_STACK(STACK_SIZE);		\
				STACK_STORE(gp, STACK_OFFSET_GP);	\
				STACK_STORE(ra, 124)

#define	STACK_STORE_S0()  	STACK_STORE_RA();			\
				STACK_STORE(s0, 116)

#define	STACK_STORE_S0S1()  	STACK_STORE_S0();			\
				STACK_STORE(s1, STACK_OFFSET_rFP)

#define	STACK_LOAD_RA()		STACK_LOAD(ra, 124);			\
				STACK_LOAD(gp, STACK_OFFSET_GP);	\
				DELETE_STACK(STACK_SIZE)

#define	STACK_LOAD_S0()  	STACK_LOAD(s0, 116);			\
				STACK_LOAD_RA()

#define	STACK_LOAD_S0S1()  	STACK_LOAD(s1, STACK_OFFSET_rFP);	\
				STACK_LOAD_S0()

/* save / restore ra, fp and all of s0-s7 (full callee-saved set) */
#define STACK_STORE_FULL()	CREATE_STACK(STACK_SIZE);	\
				STACK_STORE(ra, 124);		\
				STACK_STORE(fp, 120);		\
				STACK_STORE(s0, 116);		\
				STACK_STORE(s1, STACK_OFFSET_rFP);	\
				STACK_STORE(s2, 108);		\
				STACK_STORE(s3, 104);		\
				STACK_STORE(s4, 100);		\
				STACK_STORE(s5, 96);		\
				STACK_STORE(s6, 92);		\
				STACK_STORE(s7, 88);

#define STACK_LOAD_FULL()	STACK_LOAD(gp, STACK_OFFSET_GP);	\
				STACK_LOAD(s7, 88);	\
				STACK_LOAD(s6, 92);	\
				STACK_LOAD(s5, 96);	\
				STACK_LOAD(s4, 100);	\
				STACK_LOAD(s3, 104);	\
				STACK_LOAD(s2, 108);	\
				STACK_LOAD(s1, STACK_OFFSET_rFP);	\
				STACK_LOAD(s0, 116);	\
				STACK_LOAD(fp, 120);	\
				STACK_LOAD(ra, 124);	\
				DELETE_STACK(STACK_SIZE)

/*
 * first 8 words are reserved for function calls
 * Maximum offset is STACK_OFFSET_SCRMX-STACK_OFFSET_SCR
 * NOTE(review): STACK_OFFSET_SCRMX is not defined in this file -- confirm
 * it is provided elsewhere before relying on that bound.
 */
#define STACK_OFFSET_SCR   32
#define SCRATCH_STORE(r,off) \
    STACK_STORE(r, STACK_OFFSET_SCR+off);
#define SCRATCH_LOAD(r,off) \
    STACK_LOAD(r, STACK_OFFSET_SCR+off);
414
/* File: mips/platform.S */
/*
 * ===========================================================================
 *  CPU-version-specific defines and utility
 * ===========================================================================
 */



    .global dvmCompilerTemplateStart
    .type   dvmCompilerTemplateStart, %function
    /*
     * NOTE(review): the templates are emitted into .data.rel.ro even though
     * the start symbol is typed as a function -- presumably the JIT copies
     * them into the executable code cache before running them; confirm
     * against the code-cache loader.
     */
    .section .data.rel.ro

dvmCompilerTemplateStart:
429
430/* ------------------------------ */
431    .balign 4
432    .global dvmCompiler_TEMPLATE_CMP_LONG
433dvmCompiler_TEMPLATE_CMP_LONG:
434/* File: mips/TEMPLATE_CMP_LONG.S */
435    /*
436     * Compare two 64-bit values
437     *    x = y     return  0
438     *    x < y     return -1
439     *    x > y     return  1
440     *
441     * I think I can improve on the ARM code by the following observation
442     *    slt   t0,  x.hi, y.hi;        # (x.hi < y.hi) ? 1:0
443     *    sgt   t1,  x.hi, y.hi;        # (y.hi > x.hi) ? 1:0
444     *    subu  v0, t0, t1              # v0= -1:1:0 for [ < > = ]
445     *
446     * This code assumes the register pair ordering will depend on endianess (a1:a0 or a0:a1).
447     *    a1:a0 => vBB
448     *    a3:a2 => vCC
449     */
450    /* cmp-long vAA, vBB, vCC */
451    slt    t0, rARG1, rARG3             # compare hi
452    sgt    t1, rARG1, rARG3
453    subu   v0, t1, t0                   # v0<- (-1,1,0)
454    bnez   v0, .LTEMPLATE_CMP_LONG_finish
455                                        # at this point x.hi==y.hi
456    sltu   t0, rARG0, rARG2             # compare lo
457    sgtu   t1, rARG0, rARG2
458    subu   v0, t1, t0                   # v0<- (-1,1,0) for [< > =]
459.LTEMPLATE_CMP_LONG_finish:
460    RETURN
461
462/* ------------------------------ */
463    .balign 4
464    .global dvmCompiler_TEMPLATE_RETURN
465dvmCompiler_TEMPLATE_RETURN:
466/* File: mips/TEMPLATE_RETURN.S */
467    /*
468     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
469     * If the stored value in returnAddr
470     * is non-zero, the caller is compiled by the JIT thus return to the
471     * address in the code cache following the invoke instruction. Otherwise
472     * return to the special dvmJitToInterpNoChain entry point.
473     */
474#if defined(TEMPLATE_INLINE_PROFILING)
475    # preserve a0-a2 and ra
476    SCRATCH_STORE(a0, 0)
477    SCRATCH_STORE(a1, 4)
478    SCRATCH_STORE(a2, 8)
479    SCRATCH_STORE(ra, 12)
480
481    # a0=rSELF
482    move    a0, rSELF
483    la      t9, dvmFastMethodTraceExit
484    JALR(t9)
485    lw      gp, STACK_OFFSET_GP(sp)
486
487    # restore a0-a2 and ra
488    SCRATCH_LOAD(ra, 12)
489    SCRATCH_LOAD(a2, 8)
490    SCRATCH_LOAD(a1, 4)
491    SCRATCH_LOAD(a0, 0)
492#endif
493    SAVEAREA_FROM_FP(a0, rFP)           # a0<- saveArea (old)
494    lw      t0, offStackSaveArea_prevFrame(a0)     # t0<- saveArea->prevFrame
495    lbu     t1, offThread_breakFlags(rSELF)        # t1<- breakFlags
496    lw      rPC, offStackSaveArea_savedPc(a0)      # rPC<- saveArea->savedPc
497#if !defined(WITH_SELF_VERIFICATION)
498    lw      t2,  offStackSaveArea_returnAddr(a0)   # t2<- chaining cell ret
499#else
500    move    t2, zero                               # disable chaining
501#endif
502    lw      a2, offStackSaveArea_method - sizeofStackSaveArea(t0)
503                                                   # a2<- method we're returning to
504#if !defined(WITH_SELF_VERIFICATION)
505    beq     a2, zero, 1f                           # bail to interpreter
506#else
507    bne     a2, zero, 2f
508    JALR(ra)                                       # punt to interpreter and compare state
509    # DOUG: assume this does not return ???
5102:
511#endif
512    la      t4, .LdvmJitToInterpNoChainNoProfile   # defined in footer.S
513    lw      a1, (t4)
514    move    rFP, t0                                # publish new FP
515    beq     a2, zero, 4f
516    lw      t0, offMethod_clazz(a2)                # t0<- method->clazz
5174:
518
519    sw      a2, offThread_method(rSELF)            # self->method = newSave->method
520    lw      a0, offClassObject_pDvmDex(t0)         # a0<- method->clazz->pDvmDex
521    sw      rFP, offThread_curFrame(rSELF)         # self->curFrame = fp
522    add     rPC, rPC, 3*2                          # publish new rPC
523    sw      a0, offThread_methodClassDex(rSELF)
524    movn    t2, zero, t1                           # check the breadFlags and
525                                                   # clear the chaining cell address
526    sw      t2, offThread_inJitCodeCache(rSELF)    # in code cache or not
527    beq     t2, zero, 3f                           # chaining cell exists?
528    JALR(t2)                                       # jump to the chaining cell
529    # DOUG: assume this does not return ???
5303:
531#if defined(WITH_JIT_TUNING)
532    li      a0, kCallsiteInterpreted
533#endif
534    j       a1                                     # callsite is interpreted
5351:
536    sw      zero, offThread_inJitCodeCache(rSELF)  # reset inJitCodeCache
537    SAVE_PC_TO_SELF()                              # SAVE_PC_FP_TO_SELF()
538    SAVE_FP_TO_SELF()
539    la      t4, .LdvmMterpStdBail                  # defined in footer.S
540    lw      a2, (t4)
541    move    a0, rSELF                              # Expecting rSELF in a0
542    JALR(a2)                                       # exit the interpreter
543    # DOUG: assume this does not return ???
544
545/* ------------------------------ */
546    .balign 4
547    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT
548dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
549/* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
550    /*
551     * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
552     * into rPC then jump to dvmJitToInterpNoChain to dispatch the
553     * runtime-resolved callee.
554     */
555    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
556    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
557    lh     a2, offMethod_outsSize(a0)             # a2<- methodToCall->outsSize
558    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
559    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
560    move   a3, a1                                 # a3<- returnCell
561    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
562    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
563    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
564    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
565    sll    t6, a2, 2                              # multiply outsSize by 4 (4 bytes per reg)
566    sub    t0, t0, t6                             # t0<- bottom (newsave-outsSize)
567    bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
568    RETURN                                        # return to raise stack overflow excep.
569
5701:
571    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
572    lw     t9, offMethod_clazz(a0)                # t9<- methodToCall->clazz
573    lw     t0, offMethod_accessFlags(a0)          # t0<- methodToCall->accessFlags
574    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
575    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
576    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns
577
578    # set up newSaveArea
579    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
580    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
581    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
582    beqz   t8, 2f                                 # breakFlags != 0
583    RETURN                                        # bail to the interpreter
584
5852:
586    and    t6, t0, ACC_NATIVE
587    beqz   t6, 3f
588#if !defined(WITH_SELF_VERIFICATION)
589    j      .LinvokeNative
590#else
591    RETURN                                        # bail to the interpreter
592#endif
593
5943:
595    # continue executing the next instruction through the interpreter
596    la     t0, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S
597    lw     rTEMP, (t0)
598    lw     a3, offClassObject_pDvmDex(t9)         # a3<- method->clazz->pDvmDex
599
600    # Update "thread" values for the new method
601    sw     a0, offThread_method(rSELF)            # self->method = methodToCall
602    sw     a3, offThread_methodClassDex(rSELF)    # self->methodClassDex = ...
603    move   rFP, a1                                # fp = newFp
604    sw     rFP, offThread_curFrame(rSELF)         # self->curFrame = newFp
605#if defined(TEMPLATE_INLINE_PROFILING)
606    # preserve rTEMP,a1-a3
607    SCRATCH_STORE(rTEMP, 0)
608    SCRATCH_STORE(a1, 4)
609    SCRATCH_STORE(a2, 8)
610    SCRATCH_STORE(a3, 12)
611
612    # a0=methodToCall, a1=rSELF
613    move   a1, rSELF
614    la     t9, dvmFastMethodTraceEnter
615    JALR(t9)
616    lw     gp, STACK_OFFSET_GP(sp)
617
618    # restore rTEMP,a1-a3
619    SCRATCH_LOAD(a3, 12)
620    SCRATCH_LOAD(a2, 8)
621    SCRATCH_LOAD(a1, 4)
622    SCRATCH_LOAD(rTEMP, 0)
623#endif
624
625    # Start executing the callee
626#if defined(WITH_JIT_TUNING)
627    li     a0, kInlineCacheMiss
628#endif
629    jr     rTEMP                                  # dvmJitToInterpTraceSelectNoChain
630
631/* ------------------------------ */
632    .balign 4
633    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN
634dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
635/* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN.S */
636    /*
637     * For monomorphic callsite, setup the Dalvik frame and return to the
638     * Thumb code through the link register to transfer control to the callee
639     * method through a dedicated chaining cell.
640     */
641    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
642    # methodToCall is guaranteed to be non-native
643.LinvokeChain:
644    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
645    lh     a2, offMethod_outsSize(a0)             # a2<- methodToCall->outsSize
646    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
647    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
648    move   a3, a1                                 # a3<- returnCell
649    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
650    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
651    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
652    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
653    add    t2, ra, 8                              # setup the punt-to-interp address
654                                                  # 8 bytes skips branch and delay slot
655    sll    t6, a2, 2                              # multiply outsSize by 4 (4 bytes per reg)
656    sub    t0, t0, t6                             # t0<- bottom (newsave-outsSize)
657    bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
658    jr     t2                                     # return to raise stack overflow excep.
659
6601:
661    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
662    lw     t9, offMethod_clazz(a0)                # t9<- methodToCall->clazz
663    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
664    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
665    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns
666
667    # set up newSaveArea
668    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
669    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
670    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
671    beqz   t8, 2f                                 # breakFlags != 0
672    jr     t2                                     # bail to the interpreter
673
6742:
675    lw     a3, offClassObject_pDvmDex(t9)         # a3<- methodToCall->clazz->pDvmDex
676
677    # Update "thread" values for the new method
678    sw     a0, offThread_method(rSELF)            # self->method = methodToCall
679    sw     a3, offThread_methodClassDex(rSELF)    # self->methodClassDex = ...
680    move   rFP, a1                                # fp = newFp
681    sw     rFP, offThread_curFrame(rSELF)         # self->curFrame = newFp
682#if defined(TEMPLATE_INLINE_PROFILING)
683    # preserve a0-a2 and ra
684    SCRATCH_STORE(a0, 0)
685    SCRATCH_STORE(a1, 4)
686    SCRATCH_STORE(a2, 8)
687    SCRATCH_STORE(ra, 12)
688
689    move   a1, rSELF
690    # a0=methodToCall, a1=rSELF
691    la     t9, dvmFastMethodTraceEnter
692    jalr   t9
693    lw     gp, STACK_OFFSET_GP(sp)
694
695    # restore a0-a2 and ra
696    SCRATCH_LOAD(ra, 12)
697    SCRATCH_LOAD(a2, 8)
698    SCRATCH_LOAD(a1, 4)
699    SCRATCH_LOAD(a0, 0)
700#endif
701    RETURN                                        # return to the callee-chaining cell
702
703/* ------------------------------ */
704    .balign 4
705    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
706dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
707/* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
708    /*
709     * For polymorphic callsite, check whether the cached class pointer matches
710     * the current one. If so setup the Dalvik frame and return to the
711     * Thumb code through the link register to transfer control to the callee
712     * method through a dedicated chaining cell.
713     *
714     * The predicted chaining cell is declared in ArmLIR.h with the
715     * following layout:
716     *
717     *  typedef struct PredictedChainingCell {
718     *      u4 branch;
719     *      u4 delay_slot;
720     *      const ClassObject *clazz;
721     *      const Method *method;
722     *      u4 counter;
723     *  } PredictedChainingCell;
724     *
725     * Upon returning to the callsite:
726     *    - lr   : to branch to the chaining cell
727     *    - lr+8 : to punt to the interpreter
728     *    - lr+16: to fully resolve the callee and may rechain.
729     *             a3 <- class
730     */
731    # a0 = this, a1 = returnCell, a2 = predictedChainCell, rPC = dalvikCallsite
732    lw      a3, offObject_clazz(a0)     # a3 <- this->class
733    lw      rIBASE, 8(a2)                   # t0 <- predictedChainCell->clazz
734    lw      a0, 12(a2)                  # a0 <- predictedChainCell->method
735    lw      t1, offThread_icRechainCount(rSELF)    # t1 <- shared rechainCount
736
737#if defined(WITH_JIT_TUNING)
738    la      rINST, .LdvmICHitCount
739    #add     t2, t2, 1
740    bne    a3, rIBASE, 1f
741    nop
742    lw      t2, 0(rINST)
743    add     t2, t2, 1
744    sw      t2, 0(rINST)
7451:
746    #add     t2, t2, 1
747#endif
748    beq     a3, rIBASE, .LinvokeChain       # branch if predicted chain is valid
749    lw      rINST, offClassObject_vtable(a3)     # rINST <- this->class->vtable
750    beqz    rIBASE, 2f                      # initialized class or not
751    sub     a1, t1, 1                   # count--
752    sw      a1, offThread_icRechainCount(rSELF)   # write back to InterpState
753    b       3f
7542:
755    move    a1, zero
7563:
757    add     ra, ra, 16                  # return to fully-resolve landing pad
758    /*
759     * a1 <- count
760     * a2 <- &predictedChainCell
761     * a3 <- this->class
762     * rPC <- dPC
763     * rINST <- this->class->vtable
764     */
765    RETURN
766
767/* ------------------------------ */
768    .balign 4
769    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE
770dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
771/* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE.S */
772    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
773    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
774    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
775    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
776    move   a3, a1                                 # a3<- returnCell
777    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
778    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
779    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
780    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
781    bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
782    RETURN                                        # return to raise stack overflow excep.
783
7841:
785    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
786    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
787    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
788    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns
789
790    # set up newSaveArea
791    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
792    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
793    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
794    lw     rTEMP, offMethod_nativeFunc(a0)        # t9<- method->nativeFunc
795#if !defined(WITH_SELF_VERIFICATION)
796    beqz   t8, 2f                                 # breakFlags != 0
797    RETURN                                        # bail to the interpreter
7982:
799#else
800    RETURN                                        # bail to the interpreter unconditionally
801#endif
802
803    # go ahead and transfer control to the native code
804    lw     t6, offThread_jniLocal_topCookie(rSELF)  # t6<- thread->localRef->...
805    sw     a1, offThread_curFrame(rSELF)          # self->curFrame = newFp
806    sw     zero, offThread_inJitCodeCache(rSELF)  # not in the jit code cache
807    sw     t6, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1)
808                                                  # newFp->localRefCookie=top
809    SAVEAREA_FROM_FP(rBIX, a1)                    # rBIX<- new stack save area
810    move   a2, a0                                 # a2<- methodToCall
811    move   a0, a1                                 # a0<- newFp
812    add    a1, rSELF, offThread_retval            # a1<- &retval
813    move   a3, rSELF                              # a3<- self
814#if defined(TEMPLATE_INLINE_PROFILING)
815    # a2: methodToCall
816    # preserve rTEMP,a0-a3
817    SCRATCH_STORE(a0, 0)
818    SCRATCH_STORE(a1, 4)
819    SCRATCH_STORE(a2, 8)
820    SCRATCH_STORE(a3, 12)
821    SCRATCH_STORE(rTEMP, 16)
822
823    move   a0, a2
824    move   a1, rSELF
825    # a0=JNIMethod, a1=rSELF
826    la      t9, dvmFastMethodTraceEnter
827    JALR(t9)                                      # off to the native code
828    lw     gp, STACK_OFFSET_GP(sp)
829
830    # restore rTEMP,a0-a3
831    SCRATCH_LOAD(rTEMP, 16)
832    SCRATCH_LOAD(a3, 12)
833    SCRATCH_LOAD(a2, 8)
834    SCRATCH_LOAD(a1, 4)
835    SCRATCH_LOAD(a0, 0)
836
837    move   rOBJ, a2                               # save a2
838#endif
839    move   t9, rTEMP
840    JALR(t9)                                   # off to the native code
841    lw     gp, STACK_OFFSET_GP(sp)
842
843#if defined(TEMPLATE_INLINE_PROFILING)
844    move   a0, rOBJ
845    move   a1, rSELF
846    # a0=JNIMethod, a1=rSELF
847    la      t9, dvmFastNativeMethodTraceExit
848    JALR(t9)
849    lw     gp, STACK_OFFSET_GP(sp)
850#endif
851
852    # native return; rBIX=newSaveArea
853    # equivalent to dvmPopJniLocals
854    lw     a2, offStackSaveArea_returnAddr(rBIX)     # a2 = chaining cell ret addr
855    lw     a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top
856    lw     a1, offThread_exception(rSELF)            # check for exception
857    sw     rFP, offThread_curFrame(rSELF)            # self->curFrame = fp
858    sw     a0, offThread_jniLocal_topCookie(rSELF)   # new top <- old top
859    lw     a0, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
860
861    # a0 = dalvikCallsitePC
862    bnez   a1, .LhandleException                     # handle exception if any
863
864    sw     a2, offThread_inJitCodeCache(rSELF)       # set the mode properly
865    beqz   a2, 3f
866    jr     a2                                        # go if return chaining cell still exist
867
8683:
869    # continue executing the next instruction through the interpreter
870    la     a1, .LdvmJitToInterpTraceSelectNoChain    # defined in footer.S
871    lw     a1, (a1)
872    add    rPC, a0, 3*2                              # reconstruct new rPC (advance 3 dalvik instr)
873
874#if defined(WITH_JIT_TUNING)
875    li     a0, kCallsiteInterpreted
876#endif
877    jr     a1
878
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MUL_LONG
dvmCompiler_TEMPLATE_MUL_LONG:
/* File: mips/TEMPLATE_MUL_LONG.S */
    /*
     * Signed 64-bit integer multiply.
     *
     * For JIT: op1 in a0/a1, op2 in a2/a3, return in v0/v1
     *
     * Consider WXxYZ (a1a0 x a3a2) with a long multiply:
     *
     *         a1   a0
     *   x     a3   a2
     *   -------------
     *       a2a1 a2a0
     *       a3a0
     *  a3a1 (<= unused)
     *  ---------------
     *         v1   v0
     *
     * Only the low 64 bits of the product are kept, so the a3 x a1
     * partial product (which lies entirely above bit 63) is never
     * computed.  Only the a2 x a0 term needs its full 64-bit result
     * (via multu/HI/LO); the two cross terms contribute just their
     * low 32 bits to the high word.
     */
    /* mul-long vAA, vBB, vCC */
    mul     rRESULT1,rARG3,rARG0              #  v1= a3a0 (low 32 bits of cross term)
    multu   rARG2,rARG0                       #  HI/LO<- full 64-bit a2 x a0
    mfhi    t1                                #  t1<- hi(a2a0): carry into high word
    mflo    rRESULT0                          #  v0= a2a0 (low word of final result)
    mul     t0,rARG2,rARG1                    #  t0= a2a1 (low 32 bits of cross term)
    addu    rRESULT1,rRESULT1,t1              #  v1= a3a0 + hi(a2a0)
    addu    rRESULT1,rRESULT1,t0              #  v1= a3a0 + hi(a2a0) + a2a1;
    RETURN
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SHL_LONG
dvmCompiler_TEMPLATE_SHL_LONG:
/* File: mips/TEMPLATE_SHL_LONG.S */
    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     *
     * The bits shifted out of the low word are recovered with a
     * (not; srl 1; srl) sequence rather than a single shift by
     * (32-shift): a variable shift amount of 32 is undefined on MIPS
     * (only the low 5 bits are used), and ~shift = 31-shift plus one
     * extra 1-bit shift gives the correct result even for shift==0.
     * The trailing movn pair patches the result when bit 5 of the
     * shift count is set (i.e. shift distance 32..63).
     */
    /* shl-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */
    sll     rRESULT0, rARG0, a2		#  rlo<- alo << (shift&31)
    not     rRESULT1, a2		#  rRESULT1<- ~shift = 31-(shift&31) (mod 32)
    srl     rARG0, 1			#  alo>>=1: first half of >> (32-(shift&31))
    srl     rARG0, rRESULT1		#  alo<- alo >> (32-(shift&31))
    sll     rRESULT1, rARG1, a2		#  rhi<- ahi << (shift&31)
    or      rRESULT1, rARG0		#  rhi<- rhi | bits carried in from alo
    andi    a2, 0x20			#  a2<- shift & 0x20 (set if shift >= 32)
    movn    rRESULT1, rRESULT0, a2	#  rhi<- rlo (if shift&0x20)
    movn    rRESULT0, zero, a2		#  rlo<- 0  (if shift&0x20)
    RETURN
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SHR_LONG
dvmCompiler_TEMPLATE_SHR_LONG:
/* File: mips/TEMPLATE_SHR_LONG.S */
    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     *
     * Arithmetic (sign-propagating) right shift.  Mirror image of
     * SHL_LONG: high-word bits are funneled into the low word with the
     * (not; sll 1; sll) trick to avoid an undefined shift by 32, and
     * the movn pair handles shift distances 32..63, where the high
     * result word becomes pure sign bits (a3 = sign(ahi)).
     */
    /* shr-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */
    sra     rRESULT1, rARG1, a2		#  rhi<- ahi >> (shift&31)
    srl     rRESULT0, rARG0, a2		#  rlo<- alo >> (shift&31)
    sra     a3, rARG1, 31		#  a3<- sign(ahi): all 0s or all 1s
    not     rARG0, a2			#  rARG0<- ~shift = 31-(shift&31) (mod 32)
    sll     rARG1, 1			#  ahi<<=1: first half of << (32-(shift&31))
    sll     rARG1, rARG0		#  ahi<- ahi << (32-(shift&31))
    or      rRESULT0, rARG1		#  rlo<- rlo | bits carried down from ahi
    andi    a2, 0x20			#  a2<- shift & 0x20 (set if shift >= 32)
    movn    rRESULT0, rRESULT1, a2	#  rlo<- rhi (if shift&0x20)
    movn    rRESULT1, a3, a2		#  rhi<- sign(ahi) (if shift&0x20)
    RETURN
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_USHR_LONG
dvmCompiler_TEMPLATE_USHR_LONG:
/* File: mips/TEMPLATE_USHR_LONG.S */
    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     *
     * Logical (zero-filling) right shift.  Identical structure to
     * SHR_LONG except srl replaces sra and the high word becomes zero
     * (instead of sign bits) for shift distances 32..63.
     */
    /* ushr-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */
    srl     rRESULT1, rARG1, a2		#  rhi<- ahi >> (shift&31)
    srl     rRESULT0, rARG0, a2		#  rlo<- alo >> (shift&31)
    not     rARG0, a2			#  rARG0<- ~shift = 31-(shift&31) (mod 32)
    sll     rARG1, 1			#  ahi<<=1: first half of << (32-(shift&31))
    sll     rARG1, rARG0		#  ahi<- ahi << (32-(shift&31))
    or      rRESULT0, rARG1		#  rlo<- rlo | bits carried down from ahi
    andi    a2, 0x20			#  a2<- shift & 0x20 (set if shift >= 32)
    movn    rRESULT0, rRESULT1, a2	#  rlo<- rhi (if shift&0x20)
    movn    rRESULT1, zero, a2		#  rhi<- 0 (if shift&0x20)
    RETURN
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP
dvmCompiler_TEMPLATE_ADD_FLOAT_VFP:
/* File: mips/TEMPLATE_ADD_FLOAT_VFP.S */
/* File: mips/fbinop.S */
    /*
     * Generic 32-bit binary float operation. a0 = a1 op a2.
     *
     * For: add-fp, sub-fp, mul-fp, div-fp
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = op1 address
     *     a2 = op2 address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     *
     * This expansion is float addition: the ".if 0" regions are the
     * template's optional divide-by-zero check, disabled here
     * (chkzero=0).  SOFT_FLOAT builds call the compiler runtime
     * helper __addsf3 (args in a0/a1, result in v0); hard-float
     * builds use the FPU add.s instruction.
     */
    move rOBJ, a0                       # rOBJ<- target vreg address, preserved across op
#ifdef  SOFT_FLOAT
    LOAD(a0, a1)                        # a0<- vBB
    LOAD(a1, a2)                        # a1<- vCC
    .if 0
    beqz    a1, common_errDivideByZero  # is second operand zero?
    .endif
                               # optional op
    JAL(__addsf3)                              # v0 = result
    STORE(v0, rOBJ)                     # vAA <- v0
#else
    LOAD_F(fa0, a1)                     # fa0<- vBB
    LOAD_F(fa1, a2)                     # fa1<- vCC
    .if 0
    # is second operand zero?
    li.s        ft0, 0
    c.eq.s      fcc0, ft0, fa1          # condition bit and comparision with 0
    bc1t        fcc0, common_errDivideByZero
    .endif
                               # optional op
    add.s fv0, fa0, fa1                            # fv0 = result
    STORE_F(fv0, rOBJ)                  # vAA <- fv0
#endif
    RETURN
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP
dvmCompiler_TEMPLATE_SUB_FLOAT_VFP:
/* File: mips/TEMPLATE_SUB_FLOAT_VFP.S */
/* File: mips/fbinop.S */
    /*
     * Generic 32-bit binary float operation. a0 = a1 op a2.
     *
     * For: add-fp, sub-fp, mul-fp, div-fp
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = op1 address
     *     a2 = op2 address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     *
     * This expansion is float subtraction (vAA <- vBB - vCC).  The
     * ".if 0" regions are the disabled (chkzero=0) divide-by-zero
     * check.  SOFT_FLOAT builds call __subsf3 (result in v0);
     * hard-float builds use sub.s.
     */
    move rOBJ, a0                       # rOBJ<- target vreg address, preserved across op
#ifdef  SOFT_FLOAT
    LOAD(a0, a1)                        # a0<- vBB
    LOAD(a1, a2)                        # a1<- vCC
    .if 0
    beqz    a1, common_errDivideByZero  # is second operand zero?
    .endif
                               # optional op
    JAL(__subsf3)                              # v0 = result
    STORE(v0, rOBJ)                     # vAA <- v0
#else
    LOAD_F(fa0, a1)                     # fa0<- vBB
    LOAD_F(fa1, a2)                     # fa1<- vCC
    .if 0
    # is second operand zero?
    li.s        ft0, 0
    c.eq.s      fcc0, ft0, fa1          # condition bit and comparision with 0
    bc1t        fcc0, common_errDivideByZero
    .endif
                               # optional op
    sub.s fv0, fa0, fa1                            # fv0 = result
    STORE_F(fv0, rOBJ)                  # vAA <- fv0
#endif
    RETURN
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP
dvmCompiler_TEMPLATE_MUL_FLOAT_VFP:
/* File: mips/TEMPLATE_MUL_FLOAT_VFP.S */
/* File: mips/fbinop.S */
    /*
     * Generic 32-bit binary float operation. a0 = a1 op a2.
     *
     * For: add-fp, sub-fp, mul-fp, div-fp
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = op1 address
     *     a2 = op2 address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     *
     * This expansion is float multiplication.  The ".if 0" regions are
     * the disabled (chkzero=0) divide-by-zero check.  SOFT_FLOAT builds
     * call __mulsf3 (result in v0); hard-float builds use mul.s.
     */
    move rOBJ, a0                       # rOBJ<- target vreg address, preserved across op
#ifdef  SOFT_FLOAT
    LOAD(a0, a1)                        # a0<- vBB
    LOAD(a1, a2)                        # a1<- vCC
    .if 0
    beqz    a1, common_errDivideByZero  # is second operand zero?
    .endif
                               # optional op
    JAL(__mulsf3)                              # v0 = result
    STORE(v0, rOBJ)                     # vAA <- v0
#else
    LOAD_F(fa0, a1)                     # fa0<- vBB
    LOAD_F(fa1, a2)                     # fa1<- vCC
    .if 0
    # is second operand zero?
    li.s        ft0, 0
    c.eq.s      fcc0, ft0, fa1          # condition bit and comparision with 0
    bc1t        fcc0, common_errDivideByZero
    .endif
                               # optional op
    mul.s fv0, fa0, fa1                            # fv0 = result
    STORE_F(fv0, rOBJ)                  # vAA <- fv0
#endif
    RETURN
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP
dvmCompiler_TEMPLATE_DIV_FLOAT_VFP:
/* File: mips/TEMPLATE_DIV_FLOAT_VFP.S */
/* File: mips/fbinop.S */
    /*
     * Generic 32-bit binary float operation. a0 = a1 op a2.
     *
     * For: add-fp, sub-fp, mul-fp, div-fp
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = op1 address
     *     a2 = op2 address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     *
     * This expansion is float division.  Note the divide-by-zero check
     * (".if 0") stays disabled even here: floating-point division by
     * zero does not throw in Dalvik (IEEE semantics produce +/-Inf or
     * NaN).  SOFT_FLOAT builds call __divsf3 (result in v0);
     * hard-float builds use div.s.
     */
    move rOBJ, a0                       # rOBJ<- target vreg address, preserved across op
#ifdef  SOFT_FLOAT
    LOAD(a0, a1)                        # a0<- vBB
    LOAD(a1, a2)                        # a1<- vCC
    .if 0
    beqz    a1, common_errDivideByZero  # is second operand zero?
    .endif
                               # optional op
    JAL(__divsf3)                              # v0 = result
    STORE(v0, rOBJ)                     # vAA <- v0
#else
    LOAD_F(fa0, a1)                     # fa0<- vBB
    LOAD_F(fa1, a2)                     # fa1<- vCC
    .if 0
    # is second operand zero?
    li.s        ft0, 0
    c.eq.s      fcc0, ft0, fa1          # condition bit and comparision with 0
    bc1t        fcc0, common_errDivideByZero
    .endif
                               # optional op
    div.s fv0, fa0, fa1                            # fv0 = result
    STORE_F(fv0, rOBJ)                  # vAA <- fv0
#endif
    RETURN
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP
dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP:
/* File: mips/TEMPLATE_ADD_DOUBLE_VFP.S */
/* File: mips/fbinopWide.S */
    /*
     * Generic 64-bit binary operation.  Provide an "instr" line that
     * specifies an instruction that performs "result = a0-a1 op a2-a3".
     * This could be an MIPS instruction or a function call.
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * vCC (a1).  Useful for integer division and modulus.
     *
     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
     *      xor-long, add-double, sub-double, mul-double, div-double,
     *      rem-double
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = op1 address
     *     a2 = op2 address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     *
     * This expansion is double addition with chkzero=0 (the ".if 0"
     * check is disabled).  SOFT_FLOAT builds call __adddf3 with the
     * operand pairs in a0/a1 and a2/a3, result in v0/v1; hard-float
     * builds use add.d on the even/odd FP register pairs.
     */
    move rOBJ, a0                       # rOBJ<- target vreg address, preserved across op
#ifdef  SOFT_FLOAT
    move t0, a1                         # save a1 (a0/a1 are about to be overwritten)
    move t1, a2                         # save a2
    LOAD64(rARG0, rARG1, t0)            # a0/a1<- vBB/vBB+1
    LOAD64(rARG2, rARG3, t1)            # a2/a3<- vCC/vCC+1
    .if 0
    or          t0, rARG2, rARG3        # second arg (a2-a3) is zero?
    beqz        t0, common_errDivideByZero
    .endif
                               # optional op
    JAL(__adddf3)                              # result<- op, a0-a3 changed
    STORE64(rRESULT0, rRESULT1, rOBJ)   # vAA/vAA+1<- result pair
#else
    LOAD64_F(fa0, fa0f, a1)             # fa0/fa0f<- vBB/vBB+1
    LOAD64_F(fa1, fa1f, a2)             # fa1/fa1f<- vCC/vCC+1
    .if 0
    li.d        ft0, 0
    c.eq.d      fcc0, fa1, ft0
    bc1t        fcc0, common_errDivideByZero
    .endif
                               # optional op
    add.d fv0, fa0, fa1                 # fv0 = result
    STORE64_F(fv0, fv0f, rOBJ)          # vAA/vAA+1<- fv0/fv0f
#endif
    RETURN
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP
dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP:
/* File: mips/TEMPLATE_SUB_DOUBLE_VFP.S */
/* File: mips/fbinopWide.S */
    /*
     * Generic 64-bit binary operation.  Provide an "instr" line that
     * specifies an instruction that performs "result = a0-a1 op a2-a3".
     * This could be an MIPS instruction or a function call.
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * vCC (a1).  Useful for integer division and modulus.
     *
     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
     *      xor-long, add-double, sub-double, mul-double, div-double,
     *      rem-double
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = op1 address
     *     a2 = op2 address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     *
     * This expansion is double subtraction (vAA <- vBB - vCC) with
     * chkzero=0.  SOFT_FLOAT builds call __subdf3 (result in v0/v1);
     * hard-float builds use sub.d.
     */
    move rOBJ, a0                       # rOBJ<- target vreg address, preserved across op
#ifdef  SOFT_FLOAT
    move t0, a1                         # save a1 (a0/a1 are about to be overwritten)
    move t1, a2                         # save a2
    LOAD64(rARG0, rARG1, t0)            # a0/a1<- vBB/vBB+1
    LOAD64(rARG2, rARG3, t1)            # a2/a3<- vCC/vCC+1
    .if 0
    or          t0, rARG2, rARG3        # second arg (a2-a3) is zero?
    beqz        t0, common_errDivideByZero
    .endif
                               # optional op
    JAL(__subdf3)                              # result<- op, a0-a3 changed
    STORE64(rRESULT0, rRESULT1, rOBJ)   # vAA/vAA+1<- result pair
#else
    LOAD64_F(fa0, fa0f, a1)             # fa0/fa0f<- vBB/vBB+1
    LOAD64_F(fa1, fa1f, a2)             # fa1/fa1f<- vCC/vCC+1
    .if 0
    li.d        ft0, 0
    c.eq.d      fcc0, fa1, ft0
    bc1t        fcc0, common_errDivideByZero
    .endif
                               # optional op
    sub.d fv0, fa0, fa1                 # fv0 = result
    STORE64_F(fv0, fv0f, rOBJ)          # vAA/vAA+1<- fv0/fv0f
#endif
    RETURN
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP
dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP:
/* File: mips/TEMPLATE_MUL_DOUBLE_VFP.S */
/* File: mips/fbinopWide.S */
    /*
     * Generic 64-bit binary operation.  Provide an "instr" line that
     * specifies an instruction that performs "result = a0-a1 op a2-a3".
     * This could be an MIPS instruction or a function call.
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * vCC (a1).  Useful for integer division and modulus.
     *
     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
     *      xor-long, add-double, sub-double, mul-double, div-double,
     *      rem-double
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = op1 address
     *     a2 = op2 address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     *
     * This expansion is double multiplication with chkzero=0.
     * SOFT_FLOAT builds call __muldf3 (result in v0/v1); hard-float
     * builds use mul.d.
     */
    move rOBJ, a0                       # rOBJ<- target vreg address, preserved across op
#ifdef  SOFT_FLOAT
    move t0, a1                         # save a1 (a0/a1 are about to be overwritten)
    move t1, a2                         # save a2
    LOAD64(rARG0, rARG1, t0)            # a0/a1<- vBB/vBB+1
    LOAD64(rARG2, rARG3, t1)            # a2/a3<- vCC/vCC+1
    .if 0
    or          t0, rARG2, rARG3        # second arg (a2-a3) is zero?
    beqz        t0, common_errDivideByZero
    .endif
                               # optional op
    JAL(__muldf3)                              # result<- op, a0-a3 changed
    STORE64(rRESULT0, rRESULT1, rOBJ)   # vAA/vAA+1<- result pair
#else
    LOAD64_F(fa0, fa0f, a1)             # fa0/fa0f<- vBB/vBB+1
    LOAD64_F(fa1, fa1f, a2)             # fa1/fa1f<- vCC/vCC+1
    .if 0
    li.d        ft0, 0
    c.eq.d      fcc0, fa1, ft0
    bc1t        fcc0, common_errDivideByZero
    .endif
                               # optional op
    mul.d fv0, fa0, fa1                 # fv0 = result
    STORE64_F(fv0, fv0f, rOBJ)          # vAA/vAA+1<- fv0/fv0f
#endif
    RETURN
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP
dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP:
/* File: mips/TEMPLATE_DIV_DOUBLE_VFP.S */
/* File: mips/fbinopWide.S */
    /*
     * Generic 64-bit binary operation.  Provide an "instr" line that
     * specifies an instruction that performs "result = a0-a1 op a2-a3".
     * This could be an MIPS instruction or a function call.
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * vCC (a1).  Useful for integer division and modulus.
     *
     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
     *      xor-long, add-double, sub-double, mul-double, div-double,
     *      rem-double
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = op1 address
     *     a2 = op2 address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     *
     * This expansion is double division.  chkzero stays 0 because
     * floating-point division by zero follows IEEE semantics (Inf/NaN)
     * rather than throwing.  SOFT_FLOAT builds call __divdf3 (result
     * in v0/v1); hard-float builds use div.d.
     */
    move rOBJ, a0                       # rOBJ<- target vreg address, preserved across op
#ifdef  SOFT_FLOAT
    move t0, a1                         # save a1 (a0/a1 are about to be overwritten)
    move t1, a2                         # save a2
    LOAD64(rARG0, rARG1, t0)            # a0/a1<- vBB/vBB+1
    LOAD64(rARG2, rARG3, t1)            # a2/a3<- vCC/vCC+1
    .if 0
    or          t0, rARG2, rARG3        # second arg (a2-a3) is zero?
    beqz        t0, common_errDivideByZero
    .endif
                               # optional op
    JAL(__divdf3)                              # result<- op, a0-a3 changed
    STORE64(rRESULT0, rRESULT1, rOBJ)   # vAA/vAA+1<- result pair
#else
    LOAD64_F(fa0, fa0f, a1)             # fa0/fa0f<- vBB/vBB+1
    LOAD64_F(fa1, fa1f, a2)             # fa1/fa1f<- vCC/vCC+1
    .if 0
    li.d        ft0, 0
    c.eq.d      fcc0, fa1, ft0
    bc1t        fcc0, common_errDivideByZero
    .endif
                               # optional op
    div.d fv0, fa0, fa1                 # fv0 = result
    STORE64_F(fv0, fv0f, rOBJ)          # vAA/vAA+1<- fv0/fv0f
#endif
    RETURN
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP
dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP:
/* File: mips/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */
/* File: mips/funopNarrower.S */
    /*
     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
     * that specifies an instruction that performs "result = op a0/a1", where
     * "result" is a 32-bit quantity in a0.
     *
     * For: long-to-float, double-to-int, double-to-float
     * If hard floating point support is available, use fa0 as the parameter, except for
     * long-to-float opcode.
     * (This would work for long-to-int, but that instruction is actually
     * an exact match for OP_MOVE.)
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = src dalvik register address
     *
     * This expansion narrows double to float: SOFT_FLOAT builds call
     * __truncdfsf2 (double arg in a0/a1, float result in v0);
     * hard-float builds use cvt.s.d.  Only one of the two set_vreg
     * labels below is assembled, selected by the #ifdef.
     */
    move rINST, a0                      # rINST<- target vreg address, preserved across op
#ifdef  SOFT_FLOAT
    move t0, a1                         # save a1 (a0/a1 are about to be overwritten)
    LOAD64(rARG0, rARG1, t0)            # a0/a1<- vB/vB+1
                               # optional op
    JAL(__truncdfsf2)                              # v0<- op, a0-a3 changed
.LTEMPLATE_DOUBLE_TO_FLOAT_VFP_set_vreg:
    STORE(v0, rINST)                    # vA<- v0
#else
    LOAD64_F(fa0, fa0f, a1)             # fa0/fa0f<- vB/vB+1
                               # optional op
    cvt.s.d  fv0,fa0                            # fv0 = result
.LTEMPLATE_DOUBLE_TO_FLOAT_VFP_set_vreg_f:
    STORE_F(fv0, rINST)                 # vA<- fv0
#endif
    RETURN
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP
dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP:
/* File: mips/TEMPLATE_DOUBLE_TO_INT_VFP.S */
/* File: mips/funopNarrower.S */
    /*
     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
     * that specifies an instruction that performs "result = op a0/a1", where
     * "result" is a 32-bit quantity in a0.
     *
     * For: long-to-float, double-to-int, double-to-float
     * If hard floating point support is available, use fa0 as the parameter, except for
     * long-to-float opcode.
     * (This would work for long-to-int, but that instruction is actually
     * an exact match for OP_MOVE.)
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = src dalvik register address
     *
     * Here the "instr" is an unconditional branch out to d2i_doconv
     * (defined below), which clamps to int min/max per the Dalvik
     * spec and then branches BACK to the .L..._set_vreg(_f) label with
     * the converted value in v0 (SOFT_FLOAT) or fv0 (hard float).
     * The store is therefore reached only via that return branch.
     */
    move rINST, a0                      # rINST<- target vreg address, preserved across op
#ifdef  SOFT_FLOAT
    move t0, a1                         # save a1 (a0/a1 are about to be overwritten)
    LOAD64(rARG0, rARG1, t0)            # a0/a1<- vB/vB+1
                               # optional op
    b    d2i_doconv                              # clamp+convert; branches back below
.LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg:
    STORE(v0, rINST)                    # vA<- v0
#else
    LOAD64_F(fa0, fa0f, a1)             # fa0/fa0f<- vB/vB+1
                               # optional op
    b    d2i_doconv                            # clamp+convert; branches back below
.LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f:
    STORE_F(fv0, rINST)                 # vA<- fv0
#endif
    RETURN
/*
 * Convert the double in a0/a1 to an int in a0.
 *
 * We have to clip values to int min/max per the specification.  The
 * expected common case is a "reasonable" value that converts directly
 * to modest integer.  The EABI convert function isn't doing this for us.
 * Use rBIX / rOBJ as global to hold arguments (they are not bound to a global var)
 *
 * Clamp order: arg >= maxint -> 0x7fffffff; arg <= minint ->
 * 0x80000000; NaN -> 0; otherwise truncate toward zero.  Exits by
 * branching back into TEMPLATE_DOUBLE_TO_INT_VFP at the
 * .L..._set_vreg (result in v0) or .L..._set_vreg_f (result in fv0)
 * label.
 */

d2i_doconv:
#ifdef SOFT_FLOAT
    la          t0, .LDOUBLE_TO_INT_max
    LOAD64(rARG2, rARG3, t0)                   # a2/a3<- maxint as a double
    move        rBIX, rARG0                       # save a0
    move        rOBJ, rARG1                       #  and a1 (helpers clobber a0-a3)
    JAL(__gedf2)                               # is arg >= maxint?  v0 >= 0 means yes

    move        t0, v0
    li          v0, ~0x80000000                # return maxint (7fffffff)
    bgez        t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg       # nonzero == yes

    move        rARG0, rBIX                       # recover arg
    move        rARG1, rOBJ
    la          t0, .LDOUBLE_TO_INT_min
    LOAD64(rARG2, rARG3, t0)                   # a2/a3<- minint as a double
    JAL(__ledf2)                               # is arg <= minint?  v0 <= 0 means yes

    move        t0, v0
    li          v0, 0x80000000                 # return minint (80000000)
    blez        t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg       # nonzero == yes

    move        rARG0, rBIX                  # recover arg
    move        rARG1, rOBJ
    move        rARG2, rBIX                  # compare against self: unequal only for NaN
    move        rARG3, rOBJ
    JAL(__nedf2)                        # is arg == self?

    move        t0, v0                  # zero == no
    li          v0, 0
    bnez        t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg        # return zero for NaN

    move        rARG0, rBIX                  # recover arg
    move        rARG1, rOBJ
    JAL(__fixdfsi)                      # convert double to int (truncates)
    b           .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg
#else
    la          t0, .LDOUBLE_TO_INT_max
    LOAD64_F(fa1, fa1f, t0)                    # fa1<- maxint as a double
    c.ole.d     fcc0, fa1, fa0                 # fcc0<- maxint <= arg?
    l.s         fv0, .LDOUBLE_TO_INT_maxret    # preload 0x7fffffff bit pattern
    bc1t        .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f

    la          t0, .LDOUBLE_TO_INT_min
    LOAD64_F(fa1, fa1f, t0)                    # fa1<- minint as a double
    c.ole.d     fcc0, fa0, fa1                 # fcc0<- arg <= minint?
    l.s         fv0, .LDOUBLE_TO_INT_minret    # preload 0x80000000 bit pattern
    bc1t        .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f

    mov.d       fa1, fa0
    c.un.d      fcc0, fa0, fa1                 # unordered with itself <=> NaN
    li.s        fv0, 0
    bc1t        .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f

    trunc.w.d   fv0, fa0                       # common case: truncate toward zero
    b           .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f
#endif


.LDOUBLE_TO_INT_max:
    .dword   0x41dfffffffc00000                  # maxint (2147483647), as a double
.LDOUBLE_TO_INT_min:
    .dword   0xc1e0000000000000                  # minint (-2147483648), as a double
.LDOUBLE_TO_INT_maxret:
    .word   0x7fffffff                           # int result for overflow high
.LDOUBLE_TO_INT_minret:
    .word   0x80000000                           # int result for overflow low
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP
dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP:
/* File: mips/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */
/* File: mips/funopWider.S */
    /*
     * Generic 32bit-to-64bit floating point unary operation.  Provide an
     * "instr" line that specifies an instruction that performs "d0 = op s0".
     *
     * For: int-to-double, float-to-double
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = src dalvik register address
     *
     * This expansion widens float to double: SOFT_FLOAT builds call
     * __extendsfdf2 (float arg in a0, double result in v0/v1);
     * hard-float builds use cvt.d.s.  The same set_vreg label appears
     * in both arms; only one is assembled, selected by the #ifdef.
     */
    /* unop vA, vB */
    move rOBJ, a0                       # rOBJ<- target vreg address, preserved across op
#ifdef  SOFT_FLOAT
    LOAD(a0, a1)                        # a0<- vB
                               # optional op
    JAL(__extendsfdf2)                              # result<- op, a0-a3 changed

.LTEMPLATE_FLOAT_TO_DOUBLE_VFP_set_vreg:
    STORE64(rRESULT0, rRESULT1, rOBJ)   # vA/vA+1<- v0/v1
#else
    LOAD_F(fa0, a1)                     # fa0<- vB
                               # optional op
    cvt.d.s fv0, fa0                    # fv0 = (double)fa0

.LTEMPLATE_FLOAT_TO_DOUBLE_VFP_set_vreg:
    STORE64_F(fv0, fv0f, rOBJ)                          # vA/vA+1<- fv0/fv0f
#endif
    RETURN
/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP
dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP:
/* File: mips/TEMPLATE_FLOAT_TO_INT_VFP.S */
/* File: mips/funop.S */
    /*
     * Generic 32-bit unary operation.  Provide an "instr" line that
     * specifies an instruction that performs "result = op a0".
     * This could be a MIPS instruction or a function call.
     *
     * for: int-to-float, float-to-int
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = src dalvik register address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     *
     * Here the "instr" branches out to f2i_doconv (defined below),
     * which clamps to int min/max per the Dalvik spec and branches
     * BACK to the .L..._set_vreg(_f) label with the converted value in
     * v0 (SOFT_FLOAT) or fv0 (hard float).  The store is therefore
     * reached only via that return branch.
     */
    move rOBJ, a0                       # rOBJ<- target vreg address, preserved across op
#ifdef SOFT_FLOAT
    LOAD(a0, a1)                        # a0<- vBB
                               # optional op
    b    f2i_doconv                              # clamp+convert; branches back below
.LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg:
    STORE(v0, rOBJ)                     # vAA<- v0
#else
    LOAD_F(fa0, a1)                     # fa0<- vBB
                               # optional op
    b        f2i_doconv                            # clamp+convert; branches back below
.LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f:
    STORE_F(fv0, rOBJ)                  # vAA <- fv0
#endif
    RETURN
/*
 * Not an entry point as it is used only once !!
 *
 * Convert the float in a0 (SOFT_FLOAT) / fa0 (hard float) to an int,
 * clamping to int min/max per the Dalvik spec: arg >= maxint ->
 * 0x7fffffff; arg <= minint -> 0x80000000; NaN -> 0; otherwise
 * truncate toward zero.  Exits by branching back into
 * TEMPLATE_FLOAT_TO_INT_VFP at the .L..._set_vreg (result in v0) or
 * .L..._set_vreg_f (result in fv0) label.
 */
f2i_doconv:
#ifdef SOFT_FLOAT
        li      a1, 0x4f000000  # (float)maxint: bit pattern of 2^31 as a float
        move    rBIX, a0        # save arg (helpers clobber a0/a1)
        JAL(__gesf2)            # is arg >= maxint?  v0 >= 0 means yes
        move    t0, v0
        li      v0, ~0x80000000 # return maxint (7fffffff)
        bgez    t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg

        move    a0, rBIX                # recover arg
        li      a1, 0xcf000000  # (float)minint: bit pattern of -2^31 as a float
        JAL(__lesf2)            # is arg <= minint?  v0 <= 0 means yes

        move    t0, v0
        li      v0, 0x80000000  # return minint (80000000)
        blez    t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg
        move    a0, rBIX
        move    a1, rBIX        # compare against self: unequal only for NaN
        JAL(__nesf2)

        move    t0, v0
        li      v0, 0           # return zero for NaN
        bnez    t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg

        move    a0, rBIX
        JAL(__fixsfsi)          # common case: convert float to int (truncates)
        b .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg
#else
        l.s             fa1, .LFLOAT_TO_INT_max
        c.ole.s         fcc0, fa1, fa0          # fcc0<- maxint <= arg?
        l.s             fv0, .LFLOAT_TO_INT_ret_max  # preload 0x7fffffff bit pattern
        bc1t            .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f

        l.s             fa1, .LFLOAT_TO_INT_min
        c.ole.s         fcc0, fa0, fa1          # fcc0<- arg <= minint?
        l.s             fv0, .LFLOAT_TO_INT_ret_min  # preload 0x80000000 bit pattern
        bc1t            .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f

        mov.s           fa1, fa0
        c.un.s          fcc0, fa0, fa1          # unordered with itself <=> NaN
        li.s            fv0, 0
        bc1t            .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f

        trunc.w.s       fv0, fa0                # common case: truncate toward zero
        b .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f
#endif

.LFLOAT_TO_INT_max:
        .word   0x4f000000                      # 2^31 as a float (maxint threshold)
.LFLOAT_TO_INT_min:
        .word   0xcf000000                      # -2^31 as a float (minint threshold)
.LFLOAT_TO_INT_ret_max:
        .word   0x7fffffff                      # int result for overflow high
.LFLOAT_TO_INT_ret_min:
        .word   0x80000000                      # int result for overflow low
1659/* ------------------------------ */
1660    .balign 4
1661    .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP
1662dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP:
1663/* File: mips/TEMPLATE_INT_TO_DOUBLE_VFP.S */
1664/* File: mips/funopWider.S */
1665    /*
1666     * Generic 32bit-to-64bit floating point unary operation.  Provide an
1667     * "instr" line that specifies an instruction that performs "d0 = op s0".
1668     *
1669     * For: int-to-double, float-to-double
1670     *
1671     * On entry:
1672     *     a0 = target dalvik register address
1673     *     a1 = src dalvik register address
1674     */
1675    /* unop vA, vB */
1676    move rOBJ, a0                       # save a0
1677#ifdef  SOFT_FLOAT
1678    LOAD(a0, a1)                        # a0<- vB
1679                               # optional op
1680    JAL(__floatsidf)                              # result<- op, a0-a3 changed
1681
1682.LTEMPLATE_INT_TO_DOUBLE_VFP_set_vreg:
1683    STORE64(rRESULT0, rRESULT1, rOBJ)   # vA/vA+1<- v0/v1
1684#else
1685    LOAD_F(fa0, a1)                     # fa0<- vB
1686                               # optional op
1687    cvt.d.w    fv0, fa0
1688
1689.LTEMPLATE_INT_TO_DOUBLE_VFP_set_vreg:
1690    STORE64_F(fv0, fv0f, rOBJ)                          # vA/vA+1<- fv0/fv0f
1691#endif
1692    RETURN
1693
1694
1695/* ------------------------------ */
1696    .balign 4
1697    .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP
1698dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP:
1699/* File: mips/TEMPLATE_INT_TO_FLOAT_VFP.S */
1700/* File: mips/funop.S */
1701    /*
1702     * Generic 32-bit unary operation.  Provide an "instr" line that
1703     * specifies an instruction that performs "result = op a0".
1704     * This could be a MIPS instruction or a function call.
1705     *
1706     * for: int-to-float, float-to-int
1707     *
1708     * On entry:
1709     *     a0 = target dalvik register address
1710     *     a1 = src dalvik register address
1711     *
1712     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
1713     *
1714     */
1715    move rOBJ, a0                       # save a0
1716#ifdef SOFT_FLOAT
1717    LOAD(a0, a1)                        # a0<- vBB
1718                               # optional op
1719    JAL(__floatsisf)                              # v0<- op, a0-a3 changed
1720.LTEMPLATE_INT_TO_FLOAT_VFP_set_vreg:
1721    STORE(v0, rOBJ)                     # vAA<- v0
1722#else
1723    LOAD_F(fa0, a1)                     # fa0<- vBB
1724                               # optional op
1725    cvt.s.w fv0, fa0                            # fv0 = result
1726.LTEMPLATE_INT_TO_FLOAT_VFP_set_vreg_f:
1727    STORE_F(fv0, rOBJ)                  # vAA <- fv0
1728#endif
1729    RETURN
1730
1731
1732/* ------------------------------ */
1733    .balign 4
1734    .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP
1735dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP:
1736/* File: mips/TEMPLATE_CMPG_DOUBLE_VFP.S */
1737/* File: mips/TEMPLATE_CMPL_DOUBLE_VFP.S */
1738    /*
1739     * Compare two double precision floating-point values.  Puts 0, 1, or -1 into the
1740     * destination register based on the results of the comparison.
1741     *
1742     * Provide a "naninst" instruction that puts 1 or -1 into a1 depending
1743     * on what value we'd like to return when one of the operands is NaN.
1744     *
1745     * The operation we're implementing is:
1746     *   if (x == y)
1747     *     return 0;
1748     *   else if (x < y)
1749     *     return -1;
1750     *   else if (x > y)
1751     *     return 1;
1752     *   else
1753     *     return {-1,1};  // one or both operands was NaN
1754     *
1755     * On entry:
1756     *    a0 = &op1 [vBB]
1757     *    a1 = &op2 [vCC]
1758     *
1759     * for: cmpl-double, cmpg-double
1760     */
1761    /* op vAA, vBB, vCC */
1762
1763    /* "clasic" form */
1764#ifdef  SOFT_FLOAT
1765    move rOBJ, a0                       # save a0
1766    move rBIX, a1                       # save a1
1767    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
1768    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
1769    JAL(__eqdf2)                        # v0<- (vBB == vCC)
1770    li       rTEMP, 0                   # vAA<- 0
1771    beqz     v0, TEMPLATE_CMPG_DOUBLE_VFP_finish
1772    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
1773    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
1774    JAL(__ltdf2)                        # a0<- (vBB < vCC)
1775    li       rTEMP, -1                  # vAA<- -1
1776    bltz     v0, TEMPLATE_CMPG_DOUBLE_VFP_finish
1777    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
1778    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
1779    JAL(__gtdf2)                        # v0<- (vBB > vCC)
1780    li      rTEMP, 1                    # vAA<- 1
1781    bgtz    v0, TEMPLATE_CMPG_DOUBLE_VFP_finish
1782#else
1783    LOAD64_F(fs0, fs0f, a0)             # fs0<- vBB
1784    LOAD64_F(fs1, fs1f, a1)             # fs1<- vCC
1785    c.olt.d     fcc0, fs0, fs1          # Is fs0 < fs1
1786    li          rTEMP, -1
1787    bc1t        fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish
1788    c.olt.d     fcc0, fs1, fs0
1789    li          rTEMP, 1
1790    bc1t        fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish
1791    c.eq.d      fcc0, fs0, fs1
1792    li          rTEMP, 0
1793    bc1t        fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish
1794#endif
1795
1796    li            rTEMP, 1
1797
1798TEMPLATE_CMPG_DOUBLE_VFP_finish:
1799    move     v0, rTEMP                  # v0<- vAA
1800    RETURN
1801
1802
1803/* ------------------------------ */
1804    .balign 4
1805    .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP
1806dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP:
1807/* File: mips/TEMPLATE_CMPL_DOUBLE_VFP.S */
1808    /*
1809     * Compare two double precision floating-point values.  Puts 0, 1, or -1 into the
1810     * destination register based on the results of the comparison.
1811     *
1812     * Provide a "naninst" instruction that puts 1 or -1 into a1 depending
1813     * on what value we'd like to return when one of the operands is NaN.
1814     *
1815     * The operation we're implementing is:
1816     *   if (x == y)
1817     *     return 0;
1818     *   else if (x < y)
1819     *     return -1;
1820     *   else if (x > y)
1821     *     return 1;
1822     *   else
1823     *     return {-1,1};  // one or both operands was NaN
1824     *
1825     * On entry:
1826     *    a0 = &op1 [vBB]
1827     *    a1 = &op2 [vCC]
1828     *
1829     * for: cmpl-double, cmpg-double
1830     */
1831    /* op vAA, vBB, vCC */
1832
1833    /* "clasic" form */
1834#ifdef  SOFT_FLOAT
1835    move rOBJ, a0                       # save a0
1836    move rBIX, a1                       # save a1
1837    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
1838    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
1839    JAL(__eqdf2)                        # v0<- (vBB == vCC)
1840    li       rTEMP, 0                   # vAA<- 0
1841    beqz     v0, TEMPLATE_CMPL_DOUBLE_VFP_finish
1842    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
1843    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
1844    JAL(__ltdf2)                        # a0<- (vBB < vCC)
1845    li       rTEMP, -1                  # vAA<- -1
1846    bltz     v0, TEMPLATE_CMPL_DOUBLE_VFP_finish
1847    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
1848    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
1849    JAL(__gtdf2)                        # v0<- (vBB > vCC)
1850    li      rTEMP, 1                    # vAA<- 1
1851    bgtz    v0, TEMPLATE_CMPL_DOUBLE_VFP_finish
1852#else
1853    LOAD64_F(fs0, fs0f, a0)             # fs0<- vBB
1854    LOAD64_F(fs1, fs1f, a1)             # fs1<- vCC
1855    c.olt.d     fcc0, fs0, fs1          # Is fs0 < fs1
1856    li          rTEMP, -1
1857    bc1t        fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish
1858    c.olt.d     fcc0, fs1, fs0
1859    li          rTEMP, 1
1860    bc1t        fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish
1861    c.eq.d      fcc0, fs0, fs1
1862    li          rTEMP, 0
1863    bc1t        fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish
1864#endif
1865
1866    li     rTEMP, -1
1867
1868TEMPLATE_CMPL_DOUBLE_VFP_finish:
1869    move     v0, rTEMP                  # v0<- vAA
1870    RETURN
1871
1872/* ------------------------------ */
1873    .balign 4
1874    .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP
1875dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP:
1876/* File: mips/TEMPLATE_CMPG_FLOAT_VFP.S */
1877/* File: mips/TEMPLATE_CMPL_FLOAT_VFP.S */
1878    /*
1879     * Compare two floating-point values.  Puts 0, 1, or -1 into the
1880     * destination register based on the results of the comparison.
1881     *
1882     * Provide a "naninst" instruction that puts 1 or -1 into a1 depending
1883     * on what value we'd like to return when one of the operands is NaN.
1884     *
1885     * The operation we're implementing is:
1886     *   if (x == y)
1887     *     return 0;
1888     *   else if (x < y)
1889     *     return -1;
1890     *   else if (x > y)
1891     *     return 1;
1892     *   else
1893     *     return {-1,1};  // one or both operands was NaN
1894     *
1895     * On entry:
1896     *    a0 = &op1 [vBB]
1897     *    a1 = &op2 [vCC]
1898     *
1899     * for: cmpl-float, cmpg-float
1900     */
1901    /* op vAA, vBB, vCC */
1902
1903    /* "clasic" form */
1904#ifdef  SOFT_FLOAT
1905    LOAD(rOBJ, a0)                      # rOBJ<- vBB
1906    LOAD(rBIX, a1)                      # rBIX<- vCC
1907    move     a0, rOBJ                   # a0<- vBB
1908    move     a1, rBIX                   # a1<- vCC
1909    JAL(__eqsf2)                        # v0<- (vBB == vCC)
1910    li       rTEMP, 0                   # vAA<- 0
1911    beqz     v0, TEMPLATE_CMPG_FLOAT_VFP_finish
1912    move     a0, rOBJ                   # a0<- vBB
1913    move     a1, rBIX                   # a1<- vCC
1914    JAL(__ltsf2)                        # a0<- (vBB < vCC)
1915    li       rTEMP, -1                  # vAA<- -1
1916    bltz     v0, TEMPLATE_CMPG_FLOAT_VFP_finish
1917    move     a0, rOBJ                   # a0<- vBB
1918    move     a1, rBIX                   # a1<- vCC
1919    JAL(__gtsf2)                        # v0<- (vBB > vCC)
1920    li      rTEMP, 1                    # vAA<- 1
1921    bgtz    v0, TEMPLATE_CMPG_FLOAT_VFP_finish
1922#else
1923    LOAD_F(fs0, a0)                     # fs0<- vBB
1924    LOAD_F(fs1, a1)                     # fs1<- vCC
1925    c.olt.s     fcc0, fs0, fs1          #Is fs0 < fs1
1926    li          rTEMP, -1
1927    bc1t        fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish
1928    c.olt.s     fcc0, fs1, fs0
1929    li          rTEMP, 1
1930    bc1t        fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish
1931    c.eq.s      fcc0, fs0, fs1
1932    li          rTEMP, 0
1933    bc1t        fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish
1934#endif
1935
1936    li     rTEMP, 1
1937
1938TEMPLATE_CMPG_FLOAT_VFP_finish:
1939    move     v0, rTEMP                  # v0<- vAA
1940    RETURN
1941
1942
1943/* ------------------------------ */
1944    .balign 4
1945    .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP
1946dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP:
1947/* File: mips/TEMPLATE_CMPL_FLOAT_VFP.S */
1948    /*
1949     * Compare two floating-point values.  Puts 0, 1, or -1 into the
1950     * destination register based on the results of the comparison.
1951     *
1952     * Provide a "naninst" instruction that puts 1 or -1 into a1 depending
1953     * on what value we'd like to return when one of the operands is NaN.
1954     *
1955     * The operation we're implementing is:
1956     *   if (x == y)
1957     *     return 0;
1958     *   else if (x < y)
1959     *     return -1;
1960     *   else if (x > y)
1961     *     return 1;
1962     *   else
1963     *     return {-1,1};  // one or both operands was NaN
1964     *
1965     * On entry:
1966     *    a0 = &op1 [vBB]
1967     *    a1 = &op2 [vCC]
1968     *
1969     * for: cmpl-float, cmpg-float
1970     */
1971    /* op vAA, vBB, vCC */
1972
1973    /* "clasic" form */
1974#ifdef  SOFT_FLOAT
1975    LOAD(rOBJ, a0)                      # rOBJ<- vBB
1976    LOAD(rBIX, a1)                      # rBIX<- vCC
1977    move     a0, rOBJ                   # a0<- vBB
1978    move     a1, rBIX                   # a1<- vCC
1979    JAL(__eqsf2)                        # v0<- (vBB == vCC)
1980    li       rTEMP, 0                   # vAA<- 0
1981    beqz     v0, TEMPLATE_CMPL_FLOAT_VFP_finish
1982    move     a0, rOBJ                   # a0<- vBB
1983    move     a1, rBIX                   # a1<- vCC
1984    JAL(__ltsf2)                        # a0<- (vBB < vCC)
1985    li       rTEMP, -1                  # vAA<- -1
1986    bltz     v0, TEMPLATE_CMPL_FLOAT_VFP_finish
1987    move     a0, rOBJ                   # a0<- vBB
1988    move     a1, rBIX                   # a1<- vCC
1989    JAL(__gtsf2)                        # v0<- (vBB > vCC)
1990    li      rTEMP, 1                    # vAA<- 1
1991    bgtz    v0, TEMPLATE_CMPL_FLOAT_VFP_finish
1992#else
1993    LOAD_F(fs0, a0)                     # fs0<- vBB
1994    LOAD_F(fs1, a1)                     # fs1<- vCC
1995    c.olt.s     fcc0, fs0, fs1          #Is fs0 < fs1
1996    li          rTEMP, -1
1997    bc1t        fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish
1998    c.olt.s     fcc0, fs1, fs0
1999    li          rTEMP, 1
2000    bc1t        fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish
2001    c.eq.s      fcc0, fs0, fs1
2002    li          rTEMP, 0
2003    bc1t        fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish
2004#endif
2005
2006    li     rTEMP, -1
2007
2008TEMPLATE_CMPL_FLOAT_VFP_finish:
2009    move     v0, rTEMP                  # v0<- vAA
2010    RETURN
2011
2012/* ------------------------------ */
2013    .balign 4
2014    .global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP
2015dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP:
2016/* File: mips/TEMPLATE_SQRT_DOUBLE_VFP.S */
2017
2018    /*
2019     * 64-bit floating point sqrt operation.
2020     * If the result is a NaN, bail out to library code to do
2021     * the right thing.
2022     *
2023     * On entry:
2024     *     a2 src addr of op1
2025     * On exit:
2026     *     v0,v1/fv0 = res
2027     */
2028#ifdef  SOFT_FLOAT
2029    LOAD64(rARG0, rARG1, a2)        # a0/a1<- vBB/vBB+1
2030#else
2031    LOAD64_F(fa0, fa0f, a2)         # fa0/fa0f<- vBB/vBB+1
2032    sqrt.d	fv0, fa0
2033    c.eq.d	fv0, fv0
2034    bc1t	1f
2035#endif
2036    JAL(sqrt)
20371:
2038    RETURN
2039
2040/* ------------------------------ */
2041    .balign 4
2042    .global dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON
2043dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON:
2044/* File: mips/TEMPLATE_THROW_EXCEPTION_COMMON.S */
2045    /*
2046     * Throw an exception from JIT'ed code.
2047     * On entry:
2048     *    a0    Dalvik PC that raises the exception
2049     */
2050    j      .LhandleException
2051
2052/* ------------------------------ */
2053    .balign 4
2054    .global dvmCompiler_TEMPLATE_MEM_OP_DECODE
2055dvmCompiler_TEMPLATE_MEM_OP_DECODE:
2056/* File: mips/TEMPLATE_MEM_OP_DECODE.S */
2057#if defined(WITH_SELF_VERIFICATION)
2058    /*
2059     * This handler encapsulates heap memory ops for selfVerification mode.
2060     *
2061     * The call to the handler is inserted prior to a heap memory operation.
2062     * This handler then calls a function to decode the memory op, and process
2063     * it accordingly. Afterwards, the handler changes the return address to
2064     * skip the memory op so it never gets executed.
2065     */
2066#ifdef HARD_FLOAT
2067    /* push f0-f31 onto stack */
2068    sw      f0, fr0*-4(sp)              # push f0
2069    sw      f1, fr1*-4(sp)              # push f1
2070    sw      f2, fr2*-4(sp)              # push f2
2071    sw      f3, fr3*-4(sp)              # push f3
2072    sw      f4, fr4*-4(sp)              # push f4
2073    sw      f5, fr5*-4(sp)              # push f5
2074    sw      f6, fr6*-4(sp)              # push f6
2075    sw      f7, fr7*-4(sp)              # push f7
2076    sw      f8, fr8*-4(sp)              # push f8
2077    sw      f9, fr9*-4(sp)              # push f9
2078    sw      f10, fr10*-4(sp)            # push f10
2079    sw      f11, fr11*-4(sp)            # push f11
2080    sw      f12, fr12*-4(sp)            # push f12
2081    sw      f13, fr13*-4(sp)            # push f13
2082    sw      f14, fr14*-4(sp)            # push f14
2083    sw      f15, fr15*-4(sp)            # push f15
2084    sw      f16, fr16*-4(sp)            # push f16
2085    sw      f17, fr17*-4(sp)            # push f17
2086    sw      f18, fr18*-4(sp)            # push f18
2087    sw      f19, fr19*-4(sp)            # push f19
2088    sw      f20, fr20*-4(sp)            # push f20
2089    sw      f21, fr21*-4(sp)            # push f21
2090    sw      f22, fr22*-4(sp)            # push f22
2091    sw      f23, fr23*-4(sp)            # push f23
2092    sw      f24, fr24*-4(sp)            # push f24
2093    sw      f25, fr25*-4(sp)            # push f25
2094    sw      f26, fr26*-4(sp)            # push f26
2095    sw      f27, fr27*-4(sp)            # push f27
2096    sw      f28, fr28*-4(sp)            # push f28
2097    sw      f29, fr29*-4(sp)            # push f29
2098    sw      f30, fr30*-4(sp)            # push f30
2099    sw      f31, fr31*-4(sp)            # push f31
2100
2101    sub     sp, (32-0)*4                # adjust stack pointer
2102#endif
2103
2104    /* push gp registers (except zero, gp, sp, and fp) */
2105    .set noat
2106    sw      AT, r_AT*-4(sp)             # push at
2107    .set at
2108    sw      v0, r_V0*-4(sp)             # push v0
2109    sw      v1, r_V1*-4(sp)             # push v1
2110    sw      a0, r_A0*-4(sp)             # push a0
2111    sw      a1, r_A1*-4(sp)             # push a1
2112    sw      a2, r_A2*-4(sp)             # push a2
2113    sw      a3, r_A3*-4(sp)             # push a3
2114    sw      t0, r_T0*-4(sp)             # push t0
2115    sw      t1, r_T1*-4(sp)             # push t1
2116    sw      t2, r_T2*-4(sp)             # push t2
2117    sw      t3, r_T3*-4(sp)             # push t3
2118    sw      t4, r_T4*-4(sp)             # push t4
2119    sw      t5, r_T5*-4(sp)             # push t5
2120    sw      t6, r_T6*-4(sp)             # push t6
2121    sw      t7, r_T7*-4(sp)             # push t7
2122    sw      s0, r_S0*-4(sp)             # push s0
2123    sw      s1, r_S1*-4(sp)             # push s1
2124    sw      s2, r_S2*-4(sp)             # push s2
2125    sw      s3, r_S3*-4(sp)             # push s3
2126    sw      s4, r_S4*-4(sp)             # push s4
2127    sw      s5, r_S5*-4(sp)             # push s5
2128    sw      s6, r_S6*-4(sp)             # push s6
2129    sw      s7, r_S7*-4(sp)             # push s7
2130    sw      t8, r_T8*-4(sp)             # push t8
2131    sw      t9, r_T9*-4(sp)             # push t9
2132    sw      k0, r_K0*-4(sp)             # push k0
2133    sw      k1, r_K1*-4(sp)             # push k1
2134    sw      ra, r_RA*-4(sp)             # push RA
2135
2136    # Note: even if we don't save all 32 registers, we still need to
2137    #       adjust SP by 32 registers due to the way we are storing
2138    #       the registers on the stack.
2139    sub     sp, (32-0)*4                # adjust stack pointer
2140
2141    la     a2, .LdvmSelfVerificationMemOpDecode  # defined in footer.S
2142    lw     a2, (a2)
2143    move   a0, ra                       # a0<- link register
2144    move   a1, sp                       # a1<- stack pointer
2145    JALR(a2)
2146
2147    /* pop gp registers (except zero, gp, sp, and fp) */
2148    # Note: even if we don't save all 32 registers, we still need to
2149    #       adjust SP by 32 registers due to the way we are storing
2150    #       the registers on the stack.
2151    add     sp, (32-0)*4                # adjust stack pointer
2152    .set noat
2153    lw      AT, r_AT*-4(sp)             # pop at
2154    .set at
2155    lw      v0, r_V0*-4(sp)             # pop v0
2156    lw      v1, r_V1*-4(sp)             # pop v1
2157    lw      a0, r_A0*-4(sp)             # pop a0
2158    lw      a1, r_A1*-4(sp)             # pop a1
2159    lw      a2, r_A2*-4(sp)             # pop a2
2160    lw      a3, r_A3*-4(sp)             # pop a3
2161    lw      t0, r_T0*-4(sp)             # pop t0
2162    lw      t1, r_T1*-4(sp)             # pop t1
2163    lw      t2, r_T2*-4(sp)             # pop t2
2164    lw      t3, r_T3*-4(sp)             # pop t3
2165    lw      t4, r_T4*-4(sp)             # pop t4
2166    lw      t5, r_T5*-4(sp)             # pop t5
2167    lw      t6, r_T6*-4(sp)             # pop t6
2168    lw      t7, r_T7*-4(sp)             # pop t7
2169    lw      s0, r_S0*-4(sp)             # pop s0
2170    lw      s1, r_S1*-4(sp)             # pop s1
2171    lw      s2, r_S2*-4(sp)             # pop s2
2172    lw      s3, r_S3*-4(sp)             # pop s3
2173    lw      s4, r_S4*-4(sp)             # pop s4
2174    lw      s5, r_S5*-4(sp)             # pop s5
2175    lw      s6, r_S6*-4(sp)             # pop s6
2176    lw      s7, r_S7*-4(sp)             # pop s7
2177    lw      t8, r_T8*-4(sp)             # pop t8
2178    lw      t9, r_T9*-4(sp)             # pop t9
2179    lw      k0, r_K0*-4(sp)             # pop k0
2180    lw      k1, r_K1*-4(sp)             # pop k1
2181    lw      ra, r_RA*-4(sp)             # pop RA
2182
2183#ifdef HARD_FLOAT
2184    /* pop f0-f31 from stack */
2185    add     sp, (32-0)*4                # adjust stack pointer
2186    lw      f0, fr0*-4(sp)              # pop f0
2187    lw      f1, fr1*-4(sp)              # pop f1
2188    lw      f2, fr2*-4(sp)              # pop f2
2189    lw      f3, fr3*-4(sp)              # pop f3
2190    lw      f4, fr4*-4(sp)              # pop f4
2191    lw      f5, fr5*-4(sp)              # pop f5
2192    lw      f6, fr6*-4(sp)              # pop f6
2193    lw      f7, fr7*-4(sp)              # pop f7
2194    lw      f8, fr8*-4(sp)              # pop f8
2195    lw      f9, fr9*-4(sp)              # pop f9
2196    lw      f10, fr10*-4(sp)            # pop f10
2197    lw      f11, fr11*-4(sp)            # pop f11
2198    lw      f12, fr12*-4(sp)            # pop f12
2199    lw      f13, fr13*-4(sp)            # pop f13
2200    lw      f14, fr14*-4(sp)            # pop f14
2201    lw      f15, fr15*-4(sp)            # pop f15
2202    lw      f16, fr16*-4(sp)            # pop f16
2203    lw      f17, fr17*-4(sp)            # pop f17
2204    lw      f18, fr18*-4(sp)            # pop f18
2205    lw      f19, fr19*-4(sp)            # pop f19
2206    lw      f20, fr20*-4(sp)            # pop f20
2207    lw      f21, fr21*-4(sp)            # pop f21
2208    lw      f22, fr22*-4(sp)            # pop f22
2209    lw      f23, fr23*-4(sp)            # pop f23
2210    lw      f24, fr24*-4(sp)            # pop f24
2211    lw      f25, fr25*-4(sp)            # pop f25
2212    lw      f26, fr26*-4(sp)            # pop f26
2213    lw      f27, fr27*-4(sp)            # pop f27
2214    lw      f28, fr28*-4(sp)            # pop f28
2215    lw      f29, fr29*-4(sp)            # pop f29
2216    lw      f30, fr30*-4(sp)            # pop f30
2217    lw      f31, fr31*-4(sp)            # pop f31
2218#endif
2219
2220    RETURN
2221#endif
2222
2223/* ------------------------------ */
2224    .balign 4
2225    .global dvmCompiler_TEMPLATE_STRING_COMPARETO
2226dvmCompiler_TEMPLATE_STRING_COMPARETO:
2227/* File: mips/TEMPLATE_STRING_COMPARETO.S */
2228    /*
2229     * String's compareTo.
2230     *
2231     * Requires a0/a1 to have been previously checked for null.  Will
2232     * return negative if this's string is < comp, 0 if they are the
2233     * same and positive if >.
2234     *
2235     * IMPORTANT NOTE:
2236     *
2237     * This code relies on hard-coded offsets for string objects, and must be
2238     * kept in sync with definitions in UtfString.h.  See asm-constants.h
2239     *
2240     * On entry:
2241     *    a0:   this object pointer
2242     *    a1:   comp object pointer
2243     *
2244     */
2245
2246     subu  v0, a0, a1                # Same?
2247     bnez  v0, 1f
2248     RETURN
22491:
2250     lw    t0, STRING_FIELDOFF_OFFSET(a0)
2251     lw    t1, STRING_FIELDOFF_OFFSET(a1)
2252     lw    t2, STRING_FIELDOFF_COUNT(a0)
2253     lw    a2, STRING_FIELDOFF_COUNT(a1)
2254     lw    a0, STRING_FIELDOFF_VALUE(a0)
2255     lw    a1, STRING_FIELDOFF_VALUE(a1)
2256
2257    /*
2258     * At this point, we have this/comp:
2259     *    offset: t0/t1
2260     *    count:  t2/a2
2261     *    value:  a0/a1
2262     * We're going to compute
2263     *    a3 <- countDiff
2264     *    a2 <- minCount
2265     */
2266     subu  a3, t2, a2                # a3<- countDiff
2267     sleu  t7, t2, a2
2268     movn  a2, t2, t7                # a2<- minCount
2269
2270     /*
2271      * Note: data pointers point to first element.
2272      */
2273     addu  a0, 16                    # point to contents[0]
2274     addu  a1, 16                    # point to contents[0]
2275
2276     /* Now, build pointers to the string data */
2277     sll   t7, t0, 1                 # multiply offset by 2
2278     addu  a0, a0, t7
2279     sll   t7, t1, 1                 # multiply offset by 2
2280     addu  a1, a1, t7
2281
2282     /*
2283      * At this point we have:
2284      *   a0: *this string data
2285      *   a1: *comp string data
2286      *   a2: iteration count for comparison
2287      *   a3: value to return if the first part of the string is equal
2288      *   v0: reserved for result
2289      *   t0-t5 available for loading string data
2290      */
2291
2292     subu  a2, 2
2293     bltz  a2, do_remainder2
2294
2295     /*
2296      * Unroll the first two checks so we can quickly catch early mismatch
2297      * on long strings (but preserve incoming alignment)
2298      */
2299     lhu   t0, 0(a0)
2300     lhu   t1, 0(a1)
2301     subu  v0, t0, t1
2302     beqz  v0, 1f
2303     RETURN
23041:
2305     lhu   t2, 2(a0)
2306     lhu   t3, 2(a1)
2307     subu  v0, t2, t3
2308     beqz  v0, 2f
2309     RETURN
23102:
2311     addu  a0, 4                     # offset to contents[2]
2312     addu  a1, 4                     # offset to contents[2]
2313     li    t7, 28
2314     bgt   a2, t7, do_memcmp16
2315     subu  a2, 3
2316     bltz  a2, do_remainder
2317
2318loopback_triple:
2319     lhu   t0, 0(a0)
2320     lhu   t1, 0(a1)
2321     subu  v0, t0, t1
2322     beqz  v0, 1f
2323     RETURN
23241:
2325     lhu   t2, 2(a0)
2326     lhu   t3, 2(a1)
2327     subu  v0, t2, t3
2328     beqz  v0, 2f
2329     RETURN
23302:
2331     lhu   t4, 4(a0)
2332     lhu   t5, 4(a1)
2333     subu  v0, t4, t5
2334     beqz  v0, 3f
2335     RETURN
23363:
2337     addu  a0, 6                     # offset to contents[i+3]
2338     addu  a1, 6                     # offset to contents[i+3]
2339     subu  a2, 3
2340     bgez  a2, loopback_triple
2341
2342do_remainder:
2343     addu  a2, 3
2344     beqz  a2, returnDiff
2345
2346loopback_single:
2347     lhu   t0, 0(a0)
2348     lhu   t1, 0(a1)
2349     subu  v0, t0, t1
2350     bnez  v0, 1f
2351     addu  a0, 2                     # offset to contents[i+1]
2352     addu  a1, 2                     # offset to contents[i+1]
2353     subu  a2, 1
2354     bnez  a2, loopback_single
2355
2356returnDiff:
2357     move  v0, a3
23581:
2359     RETURN
2360
2361do_remainder2:
2362     addu  a2, 2
2363     bnez  a2, loopback_single
2364     move  v0, a3
2365     RETURN
2366
2367    /* Long string case */
2368do_memcmp16:
2369     move  rOBJ, a3                  # save return value if strings are equal
2370     JAL(__memcmp16)
2371     seq   t0, v0, zero
2372     movn  v0, rOBJ, t0              # overwrite return value if strings are equal
2373     RETURN
2374
2375/* ------------------------------ */
2376    .balign 4
2377    .global dvmCompiler_TEMPLATE_STRING_INDEXOF
2378dvmCompiler_TEMPLATE_STRING_INDEXOF:
2379/* File: mips/TEMPLATE_STRING_INDEXOF.S */
2380    /*
2381     * String's indexOf.
2382     *
2383     * Requires a0 to have been previously checked for null.  Will
2384     * return index of match of a1 in v0.
2385     *
2386     * IMPORTANT NOTE:
2387     *
2388     * This code relies on hard-coded offsets for string objects, and must be
2389     * kept in sync wth definitions in UtfString.h  See asm-constants.h
2390     *
2391     * On entry:
2392     *    a0:   string object pointer
2393     *    a1:   char to match
2394     *    a2:   Starting offset in string data
2395     */
2396
2397     lw    t0, STRING_FIELDOFF_OFFSET(a0)
2398     lw    t1, STRING_FIELDOFF_COUNT(a0)
2399     lw    v0, STRING_FIELDOFF_VALUE(a0)
2400
2401    /*
2402     * At this point, we have:
2403     *    v0: object pointer
2404     *    a1: char to match
2405     *    a2: starting offset
2406     *    t0: offset
2407     *    t1: string length
2408     */
2409
2410    /* Point to first element */
2411     addu  v0, 16                    # point to contents[0]
2412
2413    /* Build pointer to start of string data */
2414     sll   t7, t0, 1                 # multiply offset by 2
2415     addu  v0, v0, t7
2416
2417    /* Save a copy of starting data in v1 */
2418     move  v1, v0
2419
2420    /* Clamp start to [0..count] */
2421     slt   t7, a2, zero
2422     movn  a2, zero, t7
2423     sgt   t7, a2, t1
2424     movn  a2, t1, t7
2425
2426    /* Build pointer to start of data to compare */
2427     sll   t7, a2, 1                # multiply offset by 2
2428     addu  v0, v0, t7
2429
2430    /* Compute iteration count */
2431     subu  a3, t1, a2
2432
2433    /*
2434     * At this point we have:
2435     *   v0: start of data to test
2436     *   a1: char to compare
2437     *   a3: iteration count
2438     *   v1: original start of string
2439     *   t0-t7 available for loading string data
2440     */
2441     subu  a3, 4
2442     bltz  a3, indexof_remainder
2443
2444indexof_loop4:
2445     lhu   t0, 0(v0)
2446     beq   t0, a1, match_0
2447     lhu   t0, 2(v0)
2448     beq   t0, a1, match_1
2449     lhu   t0, 4(v0)
2450     beq   t0, a1, match_2
2451     lhu   t0, 6(v0)
2452     beq   t0, a1, match_3
2453     addu  v0, 8                     # offset to contents[i+4]
2454     subu  a3, 4
2455     bgez  a3, indexof_loop4
2456
2457indexof_remainder:
2458     addu  a3, 4
2459     beqz  a3, indexof_nomatch
2460
2461indexof_loop1:
2462     lhu   t0, 0(v0)
2463     beq   t0, a1, match_0
2464     addu  v0, 2                     # offset to contents[i+1]
2465     subu  a3, 1
2466     bnez  a3, indexof_loop1
2467
2468indexof_nomatch:
2469     li    v0, -1
2470     RETURN
2471
2472match_0:
2473     subu  v0, v1
2474     sra   v0, v0, 1                 # divide by 2
2475     RETURN
2476match_1:
2477     addu  v0, 2
2478     subu  v0, v1
2479     sra   v0, v0, 1                 # divide by 2
2480     RETURN
2481match_2:
2482     addu  v0, 4
2483     subu  v0, v1
2484     sra   v0, v0, 1                 # divide by 2
2485     RETURN
2486match_3:
2487     addu  v0, 6
2488     subu  v0, v1
2489     sra   v0, v0, 1                 # divide by 2
2490     RETURN
2491
2492/* ------------------------------ */
2493    .balign 4
2494    .global dvmCompiler_TEMPLATE_INTERPRET
2495dvmCompiler_TEMPLATE_INTERPRET:
2496/* File: mips/TEMPLATE_INTERPRET.S */
2497    /*
2498     * This handler transfers control to the interpeter without performing
2499     * any lookups.  It may be called either as part of a normal chaining
2500     * operation, or from the transition code in header.S.  We distinquish
2501     * the two cases by looking at the link register.  If called from a
2502     * translation chain, it will point to the chaining Dalvik PC.
2503     * On entry:
2504     *    ra - if NULL:
2505     *        a1 - the Dalvik PC to begin interpretation.
2506     *    else
2507     *        [ra] contains Dalvik PC to begin interpretation
2508     *    rSELF - pointer to thread
2509     *    rFP - Dalvik frame pointer
2510     */
2511    la      t0, dvmJitToInterpPunt
2512    move    a0, a1
2513    beq     ra, zero, 1f
2514    lw      a0, 0(ra)
25151:
2516    jr      t0
2517    # doesn't return
2518
2519/* ------------------------------ */
2520    .balign 4
2521    .global dvmCompiler_TEMPLATE_MONITOR_ENTER
2522dvmCompiler_TEMPLATE_MONITOR_ENTER:
2523/* File: mips/TEMPLATE_MONITOR_ENTER.S */
2524    /*
2525     * Call out to the runtime to lock an object.  Because this thread
2526     * may have been suspended in THREAD_MONITOR state and the Jit's
2527     * translation cache subsequently cleared, we cannot return directly.
2528     * Instead, unconditionally transition to the interpreter to resume.
2529     *
2530     * On entry:
2531     *    a0 - self pointer
2532     *    a1 - the object (which has already been null-checked by the caller)
2533     *    rPC - the Dalvik PC of the following instruction.
2534     */
2535    la     a2, .LdvmLockObject                  # a2 <- address of literal-pool slot
2536    lw     t9, (a2)                             # t9 <- dvmLockObject function pointer
2537    sw     zero, offThread_inJitCodeCache(a0)   # record that we're not returning
2538    JALR(t9)                                    # dvmLockObject(self, obj)
2539    lw     gp, STACK_OFFSET_GP(sp)              # restore gp clobbered by the call
2540
2541    la     a2, .LdvmJitToInterpNoChain          # a2 <- slot holding interp entry
2542    lw     a2, (a2)                             # a2 <- dvmJitToInterpNoChain
2543
2544    # Bail to interpreter - no chain [note - rPC still contains dPC]
2545#if defined(WITH_JIT_TUNING)
2546    li      a0, kHeavyweightMonitor
2547#endif
2548    jr      a2
2549
2550/* ------------------------------ */
2551    .balign 4
2552    .global dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG
2553dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
2554/* File: mips/TEMPLATE_MONITOR_ENTER_DEBUG.S */
2555    /*
2556     * To support deadlock prediction, this version of MONITOR_ENTER
2557     * will always call the heavyweight dvmLockObject, check for an
2558     * exception and then bail out to the interpreter.
2559     *
2560     * On entry:
2561     *    a0 - self pointer
2562     *    a1 - the object (which has already been null-checked by the caller)
2563     *    rPC - the Dalvik PC of the following instruction.
2564     *
2565     */
2566    la     a2, .LdvmLockObject                  # a2 <- address of literal-pool slot
2567    lw     t9, (a2)                             # t9 <- dvmLockObject function pointer
2568    sw     zero, offThread_inJitCodeCache(a0)   # record that we're not returning
2569    JALR(t9)                                    # dvmLockObject(self, obj)
2570    lw     gp, STACK_OFFSET_GP(sp)              # restore gp clobbered by the call
2571
2572    # test for exception
2573    lw     a1, offThread_exception(rSELF)       # a1 <- self->exception
2574    beqz   a1, 1f                               # no pending exception -> bail normally
2575    sub    a0, rPC, 2                           # roll dPC back to this monitor instruction
2576    j      .LhandleException
25771:
2578    # Bail to interpreter - no chain [note - rPC still contains dPC]
2579#if defined(WITH_JIT_TUNING)
2580    li     a0, kHeavyweightMonitor
2581#endif
2582    la     a2, .LdvmJitToInterpNoChain          # a2 <- slot holding interp entry
2583    lw     a2, (a2)                             # a2 <- dvmJitToInterpNoChain
2584    jr     a2
2585
2586/* ------------------------------ */
2587    .balign 4
2588    .global dvmCompiler_TEMPLATE_RESTORE_STATE
2589dvmCompiler_TEMPLATE_RESTORE_STATE:
2590/* File: mips/TEMPLATE_RESTORE_STATE.S */
2591    /*
2592     * This handler restores state following a selfVerification memory access.
2593     * On entry:
2594     *    a0 - offset from rSELF to the 1st element of the coreRegs save array.
2595     * Note: the following registers are not restored
2596     *       zero, AT, gp, sp, fp, ra
2597     */
2598
2599    add     a0, a0, rSELF               # pointer to heapArgSpace.coreRegs[0]
2600#if 0
2601    lw      zero, r_ZERO*4(a0)          # restore zero
2602#endif
2603    .set noat
2604    lw      AT, r_AT*4(a0)              # restore at
2605    .set at
2606    lw      v0, r_V0*4(a0)              # restore v0
2607    lw      v1, r_V1*4(a0)              # restore v1
2608
2609    lw      a1, r_A1*4(a0)              # restore a1
2610    lw      a2, r_A2*4(a0)              # restore a2
2611    lw      a3, r_A3*4(a0)              # restore a3
2612
2613    lw      t0, r_T0*4(a0)              # restore t0
2614    lw      t1, r_T1*4(a0)              # restore t1
2615    lw      t2, r_T2*4(a0)              # restore t2
2616    lw      t3, r_T3*4(a0)              # restore t3
2617    lw      t4, r_T4*4(a0)              # restore t4
2618    lw      t5, r_T5*4(a0)              # restore t5
2619    lw      t6, r_T6*4(a0)              # restore t6
2620    lw      t7, r_T7*4(a0)              # restore t7
2621
2622    lw      s0, r_S0*4(a0)              # restore s0
2623    lw      s1, r_S1*4(a0)              # restore s1
2624    lw      s2, r_S2*4(a0)              # restore s2
2625    lw      s3, r_S3*4(a0)              # restore s3
2626    lw      s4, r_S4*4(a0)              # restore s4
2627    lw      s5, r_S5*4(a0)              # restore s5
2628    lw      s6, r_S6*4(a0)              # restore s6
2629    lw      s7, r_S7*4(a0)              # restore s7
2630
2631    lw      t8, r_T8*4(a0)              # restore t8
2632    lw      t9, r_T9*4(a0)              # restore t9
2633
2634    lw      k0, r_K0*4(a0)              # restore k0
2635    lw      k1, r_K1*4(a0)              # restore k1
2636
2637#if 0
2638    lw      gp, r_GP*4(a0)              # restore gp
2639    lw      sp, r_SP*4(a0)              # restore sp
2640    lw      fp, r_FP*4(a0)              # restore fp
2641    lw      ra, r_RA*4(a0)              # restore ra
2642#endif
2643
2644/* #ifdef HARD_FLOAT */
2645#if 0
2646    lw      f0, fr0*4(a0)               # restore f0
2647    lw      f1, fr1*4(a0)               # restore f1
2648    lw      f2, fr2*4(a0)               # restore f2
2649    lw      f3, fr3*4(a0)               # restore f3
2650    lw      f4, fr4*4(a0)               # restore f4
2651    lw      f5, fr5*4(a0)               # restore f5
2652    lw      f6, fr6*4(a0)               # restore f6
2653    lw      f7, fr7*4(a0)               # restore f7
2654    lw      f8, fr8*4(a0)               # restore f8
2655    lw      f9, fr9*4(a0)               # restore f9
2656    lw      f10, fr10*4(a0)             # restore f10
2657    lw      f11, fr11*4(a0)             # restore f11
2658    lw      f12, fr12*4(a0)             # restore f12
2659    lw      f13, fr13*4(a0)             # restore f13
2660    lw      f14, fr14*4(a0)             # restore f14
2661    lw      f15, fr15*4(a0)             # restore f15
2662    lw      f16, fr16*4(a0)             # restore f16
2663    lw      f17, fr17*4(a0)             # restore f17
2664    lw      f18, fr18*4(a0)             # restore f18
2665    lw      f19, fr19*4(a0)             # restore f19
2666    lw      f20, fr20*4(a0)             # restore f20
2667    lw      f21, fr21*4(a0)             # restore f21
2668    lw      f22, fr22*4(a0)             # restore f22
2669    lw      f23, fr23*4(a0)             # restore f23
2670    lw      f24, fr24*4(a0)             # restore f24
2671    lw      f25, fr25*4(a0)             # restore f25
2672    lw      f26, fr26*4(a0)             # restore f26
2673    lw      f27, fr27*4(a0)             # restore f27
2674    lw      f28, fr28*4(a0)             # restore f28
2675    lw      f29, fr29*4(a0)             # restore f29
2676    lw      f30, fr30*4(a0)             # restore f30
2677    lw      f31, fr31*4(a0)             # restore f31
2678#endif
2679
2680    lw      a0, r_A0*4(a0)              # restore a0 last (a0 is the array base ptr)
2681    RETURN
2682
2683/* ------------------------------ */
2684    .balign 4
2685    .global dvmCompiler_TEMPLATE_SAVE_STATE
2686dvmCompiler_TEMPLATE_SAVE_STATE:
2687/* File: mips/TEMPLATE_SAVE_STATE.S */
2688    /*
2689     * This handler performs a register save for selfVerification mode.
2690     * On entry:
2691     *    Top of stack + 4: a1 value to save
2692     *    Top of stack + 0: a0 value to save
2693     *    a0 - offset from rSELF to the beginning of the heapArgSpace record
2694     *    a1 - the value of regMap
2695     *
2696     * The handler must save regMap, r0-r31, f0-f31 if FPU, and then return with
2697     * r0-r31 with their original values (note that this means a0 and a1 must take
2698     * the values on the stack - not the ones in those registers on entry.
2699     * Finally, the two registers previously pushed must be popped.
2700     * Note: the following registers are not saved
2701     *       zero, AT, gp, sp, fp, ra
2702     */
2703    add     a0, a0, rSELF               # pointer to heapArgSpace
2704    sw      a1, 0(a0)                   # save regMap
2705    add     a0, a0, 4                   # pointer to coreRegs
2706#if 0
2707    sw      zero, r_ZERO*4(a0)          # save zero
2708#endif
2709    .set noat
2710    sw      AT, r_AT*4(a0)              # save at
2711    .set at
2712    sw      v0, r_V0*4(a0)              # save v0
2713    sw      v1, r_V1*4(a0)              # save v1
2714
2715    lw      a1, 0(sp)                   # recover a0 value
2716    sw      a1, r_A0*4(a0)              # save a0
2717    lw      a1, 4(sp)                   # recover a1 value
2718    sw      a1, r_A1*4(a0)              # save a1
2719    sw      a2, r_A2*4(a0)              # save a2
2720    sw      a3, r_A3*4(a0)              # save a3
2721
2722    sw      t0, r_T0*4(a0)              # save t0
2723    sw      t1, r_T1*4(a0)              # save t1
2724    sw      t2, r_T2*4(a0)              # save t2
2725    sw      t3, r_T3*4(a0)              # save t3
2726    sw      t4, r_T4*4(a0)              # save t4
2727    sw      t5, r_T5*4(a0)              # save t5
2728    sw      t6, r_T6*4(a0)              # save t6
2729    sw      t7, r_T7*4(a0)              # save t7
2730
2731    sw      s0, r_S0*4(a0)              # save s0
2732    sw      s1, r_S1*4(a0)              # save s1
2733    sw      s2, r_S2*4(a0)              # save s2
2734    sw      s3, r_S3*4(a0)              # save s3
2735    sw      s4, r_S4*4(a0)              # save s4
2736    sw      s5, r_S5*4(a0)              # save s5
2737    sw      s6, r_S6*4(a0)              # save s6
2738    sw      s7, r_S7*4(a0)              # save s7
2739
2740    sw      t8, r_T8*4(a0)              # save t8
2741    sw      t9, r_T9*4(a0)              # save t9
2742
2743    sw      k0, r_K0*4(a0)              # save k0
2744    sw      k1, r_K1*4(a0)              # save k1
2745
2746#if 0
2747    sw      gp, r_GP*4(a0)              # save gp
2748    sw      sp, r_SP*4(a0)              # save sp (need to adjust??? )
2749    sw      fp, r_FP*4(a0)              # save fp
2750    sw      ra, r_RA*4(a0)              # save ra
2751#endif
2752
2753/* #ifdef HARD_FLOAT */
2754#if 0
2755    sw      f0, fr0*4(a0)               # save f0
2756    sw      f1, fr1*4(a0)               # save f1
2757    sw      f2, fr2*4(a0)               # save f2
2758    sw      f3, fr3*4(a0)               # save f3
2759    sw      f4, fr4*4(a0)               # save f4
2760    sw      f5, fr5*4(a0)               # save f5
2761    sw      f6, fr6*4(a0)               # save f6
2762    sw      f7, fr7*4(a0)               # save f7
2763    sw      f8, fr8*4(a0)               # save f8
2764    sw      f9, fr9*4(a0)               # save f9
2765    sw      f10, fr10*4(a0)             # save f10
2766    sw      f11, fr11*4(a0)             # save f11
2767    sw      f12, fr12*4(a0)             # save f12
2768    sw      f13, fr13*4(a0)             # save f13
2769    sw      f14, fr14*4(a0)             # save f14
2770    sw      f15, fr15*4(a0)             # save f15
2771    sw      f16, fr16*4(a0)             # save f16
2772    sw      f17, fr17*4(a0)             # save f17
2773    sw      f18, fr18*4(a0)             # save f18
2774    sw      f19, fr19*4(a0)             # save f19
2775    sw      f20, fr20*4(a0)             # save f20
2776    sw      f21, fr21*4(a0)             # save f21
2777    sw      f22, fr22*4(a0)             # save f22
2778    sw      f23, fr23*4(a0)             # save f23
2779    sw      f24, fr24*4(a0)             # save f24
2780    sw      f25, fr25*4(a0)             # save f25
2781    sw      f26, fr26*4(a0)             # save f26
2782    sw      f27, fr27*4(a0)             # save f27
2783    sw      f28, fr28*4(a0)             # save f28
2784    sw      f29, fr29*4(a0)             # save f29
2785    sw      f30, fr30*4(a0)             # save f30
2786    sw      f31, fr31*4(a0)             # save f31
2787#endif
2788
2789    lw      a0, 0(sp)                   # recover a0 value (contract: return with original a0)
2790    lw      a1, 4(sp)                   # recover a1 value
2791    add     sp, sp, 8                   # pop the two words pushed by the caller
2792    RETURN
2793
2794/* ------------------------------ */
2795    .balign 4
2796    .global dvmCompiler_TEMPLATE_PERIODIC_PROFILING
2797dvmCompiler_TEMPLATE_PERIODIC_PROFILING:
2798/* File: mips/TEMPLATE_PERIODIC_PROFILING.S */
2799    /*
2800     * Increment profile counter for this trace, and decrement
2801     * sample counter.  If sample counter goes below zero, turn
2802     * off profiling.
2803     *
2804     * On entry
2805     * (ra-16) is address of pointer to counter.  Note: the counter
2806     *    actually exists 16 bytes before the return target for mips.
2807     *     - 4 bytes for prof count addr.
2808     *     - 4 bytes for chain cell offset (2bytes 32 bit aligned).
2809     *     - 4 bytes for call TEMPLATE_PERIODIC_PROFILING.
2810     *     - 4 bytes for call delay slot.
2811     */
2812     lw     a0, -16(ra)                 # a0 <- address of this trace's profile counter
2813     lw     a1, offThread_pProfileCountdown(rSELF)   # a1 <- &countdown
2814     lw     a2, 0(a0)                   # get counter
2815     lw     a3, 0(a1)                   # get countdown timer
2816     addu   a2, 1                       # counter++
2817     sub    a3, 1                       # FIXME - bug in ARM code???
2818     bltz   a3, .LTEMPLATE_PERIODIC_PROFILING_disable_profiling   # countdown expired?
2819     sw     a2, 0(a0)                   # write back incremented counter
2820     sw     a3, 0(a1)                   # write back decremented countdown
2821     RETURN
2822.LTEMPLATE_PERIODIC_PROFILING_disable_profiling:
2823     la     a0, dvmJitTraceProfilingOff
2824     JALR(a0)                           # dvmJitTraceProfilingOff()
2825     # The ra register is preserved by the JALR macro.
2826     jr     ra
2827
2828/* ------------------------------ */
2829    .balign 4
2830    .global dvmCompiler_TEMPLATE_RETURN_PROF
2831dvmCompiler_TEMPLATE_RETURN_PROF:
2832/* File: mips/TEMPLATE_RETURN_PROF.S */
2833#define TEMPLATE_INLINE_PROFILING
2834/* File: mips/TEMPLATE_RETURN.S */
2835    /*
2836     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
2837     * If the stored value in returnAddr
2838     * is non-zero, the caller is compiled by the JIT thus return to the
2839     * address in the code cache following the invoke instruction. Otherwise
2840     * return to the special dvmJitToInterpNoChain entry point.
2841     */
2842#if defined(TEMPLATE_INLINE_PROFILING)
2843    # preserve a0-a2 and ra
2844    SCRATCH_STORE(a0, 0)
2845    SCRATCH_STORE(a1, 4)
2846    SCRATCH_STORE(a2, 8)
2847    SCRATCH_STORE(ra, 12)
2848
2849    # a0=rSELF
2850    move    a0, rSELF
2851    la      t9, dvmFastMethodTraceExit
2852    JALR(t9)                            # dvmFastMethodTraceExit(self)
2853    lw      gp, STACK_OFFSET_GP(sp)     # restore gp clobbered by the call
2854
2855    # restore a0-a2 and ra
2856    SCRATCH_LOAD(ra, 12)
2857    SCRATCH_LOAD(a2, 8)
2858    SCRATCH_LOAD(a1, 4)
2859    SCRATCH_LOAD(a0, 0)
2860#endif
2861    SAVEAREA_FROM_FP(a0, rFP)           # a0<- saveArea (old)
2862    lw      t0, offStackSaveArea_prevFrame(a0)     # t0<- saveArea->prevFrame
2863    lbu     t1, offThread_breakFlags(rSELF)        # t1<- breakFlags
2864    lw      rPC, offStackSaveArea_savedPc(a0)      # rPC<- saveArea->savedPc
2865#if !defined(WITH_SELF_VERIFICATION)
2866    lw      t2,  offStackSaveArea_returnAddr(a0)   # t2<- chaining cell ret
2867#else
2868    move    t2, zero                               # disable chaining
2869#endif
2870    lw      a2, offStackSaveArea_method - sizeofStackSaveArea(t0)
2871                                                   # a2<- method we're returning to
2872#if !defined(WITH_SELF_VERIFICATION)
2873    beq     a2, zero, 1f                           # bail to interpreter
2874#else
2875    bne     a2, zero, 2f
2876    JALR(ra)                                       # punt to interpreter and compare state
2877    # DOUG: assume this does not return ???
28782:
2879#endif
2880    la      t4, .LdvmJitToInterpNoChainNoProfile   # defined in footer.S
2881    lw      a1, (t4)
2882    move    rFP, t0                                # publish new FP
2883    beq     a2, zero, 4f                           # returning to a NULL method? skip clazz load
2884    lw      t0, offMethod_clazz(a2)                # t0<- method->clazz
28854:
2886
2887    sw      a2, offThread_method(rSELF)            # self->method = newSave->method
2888    lw      a0, offClassObject_pDvmDex(t0)         # a0<- method->clazz->pDvmDex
2889    sw      rFP, offThread_curFrame(rSELF)         # self->curFrame = fp
2890    add     rPC, rPC, 3*2                          # publish new rPC (3 code units * 2 bytes)
2891    sw      a0, offThread_methodClassDex(rSELF)
2892    movn    t2, zero, t1                           # check the breakFlags and
2893                                                   # clear the chaining cell address
2894    sw      t2, offThread_inJitCodeCache(rSELF)    # in code cache or not
2895    beq     t2, zero, 3f                           # chaining cell exists?
2896    JALR(t2)                                       # jump to the chaining cell
2897    # DOUG: assume this does not return ???
28983:
2899#if defined(WITH_JIT_TUNING)
2900    li      a0, kCallsiteInterpreted
2901#endif
2902    j       a1                                     # callsite is interpreted
29031:
2904    sw      zero, offThread_inJitCodeCache(rSELF)  # reset inJitCodeCache
2905    SAVE_PC_TO_SELF()                              # SAVE_PC_FP_TO_SELF()
2906    SAVE_FP_TO_SELF()
2907    la      t4, .LdvmMterpStdBail                  # defined in footer.S
2908    lw      a2, (t4)
2909    move    a0, rSELF                              # Expecting rSELF in a0
2910    JALR(a2)                                       # exit the interpreter
2911    # DOUG: assume this does not return ???
2912
2913#undef TEMPLATE_INLINE_PROFILING
2914
2915/* ------------------------------ */
2916    .balign 4
2917    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF
2918dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF:
2919/* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT_PROF.S */
2920#define TEMPLATE_INLINE_PROFILING
2921/* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
2922    /*
2923     * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
2924     * into rPC then jump to dvmJitToInterpNoChain to dispatch the
2925     * runtime-resolved callee.
2926     */
2927    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
2928    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
2929    lh     a2, offMethod_outsSize(a0)             # a2<- methodToCall->outsSize
2930    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
2931    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
2932    move   a3, a1                                 # a3<- returnCell
2933    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
2934    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
2935    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
2936    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
2937    sll    t6, a2, 2                              # multiply outsSize by 4 (4 bytes per reg)
2938    sub    t0, t0, t6                             # t0<- bottom (newsave-outsSize)
2939    bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
2940    RETURN                                        # return to raise stack overflow excep.
2941
29421:
2943    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
2944    lw     t9, offMethod_clazz(a0)                # t9<- methodToCall->clazz
2945    lw     t0, offMethod_accessFlags(a0)          # t0<- methodToCall->accessFlags
2946    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
2947    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
2948    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns
2949
2950    # set up newSaveArea
2951    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
2952    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
2953    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
2954    beqz   t8, 2f                                 # breakFlags != 0
2955    RETURN                                        # bail to the interpreter
2956
29572:
2958    and    t6, t0, ACC_NATIVE                     # native method?
2959    beqz   t6, 3f
2960#if !defined(WITH_SELF_VERIFICATION)
2961    j      .LinvokeNative
2962#else
2963    RETURN                                        # bail to the interpreter
2964#endif
2965
29663:
2967    # continue executing the next instruction through the interpreter
2968    la     t0, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S
2969    lw     rTEMP, (t0)                            # rTEMP<- dvmJitToInterpTraceSelectNoChain
2970    lw     a3, offClassObject_pDvmDex(t9)         # a3<- method->clazz->pDvmDex
2971
2972    # Update "thread" values for the new method
2973    sw     a0, offThread_method(rSELF)            # self->method = methodToCall
2974    sw     a3, offThread_methodClassDex(rSELF)    # self->methodClassDex = ...
2975    move   rFP, a1                                # fp = newFp
2976    sw     rFP, offThread_curFrame(rSELF)         # self->curFrame = newFp
2977#if defined(TEMPLATE_INLINE_PROFILING)
2978    # preserve rTEMP,a1-a3
2979    SCRATCH_STORE(rTEMP, 0)
2980    SCRATCH_STORE(a1, 4)
2981    SCRATCH_STORE(a2, 8)
2982    SCRATCH_STORE(a3, 12)
2983
2984    # a0=methodToCall, a1=rSELF
2985    move   a1, rSELF
2986    la     t9, dvmFastMethodTraceEnter
2987    JALR(t9)                                      # dvmFastMethodTraceEnter(method, self)
2988    lw     gp, STACK_OFFSET_GP(sp)                # restore gp clobbered by the call
2989
2990    # restore rTEMP,a1-a3
2991    SCRATCH_LOAD(a3, 12)
2992    SCRATCH_LOAD(a2, 8)
2993    SCRATCH_LOAD(a1, 4)
2994    SCRATCH_LOAD(rTEMP, 0)
2995#endif
2996
2997    # Start executing the callee
2998#if defined(WITH_JIT_TUNING)
2999    li     a0, kInlineCacheMiss
3000#endif
3001    jr     rTEMP                                  # dvmJitToInterpTraceSelectNoChain
3002
3003#undef TEMPLATE_INLINE_PROFILING
3004
3005/* ------------------------------ */
3006    .balign 4
3007    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF
3008dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF:
3009/* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN_PROF.S */
3010#define TEMPLATE_INLINE_PROFILING
3011/* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN.S */
3012    /*
3013     * For monomorphic callsite, setup the Dalvik frame and return to the
3014     * Thumb code through the link register to transfer control to the callee
3015     * method through a dedicated chaining cell.
3016     */
3017    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
3018    # methodToCall is guaranteed to be non-native
3019.LinvokeChainProf:
3020    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
3021    lh     a2, offMethod_outsSize(a0)             # a2<- methodToCall->outsSize
3022    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
3023    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
3024    move   a3, a1                                 # a3<- returnCell
3025    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
3026    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
3027    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
3028    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
3029    add    t2, ra, 8                              # setup the punt-to-interp address
3030                                                  # 8 bytes skips branch and delay slot
3031    sll    t6, a2, 2                              # multiply outsSize by 4 (4 bytes per reg)
3032    sub    t0, t0, t6                             # t0<- bottom (newsave-outsSize)
3033    bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
3034    jr     t2                                     # return to raise stack overflow excep.
3035
30361:
3037    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
3038    lw     t9, offMethod_clazz(a0)                # t9<- methodToCall->clazz
3039    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
3040    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
3041    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns
3042
3043    # set up newSaveArea
3044    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
3045    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
3046    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
3047    beqz   t8, 2f                                 # breakFlags != 0
3048    jr     t2                                     # bail to the interpreter
3049
30502:
3051    lw     a3, offClassObject_pDvmDex(t9)         # a3<- methodToCall->clazz->pDvmDex
3052
3053    # Update "thread" values for the new method
3054    sw     a0, offThread_method(rSELF)            # self->method = methodToCall
3055    sw     a3, offThread_methodClassDex(rSELF)    # self->methodClassDex = ...
3056    move   rFP, a1                                # fp = newFp
3057    sw     rFP, offThread_curFrame(rSELF)         # self->curFrame = newFp
3058#if defined(TEMPLATE_INLINE_PROFILING)
3059    # preserve a0-a2 and ra
3060    SCRATCH_STORE(a0, 0)
3061    SCRATCH_STORE(a1, 4)
3062    SCRATCH_STORE(a2, 8)
3063    SCRATCH_STORE(ra, 12)
3064
3065    move   a1, rSELF
3066    # a0=methodToCall, a1=rSELF
3067    la     t9, dvmFastMethodTraceEnter
3068    jalr   t9                                     # NOTE(review): other templates use JALR(t9) here - confirm intentional
3069    lw     gp, STACK_OFFSET_GP(sp)                # restore gp clobbered by the call
3070
3071    # restore a0-a2 and ra
3072    SCRATCH_LOAD(ra, 12)
3073    SCRATCH_LOAD(a2, 8)
3074    SCRATCH_LOAD(a1, 4)
3075    SCRATCH_LOAD(a0, 0)
3076#endif
3077    RETURN                                        # return to the callee-chaining cell
3078
3079#undef TEMPLATE_INLINE_PROFILING
3080
3081/* ------------------------------ */
3082    .balign 4
3083    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF
3084dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF:
3085/* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF.S */
3086#define TEMPLATE_INLINE_PROFILING
3087/* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
3088    /*
3089     * For polymorphic callsite, check whether the cached class pointer matches
3090     * the current one. If so setup the Dalvik frame and return to the
3091     * Thumb code through the link register to transfer control to the callee
3092     * method through a dedicated chaining cell.
3093     *
3094     * The predicted chaining cell is declared in ArmLIR.h with the
3095     * following layout:
3096     *
3097     *  typedef struct PredictedChainingCell {
3098     *      u4 branch;
3099     *      u4 delay_slot;
3100     *      const ClassObject *clazz;
3101     *      const Method *method;
3102     *      u4 counter;
3103     *  } PredictedChainingCell;
3104     *
3105     * Upon returning to the callsite:
3106     *    - lr   : to branch to the chaining cell
3107     *    - lr+8 : to punt to the interpreter
3108     *    - lr+16: to fully resolve the callee and may rechain.
3109     *             a3 <- class
3110     */
3111    # a0 = this, a1 = returnCell, a2 = predictedChainCell, rPC = dalvikCallsite
3112    lw      a3, offObject_clazz(a0)     # a3 <- this->class
3113    lw      rIBASE, 8(a2)                   # rIBASE <- predictedChainCell->clazz (offset 8 per layout above)
3114    lw      a0, 12(a2)                  # a0 <- predictedChainCell->method
3115    lw      t1, offThread_icRechainCount(rSELF)    # t1 <- shared rechainCount
3116
3117#if defined(WITH_JIT_TUNING)
3118    la      rINST, .LdvmICHitCount
3119    #add     t2, t2, 1
3120    bne    a3, rIBASE, 1f               # only count when the prediction hits
3121    nop
3122    lw      t2, 0(rINST)
3123    add     t2, t2, 1                   # increment inline-cache hit counter
3124    sw      t2, 0(rINST)
31251:
3126    #add     t2, t2, 1
3127#endif
3128    beq     a3, rIBASE, .LinvokeChainProf       # branch if predicted chain is valid
3129    lw      rINST, offClassObject_vtable(a3)     # rINST <- this->class->vtable
3130    beqz    rIBASE, 2f                      # initialized class or not
3131    sub     a1, t1, 1                   # count--
3132    sw      a1, offThread_icRechainCount(rSELF)   # write back to InterpState
3133    b       3f
31342:
3135    move    a1, zero                    # uninitialized cell: no rechain countdown
31363:
3137    add     ra, ra, 16                  # return to fully-resolve landing pad
3138    /*
3139     * a1 <- count
3140     * a2 <- &predictedChainCell
3141     * a3 <- this->class
3142     * rPC <- dPC
3143     * rINST <- this->class->vtable
3144     */
3145    RETURN
3146
3147#undef TEMPLATE_INLINE_PROFILING
3148
3149/* ------------------------------ */
3150    .balign 4
3151    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF
3152dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF:
3153/* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE_PROF.S */
3154#define TEMPLATE_INLINE_PROFILING
3155/* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE.S */
3156    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
3157    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
3158    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
3159    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
3160    move   a3, a1                                 # a3<- returnCell
3161    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
3162    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
3163    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
3164    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
3165    bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
3166    RETURN                                        # return to raise stack overflow excep.
3167
31681:
3169    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
3170    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
3171    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
3172    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns
3173
3174    # set up newSaveArea
3175    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
3176    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
3177    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
3178    lw     rTEMP, offMethod_nativeFunc(a0)        # t9<- method->nativeFunc
3179#if !defined(WITH_SELF_VERIFICATION)
3180    beqz   t8, 2f                                 # breakFlags != 0
3181    RETURN                                        # bail to the interpreter
31822:
3183#else
3184    RETURN                                        # bail to the interpreter unconditionally
3185#endif
3186
3187    # go ahead and transfer control to the native code
3188    lw     t6, offThread_jniLocal_topCookie(rSELF)  # t6<- thread->localRef->...
3189    sw     a1, offThread_curFrame(rSELF)          # self->curFrame = newFp
3190    sw     zero, offThread_inJitCodeCache(rSELF)  # not in the jit code cache
3191    sw     t6, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1)
3192                                                  # newFp->localRefCookie=top
3193    SAVEAREA_FROM_FP(rBIX, a1)                    # rBIX<- new stack save area
3194    move   a2, a0                                 # a2<- methodToCall
3195    move   a0, a1                                 # a0<- newFp
3196    add    a1, rSELF, offThread_retval            # a1<- &retval
3197    move   a3, rSELF                              # a3<- self
3198#if defined(TEMPLATE_INLINE_PROFILING)
3199    # a2: methodToCall
3200    # preserve rTEMP,a0-a3
3201    SCRATCH_STORE(a0, 0)
3202    SCRATCH_STORE(a1, 4)
3203    SCRATCH_STORE(a2, 8)
3204    SCRATCH_STORE(a3, 12)
3205    SCRATCH_STORE(rTEMP, 16)
3206
3207    move   a0, a2
3208    move   a1, rSELF
3209    # a0=JNIMethod, a1=rSELF
3210    la      t9, dvmFastMethodTraceEnter
3211    JALR(t9)                                      # off to the native code
3212    lw     gp, STACK_OFFSET_GP(sp)
3213
3214    # restore rTEMP,a0-a3
3215    SCRATCH_LOAD(rTEMP, 16)
3216    SCRATCH_LOAD(a3, 12)
3217    SCRATCH_LOAD(a2, 8)
3218    SCRATCH_LOAD(a1, 4)
3219    SCRATCH_LOAD(a0, 0)
3220
3221    move   rOBJ, a2                               # save a2
3222#endif
3223    move   t9, rTEMP
3224    JALR(t9)                                   # off to the native code
3225    lw     gp, STACK_OFFSET_GP(sp)
3226
3227#if defined(TEMPLATE_INLINE_PROFILING)
3228    move   a0, rOBJ
3229    move   a1, rSELF
3230    # a0=JNIMethod, a1=rSELF
3231    la      t9, dvmFastNativeMethodTraceExit
3232    JALR(t9)
3233    lw     gp, STACK_OFFSET_GP(sp)
3234#endif
3235
3236    # native return; rBIX=newSaveArea
3237    # equivalent to dvmPopJniLocals
3238    lw     a2, offStackSaveArea_returnAddr(rBIX)     # a2 = chaining cell ret addr
3239    lw     a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top
3240    lw     a1, offThread_exception(rSELF)            # check for exception
3241    sw     rFP, offThread_curFrame(rSELF)            # self->curFrame = fp
3242    sw     a0, offThread_jniLocal_topCookie(rSELF)   # new top <- old top
3243    lw     a0, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
3244
3245    # a0 = dalvikCallsitePC
3246    bnez   a1, .LhandleException                     # handle exception if any
3247
3248    sw     a2, offThread_inJitCodeCache(rSELF)       # set the mode properly
3249    beqz   a2, 3f
3250    jr     a2                                        # go if return chaining cell still exist
3251
32523:
3253    # continue executing the next instruction through the interpreter
3254    la     a1, .LdvmJitToInterpTraceSelectNoChain    # defined in footer.S
3255    lw     a1, (a1)
3256    add    rPC, a0, 3*2                              # reconstruct new rPC (advance 3 dalvik instr)
3257
3258#if defined(WITH_JIT_TUNING)
3259    li     a0, kCallsiteInterpreted
3260#endif
3261    jr     a1
3262
3263#undef TEMPLATE_INLINE_PROFILING
3264
3265    .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
/* File: mips/footer.S */
/*
 * ===========================================================================
 *  Common subroutines and data
 * ===========================================================================
 */

    # NOTE(review): executable code below is emitted into .data.rel.ro rather
    # than .text — presumably intentional for this build; confirm upstream.
    .section .data.rel.ro
    .align  4
.LinvokeNative:
    # Prep for the native call
    # a1 = newFP, a0 = methodToCall
    lw     t9, offThread_jniLocal_topCookie(rSELF)  # t9<- thread->localRef->...
    sw     zero, offThread_inJitCodeCache(rSELF)    # not in jit code cache
    sw     a1, offThread_curFrame(rSELF)            # self->curFrame = newFp
    sw     t9, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1)
                                                 # newFp->localRefCookie=top
    lhu     ra, offThread_subMode(rSELF)         # ra<- self->subMode (used as scratch here)
    SAVEAREA_FROM_FP(rBIX, a1)                   # rBIX<- new stack save area

    move    a2, a0                               # a2<- methodToCall
    move    a0, a1                               # a0<- newFp
    add     a1, rSELF, offThread_retval          # a1<- &retval
    move    a3, rSELF                            # a3<- self
    andi    ra, kSubModeMethodTrace              # method-trace submode active?
    beqz    ra, 121f                             # no: plain native call
    # a2: methodToCall
    # preserve a0-a3 across the trace-enter call
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(a3, 12)

    move    a0, a2
    move    a1, rSELF
    la      t9, dvmFastMethodTraceEnter
    JALR(t9)
    lw      gp, STACK_OFFSET_GP(sp)              # restore gp clobbered by the call

    # restore a0-a3
    SCRATCH_LOAD(a3, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)

    lw      t9, offMethod_nativeFunc(a2)
    JALR(t9)                                      # call methodToCall->nativeFunc
    lw      gp, STACK_OFFSET_GP(sp)

    # restore a2 again (clobbered by the native call)
    SCRATCH_LOAD(a2, 8)

    move    a0, a2
    move    a1, rSELF
    la      t9, dvmFastNativeMethodTraceExit
    JALR(t9)
    lw      gp, STACK_OFFSET_GP(sp)
    b       212f

121:
    lw      t9, offMethod_nativeFunc(a2)
    JALR(t9)                                     # call methodToCall->nativeFunc
    lw      gp, STACK_OFFSET_GP(sp)

212:
    # native return; rBIX=newSaveArea
    # equivalent to dvmPopJniLocals
    lw     a2, offStackSaveArea_returnAddr(rBIX)     # a2 = chaining cell ret addr
    lw     a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top
    lw     a1, offThread_exception(rSELF)            # check for exception
    sw     rFP, offThread_curFrame(rSELF)            # self->curFrame = fp
    sw     a0, offThread_jniLocal_topCookie(rSELF)   # new top <- old top
    lw     a0, offStackSaveArea_savedPc(rBIX)        # reload rPC

    # a0 = dalvikCallsitePC
    bnez   a1, .LhandleException                     # handle exception if any

    sw     a2, offThread_inJitCodeCache(rSELF)       # set the mode properly
    beqz   a2, 3f
    jr     a2                                        # go if return chaining cell still exist

3:
    # continue executing the next instruction through the interpreter
    la     a1, .LdvmJitToInterpTraceSelectNoChain    # defined in footer.S
    lw     a1, (a1)
    add    rPC, a0, 3*2                              # reconstruct new rPC

#if defined(WITH_JIT_TUNING)
    li     a0, kCallsiteInterpreted
#endif
    jr     a1


/*
 * Common exception path for all templates above.
 * On entry:
 * a0  Faulting Dalvik PC
 */
.LhandleException:
#if defined(WITH_SELF_VERIFICATION)
    la     t0, .LdeadFood
    lw     t0, (t0)                  # should not see this under self-verification mode
    jr     t0
.LdeadFood:
    .word   0xdeadf00d
#endif
    sw     zero, offThread_inJitCodeCache(rSELF)  # in interpreter land
    la     a1, .LdvmMterpCommonExceptionThrown  # PIC way of getting &func
    lw     a1, (a1)
    la     rIBASE, .LdvmAsmInstructionStart     # PIC way of getting &func
    lw     rIBASE, (rIBASE)
    move   rPC, a0                              # reload the faulting Dalvik address
    jr     a1                                   # branch to dvmMterpCommonExceptionThrown
3378
3379    .align  4
3380.LdvmAsmInstructionStart:
3381    .word   dvmAsmInstructionStart
3382.LdvmJitToInterpNoChainNoProfile:
3383    .word   dvmJitToInterpNoChainNoProfile
3384.LdvmJitToInterpTraceSelectNoChain:
3385    .word   dvmJitToInterpTraceSelectNoChain
3386.LdvmJitToInterpNoChain:
3387    .word   dvmJitToInterpNoChain
3388.LdvmMterpStdBail:
3389    .word   dvmMterpStdBail
3390.LdvmMterpCommonExceptionThrown:
3391    .word   dvmMterpCommonExceptionThrown
3392.LdvmLockObject:
3393    .word   dvmLockObject
3394#if defined(WITH_JIT_TUNING)
3395.LdvmICHitCount:
3396    .word   gDvmICHitCount
3397#endif
3398#if defined(WITH_SELF_VERIFICATION)
3399.LdvmSelfVerificationMemOpDecode:
3400    .word   dvmSelfVerificationMemOpDecode
3401#endif
3402
3403    .global dmvCompilerTemplateEnd
3404dmvCompilerTemplateEnd:
3405
3406#endif /* WITH_JIT */
3407
3408