/* CompilerTemplateAsm-mips.S, revision 5dfcc78af479937ba8dafceefd9b1931a88dfaaf */
/*
 * This file was generated automatically by gen-template.py for 'mips'.
 *
 * --> DO NOT EDIT <--
 */

/* File: mips/header.S */
/*
 * Copyright (C) 2008 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#if defined(WITH_JIT)

/*
 * This is a #include, not a %include, because we want the C pre-processor
 * to expand the macros into assembler assignment statements.
 */
#include "../../../mterp/common/asm-constants.h"
#include "../../../mterp/common/mips-defines.h"
#include "../../../mterp/common/jit-config.h"
#include <asm/regdef.h>
#include <asm/fpregdef.h>

#ifdef	__mips_hard_float
#define		HARD_FLOAT
#else
#define		SOFT_FLOAT
#endif

/* MIPS definitions and declarations

   reg	nick		purpose
   s0	rPC		interpreted program counter, used for fetching instructions
   s1	rFP		interpreted frame pointer, used for accessing locals and args
   s2	rSELF		pointer to thread
   s3	rIBASE		interpreted instruction base pointer, used for computed goto
   s4	rINST		first 16-bit code unit of current instruction
*/

/* register offsets */
#define r_ZERO      0
#define r_AT        1
#define r_V0        2
#define r_V1        3
#define r_A0        4
#define r_A1        5
#define r_A2        6
#define r_A3        7
#define r_T0        8
#define r_T1        9
#define r_T2        10
#define r_T3        11
#define r_T4        12
#define r_T5        13
#define r_T6        14
#define r_T7        15
#define r_S0        16
#define r_S1        17
#define r_S2        18
#define r_S3        19
#define r_S4        20
#define r_S5        21
#define r_S6        22
#define r_S7        23
#define r_T8        24
#define r_T9        25
#define r_K0        26
#define r_K1        27
#define r_GP        28
#define r_SP        29
#define r_FP        30
#define r_RA        31
#define r_F0        32
#define r_F1        33
#define r_F2        34
#define r_F3        35
#define r_F4        36
#define r_F5        37
#define r_F6        38
#define r_F7        39
#define r_F8        40
#define r_F9        41
#define r_F10       42
#define r_F11       43
#define r_F12       44
#define r_F13       45
#define r_F14       46
#define r_F15       47
#define r_F16       48
#define r_F17       49
#define r_F18       50
#define r_F19       51
#define r_F20       52
#define r_F21       53
#define r_F22       54
#define r_F23       55
#define r_F24       56
#define r_F25       57
#define r_F26       58
#define r_F27       59
#define r_F28       60
#define r_F29       61
#define r_F30       62
#define r_F31       63

/* single-purpose registers, given names for clarity */
#define rPC	s0
#define rFP	s1
#define rSELF	s2
#define rIBASE	s3
#define rINST	s4
#define rOBJ	s5
#define rBIX	s6
#define rTEMP	s7

/* A 64-bit ("long") argument pair is endian-dependent: in little-endian mode
   the low word goes in a0 and the high word in a1, while in big-endian mode
   the pair is swapped (high word in a0, low word in a1).  The rARG/rRESULT
   aliases below hide this, so rARG0/rRESULT0 is always the low word and
   rARG1/rRESULT1 the high word. */

#ifdef HAVE_LITTLE_ENDIAN
#define rARG0     a0
#define rARG1     a1
#define rARG2     a2
#define rARG3     a3
#define rRESULT0  v0
#define rRESULT1  v1
#else
#define rARG0     a1
#define rARG1     a0
#define rARG2     a3
#define rARG3     a2
#define rRESULT0  v1
#define rRESULT1  v0
#endif
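/*
 * Illustrative sketch (not part of the generated template): with these
 * aliases, code that passes the 64-bit value 0x1122334455667788 to a helper
 * can be written once for both endiannesses:
 *
 *     li   rARG0, 0x55667788     # low word: a0 on LE, a1 on BE
 *     li   rARG1, 0x11223344     # high word: a1 on LE, a0 on BE
 *     JAL(some_long_helper)      # hypothetical helper taking one long;
 *                                # result low word in rRESULT0, high in rRESULT1
 */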


/* save/restore the PC and/or FP from the thread struct */
#define LOAD_PC_FROM_SELF()	lw	rPC, offThread_pc(rSELF)
#define SAVE_PC_TO_SELF()	sw	rPC, offThread_pc(rSELF)
#define LOAD_FP_FROM_SELF()	lw	rFP, offThread_curFrame(rSELF)
#define SAVE_FP_TO_SELF()	sw	rFP, offThread_curFrame(rSELF)

#define EXPORT_PC() \
	sw	rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)

#define SAVEAREA_FROM_FP(rd, _fpreg) \
	subu	rd, _fpreg, sizeofStackSaveArea

#define FETCH_INST()			lhu	rINST, (rPC)

#define FETCH_ADVANCE_INST(_count)	lhu     rINST, (_count*2)(rPC); \
					addu	rPC, rPC, (_count * 2)

#define FETCH_ADVANCE_INST_RB(rd)	addu	rPC, rPC, rd;	\
					lhu     rINST, (rPC)

#define FETCH(rd, _count)		lhu	rd, (_count * 2)(rPC)
#define FETCH_S(rd, _count)		lh	rd, (_count * 2)(rPC)

#ifdef HAVE_LITTLE_ENDIAN

#define FETCH_B(rd, _count)            lbu     rd, (_count * 2)(rPC)
#define FETCH_C(rd, _count)            lbu     rd, (_count * 2 + 1)(rPC)

#else

#define FETCH_B(rd, _count)            lbu     rd, (_count * 2 + 1)(rPC)
#define FETCH_C(rd, _count)            lbu     rd, (_count * 2)(rPC)

#endif
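/*
 * Illustration: for the 16-bit code unit at rPC + _count*2, FETCH_B yields
 * its low-order byte (the opcode byte when _count is 0) and FETCH_C its
 * high-order byte; the +1 offset is flipped between the two variants so
 * both endiannesses see the same logical bytes, e.g.
 *
 *     FETCH_B(t0, 0)         # t0 <- opcode byte of the current instruction
 */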

#define GET_INST_OPCODE(rd)		and	rd, rINST, 0xFF

#define GOTO_OPCODE(rd)			sll  rd, rd, -1000;	\
					addu rd, rIBASE, rd;	\
					jr  rd
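/*
 * Note: the -1000 shift count above appears to be gen-template.py's
 * placeholder for an unconfigured handler size; the compiler templates in
 * this file do not dispatch through GOTO_OPCODE.
 */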


#define LOAD(rd, rbase)			lw  rd, 0(rbase)
#define LOAD_F(rd, rbase)		l.s rd, (rbase)
#define STORE(rd, rbase)		sw  rd, 0(rbase)
#define STORE_F(rd, rbase)		s.s rd, (rbase)

#define GET_VREG(rd, rix)		LOAD_eas2(rd,rFP,rix)

#define GET_VREG_F(rd, rix)		EAS2(AT, rFP, rix);		\
					.set noat;  l.s rd, (AT); .set at

#define SET_VREG(rd, rix)		STORE_eas2(rd, rFP, rix)

#define SET_VREG_GOTO(rd, rix, dst)	.set noreorder;		\
					sll  dst, dst, -1000;	\
					addu dst, rIBASE, dst;			\
					sll  t8, rix, 2;	\
					addu t8, t8, rFP;	\
					jr  dst;		\
					sw  rd, 0(t8);		\
					.set reorder
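/*
 * Note: under .set noreorder the final sw above sits in the jr delay slot,
 * so the vreg store completes before control reaches the next handler.
 */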

#define SET_VREG_F(rd, rix)		EAS2(AT, rFP, rix);		\
					.set noat;  s.s	rd, (AT); .set at


#define GET_OPA(rd)			srl     rd, rINST, 8
#ifndef		MIPS32R2
#define GET_OPA4(rd)			GET_OPA(rd);  and  rd, 0xf
#else
#define GET_OPA4(rd)			ext	rd, rINST, 8, 4
#endif
#define GET_OPB(rd)			srl     rd, rINST, 12

#define LOAD_rSELF_OFF(rd,off)		lw    rd, offThread_##off##(rSELF)

#define LOAD_rSELF_method(rd)		LOAD_rSELF_OFF(rd, method)
#define LOAD_rSELF_methodClassDex(rd)	LOAD_rSELF_OFF(rd, methodClassDex)
#define LOAD_rSELF_interpStackEnd(rd)	LOAD_rSELF_OFF(rd, interpStackEnd)
#define LOAD_rSELF_retval(rd)		LOAD_rSELF_OFF(rd, retval)
#define LOAD_rSELF_pActiveProfilers(rd)	LOAD_rSELF_OFF(rd, pActiveProfilers)
#define LOAD_rSELF_bailPtr(rd)		LOAD_rSELF_OFF(rd, bailPtr)

#define GET_JIT_PROF_TABLE(rd)		LOAD_rSELF_OFF(rd,pJitProfTable)
#define GET_JIT_THRESHOLD(rd)		LOAD_rSELF_OFF(rd,jitThreshold)

/*
 * Form an Effective Address rd = rbase + roff<<n;
 * Uses reg AT
 */
#define EASN(rd,rbase,roff,rshift)	.set noat;		\
					sll  AT, roff, rshift;	\
					addu rd, rbase, AT;	\
					.set at

#define EAS1(rd,rbase,roff)		EASN(rd,rbase,roff,1)
#define EAS2(rd,rbase,roff)		EASN(rd,rbase,roff,2)
#define EAS3(rd,rbase,roff)		EASN(rd,rbase,roff,3)
#define EAS4(rd,rbase,roff)		EASN(rd,rbase,roff,4)
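/*
 * Illustration: EAS2(a0, rFP, a1) expands to
 *
 *     .set noat
 *     sll  AT, a1, 2          # AT <- vreg index * 4
 *     addu a0, rFP, AT        # a0 <- &fp[vreg]
 *     .set at
 *
 * i.e. the address of Dalvik vreg a1 in the current frame.
 */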

/*
 * Form an Effective Shift Right rd = rbase + roff>>n;
 * Uses reg AT
 */
#define ESRN(rd,rbase,roff,rshift)	.set noat;		\
					srl  AT, roff, rshift;	\
					addu rd, rbase, AT;	\
					.set at

#define LOAD_eas2(rd,rbase,roff)	EAS2(AT, rbase, roff);  \
					.set noat;  lw  rd, 0(AT); .set at

#define STORE_eas2(rd,rbase,roff)	EAS2(AT, rbase, roff);  \
					.set noat;  sw  rd, 0(AT); .set at

#define LOAD_RB_OFF(rd,rbase,off)	lw	rd, off(rbase)
#define LOADu2_RB_OFF(rd,rbase,off)	lhu	rd, off(rbase)
#define STORE_RB_OFF(rd,rbase,off)	sw	rd, off(rbase)

#ifdef HAVE_LITTLE_ENDIAN

#define STORE64_off(rlo,rhi,rbase,off)	        sw	rlo, off(rbase);	\
					        sw	rhi, (off+4)(rbase)
#define LOAD64_off(rlo,rhi,rbase,off)	        lw	rlo, off(rbase);	\
					        lw	rhi, (off+4)(rbase)

#define STORE64_off_F(rlo,rhi,rbase,off)	s.s	rlo, off(rbase);	\
						s.s	rhi, (off+4)(rbase)
#define LOAD64_off_F(rlo,rhi,rbase,off)		l.s	rlo, off(rbase);	\
						l.s	rhi, (off+4)(rbase)
#else

#define STORE64_off(rlo,rhi,rbase,off)	        sw	rlo, (off+4)(rbase);	\
					        sw	rhi, (off)(rbase)
#define LOAD64_off(rlo,rhi,rbase,off)	        lw	rlo, (off+4)(rbase);	\
					        lw	rhi, (off)(rbase)
#define STORE64_off_F(rlo,rhi,rbase,off)	s.s	rlo, (off+4)(rbase);	\
						s.s	rhi, (off)(rbase)
#define LOAD64_off_F(rlo,rhi,rbase,off)		l.s	rlo, (off+4)(rbase);	\
						l.s	rhi, (off)(rbase)
#endif
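/*
 * Illustration: after STORE64(a0, a1, t0) the 64-bit value whose low word is
 * a0 and high word is a1 sits at 0(t0) in native byte order: LE stores a0 at
 * t0+0 and a1 at t0+4, while BE stores a1 at t0+0 and a0 at t0+4.
 */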

#define STORE64(rlo,rhi,rbase)		STORE64_off(rlo,rhi,rbase,0)
#define LOAD64(rlo,rhi,rbase)		LOAD64_off(rlo,rhi,rbase,0)

#define STORE64_F(rlo,rhi,rbase)	STORE64_off_F(rlo,rhi,rbase,0)
#define LOAD64_F(rlo,rhi,rbase)		LOAD64_off_F(rlo,rhi,rbase,0)

#define STORE64_lo(rd,rbase)		sw	rd, 0(rbase)
#define STORE64_hi(rd,rbase)		sw	rd, 4(rbase)


#define LOAD_offThread_exception(rd,rbase)		LOAD_RB_OFF(rd,rbase,offThread_exception)
#define LOAD_base_offArrayObject_length(rd,rbase)	LOAD_RB_OFF(rd,rbase,offArrayObject_length)
#define LOAD_base_offClassObject_accessFlags(rd,rbase)	LOAD_RB_OFF(rd,rbase,offClassObject_accessFlags)
#define LOAD_base_offClassObject_descriptor(rd,rbase)   LOAD_RB_OFF(rd,rbase,offClassObject_descriptor)
#define LOAD_base_offClassObject_super(rd,rbase)	LOAD_RB_OFF(rd,rbase,offClassObject_super)

#define LOAD_base_offClassObject_vtable(rd,rbase)	LOAD_RB_OFF(rd,rbase,offClassObject_vtable)
#define LOAD_base_offClassObject_vtableCount(rd,rbase)	LOAD_RB_OFF(rd,rbase,offClassObject_vtableCount)
#define LOAD_base_offDvmDex_pResClasses(rd,rbase)	LOAD_RB_OFF(rd,rbase,offDvmDex_pResClasses)
#define LOAD_base_offDvmDex_pResFields(rd,rbase)	LOAD_RB_OFF(rd,rbase,offDvmDex_pResFields)

#define LOAD_base_offDvmDex_pResMethods(rd,rbase)	LOAD_RB_OFF(rd,rbase,offDvmDex_pResMethods)
#define LOAD_base_offDvmDex_pResStrings(rd,rbase)	LOAD_RB_OFF(rd,rbase,offDvmDex_pResStrings)
#define LOAD_base_offInstField_byteOffset(rd,rbase)	LOAD_RB_OFF(rd,rbase,offInstField_byteOffset)
#define LOAD_base_offStaticField_value(rd,rbase)	LOAD_RB_OFF(rd,rbase,offStaticField_value)
#define LOAD_base_offMethod_clazz(rd,rbase)		LOAD_RB_OFF(rd,rbase,offMethod_clazz)

#define LOAD_base_offMethod_name(rd,rbase)		LOAD_RB_OFF(rd,rbase,offMethod_name)
#define LOAD_base_offObject_clazz(rd,rbase)		LOAD_RB_OFF(rd,rbase,offObject_clazz)

#define LOADu2_offMethod_methodIndex(rd,rbase)		LOADu2_RB_OFF(rd,rbase,offMethod_methodIndex)


#define STORE_offThread_exception(rd,rbase)		STORE_RB_OFF(rd,rbase,offThread_exception)


#define	STACK_STORE(rd,off)	sw   rd, off(sp)
#define	STACK_LOAD(rd,off)	lw   rd, off(sp)
#define CREATE_STACK(n)	 	subu sp, sp, n
#define DELETE_STACK(n)	 	addu sp, sp, n

#define SAVE_RA(offset)	 	STACK_STORE(ra, offset)
#define LOAD_RA(offset)	 	STACK_LOAD(ra, offset)

#define LOAD_ADDR(dest,addr)	la   dest, addr
#define LOAD_IMM(dest, imm)	li   dest, imm
#define MOVE_REG(dest,src)	move dest, src
#define	RETURN			jr   ra
#define	STACK_SIZE		128

#define STACK_OFFSET_ARG04	16
#define STACK_OFFSET_GP		84
#define STACK_OFFSET_rFP	112

/* This directive makes sure all subsequent jal calls restore gp at a known offset */
        .cprestore STACK_OFFSET_GP

#define JAL(func)		move rTEMP, ra;				\
				jal  func;				\
				move ra, rTEMP

#define JALR(reg)		move rTEMP, ra;				\
				jalr ra, reg;				\
				move ra, rTEMP
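/*
 * Illustration: JAL(foo) expands to "move rTEMP, ra; jal foo; move ra, rTEMP",
 * parking the address the compiled trace will return through in rTEMP (s7,
 * callee-saved) so it survives the helper call.
 */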

#define BAL(n)			bal  n

#define	STACK_STORE_RA()  	CREATE_STACK(STACK_SIZE);		\
				STACK_STORE(gp, STACK_OFFSET_GP);	\
				STACK_STORE(ra, 124)

#define	STACK_STORE_S0()  	STACK_STORE_RA();			\
				STACK_STORE(s0, 116)

#define	STACK_STORE_S0S1()  	STACK_STORE_S0();			\
				STACK_STORE(s1, STACK_OFFSET_rFP)

#define	STACK_LOAD_RA()		STACK_LOAD(ra, 124);			\
				STACK_LOAD(gp, STACK_OFFSET_GP);	\
				DELETE_STACK(STACK_SIZE)

#define	STACK_LOAD_S0()  	STACK_LOAD(s0, 116);			\
				STACK_LOAD_RA()

#define	STACK_LOAD_S0S1()  	STACK_LOAD(s1, STACK_OFFSET_rFP);	\
				STACK_LOAD_S0()

#define STACK_STORE_FULL()	CREATE_STACK(STACK_SIZE);	\
				STACK_STORE(ra, 124);		\
				STACK_STORE(fp, 120);		\
				STACK_STORE(s0, 116);		\
				STACK_STORE(s1, STACK_OFFSET_rFP);	\
				STACK_STORE(s2, 108);		\
				STACK_STORE(s3, 104);		\
				STACK_STORE(s4, 100);		\
				STACK_STORE(s5, 96);		\
				STACK_STORE(s6, 92);		\
				STACK_STORE(s7, 88);

#define STACK_LOAD_FULL()	STACK_LOAD(gp, STACK_OFFSET_GP);	\
				STACK_LOAD(s7, 88);	\
				STACK_LOAD(s6, 92);	\
				STACK_LOAD(s5, 96);	\
				STACK_LOAD(s4, 100);	\
				STACK_LOAD(s3, 104);	\
				STACK_LOAD(s2, 108);	\
				STACK_LOAD(s1, STACK_OFFSET_rFP);	\
				STACK_LOAD(s0, 116);	\
				STACK_LOAD(fp, 120);	\
				STACK_LOAD(ra, 124);	\
				DELETE_STACK(STACK_SIZE)
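/*
 * Illustrative layout of the 128-byte frame built by the macros above
 * (offsets from sp):
 *
 *   124: ra    120: fp    116: s0    112: s1 (STACK_OFFSET_rFP)
 *   108: s2    104: s3    100: s4     96: s5
 *    92: s6     88: s7     84: gp (STACK_OFFSET_GP)
 *    32: scratch area (STACK_OFFSET_SCR, see SCRATCH_STORE/SCRATCH_LOAD below)
 *    16: fifth and later outgoing args (STACK_OFFSET_ARG04)
 *     0: o32 home space for register args a0-a3
 */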

/*
 * first 8 words are reserved for function calls
 * Maximum offset is STACK_OFFSET_SCRMX-STACK_OFFSET_SCR
 */
#define STACK_OFFSET_SCR   32
#define SCRATCH_STORE(r,off) \
    STACK_STORE(r, STACK_OFFSET_SCR+off);
#define SCRATCH_LOAD(r,off) \
    STACK_LOAD(r, STACK_OFFSET_SCR+off);
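/*
 * Usage sketch: the templates below pair these as, e.g., SCRATCH_STORE(a0, 0)
 * ... SCRATCH_LOAD(a0, 0), using offsets 0..12 (i.e. 32(sp)..44(sp)) to
 * preserve a0-a3/ra around profiling helper calls.
 */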

/* File: mips/platform.S */
/*
 * ===========================================================================
 *  CPU-version-specific defines and utility
 * ===========================================================================
 */



    .global dvmCompilerTemplateStart
    .type   dvmCompilerTemplateStart, %function
    .section .data.rel.ro

dvmCompilerTemplateStart:

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_CMP_LONG
dvmCompiler_TEMPLATE_CMP_LONG:
/* File: mips/TEMPLATE_CMP_LONG.S */
    /*
     * Compare two 64-bit values
     *    x = y     return  0
     *    x < y     return -1
     *    x > y     return  1
     *
     * I think I can improve on the ARM code with the following observation:
     *    slt   t0,  x.hi, y.hi;        # (x.hi < y.hi) ? 1:0
     *    sgt   t1,  x.hi, y.hi;        # (x.hi > y.hi) ? 1:0
     *    subu  v0, t1, t0              # v0= -1:1:0 for [ < > = ]
     *
     * The register pair ordering depends on endianness (a1:a0 or a0:a1):
     *    a1:a0 => vBB
     *    a3:a2 => vCC
     */
    /* cmp-long vAA, vBB, vCC */
    slt    t0, rARG1, rARG3             # compare hi
    sgt    t1, rARG1, rARG3
    subu   v0, t1, t0                   # v0<- (-1,1,0)
    bnez   v0, .LTEMPLATE_CMP_LONG_finish
                                        # at this point x.hi==y.hi
    sltu   t0, rARG0, rARG2             # compare lo
    sgtu   t1, rARG0, rARG2
    subu   v0, t1, t0                   # v0<- (-1,1,0) for [< > =]
.LTEMPLATE_CMP_LONG_finish:
    RETURN

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_RETURN
dvmCompiler_TEMPLATE_RETURN:
/* File: mips/TEMPLATE_RETURN.S */
    /*
     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
     * If the stored value in returnAddr is non-zero, the caller was compiled
     * by the JIT, so return to the address in the code cache following the
     * invoke instruction.  Otherwise return to the special
     * dvmJitToInterpNoChain entry point.
     */
#if defined(TEMPLATE_INLINE_PROFILING)
    # preserve a0-a2 and ra
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(ra, 12)

    # a0=rSELF
    move    a0, rSELF
    la      t9, dvmFastMethodTraceExit
    JALR(t9)
    lw      gp, STACK_OFFSET_GP(sp)

    # restore a0-a2 and ra
    SCRATCH_LOAD(ra, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)
#endif
    SAVEAREA_FROM_FP(a0, rFP)           # a0<- saveArea (old)
    lw      t0, offStackSaveArea_prevFrame(a0)     # t0<- saveArea->prevFrame
    lbu     t1, offThread_breakFlags(rSELF)        # t1<- breakFlags
    lw      rPC, offStackSaveArea_savedPc(a0)      # rPC<- saveArea->savedPc
#if !defined(WITH_SELF_VERIFICATION)
    lw      t2,  offStackSaveArea_returnAddr(a0)   # t2<- chaining cell ret
#else
    move    t2, zero                               # disable chaining
#endif
    lw      a2, offStackSaveArea_method - sizeofStackSaveArea(t0)
                                                   # a2<- method we're returning to
#if !defined(WITH_SELF_VERIFICATION)
    beq     a2, zero, 1f                           # bail to interpreter
#else
    bne     a2, zero, 2f
    JALR(ra)                                       # punt to interpreter and compare state
    # DOUG: assume this does not return ???
2:
#endif
    la      t4, .LdvmJitToInterpNoChainNoProfile   # defined in footer.S
    lw      a1, (t4)
    move    rFP, t0                                # publish new FP
    beq     a2, zero, 4f
    lw      t0, offMethod_clazz(a2)                # t0<- method->clazz
4:

    sw      a2, offThread_method(rSELF)            # self->method = newSave->method
    lw      a0, offClassObject_pDvmDex(t0)         # a0<- method->clazz->pDvmDex
    sw      rFP, offThread_curFrame(rSELF)         # self->curFrame = fp
    add     rPC, rPC, 3*2                          # publish new rPC
    sw      a0, offThread_methodClassDex(rSELF)
    movn    t2, zero, t1                           # check the breakFlags and
                                                   # clear the chaining cell address
    sw      t2, offThread_inJitCodeCache(rSELF)    # in code cache or not
    beq     t2, zero, 3f                           # chaining cell exists?
    JALR(t2)                                       # jump to the chaining cell
    # DOUG: assume this does not return ???
3:
#if defined(WITH_JIT_TUNING)
    li      a0, kCallsiteInterpreted
#endif
    j       a1                                     # callsite is interpreted
1:
    sw      zero, offThread_inJitCodeCache(rSELF)  # reset inJitCodeCache
    SAVE_PC_TO_SELF()                              # SAVE_PC_FP_TO_SELF()
    SAVE_FP_TO_SELF()
    la      t4, .LdvmMterpStdBail                  # defined in footer.S
    lw      a2, (t4)
    move    a0, rSELF                              # Expecting rSELF in a0
    JALR(a2)                                       # exit the interpreter
    # DOUG: assume this does not return ???

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT
dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
/* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
    /*
     * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
     * into rPC then jump to dvmJitToInterpNoChain to dispatch the
     * runtime-resolved callee.
     */
    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    lh     a2, offMethod_outsSize(a0)             # a2<- methodToCall->outsSize
    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    move   a3, a1                                 # a3<- returnCell
    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
    sll    t6, a2, 2                              # multiply outsSize by 4 (4 bytes per reg)
    sub    t0, t0, t6                             # t0<- bottom (newsave-outsSize)
    bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
    RETURN                                        # return to raise stack overflow excep.

1:
    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    lw     t9, offMethod_clazz(a0)                # t9<- methodToCall->clazz
    lw     t0, offMethod_accessFlags(a0)          # t0<- methodToCall->accessFlags
    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns

    # set up newSaveArea
    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    beqz   t8, 2f                                 # breakFlags != 0
    RETURN                                        # bail to the interpreter

2:
    and    t6, t0, ACC_NATIVE
    beqz   t6, 3f
#if !defined(WITH_SELF_VERIFICATION)
    j      .LinvokeNative
#else
    RETURN                                        # bail to the interpreter
#endif

3:
    # continue executing the next instruction through the interpreter
    la     t0, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S
    lw     rTEMP, (t0)
    lw     a3, offClassObject_pDvmDex(t9)         # a3<- method->clazz->pDvmDex

    # Update "thread" values for the new method
    sw     a0, offThread_method(rSELF)            # self->method = methodToCall
    sw     a3, offThread_methodClassDex(rSELF)    # self->methodClassDex = ...
    move   rFP, a1                                # fp = newFp
    sw     rFP, offThread_curFrame(rSELF)         # self->curFrame = newFp
#if defined(TEMPLATE_INLINE_PROFILING)
    # preserve a0-a3
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(a3, 12)

    # a0=methodToCall, a1=rSELF
    move   a1, rSELF
    la     t9, dvmFastMethodTraceEnter
    JALR(t9)
    lw     gp, STACK_OFFSET_GP(sp)

    # restore a0-a3
    SCRATCH_LOAD(a3, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)
#endif

    # Start executing the callee
#if defined(WITH_JIT_TUNING)
    li     a0, kInlineCacheMiss
#endif
    jr     rTEMP                                  # dvmJitToInterpTraceSelectNoChain

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN
dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
/* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN.S */
    /*
     * For a monomorphic callsite, set up the Dalvik frame and return through
     * ra to transfer control to the callee method through a dedicated
     * chaining cell.
     */
    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    # methodToCall is guaranteed to be non-native
.LinvokeChain:
    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    lh     a2, offMethod_outsSize(a0)             # a2<- methodToCall->outsSize
    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    move   a3, a1                                 # a3<- returnCell
    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
    add    t2, ra, 8                              # setup the punt-to-interp address
                                                  # 8 bytes skips branch and delay slot
    sll    t6, a2, 2                              # multiply outsSize by 4 (4 bytes per reg)
    sub    t0, t0, t6                             # t0<- bottom (newsave-outsSize)
    bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
    jr     t2                                     # return to raise stack overflow excep.

1:
    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    lw     t9, offMethod_clazz(a0)                # t9<- methodToCall->clazz
    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns

    # set up newSaveArea
    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    beqz   t8, 2f                                 # breakFlags != 0
    jr     t2                                     # bail to the interpreter

2:
    lw     a3, offClassObject_pDvmDex(t9)         # a3<- methodToCall->clazz->pDvmDex

    # Update "thread" values for the new method
    sw     a0, offThread_method(rSELF)            # self->method = methodToCall
    sw     a3, offThread_methodClassDex(rSELF)    # self->methodClassDex = ...
    move   rFP, a1                                # fp = newFp
    sw     rFP, offThread_curFrame(rSELF)         # self->curFrame = newFp
#if defined(TEMPLATE_INLINE_PROFILING)
    # preserve a0-a2 and ra
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(ra, 12)

    move   a1, rSELF
    # a0=methodToCall, a1=rSELF
    la     t9, dvmFastMethodTraceEnter
    jalr   t9
    lw     gp, STACK_OFFSET_GP(sp)

    # restore a0-a2 and ra
    SCRATCH_LOAD(ra, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)
#endif
    RETURN                                        # return to the callee-chaining cell

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
/* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
    /*
     * For a polymorphic callsite, check whether the cached class pointer
     * matches the current one.  If so, set up the Dalvik frame and return
     * through ra to transfer control to the callee method through a
     * dedicated chaining cell.
     *
     * The predicted chaining cell is declared in MipsLIR.h with the
     * following layout:
     *
     *  typedef struct PredictedChainingCell {
     *      u4 branch;
     *      u4 delay_slot;
     *      const ClassObject *clazz;
     *      const Method *method;
     *      u4 counter;
     *  } PredictedChainingCell;
     *
     * Upon returning to the callsite:
     *    - ra   : to branch to the chaining cell
     *    - ra+8 : to punt to the interpreter
     *    - ra+16: to fully resolve the callee (and possibly rechain);
     *             a3 <- class
     */
    # a0 = this, a1 = returnCell, a2 = predictedChainCell, rPC = dalvikCallsite
    lw      a3, offObject_clazz(a0)     # a3 <- this->class
    lw      rIBASE, 8(a2)               # rIBASE <- predictedChainCell->clazz
    lw      a0, 12(a2)                  # a0 <- predictedChainCell->method
    lw      t1, offThread_icRechainCount(rSELF)    # t1 <- shared rechainCount

#if defined(WITH_JIT_TUNING)
    la      rINST, .LdvmICHitCount
    #add     t2, t2, 1
    bne    a3, rIBASE, 1f
    nop
    lw      t2, 0(rINST)
    add     t2, t2, 1
    sw      t2, 0(rINST)
1:
    #add     t2, t2, 1
#endif
    beq     a3, rIBASE, .LinvokeChain   # branch if predicted chain is valid
    lw      rINST, offClassObject_vtable(a3)     # rINST <- this->class->vtable
    beqz    rIBASE, 2f                  # initialized class or not
    sub     a1, t1, 1                   # count--
    sw      a1, offThread_icRechainCount(rSELF)   # write back to InterpState
    b       3f
2:
    move    a1, zero
3:
    add     ra, ra, 16                  # return to fully-resolve landing pad
    /*
     * a1 <- count
     * a2 <- &predictedChainCell
     * a3 <- this->class
     * rPC <- dPC
     * rINST <- this->class->vtable
     */
    RETURN

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE
dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
/* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE.S */
    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    move   a3, a1                                 # a3<- returnCell
    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
    bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
    RETURN                                        # return to raise stack overflow excep.

1:
    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns

    # set up newSaveArea
    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    lw     rTEMP, offMethod_nativeFunc(a0)        # rTEMP<- method->nativeFunc
#if !defined(WITH_SELF_VERIFICATION)
    beqz   t8, 2f                                 # breakFlags != 0
    RETURN                                        # bail to the interpreter
2:
#else
    RETURN                                        # bail to the interpreter unconditionally
#endif

    # go ahead and transfer control to the native code
    lw     t6, offThread_jniLocal_topCookie(rSELF)  # t6<- thread->localRef->...
    sw     a1, offThread_curFrame(rSELF)          # self->curFrame = newFp
    sw     zero, offThread_inJitCodeCache(rSELF)  # not in the jit code cache
    sw     t6, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1)
                                                  # newFp->localRefCookie=top
    SAVEAREA_FROM_FP(rBIX, a1)                    # rBIX<- new stack save area
    move   a2, a0                                 # a2<- methodToCall
    move   a0, a1                                 # a0<- newFp
    add    a1, rSELF, offThread_retval            # a1<- &retval
    move   a3, rSELF                              # a3<- self
#if defined(TEMPLATE_INLINE_PROFILING)
    # a2: methodToCall
    # preserve a0-a3
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(a3, 12)

    move   a0, a2
    move   a1, rSELF
    # a0=JNIMethod, a1=rSELF
    la      t9, dvmFastMethodTraceEnter
    JALR(t9)                                      # method trace-enter hook
    lw     gp, STACK_OFFSET_GP(sp)

    # restore a0-a3
    SCRATCH_LOAD(a3, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)

    move   rOBJ, a2                               # save a2
#endif

    JALR(rTEMP)                                   # off to the native code
    lw     gp, STACK_OFFSET_GP(sp)

#if defined(TEMPLATE_INLINE_PROFILING)
    move   a0, rOBJ
    move   a1, rSELF
    # a0=JNIMethod, a1=rSELF
    la      t9, dvmFastNativeMethodTraceExit
    JALR(t9)
    lw     gp, STACK_OFFSET_GP(sp)
#endif

    # native return; rBIX=newSaveArea
    # equivalent to dvmPopJniLocals
    lw     a2, offStackSaveArea_returnAddr(rBIX)     # a2 = chaining cell ret addr
    lw     a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top
    lw     a1, offThread_exception(rSELF)            # check for exception
    sw     rFP, offThread_curFrame(rSELF)            # self->curFrame = fp
    sw     a0, offThread_jniLocal_topCookie(rSELF)   # new top <- old top
    lw     a0, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)

    # a0 = dalvikCallsitePC
    bnez   a1, .LhandleException                     # handle exception if any

    sw     a2, offThread_inJitCodeCache(rSELF)       # set the mode properly
    beqz   a2, 3f
    jr     a2                                        # go if return chaining cell still exists

3:
    # continue executing the next instruction through the interpreter
    la     a1, .LdvmJitToInterpTraceSelectNoChain    # defined in footer.S
    lw     a1, (a1)
    add    rPC, a0, 3*2                              # reconstruct new rPC (advance 3 dalvik instr)

#if defined(WITH_JIT_TUNING)
    li     a0, kCallsiteInterpreted
#endif
    jr     a1

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MUL_LONG
dvmCompiler_TEMPLATE_MUL_LONG:
/* File: mips/TEMPLATE_MUL_LONG.S */
    /*
     * Signed 64-bit integer multiply.
     *
     * For JIT: op1 in a0/a1, op2 in a2/a3, return in v0/v1
     *
     * Consider WXxYZ (a1a0 x a3a2) with a long multiply:
     *
     *         a1   a0
     *   x     a3   a2
     *   -------------
     *       a2a1 a2a0
     *       a3a0
     *  a3a1 (<= unused)
     *  ---------------
     *         v1   v0
     *
     */
    /* mul-long vAA, vBB, vCC */
    mul     rRESULT1,rARG3,rARG0              #  v1= a3a0
    multu   rARG2,rARG0
    mfhi    t1
    mflo    rRESULT0                          #  v0= a2a0
    mul     t0,rARG2,rARG1                    #  t0= a2a1
    addu    rRESULT1,rRESULT1,t1              #  v1= a3a0 + hi(a2a0)
    addu    rRESULT1,rRESULT1,t0              #  v1= a3a0 + hi(a2a0) + a2a1;
    RETURN
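/*
 * Worked example (illustrative): x = 0x0000000100000002, y = 0x0000000300000004.
 * a3a0 = 3*2 = 6, a2a0 = 4*2 = 8 (hi 0, lo 8), a2a1 = 4*1 = 4, so
 * v1 = 6 + 0 + 4 = 0xa and v0 = 8: the low 64 bits of the product are
 * 0x0000000a00000008.
 */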

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SHL_LONG
dvmCompiler_TEMPLATE_SHL_LONG:
/* File: mips/TEMPLATE_SHL_LONG.S */
    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     */
    /* shl-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */
    sll     rRESULT0, rARG0, a2		#  rlo<- alo << (shift&31)
    not     rRESULT1, a2		#  rhi<- 31-shift  (shift is 5b)
    srl     rARG0, 1
    srl     rARG0, rRESULT1		#  alo<- alo >> (32-(shift&31))
    sll     rRESULT1, rARG1, a2		#  rhi<- ahi << (shift&31)
    or      rRESULT1, rARG0		#  rhi<- rhi | alo
    andi    a2, 0x20			#  shift<- shift & 0x20
    movn    rRESULT1, rRESULT0, a2	#  rhi<- rlo (if shift&0x20)
    movn    rRESULT0, zero, a2		#  rlo<- 0  (if shift&0x20)
    RETURN
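/*
 * Illustration: for shift = 33 the low five bits give a one-bit shift and the
 * &0x20 test fires, so the movn pair rewrites the result to rhi = alo << 1,
 * rlo = 0, i.e. the 64-bit value shifted left by 33.
 */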

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SHR_LONG
dvmCompiler_TEMPLATE_SHR_LONG:
/* File: mips/TEMPLATE_SHR_LONG.S */
    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     */
    /* shr-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */
    sra     rRESULT1, rARG1, a2		#  rhi<- ahi >> (shift&31)
    srl     rRESULT0, rARG0, a2		#  rlo<- alo >> (shift&31)
    sra     a3, rARG1, 31		#  a3<- sign(ah)
    not     rARG0, a2			#  alo<- 31-shift (shift is 5b)
    sll     rARG1, 1
    sll     rARG1, rARG0		#  ahi<- ahi << (32-(shift&31))
    or      rRESULT0, rARG1		#  rlo<- rlo | ahi
    andi    a2, 0x20			#  shift & 0x20
    movn    rRESULT0, rRESULT1, a2	#  rlo<- rhi (if shift&0x20)
    movn    rRESULT1, a3, a2		#  rhi<- sign(ahi) (if shift&0x20)
    RETURN

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_USHR_LONG
dvmCompiler_TEMPLATE_USHR_LONG:
/* File: mips/TEMPLATE_USHR_LONG.S */
    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     */
    /* ushr-long vAA:vBB(rARG1:rARG0), vCC(a2) - result in (rRESULT1:rRESULT0) */
    srl     rRESULT1, rARG1, a2		#  rhi<- ahi >> (shift&31)
    srl     rRESULT0, rARG0, a2		#  rlo<- alo >> (shift&31)
    not     rARG0, a2			#  alo<- 31-n  (shift is 5b)
    sll     rARG1, 1
    sll     rARG1, rARG0		#  ahi<- ahi << (32-(shift&31))
    or      rRESULT0, rARG1		#  rlo<- rlo | ahi
    andi    a2, 0x20			#  shift & 0x20
    movn    rRESULT0, rRESULT1, a2	#  rlo<- rhi (if shift&0x20)
    movn    rRESULT1, zero, a2		#  rhi<- 0 (if shift&0x20)
    RETURN

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP
dvmCompiler_TEMPLATE_ADD_FLOAT_VFP:
/* File: mips/TEMPLATE_ADD_FLOAT_VFP.S */
/* File: mips/fbinop.S */
    /*
     * Generic 32-bit binary float operation. a0 = a1 op a2.
     *
     * For: add-fp, sub-fp, mul-fp, div-fp
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = op1 address
     *     a2 = op2 address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     *
     */
    move rOBJ, a0                       # save a0
#ifdef  SOFT_FLOAT
    LOAD(a0, a1)                        # a0<- vBB
    LOAD(a1, a2)                        # a1<- vCC
    .if 0
    beqz    a1, common_errDivideByZero  # is second operand zero?
    .endif
                               # optional op
    JAL(__addsf3)                              # v0 = result
    STORE(v0, rOBJ)                     # vAA <- v0
#else
    LOAD_F(fa0, a1)                     # fa0<- vBB
    LOAD_F(fa1, a2)                     # fa1<- vCC
    .if 0
    # is second operand zero?
    li.s        ft0, 0
    c.eq.s      fcc0, ft0, fa1          # condition bit and comparison with 0
    bc1t        fcc0, common_errDivideByZero
    .endif
                               # optional op
    add.s fv0, fa0, fa1                            # fv0 = result
    STORE_F(fv0, rOBJ)                  # vAA <- fv0
#endif
    RETURN


/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP
dvmCompiler_TEMPLATE_SUB_FLOAT_VFP:
/* File: mips/TEMPLATE_SUB_FLOAT_VFP.S */
/* File: mips/fbinop.S */
    /*
     * Generic 32-bit binary float operation. a0 = a1 op a2.
     *
     * For: add-fp, sub-fp, mul-fp, div-fp
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = op1 address
     *     a2 = op2 address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     *
     */
    move rOBJ, a0                       # save a0
#ifdef  SOFT_FLOAT
    LOAD(a0, a1)                        # a0<- vBB
    LOAD(a1, a2)                        # a1<- vCC
    .if 0
    beqz    a1, common_errDivideByZero  # is second operand zero?
    .endif
                               # optional op
    JAL(__subsf3)                              # v0 = result
    STORE(v0, rOBJ)                     # vAA <- v0
#else
    LOAD_F(fa0, a1)                     # fa0<- vBB
    LOAD_F(fa1, a2)                     # fa1<- vCC
    .if 0
    # is second operand zero?
    li.s        ft0, 0
    c.eq.s      fcc0, ft0, fa1          # condition bit and comparison with 0
    bc1t        fcc0, common_errDivideByZero
    .endif
                               # optional op
    sub.s fv0, fa0, fa1                            # fv0 = result
    STORE_F(fv0, rOBJ)                  # vAA <- fv0
#endif
    RETURN


/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP
dvmCompiler_TEMPLATE_MUL_FLOAT_VFP:
/* File: mips/TEMPLATE_MUL_FLOAT_VFP.S */
/* File: mips/fbinop.S */
    /*
     * Generic 32-bit binary float operation. a0 = a1 op a2.
     *
     * For: add-fp, sub-fp, mul-fp, div-fp
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = op1 address
     *     a2 = op2 address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     *
     */
    move rOBJ, a0                       # save a0
#ifdef  SOFT_FLOAT
    LOAD(a0, a1)                        # a0<- vBB
    LOAD(a1, a2)                        # a1<- vCC
    .if 0
    beqz    a1, common_errDivideByZero  # is second operand zero?
    .endif
                               # optional op
    JAL(__mulsf3)                              # v0 = result
    STORE(v0, rOBJ)                     # vAA <- v0
#else
    LOAD_F(fa0, a1)                     # fa0<- vBB
    LOAD_F(fa1, a2)                     # fa1<- vCC
    .if 0
    # is second operand zero?
    li.s        ft0, 0
    c.eq.s      fcc0, ft0, fa1          # condition bit and comparison with 0
    bc1t        fcc0, common_errDivideByZero
    .endif
                               # optional op
    mul.s fv0, fa0, fa1                            # fv0 = result
    STORE_F(fv0, rOBJ)                  # vAA <- fv0
#endif
    RETURN


/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP
dvmCompiler_TEMPLATE_DIV_FLOAT_VFP:
/* File: mips/TEMPLATE_DIV_FLOAT_VFP.S */
/* File: mips/fbinop.S */
    /*
     * Generic 32-bit binary float operation. a0 = a1 op a2.
     *
     * For: add-fp, sub-fp, mul-fp, div-fp
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = op1 address
     *     a2 = op2 address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     *
     */
    move rOBJ, a0                       # save a0
#ifdef  SOFT_FLOAT
    LOAD(a0, a1)                        # a0<- vBB
    LOAD(a1, a2)                        # a1<- vCC
    .if 0
    beqz    a1, common_errDivideByZero  # is second operand zero?
    .endif
                               # optional op
    JAL(__divsf3)                              # v0 = result
    STORE(v0, rOBJ)                     # vAA <- v0
#else
    LOAD_F(fa0, a1)                     # fa0<- vBB
    LOAD_F(fa1, a2)                     # fa1<- vCC
    .if 0
    # is second operand zero?
    li.s        ft0, 0
    c.eq.s      fcc0, ft0, fa1          # condition bit and comparison with 0
    bc1t        fcc0, common_errDivideByZero
    .endif
                               # optional op
    div.s fv0, fa0, fa1                            # fv0 = result
    STORE_F(fv0, rOBJ)                  # vAA <- fv0
#endif
    RETURN


/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP
dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP:
/* File: mips/TEMPLATE_ADD_DOUBLE_VFP.S */
/* File: mips/fbinopWide.S */
    /*
     * Generic 64-bit binary operation.  Provide an "instr" line that
     * specifies an instruction that performs "result = a0-a1 op a2-a3".
     * This could be a MIPS instruction or a function call.
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * vCC (a1).  Useful for integer division and modulus.
     *
     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
     *      xor-long, add-double, sub-double, mul-double, div-double,
     *      rem-double
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = op1 address
     *     a2 = op2 address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     */
    move rOBJ, a0                       # save a0
#ifdef  SOFT_FLOAT
    move t0, a1                         # save a1
    move t1, a2                         # save a2
    LOAD64(rARG0, rARG1, t0)            # a0/a1<- vBB/vBB+1
    LOAD64(rARG2, rARG3, t1)            # a2/a3<- vCC/vCC+1
    .if 0
    or          t0, rARG2, rARG3        # second arg (a2-a3) is zero?
    beqz        t0, common_errDivideByZero
    .endif
                               # optional op
    JAL(__adddf3)                              # result<- op, a0-a3 changed
    STORE64(rRESULT0, rRESULT1, rOBJ)
#else
    LOAD64_F(fa0, fa0f, a1)
    LOAD64_F(fa1, fa1f, a2)
    .if 0
    li.d        ft0, 0
    c.eq.d      fcc0, fa1, ft0
    bc1t        fcc0, common_errDivideByZero
    .endif
                               # optional op
    add.d fv0, fa0, fa1
    STORE64_F(fv0, fv0f, rOBJ)
#endif
    RETURN


/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP
dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP:
/* File: mips/TEMPLATE_SUB_DOUBLE_VFP.S */
/* File: mips/fbinopWide.S */
    /*
     * Generic 64-bit binary operation.  Provide an "instr" line that
     * specifies an instruction that performs "result = a0-a1 op a2-a3".
     * This could be a MIPS instruction or a function call.
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * vCC (a1).  Useful for integer division and modulus.
     *
     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
     *      xor-long, add-double, sub-double, mul-double, div-double,
     *      rem-double
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = op1 address
     *     a2 = op2 address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     */
    move rOBJ, a0                       # save a0
#ifdef  SOFT_FLOAT
    move t0, a1                         # save a1
    move t1, a2                         # save a2
    LOAD64(rARG0, rARG1, t0)            # a0/a1<- vBB/vBB+1
    LOAD64(rARG2, rARG3, t1)            # a2/a3<- vCC/vCC+1
    .if 0
    or          t0, rARG2, rARG3        # second arg (a2-a3) is zero?
    beqz        t0, common_errDivideByZero
    .endif
                               # optional op
    JAL(__subdf3)                              # result<- op, a0-a3 changed
    STORE64(rRESULT0, rRESULT1, rOBJ)
#else
    LOAD64_F(fa0, fa0f, a1)
    LOAD64_F(fa1, fa1f, a2)
    .if 0
    li.d        ft0, 0
    c.eq.d      fcc0, fa1, ft0
    bc1t        fcc0, common_errDivideByZero
    .endif
                               # optional op
    sub.d fv0, fa0, fa1
    STORE64_F(fv0, fv0f, rOBJ)
#endif
    RETURN


/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP
dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP:
/* File: mips/TEMPLATE_MUL_DOUBLE_VFP.S */
/* File: mips/fbinopWide.S */
    /*
     * Generic 64-bit binary operation.  Provide an "instr" line that
     * specifies an instruction that performs "result = a0-a1 op a2-a3".
     * This could be a MIPS instruction or a function call.
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * vCC (a1).  Useful for integer division and modulus.
     *
     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
     *      xor-long, add-double, sub-double, mul-double, div-double,
     *      rem-double
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = op1 address
     *     a2 = op2 address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     */
    move rOBJ, a0                       # save a0
#ifdef  SOFT_FLOAT
    move t0, a1                         # save a1
    move t1, a2                         # save a2
    LOAD64(rARG0, rARG1, t0)            # a0/a1<- vBB/vBB+1
    LOAD64(rARG2, rARG3, t1)            # a2/a3<- vCC/vCC+1
    .if 0
    or          t0, rARG2, rARG3        # second arg (a2-a3) is zero?
    beqz        t0, common_errDivideByZero
    .endif
                               # optional op
    JAL(__muldf3)                              # result<- op, a0-a3 changed
    STORE64(rRESULT0, rRESULT1, rOBJ)
#else
    LOAD64_F(fa0, fa0f, a1)
    LOAD64_F(fa1, fa1f, a2)
    .if 0
    li.d        ft0, 0
    c.eq.d      fcc0, fa1, ft0
    bc1t        fcc0, common_errDivideByZero
    .endif
                               # optional op
    mul.d fv0, fa0, fa1
    STORE64_F(fv0, fv0f, rOBJ)
#endif
    RETURN


/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP
dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP:
/* File: mips/TEMPLATE_DIV_DOUBLE_VFP.S */
/* File: mips/fbinopWide.S */
    /*
     * Generic 64-bit binary operation.  Provide an "instr" line that
     * specifies an instruction that performs "result = a0-a1 op a2-a3".
     * This could be a MIPS instruction or a function call.
     * If "chkzero" is set to 1, we perform a divide-by-zero check on
     * vCC (a1).  Useful for integer division and modulus.
     *
     * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
     *      xor-long, add-double, sub-double, mul-double, div-double,
     *      rem-double
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = op1 address
     *     a2 = op2 address
     *
     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
     */
    move rOBJ, a0                       # save a0
#ifdef  SOFT_FLOAT
    move t0, a1                         # save a1
    move t1, a2                         # save a2
    LOAD64(rARG0, rARG1, t0)            # a0/a1<- vBB/vBB+1
    LOAD64(rARG2, rARG3, t1)            # a2/a3<- vCC/vCC+1
    .if 0
    or          t0, rARG2, rARG3        # second arg (a2-a3) is zero?
    beqz        t0, common_errDivideByZero
    .endif
                               # optional op
    JAL(__divdf3)                              # result<- op, a0-a3 changed
    STORE64(rRESULT0, rRESULT1, rOBJ)
#else
    LOAD64_F(fa0, fa0f, a1)
    LOAD64_F(fa1, fa1f, a2)
    .if 0
    li.d        ft0, 0
    c.eq.d      fcc0, fa1, ft0
    bc1t        fcc0, common_errDivideByZero
    .endif
                               # optional op
    div.d fv0, fa0, fa1
    STORE64_F(fv0, fv0f, rOBJ)
#endif
    RETURN


/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP
dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP:
/* File: mips/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */
/* File: mips/funopNarrower.S */
    /*
     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
     * that specifies an instruction that performs "result = op a0/a1", where
     * "result" is a 32-bit quantity in a0.
     *
     * For: long-to-float, double-to-int, double-to-float
     * If hard floating point support is available, use fa0 as the parameter, except for
     * long-to-float opcode.
     * (This would work for long-to-int, but that instruction is actually
     * an exact match for OP_MOVE.)
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = src dalvik register address
     *
     */
    move rINST, a0                      # save a0
#ifdef  SOFT_FLOAT
    move t0, a1                         # save a1
    LOAD64(rARG0, rARG1, t0)            # a0/a1<- vB/vB+1
                               # optional op
    JAL(__truncdfsf2)                              # v0<- op, a0-a3 changed
.LTEMPLATE_DOUBLE_TO_FLOAT_VFP_set_vreg:
    STORE(v0, rINST)                    # vA<- v0
#else
    LOAD64_F(fa0, fa0f, a1)
                               # optional op
    cvt.s.d  fv0,fa0                            # fv0 = result
.LTEMPLATE_DOUBLE_TO_FLOAT_VFP_set_vreg_f:
    STORE_F(fv0, rINST)                 # vA<- fv0
#endif
    RETURN


/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP
dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP:
/* File: mips/TEMPLATE_DOUBLE_TO_INT_VFP.S */
/* File: mips/funopNarrower.S */
    /*
     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
     * that specifies an instruction that performs "result = op a0/a1", where
     * "result" is a 32-bit quantity in a0.
     *
     * For: long-to-float, double-to-int, double-to-float
     * If hard floating point support is available, use fa0 as the parameter, except for
     * long-to-float opcode.
     * (This would work for long-to-int, but that instruction is actually
     * an exact match for OP_MOVE.)
     *
     * On entry:
     *     a0 = target dalvik register address
     *     a1 = src dalvik register address
     *
     */
    move rINST, a0                      # save a0
#ifdef  SOFT_FLOAT
    move t0, a1                         # save a1
    LOAD64(rARG0, rARG1, t0)            # a0/a1<- vB/vB+1
                               # optional op
    b    d2i_doconv                              # v0<- op, a0-a3 changed
.LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg:
    STORE(v0, rINST)                    # vA<- v0
#else
    LOAD64_F(fa0, fa0f, a1)
                               # optional op
    b    d2i_doconv                            # fv0 = result
.LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f:
    STORE_F(fv0, rINST)                 # vA<- fv0
#endif
    RETURN


1447/*
1448 * Convert the double in a0/a1 to an int in a0.
1449 *
1450 * We have to clip values to int min/max per the specification.  The
1451 * expected common case is a "reasonable" value that converts directly
1452 * to a modest integer.  The EABI conversion function doesn't do this for us.
1453 * rBIX/rOBJ hold the arguments across the helper calls (callee-saved, otherwise unused here).
1454 */
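
/*
 * A rough C equivalent of the clamping sequence below (a sketch, not part
 * of the runtime; INT32_MAX/INT32_MIN come from <stdint.h>):
 *
 *     static int32_t d2i(double d) {
 *         if (d >= 2147483647.0)   return INT32_MAX;   // 0x7fffffff
 *         if (d <= -2147483648.0)  return INT32_MIN;   // 0x80000000
 *         if (d != d)              return 0;           // NaN
 *         return (int32_t)d;                           // plain truncation
 *     }
 */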
1455
1456d2i_doconv:
1457#ifdef SOFT_FLOAT
1458    la          t0, .LDOUBLE_TO_INT_max
1459    LOAD64(rARG2, rARG3, t0)
1460    move        rBIX, rARG0                       # save a0
1461    move        rOBJ, rARG1                       #  and a1
1462    JAL(__gedf2)                               # is arg >= maxint?
1463
1464    move        t0, v0
1465    li          v0, ~0x80000000                # return maxint (7fffffff)
1466    bgez        t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg       # nonzero == yes
1467
1468    move        rARG0, rBIX                       # recover arg
1469    move        rARG1, rOBJ
1470    la          t0, .LDOUBLE_TO_INT_min
1471    LOAD64(rARG2, rARG3, t0)
1472    JAL(__ledf2)                               # is arg <= minint?
1473
1474    move        t0, v0
1475    li          v0, 0x80000000                 # return minint (80000000)
1476    blez        t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg       # nonzero == yes
1477
1478    move        rARG0, rBIX                  # recover arg
1479    move        rARG1, rOBJ
1480    move        rARG2, rBIX                  # compare against self
1481    move        rARG3, rOBJ
1482    JAL(__nedf2)                        # is arg != arg (i.e. NaN)?
1483
1484    move        t0, v0                  # zero == no
1485    li          v0, 0
1486    bnez        t0, .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg        # return zero for NaN
1487
1488    move        rARG0, rBIX                  # recover arg
1489    move        rARG1, rOBJ
1490    JAL(__fixdfsi)                      # convert double to int
1491    b           .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg
1492#else
1493    la          t0, .LDOUBLE_TO_INT_max
1494    LOAD64_F(fa1, fa1f, t0)
1495    c.ole.d     fcc0, fa1, fa0
1496    l.s         fv0, .LDOUBLE_TO_INT_maxret
1497    bc1t        .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f
1498
1499    la          t0, .LDOUBLE_TO_INT_min
1500    LOAD64_F(fa1, fa1f, t0)
1501    c.ole.d     fcc0, fa0, fa1
1502    l.s         fv0, .LDOUBLE_TO_INT_minret
1503    bc1t        .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f
1504
1505    mov.d       fa1, fa0
1506    c.un.d      fcc0, fa0, fa1
1507    li.s        fv0, 0
1508    bc1t        .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f
1509
1510    trunc.w.d   fv0, fa0
1511    b           .LTEMPLATE_DOUBLE_TO_INT_VFP_set_vreg_f
1512#endif
1513
1514
1515.LDOUBLE_TO_INT_max:
1516    .dword   0x41dfffffffc00000                  # maxint (0x7fffffff), as a double
1517.LDOUBLE_TO_INT_min:
1518    .dword   0xc1e0000000000000                  # minint (0x80000000), as a double
1519.LDOUBLE_TO_INT_maxret:
1520    .word   0x7fffffff
1521.LDOUBLE_TO_INT_minret:
1522    .word   0x80000000
1523
1524/* ------------------------------ */
1525    .balign 4
1526    .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP
1527dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP:
1528/* File: mips/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */
1529/* File: mips/funopWider.S */
1530    /*
1531     * Generic 32bit-to-64bit floating point unary operation.  Provide an
1532     * "instr" line that specifies an instruction that performs "d0 = op s0".
1533     *
1534     * For: int-to-double, float-to-double
1535     *
1536     * On entry:
1537     *     a0 = target dalvik register address
1538     *     a1 = src dalvik register address
1539     */
1540    /* unop vA, vB */
1541    move rOBJ, a0                       # save a0
1542#ifdef  SOFT_FLOAT
1543    LOAD(a0, a1)                        # a0<- vB
1544                               # optional op
1545    JAL(__extendsfdf2)                              # result<- op, a0-a3 changed
1546
1547.LTEMPLATE_FLOAT_TO_DOUBLE_VFP_set_vreg:
1548    STORE64(rRESULT0, rRESULT1, rOBJ)   # vA/vA+1<- v0/v1
1549#else
1550    LOAD_F(fa0, a1)                     # fa0<- vB
1551                               # optional op
1552    cvt.d.s fv0, fa0
1553
1554.LTEMPLATE_FLOAT_TO_DOUBLE_VFP_set_vreg:
1555    STORE64_F(fv0, fv0f, rOBJ)                          # vA/vA+1<- fv0/fv0f
1556#endif
1557    RETURN
1558
1559
1560/* ------------------------------ */
1561    .balign 4
1562    .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP
1563dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP:
1564/* File: mips/TEMPLATE_FLOAT_TO_INT_VFP.S */
1565/* File: mips/funop.S */
1566    /*
1567     * Generic 32-bit unary operation.  Provide an "instr" line that
1568     * specifies an instruction that performs "result = op a0".
1569     * This could be a MIPS instruction or a function call.
1570     *
1571     * for: int-to-float, float-to-int
1572     *
1573     * On entry:
1574     *     a0 = target dalvik register address
1575     *     a1 = src dalvik register address
1576     *
1577     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
1578     *
1579     */
1580    move rOBJ, a0                       # save a0
1581#ifdef SOFT_FLOAT
1582    LOAD(a0, a1)                        # a0<- vBB
1583                               # optional op
1584    b    f2i_doconv                              # v0<- op, a0-a3 changed
1585.LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg:
1586    STORE(v0, rOBJ)                     # vAA<- v0
1587#else
1588    LOAD_F(fa0, a1)                     # fa0<- vBB
1589                               # optional op
1590    b        f2i_doconv                            # fv0 = result
1591.LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f:
1592    STORE_F(fv0, rOBJ)                  # vAA <- fv0
1593#endif
1594    RETURN
1595
1596
1597/*
1598 * Not a general entry point; it is referenced only by the template above.
1599 */
1600f2i_doconv:
1601#ifdef SOFT_FLOAT
1602        li      a1, 0x4f000000  # (float)maxint
1603        move    rBIX, a0
1604        JAL(__gesf2)            # is arg >= maxint?
1605        move    t0, v0
1606        li      v0, ~0x80000000 # return maxint (7fffffff)
1607        bgez    t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg
1608
1609        move    a0, rBIX                # recover arg
1610        li      a1, 0xcf000000  # (float)minint
1611        JAL(__lesf2)
1612
1613        move    t0, v0
1614        li      v0, 0x80000000  # return minint (80000000)
1615        blez    t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg
1616        move    a0, rBIX
1617        move    a1, rBIX
1618        JAL(__nesf2)
1619
1620        move    t0, v0
1621        li      v0, 0           # return zero for NaN
1622        bnez    t0, .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg
1623
1624        move    a0, rBIX
1625        JAL(__fixsfsi)
1626        b .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg
1627#else
1628        l.s             fa1, .LFLOAT_TO_INT_max
1629        c.ole.s         fcc0, fa1, fa0
1630        l.s             fv0, .LFLOAT_TO_INT_ret_max
1631        bc1t            .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f
1632
1633        l.s             fa1, .LFLOAT_TO_INT_min
1634        c.ole.s         fcc0, fa0, fa1
1635        l.s             fv0, .LFLOAT_TO_INT_ret_min
1636        bc1t            .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f
1637
1638        mov.s           fa1, fa0
1639        c.un.s          fcc0, fa0, fa1
1640        li.s            fv0, 0
1641        bc1t            .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f
1642
1643        trunc.w.s       fv0, fa0
1644        b .LTEMPLATE_FLOAT_TO_INT_VFP_set_vreg_f
1645#endif
1646
1647.LFLOAT_TO_INT_max:
1648        .word   0x4f000000
1649.LFLOAT_TO_INT_min:
1650        .word   0xcf000000
1651.LFLOAT_TO_INT_ret_max:
1652        .word   0x7fffffff
1653.LFLOAT_TO_INT_ret_min:
1654        .word   0x80000000
1655
1656
1657/* ------------------------------ */
1658    .balign 4
1659    .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP
1660dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP:
1661/* File: mips/TEMPLATE_INT_TO_DOUBLE_VFP.S */
1662/* File: mips/funopWider.S */
1663    /*
1664     * Generic 32bit-to-64bit floating point unary operation.  Provide an
1665     * "instr" line that specifies an instruction that performs "d0 = op s0".
1666     *
1667     * For: int-to-double, float-to-double
1668     *
1669     * On entry:
1670     *     a0 = target dalvik register address
1671     *     a1 = src dalvik register address
1672     */
1673    /* unop vA, vB */
1674    move rOBJ, a0                       # save a0
1675#ifdef  SOFT_FLOAT
1676    LOAD(a0, a1)                        # a0<- vB
1677                               # optional op
1678    JAL(__floatsidf)                              # result<- op, a0-a3 changed
1679
1680.LTEMPLATE_INT_TO_DOUBLE_VFP_set_vreg:
1681    STORE64(rRESULT0, rRESULT1, rOBJ)   # vA/vA+1<- v0/v1
1682#else
1683    LOAD_F(fa0, a1)                     # fa0<- vB
1684                               # optional op
1685    cvt.d.w    fv0, fa0
1686
1687.LTEMPLATE_INT_TO_DOUBLE_VFP_set_vreg:
1688    STORE64_F(fv0, fv0f, rOBJ)                          # vA/vA+1<- fv0/fv0f
1689#endif
1690    RETURN
1691
1692
1693/* ------------------------------ */
1694    .balign 4
1695    .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP
1696dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP:
1697/* File: mips/TEMPLATE_INT_TO_FLOAT_VFP.S */
1698/* File: mips/funop.S */
1699    /*
1700     * Generic 32-bit unary operation.  Provide an "instr" line that
1701     * specifies an instruction that performs "result = op a0".
1702     * This could be a MIPS instruction or a function call.
1703     *
1704     * for: int-to-float, float-to-int
1705     *
1706     * On entry:
1707     *     a0 = target dalvik register address
1708     *     a1 = src dalvik register address
1709     *
1710     * IMPORTANT: you may specify "chkzero" or "preinstr" but not both.
1711     *
1712     */
1713    move rOBJ, a0                       # save a0
1714#ifdef SOFT_FLOAT
1715    LOAD(a0, a1)                        # a0<- vBB
1716                               # optional op
1717    JAL(__floatsisf)                              # v0<- op, a0-a3 changed
1718.LTEMPLATE_INT_TO_FLOAT_VFP_set_vreg:
1719    STORE(v0, rOBJ)                     # vAA<- v0
1720#else
1721    LOAD_F(fa0, a1)                     # fa0<- vBB
1722                               # optional op
1723    cvt.s.w fv0, fa0                            # fv0 = result
1724.LTEMPLATE_INT_TO_FLOAT_VFP_set_vreg_f:
1725    STORE_F(fv0, rOBJ)                  # vAA <- fv0
1726#endif
1727    RETURN
1728
1729
1730/* ------------------------------ */
1731    .balign 4
1732    .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP
1733dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP:
1734/* File: mips/TEMPLATE_CMPG_DOUBLE_VFP.S */
1735/* File: mips/TEMPLATE_CMPL_DOUBLE_VFP.S */
1736    /*
1737     * Compare two double precision floating-point values.  Puts 0, 1, or -1 into the
1738     * destination register based on the results of the comparison.
1739     *
1740     * Provide a "naninst" instruction that puts 1 or -1 into a1 depending
1741     * on what value we'd like to return when one of the operands is NaN.
1742     *
1743     * The operation we're implementing is:
1744     *   if (x == y)
1745     *     return 0;
1746     *   else if (x < y)
1747     *     return -1;
1748     *   else if (x > y)
1749     *     return 1;
1750     *   else
1751     *     return {-1,1};  // one or both operands were NaN
1752     *
1753     * On entry:
1754     *    a0 = &op1 [vBB]
1755     *    a1 = &op2 [vCC]
1756     *
1757     * for: cmpl-double, cmpg-double
1758     */
1759    /* op vAA, vBB, vCC */
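
    /*
     * The only difference between cmpl and cmpg is the value used when one
     * operand is NaN: the fallthrough below leaves 1 in rTEMP for cmpg,
     * while the cmpl template leaves -1.  As a C sketch (illustrative):
     *
     *     int cmpg(double x, double y) {
     *         if (x < y)  return -1;
     *         if (x > y)  return  1;
     *         if (x == y) return  0;
     *         return 1;                // NaN; cmpl returns -1 here
     *     }
     */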
1760
1761    /* "clasic" form */
1762#ifdef  SOFT_FLOAT
1763    move rOBJ, a0                       # save a0
1764    move rBIX, a1                       # save a1
1765    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
1766    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
1767    JAL(__eqdf2)                        # v0<- (vBB == vCC)
1768    li       rTEMP, 0                   # vAA<- 0
1769    beqz     v0, TEMPLATE_CMPG_DOUBLE_VFP_finish
1770    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
1771    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
1772    JAL(__ltdf2)                        # a0<- (vBB < vCC)
1773    li       rTEMP, -1                  # vAA<- -1
1774    bltz     v0, TEMPLATE_CMPG_DOUBLE_VFP_finish
1775    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
1776    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
1777    JAL(__gtdf2)                        # v0<- (vBB > vCC)
1778    li      rTEMP, 1                    # vAA<- 1
1779    bgtz    v0, TEMPLATE_CMPG_DOUBLE_VFP_finish
1780#else
1781    LOAD64_F(fs0, fs0f, a0)             # fs0<- vBB
1782    LOAD64_F(fs1, fs1f, a1)             # fs1<- vCC
1783    c.olt.d     fcc0, fs0, fs1          # Is fs0 < fs1?
1784    li          rTEMP, -1
1785    bc1t        fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish
1786    c.olt.d     fcc0, fs1, fs0
1787    li          rTEMP, 1
1788    bc1t        fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish
1789    c.eq.d      fcc0, fs0, fs1
1790    li          rTEMP, 0
1791    bc1t        fcc0, TEMPLATE_CMPG_DOUBLE_VFP_finish
1792#endif
1793
1794    li            rTEMP, 1
1795
1796TEMPLATE_CMPG_DOUBLE_VFP_finish:
1797    move     v0, rTEMP                  # v0<- vAA
1798    RETURN
1799
1800
1801/* ------------------------------ */
1802    .balign 4
1803    .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP
1804dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP:
1805/* File: mips/TEMPLATE_CMPL_DOUBLE_VFP.S */
1806    /*
1807     * Compare two double precision floating-point values.  Puts 0, 1, or -1 into the
1808     * destination register based on the results of the comparison.
1809     *
1810     * Provide a "naninst" instruction that puts 1 or -1 into a1 depending
1811     * on what value we'd like to return when one of the operands is NaN.
1812     *
1813     * The operation we're implementing is:
1814     *   if (x == y)
1815     *     return 0;
1816     *   else if (x < y)
1817     *     return -1;
1818     *   else if (x > y)
1819     *     return 1;
1820     *   else
1821     *     return {-1,1};  // one or both operands were NaN
1822     *
1823     * On entry:
1824     *    a0 = &op1 [vBB]
1825     *    a1 = &op2 [vCC]
1826     *
1827     * for: cmpl-double, cmpg-double
1828     */
1829    /* op vAA, vBB, vCC */
1830
1831    /* "clasic" form */
1832#ifdef  SOFT_FLOAT
1833    move rOBJ, a0                       # save a0
1834    move rBIX, a1                       # save a1
1835    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
1836    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
1837    JAL(__eqdf2)                        # v0<- (vBB == vCC)
1838    li       rTEMP, 0                   # vAA<- 0
1839    beqz     v0, TEMPLATE_CMPL_DOUBLE_VFP_finish
1840    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
1841    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
1842    JAL(__ltdf2)                        # a0<- (vBB < vCC)
1843    li       rTEMP, -1                  # vAA<- -1
1844    bltz     v0, TEMPLATE_CMPL_DOUBLE_VFP_finish
1845    LOAD64(rARG0, rARG1, rOBJ)          # a0/a1<- vBB/vBB+1
1846    LOAD64(rARG2, rARG3, rBIX)          # a2/a3<- vCC/vCC+1
1847    JAL(__gtdf2)                        # v0<- (vBB > vCC)
1848    li      rTEMP, 1                    # vAA<- 1
1849    bgtz    v0, TEMPLATE_CMPL_DOUBLE_VFP_finish
1850#else
1851    LOAD64_F(fs0, fs0f, a0)             # fs0<- vBB
1852    LOAD64_F(fs1, fs1f, a1)             # fs1<- vCC
1853    c.olt.d     fcc0, fs0, fs1          # Is fs0 < fs1?
1854    li          rTEMP, -1
1855    bc1t        fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish
1856    c.olt.d     fcc0, fs1, fs0
1857    li          rTEMP, 1
1858    bc1t        fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish
1859    c.eq.d      fcc0, fs0, fs1
1860    li          rTEMP, 0
1861    bc1t        fcc0, TEMPLATE_CMPL_DOUBLE_VFP_finish
1862#endif
1863
1864    li     rTEMP, -1
1865
1866TEMPLATE_CMPL_DOUBLE_VFP_finish:
1867    move     v0, rTEMP                  # v0<- vAA
1868    RETURN
1869
1870/* ------------------------------ */
1871    .balign 4
1872    .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP
1873dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP:
1874/* File: mips/TEMPLATE_CMPG_FLOAT_VFP.S */
1875/* File: mips/TEMPLATE_CMPL_FLOAT_VFP.S */
1876    /*
1877     * Compare two floating-point values.  Puts 0, 1, or -1 into the
1878     * destination register based on the results of the comparison.
1879     *
1880     * Provide a "naninst" instruction that puts 1 or -1 into a1 depending
1881     * on what value we'd like to return when one of the operands is NaN.
1882     *
1883     * The operation we're implementing is:
1884     *   if (x == y)
1885     *     return 0;
1886     *   else if (x < y)
1887     *     return -1;
1888     *   else if (x > y)
1889     *     return 1;
1890     *   else
1891     *     return {-1,1};  // one or both operands were NaN
1892     *
1893     * On entry:
1894     *    a0 = &op1 [vBB]
1895     *    a1 = &op2 [vCC]
1896     *
1897     * for: cmpl-float, cmpg-float
1898     */
1899    /* op vAA, vBB, vCC */
1900
1901    /* "clasic" form */
1902#ifdef  SOFT_FLOAT
1903    LOAD(rOBJ, a0)                      # rOBJ<- vBB
1904    LOAD(rBIX, a1)                      # rBIX<- vCC
1905    move     a0, rOBJ                   # a0<- vBB
1906    move     a1, rBIX                   # a1<- vCC
1907    JAL(__eqsf2)                        # v0<- (vBB == vCC)
1908    li       rTEMP, 0                   # vAA<- 0
1909    beqz     v0, TEMPLATE_CMPG_FLOAT_VFP_finish
1910    move     a0, rOBJ                   # a0<- vBB
1911    move     a1, rBIX                   # a1<- vCC
1912    JAL(__ltsf2)                        # a0<- (vBB < vCC)
1913    li       rTEMP, -1                  # vAA<- -1
1914    bltz     v0, TEMPLATE_CMPG_FLOAT_VFP_finish
1915    move     a0, rOBJ                   # a0<- vBB
1916    move     a1, rBIX                   # a1<- vCC
1917    JAL(__gtsf2)                        # v0<- (vBB > vCC)
1918    li      rTEMP, 1                    # vAA<- 1
1919    bgtz    v0, TEMPLATE_CMPG_FLOAT_VFP_finish
1920#else
1921    LOAD_F(fs0, a0)                     # fs0<- vBB
1922    LOAD_F(fs1, a1)                     # fs1<- vCC
1923    c.olt.s     fcc0, fs0, fs1          # Is fs0 < fs1?
1924    li          rTEMP, -1
1925    bc1t        fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish
1926    c.olt.s     fcc0, fs1, fs0
1927    li          rTEMP, 1
1928    bc1t        fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish
1929    c.eq.s      fcc0, fs0, fs1
1930    li          rTEMP, 0
1931    bc1t        fcc0, TEMPLATE_CMPG_FLOAT_VFP_finish
1932#endif
1933
1934    li     rTEMP, 1
1935
1936TEMPLATE_CMPG_FLOAT_VFP_finish:
1937    move     v0, rTEMP                  # v0<- vAA
1938    RETURN
1939
1940
1941/* ------------------------------ */
1942    .balign 4
1943    .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP
1944dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP:
1945/* File: mips/TEMPLATE_CMPL_FLOAT_VFP.S */
1946    /*
1947     * Compare two floating-point values.  Puts 0, 1, or -1 into the
1948     * destination register based on the results of the comparison.
1949     *
1950     * Provide a "naninst" instruction that puts 1 or -1 into a1 depending
1951     * on what value we'd like to return when one of the operands is NaN.
1952     *
1953     * The operation we're implementing is:
1954     *   if (x == y)
1955     *     return 0;
1956     *   else if (x < y)
1957     *     return -1;
1958     *   else if (x > y)
1959     *     return 1;
1960     *   else
1961     *     return {-1,1};  // one or both operands were NaN
1962     *
1963     * On entry:
1964     *    a0 = &op1 [vBB]
1965     *    a1 = &op2 [vCC]
1966     *
1967     * for: cmpl-float, cmpg-float
1968     */
1969    /* op vAA, vBB, vCC */
1970
1971    /* "clasic" form */
1972#ifdef  SOFT_FLOAT
1973    LOAD(rOBJ, a0)                      # rOBJ<- vBB
1974    LOAD(rBIX, a1)                      # rBIX<- vCC
1975    move     a0, rOBJ                   # a0<- vBB
1976    move     a1, rBIX                   # a1<- vCC
1977    JAL(__eqsf2)                        # v0<- (vBB == vCC)
1978    li       rTEMP, 0                   # vAA<- 0
1979    beqz     v0, TEMPLATE_CMPL_FLOAT_VFP_finish
1980    move     a0, rOBJ                   # a0<- vBB
1981    move     a1, rBIX                   # a1<- vCC
1982    JAL(__ltsf2)                        # a0<- (vBB < vCC)
1983    li       rTEMP, -1                  # vAA<- -1
1984    bltz     v0, TEMPLATE_CMPL_FLOAT_VFP_finish
1985    move     a0, rOBJ                   # a0<- vBB
1986    move     a1, rBIX                   # a1<- vCC
1987    JAL(__gtsf2)                        # v0<- (vBB > vCC)
1988    li      rTEMP, 1                    # vAA<- 1
1989    bgtz    v0, TEMPLATE_CMPL_FLOAT_VFP_finish
1990#else
1991    LOAD_F(fs0, a0)                     # fs0<- vBB
1992    LOAD_F(fs1, a1)                     # fs1<- vCC
1993    c.olt.s     fcc0, fs0, fs1          # Is fs0 < fs1?
1994    li          rTEMP, -1
1995    bc1t        fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish
1996    c.olt.s     fcc0, fs1, fs0
1997    li          rTEMP, 1
1998    bc1t        fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish
1999    c.eq.s      fcc0, fs0, fs1
2000    li          rTEMP, 0
2001    bc1t        fcc0, TEMPLATE_CMPL_FLOAT_VFP_finish
2002#endif
2003
2004    li     rTEMP, -1
2005
2006TEMPLATE_CMPL_FLOAT_VFP_finish:
2007    move     v0, rTEMP                  # v0<- vAA
2008    RETURN
2009
2010/* ------------------------------ */
2011    .balign 4
2012    .global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP
2013dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP:
2014/* File: mips/TEMPLATE_SQRT_DOUBLE_VFP.S */
2015
2016    /*
2017     * 64-bit floating point sqrt operation.
2018     * If the result is a NaN, bail out to library code to do
2019     * the right thing.
2020     *
2021     * On entry:
2022     *     a2 src addr of op1
2023     * On exit:
2024     *     v0,v1/fv0 = res
2025     */
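    /*
     * In C terms, the hard-float path below is roughly (a sketch; "hw_sqrt"
     * stands for the sqrt.d instruction):
     *
     *     double r = hw_sqrt(x);
     *     if (r != r)                  // NaN: fall back to the libm sqrt()
     *         r = sqrt(x);             // so corner cases get canonical results
     */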
2026#ifdef  SOFT_FLOAT
2027    LOAD64(rARG0, rARG1, a2)        # a0/a1<- vBB/vBB+1
2028#else
2029    LOAD64_F(fa0, fa0f, a2)         # fa0/fa0f<- vBB/vBB+1
2030    sqrt.d	fv0, fa0
2031    c.eq.d	fv0, fv0
2032    bc1t	1f
2033#endif
2034    JAL(sqrt)
20351:
2036    RETURN
2037
2038/* ------------------------------ */
2039    .balign 4
2040    .global dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON
2041dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON:
2042/* File: mips/TEMPLATE_THROW_EXCEPTION_COMMON.S */
2043    /*
2044     * Throw an exception from JIT'ed code.
2045     * On entry:
2046     *    a0    Dalvik PC that raises the exception
2047     */
2048    j      .LhandleException
2049
2050/* ------------------------------ */
2051    .balign 4
2052    .global dvmCompiler_TEMPLATE_MEM_OP_DECODE
2053dvmCompiler_TEMPLATE_MEM_OP_DECODE:
2054/* File: mips/TEMPLATE_MEM_OP_DECODE.S */
2055#if defined(WITH_SELF_VERIFICATION)
2056    /*
2057     * This handler encapsulates heap memory ops for selfVerification mode.
2058     *
2059     * The call to the handler is inserted prior to a heap memory operation.
2060     * This handler then calls a function to decode the memory op, and process
2061     * it accordingly. Afterwards, the handler changes the return address to
2062     * skip the memory op so it never gets executed.
2063     */
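    /*
     * Conceptually (a sketch; the decode routine is reached through the
     * .LdvmSelfVerificationMemOpDecode literal defined in footer.S):
     *
     *     push_all_registers();        // the store blocks below
     *     decode(ra, sp);              // emulate the skipped memory op and
     *                                  // advance the saved return address
     *     pop_all_registers();
     *     return;                      // resumes past the memory op
     */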
2064#ifdef HARD_FLOAT
2065    /* push f0-f31 onto stack */
2066    sw      f0, fr0*-4(sp)              # push f0
2067    sw      f1, fr1*-4(sp)              # push f1
2068    sw      f2, fr2*-4(sp)              # push f2
2069    sw      f3, fr3*-4(sp)              # push f3
2070    sw      f4, fr4*-4(sp)              # push f4
2071    sw      f5, fr5*-4(sp)              # push f5
2072    sw      f6, fr6*-4(sp)              # push f6
2073    sw      f7, fr7*-4(sp)              # push f7
2074    sw      f8, fr8*-4(sp)              # push f8
2075    sw      f9, fr9*-4(sp)              # push f9
2076    sw      f10, fr10*-4(sp)            # push f10
2077    sw      f11, fr11*-4(sp)            # push f11
2078    sw      f12, fr12*-4(sp)            # push f12
2079    sw      f13, fr13*-4(sp)            # push f13
2080    sw      f14, fr14*-4(sp)            # push f14
2081    sw      f15, fr15*-4(sp)            # push f15
2082    sw      f16, fr16*-4(sp)            # push f16
2083    sw      f17, fr17*-4(sp)            # push f17
2084    sw      f18, fr18*-4(sp)            # push f18
2085    sw      f19, fr19*-4(sp)            # push f19
2086    sw      f20, fr20*-4(sp)            # push f20
2087    sw      f21, fr21*-4(sp)            # push f21
2088    sw      f22, fr22*-4(sp)            # push f22
2089    sw      f23, fr23*-4(sp)            # push f23
2090    sw      f24, fr24*-4(sp)            # push f24
2091    sw      f25, fr25*-4(sp)            # push f25
2092    sw      f26, fr26*-4(sp)            # push f26
2093    sw      f27, fr27*-4(sp)            # push f27
2094    sw      f28, fr28*-4(sp)            # push f28
2095    sw      f29, fr29*-4(sp)            # push f29
2096    sw      f30, fr30*-4(sp)            # push f30
2097    sw      f31, fr31*-4(sp)            # push f31
2098
2099    sub     sp, (32-0)*4                # adjust stack pointer
2100#endif
2101
2102    /* push gp registers (except zero, gp, sp, and fp) */
2103    .set noat
2104    sw      AT, r_AT*-4(sp)             # push at
2105    .set at
2106    sw      v0, r_V0*-4(sp)             # push v0
2107    sw      v1, r_V1*-4(sp)             # push v1
2108    sw      a0, r_A0*-4(sp)             # push a0
2109    sw      a1, r_A1*-4(sp)             # push a1
2110    sw      a2, r_A2*-4(sp)             # push a2
2111    sw      a3, r_A3*-4(sp)             # push a3
2112    sw      t0, r_T0*-4(sp)             # push t0
2113    sw      t1, r_T1*-4(sp)             # push t1
2114    sw      t2, r_T2*-4(sp)             # push t2
2115    sw      t3, r_T3*-4(sp)             # push t3
2116    sw      t4, r_T4*-4(sp)             # push t4
2117    sw      t5, r_T5*-4(sp)             # push t5
2118    sw      t6, r_T6*-4(sp)             # push t6
2119    sw      t7, r_T7*-4(sp)             # push t7
2120    sw      s0, r_S0*-4(sp)             # push s0
2121    sw      s1, r_S1*-4(sp)             # push s1
2122    sw      s2, r_S2*-4(sp)             # push s2
2123    sw      s3, r_S3*-4(sp)             # push s3
2124    sw      s4, r_S4*-4(sp)             # push s4
2125    sw      s5, r_S5*-4(sp)             # push s5
2126    sw      s6, r_S6*-4(sp)             # push s6
2127    sw      s7, r_S7*-4(sp)             # push s7
2128    sw      t8, r_T8*-4(sp)             # push t8
2129    sw      t9, r_T9*-4(sp)             # push t9
2130    sw      k0, r_K0*-4(sp)             # push k0
2131    sw      k1, r_K1*-4(sp)             # push k1
2132    sw      ra, r_RA*-4(sp)             # push RA
2133
2134    # Note: even if we don't save all 32 registers, we still need to
2135    #       adjust SP by 32 registers due to the way we are storing
2136    #       the registers on the stack.
2137    sub     sp, (32-0)*4                # adjust stack pointer
2138
2139    la     a2, .LdvmSelfVerificationMemOpDecode  # defined in footer.S
2140    lw     a2, (a2)
2141    move   a0, ra                       # a0<- link register
2142    move   a1, sp                       # a1<- stack pointer
2143    JALR(a2)
2144
2145    /* pop gp registers (except zero, gp, sp, and fp) */
2146    # Note: even if we don't save all 32 registers, we still need to
2147    #       adjust SP by 32 registers due to the way we are storing
2148    #       the registers on the stack.
2149    add     sp, (32-0)*4                # adjust stack pointer
2150    .set noat
2151    lw      AT, r_AT*-4(sp)             # pop at
2152    .set at
2153    lw      v0, r_V0*-4(sp)             # pop v0
2154    lw      v1, r_V1*-4(sp)             # pop v1
2155    lw      a0, r_A0*-4(sp)             # pop a0
2156    lw      a1, r_A1*-4(sp)             # pop a1
2157    lw      a2, r_A2*-4(sp)             # pop a2
2158    lw      a3, r_A3*-4(sp)             # pop a3
2159    lw      t0, r_T0*-4(sp)             # pop t0
2160    lw      t1, r_T1*-4(sp)             # pop t1
2161    lw      t2, r_T2*-4(sp)             # pop t2
2162    lw      t3, r_T3*-4(sp)             # pop t3
2163    lw      t4, r_T4*-4(sp)             # pop t4
2164    lw      t5, r_T5*-4(sp)             # pop t5
2165    lw      t6, r_T6*-4(sp)             # pop t6
2166    lw      t7, r_T7*-4(sp)             # pop t7
2167    lw      s0, r_S0*-4(sp)             # pop s0
2168    lw      s1, r_S1*-4(sp)             # pop s1
2169    lw      s2, r_S2*-4(sp)             # pop s2
2170    lw      s3, r_S3*-4(sp)             # pop s3
2171    lw      s4, r_S4*-4(sp)             # pop s4
2172    lw      s5, r_S5*-4(sp)             # pop s5
2173    lw      s6, r_S6*-4(sp)             # pop s6
2174    lw      s7, r_S7*-4(sp)             # pop s7
2175    lw      t8, r_T8*-4(sp)             # pop t8
2176    lw      t9, r_T9*-4(sp)             # pop t9
2177    lw      k0, r_K0*-4(sp)             # pop k0
2178    lw      k1, r_K1*-4(sp)             # pop k1
2179    lw      ra, r_RA*-4(sp)             # pop RA
2180
2181#ifdef HARD_FLOAT
2182    /* pop f0-f31 from stack */
2183    add     sp, (32-0)*4                # adjust stack pointer
2184    lw      f0, fr0*-4(sp)              # pop f0
2185    lw      f1, fr1*-4(sp)              # pop f1
2186    lw      f2, fr2*-4(sp)              # pop f2
2187    lw      f3, fr3*-4(sp)              # pop f3
2188    lw      f4, fr4*-4(sp)              # pop f4
2189    lw      f5, fr5*-4(sp)              # pop f5
2190    lw      f6, fr6*-4(sp)              # pop f6
2191    lw      f7, fr7*-4(sp)              # pop f7
2192    lw      f8, fr8*-4(sp)              # pop f8
2193    lw      f9, fr9*-4(sp)              # pop f9
2194    lw      f10, fr10*-4(sp)            # pop f10
2195    lw      f11, fr11*-4(sp)            # pop f11
2196    lw      f12, fr12*-4(sp)            # pop f12
2197    lw      f13, fr13*-4(sp)            # pop f13
2198    lw      f14, fr14*-4(sp)            # pop f14
2199    lw      f15, fr15*-4(sp)            # pop f15
2200    lw      f16, fr16*-4(sp)            # pop f16
2201    lw      f17, fr17*-4(sp)            # pop f17
2202    lw      f18, fr18*-4(sp)            # pop f18
2203    lw      f19, fr19*-4(sp)            # pop f19
2204    lw      f20, fr20*-4(sp)            # pop f20
2205    lw      f21, fr21*-4(sp)            # pop f21
2206    lw      f22, fr22*-4(sp)            # pop f22
2207    lw      f23, fr23*-4(sp)            # pop f23
2208    lw      f24, fr24*-4(sp)            # pop f24
2209    lw      f25, fr25*-4(sp)            # pop f25
2210    lw      f26, fr26*-4(sp)            # pop f26
2211    lw      f27, fr27*-4(sp)            # pop f27
2212    lw      f28, fr28*-4(sp)            # pop f28
2213    lw      f29, fr29*-4(sp)            # pop f29
2214    lw      f30, fr30*-4(sp)            # pop f30
2215    lw      f31, fr31*-4(sp)            # pop f31
2216#endif
2217
2218    RETURN
2219#endif
2220
2221/* ------------------------------ */
2222    .balign 4
2223    .global dvmCompiler_TEMPLATE_STRING_COMPARETO
2224dvmCompiler_TEMPLATE_STRING_COMPARETO:
2225/* File: mips/TEMPLATE_STRING_COMPARETO.S */
2226    /*
2227     * String's compareTo.
2228     *
2229     * Requires a0/a1 to have been previously checked for null.  Will
2230     * return a negative value if this string is < comp, 0 if they are the
2231     * same, and a positive value if >.
2232     *
2233     * IMPORTANT NOTE:
2234     *
2235     * This code relies on hard-coded offsets for string objects, and must be
2236     * kept in sync with definitions in UtfString.h.  See asm-constants.h
2237     *
2238     * On entry:
2239     *    a0:   this object pointer
2240     *    a1:   comp object pointer
2241     *
2242     */
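
    /*
     * A C sketch of the comparison implemented below (parameter names follow
     * the register comments; the actual layout comes from UtfString.h):
     *
     *     int32_t compareTo(const uint16_t *thisData, int32_t thisCount,
     *                       const uint16_t *compData, int32_t compCount) {
     *         int32_t countDiff = thisCount - compCount;
     *         int32_t minCount = (thisCount <= compCount) ? thisCount : compCount;
     *         for (int32_t i = 0; i < minCount; i++) {
     *             int32_t d = (int32_t)thisData[i] - (int32_t)compData[i];
     *             if (d != 0)
     *                 return d;
     *         }
     *         return countDiff;
     *     }
     */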
2243
2244     subu  v0, a0, a1                # Same?
2245     bnez  v0, 1f
2246     RETURN
22471:
2248     lw    t0, STRING_FIELDOFF_OFFSET(a0)
2249     lw    t1, STRING_FIELDOFF_OFFSET(a1)
2250     lw    t2, STRING_FIELDOFF_COUNT(a0)
2251     lw    a2, STRING_FIELDOFF_COUNT(a1)
2252     lw    a0, STRING_FIELDOFF_VALUE(a0)
2253     lw    a1, STRING_FIELDOFF_VALUE(a1)
2254
2255    /*
2256     * At this point, we have this/comp:
2257     *    offset: t0/t1
2258     *    count:  t2/a2
2259     *    value:  a0/a1
2260     * We're going to compute
2261     *    a3 <- countDiff
2262     *    a2 <- minCount
2263     */
2264     subu  a3, t2, a2                # a3<- countDiff
2265     sleu  t7, t2, a2
2266     movn  a2, t2, t7                # a2<- minCount
2267
2268     /*
2269      * Note: data pointers point to first element.
2270      */
2271     addu  a0, 16                    # point to contents[0]
2272     addu  a1, 16                    # point to contents[0]
2273
2274     /* Now, build pointers to the string data */
2275     sll   t7, t0, 1                 # multiply offset by 2
2276     addu  a0, a0, t7
2277     sll   t7, t1, 1                 # multiply offset by 2
2278     addu  a1, a1, t7
2279
2280     /*
2281      * At this point we have:
2282      *   a0: *this string data
2283      *   a1: *comp string data
2284      *   a2: iteration count for comparison
2285      *   a3: value to return if the first part of the string is equal
2286      *   v0: reserved for result
2287      *   t0-t5 available for loading string data
2288      */
2289
2290     subu  a2, 2
2291     bltz  a2, do_remainder2
2292
2293     /*
2294      * Unroll the first two checks so we can quickly catch early mismatch
2295      * on long strings (but preserve incoming alignment)
2296      */
2297     lhu   t0, 0(a0)
2298     lhu   t1, 0(a1)
2299     subu  v0, t0, t1
2300     beqz  v0, 1f
2301     RETURN
23021:
2303     lhu   t2, 2(a0)
2304     lhu   t3, 2(a1)
2305     subu  v0, t2, t3
2306     beqz  v0, 2f
2307     RETURN
23082:
2309     addu  a0, 4                     # offset to contents[2]
2310     addu  a1, 4                     # offset to contents[2]
2311     li    t7, 28
2312     bgt   a2, t7, do_memcmp16
2313     subu  a2, 3
2314     bltz  a2, do_remainder
2315
2316loopback_triple:
2317     lhu   t0, 0(a0)
2318     lhu   t1, 0(a1)
2319     subu  v0, t0, t1
2320     beqz  v0, 1f
2321     RETURN
23221:
2323     lhu   t2, 2(a0)
2324     lhu   t3, 2(a1)
2325     subu  v0, t2, t3
2326     beqz  v0, 2f
2327     RETURN
23282:
2329     lhu   t4, 4(a0)
2330     lhu   t5, 4(a1)
2331     subu  v0, t4, t5
2332     beqz  v0, 3f
2333     RETURN
23343:
2335     addu  a0, 6                     # offset to contents[i+3]
2336     addu  a1, 6                     # offset to contents[i+3]
2337     subu  a2, 3
2338     bgez  a2, loopback_triple
2339
2340do_remainder:
2341     addu  a2, 3
2342     beqz  a2, returnDiff
2343
2344loopback_single:
2345     lhu   t0, 0(a0)
2346     lhu   t1, 0(a1)
2347     subu  v0, t0, t1
2348     bnez  v0, 1f
2349     addu  a0, 2                     # offset to contents[i+1]
2350     addu  a1, 2                     # offset to contents[i+1]
2351     subu  a2, 1
2352     bnez  a2, loopback_single
2353
2354returnDiff:
2355     move  v0, a3
23561:
2357     RETURN
2358
2359do_remainder2:
2360     addu  a2, 2
2361     bnez  a2, loopback_single
2362     move  v0, a3
2363     RETURN
2364
2365    /* Long string case */
2366do_memcmp16:
2367     move  rOBJ, a3                  # save return value if strings are equal
2368     JAL(__memcmp16)
2369     seq   t0, v0, zero
2370     movn  v0, rOBJ, t0              # overwrite return value if strings are equal
2371     RETURN
2372
2373/* ------------------------------ */
2374    .balign 4
2375    .global dvmCompiler_TEMPLATE_STRING_INDEXOF
2376dvmCompiler_TEMPLATE_STRING_INDEXOF:
2377/* File: mips/TEMPLATE_STRING_INDEXOF.S */
2378    /*
2379     * String's indexOf.
2380     *
2381     * Requires a0 to have been previously checked for null.  Will
2382     * return index of match of a1 in v0.
2383     *
2384     * IMPORTANT NOTE:
2385     *
2386     * This code relies on hard-coded offsets for string objects, and must be
2387     * kept in sync with definitions in UtfString.h.  See asm-constants.h.
2388     *
2389     * On entry:
2390     *    a0:   string object pointer
2391     *    a1:   char to match
2392     *    a2:   Starting offset in string data
2393     */
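
    /*
     * A C sketch of the search below ("data" stands for the value array
     * biased by the string's offset field; names are illustrative):
     *
     *     int32_t indexOf(const uint16_t *data, int32_t count,
     *                     uint16_t ch, int32_t start) {
     *         if (start < 0)     start = 0;        // clamp start to [0..count]
     *         if (start > count) start = count;
     *         for (int32_t i = start; i < count; i++)
     *             if (data[i] == ch)
     *                 return i;
     *         return -1;
     *     }
     */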
2394
2395     lw    t0, STRING_FIELDOFF_OFFSET(a0)
2396     lw    t1, STRING_FIELDOFF_COUNT(a0)
2397     lw    v0, STRING_FIELDOFF_VALUE(a0)
2398
2399    /*
2400     * At this point, we have:
2401     *    v0: object pointer
2402     *    a1: char to match
2403     *    a2: starting offset
2404     *    t0: offset
2405     *    t1: string length
2406     */
2407
2408    /* Point to first element */
2409     addu  v0, 16                    # point to contents[0]
2410
2411    /* Build pointer to start of string data */
2412     sll   t7, t0, 1                 # multiply offset by 2
2413     addu  v0, v0, t7
2414
2415    /* Save a copy of the start of the string data in v1 */
2416     move  v1, v0
2417
2418    /* Clamp start to [0..count] */
2419     slt   t7, a2, zero
2420     movn  a2, zero, t7
2421     sgt   t7, a2, t1
2422     movn  a2, t1, t7
2423
2424    /* Build pointer to start of data to compare */
2425     sll   t7, a2, 1                # multiply offset by 2
2426     addu  v0, v0, t7
2427
2428    /* Compute iteration count */
2429     subu  a3, t1, a2
2430
2431    /*
2432     * At this point we have:
2433     *   v0: start of data to test
2434     *   a1: char to compare
2435     *   a3: iteration count
2436     *   v1: original start of string
2437     *   t0-t7 available for loading string data
2438     */
2439     subu  a3, 4
2440     bltz  a3, indexof_remainder
2441
2442indexof_loop4:
2443     lhu   t0, 0(v0)
2444     beq   t0, a1, match_0
2445     lhu   t0, 2(v0)
2446     beq   t0, a1, match_1
2447     lhu   t0, 4(v0)
2448     beq   t0, a1, match_2
2449     lhu   t0, 6(v0)
2450     beq   t0, a1, match_3
2451     addu  v0, 8                     # offset to contents[i+4]
2452     subu  a3, 4
2453     bgez  a3, indexof_loop4
2454
2455indexof_remainder:
2456     addu  a3, 4
2457     beqz  a3, indexof_nomatch
2458
2459indexof_loop1:
2460     lhu   t0, 0(v0)
2461     beq   t0, a1, match_0
2462     addu  v0, 2                     # offset to contents[i+1]
2463     subu  a3, 1
2464     bnez  a3, indexof_loop1
2465
2466indexof_nomatch:
2467     li    v0, -1
2468     RETURN
2469
2470match_0:
2471     subu  v0, v1
2472     sra   v0, v0, 1                 # divide by 2
2473     RETURN
2474match_1:
2475     addu  v0, 2
2476     subu  v0, v1
2477     sra   v0, v0, 1                 # divide by 2
2478     RETURN
2479match_2:
2480     addu  v0, 4
2481     subu  v0, v1
2482     sra   v0, v0, 1                 # divide by 2
2483     RETURN
2484match_3:
2485     addu  v0, 6
2486     subu  v0, v1
2487     sra   v0, v0, 1                 # divide by 2
2488     RETURN
2489
2490/* ------------------------------ */
2491    .balign 4
2492    .global dvmCompiler_TEMPLATE_INTERPRET
2493dvmCompiler_TEMPLATE_INTERPRET:
2494/* File: mips/TEMPLATE_INTERPRET.S */
2495    /*
2496     * This handler transfers control to the interpreter without performing
2497     * any lookups.  It may be called either as part of a normal chaining
2498     * operation, or from the transition code in header.S.  We distinguish
2499     * the two cases by looking at the link register.  If called from a
2500     * translation chain, it will point to the chaining Dalvik PC.
2501     * On entry:
2502     *    ra - if NULL:
2503     *        a1 - the Dalvik PC to begin interpretation.
2504     *    else
2505     *        [ra] contains Dalvik PC to begin interpretation
2506     *    rSELF - pointer to thread
2507     *    rFP - Dalvik frame pointer
2508     */
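    /*
     * In outline (a sketch of the dispatch below):
     *
     *     dalvikPC = (ra == NULL) ? a1 : *ra;   // chaining cell holds the PC
     *     dvmJitToInterpPunt(dalvikPC);         // never returns
     */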
2509    la      t0, dvmJitToInterpPunt
2510    move    a0, a1
2511    beq     ra, zero, 1f
2512    lw      a0, 0(ra)
25131:
2514    jr      t0
2515    # doesn't return
2516
2517/* ------------------------------ */
2518    .balign 4
2519    .global dvmCompiler_TEMPLATE_MONITOR_ENTER
2520dvmCompiler_TEMPLATE_MONITOR_ENTER:
2521/* File: mips/TEMPLATE_MONITOR_ENTER.S */
2522    /*
2523     * Call out to the runtime to lock an object.  Because this thread
2524     * may have been suspended in THREAD_MONITOR state and the Jit's
2525     * translation cache subsequently cleared, we cannot return directly.
2526     * Instead, unconditionally transition to the interpreter to resume.
2527     *
2528     * On entry:
2529     *    a0 - self pointer
2530     *    a1 - the object (which has already been null-checked by the caller)
2531     *    rPC - the Dalvik PC of the following instruction.
2532     */
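    /*
     * Roughly (a sketch of the sequence below):
     *
     *     self->inJitCodeCache = NULL;     // we will not return to the cache
     *     dvmLockObject(self, obj);
     *     dvmJitToInterpNoChain(rPC);      // resume in the interpreter
     */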
2533    la     a2, .LdvmLockObject
2534    lw     t9, (a2)
2535    sw     zero, offThread_inJitCodeCache(a0)   # record that we're not returning
2536    JALR(t9)                                    # dvmLockObject(self, obj)
2537    lw     gp, STACK_OFFSET_GP(sp)
2538
2539    la     a2, .LdvmJitToInterpNoChain
2540    lw     a2, (a2)
2541
2542    # Bail to interpreter - no chain [note - rPC still contains dPC]
2543#if defined(WITH_JIT_TUNING)
2544    li      a0, kHeavyweightMonitor
2545#endif
2546    jr      a2
2547
2548/* ------------------------------ */
2549    .balign 4
2550    .global dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG
2551dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
2552/* File: mips/TEMPLATE_MONITOR_ENTER_DEBUG.S */
2553    /*
2554     * To support deadlock prediction, this version of MONITOR_ENTER
2555     * will always call the heavyweight dvmLockObject, check for an
2556     * exception and then bail out to the interpreter.
2557     *
2558     * On entry:
2559     *    a0 - self pointer
2560     *    a1 - the object (which has already been null-checked by the caller)
2561     *    rPC - the Dalvik PC of the following instruction.
2562     *
2563     */
2564    la     a2, .LdvmLockObject
2565    lw     t9, (a2)
2566    sw     zero, offThread_inJitCodeCache(a0)   # record that we're not returning
2567    JALR(t9)                                    # dvmLockObject(self, obj)
2568    lw     gp, STACK_OFFSET_GP(sp)
2569
2570    # test for exception
2571    lw     a1, offThread_exception(rSELF)
2572    beqz   a1, 1f
2573    sub    a0, rPC, 2                           # roll dPC back to this monitor instruction
2574    j      .LhandleException
25751:
2576    # Bail to interpreter - no chain [note - rPC still contains dPC]
2577#if defined(WITH_JIT_TUNING)
2578    li     a0, kHeavyweightMonitor
2579#endif
2580    la     a2, .LdvmJitToInterpNoChain
2581    lw     a2, (a2)
2582    jr     a2
2583
2584/* ------------------------------ */
2585    .balign 4
2586    .global dvmCompiler_TEMPLATE_RESTORE_STATE
2587dvmCompiler_TEMPLATE_RESTORE_STATE:
2588/* File: mips/TEMPLATE_RESTORE_STATE.S */
2589    /*
2590     * This handler restores state following a selfVerification memory access.
2591     * On entry:
2592     *    a0 - offset from rSELF to the 1st element of the coreRegs save array.
2593     * Note: the following registers are not restored
2594     *       zero, AT, gp, sp, fp, ra
2595     */
2596
2597    add     a0, a0, rSELF               # pointer to heapArgSpace.coreRegs[0]
2598#if 0
2599    lw      zero, r_ZERO*4(a0)          # restore zero
2600#endif
2601    .set noat
2602    lw      AT, r_AT*4(a0)              # restore at
2603    .set at
2604    lw      v0, r_V0*4(a0)              # restore v0
2605    lw      v1, r_V1*4(a0)              # restore v1
2606
2607    lw      a1, r_A1*4(a0)              # restore a1
2608    lw      a2, r_A2*4(a0)              # restore a2
2609    lw      a3, r_A3*4(a0)              # restore a3
2610
2611    lw      t0, r_T0*4(a0)              # restore t0
2612    lw      t1, r_T1*4(a0)              # restore t1
2613    lw      t2, r_T2*4(a0)              # restore t2
2614    lw      t3, r_T3*4(a0)              # restore t3
2615    lw      t4, r_T4*4(a0)              # restore t4
2616    lw      t5, r_T5*4(a0)              # restore t5
2617    lw      t6, r_T6*4(a0)              # restore t6
2618    lw      t7, r_T7*4(a0)              # restore t7
2619
2620    lw      s0, r_S0*4(a0)              # restore s0
2621    lw      s1, r_S1*4(a0)              # restore s1
2622    lw      s2, r_S2*4(a0)              # restore s2
2623    lw      s3, r_S3*4(a0)              # restore s3
2624    lw      s4, r_S4*4(a0)              # restore s4
2625    lw      s5, r_S5*4(a0)              # restore s5
2626    lw      s6, r_S6*4(a0)              # restore s6
2627    lw      s7, r_S7*4(a0)              # restore s7
2628
2629    lw      t8, r_T8*4(a0)              # restore t8
2630    lw      t9, r_T9*4(a0)              # restore t9
2631
2632    lw      k0, r_K0*4(a0)              # restore k0
2633    lw      k1, r_K1*4(a0)              # restore k1
2634
2635#if 0
2636    lw      gp, r_GP*4(a0)              # restore gp
2637    lw      sp, r_SP*4(a0)              # restore sp
2638    lw      fp, r_FP*4(a0)              # restore fp
2639    lw      ra, r_RA*4(a0)              # restore ra
2640#endif
2641
2642/* #ifdef HARD_FLOAT */
2643#if 0
2644    lw      f0, fr0*4(a0)               # restore f0
2645    lw      f1, fr1*4(a0)               # restore f1
2646    lw      f2, fr2*4(a0)               # restore f2
2647    lw      f3, fr3*4(a0)               # restore f3
2648    lw      f4, fr4*4(a0)               # restore f4
2649    lw      f5, fr5*4(a0)               # restore f5
2650    lw      f6, fr6*4(a0)               # restore f6
2651    lw      f7, fr7*4(a0)               # restore f7
2652    lw      f8, fr8*4(a0)               # restore f8
2653    lw      f9, fr9*4(a0)               # restore f9
2654    lw      f10, fr10*4(a0)             # restore f10
2655    lw      f11, fr11*4(a0)             # restore f11
2656    lw      f12, fr12*4(a0)             # restore f12
2657    lw      f13, fr13*4(a0)             # restore f13
2658    lw      f14, fr14*4(a0)             # restore f14
2659    lw      f15, fr15*4(a0)             # restore f15
2660    lw      f16, fr16*4(a0)             # restore f16
2661    lw      f17, fr17*4(a0)             # restore f17
2662    lw      f18, fr18*4(a0)             # restore f18
2663    lw      f19, fr19*4(a0)             # restore f19
2664    lw      f20, fr20*4(a0)             # restore f20
2665    lw      f21, fr21*4(a0)             # restore f21
2666    lw      f22, fr22*4(a0)             # restore f22
2667    lw      f23, fr23*4(a0)             # restore f23
2668    lw      f24, fr24*4(a0)             # restore f24
2669    lw      f25, fr25*4(a0)             # restore f25
2670    lw      f26, fr26*4(a0)             # restore f26
2671    lw      f27, fr27*4(a0)             # restore f27
2672    lw      f28, fr28*4(a0)             # restore f28
2673    lw      f29, fr29*4(a0)             # restore f29
2674    lw      f30, fr30*4(a0)             # restore f30
2675    lw      f31, fr31*4(a0)             # restore f31
2676#endif
2677
2678    lw      a0, r_A0*4(a0)              # restore a0 last (it is the base pointer)
2679    RETURN
2680
2681/* ------------------------------ */
2682    .balign 4
2683    .global dvmCompiler_TEMPLATE_SAVE_STATE
2684dvmCompiler_TEMPLATE_SAVE_STATE:
2685/* File: mips/TEMPLATE_SAVE_STATE.S */
2686    /*
2687     * This handler performs a register save for selfVerification mode.
2688     * On entry:
2689     *    Top of stack + 4: a1 value to save
2690     *    Top of stack + 0: a0 value to save
2691     *    a0 - offset from rSELF to the beginning of the heapArgSpace record
2692     *    a1 - the value of regMap
2693     *
2694     * The handler must save regMap, r0-r31, f0-f31 if FPU, and then return with
2695     * r0-r31 with their original values (note that this means a0 and a1 must take
2696     * the values on the stack - not the ones in those registers on entry).
2697     * Finally, the two registers previously pushed must be popped.
2698     * Note: the following registers are not saved
2699     *       zero, AT, gp, sp, fp, ra
2700     */
2701    add     a0, a0, rSELF               # pointer to heapArgSpace
2702    sw      a1, 0(a0)                   # save regMap
2703    add     a0, a0, 4                   # pointer to coreRegs
2704#if 0
2705    sw      zero, r_ZERO*4(a0)          # save zero
2706#endif
2707    .set noat
2708    sw      AT, r_AT*4(a0)              # save at
2709    .set at
2710    sw      v0, r_V0*4(a0)              # save v0
2711    sw      v1, r_V1*4(a0)              # save v1
2712
2713    lw      a1, 0(sp)                   # recover a0 value
2714    sw      a1, r_A0*4(a0)              # save a0
2715    lw      a1, 4(sp)                   # recover a1 value
2716    sw      a1, r_A1*4(a0)              # save a1
2717    sw      a2, r_A2*4(a0)              # save a2
2718    sw      a3, r_A3*4(a0)              # save a3
2719
2720    sw      t0, r_T0*4(a0)              # save t0
2721    sw      t1, r_T1*4(a0)              # save t1
2722    sw      t2, r_T2*4(a0)              # save t2
2723    sw      t3, r_T3*4(a0)              # save t3
2724    sw      t4, r_T4*4(a0)              # save t4
2725    sw      t5, r_T5*4(a0)              # save t5
2726    sw      t6, r_T6*4(a0)              # save t6
2727    sw      t7, r_T7*4(a0)              # save t7
2728
2729    sw      s0, r_S0*4(a0)              # save s0
2730    sw      s1, r_S1*4(a0)              # save s1
2731    sw      s2, r_S2*4(a0)              # save s2
2732    sw      s3, r_S3*4(a0)              # save s3
2733    sw      s4, r_S4*4(a0)              # save s4
2734    sw      s5, r_S5*4(a0)              # save s5
2735    sw      s6, r_S6*4(a0)              # save s6
2736    sw      s7, r_S7*4(a0)              # save s7
2737
2738    sw      t8, r_T8*4(a0)              # save t8
2739    sw      t9, r_T9*4(a0)              # save t9
2740
2741    sw      k0, r_K0*4(a0)              # save k0
2742    sw      k1, r_K1*4(a0)              # save k1
2743
2744#if 0
2745    sw      gp, r_GP*4(a0)              # save gp
2746    sw      sp, r_SP*4(a0)              # save sp (need to adjust??? )
2747    sw      fp, r_FP*4(a0)              # save fp
2748    sw      ra, r_RA*4(a0)              # save ra
2749#endif
2750
2751/* #ifdef HARD_FLOAT */
2752#if 0
2753    sw      f0, fr0*4(a0)               # save f0
2754    sw      f1, fr1*4(a0)               # save f1
2755    sw      f2, fr2*4(a0)               # save f2
2756    sw      f3, fr3*4(a0)               # save f3
2757    sw      f4, fr4*4(a0)               # save f4
2758    sw      f5, fr5*4(a0)               # save f5
2759    sw      f6, fr6*4(a0)               # save f6
2760    sw      f7, fr7*4(a0)               # save f7
2761    sw      f8, fr8*4(a0)               # save f8
2762    sw      f9, fr9*4(a0)               # save f9
2763    sw      f10, fr10*4(a0)             # save f10
2764    sw      f11, fr11*4(a0)             # save f11
2765    sw      f12, fr12*4(a0)             # save f12
2766    sw      f13, fr13*4(a0)             # save f13
2767    sw      f14, fr14*4(a0)             # save f14
2768    sw      f15, fr15*4(a0)             # save f15
2769    sw      f16, fr16*4(a0)             # save f16
2770    sw      f17, fr17*4(a0)             # save f17
2771    sw      f18, fr18*4(a0)             # save f18
2772    sw      f19, fr19*4(a0)             # save f19
2773    sw      f20, fr20*4(a0)             # save f20
2774    sw      f21, fr21*4(a0)             # save f21
2775    sw      f22, fr22*4(a0)             # save f22
2776    sw      f23, fr23*4(a0)             # save f23
2777    sw      f24, fr24*4(a0)             # save f24
2778    sw      f25, fr25*4(a0)             # save f25
2779    sw      f26, fr26*4(a0)             # save f26
2780    sw      f27, fr27*4(a0)             # save f27
2781    sw      f28, fr28*4(a0)             # save f28
2782    sw      f29, fr29*4(a0)             # save f29
2783    sw      f30, fr30*4(a0)             # save f30
2784    sw      f31, fr31*4(a0)             # save f31
2785#endif
2786
2787    lw      a0, 0(sp)                   # recover a0 value
2788    lw      a1, 4(sp)                   # recover a1 value
2789    add     sp, sp, 8                   # pop the two saved values
2790    RETURN
2791
2792/* ------------------------------ */
2793    .balign 4
2794    .global dvmCompiler_TEMPLATE_PERIODIC_PROFILING
2795dvmCompiler_TEMPLATE_PERIODIC_PROFILING:
2796/* File: mips/TEMPLATE_PERIODIC_PROFILING.S */
2797    /*
2798     * Increment profile counter for this trace, and decrement
2799     * sample counter.  If sample counter goes below zero, turn
2800     * off profiling.
2801     *
2802     * On entry:
2803     * (ra-16) is the address of a pointer to the counter.  Note: on MIPS
2804     *    the counter pointer lives 16 bytes before the return target:
2805     *     - 4 bytes for the prof count addr.
2806     *     - 4 bytes for the chain cell offset (2 bytes, 32-bit aligned).
2807     *     - 4 bytes for the call to TEMPLATE_PERIODIC_PROFILING.
2808     *     - 4 bytes for the call delay slot.
2809     */
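    /*
     * In C terms (a sketch of the code below):
     *
     *     (**(int **)(ra - 16))++;                  // bump the trace counter
     *     if (--(*self->pProfileCountdown) < 0)
     *         dvmJitTraceProfilingOff();            // sample budget exhausted
     */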
2810     lw     a0, -16(ra)
2811     lw     a1, offThread_pProfileCountdown(rSELF)
2812     lw     a2, 0(a0)                   # get counter
2813     lw     a3, 0(a1)                   # get countdown timer
2814     addu   a2, 1
2815     sub    a3, 1                       # FIXME - bug in ARM code???
2816     bltz   a3, .LTEMPLATE_PERIODIC_PROFILING_disable_profiling
2817     sw     a2, 0(a0)
2818     sw     a3, 0(a1)
2819     RETURN
2820.LTEMPLATE_PERIODIC_PROFILING_disable_profiling:
2821     move   rTEMP, ra                   # preserve ra
2822     la     a0, dvmJitTraceProfilingOff
2823     JALR(a0)
2824     jr     rTEMP
2825
2826/* ------------------------------ */
2827    .balign 4
2828    .global dvmCompiler_TEMPLATE_RETURN_PROF
2829dvmCompiler_TEMPLATE_RETURN_PROF:
2830/* File: mips/TEMPLATE_RETURN_PROF.S */
2831#define TEMPLATE_INLINE_PROFILING
2832/* File: mips/TEMPLATE_RETURN.S */
2833    /*
2834     * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
2835     * If the stored value in returnAddr is non-zero, the caller was compiled
2836     * by the JIT, so return to the address in the code cache following the
2837     * invoke instruction.  Otherwise return to the special
2838     * dvmJitToInterpNoChain entry point.
2839     */
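    /*
     * The unwind in outline (a sketch; names follow the register comments,
     * and the real flow below also handles the interpreted-caller bail):
     *
     *     saveArea = SAVEAREA_FROM_FP(rFP);
     *     rPC = saveArea->savedPc + 3;          // step past the 3-unit invoke
     *     rFP = saveArea->prevFrame;
     *     ret = saveArea->returnAddr;
     *     if (ret != NULL && !self->breakFlags)
     *         goto *ret;                        // chained: back into the cache
     *     dvmJitToInterpNoChain(rPC);           // otherwise via the interpreter
     */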
2840#if defined(TEMPLATE_INLINE_PROFILING)
2841    # preserve a0-a2 and ra
2842    SCRATCH_STORE(a0, 0)
2843    SCRATCH_STORE(a1, 4)
2844    SCRATCH_STORE(a2, 8)
2845    SCRATCH_STORE(ra, 12)
2846
2847    # a0=rSELF
2848    move    a0, rSELF
2849    la      t9, dvmFastMethodTraceExit
2850    JALR(t9)
2851    lw      gp, STACK_OFFSET_GP(sp)
2852
2853    # restore a0-a2 and ra
2854    SCRATCH_LOAD(ra, 12)
2855    SCRATCH_LOAD(a2, 8)
2856    SCRATCH_LOAD(a1, 4)
2857    SCRATCH_LOAD(a0, 0)
2858#endif
2859    SAVEAREA_FROM_FP(a0, rFP)           # a0<- saveArea (old)
2860    lw      t0, offStackSaveArea_prevFrame(a0)     # t0<- saveArea->prevFrame
2861    lbu     t1, offThread_breakFlags(rSELF)        # t1<- breakFlags
2862    lw      rPC, offStackSaveArea_savedPc(a0)      # rPC<- saveArea->savedPc
2863#if !defined(WITH_SELF_VERIFICATION)
2864    lw      t2,  offStackSaveArea_returnAddr(a0)   # t2<- chaining cell ret
2865#else
2866    move    t2, zero                               # disable chaining
2867#endif
2868    lw      a2, offStackSaveArea_method - sizeofStackSaveArea(t0)
2869                                                   # a2<- method we're returning to
2870#if !defined(WITH_SELF_VERIFICATION)
2871    beq     a2, zero, 1f                           # bail to interpreter
2872#else
2873    bne     a2, zero, 2f
2874    JALR(ra)                                       # punt to interpreter and compare state
2875    # DOUG: assume this does not return ???
28762:
2877#endif
2878    la      t4, .LdvmJitToInterpNoChainNoProfile   # defined in footer.S
2879    lw      a1, (t4)
2880    move    rFP, t0                                # publish new FP
2881    beq     a2, zero, 4f
2882    lw      t0, offMethod_clazz(a2)                # t0<- method->clazz
28834:
2884
2885    sw      a2, offThread_method(rSELF)            # self->method = newSave->method
2886    lw      a0, offClassObject_pDvmDex(t0)         # a0<- method->clazz->pDvmDex
2887    sw      rFP, offThread_curFrame(rSELF)         # self->curFrame = fp
2888    add     rPC, rPC, 3*2                          # publish new rPC
2889    sw      a0, offThread_methodClassDex(rSELF)
2890    movn    t2, zero, t1                           # check the breakFlags and
2891                                                   # clear the chaining cell address
    sw      t2, offThread_inJitCodeCache(rSELF)    # in code cache or not
    beq     t2, zero, 3f                           # chaining cell exists?
    JALR(t2)                                       # jump to the chaining cell
    # DOUG: assume this does not return ???
3:
#if defined(WITH_JIT_TUNING)
    li      a0, kCallsiteInterpreted
#endif
    j       a1                                     # callsite is interpreted
1:
    sw      zero, offThread_inJitCodeCache(rSELF)  # reset inJitCodeCache
    SAVE_PC_TO_SELF()                              # SAVE_PC_FP_TO_SELF()
    SAVE_FP_TO_SELF()
    la      t4, .LdvmMterpStdBail                  # defined in footer.S
    lw      a2, (t4)
    move    a0, rSELF                              # Expecting rSELF in a0
    JALR(a2)                                       # exit the interpreter
    # DOUG: assume this does not return ???

#undef TEMPLATE_INLINE_PROFILING

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF
dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF:
/* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT_PROF.S */
#define TEMPLATE_INLINE_PROFILING
/* File: mips/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
    /*
     * For polymorphic callsites - set up the Dalvik frame, load the Dalvik
     * PC into rPC, then jump to dvmJitToInterpNoChain to dispatch the
     * runtime-resolved callee.
     */
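/*
 * Hedged C sketch of the frame carve-out and overflow check below (the C
 * form of SAVEAREA_FROM_FP subtracts sizeof(StackSaveArea) from a frame
 * pointer; types simplified):
 *
 *     u4 *newFp  = (u4 *)SAVEAREA_FROM_FP(fp) - methodToCall->registersSize;
 *     u1 *bottom = (u1 *)SAVEAREA_FROM_FP(newFp)
 *                - methodToCall->outsSize * sizeof(u4);
 *     if (bottom < self->interpStackEnd)
 *         return;    // back to the caller, which raises the stack overflow
 */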
    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    lh     a2, offMethod_outsSize(a0)             # a2<- methodToCall->outsSize
    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    move   a3, a1                                 # a3<- returnCell
    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
    sll    t6, a2, 2                              # multiply outsSize by 4 (4 bytes per reg)
    sub    t0, t0, t6                             # t0<- bottom (newsave-outsSize)
    bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
    RETURN                                        # return to raise stack overflow excep.

1:
    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    lw     t9, offMethod_clazz(a0)                # t9<- methodToCall->clazz
    lw     t0, offMethod_accessFlags(a0)          # t0<- methodToCall->accessFlags
    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns

    # set up newSaveArea
    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    beqz   t8, 2f                                 # branch if breakFlags == 0
    RETURN                                        # else bail to the interpreter

2:
    and    t6, t0, ACC_NATIVE
    beqz   t6, 3f
#if !defined(WITH_SELF_VERIFICATION)
    j      .LinvokeNative
#else
    RETURN                                        # bail to the interpreter
#endif

3:
    # continue executing the next instruction through the interpreter
    la     t0, .LdvmJitToInterpTraceSelectNoChain # defined in footer.S
    lw     rTEMP, (t0)
    lw     a3, offClassObject_pDvmDex(t9)         # a3<- method->clazz->pDvmDex

    # Update "thread" values for the new method
    sw     a0, offThread_method(rSELF)            # self->method = methodToCall
    sw     a3, offThread_methodClassDex(rSELF)    # self->methodClassDex = ...
    move   rFP, a1                                # fp = newFp
    sw     rFP, offThread_curFrame(rSELF)         # self->curFrame = newFp
#if defined(TEMPLATE_INLINE_PROFILING)
    # preserve a0-a3
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(a3, 12)

    # a0=methodToCall, a1=rSELF
    move   a1, rSELF
    la     t9, dvmFastMethodTraceEnter
    JALR(t9)
    lw     gp, STACK_OFFSET_GP(sp)

    # restore a0-a3
    SCRATCH_LOAD(a3, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)
#endif

    # Start executing the callee
#if defined(WITH_JIT_TUNING)
    li     a0, kInlineCacheMiss
#endif
    jr     rTEMP                                  # dvmJitToInterpTraceSelectNoChain

#undef TEMPLATE_INLINE_PROFILING

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF
dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF:
/* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN_PROF.S */
#define TEMPLATE_INLINE_PROFILING
/* File: mips/TEMPLATE_INVOKE_METHOD_CHAIN.S */
    /*
     * For a monomorphic callsite, set up the Dalvik frame and return
     * through ra to transfer control to the callee method through a
     * dedicated chaining cell.
     */
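/*
 * The chaining-cell call convention, sketched (illustrative): the cell
 * reaches this template with a jal, so on entry
 *
 *     void *resume = ra;        // chain is good: fall back into the cell
 *     void *punt   = ra + 8;    // skip the cell's branch + delay slot and
 *                               // punt to the interpreter instead
 *
 * which is why the code below computes "add t2, ra, 8" up front.
 */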
    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
    # methodToCall is guaranteed to be non-native
.LinvokeChainProf:
    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    lh     a2, offMethod_outsSize(a0)             # a2<- methodToCall->outsSize
    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    move   a3, a1                                 # a3<- returnCell
    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
    add    t2, ra, 8                              # setup the punt-to-interp address
                                                  # 8 bytes skips branch and delay slot
    sll    t6, a2, 2                              # multiply outsSize by 4 (4 bytes per reg)
    sub    t0, t0, t6                             # t0<- bottom (newsave-outsSize)
    bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
    jr     t2                                     # return to raise stack overflow excep.

1:
    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    lw     t9, offMethod_clazz(a0)                # t9<- methodToCall->clazz
    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns

    # set up newSaveArea
    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    beqz   t8, 2f                                 # branch if breakFlags == 0
    jr     t2                                     # else bail to the interpreter

2:
    lw     a3, offClassObject_pDvmDex(t9)         # a3<- methodToCall->clazz->pDvmDex

    # Update "thread" values for the new method
    sw     a0, offThread_method(rSELF)            # self->method = methodToCall
    sw     a3, offThread_methodClassDex(rSELF)    # self->methodClassDex = ...
    move   rFP, a1                                # fp = newFp
    sw     rFP, offThread_curFrame(rSELF)         # self->curFrame = newFp
#if defined(TEMPLATE_INLINE_PROFILING)
    # preserve a0-a2 and ra
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(ra, 12)

    move   a1, rSELF
    # a0=methodToCall, a1=rSELF
    la     t9, dvmFastMethodTraceEnter
    jalr   t9
    lw     gp, STACK_OFFSET_GP(sp)

    # restore a0-a2 and ra
    SCRATCH_LOAD(ra, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)
#endif
    RETURN                                        # return to the callee-chaining cell

#undef TEMPLATE_INLINE_PROFILING

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF
dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF:
/* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF.S */
#define TEMPLATE_INLINE_PROFILING
/* File: mips/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
    /*
     * For a polymorphic callsite, check whether the cached class pointer
     * matches the current one. If so, set up the Dalvik frame and return
     * through ra to transfer control to the callee method through a
     * dedicated chaining cell.
     *
     * The predicted chaining cell is declared in ArmLIR.h with the
     * following layout:
     *
     *  typedef struct PredictedChainingCell {
     *      u4 branch;
     *      u4 delay_slot;
     *      const ClassObject *clazz;
     *      const Method *method;
     *      u4 counter;
     *  } PredictedChainingCell;
     *
     * Upon returning to the callsite:
     *    - ra    : to branch to the chaining cell
     *    - ra+8  : to punt to the interpreter
     *    - ra+16 : to fully resolve the callee and possibly rechain.
     *              a3 <- class
     */
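/*
 * Hedged C sketch of the dispatch below (cell fields follow the layout
 * above; "return_to" stands for an indirect jump, not a real helper):
 *
 *     if (cell->clazz == this->clazz) {
 *         // prediction hit: chain into the callee via .LinvokeChainProf
 *         return_to(ra);
 *     } else {
 *         int count = (cell->clazz != NULL) ? --self->icRechainCount : 0;
 *         // a1 <- count, a3 <- this->clazz, rINST <- this->clazz->vtable
 *         return_to(ra + 16);    // fully resolve the callee, maybe rechain
 *     }
 */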
    # a0 = this, a1 = returnCell, a2 = predictedChainCell, rPC = dalvikCallsite
    lw      a3, offObject_clazz(a0)                # a3 <- this->class
    lw      rIBASE, 8(a2)                          # rIBASE <- predictedChainCell->clazz
    lw      a0, 12(a2)                             # a0 <- predictedChainCell->method
    lw      t1, offThread_icRechainCount(rSELF)    # t1 <- shared rechainCount

#if defined(WITH_JIT_TUNING)
    la      rINST, .LdvmICHitCount
    #add     t2, t2, 1
    bne    a3, rIBASE, 1f
    nop
    lw      t2, 0(rINST)
    add     t2, t2, 1
    sw      t2, 0(rINST)
1:
    #add     t2, t2, 1
#endif
    beq     a3, rIBASE, .LinvokeChainProf         # branch if predicted chain is valid
    lw      rINST, offClassObject_vtable(a3)      # rINST <- this->class->vtable
    beqz    rIBASE, 2f                            # branch if cell is uninitialized
    sub     a1, t1, 1                             # count--
    sw      a1, offThread_icRechainCount(rSELF)   # write back to InterpState
    b       3f
2:
    move    a1, zero
3:
    add     ra, ra, 16                            # return to fully-resolve landing pad
    /*
     * a1 <- count
     * a2 <- &predictedChainCell
     * a3 <- this->class
     * rPC <- dPC
     * rINST <- this->class->vtable
     */
    RETURN

#undef TEMPLATE_INLINE_PROFILING

/* ------------------------------ */
    .balign 4
    .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF
dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF:
/* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE_PROF.S */
#define TEMPLATE_INLINE_PROFILING
/* File: mips/TEMPLATE_INVOKE_METHOD_NATIVE.S */
    # a0 = methodToCall, a1 = returnCell, rPC = dalvikCallsite
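/*
 * Hedged C sketch of this template's happy path (the bridge signature is
 * inferred from the a0-a3 setup below; carve_frame and jniLocalTopCookie
 * are hypothetical stand-ins for the newFp computation shared with the
 * NO_OPT template and the word behind offThread_jniLocal_topCookie):
 *
 *     u4 *newFp = carve_frame(methodToCall);        // or bail on overflow
 *     newSaveArea->localRefCookie = jniLocalTopCookie(self);
 *     self->curFrame = newFp;
 *     methodToCall->nativeFunc(newFp, &self->retval, methodToCall, self);
 *     // then pop the JNI locals, check for a pending exception, and either
 *     // jump to the return chaining cell or re-enter the interpreter
 */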
    lh     t7, offMethod_registersSize(a0)        # t7<- methodToCall->regsSize
    lw     t9, offThread_interpStackEnd(rSELF)    # t9<- interpStackEnd
    lbu    t8, offThread_breakFlags(rSELF)        # t8<- breakFlags
    move   a3, a1                                 # a3<- returnCell
    SAVEAREA_FROM_FP(a1, rFP)                     # a1<- stack save area
    sll    t6, t7, 2                              # multiply regsSize by 4 (4 bytes per reg)
    sub    a1, a1, t6                             # a1<- newFp(old savearea-regsSize)
    SAVEAREA_FROM_FP(t0, a1)                      # t0<- stack save area
    bgeu   t0, t9, 1f                             # bottom < interpStackEnd?
    RETURN                                        # return to raise stack overflow excep.

1:
    # a1 = newFP, a0 = methodToCall, a3 = returnCell, rPC = dalvikCallsite
    sw     rPC, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)
    sw     rPC, (offStackSaveArea_savedPc - sizeofStackSaveArea)(a1)
    lw     rPC, offMethod_insns(a0)               # rPC<- methodToCall->insns

    # set up newSaveArea
    sw     rFP, (offStackSaveArea_prevFrame - sizeofStackSaveArea)(a1)
    sw     a3, (offStackSaveArea_returnAddr - sizeofStackSaveArea)(a1)
    sw     a0, (offStackSaveArea_method - sizeofStackSaveArea)(a1)
    lw     rTEMP, offMethod_nativeFunc(a0)        # rTEMP<- method->nativeFunc
#if !defined(WITH_SELF_VERIFICATION)
    beqz   t8, 2f                                 # branch if breakFlags == 0
    RETURN                                        # else bail to the interpreter
2:
#else
    RETURN                                        # bail to the interpreter unconditionally
#endif

    # go ahead and transfer control to the native code
    lw     t6, offThread_jniLocal_topCookie(rSELF)  # t6<- thread->localRef->...
    sw     a1, offThread_curFrame(rSELF)          # self->curFrame = newFp
    sw     zero, offThread_inJitCodeCache(rSELF)  # not in the jit code cache
    sw     t6, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1)
                                                  # newFp->localRefCookie=top
    SAVEAREA_FROM_FP(rBIX, a1)                    # rBIX<- new stack save area
    move   a2, a0                                 # a2<- methodToCall
    move   a0, a1                                 # a0<- newFp
    add    a1, rSELF, offThread_retval            # a1<- &retval
    move   a3, rSELF                              # a3<- self
#if defined(TEMPLATE_INLINE_PROFILING)
    # a2: methodToCall
    # preserve a0-a3
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(a3, 12)

    move   a0, a2
    move   a1, rSELF
    # a0=JNIMethod, a1=rSELF
    la      t9, dvmFastMethodTraceEnter
    JALR(t9)                                      # call the method-trace entry hook
    lw     gp, STACK_OFFSET_GP(sp)

    # restore a0-a3
    SCRATCH_LOAD(a3, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)

    move   rOBJ, a2                               # save a2
#endif

    JALR(rTEMP)                                   # off to the native code
    lw     gp, STACK_OFFSET_GP(sp)

#if defined(TEMPLATE_INLINE_PROFILING)
    move   a0, rOBJ
    move   a1, rSELF
    # a0=JNIMethod, a1=rSELF
    la      t9, dvmFastNativeMethodTraceExit
    JALR(t9)
    lw     gp, STACK_OFFSET_GP(sp)
#endif

    # native return; rBIX=newSaveArea
    # equivalent to dvmPopJniLocals
    lw     a2, offStackSaveArea_returnAddr(rBIX)     # a2 = chaining cell ret addr
    lw     a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top
    lw     a1, offThread_exception(rSELF)            # check for exception
    sw     rFP, offThread_curFrame(rSELF)            # self->curFrame = fp
    sw     a0, offThread_jniLocal_topCookie(rSELF)   # new top <- old top
    lw     a0, (offStackSaveArea_currentPc - sizeofStackSaveArea)(rFP)

    # a0 = dalvikCallsitePC
    bnez   a1, .LhandleException                     # handle exception if any

    sw     a2, offThread_inJitCodeCache(rSELF)       # set the mode properly
    beqz   a2, 3f
    jr     a2                                        # jump if the return chaining cell still exists

3:
    # continue executing the next instruction through the interpreter
    la     a1, .LdvmJitToInterpTraceSelectNoChain    # defined in footer.S
    lw     a1, (a1)
    add    rPC, a0, 3*2                              # reconstruct new rPC (advance 3 dalvik instr)

#if defined(WITH_JIT_TUNING)
    li     a0, kCallsiteInterpreted
#endif
    jr     a1

#undef TEMPLATE_INLINE_PROFILING

    .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
/* File: mips/footer.S */
/*
 * ===========================================================================
 *  Common subroutines and data
 * ===========================================================================
 */

    .section .data.rel.ro
    .align  4
.LinvokeNative:
    # Prep for the native call
    # a1 = newFP, a0 = methodToCall
    lw     t9, offThread_jniLocal_topCookie(rSELF)  # t9<- thread->localRef->...
    sw     zero, offThread_inJitCodeCache(rSELF)    # not in jit code cache
    sw     a1, offThread_curFrame(rSELF)            # self->curFrame = newFp
    sw     t9, (offStackSaveArea_localRefCookie - sizeofStackSaveArea)(a1)
                                                 # newFp->localRefCookie=top
    lhu     ra, offThread_subMode(rSELF)
    SAVEAREA_FROM_FP(rBIX, a1)                   # rBIX<- new stack save area

    move    a2, a0                               # a2<- methodToCall
    move    a0, a1                               # a0<- newFp
    add     a1, rSELF, offThread_retval          # a1<- &retval
    move    a3, rSELF                            # a3<- self
    andi    ra, kSubModeMethodTrace
    beqz    ra, 121f
    # a2: methodToCall
    # preserve a0-a3
    SCRATCH_STORE(a0, 0)
    SCRATCH_STORE(a1, 4)
    SCRATCH_STORE(a2, 8)
    SCRATCH_STORE(a3, 12)
    move    rTEMP, a2                            # preserve a2

    move    a0, rTEMP
    move    a1, rSELF
    la      t9, dvmFastMethodTraceEnter
    JALR(t9)
    lw      gp, STACK_OFFSET_GP(sp)

    # restore a0-a3
    SCRATCH_LOAD(a3, 12)
    SCRATCH_LOAD(a2, 8)
    SCRATCH_LOAD(a1, 4)
    SCRATCH_LOAD(a0, 0)

    lw      t9, offMethod_nativeFunc(a2)
    JALR(t9)                                      # call methodToCall->nativeFunc
    lw      gp, STACK_OFFSET_GP(sp)

    move    a0, rTEMP
    move    a1, rSELF
    la      t9, dvmFastNativeMethodTraceExit
    JALR(t9)
    lw      gp, STACK_OFFSET_GP(sp)
    b       212f

121:
    lw      t9, offMethod_nativeFunc(a2)
    JALR(t9)                                     # call methodToCall->nativeFunc
    lw      gp, STACK_OFFSET_GP(sp)

212:
    # native return; rBIX=newSaveArea
    # equivalent to dvmPopJniLocals
    lw     a2, offStackSaveArea_returnAddr(rBIX)     # a2 = chaining cell ret addr
    lw     a0, offStackSaveArea_localRefCookie(rBIX) # a0<- saved->top
    lw     a1, offThread_exception(rSELF)            # check for exception
    sw     rFP, offThread_curFrame(rSELF)            # self->curFrame = fp
    sw     a0, offThread_jniLocal_topCookie(rSELF)   # new top <- old top
    lw     a0, offStackSaveArea_savedPc(rBIX)        # reload rPC

    # a0 = dalvikCallsitePC
    bnez   a1, .LhandleException                     # handle exception if any

    sw     a2, offThread_inJitCodeCache(rSELF)       # set the mode properly
    beqz   a2, 3f
    jr     a2                                        # jump if the return chaining cell still exists

3:
    # continue executing the next instruction through the interpreter
    la     a1, .LdvmJitToInterpTraceSelectNoChain    # defined in footer.S
    lw     a1, (a1)
    add    rPC, a0, 3*2                              # reconstruct new rPC

#if defined(WITH_JIT_TUNING)
    li     a0, kCallsiteInterpreted
#endif
    jr     a1

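/*
 * The 121f/212f split above is, in C terms (sketch; the trace hooks are
 * the same ones called elsewhere in this file):
 *
 *     if (self->subMode & kSubModeMethodTrace) {
 *         dvmFastMethodTraceEnter(methodToCall, self);
 *         methodToCall->nativeFunc(newFp, &self->retval, methodToCall, self);
 *         dvmFastNativeMethodTraceExit(methodToCall, self);
 *     } else {
 *         methodToCall->nativeFunc(newFp, &self->retval, methodToCall, self);
 *     }
 */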

/*
 * On entry:
 * a0  Faulting Dalvik PC
 */
.LhandleException:
#if defined(WITH_SELF_VERIFICATION)
    la     t0, .LdeadFood
    lw     t0, (t0)                  # should not see this under self-verification mode
    jr     t0
.LdeadFood:
    .word   0xdeadf00d
#endif
    sw     zero, offThread_inJitCodeCache(rSELF)  # in interpreter land
    la     a1, .LdvmMterpCommonExceptionThrown  # PIC way of getting &func
    lw     a1, (a1)
    la     rIBASE, .LdvmAsmInstructionStart     # PIC way of getting &func
    lw     rIBASE, (rIBASE)
    move   rPC, a0                              # reload the faulting Dalvik address
    jr     a1                                   # branch to dvmMterpCommonExceptionThrown

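/*
 * Each .word entry below is, in effect, the PIC analogue of a C constant
 * pointer (sketch):
 *
 *     static void * const LdvmMterpStdBail = (void *)dvmMterpStdBail;
 *
 * The templates read an entry with an la/lw pair and jump through a
 * register, which is what the "# PIC way of getting &func" comments above
 * refer to.
 */
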
    .align  4
.LdvmAsmInstructionStart:
    .word   dvmAsmInstructionStart
.LdvmJitToInterpNoChainNoProfile:
    .word   dvmJitToInterpNoChainNoProfile
.LdvmJitToInterpTraceSelectNoChain:
    .word   dvmJitToInterpTraceSelectNoChain
.LdvmJitToInterpNoChain:
    .word   dvmJitToInterpNoChain
.LdvmMterpStdBail:
    .word   dvmMterpStdBail
.LdvmMterpCommonExceptionThrown:
    .word   dvmMterpCommonExceptionThrown
.LdvmLockObject:
    .word   dvmLockObject
#if defined(WITH_JIT_TUNING)
.LdvmICHitCount:
    .word   gDvmICHitCount
#endif
#if defined(WITH_SELF_VERIFICATION)
.LdvmSelfVerificationMemOpDecode:
    .word   dvmSelfVerificationMemOpDecode
#endif

    .global dmvCompilerTemplateEnd
dmvCompilerTemplateEnd:
3399
#endif /* WITH_JIT */