1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
/*
 * This file contains codegen for the Thumb ISA and is intended to be
 * included by:
 *
 *        Codegen-$(TARGET_ARCH_VARIANT).c
 *
 */
24
25/*
26 * Reserve 6 bytes at the beginning of the trace
27 *        +----------------------------+
28 *        | prof count addr (4 bytes)  |
29 *        +----------------------------+
30 *        | chain cell offset (2 bytes)|
31 *        +----------------------------+
32 *
33 * ...and then code to increment the execution
34 *
35 * For continuous profiling (12 bytes):
36 *
37 *       mov   r0, pc       @ move adr of "mov r0,pc" + 4 to r0
38 *       sub   r0, #10      @ back up to addr prof count pointer
39 *       ldr   r0, [r0]     @ get address of counter
40 *       ldr   r1, [r0]
41 *       add   r1, #1
42 *       str   r1, [r0]
43 *
44 * For periodic profiling (4 bytes):
45 *       call  TEMPLATE_PERIODIC_PROFILING
46 *
47 * and return the size (in bytes) of the generated code.
48 */
49
50static int genTraceProfileEntry(CompilationUnit *cUnit)
51{
52    intptr_t addr = (intptr_t)dvmJitNextTraceCounter();
53    assert(__BYTE_ORDER == __LITTLE_ENDIAN);
54    newLIR1(cUnit, kArm16BitData, addr & 0xffff);
55    newLIR1(cUnit, kArm16BitData, (addr >> 16) & 0xffff);
56    cUnit->chainCellOffsetLIR =
57        (LIR *) newLIR1(cUnit, kArm16BitData, CHAIN_CELL_OFFSET_TAG);
58    cUnit->headerSize = 6;
59    if ((gDvmJit.profileMode == kTraceProfilingContinuous) ||
60        (gDvmJit.profileMode == kTraceProfilingDisabled)) {
61        /* Thumb instruction used directly here to ensure correct size */
62        newLIR2(cUnit, kThumbMovRR_H2L, r0, r15pc);
63        newLIR2(cUnit, kThumbSubRI8, r0, 10);
64        newLIR3(cUnit, kThumbLdrRRI5, r0, r0, 0);
65        newLIR3(cUnit, kThumbLdrRRI5, r1, r0, 0);
66        newLIR2(cUnit, kThumbAddRI8, r1, 1);
67        newLIR3(cUnit, kThumbStrRRI5, r1, r0, 0);
68        return 12;
69    } else {
70        int opcode = TEMPLATE_PERIODIC_PROFILING;
71        newLIR2(cUnit, kThumbBlx1,
72            (int) gDvmJit.codeCache + templateEntryOffsets[opcode],
73            (int) gDvmJit.codeCache + templateEntryOffsets[opcode]);
74        newLIR2(cUnit, kThumbBlx2,
75            (int) gDvmJit.codeCache + templateEntryOffsets[opcode],
76            (int) gDvmJit.codeCache + templateEntryOffsets[opcode]);
77        return 4;
78    }
79}
80
81/*
82 * Perform a "reg cmp imm" operation and jump to the PCR region if condition
83 * satisfies.
84 */
85static void genNegFloat(CompilationUnit *cUnit, RegLocation rlDest,
86                        RegLocation rlSrc)
87{
88    RegLocation rlResult;
89    rlSrc = loadValue(cUnit, rlSrc, kCoreReg);
90    rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
91    opRegRegImm(cUnit, kOpAdd, rlResult.lowReg,
92                rlSrc.lowReg, 0x80000000);
93    storeValue(cUnit, rlDest, rlResult);
94}
95
96static void genNegDouble(CompilationUnit *cUnit, RegLocation rlDest,
97                         RegLocation rlSrc)
98{
99    RegLocation rlResult;
100    rlSrc = loadValueWide(cUnit, rlSrc, kCoreReg);
101    rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, true);
102    opRegRegImm(cUnit, kOpAdd, rlResult.highReg, rlSrc.highReg,
103                        0x80000000);
104    genRegCopy(cUnit, rlResult.lowReg, rlSrc.lowReg);
105    storeValueWide(cUnit, rlDest, rlResult);
106}
107
108static void genMulLong(CompilationUnit *cUnit, RegLocation rlDest,
109                       RegLocation rlSrc1, RegLocation rlSrc2)
110{
111    RegLocation rlResult;
112    loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
113    loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
114    genDispatchToHandler(cUnit, TEMPLATE_MUL_LONG);
115    rlResult = dvmCompilerGetReturnWide(cUnit);
116    storeValueWide(cUnit, rlDest, rlResult);
117}
118
/*
 * Return true when the two register numbers are exactly one apart, in
 * either direction.  Equal numbers and numbers two or more apart give false.
 */
static bool partialOverlap(int sreg1, int sreg2)
{
    int delta = sreg1 - sreg2;

    return (delta == 1) || (delta == -1);
}
123
124static void genLong3Addr(CompilationUnit *cUnit, MIR *mir, OpKind firstOp,
125                         OpKind secondOp, RegLocation rlDest,
126                         RegLocation rlSrc1, RegLocation rlSrc2)
127{
128    RegLocation rlResult;
129    if (partialOverlap(rlSrc1.sRegLow,rlSrc2.sRegLow) ||
130        partialOverlap(rlSrc1.sRegLow,rlDest.sRegLow) ||
131        partialOverlap(rlSrc2.sRegLow,rlDest.sRegLow)) {
132        // Rare case - not enough registers to properly handle
133        genInterpSingleStep(cUnit, mir);
134    } else if (rlDest.sRegLow == rlSrc1.sRegLow) {
135        // Already 2-operand
136        rlResult = loadValueWide(cUnit, rlDest, kCoreReg);
137        rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
138        opRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc2.lowReg);
139        opRegReg(cUnit, secondOp, rlResult.highReg, rlSrc2.highReg);
140        storeValueWide(cUnit, rlDest, rlResult);
141    } else if (rlDest.sRegLow == rlSrc2.sRegLow) {
142        // Bad case - must use/clobber Src1 and reassign Dest
143        rlSrc1 = loadValueWide(cUnit, rlSrc1, kCoreReg);
144        rlResult = loadValueWide(cUnit, rlDest, kCoreReg);
145        opRegReg(cUnit, firstOp, rlSrc1.lowReg, rlResult.lowReg);
146        opRegReg(cUnit, secondOp, rlSrc1.highReg, rlResult.highReg);
147        // Old reg assignments are now invalid
148        dvmCompilerClobber(cUnit, rlResult.lowReg);
149        dvmCompilerClobber(cUnit, rlResult.highReg);
150        dvmCompilerClobber(cUnit, rlSrc1.lowReg);
151        dvmCompilerClobber(cUnit, rlSrc1.highReg);
152        rlDest.location = kLocDalvikFrame;
153        assert(rlSrc1.location == kLocPhysReg);
154        // Reassign registers - rlDest will now get rlSrc1's old regs
155        storeValueWide(cUnit, rlDest, rlSrc1);
156    } else {
157        // Copy Src1 to Dest
158        rlSrc2 = loadValueWide(cUnit, rlSrc2, kCoreReg);
159        rlResult = dvmCompilerEvalLoc(cUnit, rlDest, kCoreReg, false);
160        loadValueDirectWide(cUnit, rlSrc1, rlResult.lowReg,
161                            rlResult.highReg);
162        rlResult.location = kLocPhysReg;
163        opRegReg(cUnit, firstOp, rlResult.lowReg, rlSrc2.lowReg);
164        opRegReg(cUnit, secondOp, rlResult.highReg, rlSrc2.highReg);
165        storeValueWide(cUnit, rlDest, rlResult);
166    }
167}
168
169void dvmCompilerInitializeRegAlloc(CompilationUnit *cUnit)
170{
171    int numTemps = sizeof(coreTemps)/sizeof(int);
172    RegisterPool *pool = (RegisterPool *) dvmCompilerNew(sizeof(*pool), true);
173    cUnit->regPool = pool;
174    pool->numCoreTemps = numTemps;
175    pool->coreTemps = (RegisterInfo *)
176            dvmCompilerNew(numTemps * sizeof(*pool->coreTemps), true);
177    pool->numFPTemps = 0;
178    pool->FPTemps = NULL;
179    dvmCompilerInitPool(pool->coreTemps, coreTemps, pool->numCoreTemps);
180    dvmCompilerInitPool(pool->FPTemps, NULL, 0);
181    pool->nullCheckedRegs =
182        dvmCompilerAllocBitVector(cUnit->numSSARegs, false);
183}
184
185/* Export the Dalvik PC assicated with an instruction to the StackSave area */
186static ArmLIR *genExportPC(CompilationUnit *cUnit, MIR *mir)
187{
188    ArmLIR *res;
189    int rDPC = dvmCompilerAllocTemp(cUnit);
190    int rAddr = dvmCompilerAllocTemp(cUnit);
191    int offset = offsetof(StackSaveArea, xtra.currentPc);
192    res = loadConstant(cUnit, rDPC, (int) (cUnit->method->insns + mir->offset));
193    newLIR2(cUnit, kThumbMovRR, rAddr, r5FP);
194    newLIR2(cUnit, kThumbSubRI8, rAddr, sizeof(StackSaveArea) - offset);
195    storeWordDisp( cUnit, rAddr, 0, rDPC);
196    return res;
197}
198
199static void genMonitor(CompilationUnit *cUnit, MIR *mir)
200{
201    genMonitorPortable(cUnit, mir);
202}
203
204static void genCmpLong(CompilationUnit *cUnit, MIR *mir, RegLocation rlDest,
205                       RegLocation rlSrc1, RegLocation rlSrc2)
206{
207    RegLocation rlResult;
208    loadValueDirectWideFixed(cUnit, rlSrc1, r0, r1);
209    loadValueDirectWideFixed(cUnit, rlSrc2, r2, r3);
210    genDispatchToHandler(cUnit, TEMPLATE_CMP_LONG);
211    rlResult = dvmCompilerGetReturn(cUnit);
212    storeValue(cUnit, rlDest, rlResult);
213}
214
215static bool genInlinedAbsFloat(CompilationUnit *cUnit, MIR *mir)
216{
217    int offset = offsetof(Thread, interpSave.retval);
218    RegLocation rlSrc = dvmCompilerGetSrc(cUnit, mir, 0);
219    int reg0 = loadValue(cUnit, rlSrc, kCoreReg).lowReg;
220    int signMask = dvmCompilerAllocTemp(cUnit);
221    loadConstant(cUnit, signMask, 0x7fffffff);
222    newLIR2(cUnit, kThumbAndRR, reg0, signMask);
223    dvmCompilerFreeTemp(cUnit, signMask);
224    storeWordDisp(cUnit, r6SELF, offset, reg0);
225    //TUNING: rewrite this to not clobber
226    dvmCompilerClobber(cUnit, reg0);
227    return false;
228}
229
230static bool genInlinedAbsDouble(CompilationUnit *cUnit, MIR *mir)
231{
232    int offset = offsetof(Thread, interpSave.retval);
233    RegLocation rlSrc = dvmCompilerGetSrcWide(cUnit, mir, 0, 1);
234    RegLocation regSrc = loadValueWide(cUnit, rlSrc, kCoreReg);
235    int reglo = regSrc.lowReg;
236    int reghi = regSrc.highReg;
237    int signMask = dvmCompilerAllocTemp(cUnit);
238    loadConstant(cUnit, signMask, 0x7fffffff);
239    storeWordDisp(cUnit, r6SELF, offset, reglo);
240    newLIR2(cUnit, kThumbAndRR, reghi, signMask);
241    dvmCompilerFreeTemp(cUnit, signMask);
242    storeWordDisp(cUnit, r6SELF, offset + 4, reghi);
243    //TUNING: rewrite this to not clobber
244    dvmCompilerClobber(cUnit, reghi);
245    return false;
246}
247
248/* No select in thumb, so we need to branch.  Thumb2 will do better */
249static bool genInlinedMinMaxInt(CompilationUnit *cUnit, MIR *mir, bool isMin)
250{
251    int offset = offsetof(Thread, interpSave.retval);
252    RegLocation rlSrc1 = dvmCompilerGetSrc(cUnit, mir, 0);
253    RegLocation rlSrc2 = dvmCompilerGetSrc(cUnit, mir, 1);
254    int reg0 = loadValue(cUnit, rlSrc1, kCoreReg).lowReg;
255    int reg1 = loadValue(cUnit, rlSrc2, kCoreReg).lowReg;
256    newLIR2(cUnit, kThumbCmpRR, reg0, reg1);
257    ArmLIR *branch1 = newLIR2(cUnit, kThumbBCond, 2,
258           isMin ? kArmCondLt : kArmCondGt);
259    newLIR2(cUnit, kThumbMovRR, reg0, reg1);
260    ArmLIR *target = newLIR0(cUnit, kArmPseudoTargetLabel);
261    target->defMask = ENCODE_ALL;
262    newLIR3(cUnit, kThumbStrRRI5, reg0, r6SELF, offset >> 2);
263    branch1->generic.target = (LIR *)target;
264    //TUNING: rewrite this to not clobber
265    dvmCompilerClobber(cUnit,reg0);
266    return false;
267}
268
269static void genMultiplyByTwoBitMultiplier(CompilationUnit *cUnit,
270        RegLocation rlSrc, RegLocation rlResult, int lit,
271        int firstBit, int secondBit)
272{
273    // We can't implement "add src, src, src, lsl#shift" on Thumb, so we have
274    // to do a regular multiply.
275    opRegRegImm(cUnit, kOpMul, rlResult.lowReg, rlSrc.lowReg, lit);
276}
277
278static void genMultiplyByShiftAndReverseSubtract(CompilationUnit *cUnit,
279        RegLocation rlSrc, RegLocation rlResult, int lit)
280{
281    int tReg = dvmCompilerAllocTemp(cUnit);
282    opRegRegImm(cUnit, kOpLsl, tReg, rlSrc.lowReg, lit);
283    opRegRegReg(cUnit, kOpSub, rlResult.lowReg, tReg, rlSrc.lowReg);
284}
285