GlobalOptimizations.cpp revision a8b91c52fd8a90b784835dfe1f8898035266c4dd
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "Dalvik.h"
18#include "vm/compiler/CompilerInternals.h"
19#include "MipsLIR.h"
20
21/*
22 * Identify unconditional branches that jump to the immediate successor of the
23 * branch itself.
24 */
25static void applyRedundantBranchElimination(CompilationUnit *cUnit)
26{
27    MipsLIR *thisLIR;
28
29    for (thisLIR = (MipsLIR *) cUnit->firstLIRInsn;
30         thisLIR != (MipsLIR *) cUnit->lastLIRInsn;
31         thisLIR = NEXT_LIR(thisLIR)) {
32
33        /* Branch to the next instruction */
34        if (!thisLIR->flags.isNop && thisLIR->opcode == kMipsB) {
35            MipsLIR *nextLIR = thisLIR;
36
37            while (true) {
38                nextLIR = NEXT_LIR(nextLIR);
39
40                /*
41                 * Is the branch target the next instruction?
42                 */
43                if (nextLIR == (MipsLIR *) thisLIR->generic.target) {
44                    thisLIR->flags.isNop = true;
45                    break;
46                }
47
48                /*
49                 * Found real useful stuff between the branch and the target.
50                 * Need to explicitly check the lastLIRInsn here since with
51                 * method-based JIT the branch might be the last real
52                 * instruction.
53                 */
54                if (!isPseudoOpCode(nextLIR->opcode) ||
55                    (nextLIR = (MipsLIR *) cUnit->lastLIRInsn))
56                    break;
57            }
58        }
59    }
60}
61
62/*
 * Do a simple form of copy propagation and elimination.
64 */
static void applyCopyPropagation(CompilationUnit *cUnit)
{
    MipsLIR *thisLIR;

    /* look for copies to possibly eliminate */
    for (thisLIR = (MipsLIR *) cUnit->firstLIRInsn;
         thisLIR != (MipsLIR *) cUnit->lastLIRInsn;
         thisLIR = NEXT_LIR(thisLIR)) {

        /* only live register-move instructions are candidates */
        if (thisLIR->flags.isNop || thisLIR->opcode != kMipsMove)
            continue;

        /* cap on how many uses of the copy's dst reg will be rewritten */
        const int max_insns = 10;
        MipsLIR *savedLIR[max_insns];  /* instructions to patch if we succeed */
        int srcRedefined = 0;  /* set once the copy's src reg gets clobbered */
        int insnCount = 0;     /* recorded uses; forced to 0 means "give up" */
        MipsLIR *nextLIR;

        /* look for and record all uses of reg defined by the copy */
        for (nextLIR = (MipsLIR *) NEXT_LIR(thisLIR);
             nextLIR != (MipsLIR *) cUnit->lastLIRInsn;
             nextLIR = NEXT_LIR(nextLIR)) {

            if (nextLIR->flags.isNop || nextLIR->opcode == kMips32BitData)
                continue;

            if (isPseudoOpCode(nextLIR->opcode)) {
                if (nextLIR->opcode == kMipsPseudoDalvikByteCodeBoundary ||
                    nextLIR->opcode == kMipsPseudoBarrier ||
                    nextLIR->opcode == kMipsPseudoExtended ||
                    nextLIR->opcode == kMipsPseudoSSARep)
                    continue; /* these pseudos don't pose problems */
                else if (nextLIR->opcode == kMipsPseudoTargetLabel ||
                         nextLIR->opcode == kMipsPseudoEntryBlock ||
                         nextLIR->opcode == kMipsPseudoExitBlock)
                    insnCount = 0;  /* give up for these pseudos */
                break; /* reached end for copy propagation */
            }

            /* Since instructions with IS_BRANCH flag set will have its */
            /* useMask and defMask set to ENCODE_ALL, any checking of   */
            /* these flags must come after the branching checks.        */

            /* don't propagate across branch/jump and link case
               or jump via register */
            if (EncodingMap[nextLIR->opcode].flags & REG_DEF_LR ||
                nextLIR->opcode == kMipsJalr ||
                nextLIR->opcode == kMipsJr) {
                insnCount = 0;
                break;
            }

            /* branches with certain targets ok while others aren't */
            if (EncodingMap[nextLIR->opcode].flags & IS_BRANCH) {
                MipsLIR *targetLIR =  (MipsLIR *) nextLIR->generic.target;
                if (targetLIR->opcode != kMipsPseudoEHBlockLabel &&
                    targetLIR->opcode != kMipsPseudoChainingCellHot &&
                    targetLIR->opcode != kMipsPseudoChainingCellNormal &&
                    targetLIR->opcode != kMipsPseudoChainingCellInvokePredicted &&
                    targetLIR->opcode != kMipsPseudoChainingCellInvokeSingleton &&
                    targetLIR->opcode != kMipsPseudoPCReconstructionBlockLabel &&
                    targetLIR->opcode != kMipsPseudoPCReconstructionCell) {
                    insnCount = 0;
                    break;
                }
                /* FIXME - for now don't propagate across any branch/jump. */
                insnCount = 0;
                break;
            }

            /* copy def reg used here, so record insn for copy propagation */
            if (thisLIR->defMask & nextLIR->useMask) {
                if (insnCount == max_insns || srcRedefined) {
                    insnCount = 0;
                    break; /* just give up if too many or not possible */
                }
                savedLIR[insnCount++] = nextLIR;
            }

            /* copy dst reg redefined here -- propagation ends at this insn */
            if (thisLIR->defMask & nextLIR->defMask) {
		if (nextLIR->opcode == kMipsMovz)
		    insnCount = 0; /* movz relies on thisLIR setting dst reg so abandon propagation*/
                break;
            }

            /* copy src reg redefined here, so can't propagate further */
            if (thisLIR->useMask & nextLIR->defMask) {
                if (insnCount == 0)
                    break; /* nothing to propagate */
                srcRedefined = 1;
            }
       }

        /* conditions allow propagation and copy elimination */
        if (insnCount) {
            int i;
            /* rewrite each recorded use of the copy's dst reg (operands[0])
               to read the src reg (operands[1]) instead, fixing useMask to
               match, then nop the now-dead copy itself */
            for (i = 0; i < insnCount; i++) {
                int flags = EncodingMap[savedLIR[i]->opcode].flags;
                savedLIR[i]->useMask &= ~(1 << thisLIR->operands[0]);
                savedLIR[i]->useMask |= 1 << thisLIR->operands[1];
                if ((flags & REG_USE0) &&
                    savedLIR[i]->operands[0] == thisLIR->operands[0])
                    savedLIR[i]->operands[0] = thisLIR->operands[1];
                if ((flags & REG_USE1) &&
                    savedLIR[i]->operands[1] == thisLIR->operands[0])
                    savedLIR[i]->operands[1] = thisLIR->operands[1];
                if ((flags & REG_USE2) &&
                    savedLIR[i]->operands[2] == thisLIR->operands[0])
                    savedLIR[i]->operands[2] = thisLIR->operands[1];
                if ((flags & REG_USE3) &&
                    savedLIR[i]->operands[3] == thisLIR->operands[0])
                    savedLIR[i]->operands[3] = thisLIR->operands[1];
            }
            thisLIR->flags.isNop = true;
        }
    }
}
182
183#ifdef __mips_hard_float
184/*
185 * Look for pairs of mov.s instructions that can be combined into mov.d
186 */
187static void mergeMovs(CompilationUnit *cUnit)
188{
189  MipsLIR *movsLIR = NULL;
190  MipsLIR *thisLIR;
191
192  for (thisLIR = (MipsLIR *) cUnit->firstLIRInsn;
193       thisLIR != (MipsLIR *) cUnit->lastLIRInsn;
194       thisLIR = NEXT_LIR(thisLIR)) {
195    if (thisLIR->flags.isNop)
196      continue;
197
198    if (isPseudoOpCode(thisLIR->opcode)) {
199      if (thisLIR->opcode == kMipsPseudoDalvikByteCodeBoundary ||
200                thisLIR->opcode == kMipsPseudoExtended ||
201	  thisLIR->opcode == kMipsPseudoSSARep)
202	continue;  /* ok to move across these pseudos */
203      movsLIR = NULL; /* don't merge across other pseudos */
204      continue;
205    }
206
207    /* merge pairs of mov.s instructions */
208    if (thisLIR->opcode == kMipsFmovs) {
209      if (movsLIR == NULL)
210	movsLIR = thisLIR;
211      else if (((movsLIR->operands[0] & 1) == 0) &&
212	       ((movsLIR->operands[1] & 1) == 0) &&
213	       ((movsLIR->operands[0] + 1) == thisLIR->operands[0]) &&
214	       ((movsLIR->operands[1] + 1) == thisLIR->operands[1])) {
215	/* movsLIR is handling even register - upgrade to mov.d */
216	movsLIR->opcode = kMipsFmovd;
217	movsLIR->operands[0] = S2D(movsLIR->operands[0], movsLIR->operands[0]+1);
218	movsLIR->operands[1] = S2D(movsLIR->operands[1], movsLIR->operands[1]+1);
219	thisLIR->flags.isNop = true;
220	movsLIR = NULL;
221      }
222      else if (((movsLIR->operands[0] & 1) == 1) &&
223	       ((movsLIR->operands[1] & 1) == 1) &&
224	       ((movsLIR->operands[0] - 1) == thisLIR->operands[0]) &&
225	       ((movsLIR->operands[1] - 1) == thisLIR->operands[1])) {
226	/* thissLIR is handling even register - upgrade to mov.d */
227	thisLIR->opcode = kMipsFmovd;
228	thisLIR->operands[0] = S2D(thisLIR->operands[0], thisLIR->operands[0]+1);
229	thisLIR->operands[1] = S2D(thisLIR->operands[1], thisLIR->operands[1]+1);
230	movsLIR->flags.isNop = true;
231	movsLIR = NULL;
232      }
233      else
234	/* carry on searching from here */
235	movsLIR = thisLIR;
236      continue;
237    }
238
239    /* intervening instruction - start search from scratch */
240    movsLIR = NULL;
241  }
242}
243#endif
244
245
246/*
247 * Look back first and then ahead to try to find an instruction to move into
 * the branch delay slot.  If the analysis can be done cheaply enough, it may
 * be possible to tune this routine to be more beneficial (e.g., being more
250 * particular about what instruction is speculated).
251 */
static MipsLIR *delaySlotLIR(MipsLIR *firstLIR, MipsLIR *branchLIR)
{
    int isLoad;
    int loadVisited = 0;    /* a load was skipped over during the back scan */
    int isStore;
    int storeVisited = 0;   /* a store was skipped over during the back scan */
    u8 useMask = branchLIR->useMask;   /* accumulated use constraints */
    u8 defMask = branchLIR->defMask;   /* accumulated def constraints */
    MipsLIR *thisLIR;
    /* result is returned in a freshly allocated LIR (never NULL) */
    MipsLIR *newLIR = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true);

    /* scan backward from the branch for an instruction safe to move down */
    for (thisLIR = PREV_LIR(branchLIR);
         thisLIR != firstLIR;
         thisLIR = PREV_LIR(thisLIR)) {
        if (thisLIR->flags.isNop)
            continue;

        if (isPseudoOpCode(thisLIR->opcode)) {
            if (thisLIR->opcode == kMipsPseudoDalvikByteCodeBoundary ||
                thisLIR->opcode == kMipsPseudoExtended ||
                thisLIR->opcode == kMipsPseudoSSARep)
                continue;  /* ok to move across these pseudos */
            break; /* don't move across all other pseudos */
        }

        /* give up on moving previous instruction down into slot */
        if (thisLIR->opcode == kMipsNop ||
            thisLIR->opcode == kMips32BitData ||
            EncodingMap[thisLIR->opcode].flags & IS_BRANCH)
            break;

        /* don't reorder loads/stores (the alias info could
           possibly be used to allow as a future enhancement) */
        isLoad = EncodingMap[thisLIR->opcode].flags & IS_LOAD;
        isStore = EncodingMap[thisLIR->opcode].flags & IS_STORE;

        /* candidate is safe if it has no def/use conflict with anything
           between it and the branch (inclusive) and moving it would not
           reorder it relative to any skipped load or store */
        if (!(thisLIR->useMask & defMask) &&
            !(thisLIR->defMask & useMask) &&
            !(thisLIR->defMask & defMask) &&
            !(isLoad && storeVisited) &&
            !(isStore && loadVisited) &&
            !(isStore && storeVisited)) {
            *newLIR = *thisLIR;
            thisLIR->flags.isNop = true;
            return newLIR; /* move into delay slot succeeded */
        }

        loadVisited |= isLoad;
        storeVisited |= isStore;

        /* accumulate def/use constraints */
        useMask |= thisLIR->useMask;
        defMask |= thisLIR->defMask;
    }

    /* for unconditional branches try to copy the instruction at the
       branch target up into the delay slot and adjust the branch */
    if (branchLIR->opcode == kMipsB) {
        MipsLIR *targetLIR;
        for (targetLIR = (MipsLIR *) branchLIR->generic.target;
             targetLIR;
             targetLIR = NEXT_LIR(targetLIR)) {
            if (!targetLIR->flags.isNop &&
                (!isPseudoOpCode(targetLIR->opcode) || /* can't pull predicted up */
                 targetLIR->opcode == kMipsPseudoChainingCellInvokePredicted))
                break; /* try to get to next real op at branch target */
        }
        /* hoist the target instruction and retarget the branch past it */
        if (targetLIR && !isPseudoOpCode(targetLIR->opcode) &&
            !(EncodingMap[targetLIR->opcode].flags & IS_BRANCH)) {
            *newLIR = *targetLIR;
            branchLIR->generic.target = (LIR *) NEXT_LIR(targetLIR);
            return newLIR;
        }
    } else if (branchLIR->opcode >= kMipsBeq && branchLIR->opcode <= kMipsBne) {
        /* for conditional branches try to fill branch delay slot
           via speculative execution when safe */
        MipsLIR *targetLIR;
        for (targetLIR = (MipsLIR *) branchLIR->generic.target;
             targetLIR;
             targetLIR = NEXT_LIR(targetLIR)) {
            if (!targetLIR->flags.isNop && !isPseudoOpCode(targetLIR->opcode))
                break; /* try to get to next real op at branch target */
        }

        MipsLIR *nextLIR;
        for (nextLIR = NEXT_LIR(branchLIR);
             nextLIR;
             nextLIR = NEXT_LIR(nextLIR)) {
            if (!nextLIR->flags.isNop && !isPseudoOpCode(nextLIR->opcode))
                break; /* try to get to next real op for fall thru */
        }

        if (nextLIR && targetLIR) {
            int flags = EncodingMap[nextLIR->opcode].flags;
            int isLoad = flags & IS_LOAD;

            /* common branch and fall thru to normal chaining cells case:
               both paths begin with the identical load, so it is safe to
               execute it unconditionally in the slot */
            if (isLoad && nextLIR->opcode == targetLIR->opcode &&
                nextLIR->operands[0] == targetLIR->operands[0] &&
                nextLIR->operands[1] == targetLIR->operands[1] &&
                nextLIR->operands[2] == targetLIR->operands[2]) {
                *newLIR = *targetLIR;
                branchLIR->generic.target = (LIR *) NEXT_LIR(targetLIR);
                return newLIR;
            }

            /* try prefetching (maybe try speculating instructions along the
               trace like dalvik frame load which is common and may be safe) */
            int isStore = flags & IS_STORE;
            if (isLoad || isStore) {
                newLIR->opcode = kMipsPref;
                newLIR->operands[0] = isLoad ? 0 : 1;
                newLIR->operands[1] = nextLIR->operands[1];
                newLIR->operands[2] = nextLIR->operands[2];
                newLIR->defMask = nextLIR->defMask;
                newLIR->useMask = nextLIR->useMask;
                return newLIR;
            }
        }
    }

    /* couldn't find a useful instruction to move into the delay slot */
    newLIR->opcode = kMipsNop;
    return newLIR;
}
377
378/*
379 * The branch delay slot has been ignored thus far.  This is the point where
380 * a useful instruction is moved into it or a nop is inserted.  Leave existing
381 * NOPs alone -- these came from sparse and packed switch ops and are needed
382 * to maintain the proper offset to the jump table.
383 */
static void introduceBranchDelaySlot(CompilationUnit *cUnit)
{
    MipsLIR *thisLIR;
    MipsLIR *firstLIR =(MipsLIR *) cUnit->firstLIRInsn;
    MipsLIR *lastLIR =(MipsLIR *) cUnit->lastLIRInsn;

    /* walk backward from the last instruction filling slots after each
       live, real branch */
    for (thisLIR = lastLIR; thisLIR != firstLIR; thisLIR = PREV_LIR(thisLIR)) {
        if (thisLIR->flags.isNop ||
            isPseudoOpCode(thisLIR->opcode) ||
            !(EncodingMap[thisLIR->opcode].flags & IS_BRANCH)) {
            continue;
        } else if (thisLIR == lastLIR) {
            /* branch is the very last LIR -- append the slot filler */
            dvmCompilerAppendLIR(cUnit,
                (LIR *) delaySlotLIR(firstLIR, thisLIR));
        } else if (NEXT_LIR(thisLIR)->opcode != kMipsNop) {
            /* skip branches already followed by a NOP (see function comment) */
            dvmCompilerInsertLIRAfter((LIR *) thisLIR,
                (LIR *) delaySlotLIR(firstLIR, thisLIR));
        }
    }

    /* the loop above stops before firstLIR (thisLIR == firstLIR here);
       if the very first instruction is a branch, there is nothing earlier
       to pull down, so give it an explicit nop slot */
    if (!thisLIR->flags.isNop &&
        !isPseudoOpCode(thisLIR->opcode) &&
        EncodingMap[thisLIR->opcode].flags & IS_BRANCH) {
        /* nothing available to move, so insert nop */
        MipsLIR *nopLIR = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true);
        nopLIR->opcode = kMipsNop;
        dvmCompilerInsertLIRAfter((LIR *) thisLIR, (LIR *) nopLIR);
    }
}
413
/*
 * Entry point: run the global optimization passes over the LIR list.
 * Pass order matters -- delay slot introduction must run last, since
 * the earlier passes nop-out and rewrite instructions it examines.
 */
void dvmCompilerApplyGlobalOptimizations(CompilationUnit *cUnit)
{
    applyRedundantBranchElimination(cUnit);
    applyCopyPropagation(cUnit);
#ifdef __mips_hard_float
    mergeMovs(cUnit);
#endif
    introduceBranchDelaySlot(cUnit);
}
423