LocalOptimizations.cpp revision bd1b0d74bbf40c69e47863fab7ea99eac418c515
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "Dalvik.h"
18#include "vm/compiler/CompilerInternals.h"
19#include "MipsLIR.h"
20#include "Codegen.h"
21
22#define DEBUG_OPT(X)
23
24/* Check RAW, WAR, and WAR dependency on the register operands */
25#define CHECK_REG_DEP(use, def, check) ((def & check->useMask) || \
26                                        ((use | def) & check->defMask))
27
28/* Scheduler heuristics */
29#define MAX_HOIST_DISTANCE 20
30#define LDLD_DISTANCE 4
31#define LD_LATENCY 2
32
33static inline bool isDalvikRegisterClobbered(MipsLIR *lir1, MipsLIR *lir2)
34{
35    int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->aliasInfo);
36    int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->aliasInfo);
37    int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->aliasInfo);
38    int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->aliasInfo);
39
40    return (reg1Lo == reg2Lo) || (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo);
41}
42
43#if 0
44/* Debugging utility routine */
45static void dumpDependentInsnPair(MipsLIR *thisLIR, MipsLIR *checkLIR,
46                                  const char *optimization)
47{
48    LOGD("************ %s ************", optimization);
49    dvmDumpLIRInsn((LIR *) thisLIR, 0);
50    dvmDumpLIRInsn((LIR *) checkLIR, 0);
51}
52#endif
53
54/* Convert a more expensive instruction (ie load) into a move */
55static void convertMemOpIntoMove(CompilationUnit *cUnit, MipsLIR *origLIR,
56                                 int dest, int src)
57{
58    /* Insert a move to replace the load */
59    MipsLIR *moveLIR;
60    moveLIR = dvmCompilerRegCopyNoInsert( cUnit, dest, src);
61    /*
62     * Insert the converted instruction after the original since the
63     * optimization is scannng in the top-down order and the new instruction
64     * will need to be re-checked (eg the new dest clobbers the src used in
65     * thisLIR).
66     */
67    dvmCompilerInsertLIRAfter((LIR *) origLIR, (LIR *) moveLIR);
68}
69
70/*
71 * Perform a pass of top-down walk, from the second-last instruction in the
72 * superblock, to eliminate redundant loads and stores.
73 *
74 * An earlier load can eliminate a later load iff
75 *   1) They are must-aliases
76 *   2) The native register is not clobbered in between
77 *   3) The memory location is not written to in between
78 *
79 * An earlier store can eliminate a later load iff
80 *   1) They are must-aliases
81 *   2) The native register is not clobbered in between
82 *   3) The memory location is not written to in between
83 *
84 * A later store can be eliminated by an earlier store iff
85 *   1) They are must-aliases
86 *   2) The memory location is not written to in between
87 */
88static void applyLoadStoreElimination(CompilationUnit *cUnit,
89                                      MipsLIR *headLIR,
90                                      MipsLIR *tailLIR)
91{
92    MipsLIR *thisLIR;
93
94    if (headLIR == tailLIR) return;
95
96    for (thisLIR = PREV_LIR(tailLIR);
97         thisLIR != headLIR;
98         thisLIR = PREV_LIR(thisLIR)) {
99        int sinkDistance = 0;
100
101        /* Skip non-interesting instructions */
102        if ((thisLIR->flags.isNop == true) ||
103            isPseudoOpCode(thisLIR->opcode) ||
104            !(EncodingMap[thisLIR->opcode].flags & (IS_LOAD | IS_STORE))) {
105            continue;
106        }
107
108        int nativeRegId = thisLIR->operands[0];
109        bool isThisLIRLoad = EncodingMap[thisLIR->opcode].flags & IS_LOAD;
110        MipsLIR *checkLIR;
111        /* Use the mem mask to determine the rough memory location */
112        u8 thisMemMask = (thisLIR->useMask | thisLIR->defMask) & ENCODE_MEM;
113
114        /*
115         * Currently only eliminate redundant ld/st for constant and Dalvik
116         * register accesses.
117         */
118        if (!(thisMemMask & (ENCODE_LITERAL | ENCODE_DALVIK_REG))) continue;
119
120        /*
121         * Add r15 (pc) to the resource mask to prevent this instruction
122         * from sinking past branch instructions. Also take out the memory
123         * region bits since stopMask is used to check data/control
124         * dependencies.
125         */
126        u8 stopUseRegMask = (ENCODE_REG_PC | thisLIR->useMask) &
127                            ~ENCODE_MEM;
128        u8 stopDefRegMask = thisLIR->defMask & ~ENCODE_MEM;
129
130        for (checkLIR = NEXT_LIR(thisLIR);
131             checkLIR != tailLIR;
132             checkLIR = NEXT_LIR(checkLIR)) {
133
134            /*
135             * Skip already dead instructions (whose dataflow information is
136             * outdated and misleading).
137             */
138            if (checkLIR->flags.isNop) continue;
139
140            u8 checkMemMask = (checkLIR->useMask | checkLIR->defMask) &
141                              ENCODE_MEM;
142            u8 aliasCondition = thisMemMask & checkMemMask;
143            bool stopHere = false;
144
145            /*
146             * Potential aliases seen - check the alias relations
147             */
148            if (checkMemMask != ENCODE_MEM && aliasCondition != 0) {
149                bool isCheckLIRLoad = EncodingMap[checkLIR->opcode].flags &
150                                      IS_LOAD;
151                if  (aliasCondition == ENCODE_LITERAL) {
152                    /*
153                     * Should only see literal loads in the instruction
154                     * stream.
155                     */
156                    assert(!(EncodingMap[checkLIR->opcode].flags &
157                             IS_STORE));
158                    /* Same value && same register type */
159                    if (checkLIR->aliasInfo == thisLIR->aliasInfo &&
160                        REGTYPE(checkLIR->operands[0]) == REGTYPE(nativeRegId)){
161                        /*
162                         * Different destination register - insert
163                         * a move
164                         */
165                        if (checkLIR->operands[0] != nativeRegId) {
166                            convertMemOpIntoMove(cUnit, checkLIR,
167                                                 checkLIR->operands[0],
168                                                 nativeRegId);
169                        }
170                        checkLIR->flags.isNop = true;
171                    }
172                } else if (aliasCondition == ENCODE_DALVIK_REG) {
173                    /* Must alias */
174                    if (checkLIR->aliasInfo == thisLIR->aliasInfo) {
175                        /* Only optimize compatible registers */
176                        bool regCompatible =
177                            REGTYPE(checkLIR->operands[0]) ==
178                            REGTYPE(nativeRegId);
179                        if ((isThisLIRLoad && isCheckLIRLoad) ||
180                            (!isThisLIRLoad && isCheckLIRLoad)) {
181                            /* RAR or RAW */
182                            if (regCompatible) {
183                                /*
184                                 * Different destination register -
185                                 * insert a move
186                                 */
187                                if (checkLIR->operands[0] !=
188                                    nativeRegId) {
189                                    convertMemOpIntoMove(cUnit,
190                                                 checkLIR,
191                                                 checkLIR->operands[0],
192                                                 nativeRegId);
193                                }
194                                checkLIR->flags.isNop = true;
195                            } else {
196                                /*
197                                 * Destinaions are of different types -
198                                 * something complicated going on so
199                                 * stop looking now.
200                                 */
201                                stopHere = true;
202                            }
203                        } else if (isThisLIRLoad && !isCheckLIRLoad) {
204                            /* WAR - register value is killed */
205                            stopHere = true;
206                        } else if (!isThisLIRLoad && !isCheckLIRLoad) {
207                            /* WAW - nuke the earlier store */
208                            thisLIR->flags.isNop = true;
209                            stopHere = true;
210                        }
211                    /* Partial overlap */
212                    } else if (isDalvikRegisterClobbered(thisLIR, checkLIR)) {
213                        /*
214                         * It is actually ok to continue if checkLIR
215                         * is a read. But it is hard to make a test
216                         * case for this so we just stop here to be
217                         * conservative.
218                         */
219                        stopHere = true;
220                    }
221                }
222                /* Memory content may be updated. Stop looking now. */
223                if (stopHere) {
224                    break;
225                /* The checkLIR has been transformed - check the next one */
226                } else if (checkLIR->flags.isNop) {
227                    continue;
228                }
229            }
230
231
232            /*
233             * this and check LIRs have no memory dependency. Now check if
234             * their register operands have any RAW, WAR, and WAW
235             * dependencies. If so, stop looking.
236             */
237            if (stopHere == false) {
238                stopHere = CHECK_REG_DEP(stopUseRegMask, stopDefRegMask,
239                                         checkLIR);
240            }
241
242            if (stopHere == true) {
243                DEBUG_OPT(dumpDependentInsnPair(thisLIR, checkLIR,
244                                                "REG CLOBBERED"));
245                /* Only sink store instructions */
246                if (sinkDistance && !isThisLIRLoad) {
247                    MipsLIR *newStoreLIR =
248                        (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR), true);
249                    *newStoreLIR = *thisLIR;
250                    /*
251                     * Stop point found - insert *before* the checkLIR
252                     * since the instruction list is scanned in the
253                     * top-down order.
254                     */
255                    dvmCompilerInsertLIRBefore((LIR *) checkLIR,
256                                               (LIR *) newStoreLIR);
257                    thisLIR->flags.isNop = true;
258                }
259                break;
260            } else if (!checkLIR->flags.isNop) {
261                sinkDistance++;
262            }
263        }
264    }
265}
266
267/*
268 * Perform a pass of bottom-up walk, from the second instruction in the
269 * superblock, to try to hoist loads to earlier slots.
270 */
271static void applyLoadHoisting(CompilationUnit *cUnit,
272                              MipsLIR *headLIR,
273                              MipsLIR *tailLIR)
274{
275    MipsLIR *thisLIR, *checkLIR;
276    /*
277     * Store the list of independent instructions that can be hoisted past.
278     * Will decide the best place to insert later.
279     */
280    MipsLIR *prevInstList[MAX_HOIST_DISTANCE];
281
282    /* Empty block */
283    if (headLIR == tailLIR) return;
284
285    /* Start from the second instruction */
286    for (thisLIR = NEXT_LIR(headLIR);
287         thisLIR != tailLIR;
288         thisLIR = NEXT_LIR(thisLIR)) {
289
290        /* Skip non-interesting instructions */
291        if ((thisLIR->flags.isNop == true) ||
292            isPseudoOpCode(thisLIR->opcode) ||
293            !(EncodingMap[thisLIR->opcode].flags & IS_LOAD)) {
294            continue;
295        }
296
297        u8 stopUseAllMask = thisLIR->useMask;
298
299        /*
300         * Branches for null/range checks are marked with the true resource
301         * bits, and loads to Dalvik registers, constant pools, and non-alias
302         * locations are safe to be hoisted. So only mark the heap references
303         * conservatively here.
304         */
305        if (stopUseAllMask & ENCODE_HEAP_REF) {
306            stopUseAllMask |= ENCODE_REG_PC;
307        }
308
309        /* Similar as above, but just check for pure register dependency */
310        u8 stopUseRegMask = stopUseAllMask & ~ENCODE_MEM;
311        u8 stopDefRegMask = thisLIR->defMask & ~ENCODE_MEM;
312
313        int nextSlot = 0;
314        bool stopHere = false;
315
316        /* Try to hoist the load to a good spot */
317        for (checkLIR = PREV_LIR(thisLIR);
318             checkLIR != headLIR;
319             checkLIR = PREV_LIR(checkLIR)) {
320
321            /*
322             * Skip already dead instructions (whose dataflow information is
323             * outdated and misleading).
324             */
325            if (checkLIR->flags.isNop) continue;
326
327            u8 checkMemMask = checkLIR->defMask & ENCODE_MEM;
328            u8 aliasCondition = stopUseAllMask & checkMemMask;
329            stopHere = false;
330
331            /* Potential WAR alias seen - check the exact relation */
332            if (checkMemMask != ENCODE_MEM && aliasCondition != 0) {
333                /* We can fully disambiguate Dalvik references */
334                if (aliasCondition == ENCODE_DALVIK_REG) {
335                    /* Must alias or partually overlap */
336                    if ((checkLIR->aliasInfo == thisLIR->aliasInfo) ||
337                        isDalvikRegisterClobbered(thisLIR, checkLIR)) {
338                        stopHere = true;
339                    }
340                /* Conservatively treat all heap refs as may-alias */
341                } else {
342                    assert(aliasCondition == ENCODE_HEAP_REF);
343                    stopHere = true;
344                }
345                /* Memory content may be updated. Stop looking now. */
346                if (stopHere) {
347                    prevInstList[nextSlot++] = checkLIR;
348                    break;
349                }
350            }
351
352            if (stopHere == false) {
353                stopHere = CHECK_REG_DEP(stopUseRegMask, stopDefRegMask,
354                                         checkLIR);
355            }
356
357            /*
358             * Store the dependent or non-pseudo/indepedent instruction to the
359             * list.
360             */
361            if (stopHere || !isPseudoOpCode(checkLIR->opcode)) {
362                prevInstList[nextSlot++] = checkLIR;
363                if (nextSlot == MAX_HOIST_DISTANCE) break;
364            }
365
366            /* Found a new place to put the load - move it here */
367            if (stopHere == true) {
368                DEBUG_OPT(dumpDependentInsnPair(checkLIR, thisLIR
369                                                "HOIST STOP"));
370                break;
371            }
372        }
373
374        /*
375         * Reached the top - use headLIR as the dependent marker as all labels
376         * are barriers.
377         */
378        if (stopHere == false && nextSlot < MAX_HOIST_DISTANCE) {
379            prevInstList[nextSlot++] = headLIR;
380        }
381
382        /*
383         * At least one independent instruction is found. Scan in the reversed
384         * direction to find a beneficial slot.
385         */
386        if (nextSlot >= 2) {
387            int firstSlot = nextSlot - 2;
388            int slot;
389            MipsLIR *depLIR = prevInstList[nextSlot-1];
390            /* If there is ld-ld dependency, wait LDLD_DISTANCE cycles */
391            if (!isPseudoOpCode(depLIR->opcode) &&
392                (EncodingMap[depLIR->opcode].flags & IS_LOAD)) {
393                firstSlot -= LDLD_DISTANCE;
394            }
395            /*
396             * Make sure we check slot >= 0 since firstSlot may be negative
397             * when the loop is first entered.
398             */
399            for (slot = firstSlot; slot >= 0; slot--) {
400                MipsLIR *curLIR = prevInstList[slot];
401                MipsLIR *prevLIR = prevInstList[slot+1];
402
403                /*
404                 * Check the highest instruction.
405                 * ENCODE_ALL represents a scheduling barrier.
406                 */
407                if (prevLIR->defMask == ENCODE_ALL) {
408                    /*
409                     * If the first instruction is a load, don't hoist anything
410                     * above it since it is unlikely to be beneficial.
411                     */
412                    if (EncodingMap[curLIR->opcode].flags & IS_LOAD) continue;
413                    /*
414                     * Need to unconditionally break here even if the hoisted
415                     * distance is greater than LD_LATENCY (ie more than enough
416                     * cycles are inserted to hide the load latency) since theu
417                     * subsequent code doesn't expect to compare against a
418                     * pseudo opcode (whose opcode value is negative).
419                     */
420                    break;
421                }
422
423                /*
424                 * NOTE: now prevLIR is guaranteed to be a non-pseudo
425                 * instruction (ie accessing EncodingMap[prevLIR->opcode] is
426                 * safe).
427                 *
428                 * Try to find two instructions with load/use dependency until
429                 * the remaining instructions are less than LD_LATENCY.
430                 */
431                if (((curLIR->useMask & prevLIR->defMask) &&
432                     (EncodingMap[prevLIR->opcode].flags & IS_LOAD)) ||
433                    (slot < LD_LATENCY)) {
434                    break;
435                }
436            }
437
438            /* Found a slot to hoist to */
439            if (slot >= 0) {
440                MipsLIR *curLIR = prevInstList[slot];
441                MipsLIR *newLoadLIR = (MipsLIR *) dvmCompilerNew(sizeof(MipsLIR),
442                                                               true);
443                *newLoadLIR = *thisLIR;
444                /*
445                 * Insertion is guaranteed to succeed since checkLIR
446                 * is never the first LIR on the list
447                 */
448                dvmCompilerInsertLIRBefore((LIR *) curLIR,
449                                           (LIR *) newLoadLIR);
450                thisLIR->flags.isNop = true;
451            }
452        }
453    }
454}
455
456void dvmCompilerApplyLocalOptimizations(CompilationUnit *cUnit, LIR *headLIR,
457                                        LIR *tailLIR)
458{
459    if (!(gDvmJit.disableOpt & (1 << kLoadStoreElimination))) {
460        applyLoadStoreElimination(cUnit, (MipsLIR *) headLIR,
461                                  (MipsLIR *) tailLIR);
462    }
463    if (!(gDvmJit.disableOpt & (1 << kLoadHoisting))) {
464        applyLoadHoisting(cUnit, (MipsLIR *) headLIR, (MipsLIR *) tailLIR);
465    }
466}
467