local_optimizations.cc revision c32447bcc8c36ee8ff265ed678c7df86936a9ebe
1/* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17#include "dex/compiler_internals.h" 18 19namespace art { 20 21#define DEBUG_OPT(X) 22 23/* Check RAW, WAR, and RAW dependency on the register operands */ 24#define CHECK_REG_DEP(use, def, check) (def.Intersects(*check->u.m.use_mask)) || \ 25 (use.Union(def).Intersects(*check->u.m.def_mask)) 26 27/* Scheduler heuristics */ 28#define MAX_HOIST_DISTANCE 20 29#define LDLD_DISTANCE 4 30#define LD_LATENCY 2 31 32static bool IsDalvikRegisterClobbered(LIR* lir1, LIR* lir2) { 33 int reg1Lo = DECODE_ALIAS_INFO_REG(lir1->flags.alias_info); 34 int reg1Hi = reg1Lo + DECODE_ALIAS_INFO_WIDE(lir1->flags.alias_info); 35 int reg2Lo = DECODE_ALIAS_INFO_REG(lir2->flags.alias_info); 36 int reg2Hi = reg2Lo + DECODE_ALIAS_INFO_WIDE(lir2->flags.alias_info); 37 38 return (reg1Lo == reg2Lo) || (reg1Lo == reg2Hi) || (reg1Hi == reg2Lo); 39} 40 41/* Convert a more expensive instruction (ie load) into a move */ 42void Mir2Lir::ConvertMemOpIntoMove(LIR* orig_lir, RegStorage dest, RegStorage src) { 43 /* Insert a move to replace the load */ 44 LIR* move_lir; 45 move_lir = OpRegCopyNoInsert(dest, src); 46 /* 47 * Insert the converted instruction after the original since the 48 * optimization is scannng in the top-down order and the new instruction 49 * will need to be re-checked (eg the new dest clobbers the src used in 50 * this_lir). 51 */ 52 InsertLIRAfter(orig_lir, move_lir); 53} 54 55/* 56 * Perform a pass of top-down walk, from the second-last instruction in the 57 * superblock, to eliminate redundant loads and stores. 58 * 59 * An earlier load can eliminate a later load iff 60 * 1) They are must-aliases 61 * 2) The native register is not clobbered in between 62 * 3) The memory location is not written to in between 63 * 64 * An earlier store can eliminate a later load iff 65 * 1) They are must-aliases 66 * 2) The native register is not clobbered in between 67 * 3) The memory location is not written to in between 68 * 69 * A later store can be eliminated by an earlier store iff 70 * 1) They are must-aliases 71 * 2) The memory location is not written to in between 72 */ 73void Mir2Lir::ApplyLoadStoreElimination(LIR* head_lir, LIR* tail_lir) { 74 LIR* this_lir; 75 76 if (head_lir == tail_lir) { 77 return; 78 } 79 80 for (this_lir = PREV_LIR(tail_lir); this_lir != head_lir; this_lir = PREV_LIR(this_lir)) { 81 if (IsPseudoLirOp(this_lir->opcode)) { 82 continue; 83 } 84 85 int sink_distance = 0; 86 87 uint64_t target_flags = GetTargetInstFlags(this_lir->opcode); 88 89 /* Skip non-interesting instructions */ 90 if ((this_lir->flags.is_nop == true) || 91 (target_flags & IS_BRANCH) || 92 ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) || // Skip wide loads. 93 ((target_flags & (REG_USE0 | REG_USE1 | REG_USE2)) == 94 (REG_USE0 | REG_USE1 | REG_USE2)) || // Skip wide stores. 95 // Skip instructions that are neither loads or stores. 96 !(target_flags & (IS_LOAD | IS_STORE)) || 97 // Skip instructions that do both load and store. 98 ((target_flags & (IS_STORE | IS_LOAD)) == (IS_STORE | IS_LOAD))) { 99 continue; 100 } 101 102 int native_reg_id; 103 if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) { 104 // If x86, location differs depending on whether memory/reg operation. 105 native_reg_id = (target_flags & IS_STORE) ? this_lir->operands[2] : this_lir->operands[0]; 106 } else { 107 native_reg_id = this_lir->operands[0]; 108 } 109 bool is_this_lir_load = target_flags & IS_LOAD; 110 LIR* check_lir; 111 /* Use the mem mask to determine the rough memory location */ 112 ResourceMask this_mem_mask = kEncodeMem.Intersection( 113 this_lir->u.m.use_mask->Union(*this_lir->u.m.def_mask)); 114 115 /* 116 * Currently only eliminate redundant ld/st for constant and Dalvik 117 * register accesses. 118 */ 119 if (!this_mem_mask.Intersects(kEncodeLiteral.Union(kEncodeDalvikReg))) { 120 continue; 121 } 122 123 ResourceMask stop_def_reg_mask = this_lir->u.m.def_mask->Without(kEncodeMem); 124 125 /* 126 * Add pc to the resource mask to prevent this instruction 127 * from sinking past branch instructions. Also take out the memory 128 * region bits since stop_mask is used to check data/control 129 * dependencies. 130 * 131 * Note: on x86(-64) and Arm64 we use the IsBranch bit, as the PC is not exposed. 132 */ 133 ResourceMask pc_encoding = GetPCUseDefEncoding(); 134 if (pc_encoding == kEncodeNone) { 135 // TODO: Stop the abuse of kIsBranch as a bit specification for ResourceMask. 136 pc_encoding = ResourceMask::Bit(kIsBranch); 137 } 138 ResourceMask stop_use_reg_mask = pc_encoding.Union(*this_lir->u.m.use_mask). 139 Without(kEncodeMem); 140 141 for (check_lir = NEXT_LIR(this_lir); check_lir != tail_lir; check_lir = NEXT_LIR(check_lir)) { 142 /* 143 * Skip already dead instructions (whose dataflow information is 144 * outdated and misleading). 145 */ 146 if (check_lir->flags.is_nop || IsPseudoLirOp(check_lir->opcode)) { 147 continue; 148 } 149 150 ResourceMask check_mem_mask = kEncodeMem.Intersection( 151 check_lir->u.m.use_mask->Union(*check_lir->u.m.def_mask)); 152 ResourceMask alias_condition = this_mem_mask.Intersection(check_mem_mask); 153 bool stop_here = false; 154 155 /* 156 * Potential aliases seen - check the alias relations 157 */ 158 uint64_t check_flags = GetTargetInstFlags(check_lir->opcode); 159 // TUNING: Support instructions with multiple register targets. 160 if ((check_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) { 161 stop_here = true; 162 } else if (!check_mem_mask.Equals(kEncodeMem) && !alias_condition.Equals(kEncodeNone)) { 163 bool is_check_lir_load = check_flags & IS_LOAD; 164 if (alias_condition.Equals(kEncodeLiteral)) { 165 /* 166 * Should only see literal loads in the instruction 167 * stream. 168 */ 169 DCHECK(!(check_flags & IS_STORE)); 170 /* Same value && same register type */ 171 if (check_lir->flags.alias_info == this_lir->flags.alias_info && 172 RegStorage::SameRegType(check_lir->operands[0], native_reg_id)) { 173 /* 174 * Different destination register - insert 175 * a move 176 */ 177 if (check_lir->operands[0] != native_reg_id) { 178 // TODO: update for 64-bit regs. 179 ConvertMemOpIntoMove(check_lir, RegStorage::Solo32(check_lir->operands[0]), 180 RegStorage::Solo32(native_reg_id)); 181 } 182 NopLIR(check_lir); 183 } 184 } else if (alias_condition.Equals(kEncodeDalvikReg)) { 185 /* Must alias */ 186 if (check_lir->flags.alias_info == this_lir->flags.alias_info) { 187 /* Only optimize compatible registers */ 188 bool reg_compatible = RegStorage::SameRegType(check_lir->operands[0], native_reg_id); 189 if ((is_this_lir_load && is_check_lir_load) || 190 (!is_this_lir_load && is_check_lir_load)) { 191 /* RAR or RAW */ 192 if (reg_compatible) { 193 /* 194 * Different destination register - 195 * insert a move 196 */ 197 if (check_lir->operands[0] != native_reg_id) { 198 // TODO: update for 64-bit regs. 199 ConvertMemOpIntoMove(check_lir, RegStorage::Solo32(check_lir->operands[0]), 200 RegStorage::Solo32(native_reg_id)); 201 } 202 NopLIR(check_lir); 203 } else { 204 /* 205 * Destinaions are of different types - 206 * something complicated going on so 207 * stop looking now. 208 */ 209 stop_here = true; 210 } 211 } else if (is_this_lir_load && !is_check_lir_load) { 212 /* WAR - register value is killed */ 213 stop_here = true; 214 } else if (!is_this_lir_load && !is_check_lir_load) { 215 /* WAW - nuke the earlier store */ 216 NopLIR(this_lir); 217 stop_here = true; 218 } 219 /* Partial overlap */ 220 } else if (IsDalvikRegisterClobbered(this_lir, check_lir)) { 221 /* 222 * It is actually ok to continue if check_lir 223 * is a read. But it is hard to make a test 224 * case for this so we just stop here to be 225 * conservative. 226 */ 227 stop_here = true; 228 } 229 } 230 /* Memory content may be updated. Stop looking now. */ 231 if (stop_here) { 232 break; 233 /* The check_lir has been transformed - check the next one */ 234 } else if (check_lir->flags.is_nop) { 235 continue; 236 } 237 } 238 239 240 /* 241 * this and check LIRs have no memory dependency. Now check if 242 * their register operands have any RAW, WAR, and WAW 243 * dependencies. If so, stop looking. 244 */ 245 if (stop_here == false) { 246 stop_here = CHECK_REG_DEP(stop_use_reg_mask, stop_def_reg_mask, check_lir); 247 } 248 249 if (stop_here == true) { 250 if (cu_->instruction_set == kX86 || cu_->instruction_set == kX86_64) { 251 // Prevent stores from being sunk between ops that generate ccodes and 252 // ops that use them. 253 uint64_t flags = GetTargetInstFlags(check_lir->opcode); 254 if (sink_distance > 0 && (flags & IS_BRANCH) && (flags & USES_CCODES)) { 255 check_lir = PREV_LIR(check_lir); 256 sink_distance--; 257 } 258 } 259 DEBUG_OPT(dump_dependent_insn_pair(this_lir, check_lir, "REG CLOBBERED")); 260 /* Only sink store instructions */ 261 if (sink_distance && !is_this_lir_load) { 262 LIR* new_store_lir = 263 static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR)); 264 *new_store_lir = *this_lir; 265 /* 266 * Stop point found - insert *before* the check_lir 267 * since the instruction list is scanned in the 268 * top-down order. 269 */ 270 InsertLIRBefore(check_lir, new_store_lir); 271 NopLIR(this_lir); 272 } 273 break; 274 } else if (!check_lir->flags.is_nop) { 275 sink_distance++; 276 } 277 } 278 } 279} 280 281/* 282 * Perform a pass of bottom-up walk, from the second instruction in the 283 * superblock, to try to hoist loads to earlier slots. 284 */ 285void Mir2Lir::ApplyLoadHoisting(LIR* head_lir, LIR* tail_lir) { 286 LIR* this_lir, *check_lir; 287 /* 288 * Store the list of independent instructions that can be hoisted past. 289 * Will decide the best place to insert later. 290 */ 291 LIR* prev_inst_list[MAX_HOIST_DISTANCE]; 292 293 /* Empty block */ 294 if (head_lir == tail_lir) { 295 return; 296 } 297 298 /* Start from the second instruction */ 299 for (this_lir = NEXT_LIR(head_lir); this_lir != tail_lir; this_lir = NEXT_LIR(this_lir)) { 300 if (IsPseudoLirOp(this_lir->opcode)) { 301 continue; 302 } 303 304 uint64_t target_flags = GetTargetInstFlags(this_lir->opcode); 305 /* Skip non-interesting instructions */ 306 if (!(target_flags & IS_LOAD) || 307 (this_lir->flags.is_nop == true) || 308 ((target_flags & (REG_DEF0 | REG_DEF1)) == (REG_DEF0 | REG_DEF1)) || 309 ((target_flags & (IS_STORE | IS_LOAD)) == (IS_STORE | IS_LOAD))) { 310 continue; 311 } 312 313 ResourceMask stop_use_all_mask = *this_lir->u.m.use_mask; 314 315 /* 316 * Branches for null/range checks are marked with the true resource 317 * bits, and loads to Dalvik registers, constant pools, and non-alias 318 * locations are safe to be hoisted. So only mark the heap references 319 * conservatively here. 320 * 321 * Note: on x86(-64) and Arm64 this will add kEncodeNone. 322 * TODO: Sanity check. LoadStoreElimination uses kBranchBit to fake a PC. 323 */ 324 if (stop_use_all_mask.HasBit(ResourceMask::kHeapRef)) { 325 stop_use_all_mask.SetBits(GetPCUseDefEncoding()); 326 } 327 328 /* Similar as above, but just check for pure register dependency */ 329 ResourceMask stop_use_reg_mask = stop_use_all_mask.Without(kEncodeMem); 330 ResourceMask stop_def_reg_mask = this_lir->u.m.def_mask->Without(kEncodeMem); 331 332 int next_slot = 0; 333 bool stop_here = false; 334 335 /* Try to hoist the load to a good spot */ 336 for (check_lir = PREV_LIR(this_lir); check_lir != head_lir; check_lir = PREV_LIR(check_lir)) { 337 /* 338 * Skip already dead instructions (whose dataflow information is 339 * outdated and misleading). 340 */ 341 if (check_lir->flags.is_nop) { 342 continue; 343 } 344 345 ResourceMask check_mem_mask = check_lir->u.m.def_mask->Intersection(kEncodeMem); 346 ResourceMask alias_condition = stop_use_all_mask.Intersection(check_mem_mask); 347 stop_here = false; 348 349 /* Potential WAR alias seen - check the exact relation */ 350 if (!check_mem_mask.Equals(kEncodeMem) && !alias_condition.Equals(kEncodeNone)) { 351 /* We can fully disambiguate Dalvik references */ 352 if (alias_condition.Equals(kEncodeDalvikReg)) { 353 /* Must alias or partially overlap */ 354 if ((check_lir->flags.alias_info == this_lir->flags.alias_info) || 355 IsDalvikRegisterClobbered(this_lir, check_lir)) { 356 stop_here = true; 357 } 358 /* Conservatively treat all heap refs as may-alias */ 359 } else { 360 DCHECK(alias_condition.Equals(kEncodeHeapRef)); 361 stop_here = true; 362 } 363 /* Memory content may be updated. Stop looking now. */ 364 if (stop_here) { 365 prev_inst_list[next_slot++] = check_lir; 366 break; 367 } 368 } 369 370 if (stop_here == false) { 371 stop_here = CHECK_REG_DEP(stop_use_reg_mask, stop_def_reg_mask, 372 check_lir); 373 } 374 375 /* 376 * Store the dependent or non-pseudo/indepedent instruction to the 377 * list. 378 */ 379 if (stop_here || !IsPseudoLirOp(check_lir->opcode)) { 380 prev_inst_list[next_slot++] = check_lir; 381 if (next_slot == MAX_HOIST_DISTANCE) { 382 break; 383 } 384 } 385 386 /* Found a new place to put the load - move it here */ 387 if (stop_here == true) { 388 DEBUG_OPT(dump_dependent_insn_pair(check_lir, this_lir "HOIST STOP")); 389 break; 390 } 391 } 392 393 /* 394 * Reached the top - use head_lir as the dependent marker as all labels 395 * are barriers. 396 */ 397 if (stop_here == false && next_slot < MAX_HOIST_DISTANCE) { 398 prev_inst_list[next_slot++] = head_lir; 399 } 400 401 /* 402 * At least one independent instruction is found. Scan in the reversed 403 * direction to find a beneficial slot. 404 */ 405 if (next_slot >= 2) { 406 int first_slot = next_slot - 2; 407 int slot; 408 LIR* dep_lir = prev_inst_list[next_slot-1]; 409 /* If there is ld-ld dependency, wait LDLD_DISTANCE cycles */ 410 if (!IsPseudoLirOp(dep_lir->opcode) && 411 (GetTargetInstFlags(dep_lir->opcode) & IS_LOAD)) { 412 first_slot -= LDLD_DISTANCE; 413 } 414 /* 415 * Make sure we check slot >= 0 since first_slot may be negative 416 * when the loop is first entered. 417 */ 418 for (slot = first_slot; slot >= 0; slot--) { 419 LIR* cur_lir = prev_inst_list[slot]; 420 LIR* prev_lir = prev_inst_list[slot+1]; 421 422 /* Check the highest instruction */ 423 if (prev_lir->u.m.def_mask->Equals(kEncodeAll)) { 424 /* 425 * If the first instruction is a load, don't hoist anything 426 * above it since it is unlikely to be beneficial. 427 */ 428 if (GetTargetInstFlags(cur_lir->opcode) & IS_LOAD) { 429 continue; 430 } 431 /* 432 * If the remaining number of slots is less than LD_LATENCY, 433 * insert the hoisted load here. 434 */ 435 if (slot < LD_LATENCY) { 436 break; 437 } 438 } 439 440 // Don't look across a barrier label 441 if ((prev_lir->opcode == kPseudoTargetLabel) || 442 (prev_lir->opcode == kPseudoSafepointPC) || 443 (prev_lir->opcode == kPseudoBarrier)) { 444 break; 445 } 446 447 /* 448 * Try to find two instructions with load/use dependency until 449 * the remaining instructions are less than LD_LATENCY. 450 */ 451 bool prev_is_load = IsPseudoLirOp(prev_lir->opcode) ? false : 452 (GetTargetInstFlags(prev_lir->opcode) & IS_LOAD); 453 if ((prev_is_load && (cur_lir->u.m.use_mask->Intersects(*prev_lir->u.m.def_mask))) || 454 (slot < LD_LATENCY)) { 455 break; 456 } 457 } 458 459 /* Found a slot to hoist to */ 460 if (slot >= 0) { 461 LIR* cur_lir = prev_inst_list[slot]; 462 LIR* new_load_lir = 463 static_cast<LIR*>(arena_->Alloc(sizeof(LIR), kArenaAllocLIR)); 464 *new_load_lir = *this_lir; 465 /* 466 * Insertion is guaranteed to succeed since check_lir 467 * is never the first LIR on the list 468 */ 469 InsertLIRBefore(cur_lir, new_load_lir); 470 NopLIR(this_lir); 471 } 472 } 473 } 474} 475 476void Mir2Lir::ApplyLocalOptimizations(LIR* head_lir, LIR* tail_lir) { 477 if (!(cu_->disable_opt & (1 << kLoadStoreElimination))) { 478 ApplyLoadStoreElimination(head_lir, tail_lir); 479 } 480 if (!(cu_->disable_opt & (1 << kLoadHoisting))) { 481 ApplyLoadHoisting(head_lir, tail_lir); 482 } 483} 484 485} // namespace art 486