X86FloatingPoint.cpp revision 100c267249d1d03c4f96eede9877a4f9f54f2247
//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass which converts floating point instructions from
// pseudo registers into register stack instructions. This pass uses live
// variable information to indicate where the FPn registers are used and their
// lifetimes.
//
// The x87 hardware tracks liveness of the stack registers, so it is necessary
// to implement exact liveness tracking between basic blocks. The CFG edges are
// partitioned into bundles where the same FP registers must be live in
// identical stack positions. Instructions are inserted at the end of each basic
// block to rearrange the live registers to match the outgoing bundle.
//
// This approach avoids splitting critical edges at the potential cost of more
// live register shuffling instructions when critical edges are present.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "x86-codegen"
#include "X86.h"
#include "X86InstrInfo.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
using namespace llvm;

STATISTIC(NumFXCH, "Number of fxch instructions inserted");
STATISTIC(NumFP  , "Number of floating point instructions");

namespace {
  /// FPS - The floating point stackifier pass. Rewrites virtual-looking
  /// FP0..FP7 pseudo registers into the physical x87 register stack, keeping
  /// a software model (Stack/RegMap/StackTop) of where each FP register
  /// currently lives on the hardware stack.
  struct FPS : public MachineFunctionPass {
    static char ID;
    FPS() : MachineFunctionPass(ID) {
      // This is really only to keep valgrind quiet.
      // The logic in isLive() is too much for it.
      memset(Stack, 0, sizeof(Stack));
      memset(RegMap, 0, sizeof(RegMap));
    }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      AU.addPreservedID(MachineLoopInfoID);
      AU.addPreservedID(MachineDominatorsID);
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    virtual bool runOnMachineFunction(MachineFunction &MF);

    virtual const char *getPassName() const { return "X86 FP Stackifier"; }

  private:
    const TargetInstrInfo *TII; // Machine instruction info.

    // Two CFG edges are related if they leave the same block, or enter the same
    // block. The transitive closure of an edge under this relation is a
    // LiveBundle. It represents a set of CFG edges where the live FP stack
    // registers must be allocated identically in the x87 stack.
    //
    // A LiveBundle is usually all the edges leaving a block, or all the edges
    // entering a block, but it can contain more edges if critical edges are
    // present.
    //
    // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
    // but the exact mapping of FP registers to stack slots is fixed later.
    struct LiveBundle {
      // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
      unsigned Mask;

      // Number of pre-assigned live registers in FixStack. This is 0 when the
      // stack order has not yet been fixed.
      unsigned FixCount;

      // Assigned stack order for live-in registers.
      // FixStack[i] == getStackEntry(i) for all i < FixCount.
      unsigned char FixStack[8];

      LiveBundle(unsigned m = 0) : Mask(m), FixCount(0) {}

      // Have the live registers been assigned a stack order yet?
      // An empty bundle (Mask == 0) is trivially fixed.
      bool isFixed() const { return !Mask || FixCount; }
    };

    // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
    // with no live FP registers.
    SmallVector<LiveBundle, 8> LiveBundles;

    // Map each MBB in the current function to an (ingoing, outgoing) index into
    // LiveBundles. Blocks with no FP registers live in or out map to (0, 0)
    // and are not actually stored in the map.
    DenseMap<MachineBasicBlock*, std::pair<unsigned, unsigned> > BlockBundle;

    // Return a bitmask of FP registers in block's live-in list.
    // Registers outside FP0..FP7 are ignored.
    unsigned calcLiveInMask(MachineBasicBlock *MBB) {
      unsigned Mask = 0;
      for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(),
           E = MBB->livein_end(); I != E; ++I) {
        unsigned Reg = *I - X86::FP0;
        if (Reg < 8)
          Mask |= 1 << Reg;
      }
      return Mask;
    }

    // Partition all the CFG edges into LiveBundles.
    void bundleCFG(MachineFunction &MF);

    MachineBasicBlock *MBB;     // Current basic block
    unsigned Stack[8];          // FP<n> Registers in each stack slot...
    unsigned RegMap[8];         // Track which stack slot contains each register
    unsigned StackTop;          // The current top of the FP stack.

    // Set up our stack model to match the incoming registers to MBB.
    void setupBlockStack();

    // Shuffle live registers to match the expectations of successor blocks.
    void finishBlockStack();

    // Print the current stack model and verify Stack/RegMap consistency.
    void dumpStack() const {
      dbgs() << "Stack contents:";
      for (unsigned i = 0; i != StackTop; ++i) {
        dbgs() << " FP" << Stack[i];
        assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
      }
      dbgs() << "\n";
    }

    /// getSlot - Return the stack slot number a particular register number is
    /// in.
    unsigned getSlot(unsigned RegNo) const {
      assert(RegNo < 8 && "Regno out of range!");
      return RegMap[RegNo];
    }

    /// isLive - Is RegNo currently live in the stack?
    bool isLive(unsigned RegNo) const {
      unsigned Slot = getSlot(RegNo);
      return Slot < StackTop && Stack[Slot] == RegNo;
    }

    /// getScratchReg - Return an FP register that is not currently in use.
    unsigned getScratchReg() {
      for (int i = 7; i >= 0; --i)
        if (!isLive(i))
          return i;
      llvm_unreachable("Ran out of scratch FP registers");
    }

    /// getStackEntry - Return the X86::FP<n> register in register ST(i).
    unsigned getStackEntry(unsigned STi) const {
      if (STi >= StackTop)
        report_fatal_error("Access past stack top!");
      return Stack[StackTop-1-STi];
    }

    /// getSTReg - Return the X86::ST(i) register which contains the specified
    /// FP<RegNo> register.
    unsigned getSTReg(unsigned RegNo) const {
      return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0;
    }

    // pushReg - Push the specified FP<n> register onto the stack.
    void pushReg(unsigned Reg) {
      assert(Reg < 8 && "Register number out of range!");
      if (StackTop >= 8)
        report_fatal_error("Stack overflow!");
      Stack[StackTop] = Reg;
      RegMap[Reg] = StackTop++;
    }

    bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }

    // moveToTop - Bring FP<RegNo> to st(0), emitting an fxch if it is not
    // already there. Updates both the model and the instruction stream.
    void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
      DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
      if (isAtTop(RegNo)) return;

      unsigned STReg = getSTReg(RegNo);
      unsigned RegOnTop = getStackEntry(0);

      // Swap the slots the regs are in.
      std::swap(RegMap[RegNo], RegMap[RegOnTop]);

      // Swap stack slot contents.
      if (RegMap[RegOnTop] >= StackTop)
        report_fatal_error("Access past stack top!");
      std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);

      // Emit an fxch to update the runtime processor's version of the state.
      BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg);
      ++NumFXCH;
    }

    // duplicateToTop - Push a copy of FP<RegNo> onto the stack as FP<AsReg>.
    // NOTE(review): I is a MachineInstr* compared against MBB->end(); this
    // relies on the implicit iterator conversion — presumably intentional,
    // but an iterator parameter would match moveToTop. TODO confirm.
    void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) {
      DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
      unsigned STReg = getSTReg(RegNo);
      pushReg(AsReg);   // New register on top of stack

      BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg);
    }

    /// popStackAfter - Pop the current value off of the top of the FP stack
    /// after the specified instruction.
    void popStackAfter(MachineBasicBlock::iterator &I);

    /// freeStackSlotAfter - Free the specified register from the register
    /// stack, so that it is no longer in a register. If the register is
    /// currently at the top of the stack, we just pop the current instruction,
    /// otherwise we store the current top-of-stack into the specified slot,
    /// then pop the top of stack.
    void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg);

    /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
    /// instruction.
    MachineBasicBlock::iterator
    freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo);

    /// Adjust the live registers to be the set in Mask.
    void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);

    /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
    /// st(0), FP reg FixStack[1] is st(1) etc.
    void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
                         MachineBasicBlock::iterator I);

    bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB);

    void handleZeroArgFP(MachineBasicBlock::iterator &I);
    void handleOneArgFP(MachineBasicBlock::iterator &I);
    void handleOneArgFPRW(MachineBasicBlock::iterator &I);
    void handleTwoArgFP(MachineBasicBlock::iterator &I);
    void handleCompareFP(MachineBasicBlock::iterator &I);
    void handleCondMovFP(MachineBasicBlock::iterator &I);
    void handleSpecialFP(MachineBasicBlock::iterator &I);

    bool translateCopy(MachineInstr*);
  };
  char FPS::ID = 0;
}

FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); }

/// getFPReg - Return the X86::FPx register number for the specified operand.
/// For example, this returns 3 for X86::FP3.
static unsigned getFPReg(const MachineOperand &MO) {
  assert(MO.isReg() && "Expected an FP register!");
  unsigned Reg = MO.getReg();
  assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!");
  return Reg - X86::FP0;
}

/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
/// register references into FP stack references.
///
bool FPS::runOnMachineFunction(MachineFunction &MF) {
  // We only need to run this pass if there are any FP registers used in this
  // function. If it is all integer, there is nothing for us to do!
  bool FPIsUsed = false;

  assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!");
  for (unsigned i = 0; i <= 6; ++i)
    if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) {
      FPIsUsed = true;
      break;
    }

  // Early exit.
  if (!FPIsUsed) return false;

  TII = MF.getTarget().getInstrInfo();

  // Prepare cross-MBB liveness.
  bundleCFG(MF);

  StackTop = 0;

  // Process the function in depth first order so that we process at least one
  // of the predecessors for every reachable block in the function.
  SmallPtrSet<MachineBasicBlock*, 8> Processed;
  MachineBasicBlock *Entry = MF.begin();

  bool Changed = false;
  for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> >
         I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed);
       I != E; ++I)
    Changed |= processBasicBlock(MF, **I);

  // Process any unreachable blocks in arbitrary order now.
  if (MF.size() != Processed.size())
    for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB)
      if (Processed.insert(BB))
        Changed |= processBasicBlock(MF, *BB);

  BlockBundle.clear();
  LiveBundles.clear();

  return Changed;
}

/// bundleCFG - Scan all the basic blocks to determine consistent live-in and
/// live-out sets for the FP registers. Consistent means that the set of
/// registers live-out from a block is identical to the live-in set of all
/// successors. This is not enforced by the normal live-in lists since
/// registers may be implicitly defined, or not used by all successors.
void FPS::bundleCFG(MachineFunction &MF) {
  assert(LiveBundles.empty() && "Stale data in LiveBundles");
  assert(BlockBundle.empty() && "Stale data in BlockBundle");
  SmallPtrSet<MachineBasicBlock*, 8> PropDown, PropUp;

  // LiveBundle[0] is the empty live-in set.
  LiveBundles.resize(1);

  // First gather the actual live-in masks for all MBBs.
  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = I;
    const unsigned Mask = calcLiveInMask(MBB);
    if (!Mask)
      continue;
    // Ingoing bundle index.
    unsigned &Idx = BlockBundle[MBB].first;
    // Already assigned an ingoing bundle?
    if (Idx)
      continue;
    // Allocate a new LiveBundle struct for this block's live-ins.
    const unsigned BundleIdx = Idx = LiveBundles.size();
    DEBUG(dbgs() << "Creating LB#" << BundleIdx << ": in:BB#"
                 << MBB->getNumber());
    LiveBundles.push_back(Mask);
    LiveBundle &Bundle = LiveBundles.back();

    // Make sure all predecessors have the same live-out set.
    PropUp.insert(MBB);

    // Keep pushing liveness up and down the CFG until convergence.
    // Only critical edges cause iteration here, but when they do, multiple
    // blocks can be assigned to the same LiveBundle index.
    // NOTE(review): PropUp/PropDown are declared SmallPtrSet<..., 8> above but
    // iterated with the <..., 16> iterator type below. SmallPtrSetIterator
    // does not depend on the inline size, so this compiles, but the spelled
    // types should agree — TODO tidy.
    do {
      // Assign BundleIdx as liveout from predecessors in PropUp.
      for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropUp.begin(),
           E = PropUp.end(); I != E; ++I) {
        MachineBasicBlock *MBB = *I;
        for (MachineBasicBlock::const_pred_iterator LinkI = MBB->pred_begin(),
             LinkE = MBB->pred_end(); LinkI != LinkE; ++LinkI) {
          MachineBasicBlock *PredMBB = *LinkI;
          // PredMBB's liveout bundle should be set to LIIdx.
          unsigned &Idx = BlockBundle[PredMBB].second;
          if (Idx) {
            assert(Idx == BundleIdx && "Inconsistent CFG");
            continue;
          }
          Idx = BundleIdx;
          DEBUG(dbgs() << " out:BB#" << PredMBB->getNumber());
          // Propagate to siblings.
          if (PredMBB->succ_size() > 1)
            PropDown.insert(PredMBB);
        }
      }
      PropUp.clear();

      // Assign BundleIdx as livein to successors in PropDown.
      for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropDown.begin(),
           E = PropDown.end(); I != E; ++I) {
        MachineBasicBlock *MBB = *I;
        for (MachineBasicBlock::const_succ_iterator LinkI = MBB->succ_begin(),
             LinkE = MBB->succ_end(); LinkI != LinkE; ++LinkI) {
          MachineBasicBlock *SuccMBB = *LinkI;
          // LinkMBB's livein bundle should be set to BundleIdx.
          unsigned &Idx = BlockBundle[SuccMBB].first;
          if (Idx) {
            assert(Idx == BundleIdx && "Inconsistent CFG");
            continue;
          }
          Idx = BundleIdx;
          DEBUG(dbgs() << " in:BB#" << SuccMBB->getNumber());
          // Propagate to siblings.
          if (SuccMBB->pred_size() > 1)
            PropUp.insert(SuccMBB);
          // Also accumulate the bundle liveness mask from the liveins here.
          Bundle.Mask |= calcLiveInMask(SuccMBB);
        }
      }
      PropDown.clear();
    } while (!PropUp.empty());
    DEBUG({
      dbgs() << " live:";
      for (unsigned i = 0; i < 8; ++i)
        if (Bundle.Mask & (1<<i))
          dbgs() << " %FP" << i;
      dbgs() << '\n';
    });
  }
}

/// processBasicBlock - Loop over all of the instructions in the basic block,
/// transforming FP instructions into their stack form.
///
bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
  bool Changed = false;
  MBB = &BB;

  setupBlockStack();

  for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
    MachineInstr *MI = I;
    uint64_t Flags = MI->getDesc().TSFlags;

    unsigned FPInstClass = Flags & X86II::FPTypeMask;
    if (MI->isInlineAsm())
      FPInstClass = X86II::SpecialFP;

    if (MI->isCopy() && translateCopy(MI))
      FPInstClass = X86II::SpecialFP;

    if (FPInstClass == X86II::NotFP)
      continue;  // Efficiently ignore non-fp insts!

    MachineInstr *PrevMI = 0;
    if (I != BB.begin())
      PrevMI = prior(I);

    ++NumFP;  // Keep track of # of pseudo instrs
    DEBUG(dbgs() << "\nFPInst:\t" << *MI);

    // Get dead variables list now because the MI pointer may be deleted as part
    // of processing!
    SmallVector<unsigned, 8> DeadRegs;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isDead())
        DeadRegs.push_back(MO.getReg());
    }

    switch (FPInstClass) {
    case X86II::ZeroArgFP:  handleZeroArgFP(I); break;
    case X86II::OneArgFP:   handleOneArgFP(I);  break;  // fstp ST(0)
    case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0))
    case X86II::TwoArgFP:   handleTwoArgFP(I);  break;
    case X86II::CompareFP:  handleCompareFP(I); break;
    case X86II::CondMovFP:  handleCondMovFP(I); break;
    case X86II::SpecialFP:  handleSpecialFP(I); break;
    default: llvm_unreachable("Unknown FP Type!");
    }

    // Check to see if any of the values defined by this instruction are dead
    // after definition. If so, pop them.
    for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) {
      unsigned Reg = DeadRegs[i];
      if (Reg >= X86::FP0 && Reg <= X86::FP6) {
        DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n");
        freeStackSlotAfter(I, Reg-X86::FP0);
      }
    }

    // Print out all of the instructions expanded to if -debug
    // NOTE(review): only the first newly inserted instruction is printed;
    // the trailing while loop merely advances Start — verify whether the
    // remaining inserted instructions were meant to be printed too.
    DEBUG(
      MachineBasicBlock::iterator PrevI(PrevMI);
      if (I == PrevI) {
        dbgs() << "Just deleted pseudo instruction\n";
      } else {
        MachineBasicBlock::iterator Start = I;
        // Rewind to first instruction newly inserted.
        while (Start != BB.begin() && prior(Start) != PrevI) --Start;
        dbgs() << "Inserted instructions:\n\t";
        Start->print(dbgs(), &MF.getTarget());
        while (++Start != llvm::next(I)) {}
      }
      dumpStack();
    );

    Changed = true;
  }

  finishBlockStack();

  return Changed;
}

/// setupBlockStack - Use the BlockBundle map to set up our model of the stack
/// to match predecessors' live out stack.
void FPS::setupBlockStack() {
  DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber()
               << " derived from " << MBB->getName() << ".\n");
  StackTop = 0;
  const LiveBundle &Bundle = LiveBundles[BlockBundle.lookup(MBB).first];

  if (!Bundle.Mask) {
    DEBUG(dbgs() << "Block has no FP live-ins.\n");
    return;
  }

  // Depth-first iteration should ensure that we always have an assigned stack.
  assert(Bundle.isFixed() && "Reached block before any predecessors");

  // Push the fixed live-in registers.
  for (unsigned i = Bundle.FixCount; i > 0; --i) {
    MBB->addLiveIn(X86::ST0+i-1);
    DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP"
                 << unsigned(Bundle.FixStack[i-1]) << '\n');
    pushReg(Bundle.FixStack[i-1]);
  }

  // Kill off unwanted live-ins. This can happen with a critical edge.
  // FIXME: We could keep these live registers around as zombies. They may need
  // to be revived at the end of a short block. It might save a few instrs.
  adjustLiveRegs(calcLiveInMask(MBB), MBB->begin());
  DEBUG(MBB->dump());
}

/// finishBlockStack - Revive live-outs that are implicitly defined out of
/// MBB. Shuffle live registers to match the expected fixed stack of any
/// predecessors, and ensure that all predecessors are expecting the same
/// stack.
void FPS::finishBlockStack() {
  // The RET handling below takes care of return blocks for us.
  if (MBB->succ_empty())
    return;

  DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber()
               << " derived from " << MBB->getName() << ".\n");

  unsigned BundleIdx = BlockBundle.lookup(MBB).second;
  LiveBundle &Bundle = LiveBundles[BundleIdx];

  // We may need to kill and define some registers to match successors.
  // FIXME: This can probably be combined with the shuffle below.
  MachineBasicBlock::iterator Term = MBB->getFirstTerminator();
  adjustLiveRegs(Bundle.Mask, Term);

  if (!Bundle.Mask) {
    DEBUG(dbgs() << "No live-outs.\n");
    return;
  }

  // Has the stack order been fixed yet?
  DEBUG(dbgs() << "LB#" << BundleIdx << ": ");
  if (Bundle.isFixed()) {
    DEBUG(dbgs() << "Shuffling stack to match.\n");
    shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term);
  } else {
    // Not fixed yet, we get to choose.
    DEBUG(dbgs() << "Fixing stack order now.\n");
    Bundle.FixCount = StackTop;
    for (unsigned i = 0; i < StackTop; ++i)
      Bundle.FixStack[i] = getStackEntry(i);
  }
}


//===----------------------------------------------------------------------===//
// Efficient Lookup Table Support
//===----------------------------------------------------------------------===//

namespace {
  // TableEntry - One (pseudo opcode -> concrete opcode) mapping. The heterogeneous
  // operator< overloads let std::lower_bound compare entries against a bare
  // opcode.
  struct TableEntry {
    unsigned from;
    unsigned to;
    bool operator<(const TableEntry &TE) const { return from < TE.from; }
    friend bool operator<(const TableEntry &TE, unsigned V) {
      return TE.from < V;
    }
    friend bool LLVM_ATTRIBUTE_USED operator<(unsigned V,
                                              const TableEntry &TE) {
      return V < TE.from;
    }
  };
}

#ifndef NDEBUG
static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
  for (unsigned i = 0; i != NumEntries-1; ++i)
    if (!(Table[i] < Table[i+1])) return false;
  return true;
}
#endif

// Lookup - Binary-search Table for Opcode; return the mapped opcode or -1 if
// not found.
static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
  const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
  if (I != Table+N && I->from == Opcode)
    return I->to;
  return -1;
}

#ifdef NDEBUG
#define ASSERT_SORTED(TABLE)
#else
#define ASSERT_SORTED(TABLE)                                              \
  { static bool TABLE##Checked = false;                                   \
    if (!TABLE##Checked) {                                                \
       assert(TableIsSorted(TABLE, array_lengthof(TABLE)) &&              \
              "All lookup tables must be sorted for efficient access!");  \
       TABLE##Checked = true;                                             \
    }                                                                     \
  }
#endif

//===----------------------------------------------------------------------===//
// Register File -> Register Stack Mapping Methods
//===----------------------------------------------------------------------===//

// OpcodeTable - Sorted map of register instructions to their stack version.
// The first element is a register file pseudo instruction, the second is the
// concrete X86 instruction which uses the register stack.
//
static const TableEntry OpcodeTable[] = {
  { X86::ABS_Fp32     , X86::ABS_F     },
  { X86::ABS_Fp64     , X86::ABS_F     },
  { X86::ABS_Fp80     , X86::ABS_F     },
  { X86::ADD_Fp32m    , X86::ADD_F32m  },
  { X86::ADD_Fp64m    , X86::ADD_F64m  },
  { X86::ADD_Fp64m32  , X86::ADD_F32m  },
  { X86::ADD_Fp80m32  , X86::ADD_F32m  },
  { X86::ADD_Fp80m64  , X86::ADD_F64m  },
  { X86::ADD_FpI16m32 , X86::ADD_FI16m },
  { X86::ADD_FpI16m64 , X86::ADD_FI16m },
  { X86::ADD_FpI16m80 , X86::ADD_FI16m },
  { X86::ADD_FpI32m32 , X86::ADD_FI32m },
  { X86::ADD_FpI32m64 , X86::ADD_FI32m },
  { X86::ADD_FpI32m80 , X86::ADD_FI32m },
  { X86::CHS_Fp32     , X86::CHS_F     },
  { X86::CHS_Fp64     , X86::CHS_F     },
  { X86::CHS_Fp80     , X86::CHS_F     },
  { X86::CMOVBE_Fp32  , X86::CMOVBE_F  },
  { X86::CMOVBE_Fp64  , X86::CMOVBE_F  },
  { X86::CMOVBE_Fp80  , X86::CMOVBE_F  },
  { X86::CMOVB_Fp32   , X86::CMOVB_F   },
  { X86::CMOVB_Fp64   , X86::CMOVB_F   },
  { X86::CMOVB_Fp80   , X86::CMOVB_F   },
  { X86::CMOVE_Fp32   , X86::CMOVE_F   },
  { X86::CMOVE_Fp64   , X86::CMOVE_F   },
  { X86::CMOVE_Fp80   , X86::CMOVE_F   },
  { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F },
  { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F },
  { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F },
  { X86::CMOVNB_Fp32  , X86::CMOVNB_F  },
  { X86::CMOVNB_Fp64  , X86::CMOVNB_F  },
  { X86::CMOVNB_Fp80  , X86::CMOVNB_F  },
  { X86::CMOVNE_Fp32  , X86::CMOVNE_F  },
  { X86::CMOVNE_Fp64  , X86::CMOVNE_F  },
  { X86::CMOVNE_Fp80  , X86::CMOVNE_F  },
  { X86::CMOVNP_Fp32  , X86::CMOVNP_F  },
  { X86::CMOVNP_Fp64  , X86::CMOVNP_F  },
  { X86::CMOVNP_Fp80  , X86::CMOVNP_F  },
  { X86::CMOVP_Fp32   , X86::CMOVP_F   },
  { X86::CMOVP_Fp64   , X86::CMOVP_F   },
  { X86::CMOVP_Fp80   , X86::CMOVP_F   },
  { X86::COS_Fp32     , X86::COS_F     },
  { X86::COS_Fp64     , X86::COS_F     },
  { X86::COS_Fp80     , X86::COS_F     },
  { X86::DIVR_Fp32m   , X86::DIVR_F32m },
  { X86::DIVR_Fp64m   , X86::DIVR_F64m },
  { X86::DIVR_Fp64m32 , X86::DIVR_F32m },
  { X86::DIVR_Fp80m32 , X86::DIVR_F32m },
  { X86::DIVR_Fp80m64 , X86::DIVR_F64m },
  { X86::DIVR_FpI16m32, X86::DIVR_FI16m},
  { X86::DIVR_FpI16m64, X86::DIVR_FI16m},
  { X86::DIVR_FpI16m80, X86::DIVR_FI16m},
  { X86::DIVR_FpI32m32, X86::DIVR_FI32m},
  { X86::DIVR_FpI32m64, X86::DIVR_FI32m},
  { X86::DIVR_FpI32m80, X86::DIVR_FI32m},
  { X86::DIV_Fp32m    , X86::DIV_F32m  },
  { X86::DIV_Fp64m    , X86::DIV_F64m  },
  { X86::DIV_Fp64m32  , X86::DIV_F32m  },
  { X86::DIV_Fp80m32  , X86::DIV_F32m  },
  { X86::DIV_Fp80m64  , X86::DIV_F64m  },
  { X86::DIV_FpI16m32 , X86::DIV_FI16m },
  { X86::DIV_FpI16m64 , X86::DIV_FI16m },
  { X86::DIV_FpI16m80 , X86::DIV_FI16m },
  { X86::DIV_FpI32m32 , X86::DIV_FI32m },
  { X86::DIV_FpI32m64 , X86::DIV_FI32m },
  { X86::DIV_FpI32m80 , X86::DIV_FI32m },
  { X86::ILD_Fp16m32  , X86::ILD_F16m  },
  { X86::ILD_Fp16m64  , X86::ILD_F16m  },
  { X86::ILD_Fp16m80  , X86::ILD_F16m  },
  { X86::ILD_Fp32m32  , X86::ILD_F32m  },
  { X86::ILD_Fp32m64  , X86::ILD_F32m  },
  { X86::ILD_Fp32m80  , X86::ILD_F32m  },
  { X86::ILD_Fp64m32  , X86::ILD_F64m  },
  { X86::ILD_Fp64m64  , X86::ILD_F64m  },
  { X86::ILD_Fp64m80  , X86::ILD_F64m  },
  { X86::ISTT_Fp16m32 , X86::ISTT_FP16m},
  { X86::ISTT_Fp16m64 , X86::ISTT_FP16m},
  { X86::ISTT_Fp16m80 , X86::ISTT_FP16m},
  { X86::ISTT_Fp32m32 , X86::ISTT_FP32m},
  { X86::ISTT_Fp32m64 , X86::ISTT_FP32m},
  { X86::ISTT_Fp32m80 , X86::ISTT_FP32m},
  { X86::ISTT_Fp64m32 , X86::ISTT_FP64m},
  { X86::ISTT_Fp64m64 , X86::ISTT_FP64m},
  { X86::ISTT_Fp64m80 , X86::ISTT_FP64m},
  { X86::IST_Fp16m32  , X86::IST_F16m  },
  { X86::IST_Fp16m64  , X86::IST_F16m  },
  { X86::IST_Fp16m80  , X86::IST_F16m  },
  { X86::IST_Fp32m32  , X86::IST_F32m  },
  { X86::IST_Fp32m64  , X86::IST_F32m  },
  { X86::IST_Fp32m80  , X86::IST_F32m  },
  { X86::IST_Fp64m32  , X86::IST_FP64m },
  { X86::IST_Fp64m64  , X86::IST_FP64m },
  { X86::IST_Fp64m80  , X86::IST_FP64m },
  { X86::LD_Fp032     , X86::LD_F0     },
  { X86::LD_Fp064     , X86::LD_F0     },
  { X86::LD_Fp080     , X86::LD_F0     },
  { X86::LD_Fp132     , X86::LD_F1     },
  { X86::LD_Fp164     , X86::LD_F1     },
  { X86::LD_Fp180     , X86::LD_F1     },
  { X86::LD_Fp32m     , X86::LD_F32m   },
  { X86::LD_Fp32m64   , X86::LD_F32m   },
  { X86::LD_Fp32m80   , X86::LD_F32m   },
  { X86::LD_Fp64m     , X86::LD_F64m   },
  { X86::LD_Fp64m80   , X86::LD_F64m   },
  { X86::LD_Fp80m     , X86::LD_F80m   },
  { X86::MUL_Fp32m    , X86::MUL_F32m  },
  { X86::MUL_Fp64m    , X86::MUL_F64m  },
  { X86::MUL_Fp64m32  , X86::MUL_F32m  },
  { X86::MUL_Fp80m32  , X86::MUL_F32m  },
  { X86::MUL_Fp80m64  , X86::MUL_F64m  },
  { X86::MUL_FpI16m32 , X86::MUL_FI16m },
  { X86::MUL_FpI16m64 , X86::MUL_FI16m },
  { X86::MUL_FpI16m80 , X86::MUL_FI16m },
  { X86::MUL_FpI32m32 , X86::MUL_FI32m },
  { X86::MUL_FpI32m64 , X86::MUL_FI32m },
  { X86::MUL_FpI32m80 , X86::MUL_FI32m },
  { X86::SIN_Fp32     , X86::SIN_F     },
  { X86::SIN_Fp64     , X86::SIN_F     },
  { X86::SIN_Fp80     , X86::SIN_F     },
  { X86::SQRT_Fp32    , X86::SQRT_F    },
  { X86::SQRT_Fp64    , X86::SQRT_F    },
  { X86::SQRT_Fp80    , X86::SQRT_F    },
  { X86::ST_Fp32m     , X86::ST_F32m   },
  { X86::ST_Fp64m     , X86::ST_F64m   },
  { X86::ST_Fp64m32   , X86::ST_F32m   },
  { X86::ST_Fp80m32   , X86::ST_F32m   },
  { X86::ST_Fp80m64   , X86::ST_F64m   },
  { X86::ST_FpP80m    , X86::ST_FP80m  },
  { X86::SUBR_Fp32m   , X86::SUBR_F32m },
  { X86::SUBR_Fp64m   , X86::SUBR_F64m },
  { X86::SUBR_Fp64m32 , X86::SUBR_F32m },
  { X86::SUBR_Fp80m32 , X86::SUBR_F32m },
  { X86::SUBR_Fp80m64 , X86::SUBR_F64m },
  { X86::SUBR_FpI16m32, X86::SUBR_FI16m},
  { X86::SUBR_FpI16m64, X86::SUBR_FI16m},
  { X86::SUBR_FpI16m80, X86::SUBR_FI16m},
  { X86::SUBR_FpI32m32, X86::SUBR_FI32m},
  { X86::SUBR_FpI32m64, X86::SUBR_FI32m},
  { X86::SUBR_FpI32m80, X86::SUBR_FI32m},
  { X86::SUB_Fp32m    , X86::SUB_F32m  },
  { X86::SUB_Fp64m    , X86::SUB_F64m  },
  { X86::SUB_Fp64m32  , X86::SUB_F32m  },
  { X86::SUB_Fp80m32  , X86::SUB_F32m  },
  { X86::SUB_Fp80m64  , X86::SUB_F64m  },
  { X86::SUB_FpI16m32 , X86::SUB_FI16m },
  { X86::SUB_FpI16m64 , X86::SUB_FI16m },
  { X86::SUB_FpI16m80 , X86::SUB_FI16m },
  { X86::SUB_FpI32m32 , X86::SUB_FI32m },
  { X86::SUB_FpI32m64 , X86::SUB_FI32m },
  { X86::SUB_FpI32m80 , X86::SUB_FI32m },
  { X86::TST_Fp32     , X86::TST_F     },
  { X86::TST_Fp64     , X86::TST_F     },
  { X86::TST_Fp80     , X86::TST_F     },
  { X86::UCOM_FpIr32  , X86::UCOM_FIr  },
  { X86::UCOM_FpIr64  , X86::UCOM_FIr  },
  { X86::UCOM_FpIr80  , X86::UCOM_FIr  },
  { X86::UCOM_Fpr32   , X86::UCOM_Fr   },
  { X86::UCOM_Fpr64   , X86::UCOM_Fr   },
  { X86::UCOM_Fpr80   , X86::UCOM_Fr   },
};

// getConcreteOpcode - Map a pseudo FP opcode to its concrete x87 stack form;
// asserts if the opcode has no entry in OpcodeTable.
static unsigned getConcreteOpcode(unsigned Opcode) {
  ASSERT_SORTED(OpcodeTable);
  int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode);
  assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!");
  return Opc;
}

//===----------------------------------------------------------------------===//
// Helper Methods
792//===----------------------------------------------------------------------===// 793 794// PopTable - Sorted map of instructions to their popping version. The first 795// element is an instruction, the second is the version which pops. 796// 797static const TableEntry PopTable[] = { 798 { X86::ADD_FrST0 , X86::ADD_FPrST0 }, 799 800 { X86::DIVR_FrST0, X86::DIVR_FPrST0 }, 801 { X86::DIV_FrST0 , X86::DIV_FPrST0 }, 802 803 { X86::IST_F16m , X86::IST_FP16m }, 804 { X86::IST_F32m , X86::IST_FP32m }, 805 806 { X86::MUL_FrST0 , X86::MUL_FPrST0 }, 807 808 { X86::ST_F32m , X86::ST_FP32m }, 809 { X86::ST_F64m , X86::ST_FP64m }, 810 { X86::ST_Frr , X86::ST_FPrr }, 811 812 { X86::SUBR_FrST0, X86::SUBR_FPrST0 }, 813 { X86::SUB_FrST0 , X86::SUB_FPrST0 }, 814 815 { X86::UCOM_FIr , X86::UCOM_FIPr }, 816 817 { X86::UCOM_FPr , X86::UCOM_FPPr }, 818 { X86::UCOM_Fr , X86::UCOM_FPr }, 819}; 820 821/// popStackAfter - Pop the current value off of the top of the FP stack after 822/// the specified instruction. This attempts to be sneaky and combine the pop 823/// into the instruction itself if possible. The iterator is left pointing to 824/// the last instruction, be it a new pop instruction inserted, or the old 825/// instruction if it was modified in place. 826/// 827void FPS::popStackAfter(MachineBasicBlock::iterator &I) { 828 MachineInstr* MI = I; 829 DebugLoc dl = MI->getDebugLoc(); 830 ASSERT_SORTED(PopTable); 831 if (StackTop == 0) 832 report_fatal_error("Cannot pop empty stack!"); 833 RegMap[Stack[--StackTop]] = ~0; // Update state 834 835 // Check to see if there is a popping version of this instruction... 
836 int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode()); 837 if (Opcode != -1) { 838 I->setDesc(TII->get(Opcode)); 839 if (Opcode == X86::UCOM_FPPr) 840 I->RemoveOperand(0); 841 } else { // Insert an explicit pop 842 I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0); 843 } 844} 845 846/// freeStackSlotAfter - Free the specified register from the register stack, so 847/// that it is no longer in a register. If the register is currently at the top 848/// of the stack, we just pop the current instruction, otherwise we store the 849/// current top-of-stack into the specified slot, then pop the top of stack. 850void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) { 851 if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy. 852 popStackAfter(I); 853 return; 854 } 855 856 // Otherwise, store the top of stack into the dead slot, killing the operand 857 // without having to add in an explicit xchg then pop. 858 // 859 I = freeStackSlotBefore(++I, FPRegNo); 860} 861 862/// freeStackSlotBefore - Free the specified register without trying any 863/// folding. 864MachineBasicBlock::iterator 865FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) { 866 unsigned STReg = getSTReg(FPRegNo); 867 unsigned OldSlot = getSlot(FPRegNo); 868 unsigned TopReg = Stack[StackTop-1]; 869 Stack[OldSlot] = TopReg; 870 RegMap[TopReg] = OldSlot; 871 RegMap[FPRegNo] = ~0; 872 Stack[--StackTop] = ~0; 873 return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg); 874} 875 876/// adjustLiveRegs - Kill and revive registers such that exactly the FP 877/// registers with a bit in Mask are live. 878void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) { 879 unsigned Defs = Mask; 880 unsigned Kills = 0; 881 for (unsigned i = 0; i < StackTop; ++i) { 882 unsigned RegNo = Stack[i]; 883 if (!(Defs & (1 << RegNo))) 884 // This register is live, but we don't want it. 
885 Kills |= (1 << RegNo); 886 else 887 // We don't need to imp-def this live register. 888 Defs &= ~(1 << RegNo); 889 } 890 assert((Kills & Defs) == 0 && "Register needs killing and def'ing?"); 891 892 // Produce implicit-defs for free by using killed registers. 893 while (Kills && Defs) { 894 unsigned KReg = CountTrailingZeros_32(Kills); 895 unsigned DReg = CountTrailingZeros_32(Defs); 896 DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n"); 897 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]); 898 std::swap(RegMap[KReg], RegMap[DReg]); 899 Kills &= ~(1 << KReg); 900 Defs &= ~(1 << DReg); 901 } 902 903 // Kill registers by popping. 904 if (Kills && I != MBB->begin()) { 905 MachineBasicBlock::iterator I2 = llvm::prior(I); 906 for (;;) { 907 unsigned KReg = getStackEntry(0); 908 if (!(Kills & (1 << KReg))) 909 break; 910 DEBUG(dbgs() << "Popping %FP" << KReg << "\n"); 911 popStackAfter(I2); 912 Kills &= ~(1 << KReg); 913 } 914 } 915 916 // Manually kill the rest. 917 while (Kills) { 918 unsigned KReg = CountTrailingZeros_32(Kills); 919 DEBUG(dbgs() << "Killing %FP" << KReg << "\n"); 920 freeStackSlotBefore(I, KReg); 921 Kills &= ~(1 << KReg); 922 } 923 924 // Load zeros for all the imp-defs. 925 while(Defs) { 926 unsigned DReg = CountTrailingZeros_32(Defs); 927 DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n"); 928 BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0)); 929 pushReg(DReg); 930 Defs &= ~(1 << DReg); 931 } 932 933 // Now we should have the correct registers live. 934 DEBUG(dumpStack()); 935 assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch"); 936} 937 938/// shuffleStackTop - emit fxch instructions before I to shuffle the top 939/// FixCount entries into the order given by FixStack. 940/// FIXME: Is there a better algorithm than insertion sort? 
void FPS::shuffleStackTop(const unsigned char *FixStack,
                          unsigned FixCount,
                          MachineBasicBlock::iterator I) {
  // Move items into place, starting from the desired stack bottom.
  while (FixCount--) {
    // Old register at position FixCount.
    unsigned OldReg = getStackEntry(FixCount);
    // Desired register at position FixCount.
    unsigned Reg = FixStack[FixCount];
    if (Reg == OldReg)
      continue;
    // Two fxch's place Reg at position FixCount without disturbing the entries
    // already fixed above it:
    // (Reg st0) (OldReg st0) = (Reg OldReg st0)
    moveToTop(Reg, I);
    moveToTop(OldReg, I);
  }
  DEBUG(dumpStack());
}


//===----------------------------------------------------------------------===//
// Instruction transformation implementation
//===----------------------------------------------------------------------===//

/// handleZeroArgFP - ST(0) = fld0    ST(0) = flds <mem>
///
void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) {
  MachineInstr *MI = I;
  unsigned DestReg = getFPReg(MI->getOperand(0));

  // Change from the pseudo instruction to the concrete instruction.
  MI->RemoveOperand(0);   // Remove the explicit ST(0) operand
  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));

  // Result gets pushed on the stack.
  pushReg(DestReg);
}

/// handleOneArgFP - fst <mem>, ST(0)
///
void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) {
  MachineInstr *MI = I;
  unsigned NumOps = MI->getDesc().getNumOperands();
  assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) &&
         "Can only handle fst* & ftst instructions!");

  // Is this the last use of the source register?
  unsigned Reg = getFPReg(MI->getOperand(NumOps-1));
  bool KillsSrc = MI->killsRegister(X86::FP0+Reg);

  // FISTP64m is strange because there isn't a non-popping version.
  // If we have one _and_ we don't want to pop the operand, duplicate the value
  // on the stack instead of moving it.  This ensures that popping the value is
  // always ok.
  // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
  //
  if (!KillsSrc &&
      (MI->getOpcode() == X86::IST_Fp64m32 ||
       MI->getOpcode() == X86::ISTT_Fp16m32 ||
       MI->getOpcode() == X86::ISTT_Fp32m32 ||
       MI->getOpcode() == X86::ISTT_Fp64m32 ||
       MI->getOpcode() == X86::IST_Fp64m64 ||
       MI->getOpcode() == X86::ISTT_Fp16m64 ||
       MI->getOpcode() == X86::ISTT_Fp32m64 ||
       MI->getOpcode() == X86::ISTT_Fp64m64 ||
       MI->getOpcode() == X86::IST_Fp64m80 ||
       MI->getOpcode() == X86::ISTT_Fp16m80 ||
       MI->getOpcode() == X86::ISTT_Fp32m80 ||
       MI->getOpcode() == X86::ISTT_Fp64m80 ||
       MI->getOpcode() == X86::ST_FpP80m)) {
    duplicateToTop(Reg, getScratchReg(), I);
  } else {
    moveToTop(Reg, I);            // Move to the top of the stack...
  }

  // Convert from the pseudo instruction to the concrete instruction.
  MI->RemoveOperand(NumOps-1);    // Remove explicit ST(0) operand
  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));

  // The always-popping concrete forms implicitly consume ST(0); account for
  // that pop here.  Otherwise pop only if this was the last use of the source.
  if (MI->getOpcode() == X86::IST_FP64m ||
      MI->getOpcode() == X86::ISTT_FP16m ||
      MI->getOpcode() == X86::ISTT_FP32m ||
      MI->getOpcode() == X86::ISTT_FP64m ||
      MI->getOpcode() == X86::ST_FP80m) {
    if (StackTop == 0)
      report_fatal_error("Stack empty??");
    --StackTop;
  } else if (KillsSrc) { // Last use of operand?
    popStackAfter(I);
  }
}


/// handleOneArgFPRW: Handle instructions that read from the top of stack and
/// replace the value with a newly computed value.  These instructions may have
/// non-fp operands after their FP operands.
///
///  Examples:
///     R1 = fchs R2
///     R1 = fadd R2, [mem]
///
void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) {
  MachineInstr *MI = I;
#ifndef NDEBUG
  unsigned NumOps = MI->getDesc().getNumOperands();
  assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!");
#endif

  // Is this the last use of the source register?
  unsigned Reg = getFPReg(MI->getOperand(1));
  bool KillsSrc = MI->killsRegister(X86::FP0+Reg);

  if (KillsSrc) {
    // If this is the last use of the source register, just make sure it's on
    // the top of the stack.
    moveToTop(Reg, I);
    if (StackTop == 0)
      report_fatal_error("Stack cannot be empty!");
    // Retire the source and push the result in its place at top-of-stack.
    --StackTop;
    pushReg(getFPReg(MI->getOperand(0)));
  } else {
    // If this is not the last use of the source register, _copy_ it to the top
    // of the stack.
    duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I);
  }

  // Change from the pseudo instruction to the concrete instruction.
  MI->RemoveOperand(1);   // Drop the source operand.
  MI->RemoveOperand(0);   // Drop the destination operand.
  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));
}


//===----------------------------------------------------------------------===//
// Define tables of various ways to map pseudo instructions
//

// ForwardST0Table - Map: A = B op C  into: ST(0) = ST(0) op ST(i)
static const TableEntry ForwardST0Table[] = {
  { X86::ADD_Fp32  , X86::ADD_FST0r },
  { X86::ADD_Fp64  , X86::ADD_FST0r },
  { X86::ADD_Fp80  , X86::ADD_FST0r },
  { X86::DIV_Fp32  , X86::DIV_FST0r },
  { X86::DIV_Fp64  , X86::DIV_FST0r },
  { X86::DIV_Fp80  , X86::DIV_FST0r },
  { X86::MUL_Fp32  , X86::MUL_FST0r },
  { X86::MUL_Fp64  , X86::MUL_FST0r },
  { X86::MUL_Fp80  , X86::MUL_FST0r },
  { X86::SUB_Fp32  , X86::SUB_FST0r },
  { X86::SUB_Fp64  , X86::SUB_FST0r },
  { X86::SUB_Fp80  , X86::SUB_FST0r },
};

// ReverseST0Table - Map: A = B op C  into: ST(0) = ST(i) op ST(0)
static const TableEntry ReverseST0Table[] = {
  { X86::ADD_Fp32  , X86::ADD_FST0r  },   // commutative
  { X86::ADD_Fp64  , X86::ADD_FST0r  },   // commutative
  { X86::ADD_Fp80  , X86::ADD_FST0r  },   // commutative
  { X86::DIV_Fp32  , X86::DIVR_FST0r },
  { X86::DIV_Fp64  , X86::DIVR_FST0r },
  { X86::DIV_Fp80  , X86::DIVR_FST0r },
  { X86::MUL_Fp32  , X86::MUL_FST0r  },   // commutative
  { X86::MUL_Fp64  , X86::MUL_FST0r  },   // commutative
  { X86::MUL_Fp80  , X86::MUL_FST0r  },   // commutative
  { X86::SUB_Fp32  , X86::SUBR_FST0r },
  { X86::SUB_Fp64  , X86::SUBR_FST0r },
  { X86::SUB_Fp80  , X86::SUBR_FST0r },
};

// ForwardSTiTable - Map: A = B op C  into: ST(i) = ST(0) op ST(i)
static const TableEntry ForwardSTiTable[] = {
  { X86::ADD_Fp32  , X86::ADD_FrST0  },   // commutative
  { X86::ADD_Fp64  , X86::ADD_FrST0  },   // commutative
  { X86::ADD_Fp80  , X86::ADD_FrST0  },   // commutative
  { X86::DIV_Fp32  , X86::DIVR_FrST0 },
  { X86::DIV_Fp64  , X86::DIVR_FrST0 },
  { X86::DIV_Fp80  , X86::DIVR_FrST0 },
  { X86::MUL_Fp32  , X86::MUL_FrST0  },   // commutative
  { X86::MUL_Fp64  , X86::MUL_FrST0  },   // commutative
  { X86::MUL_Fp80  , X86::MUL_FrST0  },   // commutative
  { X86::SUB_Fp32  , X86::SUBR_FrST0 },
  { X86::SUB_Fp64  , X86::SUBR_FrST0 },
  { X86::SUB_Fp80  , X86::SUBR_FrST0 },
};

// ReverseSTiTable - Map: A = B op C  into: ST(i) = ST(i) op ST(0)
static const TableEntry ReverseSTiTable[] = {
  { X86::ADD_Fp32  , X86::ADD_FrST0 },
  { X86::ADD_Fp64  , X86::ADD_FrST0 },
  { X86::ADD_Fp80  , X86::ADD_FrST0 },
  { X86::DIV_Fp32  , X86::DIV_FrST0 },
  { X86::DIV_Fp64  , X86::DIV_FrST0 },
  { X86::DIV_Fp80  , X86::DIV_FrST0 },
  { X86::MUL_Fp32  , X86::MUL_FrST0 },
  { X86::MUL_Fp64  , X86::MUL_FrST0 },
  { X86::MUL_Fp80  , X86::MUL_FrST0 },
  { X86::SUB_Fp32  , X86::SUB_FrST0 },
  { X86::SUB_Fp64  , X86::SUB_FrST0 },
  { X86::SUB_Fp80  , X86::SUB_FrST0 },
};


/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
/// instructions which need to be simplified and possibly transformed.
1144/// 1145/// Result: ST(0) = fsub ST(0), ST(i) 1146/// ST(i) = fsub ST(0), ST(i) 1147/// ST(0) = fsubr ST(0), ST(i) 1148/// ST(i) = fsubr ST(0), ST(i) 1149/// 1150void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) { 1151 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 1152 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 1153 MachineInstr *MI = I; 1154 1155 unsigned NumOperands = MI->getDesc().getNumOperands(); 1156 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!"); 1157 unsigned Dest = getFPReg(MI->getOperand(0)); 1158 unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); 1159 unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); 1160 bool KillsOp0 = MI->killsRegister(X86::FP0+Op0); 1161 bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 1162 DebugLoc dl = MI->getDebugLoc(); 1163 1164 unsigned TOS = getStackEntry(0); 1165 1166 // One of our operands must be on the top of the stack. If neither is yet, we 1167 // need to move one. 1168 if (Op0 != TOS && Op1 != TOS) { // No operand at TOS? 1169 // We can choose to move either operand to the top of the stack. If one of 1170 // the operands is killed by this instruction, we want that one so that we 1171 // can update right on top of the old version. 1172 if (KillsOp0) { 1173 moveToTop(Op0, I); // Move dead operand to TOS. 1174 TOS = Op0; 1175 } else if (KillsOp1) { 1176 moveToTop(Op1, I); 1177 TOS = Op1; 1178 } else { 1179 // All of the operands are live after this instruction executes, so we 1180 // cannot update on top of any operand. Because of this, we must 1181 // duplicate one of the stack elements to the top. It doesn't matter 1182 // which one we pick. 
1183 // 1184 duplicateToTop(Op0, Dest, I); 1185 Op0 = TOS = Dest; 1186 KillsOp0 = true; 1187 } 1188 } else if (!KillsOp0 && !KillsOp1) { 1189 // If we DO have one of our operands at the top of the stack, but we don't 1190 // have a dead operand, we must duplicate one of the operands to a new slot 1191 // on the stack. 1192 duplicateToTop(Op0, Dest, I); 1193 Op0 = TOS = Dest; 1194 KillsOp0 = true; 1195 } 1196 1197 // Now we know that one of our operands is on the top of the stack, and at 1198 // least one of our operands is killed by this instruction. 1199 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) && 1200 "Stack conditions not set up right!"); 1201 1202 // We decide which form to use based on what is on the top of the stack, and 1203 // which operand is killed by this instruction. 1204 const TableEntry *InstTable; 1205 bool isForward = TOS == Op0; 1206 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0); 1207 if (updateST0) { 1208 if (isForward) 1209 InstTable = ForwardST0Table; 1210 else 1211 InstTable = ReverseST0Table; 1212 } else { 1213 if (isForward) 1214 InstTable = ForwardSTiTable; 1215 else 1216 InstTable = ReverseSTiTable; 1217 } 1218 1219 int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table), 1220 MI->getOpcode()); 1221 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!"); 1222 1223 // NotTOS - The register which is not on the top of stack... 1224 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0; 1225 1226 // Replace the old instruction with a new instruction 1227 MBB->remove(I++); 1228 I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS)); 1229 1230 // If both operands are killed, pop one off of the stack in addition to 1231 // overwriting the other one. 
1232 if (KillsOp0 && KillsOp1 && Op0 != Op1) { 1233 assert(!updateST0 && "Should have updated other operand!"); 1234 popStackAfter(I); // Pop the top of stack 1235 } 1236 1237 // Update stack information so that we know the destination register is now on 1238 // the stack. 1239 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS); 1240 assert(UpdatedSlot < StackTop && Dest < 7); 1241 Stack[UpdatedSlot] = Dest; 1242 RegMap[Dest] = UpdatedSlot; 1243 MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction 1244} 1245 1246/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP 1247/// register arguments and no explicit destinations. 1248/// 1249void FPS::handleCompareFP(MachineBasicBlock::iterator &I) { 1250 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 1251 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 1252 MachineInstr *MI = I; 1253 1254 unsigned NumOperands = MI->getDesc().getNumOperands(); 1255 assert(NumOperands == 2 && "Illegal FUCOM* instruction!"); 1256 unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); 1257 unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); 1258 bool KillsOp0 = MI->killsRegister(X86::FP0+Op0); 1259 bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 1260 1261 // Make sure the first operand is on the top of stack, the other one can be 1262 // anywhere. 1263 moveToTop(Op0, I); 1264 1265 // Change from the pseudo instruction to the concrete instruction. 1266 MI->getOperand(0).setReg(getSTReg(Op1)); 1267 MI->RemoveOperand(1); 1268 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 1269 1270 // If any of the operands are killed by this instruction, free them. 1271 if (KillsOp0) freeStackSlotAfter(I, Op0); 1272 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1); 1273} 1274 1275/// handleCondMovFP - Handle two address conditional move instructions. These 1276/// instructions move a st(i) register to st(0) iff a condition is true. 
/// These instructions require that the first operand is at the top of the
/// stack, but otherwise don't modify the stack at all.
void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) {
  MachineInstr *MI = I;

  unsigned Op0 = getFPReg(MI->getOperand(0));
  unsigned Op1 = getFPReg(MI->getOperand(2));
  bool KillsOp1 = MI->killsRegister(X86::FP0+Op1);

  // The first operand *must* be on the top of the stack.
  moveToTop(Op0, I);

  // Change the second operand to the stack register that the operand is in.
  // Change from the pseudo instruction to the concrete instruction.
  MI->RemoveOperand(0);
  MI->RemoveOperand(1);
  MI->getOperand(0).setReg(getSTReg(Op1));
  MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode())));

  // If we kill the second operand, make sure to pop it from the stack.
  if (Op0 != Op1 && KillsOp1) {
    // Get this value off of the register stack.
    freeStackSlotAfter(I, Op1);
  }
}


/// handleSpecialFP - Handle special instructions which behave unlike other
/// floating point instructions.  This is primarily intended for use by pseudo
/// instructions.
///
void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
  MachineInstr *MI = I;
  switch (MI->getOpcode()) {
  default: llvm_unreachable("Unknown SpecialFP instruction!");
  case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type!
  case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type!
  case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type!
    assert(StackTop == 0 && "Stack should be empty after a call!");
    pushReg(getFPReg(MI->getOperand(0)));
    break;
  case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type!
  case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type!
  case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type!
    // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm.
    // The pattern we expect is:
    //   CALL
    //   FP1 = FpGET_ST0
    //   FP4 = FpGET_ST1
    //
    // At this point, we've pushed FP1 on the top of stack, so it should be
    // present if it isn't dead.  If it was dead, we already emitted a pop to
    // remove it from the stack and StackTop = 0.

    // Push FP4 as top of stack next.
    pushReg(getFPReg(MI->getOperand(0)));

    // If StackTop was 0 before we pushed our operand, then ST(0) must have been
    // dead.  In this case, the ST(1) value is the only thing that is live, so
    // it should be on the TOS (after the pop that was emitted) and is.  Just
    // continue in this case.
    if (StackTop == 1)
      break;

    // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top
    // elements so that our accounting is correct.
    unsigned RegOnTop = getStackEntry(0);
    unsigned RegNo = getStackEntry(1);

    // Swap the slots the regs are in.
    std::swap(RegMap[RegNo], RegMap[RegOnTop]);

    // Swap stack slot contents.
    if (RegMap[RegOnTop] >= StackTop)
      report_fatal_error("Access past stack top!");
    std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
    break;
  }
  case X86::FpSET_ST0_32:
  case X86::FpSET_ST0_64:
  case X86::FpSET_ST0_80: {
    // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm
    // arguments that use an st constraint.  We expect a sequence of
    // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM
    unsigned Op0 = getFPReg(MI->getOperand(0));

    if (!MI->killsRegister(X86::FP0 + Op0)) {
      // Duplicate Op0 into a temporary on the stack top.
      duplicateToTop(Op0, getScratchReg(), I);
    } else {
      // Op0 is killed, so just swap it into position.
      moveToTop(Op0, I);
    }
    --StackTop;   // "Forget" we have something on the top of stack!
    break;
  }
  case X86::FpSET_ST1_32:
  case X86::FpSET_ST1_64:
  case X86::FpSET_ST1_80: {
    // Set up st(1) for inline asm.  We are assuming that st(0) has already been
    // set up by FpSET_ST0, and our StackTop is off by one because of it.
    unsigned Op0 = getFPReg(MI->getOperand(0));
    // Restore the actual StackTop from before Fp_SET_ST0.
    // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we
    // are not enforcing the constraint.
    ++StackTop;
    unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0).
    if (!MI->killsRegister(X86::FP0 + Op0)) {
      duplicateToTop(Op0, getScratchReg(), I);
      moveToTop(RegOnTop, I);
    } else if (getSTReg(Op0) != X86::ST1) {
      // We have the wrong value at st(1).  Shuffle!  Untested!
      moveToTop(getStackEntry(1), I);
      moveToTop(Op0, I);
      moveToTop(RegOnTop, I);
    }
    assert(StackTop >= 2 && "Too few live registers");
    StackTop -= 2; // "Forget" both st(0) and st(1).
    break;
  }
  case X86::MOV_Fp3232:
  case X86::MOV_Fp3264:
  case X86::MOV_Fp6432:
  case X86::MOV_Fp6464:
  case X86::MOV_Fp3280:
  case X86::MOV_Fp6480:
  case X86::MOV_Fp8032:
  case X86::MOV_Fp8064:
  case X86::MOV_Fp8080: {
    const MachineOperand &MO1 = MI->getOperand(1);
    unsigned SrcReg = getFPReg(MO1);

    const MachineOperand &MO0 = MI->getOperand(0);
    unsigned DestReg = getFPReg(MO0);
    if (MI->killsRegister(X86::FP0+SrcReg)) {
      // If the input operand is killed, we can just change the owner of the
      // incoming stack slot into the result.
      unsigned Slot = getSlot(SrcReg);
      assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!");
      Stack[Slot] = DestReg;
      RegMap[DestReg] = Slot;

    } else {
      // For FMOV we just duplicate the specified value to a new stack slot.
      // This could be made better, but would require substantial changes.
      duplicateToTop(SrcReg, DestReg, I);
    }
  }
    break;
  case TargetOpcode::INLINEASM: {
    // The inline asm MachineInstr currently only *uses* FP registers for the
    // 'f' constraint.  These should be turned into the current ST(x) register
    // in the machine instr.  Also, any kills should be explicitly popped after
    // the inline asm.
    unsigned Kills = 0;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &Op = MI->getOperand(i);
      if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
        continue;
      assert(Op.isUse() && "Only handle inline asm uses right now");

      unsigned FPReg = getFPReg(Op);
      Op.setReg(getSTReg(FPReg));

      // If we kill this operand, make sure to pop it from the stack after the
      // asm.  We just remember it for now, and pop them all off at the end in
      // a batch.
      if (Op.isKill())
        Kills |= 1U << FPReg;
    }

    // If this asm kills any FP registers (is the last use of them) we must
    // explicitly emit pop instructions for them.  Do this now after the asm has
    // executed so that the ST(x) numbers are not off (which would happen if we
    // did this inline with operand rewriting).
    //
    // Note: this might be a non-optimal pop sequence.  We might be able to do
    // better by trying to pop in stack order or something.
    MachineBasicBlock::iterator InsertPt = MI;
    while (Kills) {
      unsigned FPReg = CountTrailingZeros_32(Kills);
      freeStackSlotAfter(InsertPt, FPReg);
      Kills &= ~(1U << FPReg);
    }
    // Don't delete the inline asm!
    return;
  }

  case X86::RET:
  case X86::RETI:
    // If RET has an FP register use operand, pass the first one in ST(0) and
    // the second one in ST(1).

    // Find the register operands.
    unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U;
    unsigned LiveMask = 0;

    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &Op = MI->getOperand(i);
      if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
        continue;
      // FP Register uses must be kills unless there are two uses of the same
      // register, in which case only one will be a kill.
      assert(Op.isUse() &&
             (Op.isKill() ||                        // Marked kill.
              getFPReg(Op) == FirstFPRegOp ||       // Second instance.
              MI->killsRegister(Op.getReg())) &&    // Later use is marked kill.
             "Ret only defs operands, and values aren't live beyond it");

      if (FirstFPRegOp == ~0U)
        FirstFPRegOp = getFPReg(Op);
      else {
        assert(SecondFPRegOp == ~0U && "More than two fp operands!");
        SecondFPRegOp = getFPReg(Op);
      }
      LiveMask |= (1 << getFPReg(Op));

      // Remove the operand so that later passes don't see it.
      MI->RemoveOperand(i);
      --i, --e;
    }

    // We may have been carrying spurious live-ins, so make sure only the
    // returned registers are left live.
    adjustLiveRegs(LiveMask, MI);
    if (!LiveMask) return;  // Quick check to see if any are possible.

    // There are only four possibilities here:
    // 1) we are returning a single FP value.  In this case, it has to be in
    //    ST(0) already, so just declare success by removing the value from the
    //    FP Stack.
    if (SecondFPRegOp == ~0U) {
      // Assert that the top of stack contains the right FP register.
      assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) &&
             "Top of stack not the right register for RET!");

      // Ok, everything is good, mark the value as not being on the stack
      // anymore so that our assertion about the stack being empty at end of
      // block doesn't fire.
      StackTop = 0;
      return;
    }

    // Otherwise, we are returning two values:
    // 2) If returning the same value for both, we only have one thing in the FP
    //    stack.  Consider:  RET FP1, FP1
    if (StackTop == 1) {
      assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0) &&
             "Stack misconfiguration for RET!");

      // Duplicate the TOS so that we return it twice.  Just pick some other FPx
      // register to hold it.
      unsigned NewReg = getScratchReg();
      duplicateToTop(FirstFPRegOp, NewReg, MI);
      FirstFPRegOp = NewReg;
    }

    /// Okay we know we have two different FPx operands now:
    assert(StackTop == 2 && "Must have two values live!");

    /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
    ///    in ST(1).  In this case, emit an fxch.
    if (getStackEntry(0) == SecondFPRegOp) {
      assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live");
      moveToTop(FirstFPRegOp, MI);
    }

    /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
    /// ST(1).  Just remove both from our understanding of the stack and return.
    assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live");
    assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live");
    StackTop = 0;
    return;
  }

  I = MBB->erase(I);  // Remove the pseudo instruction

  // We want to leave I pointing to the previous instruction, but what if we
  // just erased the first instruction?
  if (I == MBB->begin()) {
    DEBUG(dbgs() << "Inserting dummy KILL\n");
    I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL));
  } else
    --I;
}

// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands.
1564bool FPS::translateCopy(MachineInstr *MI) { 1565 unsigned DstReg = MI->getOperand(0).getReg(); 1566 unsigned SrcReg = MI->getOperand(1).getReg(); 1567 1568 if (DstReg == X86::ST0) { 1569 MI->setDesc(TII->get(X86::FpSET_ST0_80)); 1570 MI->RemoveOperand(0); 1571 return true; 1572 } 1573 if (DstReg == X86::ST1) { 1574 MI->setDesc(TII->get(X86::FpSET_ST1_80)); 1575 MI->RemoveOperand(0); 1576 return true; 1577 } 1578 if (SrcReg == X86::ST0) { 1579 MI->setDesc(TII->get(X86::FpGET_ST0_80)); 1580 return true; 1581 } 1582 if (SrcReg == X86::ST1) { 1583 MI->setDesc(TII->get(X86::FpGET_ST1_80)); 1584 return true; 1585 } 1586 if (X86::RFP80RegClass.contains(DstReg, SrcReg)) { 1587 MI->setDesc(TII->get(X86::MOV_Fp8080)); 1588 return true; 1589 } 1590 return false; 1591} 1592