X86FloatingPoint.cpp revision 3f490f3469dd30b47d7b59a9b84750c31dc19633
//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass which converts floating point instructions from
// pseudo registers into register stack instructions.  This pass uses live
// variable information to indicate where the FPn registers are used and their
// lifetimes.
//
// The x87 hardware tracks liveness of the stack registers, so it is necessary
// to implement exact liveness tracking between basic blocks. The CFG edges are
// partitioned into bundles where the same FP registers must be live in
// identical stack positions. Instructions are inserted at the end of each basic
// block to rearrange the live registers to match the outgoing bundle.
//
// This approach avoids splitting critical edges at the potential cost of more
// live register shuffling instructions when critical edges are present.
23// 24//===----------------------------------------------------------------------===// 25 26#define DEBUG_TYPE "x86-codegen" 27#include "X86.h" 28#include "X86InstrInfo.h" 29#include "llvm/ADT/DepthFirstIterator.h" 30#include "llvm/ADT/DenseMap.h" 31#include "llvm/ADT/SmallPtrSet.h" 32#include "llvm/ADT/SmallVector.h" 33#include "llvm/ADT/Statistic.h" 34#include "llvm/ADT/STLExtras.h" 35#include "llvm/CodeGen/MachineFunctionPass.h" 36#include "llvm/CodeGen/MachineInstrBuilder.h" 37#include "llvm/CodeGen/MachineRegisterInfo.h" 38#include "llvm/CodeGen/Passes.h" 39#include "llvm/Support/Debug.h" 40#include "llvm/Support/ErrorHandling.h" 41#include "llvm/Support/raw_ostream.h" 42#include "llvm/Target/TargetInstrInfo.h" 43#include "llvm/Target/TargetMachine.h" 44#include <algorithm> 45using namespace llvm; 46 47STATISTIC(NumFXCH, "Number of fxch instructions inserted"); 48STATISTIC(NumFP , "Number of floating point instructions"); 49 50namespace { 51 struct FPS : public MachineFunctionPass { 52 static char ID; 53 FPS() : MachineFunctionPass(ID) { 54 // This is really only to keep valgrind quiet. 55 // The logic in isLive() is too much for it. 56 memset(Stack, 0, sizeof(Stack)); 57 memset(RegMap, 0, sizeof(RegMap)); 58 } 59 60 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 61 AU.setPreservesCFG(); 62 AU.addPreservedID(MachineLoopInfoID); 63 AU.addPreservedID(MachineDominatorsID); 64 MachineFunctionPass::getAnalysisUsage(AU); 65 } 66 67 virtual bool runOnMachineFunction(MachineFunction &MF); 68 69 virtual const char *getPassName() const { return "X86 FP Stackifier"; } 70 71 private: 72 const TargetInstrInfo *TII; // Machine instruction info. 73 74 // Two CFG edges are related if they leave the same block, or enter the same 75 // block. The transitive closure of an edge under this relation is a 76 // LiveBundle. It represents a set of CFG edges where the live FP stack 77 // registers must be allocated identically in the x87 stack. 
78 // 79 // A LiveBundle is usually all the edges leaving a block, or all the edges 80 // entering a block, but it can contain more edges if critical edges are 81 // present. 82 // 83 // The set of live FP registers in a LiveBundle is calculated by bundleCFG, 84 // but the exact mapping of FP registers to stack slots is fixed later. 85 struct LiveBundle { 86 // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c. 87 unsigned Mask; 88 89 // Number of pre-assigned live registers in FixStack. This is 0 when the 90 // stack order has not yet been fixed. 91 unsigned FixCount; 92 93 // Assigned stack order for live-in registers. 94 // FixStack[i] == getStackEntry(i) for all i < FixCount. 95 unsigned char FixStack[8]; 96 97 LiveBundle(unsigned m = 0) : Mask(m), FixCount(0) {} 98 99 // Have the live registers been assigned a stack order yet? 100 bool isFixed() const { return !Mask || FixCount; } 101 }; 102 103 // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges 104 // with no live FP registers. 105 SmallVector<LiveBundle, 8> LiveBundles; 106 107 // Map each MBB in the current function to an (ingoing, outgoing) index into 108 // LiveBundles. Blocks with no FP registers live in or out map to (0, 0) 109 // and are not actually stored in the map. 110 DenseMap<MachineBasicBlock*, std::pair<unsigned, unsigned> > BlockBundle; 111 112 // Return a bitmask of FP registers in block's live-in list. 113 unsigned calcLiveInMask(MachineBasicBlock *MBB) { 114 unsigned Mask = 0; 115 for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), 116 E = MBB->livein_end(); I != E; ++I) { 117 unsigned Reg = *I - X86::FP0; 118 if (Reg < 8) 119 Mask |= 1 << Reg; 120 } 121 return Mask; 122 } 123 124 // Partition all the CFG edges into LiveBundles. 125 void bundleCFG(MachineFunction &MF); 126 127 MachineBasicBlock *MBB; // Current basic block 128 unsigned Stack[8]; // FP<n> Registers in each stack slot... 
129 unsigned RegMap[8]; // Track which stack slot contains each register 130 unsigned StackTop; // The current top of the FP stack. 131 132 // Set up our stack model to match the incoming registers to MBB. 133 void setupBlockStack(); 134 135 // Shuffle live registers to match the expectations of successor blocks. 136 void finishBlockStack(); 137 138 void dumpStack() const { 139 dbgs() << "Stack contents:"; 140 for (unsigned i = 0; i != StackTop; ++i) { 141 dbgs() << " FP" << Stack[i]; 142 assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); 143 } 144 dbgs() << "\n"; 145 } 146 147 /// getSlot - Return the stack slot number a particular register number is 148 /// in. 149 unsigned getSlot(unsigned RegNo) const { 150 assert(RegNo < 8 && "Regno out of range!"); 151 return RegMap[RegNo]; 152 } 153 154 /// isLive - Is RegNo currently live in the stack? 155 bool isLive(unsigned RegNo) const { 156 unsigned Slot = getSlot(RegNo); 157 return Slot < StackTop && Stack[Slot] == RegNo; 158 } 159 160 /// getScratchReg - Return an FP register that is not currently in use. 161 unsigned getScratchReg() { 162 for (int i = 7; i >= 0; --i) 163 if (!isLive(i)) 164 return i; 165 llvm_unreachable("Ran out of scratch FP registers"); 166 } 167 168 /// getStackEntry - Return the X86::FP<n> register in register ST(i). 169 unsigned getStackEntry(unsigned STi) const { 170 if (STi >= StackTop) 171 report_fatal_error("Access past stack top!"); 172 return Stack[StackTop-1-STi]; 173 } 174 175 /// getSTReg - Return the X86::ST(i) register which contains the specified 176 /// FP<RegNo> register. 177 unsigned getSTReg(unsigned RegNo) const { 178 return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0; 179 } 180 181 // pushReg - Push the specified FP<n> register onto the stack. 
182 void pushReg(unsigned Reg) { 183 assert(Reg < 8 && "Register number out of range!"); 184 if (StackTop >= 8) 185 report_fatal_error("Stack overflow!"); 186 Stack[StackTop] = Reg; 187 RegMap[Reg] = StackTop++; 188 } 189 190 bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; } 191 void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) { 192 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc(); 193 if (isAtTop(RegNo)) return; 194 195 unsigned STReg = getSTReg(RegNo); 196 unsigned RegOnTop = getStackEntry(0); 197 198 // Swap the slots the regs are in. 199 std::swap(RegMap[RegNo], RegMap[RegOnTop]); 200 201 // Swap stack slot contents. 202 if (RegMap[RegOnTop] >= StackTop) 203 report_fatal_error("Access past stack top!"); 204 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); 205 206 // Emit an fxch to update the runtime processors version of the state. 207 BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg); 208 ++NumFXCH; 209 } 210 211 void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) { 212 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc(); 213 unsigned STReg = getSTReg(RegNo); 214 pushReg(AsReg); // New register on top of stack 215 216 BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg); 217 } 218 219 /// popStackAfter - Pop the current value off of the top of the FP stack 220 /// after the specified instruction. 221 void popStackAfter(MachineBasicBlock::iterator &I); 222 223 /// freeStackSlotAfter - Free the specified register from the register 224 /// stack, so that it is no longer in a register. If the register is 225 /// currently at the top of the stack, we just pop the current instruction, 226 /// otherwise we store the current top-of-stack into the specified slot, 227 /// then pop the top of stack. 228 void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg); 229 230 /// freeStackSlotBefore - Just the pop, no folding. Return the inserted 231 /// instruction. 
232 MachineBasicBlock::iterator 233 freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo); 234 235 /// Adjust the live registers to be the set in Mask. 236 void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I); 237 238 /// Shuffle the top FixCount stack entries susch that FP reg FixStack[0] is 239 /// st(0), FP reg FixStack[1] is st(1) etc. 240 void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount, 241 MachineBasicBlock::iterator I); 242 243 bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); 244 245 void handleZeroArgFP(MachineBasicBlock::iterator &I); 246 void handleOneArgFP(MachineBasicBlock::iterator &I); 247 void handleOneArgFPRW(MachineBasicBlock::iterator &I); 248 void handleTwoArgFP(MachineBasicBlock::iterator &I); 249 void handleCompareFP(MachineBasicBlock::iterator &I); 250 void handleCondMovFP(MachineBasicBlock::iterator &I); 251 void handleSpecialFP(MachineBasicBlock::iterator &I); 252 253 bool translateCopy(MachineInstr*); 254 }; 255 char FPS::ID = 0; 256} 257 258FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); } 259 260/// getFPReg - Return the X86::FPx register number for the specified operand. 261/// For example, this returns 3 for X86::FP3. 262static unsigned getFPReg(const MachineOperand &MO) { 263 assert(MO.isReg() && "Expected an FP register!"); 264 unsigned Reg = MO.getReg(); 265 assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!"); 266 return Reg - X86::FP0; 267} 268 269/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP 270/// register references into FP stack references. 271/// 272bool FPS::runOnMachineFunction(MachineFunction &MF) { 273 // We only need to run this pass if there are any FP registers used in this 274 // function. If it is all integer, there is nothing for us to do! 
275 bool FPIsUsed = false; 276 277 assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!"); 278 for (unsigned i = 0; i <= 6; ++i) 279 if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) { 280 FPIsUsed = true; 281 break; 282 } 283 284 // Early exit. 285 if (!FPIsUsed) return false; 286 287 TII = MF.getTarget().getInstrInfo(); 288 289 // Prepare cross-MBB liveness. 290 bundleCFG(MF); 291 292 StackTop = 0; 293 294 // Process the function in depth first order so that we process at least one 295 // of the predecessors for every reachable block in the function. 296 SmallPtrSet<MachineBasicBlock*, 8> Processed; 297 MachineBasicBlock *Entry = MF.begin(); 298 299 bool Changed = false; 300 for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> > 301 I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed); 302 I != E; ++I) 303 Changed |= processBasicBlock(MF, **I); 304 305 // Process any unreachable blocks in arbitrary order now. 306 if (MF.size() != Processed.size()) 307 for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) 308 if (Processed.insert(BB)) 309 Changed |= processBasicBlock(MF, *BB); 310 311 BlockBundle.clear(); 312 LiveBundles.clear(); 313 314 return Changed; 315} 316 317/// bundleCFG - Scan all the basic blocks to determine consistent live-in and 318/// live-out sets for the FP registers. Consistent means that the set of 319/// registers live-out from a block is identical to the live-in set of all 320/// successors. This is not enforced by the normal live-in lists since 321/// registers may be implicitly defined, or not used by all successors. 322void FPS::bundleCFG(MachineFunction &MF) { 323 assert(LiveBundles.empty() && "Stale data in LiveBundles"); 324 assert(BlockBundle.empty() && "Stale data in BlockBundle"); 325 SmallPtrSet<MachineBasicBlock*, 8> PropDown, PropUp; 326 327 // LiveBundle[0] is the empty live-in set. 
328 LiveBundles.resize(1); 329 330 // First gather the actual live-in masks for all MBBs. 331 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { 332 MachineBasicBlock *MBB = I; 333 const unsigned Mask = calcLiveInMask(MBB); 334 if (!Mask) 335 continue; 336 // Ingoing bundle index. 337 unsigned &Idx = BlockBundle[MBB].first; 338 // Already assigned an ingoing bundle? 339 if (Idx) 340 continue; 341 // Allocate a new LiveBundle struct for this block's live-ins. 342 const unsigned BundleIdx = Idx = LiveBundles.size(); 343 DEBUG(dbgs() << "Creating LB#" << BundleIdx << ": in:BB#" 344 << MBB->getNumber()); 345 LiveBundles.push_back(Mask); 346 LiveBundle &Bundle = LiveBundles.back(); 347 348 // Make sure all predecessors have the same live-out set. 349 PropUp.insert(MBB); 350 351 // Keep pushing liveness up and down the CFG until convergence. 352 // Only critical edges cause iteration here, but when they do, multiple 353 // blocks can be assigned to the same LiveBundle index. 354 do { 355 // Assign BundleIdx as liveout from predecessors in PropUp. 356 for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropUp.begin(), 357 E = PropUp.end(); I != E; ++I) { 358 MachineBasicBlock *MBB = *I; 359 for (MachineBasicBlock::const_pred_iterator LinkI = MBB->pred_begin(), 360 LinkE = MBB->pred_end(); LinkI != LinkE; ++LinkI) { 361 MachineBasicBlock *PredMBB = *LinkI; 362 // PredMBB's liveout bundle should be set to LIIdx. 363 unsigned &Idx = BlockBundle[PredMBB].second; 364 if (Idx) { 365 assert(Idx == BundleIdx && "Inconsistent CFG"); 366 continue; 367 } 368 Idx = BundleIdx; 369 DEBUG(dbgs() << " out:BB#" << PredMBB->getNumber()); 370 // Propagate to siblings. 371 if (PredMBB->succ_size() > 1) 372 PropDown.insert(PredMBB); 373 } 374 } 375 PropUp.clear(); 376 377 // Assign BundleIdx as livein to successors in PropDown. 
378 for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropDown.begin(), 379 E = PropDown.end(); I != E; ++I) { 380 MachineBasicBlock *MBB = *I; 381 for (MachineBasicBlock::const_succ_iterator LinkI = MBB->succ_begin(), 382 LinkE = MBB->succ_end(); LinkI != LinkE; ++LinkI) { 383 MachineBasicBlock *SuccMBB = *LinkI; 384 // LinkMBB's livein bundle should be set to BundleIdx. 385 unsigned &Idx = BlockBundle[SuccMBB].first; 386 if (Idx) { 387 assert(Idx == BundleIdx && "Inconsistent CFG"); 388 continue; 389 } 390 Idx = BundleIdx; 391 DEBUG(dbgs() << " in:BB#" << SuccMBB->getNumber()); 392 // Propagate to siblings. 393 if (SuccMBB->pred_size() > 1) 394 PropUp.insert(SuccMBB); 395 // Also accumulate the bundle liveness mask from the liveins here. 396 Bundle.Mask |= calcLiveInMask(SuccMBB); 397 } 398 } 399 PropDown.clear(); 400 } while (!PropUp.empty()); 401 DEBUG({ 402 dbgs() << " live:"; 403 for (unsigned i = 0; i < 8; ++i) 404 if (Bundle.Mask & (1<<i)) 405 dbgs() << " %FP" << i; 406 dbgs() << '\n'; 407 }); 408 } 409} 410 411/// processBasicBlock - Loop over all of the instructions in the basic block, 412/// transforming FP instructions into their stack form. 413/// 414bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { 415 bool Changed = false; 416 MBB = &BB; 417 418 setupBlockStack(); 419 420 for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { 421 MachineInstr *MI = I; 422 uint64_t Flags = MI->getDesc().TSFlags; 423 424 unsigned FPInstClass = Flags & X86II::FPTypeMask; 425 if (MI->isInlineAsm()) 426 FPInstClass = X86II::SpecialFP; 427 428 if (MI->isCopy() && translateCopy(MI)) 429 FPInstClass = X86II::SpecialFP; 430 431 if (FPInstClass == X86II::NotFP) 432 continue; // Efficiently ignore non-fp insts! 
433 434 MachineInstr *PrevMI = 0; 435 if (I != BB.begin()) 436 PrevMI = prior(I); 437 438 ++NumFP; // Keep track of # of pseudo instrs 439 DEBUG(dbgs() << "\nFPInst:\t" << *MI); 440 441 // Get dead variables list now because the MI pointer may be deleted as part 442 // of processing! 443 SmallVector<unsigned, 8> DeadRegs; 444 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 445 const MachineOperand &MO = MI->getOperand(i); 446 if (MO.isReg() && MO.isDead()) 447 DeadRegs.push_back(MO.getReg()); 448 } 449 450 switch (FPInstClass) { 451 case X86II::ZeroArgFP: handleZeroArgFP(I); break; 452 case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0) 453 case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0)) 454 case X86II::TwoArgFP: handleTwoArgFP(I); break; 455 case X86II::CompareFP: handleCompareFP(I); break; 456 case X86II::CondMovFP: handleCondMovFP(I); break; 457 case X86II::SpecialFP: handleSpecialFP(I); break; 458 default: llvm_unreachable("Unknown FP Type!"); 459 } 460 461 // Check to see if any of the values defined by this instruction are dead 462 // after definition. If so, pop them. 463 for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) { 464 unsigned Reg = DeadRegs[i]; 465 if (Reg >= X86::FP0 && Reg <= X86::FP6) { 466 DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); 467 freeStackSlotAfter(I, Reg-X86::FP0); 468 } 469 } 470 471 // Print out all of the instructions expanded to if -debug 472 DEBUG( 473 MachineBasicBlock::iterator PrevI(PrevMI); 474 if (I == PrevI) { 475 dbgs() << "Just deleted pseudo instruction\n"; 476 } else { 477 MachineBasicBlock::iterator Start = I; 478 // Rewind to first instruction newly inserted. 
479 while (Start != BB.begin() && prior(Start) != PrevI) --Start; 480 dbgs() << "Inserted instructions:\n\t"; 481 Start->print(dbgs(), &MF.getTarget()); 482 while (++Start != llvm::next(I)) {} 483 } 484 dumpStack(); 485 ); 486 487 Changed = true; 488 } 489 490 finishBlockStack(); 491 492 return Changed; 493} 494 495/// setupBlockStack - Use the BlockBundle map to set up our model of the stack 496/// to match predecessors' live out stack. 497void FPS::setupBlockStack() { 498 DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber() 499 << " derived from " << MBB->getName() << ".\n"); 500 StackTop = 0; 501 const LiveBundle &Bundle = LiveBundles[BlockBundle.lookup(MBB).first]; 502 503 if (!Bundle.Mask) { 504 DEBUG(dbgs() << "Block has no FP live-ins.\n"); 505 return; 506 } 507 508 // Depth-first iteration should ensure that we always have an assigned stack. 509 assert(Bundle.isFixed() && "Reached block before any predecessors"); 510 511 // Push the fixed live-in registers. 512 for (unsigned i = Bundle.FixCount; i > 0; --i) { 513 MBB->addLiveIn(X86::ST0+i-1); 514 DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP" 515 << unsigned(Bundle.FixStack[i-1]) << '\n'); 516 pushReg(Bundle.FixStack[i-1]); 517 } 518 519 // Kill off unwanted live-ins. This can happen with a critical edge. 520 // FIXME: We could keep these live registers around as zombies. They may need 521 // to be revived at the end of a short block. It might save a few instrs. 522 adjustLiveRegs(calcLiveInMask(MBB), MBB->begin()); 523 DEBUG(MBB->dump()); 524} 525 526/// finishBlockStack - Revive live-outs that are implicitly defined out of 527/// MBB. Shuffle live registers to match the expected fixed stack of any 528/// predecessors, and ensure that all predecessors are expecting the same 529/// stack. 530void FPS::finishBlockStack() { 531 // The RET handling below takes care of return blocks for us. 
532 if (MBB->succ_empty()) 533 return; 534 535 DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber() 536 << " derived from " << MBB->getName() << ".\n"); 537 538 unsigned BundleIdx = BlockBundle.lookup(MBB).second; 539 LiveBundle &Bundle = LiveBundles[BundleIdx]; 540 541 // We may need to kill and define some registers to match successors. 542 // FIXME: This can probably be combined with the shuffle below. 543 MachineBasicBlock::iterator Term = MBB->getFirstTerminator(); 544 adjustLiveRegs(Bundle.Mask, Term); 545 546 if (!Bundle.Mask) { 547 DEBUG(dbgs() << "No live-outs.\n"); 548 return; 549 } 550 551 // Has the stack order been fixed yet? 552 DEBUG(dbgs() << "LB#" << BundleIdx << ": "); 553 if (Bundle.isFixed()) { 554 DEBUG(dbgs() << "Shuffling stack to match.\n"); 555 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term); 556 } else { 557 // Not fixed yet, we get to choose. 558 DEBUG(dbgs() << "Fixing stack order now.\n"); 559 Bundle.FixCount = StackTop; 560 for (unsigned i = 0; i < StackTop; ++i) 561 Bundle.FixStack[i] = getStackEntry(i); 562 } 563} 564 565 566//===----------------------------------------------------------------------===// 567// Efficient Lookup Table Support 568//===----------------------------------------------------------------------===// 569 570namespace { 571 struct TableEntry { 572 unsigned from; 573 unsigned to; 574 bool operator<(const TableEntry &TE) const { return from < TE.from; } 575 friend bool operator<(const TableEntry &TE, unsigned V) { 576 return TE.from < V; 577 } 578 friend bool ATTRIBUTE_USED operator<(unsigned V, const TableEntry &TE) { 579 return V < TE.from; 580 } 581 }; 582} 583 584#ifndef NDEBUG 585static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) { 586 for (unsigned i = 0; i != NumEntries-1; ++i) 587 if (!(Table[i] < Table[i+1])) return false; 588 return true; 589} 590#endif 591 592static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) { 593 const TableEntry *I = 
std::lower_bound(Table, Table+N, Opcode); 594 if (I != Table+N && I->from == Opcode) 595 return I->to; 596 return -1; 597} 598 599#ifdef NDEBUG 600#define ASSERT_SORTED(TABLE) 601#else 602#define ASSERT_SORTED(TABLE) \ 603 { static bool TABLE##Checked = false; \ 604 if (!TABLE##Checked) { \ 605 assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \ 606 "All lookup tables must be sorted for efficient access!"); \ 607 TABLE##Checked = true; \ 608 } \ 609 } 610#endif 611 612//===----------------------------------------------------------------------===// 613// Register File -> Register Stack Mapping Methods 614//===----------------------------------------------------------------------===// 615 616// OpcodeTable - Sorted map of register instructions to their stack version. 617// The first element is an register file pseudo instruction, the second is the 618// concrete X86 instruction which uses the register stack. 619// 620static const TableEntry OpcodeTable[] = { 621 { X86::ABS_Fp32 , X86::ABS_F }, 622 { X86::ABS_Fp64 , X86::ABS_F }, 623 { X86::ABS_Fp80 , X86::ABS_F }, 624 { X86::ADD_Fp32m , X86::ADD_F32m }, 625 { X86::ADD_Fp64m , X86::ADD_F64m }, 626 { X86::ADD_Fp64m32 , X86::ADD_F32m }, 627 { X86::ADD_Fp80m32 , X86::ADD_F32m }, 628 { X86::ADD_Fp80m64 , X86::ADD_F64m }, 629 { X86::ADD_FpI16m32 , X86::ADD_FI16m }, 630 { X86::ADD_FpI16m64 , X86::ADD_FI16m }, 631 { X86::ADD_FpI16m80 , X86::ADD_FI16m }, 632 { X86::ADD_FpI32m32 , X86::ADD_FI32m }, 633 { X86::ADD_FpI32m64 , X86::ADD_FI32m }, 634 { X86::ADD_FpI32m80 , X86::ADD_FI32m }, 635 { X86::CHS_Fp32 , X86::CHS_F }, 636 { X86::CHS_Fp64 , X86::CHS_F }, 637 { X86::CHS_Fp80 , X86::CHS_F }, 638 { X86::CMOVBE_Fp32 , X86::CMOVBE_F }, 639 { X86::CMOVBE_Fp64 , X86::CMOVBE_F }, 640 { X86::CMOVBE_Fp80 , X86::CMOVBE_F }, 641 { X86::CMOVB_Fp32 , X86::CMOVB_F }, 642 { X86::CMOVB_Fp64 , X86::CMOVB_F }, 643 { X86::CMOVB_Fp80 , X86::CMOVB_F }, 644 { X86::CMOVE_Fp32 , X86::CMOVE_F }, 645 { X86::CMOVE_Fp64 , X86::CMOVE_F }, 646 { 
X86::CMOVE_Fp80 , X86::CMOVE_F }, 647 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F }, 648 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F }, 649 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F }, 650 { X86::CMOVNB_Fp32 , X86::CMOVNB_F }, 651 { X86::CMOVNB_Fp64 , X86::CMOVNB_F }, 652 { X86::CMOVNB_Fp80 , X86::CMOVNB_F }, 653 { X86::CMOVNE_Fp32 , X86::CMOVNE_F }, 654 { X86::CMOVNE_Fp64 , X86::CMOVNE_F }, 655 { X86::CMOVNE_Fp80 , X86::CMOVNE_F }, 656 { X86::CMOVNP_Fp32 , X86::CMOVNP_F }, 657 { X86::CMOVNP_Fp64 , X86::CMOVNP_F }, 658 { X86::CMOVNP_Fp80 , X86::CMOVNP_F }, 659 { X86::CMOVP_Fp32 , X86::CMOVP_F }, 660 { X86::CMOVP_Fp64 , X86::CMOVP_F }, 661 { X86::CMOVP_Fp80 , X86::CMOVP_F }, 662 { X86::COS_Fp32 , X86::COS_F }, 663 { X86::COS_Fp64 , X86::COS_F }, 664 { X86::COS_Fp80 , X86::COS_F }, 665 { X86::DIVR_Fp32m , X86::DIVR_F32m }, 666 { X86::DIVR_Fp64m , X86::DIVR_F64m }, 667 { X86::DIVR_Fp64m32 , X86::DIVR_F32m }, 668 { X86::DIVR_Fp80m32 , X86::DIVR_F32m }, 669 { X86::DIVR_Fp80m64 , X86::DIVR_F64m }, 670 { X86::DIVR_FpI16m32, X86::DIVR_FI16m}, 671 { X86::DIVR_FpI16m64, X86::DIVR_FI16m}, 672 { X86::DIVR_FpI16m80, X86::DIVR_FI16m}, 673 { X86::DIVR_FpI32m32, X86::DIVR_FI32m}, 674 { X86::DIVR_FpI32m64, X86::DIVR_FI32m}, 675 { X86::DIVR_FpI32m80, X86::DIVR_FI32m}, 676 { X86::DIV_Fp32m , X86::DIV_F32m }, 677 { X86::DIV_Fp64m , X86::DIV_F64m }, 678 { X86::DIV_Fp64m32 , X86::DIV_F32m }, 679 { X86::DIV_Fp80m32 , X86::DIV_F32m }, 680 { X86::DIV_Fp80m64 , X86::DIV_F64m }, 681 { X86::DIV_FpI16m32 , X86::DIV_FI16m }, 682 { X86::DIV_FpI16m64 , X86::DIV_FI16m }, 683 { X86::DIV_FpI16m80 , X86::DIV_FI16m }, 684 { X86::DIV_FpI32m32 , X86::DIV_FI32m }, 685 { X86::DIV_FpI32m64 , X86::DIV_FI32m }, 686 { X86::DIV_FpI32m80 , X86::DIV_FI32m }, 687 { X86::ILD_Fp16m32 , X86::ILD_F16m }, 688 { X86::ILD_Fp16m64 , X86::ILD_F16m }, 689 { X86::ILD_Fp16m80 , X86::ILD_F16m }, 690 { X86::ILD_Fp32m32 , X86::ILD_F32m }, 691 { X86::ILD_Fp32m64 , X86::ILD_F32m }, 692 { X86::ILD_Fp32m80 , X86::ILD_F32m }, 693 { X86::ILD_Fp64m32 , 
X86::ILD_F64m }, 694 { X86::ILD_Fp64m64 , X86::ILD_F64m }, 695 { X86::ILD_Fp64m80 , X86::ILD_F64m }, 696 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m}, 697 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m}, 698 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m}, 699 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m}, 700 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m}, 701 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m}, 702 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m}, 703 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m}, 704 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m}, 705 { X86::IST_Fp16m32 , X86::IST_F16m }, 706 { X86::IST_Fp16m64 , X86::IST_F16m }, 707 { X86::IST_Fp16m80 , X86::IST_F16m }, 708 { X86::IST_Fp32m32 , X86::IST_F32m }, 709 { X86::IST_Fp32m64 , X86::IST_F32m }, 710 { X86::IST_Fp32m80 , X86::IST_F32m }, 711 { X86::IST_Fp64m32 , X86::IST_FP64m }, 712 { X86::IST_Fp64m64 , X86::IST_FP64m }, 713 { X86::IST_Fp64m80 , X86::IST_FP64m }, 714 { X86::LD_Fp032 , X86::LD_F0 }, 715 { X86::LD_Fp064 , X86::LD_F0 }, 716 { X86::LD_Fp080 , X86::LD_F0 }, 717 { X86::LD_Fp132 , X86::LD_F1 }, 718 { X86::LD_Fp164 , X86::LD_F1 }, 719 { X86::LD_Fp180 , X86::LD_F1 }, 720 { X86::LD_Fp32m , X86::LD_F32m }, 721 { X86::LD_Fp32m64 , X86::LD_F32m }, 722 { X86::LD_Fp32m80 , X86::LD_F32m }, 723 { X86::LD_Fp64m , X86::LD_F64m }, 724 { X86::LD_Fp64m80 , X86::LD_F64m }, 725 { X86::LD_Fp80m , X86::LD_F80m }, 726 { X86::MUL_Fp32m , X86::MUL_F32m }, 727 { X86::MUL_Fp64m , X86::MUL_F64m }, 728 { X86::MUL_Fp64m32 , X86::MUL_F32m }, 729 { X86::MUL_Fp80m32 , X86::MUL_F32m }, 730 { X86::MUL_Fp80m64 , X86::MUL_F64m }, 731 { X86::MUL_FpI16m32 , X86::MUL_FI16m }, 732 { X86::MUL_FpI16m64 , X86::MUL_FI16m }, 733 { X86::MUL_FpI16m80 , X86::MUL_FI16m }, 734 { X86::MUL_FpI32m32 , X86::MUL_FI32m }, 735 { X86::MUL_FpI32m64 , X86::MUL_FI32m }, 736 { X86::MUL_FpI32m80 , X86::MUL_FI32m }, 737 { X86::SIN_Fp32 , X86::SIN_F }, 738 { X86::SIN_Fp64 , X86::SIN_F }, 739 { X86::SIN_Fp80 , X86::SIN_F }, 740 { X86::SQRT_Fp32 , X86::SQRT_F }, 741 { X86::SQRT_Fp64 , X86::SQRT_F }, 742 { 
X86::SQRT_Fp80 , X86::SQRT_F }, 743 { X86::ST_Fp32m , X86::ST_F32m }, 744 { X86::ST_Fp64m , X86::ST_F64m }, 745 { X86::ST_Fp64m32 , X86::ST_F32m }, 746 { X86::ST_Fp80m32 , X86::ST_F32m }, 747 { X86::ST_Fp80m64 , X86::ST_F64m }, 748 { X86::ST_FpP80m , X86::ST_FP80m }, 749 { X86::SUBR_Fp32m , X86::SUBR_F32m }, 750 { X86::SUBR_Fp64m , X86::SUBR_F64m }, 751 { X86::SUBR_Fp64m32 , X86::SUBR_F32m }, 752 { X86::SUBR_Fp80m32 , X86::SUBR_F32m }, 753 { X86::SUBR_Fp80m64 , X86::SUBR_F64m }, 754 { X86::SUBR_FpI16m32, X86::SUBR_FI16m}, 755 { X86::SUBR_FpI16m64, X86::SUBR_FI16m}, 756 { X86::SUBR_FpI16m80, X86::SUBR_FI16m}, 757 { X86::SUBR_FpI32m32, X86::SUBR_FI32m}, 758 { X86::SUBR_FpI32m64, X86::SUBR_FI32m}, 759 { X86::SUBR_FpI32m80, X86::SUBR_FI32m}, 760 { X86::SUB_Fp32m , X86::SUB_F32m }, 761 { X86::SUB_Fp64m , X86::SUB_F64m }, 762 { X86::SUB_Fp64m32 , X86::SUB_F32m }, 763 { X86::SUB_Fp80m32 , X86::SUB_F32m }, 764 { X86::SUB_Fp80m64 , X86::SUB_F64m }, 765 { X86::SUB_FpI16m32 , X86::SUB_FI16m }, 766 { X86::SUB_FpI16m64 , X86::SUB_FI16m }, 767 { X86::SUB_FpI16m80 , X86::SUB_FI16m }, 768 { X86::SUB_FpI32m32 , X86::SUB_FI32m }, 769 { X86::SUB_FpI32m64 , X86::SUB_FI32m }, 770 { X86::SUB_FpI32m80 , X86::SUB_FI32m }, 771 { X86::TST_Fp32 , X86::TST_F }, 772 { X86::TST_Fp64 , X86::TST_F }, 773 { X86::TST_Fp80 , X86::TST_F }, 774 { X86::UCOM_FpIr32 , X86::UCOM_FIr }, 775 { X86::UCOM_FpIr64 , X86::UCOM_FIr }, 776 { X86::UCOM_FpIr80 , X86::UCOM_FIr }, 777 { X86::UCOM_Fpr32 , X86::UCOM_Fr }, 778 { X86::UCOM_Fpr64 , X86::UCOM_Fr }, 779 { X86::UCOM_Fpr80 , X86::UCOM_Fr }, 780}; 781 782static unsigned getConcreteOpcode(unsigned Opcode) { 783 ASSERT_SORTED(OpcodeTable); 784 int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode); 785 assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!"); 786 return Opc; 787} 788 789//===----------------------------------------------------------------------===// 790// Helper Methods 
791//===----------------------------------------------------------------------===// 792 793// PopTable - Sorted map of instructions to their popping version. The first 794// element is an instruction, the second is the version which pops. 795// 796static const TableEntry PopTable[] = { 797 { X86::ADD_FrST0 , X86::ADD_FPrST0 }, 798 799 { X86::DIVR_FrST0, X86::DIVR_FPrST0 }, 800 { X86::DIV_FrST0 , X86::DIV_FPrST0 }, 801 802 { X86::IST_F16m , X86::IST_FP16m }, 803 { X86::IST_F32m , X86::IST_FP32m }, 804 805 { X86::MUL_FrST0 , X86::MUL_FPrST0 }, 806 807 { X86::ST_F32m , X86::ST_FP32m }, 808 { X86::ST_F64m , X86::ST_FP64m }, 809 { X86::ST_Frr , X86::ST_FPrr }, 810 811 { X86::SUBR_FrST0, X86::SUBR_FPrST0 }, 812 { X86::SUB_FrST0 , X86::SUB_FPrST0 }, 813 814 { X86::UCOM_FIr , X86::UCOM_FIPr }, 815 816 { X86::UCOM_FPr , X86::UCOM_FPPr }, 817 { X86::UCOM_Fr , X86::UCOM_FPr }, 818}; 819 820/// popStackAfter - Pop the current value off of the top of the FP stack after 821/// the specified instruction. This attempts to be sneaky and combine the pop 822/// into the instruction itself if possible. The iterator is left pointing to 823/// the last instruction, be it a new pop instruction inserted, or the old 824/// instruction if it was modified in place. 825/// 826void FPS::popStackAfter(MachineBasicBlock::iterator &I) { 827 MachineInstr* MI = I; 828 DebugLoc dl = MI->getDebugLoc(); 829 ASSERT_SORTED(PopTable); 830 if (StackTop == 0) 831 report_fatal_error("Cannot pop empty stack!"); 832 RegMap[Stack[--StackTop]] = ~0; // Update state 833 834 // Check to see if there is a popping version of this instruction... 
835 int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode()); 836 if (Opcode != -1) { 837 I->setDesc(TII->get(Opcode)); 838 if (Opcode == X86::UCOM_FPPr) 839 I->RemoveOperand(0); 840 } else { // Insert an explicit pop 841 I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0); 842 } 843} 844 845/// freeStackSlotAfter - Free the specified register from the register stack, so 846/// that it is no longer in a register. If the register is currently at the top 847/// of the stack, we just pop the current instruction, otherwise we store the 848/// current top-of-stack into the specified slot, then pop the top of stack. 849void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) { 850 if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy. 851 popStackAfter(I); 852 return; 853 } 854 855 // Otherwise, store the top of stack into the dead slot, killing the operand 856 // without having to add in an explicit xchg then pop. 857 // 858 I = freeStackSlotBefore(++I, FPRegNo); 859} 860 861/// freeStackSlotBefore - Free the specified register without trying any 862/// folding. 863MachineBasicBlock::iterator 864FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) { 865 unsigned STReg = getSTReg(FPRegNo); 866 unsigned OldSlot = getSlot(FPRegNo); 867 unsigned TopReg = Stack[StackTop-1]; 868 Stack[OldSlot] = TopReg; 869 RegMap[TopReg] = OldSlot; 870 RegMap[FPRegNo] = ~0; 871 Stack[--StackTop] = ~0; 872 return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg); 873} 874 875/// adjustLiveRegs - Kill and revive registers such that exactly the FP 876/// registers with a bit in Mask are live. 877void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) { 878 unsigned Defs = Mask; 879 unsigned Kills = 0; 880 for (unsigned i = 0; i < StackTop; ++i) { 881 unsigned RegNo = Stack[i]; 882 if (!(Defs & (1 << RegNo))) 883 // This register is live, but we don't want it. 
884 Kills |= (1 << RegNo); 885 else 886 // We don't need to imp-def this live register. 887 Defs &= ~(1 << RegNo); 888 } 889 assert((Kills & Defs) == 0 && "Register needs killing and def'ing?"); 890 891 // Produce implicit-defs for free by using killed registers. 892 while (Kills && Defs) { 893 unsigned KReg = CountTrailingZeros_32(Kills); 894 unsigned DReg = CountTrailingZeros_32(Defs); 895 DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n"); 896 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]); 897 std::swap(RegMap[KReg], RegMap[DReg]); 898 Kills &= ~(1 << KReg); 899 Defs &= ~(1 << DReg); 900 } 901 902 // Kill registers by popping. 903 if (Kills && I != MBB->begin()) { 904 MachineBasicBlock::iterator I2 = llvm::prior(I); 905 for (;;) { 906 unsigned KReg = getStackEntry(0); 907 if (!(Kills & (1 << KReg))) 908 break; 909 DEBUG(dbgs() << "Popping %FP" << KReg << "\n"); 910 popStackAfter(I2); 911 Kills &= ~(1 << KReg); 912 } 913 } 914 915 // Manually kill the rest. 916 while (Kills) { 917 unsigned KReg = CountTrailingZeros_32(Kills); 918 DEBUG(dbgs() << "Killing %FP" << KReg << "\n"); 919 freeStackSlotBefore(I, KReg); 920 Kills &= ~(1 << KReg); 921 } 922 923 // Load zeros for all the imp-defs. 924 while(Defs) { 925 unsigned DReg = CountTrailingZeros_32(Defs); 926 DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n"); 927 BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0)); 928 pushReg(DReg); 929 Defs &= ~(1 << DReg); 930 } 931 932 // Now we should have the correct registers live. 933 DEBUG(dumpStack()); 934 assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch"); 935} 936 937/// shuffleStackTop - emit fxch instructions before I to shuffle the top 938/// FixCount entries into the order given by FixStack. 939/// FIXME: Is there a better algorithm than insertion sort? 
940void FPS::shuffleStackTop(const unsigned char *FixStack, 941 unsigned FixCount, 942 MachineBasicBlock::iterator I) { 943 // Move items into place, starting from the desired stack bottom. 944 while (FixCount--) { 945 // Old register at position FixCount. 946 unsigned OldReg = getStackEntry(FixCount); 947 // Desired register at position FixCount. 948 unsigned Reg = FixStack[FixCount]; 949 if (Reg == OldReg) 950 continue; 951 // (Reg st0) (OldReg st0) = (Reg OldReg st0) 952 moveToTop(Reg, I); 953 moveToTop(OldReg, I); 954 } 955 DEBUG(dumpStack()); 956} 957 958 959//===----------------------------------------------------------------------===// 960// Instruction transformation implementation 961//===----------------------------------------------------------------------===// 962 963/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem> 964/// 965void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { 966 MachineInstr *MI = I; 967 unsigned DestReg = getFPReg(MI->getOperand(0)); 968 969 // Change from the pseudo instruction to the concrete instruction. 970 MI->RemoveOperand(0); // Remove the explicit ST(0) operand 971 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 972 973 // Result gets pushed on the stack. 974 pushReg(DestReg); 975} 976 977/// handleOneArgFP - fst <mem>, ST(0) 978/// 979void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { 980 MachineInstr *MI = I; 981 unsigned NumOps = MI->getDesc().getNumOperands(); 982 assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) && 983 "Can only handle fst* & ftst instructions!"); 984 985 // Is this the last use of the source register? 986 unsigned Reg = getFPReg(MI->getOperand(NumOps-1)); 987 bool KillsSrc = MI->killsRegister(X86::FP0+Reg); 988 989 // FISTP64m is strange because there isn't a non-popping versions. 990 // If we have one _and_ we don't want to pop the operand, duplicate the value 991 // on the stack instead of moving it. This ensure that popping the value is 992 // always ok. 
993 // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m. 994 // 995 if (!KillsSrc && 996 (MI->getOpcode() == X86::IST_Fp64m32 || 997 MI->getOpcode() == X86::ISTT_Fp16m32 || 998 MI->getOpcode() == X86::ISTT_Fp32m32 || 999 MI->getOpcode() == X86::ISTT_Fp64m32 || 1000 MI->getOpcode() == X86::IST_Fp64m64 || 1001 MI->getOpcode() == X86::ISTT_Fp16m64 || 1002 MI->getOpcode() == X86::ISTT_Fp32m64 || 1003 MI->getOpcode() == X86::ISTT_Fp64m64 || 1004 MI->getOpcode() == X86::IST_Fp64m80 || 1005 MI->getOpcode() == X86::ISTT_Fp16m80 || 1006 MI->getOpcode() == X86::ISTT_Fp32m80 || 1007 MI->getOpcode() == X86::ISTT_Fp64m80 || 1008 MI->getOpcode() == X86::ST_FpP80m)) { 1009 duplicateToTop(Reg, getScratchReg(), I); 1010 } else { 1011 moveToTop(Reg, I); // Move to the top of the stack... 1012 } 1013 1014 // Convert from the pseudo instruction to the concrete instruction. 1015 MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand 1016 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 1017 1018 if (MI->getOpcode() == X86::IST_FP64m || 1019 MI->getOpcode() == X86::ISTT_FP16m || 1020 MI->getOpcode() == X86::ISTT_FP32m || 1021 MI->getOpcode() == X86::ISTT_FP64m || 1022 MI->getOpcode() == X86::ST_FP80m) { 1023 if (StackTop == 0) 1024 report_fatal_error("Stack empty??"); 1025 --StackTop; 1026 } else if (KillsSrc) { // Last use of operand? 1027 popStackAfter(I); 1028 } 1029} 1030 1031 1032/// handleOneArgFPRW: Handle instructions that read from the top of stack and 1033/// replace the value with a newly computed value. These instructions may have 1034/// non-fp operands after their FP operands. 1035/// 1036/// Examples: 1037/// R1 = fchs R2 1038/// R1 = fadd R2, [mem] 1039/// 1040void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) { 1041 MachineInstr *MI = I; 1042#ifndef NDEBUG 1043 unsigned NumOps = MI->getDesc().getNumOperands(); 1044 assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!"); 1045#endif 1046 1047 // Is this the last use of the source register? 
1048 unsigned Reg = getFPReg(MI->getOperand(1)); 1049 bool KillsSrc = MI->killsRegister(X86::FP0+Reg); 1050 1051 if (KillsSrc) { 1052 // If this is the last use of the source register, just make sure it's on 1053 // the top of the stack. 1054 moveToTop(Reg, I); 1055 if (StackTop == 0) 1056 report_fatal_error("Stack cannot be empty!"); 1057 --StackTop; 1058 pushReg(getFPReg(MI->getOperand(0))); 1059 } else { 1060 // If this is not the last use of the source register, _copy_ it to the top 1061 // of the stack. 1062 duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I); 1063 } 1064 1065 // Change from the pseudo instruction to the concrete instruction. 1066 MI->RemoveOperand(1); // Drop the source operand. 1067 MI->RemoveOperand(0); // Drop the destination operand. 1068 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 1069} 1070 1071 1072//===----------------------------------------------------------------------===// 1073// Define tables of various ways to map pseudo instructions 1074// 1075 1076// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i) 1077static const TableEntry ForwardST0Table[] = { 1078 { X86::ADD_Fp32 , X86::ADD_FST0r }, 1079 { X86::ADD_Fp64 , X86::ADD_FST0r }, 1080 { X86::ADD_Fp80 , X86::ADD_FST0r }, 1081 { X86::DIV_Fp32 , X86::DIV_FST0r }, 1082 { X86::DIV_Fp64 , X86::DIV_FST0r }, 1083 { X86::DIV_Fp80 , X86::DIV_FST0r }, 1084 { X86::MUL_Fp32 , X86::MUL_FST0r }, 1085 { X86::MUL_Fp64 , X86::MUL_FST0r }, 1086 { X86::MUL_Fp80 , X86::MUL_FST0r }, 1087 { X86::SUB_Fp32 , X86::SUB_FST0r }, 1088 { X86::SUB_Fp64 , X86::SUB_FST0r }, 1089 { X86::SUB_Fp80 , X86::SUB_FST0r }, 1090}; 1091 1092// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0) 1093static const TableEntry ReverseST0Table[] = { 1094 { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative 1095 { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative 1096 { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative 1097 { X86::DIV_Fp32 , X86::DIVR_FST0r }, 1098 { X86::DIV_Fp64 , 
X86::DIVR_FST0r }, 1099 { X86::DIV_Fp80 , X86::DIVR_FST0r }, 1100 { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative 1101 { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative 1102 { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative 1103 { X86::SUB_Fp32 , X86::SUBR_FST0r }, 1104 { X86::SUB_Fp64 , X86::SUBR_FST0r }, 1105 { X86::SUB_Fp80 , X86::SUBR_FST0r }, 1106}; 1107 1108// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i) 1109static const TableEntry ForwardSTiTable[] = { 1110 { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative 1111 { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative 1112 { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative 1113 { X86::DIV_Fp32 , X86::DIVR_FrST0 }, 1114 { X86::DIV_Fp64 , X86::DIVR_FrST0 }, 1115 { X86::DIV_Fp80 , X86::DIVR_FrST0 }, 1116 { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative 1117 { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative 1118 { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative 1119 { X86::SUB_Fp32 , X86::SUBR_FrST0 }, 1120 { X86::SUB_Fp64 , X86::SUBR_FrST0 }, 1121 { X86::SUB_Fp80 , X86::SUBR_FrST0 }, 1122}; 1123 1124// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0) 1125static const TableEntry ReverseSTiTable[] = { 1126 { X86::ADD_Fp32 , X86::ADD_FrST0 }, 1127 { X86::ADD_Fp64 , X86::ADD_FrST0 }, 1128 { X86::ADD_Fp80 , X86::ADD_FrST0 }, 1129 { X86::DIV_Fp32 , X86::DIV_FrST0 }, 1130 { X86::DIV_Fp64 , X86::DIV_FrST0 }, 1131 { X86::DIV_Fp80 , X86::DIV_FrST0 }, 1132 { X86::MUL_Fp32 , X86::MUL_FrST0 }, 1133 { X86::MUL_Fp64 , X86::MUL_FrST0 }, 1134 { X86::MUL_Fp80 , X86::MUL_FrST0 }, 1135 { X86::SUB_Fp32 , X86::SUB_FrST0 }, 1136 { X86::SUB_Fp64 , X86::SUB_FrST0 }, 1137 { X86::SUB_Fp80 , X86::SUB_FrST0 }, 1138}; 1139 1140 1141/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual 1142/// instructions which need to be simplified and possibly transformed. 
1143/// 1144/// Result: ST(0) = fsub ST(0), ST(i) 1145/// ST(i) = fsub ST(0), ST(i) 1146/// ST(0) = fsubr ST(0), ST(i) 1147/// ST(i) = fsubr ST(0), ST(i) 1148/// 1149void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) { 1150 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 1151 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 1152 MachineInstr *MI = I; 1153 1154 unsigned NumOperands = MI->getDesc().getNumOperands(); 1155 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!"); 1156 unsigned Dest = getFPReg(MI->getOperand(0)); 1157 unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); 1158 unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); 1159 bool KillsOp0 = MI->killsRegister(X86::FP0+Op0); 1160 bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 1161 DebugLoc dl = MI->getDebugLoc(); 1162 1163 unsigned TOS = getStackEntry(0); 1164 1165 // One of our operands must be on the top of the stack. If neither is yet, we 1166 // need to move one. 1167 if (Op0 != TOS && Op1 != TOS) { // No operand at TOS? 1168 // We can choose to move either operand to the top of the stack. If one of 1169 // the operands is killed by this instruction, we want that one so that we 1170 // can update right on top of the old version. 1171 if (KillsOp0) { 1172 moveToTop(Op0, I); // Move dead operand to TOS. 1173 TOS = Op0; 1174 } else if (KillsOp1) { 1175 moveToTop(Op1, I); 1176 TOS = Op1; 1177 } else { 1178 // All of the operands are live after this instruction executes, so we 1179 // cannot update on top of any operand. Because of this, we must 1180 // duplicate one of the stack elements to the top. It doesn't matter 1181 // which one we pick. 
1182 // 1183 duplicateToTop(Op0, Dest, I); 1184 Op0 = TOS = Dest; 1185 KillsOp0 = true; 1186 } 1187 } else if (!KillsOp0 && !KillsOp1) { 1188 // If we DO have one of our operands at the top of the stack, but we don't 1189 // have a dead operand, we must duplicate one of the operands to a new slot 1190 // on the stack. 1191 duplicateToTop(Op0, Dest, I); 1192 Op0 = TOS = Dest; 1193 KillsOp0 = true; 1194 } 1195 1196 // Now we know that one of our operands is on the top of the stack, and at 1197 // least one of our operands is killed by this instruction. 1198 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) && 1199 "Stack conditions not set up right!"); 1200 1201 // We decide which form to use based on what is on the top of the stack, and 1202 // which operand is killed by this instruction. 1203 const TableEntry *InstTable; 1204 bool isForward = TOS == Op0; 1205 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0); 1206 if (updateST0) { 1207 if (isForward) 1208 InstTable = ForwardST0Table; 1209 else 1210 InstTable = ReverseST0Table; 1211 } else { 1212 if (isForward) 1213 InstTable = ForwardSTiTable; 1214 else 1215 InstTable = ReverseSTiTable; 1216 } 1217 1218 int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table), 1219 MI->getOpcode()); 1220 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!"); 1221 1222 // NotTOS - The register which is not on the top of stack... 1223 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0; 1224 1225 // Replace the old instruction with a new instruction 1226 MBB->remove(I++); 1227 I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS)); 1228 1229 // If both operands are killed, pop one off of the stack in addition to 1230 // overwriting the other one. 
1231 if (KillsOp0 && KillsOp1 && Op0 != Op1) { 1232 assert(!updateST0 && "Should have updated other operand!"); 1233 popStackAfter(I); // Pop the top of stack 1234 } 1235 1236 // Update stack information so that we know the destination register is now on 1237 // the stack. 1238 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS); 1239 assert(UpdatedSlot < StackTop && Dest < 7); 1240 Stack[UpdatedSlot] = Dest; 1241 RegMap[Dest] = UpdatedSlot; 1242 MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction 1243} 1244 1245/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP 1246/// register arguments and no explicit destinations. 1247/// 1248void FPS::handleCompareFP(MachineBasicBlock::iterator &I) { 1249 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 1250 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 1251 MachineInstr *MI = I; 1252 1253 unsigned NumOperands = MI->getDesc().getNumOperands(); 1254 assert(NumOperands == 2 && "Illegal FUCOM* instruction!"); 1255 unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); 1256 unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); 1257 bool KillsOp0 = MI->killsRegister(X86::FP0+Op0); 1258 bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 1259 1260 // Make sure the first operand is on the top of stack, the other one can be 1261 // anywhere. 1262 moveToTop(Op0, I); 1263 1264 // Change from the pseudo instruction to the concrete instruction. 1265 MI->getOperand(0).setReg(getSTReg(Op1)); 1266 MI->RemoveOperand(1); 1267 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 1268 1269 // If any of the operands are killed by this instruction, free them. 1270 if (KillsOp0) freeStackSlotAfter(I, Op0); 1271 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1); 1272} 1273 1274/// handleCondMovFP - Handle two address conditional move instructions. These 1275/// instructions move a st(i) register to st(0) iff a condition is true. 
These 1276/// instructions require that the first operand is at the top of the stack, but 1277/// otherwise don't modify the stack at all. 1278void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) { 1279 MachineInstr *MI = I; 1280 1281 unsigned Op0 = getFPReg(MI->getOperand(0)); 1282 unsigned Op1 = getFPReg(MI->getOperand(2)); 1283 bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 1284 1285 // The first operand *must* be on the top of the stack. 1286 moveToTop(Op0, I); 1287 1288 // Change the second operand to the stack register that the operand is in. 1289 // Change from the pseudo instruction to the concrete instruction. 1290 MI->RemoveOperand(0); 1291 MI->RemoveOperand(1); 1292 MI->getOperand(0).setReg(getSTReg(Op1)); 1293 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 1294 1295 // If we kill the second operand, make sure to pop it from the stack. 1296 if (Op0 != Op1 && KillsOp1) { 1297 // Get this value off of the register stack. 1298 freeStackSlotAfter(I, Op1); 1299 } 1300} 1301 1302 1303/// handleSpecialFP - Handle special instructions which behave unlike other 1304/// floating point instructions. This is primarily intended for use by pseudo 1305/// instructions. 1306/// 1307void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { 1308 MachineInstr *MI = I; 1309 DebugLoc dl = MI->getDebugLoc(); 1310 switch (MI->getOpcode()) { 1311 default: llvm_unreachable("Unknown SpecialFP instruction!"); 1312 case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type! 1313 case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type! 1314 case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type! 1315 assert(StackTop == 0 && "Stack should be empty after a call!"); 1316 pushReg(getFPReg(MI->getOperand(0))); 1317 break; 1318 case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type! 1319 case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type! 
1320 case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type! 1321 // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm. 1322 // The pattern we expect is: 1323 // CALL 1324 // FP1 = FpGET_ST0 1325 // FP4 = FpGET_ST1 1326 // 1327 // At this point, we've pushed FP1 on the top of stack, so it should be 1328 // present if it isn't dead. If it was dead, we already emitted a pop to 1329 // remove it from the stack and StackTop = 0. 1330 1331 // Push FP4 as top of stack next. 1332 pushReg(getFPReg(MI->getOperand(0))); 1333 1334 // If StackTop was 0 before we pushed our operand, then ST(0) must have been 1335 // dead. In this case, the ST(1) value is the only thing that is live, so 1336 // it should be on the TOS (after the pop that was emitted) and is. Just 1337 // continue in this case. 1338 if (StackTop == 1) 1339 break; 1340 1341 // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top 1342 // elements so that our accounting is correct. 1343 unsigned RegOnTop = getStackEntry(0); 1344 unsigned RegNo = getStackEntry(1); 1345 1346 // Swap the slots the regs are in. 1347 std::swap(RegMap[RegNo], RegMap[RegOnTop]); 1348 1349 // Swap stack slot contents. 1350 if (RegMap[RegOnTop] >= StackTop) 1351 report_fatal_error("Access past stack top!"); 1352 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); 1353 break; 1354 } 1355 case X86::FpSET_ST0_32: 1356 case X86::FpSET_ST0_64: 1357 case X86::FpSET_ST0_80: { 1358 // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm 1359 // arguments that use an st constraint. We expect a sequence of 1360 // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM 1361 unsigned Op0 = getFPReg(MI->getOperand(0)); 1362 1363 if (!MI->killsRegister(X86::FP0 + Op0)) { 1364 // Duplicate Op0 into a temporary on the stack top. 1365 duplicateToTop(Op0, getScratchReg(), I); 1366 } else { 1367 // Op0 is killed, so just swap it into position. 
1368 moveToTop(Op0, I); 1369 } 1370 --StackTop; // "Forget" we have something on the top of stack! 1371 break; 1372 } 1373 case X86::FpSET_ST1_32: 1374 case X86::FpSET_ST1_64: 1375 case X86::FpSET_ST1_80: { 1376 // Set up st(1) for inline asm. We are assuming that st(0) has already been 1377 // set up by FpSET_ST0, and our StackTop is off by one because of it. 1378 unsigned Op0 = getFPReg(MI->getOperand(0)); 1379 // Restore the actual StackTop from before Fp_SET_ST0. 1380 // Note we can't handle Fp_SET_ST1 without a preceeding Fp_SET_ST0, and we 1381 // are not enforcing the constraint. 1382 ++StackTop; 1383 unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0). 1384 if (!MI->killsRegister(X86::FP0 + Op0)) { 1385 duplicateToTop(Op0, getScratchReg(), I); 1386 moveToTop(RegOnTop, I); 1387 } else if (getSTReg(Op0) != X86::ST1) { 1388 // We have the wrong value at st(1). Shuffle! Untested! 1389 moveToTop(getStackEntry(1), I); 1390 moveToTop(Op0, I); 1391 moveToTop(RegOnTop, I); 1392 } 1393 assert(StackTop >= 2 && "Too few live registers"); 1394 StackTop -= 2; // "Forget" both st(0) and st(1). 1395 break; 1396 } 1397 case X86::MOV_Fp3232: 1398 case X86::MOV_Fp3264: 1399 case X86::MOV_Fp6432: 1400 case X86::MOV_Fp6464: 1401 case X86::MOV_Fp3280: 1402 case X86::MOV_Fp6480: 1403 case X86::MOV_Fp8032: 1404 case X86::MOV_Fp8064: 1405 case X86::MOV_Fp8080: { 1406 const MachineOperand &MO1 = MI->getOperand(1); 1407 unsigned SrcReg = getFPReg(MO1); 1408 1409 const MachineOperand &MO0 = MI->getOperand(0); 1410 unsigned DestReg = getFPReg(MO0); 1411 if (MI->killsRegister(X86::FP0+SrcReg)) { 1412 // If the input operand is killed, we can just change the owner of the 1413 // incoming stack slot into the result. 1414 unsigned Slot = getSlot(SrcReg); 1415 assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!"); 1416 Stack[Slot] = DestReg; 1417 RegMap[DestReg] = Slot; 1418 1419 } else { 1420 // For FMOV we just duplicate the specified value to a new stack slot. 
1421 // This could be made better, but would require substantial changes. 1422 duplicateToTop(SrcReg, DestReg, I); 1423 } 1424 } 1425 break; 1426 case TargetOpcode::INLINEASM: { 1427 // The inline asm MachineInstr currently only *uses* FP registers for the 1428 // 'f' constraint. These should be turned into the current ST(x) register 1429 // in the machine instr. Also, any kills should be explicitly popped after 1430 // the inline asm. 1431 unsigned Kills = 0; 1432 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1433 MachineOperand &Op = MI->getOperand(i); 1434 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) 1435 continue; 1436 assert(Op.isUse() && "Only handle inline asm uses right now"); 1437 1438 unsigned FPReg = getFPReg(Op); 1439 Op.setReg(getSTReg(FPReg)); 1440 1441 // If we kill this operand, make sure to pop it from the stack after the 1442 // asm. We just remember it for now, and pop them all off at the end in 1443 // a batch. 1444 if (Op.isKill()) 1445 Kills |= 1U << FPReg; 1446 } 1447 1448 // If this asm kills any FP registers (is the last use of them) we must 1449 // explicitly emit pop instructions for them. Do this now after the asm has 1450 // executed so that the ST(x) numbers are not off (which would happen if we 1451 // did this inline with operand rewriting). 1452 // 1453 // Note: this might be a non-optimal pop sequence. We might be able to do 1454 // better by trying to pop in stack order or something. 1455 MachineBasicBlock::iterator InsertPt = MI; 1456 while (Kills) { 1457 unsigned FPReg = CountTrailingZeros_32(Kills); 1458 freeStackSlotAfter(InsertPt, FPReg); 1459 Kills &= ~(1U << FPReg); 1460 } 1461 // Don't delete the inline asm! 1462 return; 1463 } 1464 1465 case X86::RET: 1466 case X86::RETI: 1467 // If RET has an FP register use operand, pass the first one in ST(0) and 1468 // the second one in ST(1). 1469 1470 // Find the register operands. 
1471 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U; 1472 unsigned LiveMask = 0; 1473 1474 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1475 MachineOperand &Op = MI->getOperand(i); 1476 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) 1477 continue; 1478 // FP Register uses must be kills unless there are two uses of the same 1479 // register, in which case only one will be a kill. 1480 assert(Op.isUse() && 1481 (Op.isKill() || // Marked kill. 1482 getFPReg(Op) == FirstFPRegOp || // Second instance. 1483 MI->killsRegister(Op.getReg())) && // Later use is marked kill. 1484 "Ret only defs operands, and values aren't live beyond it"); 1485 1486 if (FirstFPRegOp == ~0U) 1487 FirstFPRegOp = getFPReg(Op); 1488 else { 1489 assert(SecondFPRegOp == ~0U && "More than two fp operands!"); 1490 SecondFPRegOp = getFPReg(Op); 1491 } 1492 LiveMask |= (1 << getFPReg(Op)); 1493 1494 // Remove the operand so that later passes don't see it. 1495 MI->RemoveOperand(i); 1496 --i, --e; 1497 } 1498 1499 // We may have been carrying spurious live-ins, so make sure only the returned 1500 // registers are left live. 1501 adjustLiveRegs(LiveMask, MI); 1502 if (!LiveMask) return; // Quick check to see if any are possible. 1503 1504 // There are only four possibilities here: 1505 // 1) we are returning a single FP value. In this case, it has to be in 1506 // ST(0) already, so just declare success by removing the value from the 1507 // FP Stack. 1508 if (SecondFPRegOp == ~0U) { 1509 // Assert that the top of stack contains the right FP register. 1510 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) && 1511 "Top of stack not the right register for RET!"); 1512 1513 // Ok, everything is good, mark the value as not being on the stack 1514 // anymore so that our assertion about the stack being empty at end of 1515 // block doesn't fire. 
1516 StackTop = 0; 1517 return; 1518 } 1519 1520 // Otherwise, we are returning two values: 1521 // 2) If returning the same value for both, we only have one thing in the FP 1522 // stack. Consider: RET FP1, FP1 1523 if (StackTop == 1) { 1524 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&& 1525 "Stack misconfiguration for RET!"); 1526 1527 // Duplicate the TOS so that we return it twice. Just pick some other FPx 1528 // register to hold it. 1529 unsigned NewReg = getScratchReg(); 1530 duplicateToTop(FirstFPRegOp, NewReg, MI); 1531 FirstFPRegOp = NewReg; 1532 } 1533 1534 /// Okay we know we have two different FPx operands now: 1535 assert(StackTop == 2 && "Must have two values live!"); 1536 1537 /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently 1538 /// in ST(1). In this case, emit an fxch. 1539 if (getStackEntry(0) == SecondFPRegOp) { 1540 assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live"); 1541 moveToTop(FirstFPRegOp, MI); 1542 } 1543 1544 /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in 1545 /// ST(1). Just remove both from our understanding of the stack and return. 1546 assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live"); 1547 assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live"); 1548 StackTop = 0; 1549 return; 1550 } 1551 1552 I = MBB->erase(I); // Remove the pseudo instruction 1553 1554 // We want to leave I pointing to the previous instruction, but what if we 1555 // just erased the first instruction? 1556 if (I == MBB->begin()) { 1557 DEBUG(dbgs() << "Inserting dummy KILL\n"); 1558 I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL)); 1559 } else 1560 --I; 1561} 1562 1563// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands. 
1564bool FPS::translateCopy(MachineInstr *MI) { 1565 unsigned DstReg = MI->getOperand(0).getReg(); 1566 unsigned SrcReg = MI->getOperand(1).getReg(); 1567 1568 if (DstReg == X86::ST0) { 1569 MI->setDesc(TII->get(X86::FpSET_ST0_80)); 1570 MI->RemoveOperand(0); 1571 return true; 1572 } 1573 if (DstReg == X86::ST1) { 1574 MI->setDesc(TII->get(X86::FpSET_ST1_80)); 1575 MI->RemoveOperand(0); 1576 return true; 1577 } 1578 if (SrcReg == X86::ST0) { 1579 MI->setDesc(TII->get(X86::FpGET_ST0_80)); 1580 return true; 1581 } 1582 if (SrcReg == X86::ST1) { 1583 MI->setDesc(TII->get(X86::FpGET_ST1_80)); 1584 return true; 1585 } 1586 if (X86::RFP80RegClass.contains(DstReg, SrcReg)) { 1587 MI->setDesc(TII->get(X86::MOV_Fp8080)); 1588 return true; 1589 } 1590 return false; 1591} 1592