X86FloatingPoint.cpp revision e928ec9480072ed1298fba2fbd8faa0e89253bf1
1//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the pass which converts floating point instructions from 11// pseudo registers into register stack instructions. This pass uses live 12// variable information to indicate where the FPn registers are used and their 13// lifetimes. 14// 15// The x87 hardware tracks liveness of the stack registers, so it is necessary 16// to implement exact liveness tracking between basic blocks. The CFG edges are 17// partitioned into bundles where the same FP registers must be live in 18// identical stack positions. Instructions are inserted at the end of each basic 19// block to rearrange the live registers to match the outgoing bundle. 20// 21// This approach avoids splitting critical edges at the potential cost of more 22// live register shuffling instructions when critical edges are present. 23// 24//===----------------------------------------------------------------------===// 25 26#define DEBUG_TYPE "x86-codegen" 27#include "X86.h" 28#include "X86InstrInfo.h" 29#include "llvm/ADT/DepthFirstIterator.h" 30#include "llvm/ADT/DenseMap.h" 31#include "llvm/ADT/SmallPtrSet.h" 32#include "llvm/ADT/SmallVector.h" 33#include "llvm/ADT/Statistic.h" 34#include "llvm/ADT/STLExtras.h" 35#include "llvm/CodeGen/MachineFunctionPass.h" 36#include "llvm/CodeGen/MachineInstrBuilder.h" 37#include "llvm/CodeGen/MachineRegisterInfo.h" 38#include "llvm/CodeGen/Passes.h" 39#include "llvm/Support/Debug.h" 40#include "llvm/Support/ErrorHandling.h" 41#include "llvm/Support/raw_ostream.h" 42#include "llvm/Target/TargetInstrInfo.h" 43#include "llvm/Target/TargetMachine.h" 44#include <algorithm> 45using namespace llvm; 46 47STATISTIC(NumFXCH, "Number of fxch instructions inserted"); 48STATISTIC(NumFP , "Number of floating point instructions"); 49 50namespace { 51 struct FPS : public MachineFunctionPass { 52 static char ID; 53 FPS() : MachineFunctionPass(&ID) {} 54 55 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 56 AU.setPreservesCFG(); 57 AU.addPreservedID(MachineLoopInfoID); 58 AU.addPreservedID(MachineDominatorsID); 59 MachineFunctionPass::getAnalysisUsage(AU); 60 } 61 62 virtual bool runOnMachineFunction(MachineFunction &MF); 63 64 virtual const char *getPassName() const { return "X86 FP Stackifier"; } 65 66 private: 67 const TargetInstrInfo *TII; // Machine instruction info. 68 69 // Two CFG edges are related if they leave the same block, or enter the same 70 // block. The transitive closure of an edge under this relation is a 71 // LiveBundle. It represents a set of CFG edges where the live FP stack 72 // registers must be allocated identically in the x87 stack. 73 // 74 // A LiveBundle is usually all the edges leaving a block, or all the edges 75 // entering a block, but it can contain more edges if critical edges are 76 // present. 77 // 78 // The set of live FP registers in a LiveBundle is calculated by bundleCFG, 79 // but the exact mapping of FP registers to stack slots is fixed later. 80 struct LiveBundle { 81 // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c. 82 unsigned Mask; 83 84 // Number of pre-assigned live registers in FixStack. This is 0 when the 85 // stack order has not yet been fixed. 86 unsigned FixCount; 87 88 // Assigned stack order for live-in registers. 89 // FixStack[i] == getStackEntry(i) for all i < FixCount. 90 unsigned char FixStack[8]; 91 92 LiveBundle(unsigned m = 0) : Mask(m), FixCount(0) {} 93 94 // Have the live registers been assigned a stack order yet? 95 bool isFixed() const { return !Mask || FixCount; } 96 }; 97 98 // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges 99 // with no live FP registers. 100 SmallVector<LiveBundle, 8> LiveBundles; 101 102 // Map each MBB in the current function to an (ingoing, outgoing) index into 103 // LiveBundles. Blocks with no FP registers live in or out map to (0, 0) 104 // and are not actually stored in the map. 105 DenseMap<MachineBasicBlock*, std::pair<unsigned, unsigned> > BlockBundle; 106 107 // Return a bitmask of FP registers in block's live-in list. 108 unsigned calcLiveInMask(MachineBasicBlock *MBB) { 109 unsigned Mask = 0; 110 for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), 111 E = MBB->livein_end(); I != E; ++I) { 112 unsigned Reg = *I - X86::FP0; 113 if (Reg < 8) 114 Mask |= 1 << Reg; 115 } 116 return Mask; 117 } 118 119 // Partition all the CFG edges into LiveBundles. 120 void bundleCFG(MachineFunction &MF); 121 122 MachineBasicBlock *MBB; // Current basic block 123 unsigned Stack[8]; // FP<n> Registers in each stack slot... 124 unsigned RegMap[8]; // Track which stack slot contains each register 125 unsigned StackTop; // The current top of the FP stack. 126 127 // Set up our stack model to match the incoming registers to MBB. 128 void setupBlockStack(); 129 130 // Shuffle live registers to match the expectations of successor blocks. 131 void finishBlockStack(); 132 133 void dumpStack() const { 134 dbgs() << "Stack contents:"; 135 for (unsigned i = 0; i != StackTop; ++i) { 136 dbgs() << " FP" << Stack[i]; 137 assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); 138 } 139 dbgs() << "\n"; 140 } 141 142 /// isStackEmpty - Return true if the FP stack is empty. 143 bool isStackEmpty() const { 144 return StackTop == 0; 145 } 146 147 // getSlot - Return the stack slot number a particular register number is 148 // in. 149 unsigned getSlot(unsigned RegNo) const { 150 assert(RegNo < 8 && "Regno out of range!"); 151 return RegMap[RegNo]; 152 } 153 154 // isLive - Is RegNo currently live in the stack? 155 bool isLive(unsigned RegNo) const { 156 unsigned Slot = getSlot(RegNo); 157 return Slot < StackTop && Stack[Slot] == RegNo; 158 } 159 160 // getStackEntry - Return the X86::FP<n> register in register ST(i). 161 unsigned getStackEntry(unsigned STi) const { 162 assert(STi < StackTop && "Access past stack top!"); 163 return Stack[StackTop-1-STi]; 164 } 165 166 // getSTReg - Return the X86::ST(i) register which contains the specified 167 // FP<RegNo> register. 168 unsigned getSTReg(unsigned RegNo) const { 169 return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0; 170 } 171 172 // pushReg - Push the specified FP<n> register onto the stack. 173 void pushReg(unsigned Reg) { 174 assert(Reg < 8 && "Register number out of range!"); 175 assert(StackTop < 8 && "Stack overflow!"); 176 Stack[StackTop] = Reg; 177 RegMap[Reg] = StackTop++; 178 } 179 180 bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; } 181 void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) { 182 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc(); 183 if (isAtTop(RegNo)) return; 184 185 unsigned STReg = getSTReg(RegNo); 186 unsigned RegOnTop = getStackEntry(0); 187 188 // Swap the slots the regs are in. 189 std::swap(RegMap[RegNo], RegMap[RegOnTop]); 190 191 // Swap stack slot contents. 192 assert(RegMap[RegOnTop] < StackTop); 193 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); 194 195 // Emit an fxch to update the runtime processors version of the state. 196 BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg); 197 ++NumFXCH; 198 } 199 200 void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) { 201 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc(); 202 unsigned STReg = getSTReg(RegNo); 203 pushReg(AsReg); // New register on top of stack 204 205 BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg); 206 } 207 208 // popStackAfter - Pop the current value off of the top of the FP stack 209 // after the specified instruction. 210 void popStackAfter(MachineBasicBlock::iterator &I); 211 212 // freeStackSlotAfter - Free the specified register from the register stack, 213 // so that it is no longer in a register. If the register is currently at 214 // the top of the stack, we just pop the current instruction, otherwise we 215 // store the current top-of-stack into the specified slot, then pop the top 216 // of stack. 217 void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg); 218 219 // freeStackSlotBefore - Just the pop, no folding. Return the inserted 220 // instruction. 221 MachineBasicBlock::iterator 222 freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo); 223 224 // Adjust the live registers to be the set in Mask. 225 void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I); 226 227 // Shuffle the top FixCount stack entries susch that FP reg FixStack[0] is 228 //st(0), FP reg FixStack[1] is st(1) etc. 229 void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount, 230 MachineBasicBlock::iterator I); 231 232 bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); 233 234 void handleZeroArgFP(MachineBasicBlock::iterator &I); 235 void handleOneArgFP(MachineBasicBlock::iterator &I); 236 void handleOneArgFPRW(MachineBasicBlock::iterator &I); 237 void handleTwoArgFP(MachineBasicBlock::iterator &I); 238 void handleCompareFP(MachineBasicBlock::iterator &I); 239 void handleCondMovFP(MachineBasicBlock::iterator &I); 240 void handleSpecialFP(MachineBasicBlock::iterator &I); 241 242 bool translateCopy(MachineInstr*); 243 }; 244 char FPS::ID = 0; 245} 246 247FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); } 248 249/// getFPReg - Return the X86::FPx register number for the specified operand. 250/// For example, this returns 3 for X86::FP3. 251static unsigned getFPReg(const MachineOperand &MO) { 252 assert(MO.isReg() && "Expected an FP register!"); 253 unsigned Reg = MO.getReg(); 254 assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!"); 255 return Reg - X86::FP0; 256} 257 258/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP 259/// register references into FP stack references. 260/// 261bool FPS::runOnMachineFunction(MachineFunction &MF) { 262 // We only need to run this pass if there are any FP registers used in this 263 // function. If it is all integer, there is nothing for us to do! 264 bool FPIsUsed = false; 265 266 assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!"); 267 for (unsigned i = 0; i <= 6; ++i) 268 if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) { 269 FPIsUsed = true; 270 break; 271 } 272 273 // Early exit. 274 if (!FPIsUsed) return false; 275 276 TII = MF.getTarget().getInstrInfo(); 277 278 // Prepare cross-MBB liveness. 279 bundleCFG(MF); 280 281 StackTop = 0; 282 283 // Process the function in depth first order so that we process at least one 284 // of the predecessors for every reachable block in the function. 285 SmallPtrSet<MachineBasicBlock*, 8> Processed; 286 MachineBasicBlock *Entry = MF.begin(); 287 288 bool Changed = false; 289 for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> > 290 I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed); 291 I != E; ++I) 292 Changed |= processBasicBlock(MF, **I); 293 294 // Process any unreachable blocks in arbitrary order now. 295 if (MF.size() != Processed.size()) 296 for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) 297 if (Processed.insert(BB)) 298 Changed |= processBasicBlock(MF, *BB); 299 300 BlockBundle.clear(); 301 LiveBundles.clear(); 302 303 return Changed; 304} 305 306/// bundleCFG - Scan all the basic blocks to determine consistent live-in and 307/// live-out sets for the FP registers. Consistent means that the set of 308/// registers live-out from a block is identical to the live-in set of all 309/// successors. This is not enforced by the normal live-in lists since 310/// registers may be implicitly defined, or not used by all successors. 311void FPS::bundleCFG(MachineFunction &MF) { 312 assert(LiveBundles.empty() && "Stale data in LiveBundles"); 313 assert(BlockBundle.empty() && "Stale data in BlockBundle"); 314 SmallPtrSet<MachineBasicBlock*, 8> PropDown, PropUp; 315 316 // LiveBundle[0] is the empty live-in set. 317 LiveBundles.resize(1); 318 319 // First gather the actual live-in masks for all MBBs. 320 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { 321 MachineBasicBlock *MBB = I; 322 const unsigned Mask = calcLiveInMask(MBB); 323 if (!Mask) 324 continue; 325 // Ingoing bundle index. 326 unsigned &Idx = BlockBundle[MBB].first; 327 // Already assigned an ingoing bundle? 328 if (Idx) 329 continue; 330 // Allocate a new LiveBundle struct for this block's live-ins. 331 const unsigned BundleIdx = Idx = LiveBundles.size(); 332 DEBUG(dbgs() << "Creating LB#" << BundleIdx << ": in:BB#" 333 << MBB->getNumber()); 334 LiveBundles.push_back(Mask); 335 LiveBundle &Bundle = LiveBundles.back(); 336 337 // Make sure all predecessors have the same live-out set. 338 PropUp.insert(MBB); 339 340 // Keep pushing liveness up and down the CFG until convergence. 341 // Only critical edges cause iteration here, but when they do, multiple 342 // blocks can be assigned to the same LiveBundle index. 343 do { 344 // Assign BundleIdx as liveout from predecessors in PropUp. 345 for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropUp.begin(), 346 E = PropUp.end(); I != E; ++I) { 347 MachineBasicBlock *MBB = *I; 348 for (MachineBasicBlock::const_pred_iterator LinkI = MBB->pred_begin(), 349 LinkE = MBB->pred_end(); LinkI != LinkE; ++LinkI) { 350 MachineBasicBlock *PredMBB = *LinkI; 351 // PredMBB's liveout bundle should be set to LIIdx. 352 unsigned &Idx = BlockBundle[PredMBB].second; 353 if (Idx) { 354 assert(Idx == BundleIdx && "Inconsistent CFG"); 355 continue; 356 } 357 Idx = BundleIdx; 358 DEBUG(dbgs() << " out:BB#" << PredMBB->getNumber()); 359 // Propagate to siblings. 360 if (PredMBB->succ_size() > 1) 361 PropDown.insert(PredMBB); 362 } 363 } 364 PropUp.clear(); 365 366 // Assign BundleIdx as livein to successors in PropDown. 367 for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropDown.begin(), 368 E = PropDown.end(); I != E; ++I) { 369 MachineBasicBlock *MBB = *I; 370 for (MachineBasicBlock::const_succ_iterator LinkI = MBB->succ_begin(), 371 LinkE = MBB->succ_end(); LinkI != LinkE; ++LinkI) { 372 MachineBasicBlock *SuccMBB = *LinkI; 373 // LinkMBB's livein bundle should be set to BundleIdx. 374 unsigned &Idx = BlockBundle[SuccMBB].first; 375 if (Idx) { 376 assert(Idx == BundleIdx && "Inconsistent CFG"); 377 continue; 378 } 379 Idx = BundleIdx; 380 DEBUG(dbgs() << " in:BB#" << SuccMBB->getNumber()); 381 // Propagate to siblings. 382 if (SuccMBB->pred_size() > 1) 383 PropUp.insert(SuccMBB); 384 // Also accumulate the bundle liveness mask from the liveins here. 385 Bundle.Mask |= calcLiveInMask(SuccMBB); 386 } 387 } 388 PropDown.clear(); 389 } while (!PropUp.empty()); 390 DEBUG({ 391 dbgs() << " live:"; 392 for (unsigned i = 0; i < 8; ++i) 393 if (Bundle.Mask & (1<<i)) 394 dbgs() << " %FP" << i; 395 dbgs() << '\n'; 396 }); 397 } 398} 399 400/// processBasicBlock - Loop over all of the instructions in the basic block, 401/// transforming FP instructions into their stack form. 402/// 403bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { 404 bool Changed = false; 405 MBB = &BB; 406 407 setupBlockStack(); 408 409 for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { 410 MachineInstr *MI = I; 411 uint64_t Flags = MI->getDesc().TSFlags; 412 413 unsigned FPInstClass = Flags & X86II::FPTypeMask; 414 if (MI->isInlineAsm()) 415 FPInstClass = X86II::SpecialFP; 416 417 if (MI->isCopy() && translateCopy(MI)) 418 FPInstClass = X86II::SpecialFP; 419 420 if (FPInstClass == X86II::NotFP) 421 continue; // Efficiently ignore non-fp insts! 422 423 MachineInstr *PrevMI = 0; 424 if (I != BB.begin()) 425 PrevMI = prior(I); 426 427 ++NumFP; // Keep track of # of pseudo instrs 428 DEBUG(dbgs() << "\nFPInst:\t" << *MI); 429 430 // Get dead variables list now because the MI pointer may be deleted as part 431 // of processing! 432 SmallVector<unsigned, 8> DeadRegs; 433 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 434 const MachineOperand &MO = MI->getOperand(i); 435 if (MO.isReg() && MO.isDead()) 436 DeadRegs.push_back(MO.getReg()); 437 } 438 439 switch (FPInstClass) { 440 case X86II::ZeroArgFP: handleZeroArgFP(I); break; 441 case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0) 442 case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0)) 443 case X86II::TwoArgFP: handleTwoArgFP(I); break; 444 case X86II::CompareFP: handleCompareFP(I); break; 445 case X86II::CondMovFP: handleCondMovFP(I); break; 446 case X86II::SpecialFP: handleSpecialFP(I); break; 447 default: llvm_unreachable("Unknown FP Type!"); 448 } 449 450 // Check to see if any of the values defined by this instruction are dead 451 // after definition. If so, pop them. 452 for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) { 453 unsigned Reg = DeadRegs[i]; 454 if (Reg >= X86::FP0 && Reg <= X86::FP6) { 455 DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); 456 freeStackSlotAfter(I, Reg-X86::FP0); 457 } 458 } 459 460 // Print out all of the instructions expanded to if -debug 461 DEBUG( 462 MachineBasicBlock::iterator PrevI(PrevMI); 463 if (I == PrevI) { 464 dbgs() << "Just deleted pseudo instruction\n"; 465 } else { 466 MachineBasicBlock::iterator Start = I; 467 // Rewind to first instruction newly inserted. 468 while (Start != BB.begin() && prior(Start) != PrevI) --Start; 469 dbgs() << "Inserted instructions:\n\t"; 470 Start->print(dbgs(), &MF.getTarget()); 471 while (++Start != llvm::next(I)) {} 472 } 473 dumpStack(); 474 ); 475 476 Changed = true; 477 } 478 479 finishBlockStack(); 480 481 return Changed; 482} 483 484/// setupBlockStack - Use the BlockBundle map to set up our model of the stack 485/// to match predecessors' live out stack. 486void FPS::setupBlockStack() { 487 DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber() 488 << " derived from " << MBB->getName() << ".\n"); 489 StackTop = 0; 490 const LiveBundle &Bundle = LiveBundles[BlockBundle.lookup(MBB).first]; 491 492 if (!Bundle.Mask) { 493 DEBUG(dbgs() << "Block has no FP live-ins.\n"); 494 return; 495 } 496 497 // Depth-first iteration should ensure that we always have an assigned stack. 498 assert(Bundle.isFixed() && "Reached block before any predecessors"); 499 500 // Push the fixed live-in registers. 501 for (unsigned i = Bundle.FixCount; i > 0; --i) { 502 MBB->addLiveIn(X86::ST0+i-1); 503 DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP" 504 << unsigned(Bundle.FixStack[i-1]) << '\n'); 505 pushReg(Bundle.FixStack[i-1]); 506 } 507 508 // Kill off unwanted live-ins. This can happen with a critical edge. 509 // FIXME: We could keep these live registers around as zombies. They may need 510 // to be revived at the end of a short block. It might save a few instrs. 511 adjustLiveRegs(calcLiveInMask(MBB), MBB->begin()); 512 DEBUG(MBB->dump()); 513} 514 515/// finishBlockStack - Revive live-outs that are implicitly defined out of 516/// MBB. Shuffle live registers to match the expected fixed stack of any 517/// predecessors, and ensure that all predecessors are expecting the same 518/// stack. 519void FPS::finishBlockStack() { 520 // The RET handling below takes care of return blocks for us. 521 if (MBB->succ_empty()) 522 return; 523 524 DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber() 525 << " derived from " << MBB->getName() << ".\n"); 526 527 unsigned BundleIdx = BlockBundle.lookup(MBB).second; 528 LiveBundle &Bundle = LiveBundles[BundleIdx]; 529 530 // We may need to kill and define some registers to match successors. 531 // FIXME: This can probably be combined with the shuffle below. 532 MachineBasicBlock::iterator Term = MBB->getFirstTerminator(); 533 adjustLiveRegs(Bundle.Mask, Term); 534 535 if (!Bundle.Mask) { 536 DEBUG(dbgs() << "No live-outs.\n"); 537 return; 538 } 539 540 // Has the stack order been fixed yet? 541 DEBUG(dbgs() << "LB#" << BundleIdx << ": "); 542 if (Bundle.isFixed()) { 543 DEBUG(dbgs() << "Shuffling stack to match.\n"); 544 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term); 545 } else { 546 // Not fixed yet, we get to choose. 547 DEBUG(dbgs() << "Fixing stack order now.\n"); 548 Bundle.FixCount = StackTop; 549 for (unsigned i = 0; i < StackTop; ++i) 550 Bundle.FixStack[i] = getStackEntry(i); 551 } 552} 553 554 555//===----------------------------------------------------------------------===// 556// Efficient Lookup Table Support 557//===----------------------------------------------------------------------===// 558 559namespace { 560 struct TableEntry { 561 unsigned from; 562 unsigned to; 563 bool operator<(const TableEntry &TE) const { return from < TE.from; } 564 friend bool operator<(const TableEntry &TE, unsigned V) { 565 return TE.from < V; 566 } 567 friend bool operator<(unsigned V, const TableEntry &TE) { 568 return V < TE.from; 569 } 570 }; 571} 572 573#ifndef NDEBUG 574static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) { 575 for (unsigned i = 0; i != NumEntries-1; ++i) 576 if (!(Table[i] < Table[i+1])) return false; 577 return true; 578} 579#endif 580 581static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) { 582 const TableEntry *I = std::lower_bound(Table, Table+N, Opcode); 583 if (I != Table+N && I->from == Opcode) 584 return I->to; 585 return -1; 586} 587 588#ifdef NDEBUG 589#define ASSERT_SORTED(TABLE) 590#else 591#define ASSERT_SORTED(TABLE) \ 592 { static bool TABLE##Checked = false; \ 593 if (!TABLE##Checked) { \ 594 assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \ 595 "All lookup tables must be sorted for efficient access!"); \ 596 TABLE##Checked = true; \ 597 } \ 598 } 599#endif 600 601//===----------------------------------------------------------------------===// 602// Register File -> Register Stack Mapping Methods 603//===----------------------------------------------------------------------===// 604 605// OpcodeTable - Sorted map of register instructions to their stack version. 606// The first element is an register file pseudo instruction, the second is the 607// concrete X86 instruction which uses the register stack. 608// 609static const TableEntry OpcodeTable[] = { 610 { X86::ABS_Fp32 , X86::ABS_F }, 611 { X86::ABS_Fp64 , X86::ABS_F }, 612 { X86::ABS_Fp80 , X86::ABS_F }, 613 { X86::ADD_Fp32m , X86::ADD_F32m }, 614 { X86::ADD_Fp64m , X86::ADD_F64m }, 615 { X86::ADD_Fp64m32 , X86::ADD_F32m }, 616 { X86::ADD_Fp80m32 , X86::ADD_F32m }, 617 { X86::ADD_Fp80m64 , X86::ADD_F64m }, 618 { X86::ADD_FpI16m32 , X86::ADD_FI16m }, 619 { X86::ADD_FpI16m64 , X86::ADD_FI16m }, 620 { X86::ADD_FpI16m80 , X86::ADD_FI16m }, 621 { X86::ADD_FpI32m32 , X86::ADD_FI32m }, 622 { X86::ADD_FpI32m64 , X86::ADD_FI32m }, 623 { X86::ADD_FpI32m80 , X86::ADD_FI32m }, 624 { X86::CHS_Fp32 , X86::CHS_F }, 625 { X86::CHS_Fp64 , X86::CHS_F }, 626 { X86::CHS_Fp80 , X86::CHS_F }, 627 { X86::CMOVBE_Fp32 , X86::CMOVBE_F }, 628 { X86::CMOVBE_Fp64 , X86::CMOVBE_F }, 629 { X86::CMOVBE_Fp80 , X86::CMOVBE_F }, 630 { X86::CMOVB_Fp32 , X86::CMOVB_F }, 631 { X86::CMOVB_Fp64 , X86::CMOVB_F }, 632 { X86::CMOVB_Fp80 , X86::CMOVB_F }, 633 { X86::CMOVE_Fp32 , X86::CMOVE_F }, 634 { X86::CMOVE_Fp64 , X86::CMOVE_F }, 635 { X86::CMOVE_Fp80 , X86::CMOVE_F }, 636 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F }, 637 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F }, 638 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F }, 639 { X86::CMOVNB_Fp32 , X86::CMOVNB_F }, 640 { X86::CMOVNB_Fp64 , X86::CMOVNB_F }, 641 { X86::CMOVNB_Fp80 , X86::CMOVNB_F }, 642 { X86::CMOVNE_Fp32 , X86::CMOVNE_F }, 643 { X86::CMOVNE_Fp64 , X86::CMOVNE_F }, 644 { X86::CMOVNE_Fp80 , X86::CMOVNE_F }, 645 { X86::CMOVNP_Fp32 , X86::CMOVNP_F }, 646 { X86::CMOVNP_Fp64 , X86::CMOVNP_F }, 647 { X86::CMOVNP_Fp80 , X86::CMOVNP_F }, 648 { X86::CMOVP_Fp32 , X86::CMOVP_F }, 649 { X86::CMOVP_Fp64 , X86::CMOVP_F }, 650 { X86::CMOVP_Fp80 , X86::CMOVP_F }, 651 { X86::COS_Fp32 , X86::COS_F }, 652 { X86::COS_Fp64 , X86::COS_F }, 653 { X86::COS_Fp80 , X86::COS_F }, 654 { X86::DIVR_Fp32m , X86::DIVR_F32m }, 655 { X86::DIVR_Fp64m , X86::DIVR_F64m }, 656 { X86::DIVR_Fp64m32 , X86::DIVR_F32m }, 657 { X86::DIVR_Fp80m32 , X86::DIVR_F32m }, 658 { X86::DIVR_Fp80m64 , X86::DIVR_F64m }, 659 { X86::DIVR_FpI16m32, X86::DIVR_FI16m}, 660 { X86::DIVR_FpI16m64, X86::DIVR_FI16m}, 661 { X86::DIVR_FpI16m80, X86::DIVR_FI16m}, 662 { X86::DIVR_FpI32m32, X86::DIVR_FI32m}, 663 { X86::DIVR_FpI32m64, X86::DIVR_FI32m}, 664 { X86::DIVR_FpI32m80, X86::DIVR_FI32m}, 665 { X86::DIV_Fp32m , X86::DIV_F32m }, 666 { X86::DIV_Fp64m , X86::DIV_F64m }, 667 { X86::DIV_Fp64m32 , X86::DIV_F32m }, 668 { X86::DIV_Fp80m32 , X86::DIV_F32m }, 669 { X86::DIV_Fp80m64 , X86::DIV_F64m }, 670 { X86::DIV_FpI16m32 , X86::DIV_FI16m }, 671 { X86::DIV_FpI16m64 , X86::DIV_FI16m }, 672 { X86::DIV_FpI16m80 , X86::DIV_FI16m }, 673 { X86::DIV_FpI32m32 , X86::DIV_FI32m }, 674 { X86::DIV_FpI32m64 , X86::DIV_FI32m }, 675 { X86::DIV_FpI32m80 , X86::DIV_FI32m }, 676 { X86::ILD_Fp16m32 , X86::ILD_F16m }, 677 { X86::ILD_Fp16m64 , X86::ILD_F16m }, 678 { X86::ILD_Fp16m80 , X86::ILD_F16m }, 679 { X86::ILD_Fp32m32 , X86::ILD_F32m }, 680 { X86::ILD_Fp32m64 , X86::ILD_F32m }, 681 { X86::ILD_Fp32m80 , X86::ILD_F32m }, 682 { X86::ILD_Fp64m32 , X86::ILD_F64m }, 683 { X86::ILD_Fp64m64 , X86::ILD_F64m }, 684 { X86::ILD_Fp64m80 , X86::ILD_F64m }, 685 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m}, 686 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m}, 687 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m}, 688 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m}, 689 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m}, 690 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m}, 691 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m}, 692 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m}, 693 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m}, 694 { X86::IST_Fp16m32 , X86::IST_F16m }, 695 { X86::IST_Fp16m64 , X86::IST_F16m }, 696 { X86::IST_Fp16m80 , X86::IST_F16m }, 697 { X86::IST_Fp32m32 , X86::IST_F32m }, 698 { X86::IST_Fp32m64 , X86::IST_F32m }, 699 { X86::IST_Fp32m80 , X86::IST_F32m }, 700 { X86::IST_Fp64m32 , X86::IST_FP64m }, 701 { X86::IST_Fp64m64 , X86::IST_FP64m }, 702 { X86::IST_Fp64m80 , X86::IST_FP64m }, 703 { X86::LD_Fp032 , X86::LD_F0 }, 704 { X86::LD_Fp064 , X86::LD_F0 }, 705 { X86::LD_Fp080 , X86::LD_F0 }, 706 { X86::LD_Fp132 , X86::LD_F1 }, 707 { X86::LD_Fp164 , X86::LD_F1 }, 708 { X86::LD_Fp180 , X86::LD_F1 }, 709 { X86::LD_Fp32m , X86::LD_F32m }, 710 { X86::LD_Fp32m64 , X86::LD_F32m }, 711 { X86::LD_Fp32m80 , X86::LD_F32m }, 712 { X86::LD_Fp64m , X86::LD_F64m }, 713 { X86::LD_Fp64m80 , X86::LD_F64m }, 714 { X86::LD_Fp80m , X86::LD_F80m }, 715 { X86::MUL_Fp32m , X86::MUL_F32m }, 716 { X86::MUL_Fp64m , X86::MUL_F64m }, 717 { X86::MUL_Fp64m32 , X86::MUL_F32m }, 718 { X86::MUL_Fp80m32 , X86::MUL_F32m }, 719 { X86::MUL_Fp80m64 , X86::MUL_F64m }, 720 { X86::MUL_FpI16m32 , X86::MUL_FI16m }, 721 { X86::MUL_FpI16m64 , X86::MUL_FI16m }, 722 { X86::MUL_FpI16m80 , X86::MUL_FI16m }, 723 { X86::MUL_FpI32m32 , X86::MUL_FI32m }, 724 { X86::MUL_FpI32m64 , X86::MUL_FI32m }, 725 { X86::MUL_FpI32m80 , X86::MUL_FI32m }, 726 { X86::SIN_Fp32 , X86::SIN_F }, 727 { X86::SIN_Fp64 , X86::SIN_F }, 728 { X86::SIN_Fp80 , X86::SIN_F }, 729 { X86::SQRT_Fp32 , X86::SQRT_F }, 730 { X86::SQRT_Fp64 , X86::SQRT_F }, 731 { X86::SQRT_Fp80 , X86::SQRT_F }, 732 { X86::ST_Fp32m , X86::ST_F32m }, 733 { X86::ST_Fp64m , X86::ST_F64m }, 734 { X86::ST_Fp64m32 , X86::ST_F32m }, 735 { X86::ST_Fp80m32 , X86::ST_F32m }, 736 { X86::ST_Fp80m64 , X86::ST_F64m }, 737 { X86::ST_FpP80m , X86::ST_FP80m }, 738 { X86::SUBR_Fp32m , X86::SUBR_F32m }, 739 { X86::SUBR_Fp64m , X86::SUBR_F64m }, 740 { X86::SUBR_Fp64m32 , X86::SUBR_F32m }, 741 { X86::SUBR_Fp80m32 , X86::SUBR_F32m }, 742 { X86::SUBR_Fp80m64 , X86::SUBR_F64m }, 743 { X86::SUBR_FpI16m32, X86::SUBR_FI16m}, 744 { X86::SUBR_FpI16m64, X86::SUBR_FI16m}, 745 { X86::SUBR_FpI16m80, X86::SUBR_FI16m}, 746 { X86::SUBR_FpI32m32, X86::SUBR_FI32m}, 747 { X86::SUBR_FpI32m64, X86::SUBR_FI32m}, 748 { X86::SUBR_FpI32m80, X86::SUBR_FI32m}, 749 { X86::SUB_Fp32m , X86::SUB_F32m }, 750 { X86::SUB_Fp64m , X86::SUB_F64m }, 751 { X86::SUB_Fp64m32 , X86::SUB_F32m }, 752 { X86::SUB_Fp80m32 , X86::SUB_F32m }, 753 { X86::SUB_Fp80m64 , X86::SUB_F64m }, 754 { X86::SUB_FpI16m32 , X86::SUB_FI16m }, 755 { X86::SUB_FpI16m64 , X86::SUB_FI16m }, 756 { X86::SUB_FpI16m80 , X86::SUB_FI16m }, 757 { X86::SUB_FpI32m32 , X86::SUB_FI32m }, 758 { X86::SUB_FpI32m64 , X86::SUB_FI32m }, 759 { X86::SUB_FpI32m80 , X86::SUB_FI32m }, 760 { X86::TST_Fp32 , X86::TST_F }, 761 { X86::TST_Fp64 , X86::TST_F }, 762 { X86::TST_Fp80 , X86::TST_F }, 763 { X86::UCOM_FpIr32 , X86::UCOM_FIr }, 764 { X86::UCOM_FpIr64 , X86::UCOM_FIr }, 765 { X86::UCOM_FpIr80 , X86::UCOM_FIr }, 766 { X86::UCOM_Fpr32 , X86::UCOM_Fr }, 767 { X86::UCOM_Fpr64 , X86::UCOM_Fr }, 768 { X86::UCOM_Fpr80 , X86::UCOM_Fr }, 769}; 770 771static unsigned getConcreteOpcode(unsigned Opcode) { 772 ASSERT_SORTED(OpcodeTable); 773 int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode); 774 assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!"); 775 return Opc; 776} 777 778//===----------------------------------------------------------------------===// 779// Helper Methods 780//===----------------------------------------------------------------------===// 781 782// PopTable - Sorted map of instructions to their popping version. The first 783// element is an instruction, the second is the version which pops. 784// 785static const TableEntry PopTable[] = { 786 { X86::ADD_FrST0 , X86::ADD_FPrST0 }, 787 788 { X86::DIVR_FrST0, X86::DIVR_FPrST0 }, 789 { X86::DIV_FrST0 , X86::DIV_FPrST0 }, 790 791 { X86::IST_F16m , X86::IST_FP16m }, 792 { X86::IST_F32m , X86::IST_FP32m }, 793 794 { X86::MUL_FrST0 , X86::MUL_FPrST0 }, 795 796 { X86::ST_F32m , X86::ST_FP32m }, 797 { X86::ST_F64m , X86::ST_FP64m }, 798 { X86::ST_Frr , X86::ST_FPrr }, 799 800 { X86::SUBR_FrST0, X86::SUBR_FPrST0 }, 801 { X86::SUB_FrST0 , X86::SUB_FPrST0 }, 802 803 { X86::UCOM_FIr , X86::UCOM_FIPr }, 804 805 { X86::UCOM_FPr , X86::UCOM_FPPr }, 806 { X86::UCOM_Fr , X86::UCOM_FPr }, 807}; 808 809/// popStackAfter - Pop the current value off of the top of the FP stack after 810/// the specified instruction. This attempts to be sneaky and combine the pop 811/// into the instruction itself if possible. The iterator is left pointing to 812/// the last instruction, be it a new pop instruction inserted, or the old 813/// instruction if it was modified in place. 814/// 815void FPS::popStackAfter(MachineBasicBlock::iterator &I) { 816 MachineInstr* MI = I; 817 DebugLoc dl = MI->getDebugLoc(); 818 ASSERT_SORTED(PopTable); 819 assert(StackTop > 0 && "Cannot pop empty stack!"); 820 RegMap[Stack[--StackTop]] = ~0; // Update state 821 822 // Check to see if there is a popping version of this instruction... 823 int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode()); 824 if (Opcode != -1) { 825 I->setDesc(TII->get(Opcode)); 826 if (Opcode == X86::UCOM_FPPr) 827 I->RemoveOperand(0); 828 } else { // Insert an explicit pop 829 I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0); 830 } 831} 832 833/// freeStackSlotAfter - Free the specified register from the register stack, so 834/// that it is no longer in a register. If the register is currently at the top 835/// of the stack, we just pop the current instruction, otherwise we store the 836/// current top-of-stack into the specified slot, then pop the top of stack. 837void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) { 838 if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy. 839 popStackAfter(I); 840 return; 841 } 842 843 // Otherwise, store the top of stack into the dead slot, killing the operand 844 // without having to add in an explicit xchg then pop. 845 // 846 I = freeStackSlotBefore(++I, FPRegNo); 847} 848 849/// freeStackSlotBefore - Free the specified register without trying any 850/// folding. 851MachineBasicBlock::iterator 852FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) { 853 unsigned STReg = getSTReg(FPRegNo); 854 unsigned OldSlot = getSlot(FPRegNo); 855 unsigned TopReg = Stack[StackTop-1]; 856 Stack[OldSlot] = TopReg; 857 RegMap[TopReg] = OldSlot; 858 RegMap[FPRegNo] = ~0; 859 Stack[--StackTop] = ~0; 860 return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg); 861} 862 863/// adjustLiveRegs - Kill and revive registers such that exactly the FP 864/// registers with a bit in Mask are live. 865void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) { 866 unsigned Defs = Mask; 867 unsigned Kills = 0; 868 for (unsigned i = 0; i < StackTop; ++i) { 869 unsigned RegNo = Stack[i]; 870 if (!(Defs & (1 << RegNo))) 871 // This register is live, but we don't want it. 872 Kills |= (1 << RegNo); 873 else 874 // We don't need to imp-def this live register. 875 Defs &= ~(1 << RegNo); 876 } 877 assert((Kills & Defs) == 0 && "Register needs killing and def'ing?"); 878 879 // Produce implicit-defs for free by using killed registers. 880 while (Kills && Defs) { 881 unsigned KReg = CountTrailingZeros_32(Kills); 882 unsigned DReg = CountTrailingZeros_32(Defs); 883 DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n"); 884 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]); 885 std::swap(RegMap[KReg], RegMap[DReg]); 886 Kills &= ~(1 << KReg); 887 Defs &= ~(1 << DReg); 888 } 889 890 // Kill registers by popping. 891 if (Kills && I != MBB->begin()) { 892 MachineBasicBlock::iterator I2 = llvm::prior(I); 893 for (;;) { 894 unsigned KReg = getStackEntry(0); 895 if (!(Kills & (1 << KReg))) 896 break; 897 DEBUG(dbgs() << "Popping %FP" << KReg << "\n"); 898 popStackAfter(I2); 899 Kills &= ~(1 << KReg); 900 } 901 } 902 903 // Manually kill the rest. 904 while (Kills) { 905 unsigned KReg = CountTrailingZeros_32(Kills); 906 DEBUG(dbgs() << "Killing %FP" << KReg << "\n"); 907 freeStackSlotBefore(I, KReg); 908 Kills &= ~(1 << KReg); 909 } 910 911 // Load zeros for all the imp-defs. 912 while(Defs) { 913 unsigned DReg = CountTrailingZeros_32(Defs); 914 DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n"); 915 BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0)); 916 pushReg(DReg); 917 Defs &= ~(1 << DReg); 918 } 919 920 // Now we should have the correct registers live. 921 DEBUG(dumpStack()); 922 assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch"); 923} 924 925/// shuffleStackTop - emit fxch instructions before I to shuffle the top 926/// FixCount entries into the order given by FixStack. 927/// FIXME: Is there a better algorithm than insertion sort? 928void FPS::shuffleStackTop(const unsigned char *FixStack, 929 unsigned FixCount, 930 MachineBasicBlock::iterator I) { 931 // Move items into place, starting from the desired stack bottom. 932 while (FixCount--) { 933 // Old register at position FixCount. 934 unsigned OldReg = getStackEntry(FixCount); 935 // Desired register at position FixCount. 936 unsigned Reg = FixStack[FixCount]; 937 if (Reg == OldReg) 938 continue; 939 // (Reg st0) (OldReg st0) = (Reg OldReg st0) 940 moveToTop(Reg, I); 941 moveToTop(OldReg, I); 942 } 943 DEBUG(dumpStack()); 944} 945 946 947//===----------------------------------------------------------------------===// 948// Instruction transformation implementation 949//===----------------------------------------------------------------------===// 950 951/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem> 952/// 953void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { 954 MachineInstr *MI = I; 955 unsigned DestReg = getFPReg(MI->getOperand(0)); 956 957 // Change from the pseudo instruction to the concrete instruction. 958 MI->RemoveOperand(0); // Remove the explicit ST(0) operand 959 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 960 961 // Result gets pushed on the stack. 962 pushReg(DestReg); 963} 964 965/// handleOneArgFP - fst <mem>, ST(0) 966/// 967void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { 968 MachineInstr *MI = I; 969 unsigned NumOps = MI->getDesc().getNumOperands(); 970 assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) && 971 "Can only handle fst* & ftst instructions!"); 972 973 // Is this the last use of the source register? 974 unsigned Reg = getFPReg(MI->getOperand(NumOps-1)); 975 bool KillsSrc = MI->killsRegister(X86::FP0+Reg); 976 977 // FISTP64m is strange because there isn't a non-popping versions. 978 // If we have one _and_ we don't want to pop the operand, duplicate the value 979 // on the stack instead of moving it. This ensure that popping the value is 980 // always ok. 981 // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m. 982 // 983 if (!KillsSrc && 984 (MI->getOpcode() == X86::IST_Fp64m32 || 985 MI->getOpcode() == X86::ISTT_Fp16m32 || 986 MI->getOpcode() == X86::ISTT_Fp32m32 || 987 MI->getOpcode() == X86::ISTT_Fp64m32 || 988 MI->getOpcode() == X86::IST_Fp64m64 || 989 MI->getOpcode() == X86::ISTT_Fp16m64 || 990 MI->getOpcode() == X86::ISTT_Fp32m64 || 991 MI->getOpcode() == X86::ISTT_Fp64m64 || 992 MI->getOpcode() == X86::IST_Fp64m80 || 993 MI->getOpcode() == X86::ISTT_Fp16m80 || 994 MI->getOpcode() == X86::ISTT_Fp32m80 || 995 MI->getOpcode() == X86::ISTT_Fp64m80 || 996 MI->getOpcode() == X86::ST_FpP80m)) { 997 duplicateToTop(Reg, 7 /*temp register*/, I); 998 } else { 999 moveToTop(Reg, I); // Move to the top of the stack... 1000 } 1001 1002 // Convert from the pseudo instruction to the concrete instruction. 1003 MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand 1004 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 1005 1006 if (MI->getOpcode() == X86::IST_FP64m || 1007 MI->getOpcode() == X86::ISTT_FP16m || 1008 MI->getOpcode() == X86::ISTT_FP32m || 1009 MI->getOpcode() == X86::ISTT_FP64m || 1010 MI->getOpcode() == X86::ST_FP80m) { 1011 assert(StackTop > 0 && "Stack empty??"); 1012 --StackTop; 1013 } else if (KillsSrc) { // Last use of operand? 1014 popStackAfter(I); 1015 } 1016} 1017 1018 1019/// handleOneArgFPRW: Handle instructions that read from the top of stack and 1020/// replace the value with a newly computed value. These instructions may have 1021/// non-fp operands after their FP operands. 1022/// 1023/// Examples: 1024/// R1 = fchs R2 1025/// R1 = fadd R2, [mem] 1026/// 1027void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) { 1028 MachineInstr *MI = I; 1029#ifndef NDEBUG 1030 unsigned NumOps = MI->getDesc().getNumOperands(); 1031 assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!"); 1032#endif 1033 1034 // Is this the last use of the source register? 1035 unsigned Reg = getFPReg(MI->getOperand(1)); 1036 bool KillsSrc = MI->killsRegister(X86::FP0+Reg); 1037 1038 if (KillsSrc) { 1039 // If this is the last use of the source register, just make sure it's on 1040 // the top of the stack. 1041 moveToTop(Reg, I); 1042 assert(StackTop > 0 && "Stack cannot be empty!"); 1043 --StackTop; 1044 pushReg(getFPReg(MI->getOperand(0))); 1045 } else { 1046 // If this is not the last use of the source register, _copy_ it to the top 1047 // of the stack. 1048 duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I); 1049 } 1050 1051 // Change from the pseudo instruction to the concrete instruction. 1052 MI->RemoveOperand(1); // Drop the source operand. 1053 MI->RemoveOperand(0); // Drop the destination operand. 1054 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 1055} 1056 1057 1058//===----------------------------------------------------------------------===// 1059// Define tables of various ways to map pseudo instructions 1060// 1061 1062// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i) 1063static const TableEntry ForwardST0Table[] = { 1064 { X86::ADD_Fp32 , X86::ADD_FST0r }, 1065 { X86::ADD_Fp64 , X86::ADD_FST0r }, 1066 { X86::ADD_Fp80 , X86::ADD_FST0r }, 1067 { X86::DIV_Fp32 , X86::DIV_FST0r }, 1068 { X86::DIV_Fp64 , X86::DIV_FST0r }, 1069 { X86::DIV_Fp80 , X86::DIV_FST0r }, 1070 { X86::MUL_Fp32 , X86::MUL_FST0r }, 1071 { X86::MUL_Fp64 , X86::MUL_FST0r }, 1072 { X86::MUL_Fp80 , X86::MUL_FST0r }, 1073 { X86::SUB_Fp32 , X86::SUB_FST0r }, 1074 { X86::SUB_Fp64 , X86::SUB_FST0r }, 1075 { X86::SUB_Fp80 , X86::SUB_FST0r }, 1076}; 1077 1078// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0) 1079static const TableEntry ReverseST0Table[] = { 1080 { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative 1081 { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative 1082 { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative 1083 { X86::DIV_Fp32 , X86::DIVR_FST0r }, 1084 { X86::DIV_Fp64 , X86::DIVR_FST0r }, 1085 { X86::DIV_Fp80 , X86::DIVR_FST0r }, 1086 { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative 1087 { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative 1088 { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative 1089 { X86::SUB_Fp32 , X86::SUBR_FST0r }, 1090 { X86::SUB_Fp64 , X86::SUBR_FST0r }, 1091 { X86::SUB_Fp80 , X86::SUBR_FST0r }, 1092}; 1093 1094// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i) 1095static const TableEntry ForwardSTiTable[] = { 1096 { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative 1097 { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative 1098 { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative 1099 { X86::DIV_Fp32 , X86::DIVR_FrST0 }, 1100 { X86::DIV_Fp64 , X86::DIVR_FrST0 }, 1101 { X86::DIV_Fp80 , X86::DIVR_FrST0 }, 1102 { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative 1103 { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative 1104 { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative 1105 { X86::SUB_Fp32 , X86::SUBR_FrST0 }, 1106 { X86::SUB_Fp64 , X86::SUBR_FrST0 }, 1107 { X86::SUB_Fp80 , X86::SUBR_FrST0 }, 1108}; 1109 1110// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0) 1111static const TableEntry ReverseSTiTable[] = { 1112 { X86::ADD_Fp32 , X86::ADD_FrST0 }, 1113 { X86::ADD_Fp64 , X86::ADD_FrST0 }, 1114 { X86::ADD_Fp80 , X86::ADD_FrST0 }, 1115 { X86::DIV_Fp32 , X86::DIV_FrST0 }, 1116 { X86::DIV_Fp64 , X86::DIV_FrST0 }, 1117 { X86::DIV_Fp80 , X86::DIV_FrST0 }, 1118 { X86::MUL_Fp32 , X86::MUL_FrST0 }, 1119 { X86::MUL_Fp64 , X86::MUL_FrST0 }, 1120 { X86::MUL_Fp80 , X86::MUL_FrST0 }, 1121 { X86::SUB_Fp32 , X86::SUB_FrST0 }, 1122 { X86::SUB_Fp64 , X86::SUB_FrST0 }, 1123 { X86::SUB_Fp80 , X86::SUB_FrST0 }, 1124}; 1125 1126 1127/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual 1128/// instructions which need to be simplified and possibly transformed. 1129/// 1130/// Result: ST(0) = fsub ST(0), ST(i) 1131/// ST(i) = fsub ST(0), ST(i) 1132/// ST(0) = fsubr ST(0), ST(i) 1133/// ST(i) = fsubr ST(0), ST(i) 1134/// 1135void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) { 1136 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 1137 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 1138 MachineInstr *MI = I; 1139 1140 unsigned NumOperands = MI->getDesc().getNumOperands(); 1141 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!"); 1142 unsigned Dest = getFPReg(MI->getOperand(0)); 1143 unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); 1144 unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); 1145 bool KillsOp0 = MI->killsRegister(X86::FP0+Op0); 1146 bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 1147 DebugLoc dl = MI->getDebugLoc(); 1148 1149 unsigned TOS = getStackEntry(0); 1150 1151 // One of our operands must be on the top of the stack. If neither is yet, we 1152 // need to move one. 1153 if (Op0 != TOS && Op1 != TOS) { // No operand at TOS? 1154 // We can choose to move either operand to the top of the stack. If one of 1155 // the operands is killed by this instruction, we want that one so that we 1156 // can update right on top of the old version. 1157 if (KillsOp0) { 1158 moveToTop(Op0, I); // Move dead operand to TOS. 1159 TOS = Op0; 1160 } else if (KillsOp1) { 1161 moveToTop(Op1, I); 1162 TOS = Op1; 1163 } else { 1164 // All of the operands are live after this instruction executes, so we 1165 // cannot update on top of any operand. Because of this, we must 1166 // duplicate one of the stack elements to the top. It doesn't matter 1167 // which one we pick. 1168 // 1169 duplicateToTop(Op0, Dest, I); 1170 Op0 = TOS = Dest; 1171 KillsOp0 = true; 1172 } 1173 } else if (!KillsOp0 && !KillsOp1) { 1174 // If we DO have one of our operands at the top of the stack, but we don't 1175 // have a dead operand, we must duplicate one of the operands to a new slot 1176 // on the stack. 1177 duplicateToTop(Op0, Dest, I); 1178 Op0 = TOS = Dest; 1179 KillsOp0 = true; 1180 } 1181 1182 // Now we know that one of our operands is on the top of the stack, and at 1183 // least one of our operands is killed by this instruction. 1184 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) && 1185 "Stack conditions not set up right!"); 1186 1187 // We decide which form to use based on what is on the top of the stack, and 1188 // which operand is killed by this instruction. 1189 const TableEntry *InstTable; 1190 bool isForward = TOS == Op0; 1191 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0); 1192 if (updateST0) { 1193 if (isForward) 1194 InstTable = ForwardST0Table; 1195 else 1196 InstTable = ReverseST0Table; 1197 } else { 1198 if (isForward) 1199 InstTable = ForwardSTiTable; 1200 else 1201 InstTable = ReverseSTiTable; 1202 } 1203 1204 int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table), 1205 MI->getOpcode()); 1206 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!"); 1207 1208 // NotTOS - The register which is not on the top of stack... 1209 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0; 1210 1211 // Replace the old instruction with a new instruction 1212 MBB->remove(I++); 1213 I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS)); 1214 1215 // If both operands are killed, pop one off of the stack in addition to 1216 // overwriting the other one. 1217 if (KillsOp0 && KillsOp1 && Op0 != Op1) { 1218 assert(!updateST0 && "Should have updated other operand!"); 1219 popStackAfter(I); // Pop the top of stack 1220 } 1221 1222 // Update stack information so that we know the destination register is now on 1223 // the stack. 1224 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS); 1225 assert(UpdatedSlot < StackTop && Dest < 7); 1226 Stack[UpdatedSlot] = Dest; 1227 RegMap[Dest] = UpdatedSlot; 1228 MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction 1229} 1230 1231/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP 1232/// register arguments and no explicit destinations. 1233/// 1234void FPS::handleCompareFP(MachineBasicBlock::iterator &I) { 1235 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 1236 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 1237 MachineInstr *MI = I; 1238 1239 unsigned NumOperands = MI->getDesc().getNumOperands(); 1240 assert(NumOperands == 2 && "Illegal FUCOM* instruction!"); 1241 unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); 1242 unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); 1243 bool KillsOp0 = MI->killsRegister(X86::FP0+Op0); 1244 bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 1245 1246 // Make sure the first operand is on the top of stack, the other one can be 1247 // anywhere. 1248 moveToTop(Op0, I); 1249 1250 // Change from the pseudo instruction to the concrete instruction. 1251 MI->getOperand(0).setReg(getSTReg(Op1)); 1252 MI->RemoveOperand(1); 1253 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 1254 1255 // If any of the operands are killed by this instruction, free them. 1256 if (KillsOp0) freeStackSlotAfter(I, Op0); 1257 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1); 1258} 1259 1260/// handleCondMovFP - Handle two address conditional move instructions. These 1261/// instructions move a st(i) register to st(0) iff a condition is true. These 1262/// instructions require that the first operand is at the top of the stack, but 1263/// otherwise don't modify the stack at all. 1264void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) { 1265 MachineInstr *MI = I; 1266 1267 unsigned Op0 = getFPReg(MI->getOperand(0)); 1268 unsigned Op1 = getFPReg(MI->getOperand(2)); 1269 bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 1270 1271 // The first operand *must* be on the top of the stack. 1272 moveToTop(Op0, I); 1273 1274 // Change the second operand to the stack register that the operand is in. 1275 // Change from the pseudo instruction to the concrete instruction. 1276 MI->RemoveOperand(0); 1277 MI->RemoveOperand(1); 1278 MI->getOperand(0).setReg(getSTReg(Op1)); 1279 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 1280 1281 // If we kill the second operand, make sure to pop it from the stack. 1282 if (Op0 != Op1 && KillsOp1) { 1283 // Get this value off of the register stack. 1284 freeStackSlotAfter(I, Op1); 1285 } 1286} 1287 1288 1289/// handleSpecialFP - Handle special instructions which behave unlike other 1290/// floating point instructions. This is primarily intended for use by pseudo 1291/// instructions. 1292/// 1293void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { 1294 MachineInstr *MI = I; 1295 DebugLoc dl = MI->getDebugLoc(); 1296 switch (MI->getOpcode()) { 1297 default: llvm_unreachable("Unknown SpecialFP instruction!"); 1298 case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type! 1299 case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type! 1300 case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type! 1301 assert(StackTop == 0 && "Stack should be empty after a call!"); 1302 pushReg(getFPReg(MI->getOperand(0))); 1303 break; 1304 case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type! 1305 case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type! 1306 case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type! 1307 // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm. 1308 // The pattern we expect is: 1309 // CALL 1310 // FP1 = FpGET_ST0 1311 // FP4 = FpGET_ST1 1312 // 1313 // At this point, we've pushed FP1 on the top of stack, so it should be 1314 // present if it isn't dead. If it was dead, we already emitted a pop to 1315 // remove it from the stack and StackTop = 0. 1316 1317 // Push FP4 as top of stack next. 1318 pushReg(getFPReg(MI->getOperand(0))); 1319 1320 // If StackTop was 0 before we pushed our operand, then ST(0) must have been 1321 // dead. In this case, the ST(1) value is the only thing that is live, so 1322 // it should be on the TOS (after the pop that was emitted) and is. Just 1323 // continue in this case. 1324 if (StackTop == 1) 1325 break; 1326 1327 // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top 1328 // elements so that our accounting is correct. 1329 unsigned RegOnTop = getStackEntry(0); 1330 unsigned RegNo = getStackEntry(1); 1331 1332 // Swap the slots the regs are in. 1333 std::swap(RegMap[RegNo], RegMap[RegOnTop]); 1334 1335 // Swap stack slot contents. 1336 assert(RegMap[RegOnTop] < StackTop); 1337 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); 1338 break; 1339 } 1340 case X86::FpSET_ST0_32: 1341 case X86::FpSET_ST0_64: 1342 case X86::FpSET_ST0_80: { 1343 // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm 1344 // arguments that use an st constraint. We expect a sequence of 1345 // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM 1346 unsigned Op0 = getFPReg(MI->getOperand(0)); 1347 1348 if (!MI->killsRegister(X86::FP0 + Op0)) { 1349 // Duplicate Op0 into a temporary on the stack top. 1350 // This actually assumes that FP7 is dead. 1351 duplicateToTop(Op0, 7, I); 1352 } else { 1353 // Op0 is killed, so just swap it into position. 1354 moveToTop(Op0, I); 1355 } 1356 --StackTop; // "Forget" we have something on the top of stack! 1357 break; 1358 } 1359 case X86::FpSET_ST1_32: 1360 case X86::FpSET_ST1_64: 1361 case X86::FpSET_ST1_80: { 1362 // Set up st(1) for inline asm. We are assuming that st(0) has already been 1363 // set up by FpSET_ST0, and our StackTop is off by one because of it. 1364 unsigned Op0 = getFPReg(MI->getOperand(0)); 1365 // Restore the actual StackTop from before Fp_SET_ST0. 1366 // Note we can't handle Fp_SET_ST1 without a preceeding Fp_SET_ST0, and we 1367 // are not enforcing the constraint. 1368 ++StackTop; 1369 unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0). 1370 if (!MI->killsRegister(X86::FP0 + Op0)) { 1371 // Assume FP6 is not live, use it as a scratch register. 1372 duplicateToTop(Op0, 6, I); 1373 moveToTop(RegOnTop, I); 1374 } else if (getSTReg(Op0) != X86::ST1) { 1375 // We have the wrong value at st(1). Shuffle! Untested! 1376 moveToTop(getStackEntry(1), I); 1377 moveToTop(Op0, I); 1378 moveToTop(RegOnTop, I); 1379 } 1380 assert(StackTop >= 2 && "Too few live registers"); 1381 StackTop -= 2; // "Forget" both st(0) and st(1). 1382 break; 1383 } 1384 case X86::MOV_Fp3232: 1385 case X86::MOV_Fp3264: 1386 case X86::MOV_Fp6432: 1387 case X86::MOV_Fp6464: 1388 case X86::MOV_Fp3280: 1389 case X86::MOV_Fp6480: 1390 case X86::MOV_Fp8032: 1391 case X86::MOV_Fp8064: 1392 case X86::MOV_Fp8080: { 1393 const MachineOperand &MO1 = MI->getOperand(1); 1394 unsigned SrcReg = getFPReg(MO1); 1395 1396 const MachineOperand &MO0 = MI->getOperand(0); 1397 unsigned DestReg = getFPReg(MO0); 1398 if (MI->killsRegister(X86::FP0+SrcReg)) { 1399 // If the input operand is killed, we can just change the owner of the 1400 // incoming stack slot into the result. 1401 unsigned Slot = getSlot(SrcReg); 1402 assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!"); 1403 Stack[Slot] = DestReg; 1404 RegMap[DestReg] = Slot; 1405 1406 } else { 1407 // For FMOV we just duplicate the specified value to a new stack slot. 1408 // This could be made better, but would require substantial changes. 1409 duplicateToTop(SrcReg, DestReg, I); 1410 } 1411 } 1412 break; 1413 case TargetOpcode::INLINEASM: { 1414 // The inline asm MachineInstr currently only *uses* FP registers for the 1415 // 'f' constraint. These should be turned into the current ST(x) register 1416 // in the machine instr. Also, any kills should be explicitly popped after 1417 // the inline asm. 1418 unsigned Kills = 0; 1419 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1420 MachineOperand &Op = MI->getOperand(i); 1421 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) 1422 continue; 1423 assert(Op.isUse() && "Only handle inline asm uses right now"); 1424 1425 unsigned FPReg = getFPReg(Op); 1426 Op.setReg(getSTReg(FPReg)); 1427 1428 // If we kill this operand, make sure to pop it from the stack after the 1429 // asm. We just remember it for now, and pop them all off at the end in 1430 // a batch. 1431 if (Op.isKill()) 1432 Kills |= 1U << FPReg; 1433 } 1434 1435 // If this asm kills any FP registers (is the last use of them) we must 1436 // explicitly emit pop instructions for them. Do this now after the asm has 1437 // executed so that the ST(x) numbers are not off (which would happen if we 1438 // did this inline with operand rewriting). 1439 // 1440 // Note: this might be a non-optimal pop sequence. We might be able to do 1441 // better by trying to pop in stack order or something. 1442 MachineBasicBlock::iterator InsertPt = MI; 1443 while (Kills) { 1444 unsigned FPReg = CountTrailingZeros_32(Kills); 1445 freeStackSlotAfter(InsertPt, FPReg); 1446 Kills &= ~(1U << FPReg); 1447 } 1448 // Don't delete the inline asm! 1449 return; 1450 } 1451 1452 case X86::RET: 1453 case X86::RETI: 1454 // If RET has an FP register use operand, pass the first one in ST(0) and 1455 // the second one in ST(1). 1456 1457 // Find the register operands. 1458 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U; 1459 unsigned LiveMask = 0; 1460 1461 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1462 MachineOperand &Op = MI->getOperand(i); 1463 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) 1464 continue; 1465 // FP Register uses must be kills unless there are two uses of the same 1466 // register, in which case only one will be a kill. 1467 assert(Op.isUse() && 1468 (Op.isKill() || // Marked kill. 1469 getFPReg(Op) == FirstFPRegOp || // Second instance. 1470 MI->killsRegister(Op.getReg())) && // Later use is marked kill. 1471 "Ret only defs operands, and values aren't live beyond it"); 1472 1473 if (FirstFPRegOp == ~0U) 1474 FirstFPRegOp = getFPReg(Op); 1475 else { 1476 assert(SecondFPRegOp == ~0U && "More than two fp operands!"); 1477 SecondFPRegOp = getFPReg(Op); 1478 } 1479 LiveMask |= (1 << getFPReg(Op)); 1480 1481 // Remove the operand so that later passes don't see it. 1482 MI->RemoveOperand(i); 1483 --i, --e; 1484 } 1485 1486 // We may have been carrying spurious live-ins, so make sure only the returned 1487 // registers are left live. 1488 adjustLiveRegs(LiveMask, MI); 1489 if (!LiveMask) return; // Quick check to see if any are possible. 1490 1491 // There are only four possibilities here: 1492 // 1) we are returning a single FP value. In this case, it has to be in 1493 // ST(0) already, so just declare success by removing the value from the 1494 // FP Stack. 1495 if (SecondFPRegOp == ~0U) { 1496 // Assert that the top of stack contains the right FP register. 1497 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) && 1498 "Top of stack not the right register for RET!"); 1499 1500 // Ok, everything is good, mark the value as not being on the stack 1501 // anymore so that our assertion about the stack being empty at end of 1502 // block doesn't fire. 1503 StackTop = 0; 1504 return; 1505 } 1506 1507 // Otherwise, we are returning two values: 1508 // 2) If returning the same value for both, we only have one thing in the FP 1509 // stack. Consider: RET FP1, FP1 1510 if (StackTop == 1) { 1511 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&& 1512 "Stack misconfiguration for RET!"); 1513 1514 // Duplicate the TOS so that we return it twice. Just pick some other FPx 1515 // register to hold it. 1516 unsigned NewReg = (FirstFPRegOp+1)%7; 1517 duplicateToTop(FirstFPRegOp, NewReg, MI); 1518 FirstFPRegOp = NewReg; 1519 } 1520 1521 /// Okay we know we have two different FPx operands now: 1522 assert(StackTop == 2 && "Must have two values live!"); 1523 1524 /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently 1525 /// in ST(1). In this case, emit an fxch. 1526 if (getStackEntry(0) == SecondFPRegOp) { 1527 assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live"); 1528 moveToTop(FirstFPRegOp, MI); 1529 } 1530 1531 /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in 1532 /// ST(1). Just remove both from our understanding of the stack and return. 1533 assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live"); 1534 assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live"); 1535 StackTop = 0; 1536 return; 1537 } 1538 1539 I = MBB->erase(I); // Remove the pseudo instruction 1540 1541 // We want to leave I pointing to the previous instruction, but what if we 1542 // just erased the first instruction? 1543 if (I == MBB->begin()) { 1544 DEBUG(dbgs() << "Inserting dummy KILL\n"); 1545 I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL)); 1546 } else 1547 --I; 1548} 1549 1550// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands. 1551bool FPS::translateCopy(MachineInstr *MI) { 1552 unsigned DstReg = MI->getOperand(0).getReg(); 1553 unsigned SrcReg = MI->getOperand(1).getReg(); 1554 1555 if (DstReg == X86::ST0) { 1556 MI->setDesc(TII->get(X86::FpSET_ST0_80)); 1557 MI->RemoveOperand(0); 1558 return true; 1559 } 1560 if (DstReg == X86::ST1) { 1561 MI->setDesc(TII->get(X86::FpSET_ST1_80)); 1562 MI->RemoveOperand(0); 1563 return true; 1564 } 1565 if (SrcReg == X86::ST0) { 1566 MI->setDesc(TII->get(X86::FpGET_ST0_80)); 1567 return true; 1568 } 1569 if (SrcReg == X86::ST1) { 1570 MI->setDesc(TII->get(X86::FpGET_ST1_80)); 1571 return true; 1572 } 1573 if (X86::RFP80RegClass.contains(DstReg, SrcReg)) { 1574 MI->setDesc(TII->get(X86::MOV_Fp8080)); 1575 return true; 1576 } 1577 return false; 1578} 1579