// X86FloatingPoint.cpp revision a40ce7e394283d78bdda3d7ea728ba2ffae0ef39
//===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the pass which converts floating point instructions from
// pseudo registers into register stack instructions.  This pass uses live
// variable information to indicate where the FPn registers are used and their
// lifetimes.
//
// The x87 hardware tracks liveness of the stack registers, so it is necessary
// to implement exact liveness tracking between basic blocks. The CFG edges are
// partitioned into bundles where the same FP registers must be live in
// identical stack positions. Instructions are inserted at the end of each basic
// block to rearrange the live registers to match the outgoing bundle.
//
// This approach avoids splitting critical edges at the potential cost of more
// live register shuffling instructions when critical edges are present.
23// 24//===----------------------------------------------------------------------===// 25 26#define DEBUG_TYPE "x86-codegen" 27#include "X86.h" 28#include "X86InstrInfo.h" 29#include "llvm/ADT/DepthFirstIterator.h" 30#include "llvm/ADT/DenseMap.h" 31#include "llvm/ADT/SmallPtrSet.h" 32#include "llvm/ADT/SmallVector.h" 33#include "llvm/ADT/Statistic.h" 34#include "llvm/ADT/STLExtras.h" 35#include "llvm/CodeGen/MachineFunctionPass.h" 36#include "llvm/CodeGen/MachineInstrBuilder.h" 37#include "llvm/CodeGen/MachineRegisterInfo.h" 38#include "llvm/CodeGen/Passes.h" 39#include "llvm/Support/Debug.h" 40#include "llvm/Support/ErrorHandling.h" 41#include "llvm/Support/raw_ostream.h" 42#include "llvm/Target/TargetInstrInfo.h" 43#include "llvm/Target/TargetMachine.h" 44#include <algorithm> 45using namespace llvm; 46 47STATISTIC(NumFXCH, "Number of fxch instructions inserted"); 48STATISTIC(NumFP , "Number of floating point instructions"); 49 50namespace { 51 struct FPS : public MachineFunctionPass { 52 static char ID; 53 FPS() : MachineFunctionPass(&ID) { 54 // This is really only to keep valgrind quiet. 55 // The logic in isLive() is too much for it. 56 memset(Stack, 0, sizeof(Stack)); 57 memset(RegMap, 0, sizeof(RegMap)); 58 } 59 60 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 61 AU.setPreservesCFG(); 62 AU.addPreservedID(MachineLoopInfoID); 63 AU.addPreservedID(MachineDominatorsID); 64 MachineFunctionPass::getAnalysisUsage(AU); 65 } 66 67 virtual bool runOnMachineFunction(MachineFunction &MF); 68 69 virtual const char *getPassName() const { return "X86 FP Stackifier"; } 70 71 private: 72 const TargetInstrInfo *TII; // Machine instruction info. 73 74 // Two CFG edges are related if they leave the same block, or enter the same 75 // block. The transitive closure of an edge under this relation is a 76 // LiveBundle. It represents a set of CFG edges where the live FP stack 77 // registers must be allocated identically in the x87 stack. 
78 // 79 // A LiveBundle is usually all the edges leaving a block, or all the edges 80 // entering a block, but it can contain more edges if critical edges are 81 // present. 82 // 83 // The set of live FP registers in a LiveBundle is calculated by bundleCFG, 84 // but the exact mapping of FP registers to stack slots is fixed later. 85 struct LiveBundle { 86 // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c. 87 unsigned Mask; 88 89 // Number of pre-assigned live registers in FixStack. This is 0 when the 90 // stack order has not yet been fixed. 91 unsigned FixCount; 92 93 // Assigned stack order for live-in registers. 94 // FixStack[i] == getStackEntry(i) for all i < FixCount. 95 unsigned char FixStack[8]; 96 97 LiveBundle(unsigned m = 0) : Mask(m), FixCount(0) {} 98 99 // Have the live registers been assigned a stack order yet? 100 bool isFixed() const { return !Mask || FixCount; } 101 }; 102 103 // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges 104 // with no live FP registers. 105 SmallVector<LiveBundle, 8> LiveBundles; 106 107 // Map each MBB in the current function to an (ingoing, outgoing) index into 108 // LiveBundles. Blocks with no FP registers live in or out map to (0, 0) 109 // and are not actually stored in the map. 110 DenseMap<MachineBasicBlock*, std::pair<unsigned, unsigned> > BlockBundle; 111 112 // Return a bitmask of FP registers in block's live-in list. 113 unsigned calcLiveInMask(MachineBasicBlock *MBB) { 114 unsigned Mask = 0; 115 for (MachineBasicBlock::livein_iterator I = MBB->livein_begin(), 116 E = MBB->livein_end(); I != E; ++I) { 117 unsigned Reg = *I - X86::FP0; 118 if (Reg < 8) 119 Mask |= 1 << Reg; 120 } 121 return Mask; 122 } 123 124 // Partition all the CFG edges into LiveBundles. 125 void bundleCFG(MachineFunction &MF); 126 127 MachineBasicBlock *MBB; // Current basic block 128 unsigned Stack[8]; // FP<n> Registers in each stack slot... 
129 unsigned RegMap[8]; // Track which stack slot contains each register 130 unsigned StackTop; // The current top of the FP stack. 131 132 // Set up our stack model to match the incoming registers to MBB. 133 void setupBlockStack(); 134 135 // Shuffle live registers to match the expectations of successor blocks. 136 void finishBlockStack(); 137 138 void dumpStack() const { 139 dbgs() << "Stack contents:"; 140 for (unsigned i = 0; i != StackTop; ++i) { 141 dbgs() << " FP" << Stack[i]; 142 assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!"); 143 } 144 dbgs() << "\n"; 145 } 146 147 /// isStackEmpty - Return true if the FP stack is empty. 148 bool isStackEmpty() const { 149 return StackTop == 0; 150 } 151 152 /// getSlot - Return the stack slot number a particular register number is 153 /// in. 154 unsigned getSlot(unsigned RegNo) const { 155 assert(RegNo < 8 && "Regno out of range!"); 156 return RegMap[RegNo]; 157 } 158 159 /// isLive - Is RegNo currently live in the stack? 160 bool isLive(unsigned RegNo) const { 161 unsigned Slot = getSlot(RegNo); 162 return Slot < StackTop && Stack[Slot] == RegNo; 163 } 164 165 /// getScratchReg - Return an FP register that is not currently in use. 166 unsigned getScratchReg() { 167 for (int i = 7; i >= 0; --i) 168 if (!isLive(i)) 169 return i; 170 llvm_unreachable("Ran out of scratch FP registers"); 171 } 172 173 /// getStackEntry - Return the X86::FP<n> register in register ST(i). 174 unsigned getStackEntry(unsigned STi) const { 175 assert(STi < StackTop && "Access past stack top!"); 176 return Stack[StackTop-1-STi]; 177 } 178 179 /// getSTReg - Return the X86::ST(i) register which contains the specified 180 /// FP<RegNo> register. 181 unsigned getSTReg(unsigned RegNo) const { 182 return StackTop - 1 - getSlot(RegNo) + llvm::X86::ST0; 183 } 184 185 // pushReg - Push the specified FP<n> register onto the stack. 
186 void pushReg(unsigned Reg) { 187 assert(Reg < 8 && "Register number out of range!"); 188 assert(StackTop < 8 && "Stack overflow!"); 189 Stack[StackTop] = Reg; 190 RegMap[Reg] = StackTop++; 191 } 192 193 bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; } 194 void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) { 195 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc(); 196 if (isAtTop(RegNo)) return; 197 198 unsigned STReg = getSTReg(RegNo); 199 unsigned RegOnTop = getStackEntry(0); 200 201 // Swap the slots the regs are in. 202 std::swap(RegMap[RegNo], RegMap[RegOnTop]); 203 204 // Swap stack slot contents. 205 assert(RegMap[RegOnTop] < StackTop); 206 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); 207 208 // Emit an fxch to update the runtime processors version of the state. 209 BuildMI(*MBB, I, dl, TII->get(X86::XCH_F)).addReg(STReg); 210 ++NumFXCH; 211 } 212 213 void duplicateToTop(unsigned RegNo, unsigned AsReg, MachineInstr *I) { 214 DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc(); 215 unsigned STReg = getSTReg(RegNo); 216 pushReg(AsReg); // New register on top of stack 217 218 BuildMI(*MBB, I, dl, TII->get(X86::LD_Frr)).addReg(STReg); 219 } 220 221 /// popStackAfter - Pop the current value off of the top of the FP stack 222 /// after the specified instruction. 223 void popStackAfter(MachineBasicBlock::iterator &I); 224 225 /// freeStackSlotAfter - Free the specified register from the register 226 /// stack, so that it is no longer in a register. If the register is 227 /// currently at the top of the stack, we just pop the current instruction, 228 /// otherwise we store the current top-of-stack into the specified slot, 229 /// then pop the top of stack. 230 void freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned Reg); 231 232 /// freeStackSlotBefore - Just the pop, no folding. Return the inserted 233 /// instruction. 
234 MachineBasicBlock::iterator 235 freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo); 236 237 /// Adjust the live registers to be the set in Mask. 238 void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I); 239 240 /// Shuffle the top FixCount stack entries susch that FP reg FixStack[0] is 241 /// st(0), FP reg FixStack[1] is st(1) etc. 242 void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount, 243 MachineBasicBlock::iterator I); 244 245 bool processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB); 246 247 void handleZeroArgFP(MachineBasicBlock::iterator &I); 248 void handleOneArgFP(MachineBasicBlock::iterator &I); 249 void handleOneArgFPRW(MachineBasicBlock::iterator &I); 250 void handleTwoArgFP(MachineBasicBlock::iterator &I); 251 void handleCompareFP(MachineBasicBlock::iterator &I); 252 void handleCondMovFP(MachineBasicBlock::iterator &I); 253 void handleSpecialFP(MachineBasicBlock::iterator &I); 254 255 bool translateCopy(MachineInstr*); 256 }; 257 char FPS::ID = 0; 258} 259 260FunctionPass *llvm::createX86FloatingPointStackifierPass() { return new FPS(); } 261 262/// getFPReg - Return the X86::FPx register number for the specified operand. 263/// For example, this returns 3 for X86::FP3. 264static unsigned getFPReg(const MachineOperand &MO) { 265 assert(MO.isReg() && "Expected an FP register!"); 266 unsigned Reg = MO.getReg(); 267 assert(Reg >= X86::FP0 && Reg <= X86::FP6 && "Expected FP register!"); 268 return Reg - X86::FP0; 269} 270 271/// runOnMachineFunction - Loop over all of the basic blocks, transforming FP 272/// register references into FP stack references. 273/// 274bool FPS::runOnMachineFunction(MachineFunction &MF) { 275 // We only need to run this pass if there are any FP registers used in this 276 // function. If it is all integer, there is nothing for us to do! 
277 bool FPIsUsed = false; 278 279 assert(X86::FP6 == X86::FP0+6 && "Register enums aren't sorted right!"); 280 for (unsigned i = 0; i <= 6; ++i) 281 if (MF.getRegInfo().isPhysRegUsed(X86::FP0+i)) { 282 FPIsUsed = true; 283 break; 284 } 285 286 // Early exit. 287 if (!FPIsUsed) return false; 288 289 TII = MF.getTarget().getInstrInfo(); 290 291 // Prepare cross-MBB liveness. 292 bundleCFG(MF); 293 294 StackTop = 0; 295 296 // Process the function in depth first order so that we process at least one 297 // of the predecessors for every reachable block in the function. 298 SmallPtrSet<MachineBasicBlock*, 8> Processed; 299 MachineBasicBlock *Entry = MF.begin(); 300 301 bool Changed = false; 302 for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 8> > 303 I = df_ext_begin(Entry, Processed), E = df_ext_end(Entry, Processed); 304 I != E; ++I) 305 Changed |= processBasicBlock(MF, **I); 306 307 // Process any unreachable blocks in arbitrary order now. 308 if (MF.size() != Processed.size()) 309 for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) 310 if (Processed.insert(BB)) 311 Changed |= processBasicBlock(MF, *BB); 312 313 BlockBundle.clear(); 314 LiveBundles.clear(); 315 316 return Changed; 317} 318 319/// bundleCFG - Scan all the basic blocks to determine consistent live-in and 320/// live-out sets for the FP registers. Consistent means that the set of 321/// registers live-out from a block is identical to the live-in set of all 322/// successors. This is not enforced by the normal live-in lists since 323/// registers may be implicitly defined, or not used by all successors. 324void FPS::bundleCFG(MachineFunction &MF) { 325 assert(LiveBundles.empty() && "Stale data in LiveBundles"); 326 assert(BlockBundle.empty() && "Stale data in BlockBundle"); 327 SmallPtrSet<MachineBasicBlock*, 8> PropDown, PropUp; 328 329 // LiveBundle[0] is the empty live-in set. 
330 LiveBundles.resize(1); 331 332 // First gather the actual live-in masks for all MBBs. 333 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { 334 MachineBasicBlock *MBB = I; 335 const unsigned Mask = calcLiveInMask(MBB); 336 if (!Mask) 337 continue; 338 // Ingoing bundle index. 339 unsigned &Idx = BlockBundle[MBB].first; 340 // Already assigned an ingoing bundle? 341 if (Idx) 342 continue; 343 // Allocate a new LiveBundle struct for this block's live-ins. 344 const unsigned BundleIdx = Idx = LiveBundles.size(); 345 DEBUG(dbgs() << "Creating LB#" << BundleIdx << ": in:BB#" 346 << MBB->getNumber()); 347 LiveBundles.push_back(Mask); 348 LiveBundle &Bundle = LiveBundles.back(); 349 350 // Make sure all predecessors have the same live-out set. 351 PropUp.insert(MBB); 352 353 // Keep pushing liveness up and down the CFG until convergence. 354 // Only critical edges cause iteration here, but when they do, multiple 355 // blocks can be assigned to the same LiveBundle index. 356 do { 357 // Assign BundleIdx as liveout from predecessors in PropUp. 358 for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropUp.begin(), 359 E = PropUp.end(); I != E; ++I) { 360 MachineBasicBlock *MBB = *I; 361 for (MachineBasicBlock::const_pred_iterator LinkI = MBB->pred_begin(), 362 LinkE = MBB->pred_end(); LinkI != LinkE; ++LinkI) { 363 MachineBasicBlock *PredMBB = *LinkI; 364 // PredMBB's liveout bundle should be set to LIIdx. 365 unsigned &Idx = BlockBundle[PredMBB].second; 366 if (Idx) { 367 assert(Idx == BundleIdx && "Inconsistent CFG"); 368 continue; 369 } 370 Idx = BundleIdx; 371 DEBUG(dbgs() << " out:BB#" << PredMBB->getNumber()); 372 // Propagate to siblings. 373 if (PredMBB->succ_size() > 1) 374 PropDown.insert(PredMBB); 375 } 376 } 377 PropUp.clear(); 378 379 // Assign BundleIdx as livein to successors in PropDown. 
380 for (SmallPtrSet<MachineBasicBlock*, 16>::iterator I = PropDown.begin(), 381 E = PropDown.end(); I != E; ++I) { 382 MachineBasicBlock *MBB = *I; 383 for (MachineBasicBlock::const_succ_iterator LinkI = MBB->succ_begin(), 384 LinkE = MBB->succ_end(); LinkI != LinkE; ++LinkI) { 385 MachineBasicBlock *SuccMBB = *LinkI; 386 // LinkMBB's livein bundle should be set to BundleIdx. 387 unsigned &Idx = BlockBundle[SuccMBB].first; 388 if (Idx) { 389 assert(Idx == BundleIdx && "Inconsistent CFG"); 390 continue; 391 } 392 Idx = BundleIdx; 393 DEBUG(dbgs() << " in:BB#" << SuccMBB->getNumber()); 394 // Propagate to siblings. 395 if (SuccMBB->pred_size() > 1) 396 PropUp.insert(SuccMBB); 397 // Also accumulate the bundle liveness mask from the liveins here. 398 Bundle.Mask |= calcLiveInMask(SuccMBB); 399 } 400 } 401 PropDown.clear(); 402 } while (!PropUp.empty()); 403 DEBUG({ 404 dbgs() << " live:"; 405 for (unsigned i = 0; i < 8; ++i) 406 if (Bundle.Mask & (1<<i)) 407 dbgs() << " %FP" << i; 408 dbgs() << '\n'; 409 }); 410 } 411} 412 413/// processBasicBlock - Loop over all of the instructions in the basic block, 414/// transforming FP instructions into their stack form. 415/// 416bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { 417 bool Changed = false; 418 MBB = &BB; 419 420 setupBlockStack(); 421 422 for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { 423 MachineInstr *MI = I; 424 uint64_t Flags = MI->getDesc().TSFlags; 425 426 unsigned FPInstClass = Flags & X86II::FPTypeMask; 427 if (MI->isInlineAsm()) 428 FPInstClass = X86II::SpecialFP; 429 430 if (MI->isCopy() && translateCopy(MI)) 431 FPInstClass = X86II::SpecialFP; 432 433 if (FPInstClass == X86II::NotFP) 434 continue; // Efficiently ignore non-fp insts! 
435 436 MachineInstr *PrevMI = 0; 437 if (I != BB.begin()) 438 PrevMI = prior(I); 439 440 ++NumFP; // Keep track of # of pseudo instrs 441 DEBUG(dbgs() << "\nFPInst:\t" << *MI); 442 443 // Get dead variables list now because the MI pointer may be deleted as part 444 // of processing! 445 SmallVector<unsigned, 8> DeadRegs; 446 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 447 const MachineOperand &MO = MI->getOperand(i); 448 if (MO.isReg() && MO.isDead()) 449 DeadRegs.push_back(MO.getReg()); 450 } 451 452 switch (FPInstClass) { 453 case X86II::ZeroArgFP: handleZeroArgFP(I); break; 454 case X86II::OneArgFP: handleOneArgFP(I); break; // fstp ST(0) 455 case X86II::OneArgFPRW: handleOneArgFPRW(I); break; // ST(0) = fsqrt(ST(0)) 456 case X86II::TwoArgFP: handleTwoArgFP(I); break; 457 case X86II::CompareFP: handleCompareFP(I); break; 458 case X86II::CondMovFP: handleCondMovFP(I); break; 459 case X86II::SpecialFP: handleSpecialFP(I); break; 460 default: llvm_unreachable("Unknown FP Type!"); 461 } 462 463 // Check to see if any of the values defined by this instruction are dead 464 // after definition. If so, pop them. 465 for (unsigned i = 0, e = DeadRegs.size(); i != e; ++i) { 466 unsigned Reg = DeadRegs[i]; 467 if (Reg >= X86::FP0 && Reg <= X86::FP6) { 468 DEBUG(dbgs() << "Register FP#" << Reg-X86::FP0 << " is dead!\n"); 469 freeStackSlotAfter(I, Reg-X86::FP0); 470 } 471 } 472 473 // Print out all of the instructions expanded to if -debug 474 DEBUG( 475 MachineBasicBlock::iterator PrevI(PrevMI); 476 if (I == PrevI) { 477 dbgs() << "Just deleted pseudo instruction\n"; 478 } else { 479 MachineBasicBlock::iterator Start = I; 480 // Rewind to first instruction newly inserted. 
481 while (Start != BB.begin() && prior(Start) != PrevI) --Start; 482 dbgs() << "Inserted instructions:\n\t"; 483 Start->print(dbgs(), &MF.getTarget()); 484 while (++Start != llvm::next(I)) {} 485 } 486 dumpStack(); 487 ); 488 489 Changed = true; 490 } 491 492 finishBlockStack(); 493 494 return Changed; 495} 496 497/// setupBlockStack - Use the BlockBundle map to set up our model of the stack 498/// to match predecessors' live out stack. 499void FPS::setupBlockStack() { 500 DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB->getNumber() 501 << " derived from " << MBB->getName() << ".\n"); 502 StackTop = 0; 503 const LiveBundle &Bundle = LiveBundles[BlockBundle.lookup(MBB).first]; 504 505 if (!Bundle.Mask) { 506 DEBUG(dbgs() << "Block has no FP live-ins.\n"); 507 return; 508 } 509 510 // Depth-first iteration should ensure that we always have an assigned stack. 511 assert(Bundle.isFixed() && "Reached block before any predecessors"); 512 513 // Push the fixed live-in registers. 514 for (unsigned i = Bundle.FixCount; i > 0; --i) { 515 MBB->addLiveIn(X86::ST0+i-1); 516 DEBUG(dbgs() << "Live-in st(" << (i-1) << "): %FP" 517 << unsigned(Bundle.FixStack[i-1]) << '\n'); 518 pushReg(Bundle.FixStack[i-1]); 519 } 520 521 // Kill off unwanted live-ins. This can happen with a critical edge. 522 // FIXME: We could keep these live registers around as zombies. They may need 523 // to be revived at the end of a short block. It might save a few instrs. 524 adjustLiveRegs(calcLiveInMask(MBB), MBB->begin()); 525 DEBUG(MBB->dump()); 526} 527 528/// finishBlockStack - Revive live-outs that are implicitly defined out of 529/// MBB. Shuffle live registers to match the expected fixed stack of any 530/// predecessors, and ensure that all predecessors are expecting the same 531/// stack. 532void FPS::finishBlockStack() { 533 // The RET handling below takes care of return blocks for us. 
534 if (MBB->succ_empty()) 535 return; 536 537 DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB->getNumber() 538 << " derived from " << MBB->getName() << ".\n"); 539 540 unsigned BundleIdx = BlockBundle.lookup(MBB).second; 541 LiveBundle &Bundle = LiveBundles[BundleIdx]; 542 543 // We may need to kill and define some registers to match successors. 544 // FIXME: This can probably be combined with the shuffle below. 545 MachineBasicBlock::iterator Term = MBB->getFirstTerminator(); 546 adjustLiveRegs(Bundle.Mask, Term); 547 548 if (!Bundle.Mask) { 549 DEBUG(dbgs() << "No live-outs.\n"); 550 return; 551 } 552 553 // Has the stack order been fixed yet? 554 DEBUG(dbgs() << "LB#" << BundleIdx << ": "); 555 if (Bundle.isFixed()) { 556 DEBUG(dbgs() << "Shuffling stack to match.\n"); 557 shuffleStackTop(Bundle.FixStack, Bundle.FixCount, Term); 558 } else { 559 // Not fixed yet, we get to choose. 560 DEBUG(dbgs() << "Fixing stack order now.\n"); 561 Bundle.FixCount = StackTop; 562 for (unsigned i = 0; i < StackTop; ++i) 563 Bundle.FixStack[i] = getStackEntry(i); 564 } 565} 566 567 568//===----------------------------------------------------------------------===// 569// Efficient Lookup Table Support 570//===----------------------------------------------------------------------===// 571 572namespace { 573 struct TableEntry { 574 unsigned from; 575 unsigned to; 576 bool operator<(const TableEntry &TE) const { return from < TE.from; } 577 friend bool operator<(const TableEntry &TE, unsigned V) { 578 return TE.from < V; 579 } 580 friend bool operator<(unsigned V, const TableEntry &TE) { 581 return V < TE.from; 582 } 583 }; 584} 585 586#ifndef NDEBUG 587static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) { 588 for (unsigned i = 0; i != NumEntries-1; ++i) 589 if (!(Table[i] < Table[i+1])) return false; 590 return true; 591} 592#endif 593 594static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) { 595 const TableEntry *I = 
std::lower_bound(Table, Table+N, Opcode); 596 if (I != Table+N && I->from == Opcode) 597 return I->to; 598 return -1; 599} 600 601#ifdef NDEBUG 602#define ASSERT_SORTED(TABLE) 603#else 604#define ASSERT_SORTED(TABLE) \ 605 { static bool TABLE##Checked = false; \ 606 if (!TABLE##Checked) { \ 607 assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \ 608 "All lookup tables must be sorted for efficient access!"); \ 609 TABLE##Checked = true; \ 610 } \ 611 } 612#endif 613 614//===----------------------------------------------------------------------===// 615// Register File -> Register Stack Mapping Methods 616//===----------------------------------------------------------------------===// 617 618// OpcodeTable - Sorted map of register instructions to their stack version. 619// The first element is an register file pseudo instruction, the second is the 620// concrete X86 instruction which uses the register stack. 621// 622static const TableEntry OpcodeTable[] = { 623 { X86::ABS_Fp32 , X86::ABS_F }, 624 { X86::ABS_Fp64 , X86::ABS_F }, 625 { X86::ABS_Fp80 , X86::ABS_F }, 626 { X86::ADD_Fp32m , X86::ADD_F32m }, 627 { X86::ADD_Fp64m , X86::ADD_F64m }, 628 { X86::ADD_Fp64m32 , X86::ADD_F32m }, 629 { X86::ADD_Fp80m32 , X86::ADD_F32m }, 630 { X86::ADD_Fp80m64 , X86::ADD_F64m }, 631 { X86::ADD_FpI16m32 , X86::ADD_FI16m }, 632 { X86::ADD_FpI16m64 , X86::ADD_FI16m }, 633 { X86::ADD_FpI16m80 , X86::ADD_FI16m }, 634 { X86::ADD_FpI32m32 , X86::ADD_FI32m }, 635 { X86::ADD_FpI32m64 , X86::ADD_FI32m }, 636 { X86::ADD_FpI32m80 , X86::ADD_FI32m }, 637 { X86::CHS_Fp32 , X86::CHS_F }, 638 { X86::CHS_Fp64 , X86::CHS_F }, 639 { X86::CHS_Fp80 , X86::CHS_F }, 640 { X86::CMOVBE_Fp32 , X86::CMOVBE_F }, 641 { X86::CMOVBE_Fp64 , X86::CMOVBE_F }, 642 { X86::CMOVBE_Fp80 , X86::CMOVBE_F }, 643 { X86::CMOVB_Fp32 , X86::CMOVB_F }, 644 { X86::CMOVB_Fp64 , X86::CMOVB_F }, 645 { X86::CMOVB_Fp80 , X86::CMOVB_F }, 646 { X86::CMOVE_Fp32 , X86::CMOVE_F }, 647 { X86::CMOVE_Fp64 , X86::CMOVE_F }, 648 { 
X86::CMOVE_Fp80 , X86::CMOVE_F }, 649 { X86::CMOVNBE_Fp32 , X86::CMOVNBE_F }, 650 { X86::CMOVNBE_Fp64 , X86::CMOVNBE_F }, 651 { X86::CMOVNBE_Fp80 , X86::CMOVNBE_F }, 652 { X86::CMOVNB_Fp32 , X86::CMOVNB_F }, 653 { X86::CMOVNB_Fp64 , X86::CMOVNB_F }, 654 { X86::CMOVNB_Fp80 , X86::CMOVNB_F }, 655 { X86::CMOVNE_Fp32 , X86::CMOVNE_F }, 656 { X86::CMOVNE_Fp64 , X86::CMOVNE_F }, 657 { X86::CMOVNE_Fp80 , X86::CMOVNE_F }, 658 { X86::CMOVNP_Fp32 , X86::CMOVNP_F }, 659 { X86::CMOVNP_Fp64 , X86::CMOVNP_F }, 660 { X86::CMOVNP_Fp80 , X86::CMOVNP_F }, 661 { X86::CMOVP_Fp32 , X86::CMOVP_F }, 662 { X86::CMOVP_Fp64 , X86::CMOVP_F }, 663 { X86::CMOVP_Fp80 , X86::CMOVP_F }, 664 { X86::COS_Fp32 , X86::COS_F }, 665 { X86::COS_Fp64 , X86::COS_F }, 666 { X86::COS_Fp80 , X86::COS_F }, 667 { X86::DIVR_Fp32m , X86::DIVR_F32m }, 668 { X86::DIVR_Fp64m , X86::DIVR_F64m }, 669 { X86::DIVR_Fp64m32 , X86::DIVR_F32m }, 670 { X86::DIVR_Fp80m32 , X86::DIVR_F32m }, 671 { X86::DIVR_Fp80m64 , X86::DIVR_F64m }, 672 { X86::DIVR_FpI16m32, X86::DIVR_FI16m}, 673 { X86::DIVR_FpI16m64, X86::DIVR_FI16m}, 674 { X86::DIVR_FpI16m80, X86::DIVR_FI16m}, 675 { X86::DIVR_FpI32m32, X86::DIVR_FI32m}, 676 { X86::DIVR_FpI32m64, X86::DIVR_FI32m}, 677 { X86::DIVR_FpI32m80, X86::DIVR_FI32m}, 678 { X86::DIV_Fp32m , X86::DIV_F32m }, 679 { X86::DIV_Fp64m , X86::DIV_F64m }, 680 { X86::DIV_Fp64m32 , X86::DIV_F32m }, 681 { X86::DIV_Fp80m32 , X86::DIV_F32m }, 682 { X86::DIV_Fp80m64 , X86::DIV_F64m }, 683 { X86::DIV_FpI16m32 , X86::DIV_FI16m }, 684 { X86::DIV_FpI16m64 , X86::DIV_FI16m }, 685 { X86::DIV_FpI16m80 , X86::DIV_FI16m }, 686 { X86::DIV_FpI32m32 , X86::DIV_FI32m }, 687 { X86::DIV_FpI32m64 , X86::DIV_FI32m }, 688 { X86::DIV_FpI32m80 , X86::DIV_FI32m }, 689 { X86::ILD_Fp16m32 , X86::ILD_F16m }, 690 { X86::ILD_Fp16m64 , X86::ILD_F16m }, 691 { X86::ILD_Fp16m80 , X86::ILD_F16m }, 692 { X86::ILD_Fp32m32 , X86::ILD_F32m }, 693 { X86::ILD_Fp32m64 , X86::ILD_F32m }, 694 { X86::ILD_Fp32m80 , X86::ILD_F32m }, 695 { X86::ILD_Fp64m32 , 
X86::ILD_F64m }, 696 { X86::ILD_Fp64m64 , X86::ILD_F64m }, 697 { X86::ILD_Fp64m80 , X86::ILD_F64m }, 698 { X86::ISTT_Fp16m32 , X86::ISTT_FP16m}, 699 { X86::ISTT_Fp16m64 , X86::ISTT_FP16m}, 700 { X86::ISTT_Fp16m80 , X86::ISTT_FP16m}, 701 { X86::ISTT_Fp32m32 , X86::ISTT_FP32m}, 702 { X86::ISTT_Fp32m64 , X86::ISTT_FP32m}, 703 { X86::ISTT_Fp32m80 , X86::ISTT_FP32m}, 704 { X86::ISTT_Fp64m32 , X86::ISTT_FP64m}, 705 { X86::ISTT_Fp64m64 , X86::ISTT_FP64m}, 706 { X86::ISTT_Fp64m80 , X86::ISTT_FP64m}, 707 { X86::IST_Fp16m32 , X86::IST_F16m }, 708 { X86::IST_Fp16m64 , X86::IST_F16m }, 709 { X86::IST_Fp16m80 , X86::IST_F16m }, 710 { X86::IST_Fp32m32 , X86::IST_F32m }, 711 { X86::IST_Fp32m64 , X86::IST_F32m }, 712 { X86::IST_Fp32m80 , X86::IST_F32m }, 713 { X86::IST_Fp64m32 , X86::IST_FP64m }, 714 { X86::IST_Fp64m64 , X86::IST_FP64m }, 715 { X86::IST_Fp64m80 , X86::IST_FP64m }, 716 { X86::LD_Fp032 , X86::LD_F0 }, 717 { X86::LD_Fp064 , X86::LD_F0 }, 718 { X86::LD_Fp080 , X86::LD_F0 }, 719 { X86::LD_Fp132 , X86::LD_F1 }, 720 { X86::LD_Fp164 , X86::LD_F1 }, 721 { X86::LD_Fp180 , X86::LD_F1 }, 722 { X86::LD_Fp32m , X86::LD_F32m }, 723 { X86::LD_Fp32m64 , X86::LD_F32m }, 724 { X86::LD_Fp32m80 , X86::LD_F32m }, 725 { X86::LD_Fp64m , X86::LD_F64m }, 726 { X86::LD_Fp64m80 , X86::LD_F64m }, 727 { X86::LD_Fp80m , X86::LD_F80m }, 728 { X86::MUL_Fp32m , X86::MUL_F32m }, 729 { X86::MUL_Fp64m , X86::MUL_F64m }, 730 { X86::MUL_Fp64m32 , X86::MUL_F32m }, 731 { X86::MUL_Fp80m32 , X86::MUL_F32m }, 732 { X86::MUL_Fp80m64 , X86::MUL_F64m }, 733 { X86::MUL_FpI16m32 , X86::MUL_FI16m }, 734 { X86::MUL_FpI16m64 , X86::MUL_FI16m }, 735 { X86::MUL_FpI16m80 , X86::MUL_FI16m }, 736 { X86::MUL_FpI32m32 , X86::MUL_FI32m }, 737 { X86::MUL_FpI32m64 , X86::MUL_FI32m }, 738 { X86::MUL_FpI32m80 , X86::MUL_FI32m }, 739 { X86::SIN_Fp32 , X86::SIN_F }, 740 { X86::SIN_Fp64 , X86::SIN_F }, 741 { X86::SIN_Fp80 , X86::SIN_F }, 742 { X86::SQRT_Fp32 , X86::SQRT_F }, 743 { X86::SQRT_Fp64 , X86::SQRT_F }, 744 { 
X86::SQRT_Fp80 , X86::SQRT_F }, 745 { X86::ST_Fp32m , X86::ST_F32m }, 746 { X86::ST_Fp64m , X86::ST_F64m }, 747 { X86::ST_Fp64m32 , X86::ST_F32m }, 748 { X86::ST_Fp80m32 , X86::ST_F32m }, 749 { X86::ST_Fp80m64 , X86::ST_F64m }, 750 { X86::ST_FpP80m , X86::ST_FP80m }, 751 { X86::SUBR_Fp32m , X86::SUBR_F32m }, 752 { X86::SUBR_Fp64m , X86::SUBR_F64m }, 753 { X86::SUBR_Fp64m32 , X86::SUBR_F32m }, 754 { X86::SUBR_Fp80m32 , X86::SUBR_F32m }, 755 { X86::SUBR_Fp80m64 , X86::SUBR_F64m }, 756 { X86::SUBR_FpI16m32, X86::SUBR_FI16m}, 757 { X86::SUBR_FpI16m64, X86::SUBR_FI16m}, 758 { X86::SUBR_FpI16m80, X86::SUBR_FI16m}, 759 { X86::SUBR_FpI32m32, X86::SUBR_FI32m}, 760 { X86::SUBR_FpI32m64, X86::SUBR_FI32m}, 761 { X86::SUBR_FpI32m80, X86::SUBR_FI32m}, 762 { X86::SUB_Fp32m , X86::SUB_F32m }, 763 { X86::SUB_Fp64m , X86::SUB_F64m }, 764 { X86::SUB_Fp64m32 , X86::SUB_F32m }, 765 { X86::SUB_Fp80m32 , X86::SUB_F32m }, 766 { X86::SUB_Fp80m64 , X86::SUB_F64m }, 767 { X86::SUB_FpI16m32 , X86::SUB_FI16m }, 768 { X86::SUB_FpI16m64 , X86::SUB_FI16m }, 769 { X86::SUB_FpI16m80 , X86::SUB_FI16m }, 770 { X86::SUB_FpI32m32 , X86::SUB_FI32m }, 771 { X86::SUB_FpI32m64 , X86::SUB_FI32m }, 772 { X86::SUB_FpI32m80 , X86::SUB_FI32m }, 773 { X86::TST_Fp32 , X86::TST_F }, 774 { X86::TST_Fp64 , X86::TST_F }, 775 { X86::TST_Fp80 , X86::TST_F }, 776 { X86::UCOM_FpIr32 , X86::UCOM_FIr }, 777 { X86::UCOM_FpIr64 , X86::UCOM_FIr }, 778 { X86::UCOM_FpIr80 , X86::UCOM_FIr }, 779 { X86::UCOM_Fpr32 , X86::UCOM_Fr }, 780 { X86::UCOM_Fpr64 , X86::UCOM_Fr }, 781 { X86::UCOM_Fpr80 , X86::UCOM_Fr }, 782}; 783 784static unsigned getConcreteOpcode(unsigned Opcode) { 785 ASSERT_SORTED(OpcodeTable); 786 int Opc = Lookup(OpcodeTable, array_lengthof(OpcodeTable), Opcode); 787 assert(Opc != -1 && "FP Stack instruction not in OpcodeTable!"); 788 return Opc; 789} 790 791//===----------------------------------------------------------------------===// 792// Helper Methods 
793//===----------------------------------------------------------------------===// 794 795// PopTable - Sorted map of instructions to their popping version. The first 796// element is an instruction, the second is the version which pops. 797// 798static const TableEntry PopTable[] = { 799 { X86::ADD_FrST0 , X86::ADD_FPrST0 }, 800 801 { X86::DIVR_FrST0, X86::DIVR_FPrST0 }, 802 { X86::DIV_FrST0 , X86::DIV_FPrST0 }, 803 804 { X86::IST_F16m , X86::IST_FP16m }, 805 { X86::IST_F32m , X86::IST_FP32m }, 806 807 { X86::MUL_FrST0 , X86::MUL_FPrST0 }, 808 809 { X86::ST_F32m , X86::ST_FP32m }, 810 { X86::ST_F64m , X86::ST_FP64m }, 811 { X86::ST_Frr , X86::ST_FPrr }, 812 813 { X86::SUBR_FrST0, X86::SUBR_FPrST0 }, 814 { X86::SUB_FrST0 , X86::SUB_FPrST0 }, 815 816 { X86::UCOM_FIr , X86::UCOM_FIPr }, 817 818 { X86::UCOM_FPr , X86::UCOM_FPPr }, 819 { X86::UCOM_Fr , X86::UCOM_FPr }, 820}; 821 822/// popStackAfter - Pop the current value off of the top of the FP stack after 823/// the specified instruction. This attempts to be sneaky and combine the pop 824/// into the instruction itself if possible. The iterator is left pointing to 825/// the last instruction, be it a new pop instruction inserted, or the old 826/// instruction if it was modified in place. 827/// 828void FPS::popStackAfter(MachineBasicBlock::iterator &I) { 829 MachineInstr* MI = I; 830 DebugLoc dl = MI->getDebugLoc(); 831 ASSERT_SORTED(PopTable); 832 assert(StackTop > 0 && "Cannot pop empty stack!"); 833 RegMap[Stack[--StackTop]] = ~0; // Update state 834 835 // Check to see if there is a popping version of this instruction... 
836 int Opcode = Lookup(PopTable, array_lengthof(PopTable), I->getOpcode()); 837 if (Opcode != -1) { 838 I->setDesc(TII->get(Opcode)); 839 if (Opcode == X86::UCOM_FPPr) 840 I->RemoveOperand(0); 841 } else { // Insert an explicit pop 842 I = BuildMI(*MBB, ++I, dl, TII->get(X86::ST_FPrr)).addReg(X86::ST0); 843 } 844} 845 846/// freeStackSlotAfter - Free the specified register from the register stack, so 847/// that it is no longer in a register. If the register is currently at the top 848/// of the stack, we just pop the current instruction, otherwise we store the 849/// current top-of-stack into the specified slot, then pop the top of stack. 850void FPS::freeStackSlotAfter(MachineBasicBlock::iterator &I, unsigned FPRegNo) { 851 if (getStackEntry(0) == FPRegNo) { // already at the top of stack? easy. 852 popStackAfter(I); 853 return; 854 } 855 856 // Otherwise, store the top of stack into the dead slot, killing the operand 857 // without having to add in an explicit xchg then pop. 858 // 859 I = freeStackSlotBefore(++I, FPRegNo); 860} 861 862/// freeStackSlotBefore - Free the specified register without trying any 863/// folding. 864MachineBasicBlock::iterator 865FPS::freeStackSlotBefore(MachineBasicBlock::iterator I, unsigned FPRegNo) { 866 unsigned STReg = getSTReg(FPRegNo); 867 unsigned OldSlot = getSlot(FPRegNo); 868 unsigned TopReg = Stack[StackTop-1]; 869 Stack[OldSlot] = TopReg; 870 RegMap[TopReg] = OldSlot; 871 RegMap[FPRegNo] = ~0; 872 Stack[--StackTop] = ~0; 873 return BuildMI(*MBB, I, DebugLoc(), TII->get(X86::ST_FPrr)).addReg(STReg); 874} 875 876/// adjustLiveRegs - Kill and revive registers such that exactly the FP 877/// registers with a bit in Mask are live. 878void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) { 879 unsigned Defs = Mask; 880 unsigned Kills = 0; 881 for (unsigned i = 0; i < StackTop; ++i) { 882 unsigned RegNo = Stack[i]; 883 if (!(Defs & (1 << RegNo))) 884 // This register is live, but we don't want it. 
885 Kills |= (1 << RegNo); 886 else 887 // We don't need to imp-def this live register. 888 Defs &= ~(1 << RegNo); 889 } 890 assert((Kills & Defs) == 0 && "Register needs killing and def'ing?"); 891 892 // Produce implicit-defs for free by using killed registers. 893 while (Kills && Defs) { 894 unsigned KReg = CountTrailingZeros_32(Kills); 895 unsigned DReg = CountTrailingZeros_32(Defs); 896 DEBUG(dbgs() << "Renaming %FP" << KReg << " as imp %FP" << DReg << "\n"); 897 std::swap(Stack[getSlot(KReg)], Stack[getSlot(DReg)]); 898 std::swap(RegMap[KReg], RegMap[DReg]); 899 Kills &= ~(1 << KReg); 900 Defs &= ~(1 << DReg); 901 } 902 903 // Kill registers by popping. 904 if (Kills && I != MBB->begin()) { 905 MachineBasicBlock::iterator I2 = llvm::prior(I); 906 for (;;) { 907 unsigned KReg = getStackEntry(0); 908 if (!(Kills & (1 << KReg))) 909 break; 910 DEBUG(dbgs() << "Popping %FP" << KReg << "\n"); 911 popStackAfter(I2); 912 Kills &= ~(1 << KReg); 913 } 914 } 915 916 // Manually kill the rest. 917 while (Kills) { 918 unsigned KReg = CountTrailingZeros_32(Kills); 919 DEBUG(dbgs() << "Killing %FP" << KReg << "\n"); 920 freeStackSlotBefore(I, KReg); 921 Kills &= ~(1 << KReg); 922 } 923 924 // Load zeros for all the imp-defs. 925 while(Defs) { 926 unsigned DReg = CountTrailingZeros_32(Defs); 927 DEBUG(dbgs() << "Defining %FP" << DReg << " as 0\n"); 928 BuildMI(*MBB, I, DebugLoc(), TII->get(X86::LD_F0)); 929 pushReg(DReg); 930 Defs &= ~(1 << DReg); 931 } 932 933 // Now we should have the correct registers live. 934 DEBUG(dumpStack()); 935 assert(StackTop == CountPopulation_32(Mask) && "Live count mismatch"); 936} 937 938/// shuffleStackTop - emit fxch instructions before I to shuffle the top 939/// FixCount entries into the order given by FixStack. 940/// FIXME: Is there a better algorithm than insertion sort? 
941void FPS::shuffleStackTop(const unsigned char *FixStack, 942 unsigned FixCount, 943 MachineBasicBlock::iterator I) { 944 // Move items into place, starting from the desired stack bottom. 945 while (FixCount--) { 946 // Old register at position FixCount. 947 unsigned OldReg = getStackEntry(FixCount); 948 // Desired register at position FixCount. 949 unsigned Reg = FixStack[FixCount]; 950 if (Reg == OldReg) 951 continue; 952 // (Reg st0) (OldReg st0) = (Reg OldReg st0) 953 moveToTop(Reg, I); 954 moveToTop(OldReg, I); 955 } 956 DEBUG(dumpStack()); 957} 958 959 960//===----------------------------------------------------------------------===// 961// Instruction transformation implementation 962//===----------------------------------------------------------------------===// 963 964/// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem> 965/// 966void FPS::handleZeroArgFP(MachineBasicBlock::iterator &I) { 967 MachineInstr *MI = I; 968 unsigned DestReg = getFPReg(MI->getOperand(0)); 969 970 // Change from the pseudo instruction to the concrete instruction. 971 MI->RemoveOperand(0); // Remove the explicit ST(0) operand 972 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 973 974 // Result gets pushed on the stack. 975 pushReg(DestReg); 976} 977 978/// handleOneArgFP - fst <mem>, ST(0) 979/// 980void FPS::handleOneArgFP(MachineBasicBlock::iterator &I) { 981 MachineInstr *MI = I; 982 unsigned NumOps = MI->getDesc().getNumOperands(); 983 assert((NumOps == X86::AddrNumOperands + 1 || NumOps == 1) && 984 "Can only handle fst* & ftst instructions!"); 985 986 // Is this the last use of the source register? 987 unsigned Reg = getFPReg(MI->getOperand(NumOps-1)); 988 bool KillsSrc = MI->killsRegister(X86::FP0+Reg); 989 990 // FISTP64m is strange because there isn't a non-popping versions. 991 // If we have one _and_ we don't want to pop the operand, duplicate the value 992 // on the stack instead of moving it. This ensure that popping the value is 993 // always ok. 
994 // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m. 995 // 996 if (!KillsSrc && 997 (MI->getOpcode() == X86::IST_Fp64m32 || 998 MI->getOpcode() == X86::ISTT_Fp16m32 || 999 MI->getOpcode() == X86::ISTT_Fp32m32 || 1000 MI->getOpcode() == X86::ISTT_Fp64m32 || 1001 MI->getOpcode() == X86::IST_Fp64m64 || 1002 MI->getOpcode() == X86::ISTT_Fp16m64 || 1003 MI->getOpcode() == X86::ISTT_Fp32m64 || 1004 MI->getOpcode() == X86::ISTT_Fp64m64 || 1005 MI->getOpcode() == X86::IST_Fp64m80 || 1006 MI->getOpcode() == X86::ISTT_Fp16m80 || 1007 MI->getOpcode() == X86::ISTT_Fp32m80 || 1008 MI->getOpcode() == X86::ISTT_Fp64m80 || 1009 MI->getOpcode() == X86::ST_FpP80m)) { 1010 duplicateToTop(Reg, getScratchReg(), I); 1011 } else { 1012 moveToTop(Reg, I); // Move to the top of the stack... 1013 } 1014 1015 // Convert from the pseudo instruction to the concrete instruction. 1016 MI->RemoveOperand(NumOps-1); // Remove explicit ST(0) operand 1017 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 1018 1019 if (MI->getOpcode() == X86::IST_FP64m || 1020 MI->getOpcode() == X86::ISTT_FP16m || 1021 MI->getOpcode() == X86::ISTT_FP32m || 1022 MI->getOpcode() == X86::ISTT_FP64m || 1023 MI->getOpcode() == X86::ST_FP80m) { 1024 assert(StackTop > 0 && "Stack empty??"); 1025 --StackTop; 1026 } else if (KillsSrc) { // Last use of operand? 1027 popStackAfter(I); 1028 } 1029} 1030 1031 1032/// handleOneArgFPRW: Handle instructions that read from the top of stack and 1033/// replace the value with a newly computed value. These instructions may have 1034/// non-fp operands after their FP operands. 1035/// 1036/// Examples: 1037/// R1 = fchs R2 1038/// R1 = fadd R2, [mem] 1039/// 1040void FPS::handleOneArgFPRW(MachineBasicBlock::iterator &I) { 1041 MachineInstr *MI = I; 1042#ifndef NDEBUG 1043 unsigned NumOps = MI->getDesc().getNumOperands(); 1044 assert(NumOps >= 2 && "FPRW instructions must have 2 ops!!"); 1045#endif 1046 1047 // Is this the last use of the source register? 
1048 unsigned Reg = getFPReg(MI->getOperand(1)); 1049 bool KillsSrc = MI->killsRegister(X86::FP0+Reg); 1050 1051 if (KillsSrc) { 1052 // If this is the last use of the source register, just make sure it's on 1053 // the top of the stack. 1054 moveToTop(Reg, I); 1055 assert(StackTop > 0 && "Stack cannot be empty!"); 1056 --StackTop; 1057 pushReg(getFPReg(MI->getOperand(0))); 1058 } else { 1059 // If this is not the last use of the source register, _copy_ it to the top 1060 // of the stack. 1061 duplicateToTop(Reg, getFPReg(MI->getOperand(0)), I); 1062 } 1063 1064 // Change from the pseudo instruction to the concrete instruction. 1065 MI->RemoveOperand(1); // Drop the source operand. 1066 MI->RemoveOperand(0); // Drop the destination operand. 1067 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 1068} 1069 1070 1071//===----------------------------------------------------------------------===// 1072// Define tables of various ways to map pseudo instructions 1073// 1074 1075// ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i) 1076static const TableEntry ForwardST0Table[] = { 1077 { X86::ADD_Fp32 , X86::ADD_FST0r }, 1078 { X86::ADD_Fp64 , X86::ADD_FST0r }, 1079 { X86::ADD_Fp80 , X86::ADD_FST0r }, 1080 { X86::DIV_Fp32 , X86::DIV_FST0r }, 1081 { X86::DIV_Fp64 , X86::DIV_FST0r }, 1082 { X86::DIV_Fp80 , X86::DIV_FST0r }, 1083 { X86::MUL_Fp32 , X86::MUL_FST0r }, 1084 { X86::MUL_Fp64 , X86::MUL_FST0r }, 1085 { X86::MUL_Fp80 , X86::MUL_FST0r }, 1086 { X86::SUB_Fp32 , X86::SUB_FST0r }, 1087 { X86::SUB_Fp64 , X86::SUB_FST0r }, 1088 { X86::SUB_Fp80 , X86::SUB_FST0r }, 1089}; 1090 1091// ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0) 1092static const TableEntry ReverseST0Table[] = { 1093 { X86::ADD_Fp32 , X86::ADD_FST0r }, // commutative 1094 { X86::ADD_Fp64 , X86::ADD_FST0r }, // commutative 1095 { X86::ADD_Fp80 , X86::ADD_FST0r }, // commutative 1096 { X86::DIV_Fp32 , X86::DIVR_FST0r }, 1097 { X86::DIV_Fp64 , X86::DIVR_FST0r }, 1098 { 
X86::DIV_Fp80 , X86::DIVR_FST0r }, 1099 { X86::MUL_Fp32 , X86::MUL_FST0r }, // commutative 1100 { X86::MUL_Fp64 , X86::MUL_FST0r }, // commutative 1101 { X86::MUL_Fp80 , X86::MUL_FST0r }, // commutative 1102 { X86::SUB_Fp32 , X86::SUBR_FST0r }, 1103 { X86::SUB_Fp64 , X86::SUBR_FST0r }, 1104 { X86::SUB_Fp80 , X86::SUBR_FST0r }, 1105}; 1106 1107// ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i) 1108static const TableEntry ForwardSTiTable[] = { 1109 { X86::ADD_Fp32 , X86::ADD_FrST0 }, // commutative 1110 { X86::ADD_Fp64 , X86::ADD_FrST0 }, // commutative 1111 { X86::ADD_Fp80 , X86::ADD_FrST0 }, // commutative 1112 { X86::DIV_Fp32 , X86::DIVR_FrST0 }, 1113 { X86::DIV_Fp64 , X86::DIVR_FrST0 }, 1114 { X86::DIV_Fp80 , X86::DIVR_FrST0 }, 1115 { X86::MUL_Fp32 , X86::MUL_FrST0 }, // commutative 1116 { X86::MUL_Fp64 , X86::MUL_FrST0 }, // commutative 1117 { X86::MUL_Fp80 , X86::MUL_FrST0 }, // commutative 1118 { X86::SUB_Fp32 , X86::SUBR_FrST0 }, 1119 { X86::SUB_Fp64 , X86::SUBR_FrST0 }, 1120 { X86::SUB_Fp80 , X86::SUBR_FrST0 }, 1121}; 1122 1123// ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0) 1124static const TableEntry ReverseSTiTable[] = { 1125 { X86::ADD_Fp32 , X86::ADD_FrST0 }, 1126 { X86::ADD_Fp64 , X86::ADD_FrST0 }, 1127 { X86::ADD_Fp80 , X86::ADD_FrST0 }, 1128 { X86::DIV_Fp32 , X86::DIV_FrST0 }, 1129 { X86::DIV_Fp64 , X86::DIV_FrST0 }, 1130 { X86::DIV_Fp80 , X86::DIV_FrST0 }, 1131 { X86::MUL_Fp32 , X86::MUL_FrST0 }, 1132 { X86::MUL_Fp64 , X86::MUL_FrST0 }, 1133 { X86::MUL_Fp80 , X86::MUL_FrST0 }, 1134 { X86::SUB_Fp32 , X86::SUB_FrST0 }, 1135 { X86::SUB_Fp64 , X86::SUB_FrST0 }, 1136 { X86::SUB_Fp80 , X86::SUB_FrST0 }, 1137}; 1138 1139 1140/// handleTwoArgFP - Handle instructions like FADD and friends which are virtual 1141/// instructions which need to be simplified and possibly transformed. 
1142/// 1143/// Result: ST(0) = fsub ST(0), ST(i) 1144/// ST(i) = fsub ST(0), ST(i) 1145/// ST(0) = fsubr ST(0), ST(i) 1146/// ST(i) = fsubr ST(0), ST(i) 1147/// 1148void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) { 1149 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 1150 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 1151 MachineInstr *MI = I; 1152 1153 unsigned NumOperands = MI->getDesc().getNumOperands(); 1154 assert(NumOperands == 3 && "Illegal TwoArgFP instruction!"); 1155 unsigned Dest = getFPReg(MI->getOperand(0)); 1156 unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); 1157 unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); 1158 bool KillsOp0 = MI->killsRegister(X86::FP0+Op0); 1159 bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 1160 DebugLoc dl = MI->getDebugLoc(); 1161 1162 unsigned TOS = getStackEntry(0); 1163 1164 // One of our operands must be on the top of the stack. If neither is yet, we 1165 // need to move one. 1166 if (Op0 != TOS && Op1 != TOS) { // No operand at TOS? 1167 // We can choose to move either operand to the top of the stack. If one of 1168 // the operands is killed by this instruction, we want that one so that we 1169 // can update right on top of the old version. 1170 if (KillsOp0) { 1171 moveToTop(Op0, I); // Move dead operand to TOS. 1172 TOS = Op0; 1173 } else if (KillsOp1) { 1174 moveToTop(Op1, I); 1175 TOS = Op1; 1176 } else { 1177 // All of the operands are live after this instruction executes, so we 1178 // cannot update on top of any operand. Because of this, we must 1179 // duplicate one of the stack elements to the top. It doesn't matter 1180 // which one we pick. 
1181 // 1182 duplicateToTop(Op0, Dest, I); 1183 Op0 = TOS = Dest; 1184 KillsOp0 = true; 1185 } 1186 } else if (!KillsOp0 && !KillsOp1) { 1187 // If we DO have one of our operands at the top of the stack, but we don't 1188 // have a dead operand, we must duplicate one of the operands to a new slot 1189 // on the stack. 1190 duplicateToTop(Op0, Dest, I); 1191 Op0 = TOS = Dest; 1192 KillsOp0 = true; 1193 } 1194 1195 // Now we know that one of our operands is on the top of the stack, and at 1196 // least one of our operands is killed by this instruction. 1197 assert((TOS == Op0 || TOS == Op1) && (KillsOp0 || KillsOp1) && 1198 "Stack conditions not set up right!"); 1199 1200 // We decide which form to use based on what is on the top of the stack, and 1201 // which operand is killed by this instruction. 1202 const TableEntry *InstTable; 1203 bool isForward = TOS == Op0; 1204 bool updateST0 = (TOS == Op0 && !KillsOp1) || (TOS == Op1 && !KillsOp0); 1205 if (updateST0) { 1206 if (isForward) 1207 InstTable = ForwardST0Table; 1208 else 1209 InstTable = ReverseST0Table; 1210 } else { 1211 if (isForward) 1212 InstTable = ForwardSTiTable; 1213 else 1214 InstTable = ReverseSTiTable; 1215 } 1216 1217 int Opcode = Lookup(InstTable, array_lengthof(ForwardST0Table), 1218 MI->getOpcode()); 1219 assert(Opcode != -1 && "Unknown TwoArgFP pseudo instruction!"); 1220 1221 // NotTOS - The register which is not on the top of stack... 1222 unsigned NotTOS = (TOS == Op0) ? Op1 : Op0; 1223 1224 // Replace the old instruction with a new instruction 1225 MBB->remove(I++); 1226 I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS)); 1227 1228 // If both operands are killed, pop one off of the stack in addition to 1229 // overwriting the other one. 
1230 if (KillsOp0 && KillsOp1 && Op0 != Op1) { 1231 assert(!updateST0 && "Should have updated other operand!"); 1232 popStackAfter(I); // Pop the top of stack 1233 } 1234 1235 // Update stack information so that we know the destination register is now on 1236 // the stack. 1237 unsigned UpdatedSlot = getSlot(updateST0 ? TOS : NotTOS); 1238 assert(UpdatedSlot < StackTop && Dest < 7); 1239 Stack[UpdatedSlot] = Dest; 1240 RegMap[Dest] = UpdatedSlot; 1241 MBB->getParent()->DeleteMachineInstr(MI); // Remove the old instruction 1242} 1243 1244/// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP 1245/// register arguments and no explicit destinations. 1246/// 1247void FPS::handleCompareFP(MachineBasicBlock::iterator &I) { 1248 ASSERT_SORTED(ForwardST0Table); ASSERT_SORTED(ReverseST0Table); 1249 ASSERT_SORTED(ForwardSTiTable); ASSERT_SORTED(ReverseSTiTable); 1250 MachineInstr *MI = I; 1251 1252 unsigned NumOperands = MI->getDesc().getNumOperands(); 1253 assert(NumOperands == 2 && "Illegal FUCOM* instruction!"); 1254 unsigned Op0 = getFPReg(MI->getOperand(NumOperands-2)); 1255 unsigned Op1 = getFPReg(MI->getOperand(NumOperands-1)); 1256 bool KillsOp0 = MI->killsRegister(X86::FP0+Op0); 1257 bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 1258 1259 // Make sure the first operand is on the top of stack, the other one can be 1260 // anywhere. 1261 moveToTop(Op0, I); 1262 1263 // Change from the pseudo instruction to the concrete instruction. 1264 MI->getOperand(0).setReg(getSTReg(Op1)); 1265 MI->RemoveOperand(1); 1266 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 1267 1268 // If any of the operands are killed by this instruction, free them. 1269 if (KillsOp0) freeStackSlotAfter(I, Op0); 1270 if (KillsOp1 && Op0 != Op1) freeStackSlotAfter(I, Op1); 1271} 1272 1273/// handleCondMovFP - Handle two address conditional move instructions. These 1274/// instructions move a st(i) register to st(0) iff a condition is true. 
These 1275/// instructions require that the first operand is at the top of the stack, but 1276/// otherwise don't modify the stack at all. 1277void FPS::handleCondMovFP(MachineBasicBlock::iterator &I) { 1278 MachineInstr *MI = I; 1279 1280 unsigned Op0 = getFPReg(MI->getOperand(0)); 1281 unsigned Op1 = getFPReg(MI->getOperand(2)); 1282 bool KillsOp1 = MI->killsRegister(X86::FP0+Op1); 1283 1284 // The first operand *must* be on the top of the stack. 1285 moveToTop(Op0, I); 1286 1287 // Change the second operand to the stack register that the operand is in. 1288 // Change from the pseudo instruction to the concrete instruction. 1289 MI->RemoveOperand(0); 1290 MI->RemoveOperand(1); 1291 MI->getOperand(0).setReg(getSTReg(Op1)); 1292 MI->setDesc(TII->get(getConcreteOpcode(MI->getOpcode()))); 1293 1294 // If we kill the second operand, make sure to pop it from the stack. 1295 if (Op0 != Op1 && KillsOp1) { 1296 // Get this value off of the register stack. 1297 freeStackSlotAfter(I, Op1); 1298 } 1299} 1300 1301 1302/// handleSpecialFP - Handle special instructions which behave unlike other 1303/// floating point instructions. This is primarily intended for use by pseudo 1304/// instructions. 1305/// 1306void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { 1307 MachineInstr *MI = I; 1308 DebugLoc dl = MI->getDebugLoc(); 1309 switch (MI->getOpcode()) { 1310 default: llvm_unreachable("Unknown SpecialFP instruction!"); 1311 case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type! 1312 case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type! 1313 case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type! 1314 assert(StackTop == 0 && "Stack should be empty after a call!"); 1315 pushReg(getFPReg(MI->getOperand(0))); 1316 break; 1317 case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type! 1318 case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type! 
1319 case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type! 1320 // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm. 1321 // The pattern we expect is: 1322 // CALL 1323 // FP1 = FpGET_ST0 1324 // FP4 = FpGET_ST1 1325 // 1326 // At this point, we've pushed FP1 on the top of stack, so it should be 1327 // present if it isn't dead. If it was dead, we already emitted a pop to 1328 // remove it from the stack and StackTop = 0. 1329 1330 // Push FP4 as top of stack next. 1331 pushReg(getFPReg(MI->getOperand(0))); 1332 1333 // If StackTop was 0 before we pushed our operand, then ST(0) must have been 1334 // dead. In this case, the ST(1) value is the only thing that is live, so 1335 // it should be on the TOS (after the pop that was emitted) and is. Just 1336 // continue in this case. 1337 if (StackTop == 1) 1338 break; 1339 1340 // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top 1341 // elements so that our accounting is correct. 1342 unsigned RegOnTop = getStackEntry(0); 1343 unsigned RegNo = getStackEntry(1); 1344 1345 // Swap the slots the regs are in. 1346 std::swap(RegMap[RegNo], RegMap[RegOnTop]); 1347 1348 // Swap stack slot contents. 1349 assert(RegMap[RegOnTop] < StackTop); 1350 std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]); 1351 break; 1352 } 1353 case X86::FpSET_ST0_32: 1354 case X86::FpSET_ST0_64: 1355 case X86::FpSET_ST0_80: { 1356 // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm 1357 // arguments that use an st constraint. We expect a sequence of 1358 // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM 1359 unsigned Op0 = getFPReg(MI->getOperand(0)); 1360 1361 if (!MI->killsRegister(X86::FP0 + Op0)) { 1362 // Duplicate Op0 into a temporary on the stack top. 1363 duplicateToTop(Op0, getScratchReg(), I); 1364 } else { 1365 // Op0 is killed, so just swap it into position. 
1366 moveToTop(Op0, I); 1367 } 1368 --StackTop; // "Forget" we have something on the top of stack! 1369 break; 1370 } 1371 case X86::FpSET_ST1_32: 1372 case X86::FpSET_ST1_64: 1373 case X86::FpSET_ST1_80: { 1374 // Set up st(1) for inline asm. We are assuming that st(0) has already been 1375 // set up by FpSET_ST0, and our StackTop is off by one because of it. 1376 unsigned Op0 = getFPReg(MI->getOperand(0)); 1377 // Restore the actual StackTop from before Fp_SET_ST0. 1378 // Note we can't handle Fp_SET_ST1 without a preceeding Fp_SET_ST0, and we 1379 // are not enforcing the constraint. 1380 ++StackTop; 1381 unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0). 1382 if (!MI->killsRegister(X86::FP0 + Op0)) { 1383 duplicateToTop(Op0, getScratchReg(), I); 1384 moveToTop(RegOnTop, I); 1385 } else if (getSTReg(Op0) != X86::ST1) { 1386 // We have the wrong value at st(1). Shuffle! Untested! 1387 moveToTop(getStackEntry(1), I); 1388 moveToTop(Op0, I); 1389 moveToTop(RegOnTop, I); 1390 } 1391 assert(StackTop >= 2 && "Too few live registers"); 1392 StackTop -= 2; // "Forget" both st(0) and st(1). 1393 break; 1394 } 1395 case X86::MOV_Fp3232: 1396 case X86::MOV_Fp3264: 1397 case X86::MOV_Fp6432: 1398 case X86::MOV_Fp6464: 1399 case X86::MOV_Fp3280: 1400 case X86::MOV_Fp6480: 1401 case X86::MOV_Fp8032: 1402 case X86::MOV_Fp8064: 1403 case X86::MOV_Fp8080: { 1404 const MachineOperand &MO1 = MI->getOperand(1); 1405 unsigned SrcReg = getFPReg(MO1); 1406 1407 const MachineOperand &MO0 = MI->getOperand(0); 1408 unsigned DestReg = getFPReg(MO0); 1409 if (MI->killsRegister(X86::FP0+SrcReg)) { 1410 // If the input operand is killed, we can just change the owner of the 1411 // incoming stack slot into the result. 1412 unsigned Slot = getSlot(SrcReg); 1413 assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!"); 1414 Stack[Slot] = DestReg; 1415 RegMap[DestReg] = Slot; 1416 1417 } else { 1418 // For FMOV we just duplicate the specified value to a new stack slot. 
1419 // This could be made better, but would require substantial changes. 1420 duplicateToTop(SrcReg, DestReg, I); 1421 } 1422 } 1423 break; 1424 case TargetOpcode::INLINEASM: { 1425 // The inline asm MachineInstr currently only *uses* FP registers for the 1426 // 'f' constraint. These should be turned into the current ST(x) register 1427 // in the machine instr. Also, any kills should be explicitly popped after 1428 // the inline asm. 1429 unsigned Kills = 0; 1430 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1431 MachineOperand &Op = MI->getOperand(i); 1432 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) 1433 continue; 1434 assert(Op.isUse() && "Only handle inline asm uses right now"); 1435 1436 unsigned FPReg = getFPReg(Op); 1437 Op.setReg(getSTReg(FPReg)); 1438 1439 // If we kill this operand, make sure to pop it from the stack after the 1440 // asm. We just remember it for now, and pop them all off at the end in 1441 // a batch. 1442 if (Op.isKill()) 1443 Kills |= 1U << FPReg; 1444 } 1445 1446 // If this asm kills any FP registers (is the last use of them) we must 1447 // explicitly emit pop instructions for them. Do this now after the asm has 1448 // executed so that the ST(x) numbers are not off (which would happen if we 1449 // did this inline with operand rewriting). 1450 // 1451 // Note: this might be a non-optimal pop sequence. We might be able to do 1452 // better by trying to pop in stack order or something. 1453 MachineBasicBlock::iterator InsertPt = MI; 1454 while (Kills) { 1455 unsigned FPReg = CountTrailingZeros_32(Kills); 1456 freeStackSlotAfter(InsertPt, FPReg); 1457 Kills &= ~(1U << FPReg); 1458 } 1459 // Don't delete the inline asm! 1460 return; 1461 } 1462 1463 case X86::RET: 1464 case X86::RETI: 1465 // If RET has an FP register use operand, pass the first one in ST(0) and 1466 // the second one in ST(1). 1467 1468 // Find the register operands. 
1469 unsigned FirstFPRegOp = ~0U, SecondFPRegOp = ~0U; 1470 unsigned LiveMask = 0; 1471 1472 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1473 MachineOperand &Op = MI->getOperand(i); 1474 if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6) 1475 continue; 1476 // FP Register uses must be kills unless there are two uses of the same 1477 // register, in which case only one will be a kill. 1478 assert(Op.isUse() && 1479 (Op.isKill() || // Marked kill. 1480 getFPReg(Op) == FirstFPRegOp || // Second instance. 1481 MI->killsRegister(Op.getReg())) && // Later use is marked kill. 1482 "Ret only defs operands, and values aren't live beyond it"); 1483 1484 if (FirstFPRegOp == ~0U) 1485 FirstFPRegOp = getFPReg(Op); 1486 else { 1487 assert(SecondFPRegOp == ~0U && "More than two fp operands!"); 1488 SecondFPRegOp = getFPReg(Op); 1489 } 1490 LiveMask |= (1 << getFPReg(Op)); 1491 1492 // Remove the operand so that later passes don't see it. 1493 MI->RemoveOperand(i); 1494 --i, --e; 1495 } 1496 1497 // We may have been carrying spurious live-ins, so make sure only the returned 1498 // registers are left live. 1499 adjustLiveRegs(LiveMask, MI); 1500 if (!LiveMask) return; // Quick check to see if any are possible. 1501 1502 // There are only four possibilities here: 1503 // 1) we are returning a single FP value. In this case, it has to be in 1504 // ST(0) already, so just declare success by removing the value from the 1505 // FP Stack. 1506 if (SecondFPRegOp == ~0U) { 1507 // Assert that the top of stack contains the right FP register. 1508 assert(StackTop == 1 && FirstFPRegOp == getStackEntry(0) && 1509 "Top of stack not the right register for RET!"); 1510 1511 // Ok, everything is good, mark the value as not being on the stack 1512 // anymore so that our assertion about the stack being empty at end of 1513 // block doesn't fire. 
1514 StackTop = 0; 1515 return; 1516 } 1517 1518 // Otherwise, we are returning two values: 1519 // 2) If returning the same value for both, we only have one thing in the FP 1520 // stack. Consider: RET FP1, FP1 1521 if (StackTop == 1) { 1522 assert(FirstFPRegOp == SecondFPRegOp && FirstFPRegOp == getStackEntry(0)&& 1523 "Stack misconfiguration for RET!"); 1524 1525 // Duplicate the TOS so that we return it twice. Just pick some other FPx 1526 // register to hold it. 1527 unsigned NewReg = getScratchReg(); 1528 duplicateToTop(FirstFPRegOp, NewReg, MI); 1529 FirstFPRegOp = NewReg; 1530 } 1531 1532 /// Okay we know we have two different FPx operands now: 1533 assert(StackTop == 2 && "Must have two values live!"); 1534 1535 /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently 1536 /// in ST(1). In this case, emit an fxch. 1537 if (getStackEntry(0) == SecondFPRegOp) { 1538 assert(getStackEntry(1) == FirstFPRegOp && "Unknown regs live"); 1539 moveToTop(FirstFPRegOp, MI); 1540 } 1541 1542 /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in 1543 /// ST(1). Just remove both from our understanding of the stack and return. 1544 assert(getStackEntry(0) == FirstFPRegOp && "Unknown regs live"); 1545 assert(getStackEntry(1) == SecondFPRegOp && "Unknown regs live"); 1546 StackTop = 0; 1547 return; 1548 } 1549 1550 I = MBB->erase(I); // Remove the pseudo instruction 1551 1552 // We want to leave I pointing to the previous instruction, but what if we 1553 // just erased the first instruction? 1554 if (I == MBB->begin()) { 1555 DEBUG(dbgs() << "Inserting dummy KILL\n"); 1556 I = BuildMI(*MBB, I, DebugLoc(), TII->get(TargetOpcode::KILL)); 1557 } else 1558 --I; 1559} 1560 1561// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands. 
1562bool FPS::translateCopy(MachineInstr *MI) { 1563 unsigned DstReg = MI->getOperand(0).getReg(); 1564 unsigned SrcReg = MI->getOperand(1).getReg(); 1565 1566 if (DstReg == X86::ST0) { 1567 MI->setDesc(TII->get(X86::FpSET_ST0_80)); 1568 MI->RemoveOperand(0); 1569 return true; 1570 } 1571 if (DstReg == X86::ST1) { 1572 MI->setDesc(TII->get(X86::FpSET_ST1_80)); 1573 MI->RemoveOperand(0); 1574 return true; 1575 } 1576 if (SrcReg == X86::ST0) { 1577 MI->setDesc(TII->get(X86::FpGET_ST0_80)); 1578 return true; 1579 } 1580 if (SrcReg == X86::ST1) { 1581 MI->setDesc(TII->get(X86::FpGET_ST1_80)); 1582 return true; 1583 } 1584 if (X86::RFP80RegClass.contains(DstReg, SrcReg)) { 1585 MI->setDesc(TII->get(X86::MOV_Fp8080)); 1586 return true; 1587 } 1588 return false; 1589} 1590