//===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
///
/// \file
/// \brief This file implements a register stacking pass.
///
/// This pass reorders instructions to put register uses and defs in an order
/// such that they form single-use expression trees. Registers fitting this form
/// are then marked as "stackified", meaning references to them are replaced by
/// "push" and "pop" from the stack.
///
/// This is primarily a code size optimization, since temporary values on the
/// expression stack don't need to be named.
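///
/// For example (simplified), in a sequence such as
///
///    %1 = CONST_I32 2
///    %2 = ADD_I32 %0, %1
///
/// if %1 has no other uses, it can be stackified: the constant is pushed onto
/// the value stack and popped directly by the add, instead of being written
/// to and read back from a named local.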
///
//===----------------------------------------------------------------------===//

#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

#define DEBUG_TYPE "wasm-reg-stackify"

namespace {
class WebAssemblyRegStackify final : public MachineFunctionPass {
  const char *getPassName() const override {
    return "WebAssembly Register Stackify";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MachineDominatorTree>();
    AU.addRequired<LiveIntervals>();
    AU.addPreserved<MachineBlockFrequencyInfo>();
    AU.addPreserved<SlotIndexes>();
    AU.addPreserved<LiveIntervals>();
    AU.addPreservedID(LiveVariablesID);
    AU.addPreserved<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

public:
  static char ID; // Pass identification, replacement for typeid
  WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
};
} // end anonymous namespace

char WebAssemblyRegStackify::ID = 0;
FunctionPass *llvm::createWebAssemblyRegStackify() {
  return new WebAssemblyRegStackify();
}

// Decorate the given instruction with implicit operands that enforce the
// expression stack ordering constraints for an instruction which is on
// the expression stack.
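//
// For example (output notation approximate), an add decorated by this
// function would print roughly as:
//   %2<def> = ADD_I32 %0, %1, %EXPR_STACK<imp-def>, %EXPR_STACK<imp-use>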
static void ImposeStackOrdering(MachineInstr *MI) {
  // Write the opaque EXPR_STACK register.
  if (!MI->definesRegister(WebAssembly::EXPR_STACK))
    MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
                                             /*isDef=*/true,
                                             /*isImp=*/true));

  // Also read the opaque EXPR_STACK register.
  if (!MI->readsRegister(WebAssembly::EXPR_STACK))
    MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
                                             /*isDef=*/false,
                                             /*isImp=*/true));
}

// Determine whether a call to the callee referenced by
// MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
// effects.
static void QueryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
                        bool &Write, bool &Effects, bool &StackPointer) {
  // All calls can use the stack pointer.
  StackPointer = true;

  const MachineOperand &MO = MI.getOperand(CalleeOpNo);
  if (MO.isGlobal()) {
    const Constant *GV = MO.getGlobal();
    if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
      if (!GA->isInterposable())
        GV = GA->getAliasee();

    if (const Function *F = dyn_cast<Function>(GV)) {
      if (!F->doesNotThrow())
        Effects = true;
      if (F->doesNotAccessMemory())
        return;
      if (F->onlyReadsMemory()) {
        Read = true;
        return;
      }
    }
  }

  // Assume the worst.
  Write = true;
  Read = true;
  Effects = true;
}

// Determine whether MI reads memory, writes memory, has side effects,
// and/or uses the __stack_pointer value.
static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
                  bool &Write, bool &Effects, bool &StackPointer) {
  assert(!MI.isPosition());
  assert(!MI.isTerminator());

  if (MI.isDebugValue())
    return;

  // Check for loads.
  if (MI.mayLoad() && !MI.isInvariantLoad(&AA))
    Read = true;

  // Check for stores.
  if (MI.mayStore()) {
    Write = true;

    // Check for stores to __stack_pointer.
    for (auto MMO : MI.memoperands()) {
      const MachinePointerInfo &MPI = MMO->getPointerInfo();
      if (MPI.V.is<const PseudoSourceValue *>()) {
        auto PSV = MPI.V.get<const PseudoSourceValue *>();
        if (const ExternalSymbolPseudoSourceValue *EPSV =
                dyn_cast<ExternalSymbolPseudoSourceValue>(PSV))
          if (StringRef(EPSV->getSymbol()) == "__stack_pointer")
            StackPointer = true;
      }
    }
  } else if (MI.hasOrderedMemoryRef()) {
    switch (MI.getOpcode()) {
    case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
    case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
    case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
    case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
    case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
    case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
    case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
    case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
      // These instructions have hasUnmodeledSideEffects() returning true
      // because they trap on overflow and invalid so they can't be arbitrarily
      // moved, however hasOrderedMemoryRef() interprets this plus their lack
      // of memoperands as having a potential unknown memory reference.
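      // (For example, i32.div_s traps on a zero divisor and on
      // INT_MIN / -1.)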
      break;
    default:
      // Record volatile accesses, unless it's a call, as calls are handled
      // specially below.
      if (!MI.isCall()) {
        Write = true;
        Effects = true;
      }
      break;
    }
  }

  // Check for side effects.
  if (MI.hasUnmodeledSideEffects()) {
    switch (MI.getOpcode()) {
    case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
    case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
    case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
    case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
    case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
    case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
    case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
    case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
      // These instructions have hasUnmodeledSideEffects() returning true
      // because they trap on overflow and invalid so they can't be arbitrarily
      // moved, however in the specific case of register stackifying, it is safe
      // to move them because overflow and invalid are Undefined Behavior.
      break;
    default:
      Effects = true;
      break;
    }
  }

  // Analyze calls.
  if (MI.isCall()) {
    switch (MI.getOpcode()) {
    case WebAssembly::CALL_VOID:
    case WebAssembly::CALL_INDIRECT_VOID:
      QueryCallee(MI, 0, Read, Write, Effects, StackPointer);
      break;
    case WebAssembly::CALL_I32: case WebAssembly::CALL_I64:
    case WebAssembly::CALL_F32: case WebAssembly::CALL_F64:
    case WebAssembly::CALL_INDIRECT_I32: case WebAssembly::CALL_INDIRECT_I64:
    case WebAssembly::CALL_INDIRECT_F32: case WebAssembly::CALL_INDIRECT_F64:
      QueryCallee(MI, 1, Read, Write, Effects, StackPointer);
      break;
    default:
      llvm_unreachable("unexpected call opcode");
    }
  }
}

// Test whether Def is safe and profitable to rematerialize.
static bool ShouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
                                const WebAssemblyInstrInfo *TII) {
  return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA);
}

// Identify the definition for this register at this point. This is a
// generalization of MachineRegisterInfo::getUniqueVRegDef that uses
// LiveIntervals to handle complex cases.
static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
                                const MachineRegisterInfo &MRI,
                                const LiveIntervals &LIS) {
  // Most registers are in SSA form here so we try a quick MRI query first.
  if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
    return Def;

  // MRI doesn't know what the Def is. Try asking LIS.
  if (const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore(
          LIS.getInstructionIndex(*Insert)))
    return LIS.getInstructionFromIndex(ValNo->def);

  return nullptr;
}

// Test whether Reg, as defined at Def, has exactly one use. This is a
// generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals
// to handle complex cases.
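// (For example, Reg may have multiple defs by this point -- such as after a
// tee_local has been introduced -- so a per-value-number query is needed
// rather than a whole-register one.)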
static bool HasOneUse(unsigned Reg, MachineInstr *Def,
                      MachineRegisterInfo &MRI, MachineDominatorTree &MDT,
                      LiveIntervals &LIS) {
  // Most registers are in SSA form here so we try a quick MRI query first.
  if (MRI.hasOneUse(Reg))
    return true;

  bool HasOne = false;
  const LiveInterval &LI = LIS.getInterval(Reg);
  const VNInfo *DefVNI = LI.getVNInfoAt(
      LIS.getInstructionIndex(*Def).getRegSlot());
  assert(DefVNI);
  for (auto &Use : MRI.use_nodbg_operands(Reg)) {
    const auto &Result = LI.Query(LIS.getInstructionIndex(*Use.getParent()));
    if (Result.valueIn() == DefVNI) {
      if (!Result.isKill())
        return false;
      if (HasOne)
        return false;
      HasOne = true;
    }
  }
  return HasOne;
}

// Test whether it's safe to move Def to just before Insert.
// TODO: Compute memory dependencies in a way that doesn't require always
// walking the block.
// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
// more precise.
static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
                         AliasAnalysis &AA, const LiveIntervals &LIS,
                         const MachineRegisterInfo &MRI) {
  assert(Def->getParent() == Insert->getParent());

  // Check for register dependencies.
  for (const MachineOperand &MO : Def->operands()) {
    if (!MO.isReg() || MO.isUndef())
      continue;
    unsigned Reg = MO.getReg();

    // If the register is dead here and at Insert, ignore it.
    if (MO.isDead() && Insert->definesRegister(Reg) &&
        !Insert->readsRegister(Reg))
      continue;

    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
      // from moving down, and we've already checked for that.
      if (Reg == WebAssembly::ARGUMENTS)
        continue;
      // If the physical register is never modified, ignore it.
      if (!MRI.isPhysRegModified(Reg))
        continue;
      // Otherwise, it's a physical register with unknown liveness.
      return false;
    }

    // Ask LiveIntervals whether moving this virtual register use or def to
    // Insert will change which value numbers are seen.
    //
    // If the operand is a use of a register that is also defined in the same
    // instruction, test that the newly defined value reaches the insert point,
    // since the operand will be moving along with the def.
    const LiveInterval &LI = LIS.getInterval(Reg);
    VNInfo *DefVNI =
        (MO.isDef() || Def->definesRegister(Reg)) ?
        LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot()) :
        LI.getVNInfoBefore(LIS.getInstructionIndex(*Def));
    assert(DefVNI && "Instruction input missing value number");
    VNInfo *InsVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*Insert));
    if (InsVNI && DefVNI != InsVNI)
      return false;
  }

  bool Read = false, Write = false, Effects = false, StackPointer = false;
  Query(*Def, AA, Read, Write, Effects, StackPointer);

  // If the instruction does not access memory and has no side effects, it has
  // no additional dependencies.
  if (!Read && !Write && !Effects && !StackPointer)
    return true;

  // Scan through the intervening instructions between Def and Insert.
  MachineBasicBlock::const_iterator D(Def), I(Insert);
  for (--I; I != D; --I) {
    bool InterveningRead = false;
    bool InterveningWrite = false;
    bool InterveningEffects = false;
    bool InterveningStackPointer = false;
    Query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects,
          InterveningStackPointer);
    if (Effects && InterveningEffects)
      return false;
    if (Read && InterveningWrite)
      return false;
    if (Write && (InterveningRead || InterveningWrite))
      return false;
    if (StackPointer && InterveningStackPointer)
      return false;
  }

  return true;
}

/// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
                                     const MachineBasicBlock &MBB,
                                     const MachineRegisterInfo &MRI,
                                     const MachineDominatorTree &MDT,
                                     LiveIntervals &LIS,
                                     WebAssemblyFunctionInfo &MFI) {
  const LiveInterval &LI = LIS.getInterval(Reg);

  const MachineInstr *OneUseInst = OneUse.getParent();
  VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst));

  for (const MachineOperand &Use : MRI.use_operands(Reg)) {
    if (&Use == &OneUse)
      continue;

    const MachineInstr *UseInst = Use.getParent();
    VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst));

    if (UseVNI != OneUseVNI)
      continue;

    if (UseInst == OneUseInst) {
      // Another use in the same instruction. We need to ensure that the one
      // selected use happens "before" it.
      if (&OneUse > &Use)
        return false;
    } else {
      // Test that the use is dominated by the one selected use.
      while (!MDT.dominates(OneUseInst, UseInst)) {
        // Actually, dominating is over-conservative. Test that the use would
        // happen after the one selected use in the stack evaluation order.
        //
        // This is needed as a consequence of using implicit get_locals for
        // uses and implicit set_locals for defs.
        if (UseInst->getDesc().getNumDefs() == 0)
          return false;
        const MachineOperand &MO = UseInst->getOperand(0);
        if (!MO.isReg())
          return false;
        unsigned DefReg = MO.getReg();
        if (!TargetRegisterInfo::isVirtualRegister(DefReg) ||
            !MFI.isVRegStackified(DefReg))
          return false;
        assert(MRI.hasOneUse(DefReg));
        const MachineOperand &NewUse = *MRI.use_begin(DefReg);
        const MachineInstr *NewUseInst = NewUse.getParent();
        if (NewUseInst == OneUseInst) {
          if (&OneUse > &NewUse)
            return false;
          break;
        }
        UseInst = NewUseInst;
      }
    }
  }
  return true;
}

/// Get the appropriate tee_local opcode for the given register class.
static unsigned GetTeeLocalOpcode(const TargetRegisterClass *RC) {
  if (RC == &WebAssembly::I32RegClass)
    return WebAssembly::TEE_LOCAL_I32;
  if (RC == &WebAssembly::I64RegClass)
    return WebAssembly::TEE_LOCAL_I64;
  if (RC == &WebAssembly::F32RegClass)
    return WebAssembly::TEE_LOCAL_F32;
  if (RC == &WebAssembly::F64RegClass)
    return WebAssembly::TEE_LOCAL_F64;
  llvm_unreachable("Unexpected register class");
}

// Shrink LI to its uses, cleaning up LI.
static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
  if (LIS.shrinkToUses(&LI)) {
    SmallVector<LiveInterval*, 4> SplitLIs;
    LIS.splitSeparateComponents(LI, SplitLIs);
  }
}

/// A single-use def in the same block with no intervening memory or register
/// dependencies; move the def down and nest it with the current instruction.
static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand &Op,
                                      MachineInstr *Def,
                                      MachineBasicBlock &MBB,
                                      MachineInstr *Insert, LiveIntervals &LIS,
                                      WebAssemblyFunctionInfo &MFI,
                                      MachineRegisterInfo &MRI) {
  DEBUG(dbgs() << "Move for single use: "; Def->dump());

  MBB.splice(Insert, &MBB, Def);
  LIS.handleMove(*Def);

  if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) {
    // No one else is using this register for anything so we can just stackify
    // it in place.
    MFI.stackifyVReg(Reg);
  } else {
    // The register may have unrelated uses or defs; create a new register for
    // just our one def and use so that we can stackify it.
    unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
    Def->getOperand(0).setReg(NewReg);
    Op.setReg(NewReg);

    // Tell LiveIntervals about the new register.
    LIS.createAndComputeVirtRegInterval(NewReg);

    // Tell LiveIntervals about the changes to the old register.
    LiveInterval &LI = LIS.getInterval(Reg);
    LI.removeSegment(LIS.getInstructionIndex(*Def).getRegSlot(),
                     LIS.getInstructionIndex(*Op.getParent()).getRegSlot(),
                     /*RemoveDeadValNo=*/true);

    MFI.stackifyVReg(NewReg);

    DEBUG(dbgs() << " - Replaced register: "; Def->dump());
  }

  ImposeStackOrdering(Def);
  return Def;
}

/// A trivially cloneable instruction; clone it and nest the new copy with the
/// current instruction.
static MachineInstr *RematerializeCheapDef(
    unsigned Reg, MachineOperand &Op, MachineInstr &Def, MachineBasicBlock &MBB,
    MachineBasicBlock::instr_iterator Insert, LiveIntervals &LIS,
    WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI,
    const WebAssemblyInstrInfo *TII, const WebAssemblyRegisterInfo *TRI) {
  DEBUG(dbgs() << "Rematerializing cheap def: "; Def.dump());
  DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());

  unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
  TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
  Op.setReg(NewReg);
  MachineInstr *Clone = &*std::prev(Insert);
  LIS.InsertMachineInstrInMaps(*Clone);
  LIS.createAndComputeVirtRegInterval(NewReg);
  MFI.stackifyVReg(NewReg);
  ImposeStackOrdering(Clone);

  DEBUG(dbgs() << " - Cloned to "; Clone->dump());

  // Shrink the interval.
  bool IsDead = MRI.use_empty(Reg);
  if (!IsDead) {
    LiveInterval &LI = LIS.getInterval(Reg);
    ShrinkToUses(LI, LIS);
    IsDead = !LI.liveAt(LIS.getInstructionIndex(Def).getDeadSlot());
  }

  // If that was the last use of the original, delete the original.
  if (IsDead) {
    DEBUG(dbgs() << " - Deleting original\n");
    SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot();
    LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx);
    LIS.removeInterval(Reg);
    LIS.RemoveMachineInstrFromMaps(Def);
    Def.eraseFromParent();
  }

  return Clone;
}

/// A multiple-use def in the same block with no intervening memory or register
/// dependencies; move the def down, nest it with the current instruction, and
/// insert a tee_local to satisfy the rest of the uses. As an illustration,
/// rewrite this:
///
///    Reg = INST ...        // Def
///    INST ..., Reg, ...    // Insert
///    INST ..., Reg, ...
///    INST ..., Reg, ...
///
/// to this:
///
///    DefReg = INST ...     // Def (to become the new Insert)
///    TeeReg, Reg = TEE_LOCAL_... DefReg
///    INST ..., TeeReg, ... // Insert
///    INST ..., Reg, ...
///    INST ..., Reg, ...
///
/// with DefReg and TeeReg stackified. This eliminates a get_local from the
/// resulting code.
static MachineInstr *MoveAndTeeForMultiUse(
    unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
    MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
    MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
  DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());

  // Move Def into place.
  MBB.splice(Insert, &MBB, Def);
  LIS.handleMove(*Def);

  // Create the Tee and attach the registers.
  const auto *RegClass = MRI.getRegClass(Reg);
  unsigned TeeReg = MRI.createVirtualRegister(RegClass);
  unsigned DefReg = MRI.createVirtualRegister(RegClass);
  MachineOperand &DefMO = Def->getOperand(0);
  MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
                              TII->get(GetTeeLocalOpcode(RegClass)), TeeReg)
                          .addReg(Reg, RegState::Define)
                          .addReg(DefReg, getUndefRegState(DefMO.isDead()));
  Op.setReg(TeeReg);
  DefMO.setReg(DefReg);
  SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot();
  SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot();

  // Tell LiveIntervals we moved the original vreg def from Def to Tee.
  LiveInterval &LI = LIS.getInterval(Reg);
  LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx);
  VNInfo *ValNo = LI.getVNInfoAt(DefIdx);
  I->start = TeeIdx;
  ValNo->def = TeeIdx;
  ShrinkToUses(LI, LIS);

  // Finish stackifying the new regs.
  LIS.createAndComputeVirtRegInterval(TeeReg);
  LIS.createAndComputeVirtRegInterval(DefReg);
  MFI.stackifyVReg(DefReg);
  MFI.stackifyVReg(TeeReg);
  ImposeStackOrdering(Def);
  ImposeStackOrdering(Tee);

  DEBUG(dbgs() << " - Replaced register: "; Def->dump());
  DEBUG(dbgs() << " - Tee instruction: "; Tee->dump());
  return Def;
}

namespace {
/// A stack for walking the tree of instructions being built, visiting the
/// MachineOperands in DFS order.
class TreeWalkerState {
  typedef MachineInstr::mop_iterator mop_iterator;
  typedef std::reverse_iterator<mop_iterator> mop_reverse_iterator;
  typedef iterator_range<mop_reverse_iterator> RangeTy;
  SmallVector<RangeTy, 4> Worklist;

public:
  explicit TreeWalkerState(MachineInstr *Insert) {
    const iterator_range<mop_iterator> &Range = Insert->explicit_uses();
    if (Range.begin() != Range.end())
      Worklist.push_back(reverse(Range));
  }

  bool Done() const { return Worklist.empty(); }

  MachineOperand &Pop() {
    RangeTy &Range = Worklist.back();
    MachineOperand &Op = *Range.begin();
    Range = drop_begin(Range, 1);
    if (Range.begin() == Range.end())
      Worklist.pop_back();
    assert((Worklist.empty() ||
            Worklist.back().begin() != Worklist.back().end()) &&
           "Empty ranges shouldn't remain in the worklist");
    return Op;
  }

  /// Push Instr's operands onto the stack to be visited.
  void PushOperands(MachineInstr *Instr) {
    const iterator_range<mop_iterator> &Range(Instr->explicit_uses());
    if (Range.begin() != Range.end())
      Worklist.push_back(reverse(Range));
  }

  /// Some of Instr's operands are on the top of the stack; remove them and
  /// re-insert them starting from the beginning (because we've commuted them).
  void ResetTopOperands(MachineInstr *Instr) {
    assert(HasRemainingOperands(Instr) &&
           "Resetting operands should only be done when the instruction has "
           "an operand still on the stack");
    Worklist.back() = reverse(Instr->explicit_uses());
  }

  /// Test whether Instr has operands remaining to be visited at the top of
  /// the stack.
  bool HasRemainingOperands(const MachineInstr *Instr) const {
    if (Worklist.empty())
      return false;
    const RangeTy &Range = Worklist.back();
    return Range.begin() != Range.end() && Range.begin()->getParent() == Instr;
  }

  /// Test whether the given register is present on the stack, indicating an
  /// operand in the tree that we haven't visited yet. Moving a definition of
  /// Reg to a point in the tree after that would change its value.
  ///
  /// This is needed as a consequence of using implicit get_locals for
  /// uses and implicit set_locals for defs.
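  ///
  /// For example (simplified), if a use of %0 is still pending on the
  /// walker's stack, sinking a different def of %0 past this point would
  /// cause that pending use to observe the wrong value.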
  bool IsOnStack(unsigned Reg) const {
    for (const RangeTy &Range : Worklist)
      for (const MachineOperand &MO : Range)
        if (MO.isReg() && MO.getReg() == Reg)
          return true;
    return false;
  }
};

/// State to keep track of whether commuting is in flight or whether it's been
/// tried for the current instruction and didn't work.
class CommutingState {
  /// There are effectively three states: the initial state where we haven't
  /// started commuting anything and we don't know anything yet, the tentative
  /// state where we've commuted the operands of the current instruction and
  /// are revisiting it, and the declined state where we've reverted the
  /// operands back to their original order and will no longer commute it
  /// further.
  bool TentativelyCommuting;
  bool Declined;

  /// During the tentative state, these hold the operand indices of the commuted
  /// operands.
  unsigned Operand0, Operand1;

public:
  CommutingState() : TentativelyCommuting(false), Declined(false) {}

  /// Stackification for an operand was not successful due to ordering
  /// constraints. If possible, and if we haven't already tried it and declined
  /// it, commute Insert's operands and prepare to revisit it.
  void MaybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker,
                    const WebAssemblyInstrInfo *TII) {
    if (TentativelyCommuting) {
      assert(!Declined &&
             "Don't decline commuting until you've finished trying it");
      // Commuting didn't help. Revert it.
      TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
      TentativelyCommuting = false;
      Declined = true;
    } else if (!Declined && TreeWalker.HasRemainingOperands(Insert)) {
      Operand0 = TargetInstrInfo::CommuteAnyOperandIndex;
      Operand1 = TargetInstrInfo::CommuteAnyOperandIndex;
      if (TII->findCommutedOpIndices(*Insert, Operand0, Operand1)) {
        // Tentatively commute the operands and try again.
        TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
        TreeWalker.ResetTopOperands(Insert);
        TentativelyCommuting = true;
        Declined = false;
      }
    }
  }

  /// Stackification for some operand was successful. Reset to the default
  /// state.
  void Reset() {
    TentativelyCommuting = false;
    Declined = false;
  }
};
} // end anonymous namespace

bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(dbgs() << "********** Register Stackifying **********\n"
                  "********** Function: "
               << MF.getName() << '\n');

  bool Changed = false;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  LiveIntervals &LIS = getAnalysis<LiveIntervals>();

  // Walk the instructions from the bottom up. Currently we don't look past
  // block boundaries, and the blocks aren't ordered so the block visitation
  // order isn't significant, but we may want to change this in the future.
  for (MachineBasicBlock &MBB : MF) {
    // Don't use a range-based for loop, because we modify the list as we're
    // iterating over it and the end iterator may change.
    for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) {
      MachineInstr *Insert = &*MII;
      // Don't nest anything inside an inline asm, because we don't have
      // constraints for $push inputs.
      if (Insert->getOpcode() == TargetOpcode::INLINEASM)
        continue;

      // Ignore debugging intrinsics.
      if (Insert->getOpcode() == TargetOpcode::DBG_VALUE)
        continue;

      // Iterate through the inputs in reverse order, since we'll be pulling
      // operands off the stack in LIFO order.
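      // (For example, for "%2 = ADD_I32 %0, %1" the walker visits %1 before
      // %0, since %1 will be popped off the value stack first.)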
      CommutingState Commuting;
      TreeWalkerState TreeWalker(Insert);
      while (!TreeWalker.Done()) {
        MachineOperand &Op = TreeWalker.Pop();

        // We're only interested in explicit virtual register operands.
        if (!Op.isReg())
          continue;

        unsigned Reg = Op.getReg();
        assert(Op.isUse() && "explicit_uses() should only iterate over uses");
        assert(!Op.isImplicit() &&
               "explicit_uses() should only iterate over explicit operands");
        if (TargetRegisterInfo::isPhysicalRegister(Reg))
          continue;

        // Identify the definition for this register at this point. Most
        // registers are in SSA form here so we try a quick MRI query first.
        MachineInstr *Def = GetVRegDef(Reg, Insert, MRI, LIS);
        if (!Def)
          continue;

        // Don't nest an INLINE_ASM def into anything, because we don't have
        // constraints for $pop outputs.
        if (Def->getOpcode() == TargetOpcode::INLINEASM)
          continue;

        // Argument instructions represent live-in registers and not real
        // instructions.
        if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
            Def->getOpcode() == WebAssembly::ARGUMENT_I64 ||
            Def->getOpcode() == WebAssembly::ARGUMENT_F32 ||
            Def->getOpcode() == WebAssembly::ARGUMENT_F64)
          continue;

        // Decide which strategy to take. Prefer to move a single-use value
        // over cloning it, and prefer cloning over introducing a tee_local.
        // For moving, we require the def to be in the same block as the use;
        // this makes things simpler (LiveIntervals' handleMove function only
        // supports intra-block moves) and it's MachineSink's job to catch all
        // the sinking opportunities anyway.
        bool SameBlock = Def->getParent() == &MBB;
        bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, LIS, MRI) &&
                       !TreeWalker.IsOnStack(Reg);
        if (CanMove && HasOneUse(Reg, Def, MRI, MDT, LIS)) {
          Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
        } else if (ShouldRematerialize(*Def, AA, TII)) {
          Insert =
              RematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(),
                                    LIS, MFI, MRI, TII, TRI);
        } else if (CanMove &&
                   OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) {
          Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
                                         MRI, TII);
        } else {
          // We failed to stackify the operand. If the problem was ordering
          // constraints, Commuting may be able to help.
          if (!CanMove && SameBlock)
            Commuting.MaybeCommute(Insert, TreeWalker, TII);
          // Proceed to the next operand.
          continue;
        }

        // We stackified an operand. Add the defining instruction's operands to
        // the worklist stack now to continue to build an ever deeper tree.
        Commuting.Reset();
        TreeWalker.PushOperands(Insert);
      }

      // If we stackified any operands, skip over the tree to start looking for
      // the next instruction we can build a tree on.
      if (Insert != &*MII) {
        ImposeStackOrdering(&*MII);
        MII = std::prev(
            llvm::make_reverse_iterator(MachineBasicBlock::iterator(Insert)));
        Changed = true;
      }
    }
  }

  // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere so
  // that it never looks like a use-before-def.
  if (Changed) {
    MF.getRegInfo().addLiveIn(WebAssembly::EXPR_STACK);
    for (MachineBasicBlock &MBB : MF)
      MBB.addLiveIn(WebAssembly::EXPR_STACK);
  }

#ifndef NDEBUG
  // Verify that pushes and pops are performed in LIFO order.
  SmallVector<unsigned, 0> Stack;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.isDebugValue())
        continue;
      for (MachineOperand &MO : reverse(MI.explicit_operands())) {
        if (!MO.isReg())
          continue;
        unsigned Reg = MO.getReg();

        if (MFI.isVRegStackified(Reg)) {
          if (MO.isDef())
            Stack.push_back(Reg);
          else
            assert(Stack.pop_back_val() == Reg &&
                   "Register stack pop should be paired with a push");
        }
      }
    }
    // TODO: Generalize this code to support keeping values on the stack across
    // basic block boundaries.
    assert(Stack.empty() &&
           "Register stack pushes and pops should be balanced");
  }
#endif

  return Changed;
}