1//===--- HexagonStoreWidening.cpp------------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
// Replace sequences of "narrow" stores to adjacent memory locations with
// fewer "wide" stores that have the same effect.
11// For example, replace:
12//   S4_storeirb_io  %vreg100, 0, 0   ; store-immediate-byte
13//   S4_storeirb_io  %vreg100, 1, 0   ; store-immediate-byte
14// with
15//   S4_storeirh_io  %vreg100, 0, 0   ; store-immediate-halfword
16// The above is the general idea.  The actual cases handled by the code
17// may be a bit more complex.
18// The purpose of this pass is to reduce the number of outstanding stores,
19// or as one could say, "reduce store queue pressure".  Also, wide stores
20// mean fewer stores, and since there are only two memory instructions allowed
21// per packet, it also means fewer packets, and ultimately fewer cycles.
22//===---------------------------------------------------------------------===//
23
24#define DEBUG_TYPE "hexagon-widen-stores"
25
26#include "HexagonTargetMachine.h"
27
28#include "llvm/PassSupport.h"
29#include "llvm/Analysis/AliasAnalysis.h"
30#include "llvm/CodeGen/Passes.h"
31#include "llvm/CodeGen/MachineFunction.h"
32#include "llvm/CodeGen/MachineFunctionPass.h"
33#include "llvm/CodeGen/MachineInstrBuilder.h"
34#include "llvm/CodeGen/MachineRegisterInfo.h"
35#include "llvm/MC/MCInstrDesc.h"
36#include "llvm/Support/Debug.h"
37#include "llvm/Support/raw_ostream.h"
38#include "llvm/Target/TargetMachine.h"
39#include "llvm/Target/TargetRegisterInfo.h"
40#include "llvm/Target/TargetInstrInfo.h"
41
42#include <algorithm>
43
44
45using namespace llvm;
46
47namespace llvm {
48  FunctionPass *createHexagonStoreWidening();
49  void initializeHexagonStoreWideningPass(PassRegistry&);
50}
51
52namespace {
53  struct HexagonStoreWidening : public MachineFunctionPass {
54    const HexagonInstrInfo      *TII;
55    const HexagonRegisterInfo   *TRI;
56    const MachineRegisterInfo   *MRI;
57    AliasAnalysis               *AA;
58    MachineFunction             *MF;
59
60  public:
61    static char ID;
62    HexagonStoreWidening() : MachineFunctionPass(ID) {
63      initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry());
64    }
65
66    bool runOnMachineFunction(MachineFunction &MF) override;
67
68    const char *getPassName() const override {
69      return "Hexagon Store Widening";
70    }
71
72    void getAnalysisUsage(AnalysisUsage &AU) const override {
73      AU.addRequired<AAResultsWrapperPass>();
74      AU.addPreserved<AAResultsWrapperPass>();
75      MachineFunctionPass::getAnalysisUsage(AU);
76    }
77
78    static bool handledStoreType(const MachineInstr *MI);
79
80  private:
81    static const int MaxWideSize = 4;
82
83    typedef std::vector<MachineInstr*> InstrGroup;
84    typedef std::vector<InstrGroup> InstrGroupList;
85
86    bool instrAliased(InstrGroup &Stores, const MachineMemOperand &MMO);
87    bool instrAliased(InstrGroup &Stores, const MachineInstr *MI);
88    void createStoreGroup(MachineInstr *BaseStore, InstrGroup::iterator Begin,
89        InstrGroup::iterator End, InstrGroup &Group);
90    void createStoreGroups(MachineBasicBlock &MBB,
91        InstrGroupList &StoreGroups);
92    bool processBasicBlock(MachineBasicBlock &MBB);
93    bool processStoreGroup(InstrGroup &Group);
94    bool selectStores(InstrGroup::iterator Begin, InstrGroup::iterator End,
95        InstrGroup &OG, unsigned &TotalSize, unsigned MaxSize);
96    bool createWideStores(InstrGroup &OG, InstrGroup &NG, unsigned TotalSize);
97    bool replaceStores(InstrGroup &OG, InstrGroup &NG);
98    bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2);
99  };
100
101} // namespace
102
103
104namespace {
105
106// Some local helper functions...
107unsigned getBaseAddressRegister(const MachineInstr *MI) {
108  const MachineOperand &MO = MI->getOperand(0);
109  assert(MO.isReg() && "Expecting register operand");
110  return MO.getReg();
111}
112
113int64_t getStoreOffset(const MachineInstr *MI) {
114  unsigned OpC = MI->getOpcode();
115  assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode");
116
117  switch (OpC) {
118    case Hexagon::S4_storeirb_io:
119    case Hexagon::S4_storeirh_io:
120    case Hexagon::S4_storeiri_io: {
121      const MachineOperand &MO = MI->getOperand(1);
122      assert(MO.isImm() && "Expecting immediate offset");
123      return MO.getImm();
124    }
125  }
126  dbgs() << *MI;
127  llvm_unreachable("Store offset calculation missing for a handled opcode");
128  return 0;
129}
130
131const MachineMemOperand &getStoreTarget(const MachineInstr *MI) {
132  assert(!MI->memoperands_empty() && "Expecting memory operands");
133  return **MI->memoperands_begin();
134}
135
136} // namespace
137
138
139char HexagonStoreWidening::ID = 0;
140
141INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
142                "Hexason Store Widening", false, false)
143INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
144INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
145                "Hexagon Store Widening", false, false)
146
147
148// Filtering function: any stores whose opcodes are not "approved" of by
149// this function will not be subjected to widening.
150inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) {
151  // For now, only handle stores of immediate values.
152  // Also, reject stores to stack slots.
153  unsigned Opc = MI->getOpcode();
154  switch (Opc) {
155    case Hexagon::S4_storeirb_io:
156    case Hexagon::S4_storeirh_io:
157    case Hexagon::S4_storeiri_io:
158      // Base address must be a register. (Implement FI later.)
159      return MI->getOperand(0).isReg();
160    default:
161      return false;
162  }
163}
164
165
166// Check if the machine memory operand MMO is aliased with any of the
167// stores in the store group Stores.
168bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
169      const MachineMemOperand &MMO) {
170  if (!MMO.getValue())
171    return true;
172
173  MemoryLocation L(MMO.getValue(), MMO.getSize(), MMO.getAAInfo());
174
175  for (auto SI : Stores) {
176    const MachineMemOperand &SMO = getStoreTarget(SI);
177    if (!SMO.getValue())
178      return true;
179
180    MemoryLocation SL(SMO.getValue(), SMO.getSize(), SMO.getAAInfo());
181    if (AA->alias(L, SL))
182      return true;
183  }
184
185  return false;
186}
187
188
189// Check if the machine instruction MI accesses any storage aliased with
190// any store in the group Stores.
191bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
192      const MachineInstr *MI) {
193  for (auto &I : MI->memoperands())
194    if (instrAliased(Stores, *I))
195      return true;
196  return false;
197}
198
199
200// Inspect a machine basic block, and generate store groups out of stores
201// encountered in the block.
202//
203// A store group is a group of stores that use the same base register,
204// and which can be reordered within that group without altering the
205// semantics of the program.  A single store group could be widened as
206// a whole, if there existed a single store instruction with the same
207// semantics as the entire group.  In many cases, a single store group
208// may need more than one wide store.
209void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB,
210      InstrGroupList &StoreGroups) {
211  InstrGroup AllInsns;
212
213  // Copy all instruction pointers from the basic block to a temporary
214  // list.  This will allow operating on the list, and modifying its
215  // elements without affecting the basic block.
216  for (auto &I : MBB)
217    AllInsns.push_back(&I);
218
219  // Traverse all instructions in the AllInsns list, and if we encounter
220  // a store, then try to create a store group starting at that instruction
221  // i.e. a sequence of independent stores that can be widened.
222  for (auto I = AllInsns.begin(), E = AllInsns.end(); I != E; ++I) {
223    MachineInstr *MI = *I;
224    // Skip null pointers (processed instructions).
225    if (!MI || !handledStoreType(MI))
226      continue;
227
228    // Found a store.  Try to create a store group.
229    InstrGroup G;
230    createStoreGroup(MI, I+1, E, G);
231    if (G.size() > 1)
232      StoreGroups.push_back(G);
233  }
234}
235
236
237// Create a single store group.  The stores need to be independent between
238// themselves, and also there cannot be other instructions between them
239// that could read or modify storage being stored into.
240void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore,
241      InstrGroup::iterator Begin, InstrGroup::iterator End, InstrGroup &Group) {
242  assert(handledStoreType(BaseStore) && "Unexpected instruction");
243  unsigned BaseReg = getBaseAddressRegister(BaseStore);
244  InstrGroup Other;
245
246  Group.push_back(BaseStore);
247
248  for (auto I = Begin; I != End; ++I) {
249    MachineInstr *MI = *I;
250    if (!MI)
251      continue;
252
253    if (handledStoreType(MI)) {
254      // If this store instruction is aliased with anything already in the
255      // group, terminate the group now.
256      if (instrAliased(Group, getStoreTarget(MI)))
257        return;
258      // If this store is aliased to any of the memory instructions we have
259      // seen so far (that are not a part of this group), terminate the group.
260      if (instrAliased(Other, getStoreTarget(MI)))
261        return;
262
263      unsigned BR = getBaseAddressRegister(MI);
264      if (BR == BaseReg) {
265        Group.push_back(MI);
266        *I = 0;
267        continue;
268      }
269    }
270
271    // Assume calls are aliased to everything.
272    if (MI->isCall() || MI->hasUnmodeledSideEffects())
273      return;
274
275    if (MI->mayLoad() || MI->mayStore()) {
276      if (MI->hasOrderedMemoryRef() || instrAliased(Group, MI))
277        return;
278      Other.push_back(MI);
279    }
280  } // for
281}
282
283
284// Check if store instructions S1 and S2 are adjacent.  More precisely,
285// S2 has to access memory immediately following that accessed by S1.
286bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1,
287      const MachineInstr *S2) {
288  if (!handledStoreType(S1) || !handledStoreType(S2))
289    return false;
290
291  const MachineMemOperand &S1MO = getStoreTarget(S1);
292
293  // Currently only handling immediate stores.
294  int Off1 = S1->getOperand(1).getImm();
295  int Off2 = S2->getOperand(1).getImm();
296
297  return (Off1 >= 0) ? Off1+S1MO.getSize() == unsigned(Off2)
298                     : int(Off1+S1MO.getSize()) == Off2;
299}
300
301
302/// Given a sequence of adjacent stores, and a maximum size of a single wide
303/// store, pick a group of stores that  can be replaced by a single store
304/// of size not exceeding MaxSize.  The selected sequence will be recorded
305/// in OG ("old group" of instructions).
306/// OG should be empty on entry, and should be left empty if the function
307/// fails.
308bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
309      InstrGroup::iterator End, InstrGroup &OG, unsigned &TotalSize,
310      unsigned MaxSize) {
311  assert(Begin != End && "No instructions to analyze");
312  assert(OG.empty() && "Old group not empty on entry");
313
314  if (std::distance(Begin, End) <= 1)
315    return false;
316
317  MachineInstr *FirstMI = *Begin;
318  assert(!FirstMI->memoperands_empty() && "Expecting some memory operands");
319  const MachineMemOperand &FirstMMO = getStoreTarget(FirstMI);
320  unsigned Alignment = FirstMMO.getAlignment();
321  unsigned SizeAccum = FirstMMO.getSize();
322  unsigned FirstOffset = getStoreOffset(FirstMI);
323
324  // The initial value of SizeAccum should always be a power of 2.
325  assert(isPowerOf2_32(SizeAccum) && "First store size not a power of 2");
326
327  // If the size of the first store equals to or exceeds the limit, do nothing.
328  if (SizeAccum >= MaxSize)
329    return false;
330
331  // If the size of the first store is greater than or equal to the address
332  // stored to, then the store cannot be made any wider.
333  if (SizeAccum >= Alignment)
334    return false;
335
336  // The offset of a store will put restrictions on how wide the store can be.
337  // Offsets in stores of size 2^n bytes need to have the n lowest bits be 0.
338  // If the first store already exhausts the offset limits, quit.  Test this
339  // by checking if the next wider size would exceed the limit.
340  if ((2*SizeAccum-1) & FirstOffset)
341    return false;
342
343  OG.push_back(FirstMI);
344  MachineInstr *S1 = FirstMI, *S2 = *(Begin+1);
345  InstrGroup::iterator I = Begin+1;
346
347  // Pow2Num will be the largest number of elements in OG such that the sum
348  // of sizes of stores 0...Pow2Num-1 will be a power of 2.
349  unsigned Pow2Num = 1;
350  unsigned Pow2Size = SizeAccum;
351
352  // Be greedy: keep accumulating stores as long as they are to adjacent
353  // memory locations, and as long as the total number of bytes stored
354  // does not exceed the limit (MaxSize).
355  // Keep track of when the total size covered is a power of 2, since
356  // this is a size a single store can cover.
357  while (I != End) {
358    S2 = *I;
359    // Stores are sorted, so if S1 and S2 are not adjacent, there won't be
360    // any other store to fill the "hole".
361    if (!storesAreAdjacent(S1, S2))
362      break;
363
364    unsigned S2Size = getStoreTarget(S2).getSize();
365    if (SizeAccum + S2Size > std::min(MaxSize, Alignment))
366      break;
367
368    OG.push_back(S2);
369    SizeAccum += S2Size;
370    if (isPowerOf2_32(SizeAccum)) {
371      Pow2Num = OG.size();
372      Pow2Size = SizeAccum;
373    }
374    if ((2*Pow2Size-1) & FirstOffset)
375      break;
376
377    S1 = S2;
378    ++I;
379  }
380
381  // The stores don't add up to anything that can be widened.  Clean up.
382  if (Pow2Num <= 1) {
383    OG.clear();
384    return false;
385  }
386
387  // Only leave the stored being widened.
388  OG.resize(Pow2Num);
389  TotalSize = Pow2Size;
390  return true;
391}
392
393
394/// Given an "old group" OG of stores, create a "new group" NG of instructions
395/// to replace them.  Ideally, NG would only have a single instruction in it,
396/// but that may only be possible for store-immediate.
397bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
398      unsigned TotalSize) {
399  // XXX Current limitations:
400  // - only expect stores of immediate values in OG,
401  // - only handle a TotalSize of up to 4.
402
403  if (TotalSize > 4)
404    return false;
405
406  unsigned Acc = 0;  // Value accumulator.
407  unsigned Shift = 0;
408
409  for (InstrGroup::iterator I = OG.begin(), E = OG.end(); I != E; ++I) {
410    MachineInstr *MI = *I;
411    const MachineMemOperand &MMO = getStoreTarget(MI);
412    MachineOperand &SO = MI->getOperand(2);  // Source.
413    assert(SO.isImm() && "Expecting an immediate operand");
414
415    unsigned NBits = MMO.getSize()*8;
416    unsigned Mask = (0xFFFFFFFFU >> (32-NBits));
417    unsigned Val = (SO.getImm() & Mask) << Shift;
418    Acc |= Val;
419    Shift += NBits;
420  }
421
422
423  MachineInstr *FirstSt = OG.front();
424  DebugLoc DL = OG.back()->getDebugLoc();
425  const MachineMemOperand &OldM = getStoreTarget(FirstSt);
426  MachineMemOperand *NewM =
427    MF->getMachineMemOperand(OldM.getPointerInfo(), OldM.getFlags(),
428                             TotalSize, OldM.getAlignment(),
429                             OldM.getAAInfo());
430
431  if (Acc < 0x10000) {
432    // Create mem[hw] = #Acc
433    unsigned WOpc = (TotalSize == 2) ? Hexagon::S4_storeirh_io :
434                    (TotalSize == 4) ? Hexagon::S4_storeiri_io : 0;
435    assert(WOpc && "Unexpected size");
436
437    int Val = (TotalSize == 2) ? int16_t(Acc) : int(Acc);
438    const MCInstrDesc &StD = TII->get(WOpc);
439    MachineOperand &MR = FirstSt->getOperand(0);
440    int64_t Off = FirstSt->getOperand(1).getImm();
441    MachineInstr *StI = BuildMI(*MF, DL, StD)
442                          .addReg(MR.getReg(), getKillRegState(MR.isKill()))
443                          .addImm(Off)
444                          .addImm(Val);
445    StI->addMemOperand(*MF, NewM);
446    NG.push_back(StI);
447  } else {
448    // Create vreg = A2_tfrsi #Acc; mem[hw] = vreg
449    const MCInstrDesc &TfrD = TII->get(Hexagon::A2_tfrsi);
450    const TargetRegisterClass *RC = TII->getRegClass(TfrD, 0, TRI, *MF);
451    unsigned VReg = MF->getRegInfo().createVirtualRegister(RC);
452    MachineInstr *TfrI = BuildMI(*MF, DL, TfrD, VReg)
453                           .addImm(int(Acc));
454    NG.push_back(TfrI);
455
456    unsigned WOpc = (TotalSize == 2) ? Hexagon::S2_storerh_io :
457                    (TotalSize == 4) ? Hexagon::S2_storeri_io : 0;
458    assert(WOpc && "Unexpected size");
459
460    const MCInstrDesc &StD = TII->get(WOpc);
461    MachineOperand &MR = FirstSt->getOperand(0);
462    int64_t Off = FirstSt->getOperand(1).getImm();
463    MachineInstr *StI = BuildMI(*MF, DL, StD)
464                          .addReg(MR.getReg(), getKillRegState(MR.isKill()))
465                          .addImm(Off)
466                          .addReg(VReg, RegState::Kill);
467    StI->addMemOperand(*MF, NewM);
468    NG.push_back(StI);
469  }
470
471  return true;
472}
473
474
475// Replace instructions from the old group OG with instructions from the
476// new group NG.  Conceptually, remove all instructions in OG, and then
477// insert all instructions in NG, starting at where the first instruction
478// from OG was (in the order in which they appeared in the basic block).
479// (The ordering in OG does not have to match the order in the basic block.)
480bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) {
481  DEBUG({
482    dbgs() << "Replacing:\n";
483    for (auto I : OG)
484      dbgs() << "  " << *I;
485    dbgs() << "with\n";
486    for (auto I : NG)
487      dbgs() << "  " << *I;
488  });
489
490  MachineBasicBlock *MBB = OG.back()->getParent();
491  MachineBasicBlock::iterator InsertAt = MBB->end();
492
493  // Need to establish the insertion point.  The best one is right before
494  // the first store in the OG, but in the order in which the stores occur
495  // in the program list.  Since the ordering in OG does not correspond
496  // to the order in the program list, we need to do some work to find
497  // the insertion point.
498
499  // Create a set of all instructions in OG (for quick lookup).
500  SmallPtrSet<MachineInstr*, 4> InstrSet;
501  for (auto I : OG)
502    InstrSet.insert(I);
503
504  // Traverse the block, until we hit an instruction from OG.
505  for (auto &I : *MBB) {
506    if (InstrSet.count(&I)) {
507      InsertAt = I;
508      break;
509    }
510  }
511
512  assert((InsertAt != MBB->end()) && "Cannot locate any store from the group");
513
514  bool AtBBStart = false;
515
516  // InsertAt points at the first instruction that will be removed.  We need
517  // to move it out of the way, so it remains valid after removing all the
518  // old stores, and so we are able to recover it back to the proper insertion
519  // position.
520  if (InsertAt != MBB->begin())
521    --InsertAt;
522  else
523    AtBBStart = true;
524
525  for (auto I : OG)
526    I->eraseFromParent();
527
528  if (!AtBBStart)
529    ++InsertAt;
530  else
531    InsertAt = MBB->begin();
532
533  for (auto I : NG)
534    MBB->insert(InsertAt, I);
535
536  return true;
537}
538
539
540// Break up the group into smaller groups, each of which can be replaced by
541// a single wide store.  Widen each such smaller group and replace the old
542// instructions with the widened ones.
543bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) {
544  bool Changed = false;
545  InstrGroup::iterator I = Group.begin(), E = Group.end();
546  InstrGroup OG, NG;   // Old and new groups.
547  unsigned CollectedSize;
548
549  while (I != E) {
550    OG.clear();
551    NG.clear();
552
553    bool Succ = selectStores(I++, E, OG, CollectedSize, MaxWideSize) &&
554                createWideStores(OG, NG, CollectedSize)              &&
555                replaceStores(OG, NG);
556    if (!Succ)
557      continue;
558
559    assert(OG.size() > 1 && "Created invalid group");
560    assert(distance(I, E)+1 >= int(OG.size()) && "Too many elements");
561    I += OG.size()-1;
562
563    Changed = true;
564  }
565
566  return Changed;
567}
568
569
570// Process a single basic block: create the store groups, and replace them
571// with the widened stores, if possible.  Processing of each basic block
572// is independent from processing of any other basic block.  This transfor-
573// mation could be stopped after having processed any basic block without
574// any ill effects (other than not having performed widening in the unpro-
575// cessed blocks).  Also, the basic blocks can be processed in any order.
576bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
577  InstrGroupList SGs;
578  bool Changed = false;
579
580  createStoreGroups(MBB, SGs);
581
582  auto Less = [] (const MachineInstr *A, const MachineInstr *B) -> bool {
583    return getStoreOffset(A) < getStoreOffset(B);
584  };
585  for (auto &G : SGs) {
586    assert(G.size() > 1 && "Store group with fewer than 2 elements");
587    std::sort(G.begin(), G.end(), Less);
588
589    Changed |= processStoreGroup(G);
590  }
591
592  return Changed;
593}
594
595
596bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) {
597  if (skipFunction(*MFn.getFunction()))
598    return false;
599
600  MF = &MFn;
601  auto &ST = MFn.getSubtarget<HexagonSubtarget>();
602  TII = ST.getInstrInfo();
603  TRI = ST.getRegisterInfo();
604  MRI = &MFn.getRegInfo();
605  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
606
607  bool Changed = false;
608
609  for (auto &B : MFn)
610    Changed |= processBasicBlock(B);
611
612  return Changed;
613}
614
615
616FunctionPass *llvm::createHexagonStoreWidening() {
617  return new HexagonStoreWidening();
618}
619
620