1//===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Top-level implementation for the PowerPC target.
11//
12//===----------------------------------------------------------------------===//
13
14#include "PPCTargetMachine.h"
15#include "PPC.h"
16#include "PPCTargetObjectFile.h"
17#include "PPCTargetTransformInfo.h"
18#include "llvm/CodeGen/LiveVariables.h"
19#include "llvm/CodeGen/Passes.h"
20#include "llvm/CodeGen/TargetPassConfig.h"
21#include "llvm/IR/Function.h"
22#include "llvm/IR/LegacyPassManager.h"
23#include "llvm/MC/MCStreamer.h"
24#include "llvm/Support/CommandLine.h"
25#include "llvm/Support/FormattedStream.h"
26#include "llvm/Support/TargetRegistry.h"
27#include "llvm/Target/TargetOptions.h"
28#include "llvm/Transforms/Scalar.h"
29using namespace llvm;
30
31static cl::
32opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
33                        cl::desc("Disable CTR loops for PPC"));
34
35static cl::
36opt<bool> DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden,
37                            cl::desc("Disable PPC loop preinc prep"));
38
39static cl::opt<bool>
40VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
41  cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early"));
42
43static cl::
44opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
45                                cl::desc("Disable VSX Swap Removal for PPC"));
46
47static cl::
48opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden,
49                              cl::desc("Disable QPX load splat simplification"));
50
51static cl::
52opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
53                            cl::desc("Disable machine peepholes for PPC"));
54
55static cl::opt<bool>
56EnableGEPOpt("ppc-gep-opt", cl::Hidden,
57             cl::desc("Enable optimizations on complex GEPs"),
58             cl::init(true));
59
60static cl::opt<bool>
61EnablePrefetch("enable-ppc-prefetching",
62                  cl::desc("disable software prefetching on PPC"),
63                  cl::init(false), cl::Hidden);
64
65static cl::opt<bool>
66EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps",
67                      cl::desc("Add extra TOC register dependencies"),
68                      cl::init(true), cl::Hidden);
69
70static cl::opt<bool>
71EnableMachineCombinerPass("ppc-machine-combiner",
72                          cl::desc("Enable the machine combiner pass"),
73                          cl::init(true), cl::Hidden);
74
75extern "C" void LLVMInitializePowerPCTarget() {
76  // Register the targets
77  RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
78  RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
79  RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
80
81  PassRegistry &PR = *PassRegistry::getPassRegistry();
82  initializePPCBoolRetToIntPass(PR);
83}
84
85/// Return the datalayout string of a subtarget.
86static std::string getDataLayoutString(const Triple &T) {
87  bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
88  std::string Ret;
89
90  // Most PPC* platforms are big endian, PPC64LE is little endian.
91  if (T.getArch() == Triple::ppc64le)
92    Ret = "e";
93  else
94    Ret = "E";
95
96  Ret += DataLayout::getManglingComponent(T);
97
98  // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
99  // pointers.
100  if (!is64Bit || T.getOS() == Triple::Lv2)
101    Ret += "-p:32:32";
102
103  // Note, the alignment values for f64 and i64 on ppc64 in Darwin
104  // documentation are wrong; these are correct (i.e. "what gcc does").
105  if (is64Bit || !T.isOSDarwin())
106    Ret += "-i64:64";
107  else
108    Ret += "-f64:32:64";
109
110  // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
111  if (is64Bit)
112    Ret += "-n32:64";
113  else
114    Ret += "-n32";
115
116  return Ret;
117}
118
119static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
120                                      const Triple &TT) {
121  std::string FullFS = FS;
122
123  // Make sure 64-bit features are available when CPUname is generic
124  if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) {
125    if (!FullFS.empty())
126      FullFS = "+64bit," + FullFS;
127    else
128      FullFS = "+64bit";
129  }
130
131  if (OL >= CodeGenOpt::Default) {
132    if (!FullFS.empty())
133      FullFS = "+crbits," + FullFS;
134    else
135      FullFS = "+crbits";
136  }
137
138  if (OL != CodeGenOpt::None) {
139    if (!FullFS.empty())
140      FullFS = "+invariant-function-descriptors," + FullFS;
141    else
142      FullFS = "+invariant-function-descriptors";
143  }
144
145  return FullFS;
146}
147
148static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
149  // If it isn't a Mach-O file then it's going to be a linux ELF
150  // object file.
151  if (TT.isOSDarwin())
152    return make_unique<TargetLoweringObjectFileMachO>();
153
154  return make_unique<PPC64LinuxTargetObjectFile>();
155}
156
157static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
158                                                 const TargetOptions &Options) {
159  if (Options.MCOptions.getABIName().startswith("elfv1"))
160    return PPCTargetMachine::PPC_ABI_ELFv1;
161  else if (Options.MCOptions.getABIName().startswith("elfv2"))
162    return PPCTargetMachine::PPC_ABI_ELFv2;
163
164  assert(Options.MCOptions.getABIName().empty() &&
165         "Unknown target-abi option!");
166
167  if (!TT.isMacOSX()) {
168    switch (TT.getArch()) {
169    case Triple::ppc64le:
170      return PPCTargetMachine::PPC_ABI_ELFv2;
171    case Triple::ppc64:
172      return PPCTargetMachine::PPC_ABI_ELFv1;
173    default:
174      // Fallthrough.
175      ;
176    }
177  }
178  return PPCTargetMachine::PPC_ABI_UNKNOWN;
179}
180
181static Reloc::Model getEffectiveRelocModel(const Triple &TT,
182                                           Optional<Reloc::Model> RM) {
183  if (!RM.hasValue()) {
184    if (TT.isOSDarwin())
185      return Reloc::DynamicNoPIC;
186    return Reloc::Static;
187  }
188  return *RM;
189}
190
191// The FeatureString here is a little subtle. We are modifying the feature
192// string with what are (currently) non-function specific overrides as it goes
193// into the LLVMTargetMachine constructor and then using the stored value in the
194// Subtarget constructor below it.
195PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
196                                   StringRef CPU, StringRef FS,
197                                   const TargetOptions &Options,
198                                   Optional<Reloc::Model> RM,
199                                   CodeModel::Model CM, CodeGenOpt::Level OL)
200    : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
201                        computeFSAdditions(FS, OL, TT), Options,
202                        getEffectiveRelocModel(TT, RM), CM, OL),
203      TLOF(createTLOF(getTargetTriple())),
204      TargetABI(computeTargetABI(TT, Options)),
205      Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
206
207  // For the estimates, convergence is quadratic, so we essentially double the
208  // number of digits correct after every iteration. For both FRE and FRSQRTE,
209  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
210  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
211  unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
212           RefinementSteps64 = RefinementSteps + 1;
213
214  this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps);
215  this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps);
216  this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps);
217  this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps);
218
219  this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64);
220  this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64);
221  this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64);
222  this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64);
223
224  initAsmInfo();
225}
226
227PPCTargetMachine::~PPCTargetMachine() {}
228
229void PPC32TargetMachine::anchor() { }
230
231PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT,
232                                       StringRef CPU, StringRef FS,
233                                       const TargetOptions &Options,
234                                       Optional<Reloc::Model> RM,
235                                       CodeModel::Model CM,
236                                       CodeGenOpt::Level OL)
237    : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
238
239void PPC64TargetMachine::anchor() { }
240
241PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT,
242                                       StringRef CPU, StringRef FS,
243                                       const TargetOptions &Options,
244                                       Optional<Reloc::Model> RM,
245                                       CodeModel::Model CM,
246                                       CodeGenOpt::Level OL)
247    : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
248
249const PPCSubtarget *
250PPCTargetMachine::getSubtargetImpl(const Function &F) const {
251  Attribute CPUAttr = F.getFnAttribute("target-cpu");
252  Attribute FSAttr = F.getFnAttribute("target-features");
253
254  std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
255                        ? CPUAttr.getValueAsString().str()
256                        : TargetCPU;
257  std::string FS = !FSAttr.hasAttribute(Attribute::None)
258                       ? FSAttr.getValueAsString().str()
259                       : TargetFS;
260
261  // FIXME: This is related to the code below to reset the target options,
262  // we need to know whether or not the soft float flag is set on the
263  // function before we can generate a subtarget. We also need to use
264  // it as a key for the subtarget since that can be the only difference
265  // between two functions.
266  bool SoftFloat =
267      F.getFnAttribute("use-soft-float").getValueAsString() == "true";
268  // If the soft float attribute is set on the function turn on the soft float
269  // subtarget feature.
270  if (SoftFloat)
271    FS += FS.empty() ? "+soft-float" : ",+soft-float";
272
273  auto &I = SubtargetMap[CPU + FS];
274  if (!I) {
275    // This needs to be done before we create a new subtarget since any
276    // creation will depend on the TM and the code generation flags on the
277    // function that reside in TargetOptions.
278    resetTargetOptions(F);
279    I = llvm::make_unique<PPCSubtarget>(
280        TargetTriple, CPU,
281        // FIXME: It would be good to have the subtarget additions here
282        // not necessary. Anything that turns them on/off (overrides) ends
283        // up being put at the end of the feature string, but the defaults
284        // shouldn't require adding them. Fixing this means pulling Feature64Bit
285        // out of most of the target cpus in the .td file and making it set only
286        // as part of initialization via the TargetTriple.
287        computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this);
288  }
289  return I.get();
290}
291
292//===----------------------------------------------------------------------===//
293// Pass Pipeline Configuration
294//===----------------------------------------------------------------------===//
295
296namespace {
297/// PPC Code Generator Pass Configuration Options.
298class PPCPassConfig : public TargetPassConfig {
299public:
300  PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM)
301    : TargetPassConfig(TM, PM) {}
302
303  PPCTargetMachine &getPPCTargetMachine() const {
304    return getTM<PPCTargetMachine>();
305  }
306
307  void addIRPasses() override;
308  bool addPreISel() override;
309  bool addILPOpts() override;
310  bool addInstSelector() override;
311  void addMachineSSAOptimization() override;
312  void addPreRegAlloc() override;
313  void addPreSched2() override;
314  void addPreEmitPass() override;
315};
316} // namespace
317
318TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
319  return new PPCPassConfig(this, PM);
320}
321
322void PPCPassConfig::addIRPasses() {
323  if (TM->getOptLevel() != CodeGenOpt::None)
324    addPass(createPPCBoolRetToIntPass());
325  addPass(createAtomicExpandPass(&getPPCTargetMachine()));
326
327  // For the BG/Q (or if explicitly requested), add explicit data prefetch
328  // intrinsics.
329  bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
330                        getOptLevel() != CodeGenOpt::None;
331  if (EnablePrefetch.getNumOccurrences() > 0)
332    UsePrefetching = EnablePrefetch;
333  if (UsePrefetching)
334    addPass(createLoopDataPrefetchPass());
335
336  if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) {
337    // Call SeparateConstOffsetFromGEP pass to extract constants within indices
338    // and lower a GEP with multiple indices to either arithmetic operations or
339    // multiple GEPs with single index.
340    addPass(createSeparateConstOffsetFromGEPPass(TM, true));
341    // Call EarlyCSE pass to find and remove subexpressions in the lowered
342    // result.
343    addPass(createEarlyCSEPass());
344    // Do loop invariant code motion in case part of the lowered result is
345    // invariant.
346    addPass(createLICMPass());
347  }
348
349  TargetPassConfig::addIRPasses();
350}
351
352bool PPCPassConfig::addPreISel() {
353  if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None)
354    addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
355
356  if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
357    addPass(createPPCCTRLoops(getPPCTargetMachine()));
358
359  return false;
360}
361
362bool PPCPassConfig::addILPOpts() {
363  addPass(&EarlyIfConverterID);
364
365  if (EnableMachineCombinerPass)
366    addPass(&MachineCombinerID);
367
368  return true;
369}
370
371bool PPCPassConfig::addInstSelector() {
372  // Install an instruction selector.
373  addPass(createPPCISelDag(getPPCTargetMachine()));
374
375#ifndef NDEBUG
376  if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
377    addPass(createPPCCTRLoopsVerify());
378#endif
379
380  addPass(createPPCVSXCopyPass());
381  return false;
382}
383
384void PPCPassConfig::addMachineSSAOptimization() {
385  TargetPassConfig::addMachineSSAOptimization();
386  // For little endian, remove where possible the vector swap instructions
387  // introduced at code generation to normalize vector element order.
388  if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
389      !DisableVSXSwapRemoval)
390    addPass(createPPCVSXSwapRemovalPass());
391  // Target-specific peephole cleanups performed after instruction
392  // selection.
393  if (!DisableMIPeephole) {
394    addPass(createPPCMIPeepholePass());
395    addPass(&DeadMachineInstructionElimID);
396  }
397}
398
399void PPCPassConfig::addPreRegAlloc() {
400  if (getOptLevel() != CodeGenOpt::None) {
401    initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
402    insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
403               &PPCVSXFMAMutateID);
404  }
405
406  // FIXME: We probably don't need to run these for -fPIE.
407  if (getPPCTargetMachine().isPositionIndependent()) {
408    // FIXME: LiveVariables should not be necessary here!
409    // PPCTLSDYnamicCallPass uses LiveIntervals which previously dependet on
410    // LiveVariables. This (unnecessary) dependency has been removed now,
411    // however a stage-2 clang build fails without LiveVariables computed here.
412    addPass(&LiveVariablesID, false);
413    addPass(createPPCTLSDynamicCallPass());
414  }
415  if (EnableExtraTOCRegDeps)
416    addPass(createPPCTOCRegDepsPass());
417}
418
419void PPCPassConfig::addPreSched2() {
420  if (getOptLevel() != CodeGenOpt::None) {
421    addPass(&IfConverterID);
422
423    // This optimization must happen after anything that might do store-to-load
424    // forwarding. Here we're after RA (and, thus, when spills are inserted)
425    // but before post-RA scheduling.
426    if (!DisableQPXLoadSplat)
427      addPass(createPPCQPXLoadSplatPass());
428  }
429}
430
431void PPCPassConfig::addPreEmitPass() {
432  if (getOptLevel() != CodeGenOpt::None)
433    addPass(createPPCEarlyReturnPass(), false);
434  // Must run branch selection immediately preceding the asm printer.
435  addPass(createPPCBranchSelectionPass(), false);
436}
437
438TargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() {
439  return TargetIRAnalysis([this](const Function &F) {
440    return TargetTransformInfo(PPCTTIImpl(this, F));
441  });
442}
443