1//===-- AtomicExpandPass.cpp - Expand atomic instructions -------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains a pass (at IR level) to replace atomic instructions with
11// __atomic_* library calls, or target specific instruction which implement the
12// same semantics in a way which better fits the target backend.  This can
13// include the use of (intrinsic-based) load-linked/store-conditional loops,
14// AtomicCmpXchg, or type coercions.
15//
16//===----------------------------------------------------------------------===//
17
18#include "llvm/CodeGen/AtomicExpandUtils.h"
19#include "llvm/CodeGen/Passes.h"
20#include "llvm/IR/Function.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/InstIterator.h"
23#include "llvm/IR/Instructions.h"
24#include "llvm/IR/Intrinsics.h"
25#include "llvm/IR/Module.h"
26#include "llvm/Support/Debug.h"
27#include "llvm/Support/raw_ostream.h"
28#include "llvm/Target/TargetLowering.h"
29#include "llvm/Target/TargetMachine.h"
30#include "llvm/Target/TargetSubtargetInfo.h"
31
32using namespace llvm;
33
34#define DEBUG_TYPE "atomic-expand"
35
36namespace {
37  class AtomicExpand: public FunctionPass {
38    const TargetMachine *TM;
39    const TargetLowering *TLI;
40  public:
41    static char ID; // Pass identification, replacement for typeid
42    explicit AtomicExpand(const TargetMachine *TM = nullptr)
43      : FunctionPass(ID), TM(TM), TLI(nullptr) {
44      initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
45    }
46
47    bool runOnFunction(Function &F) override;
48
49  private:
50    bool bracketInstWithFences(Instruction *I, AtomicOrdering Order,
51                               bool IsStore, bool IsLoad);
52    IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
53    LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
54    bool tryExpandAtomicLoad(LoadInst *LI);
55    bool expandAtomicLoadToLL(LoadInst *LI);
56    bool expandAtomicLoadToCmpXchg(LoadInst *LI);
57    StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
58    bool expandAtomicStore(StoreInst *SI);
59    bool tryExpandAtomicRMW(AtomicRMWInst *AI);
60    Value *
61    insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
62                      AtomicOrdering MemOpOrder,
63                      function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
64    void expandAtomicOpToLLSC(
65        Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder,
66        function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
67    void expandPartwordAtomicRMW(
68        AtomicRMWInst *I,
69        TargetLoweringBase::AtomicExpansionKind ExpansionKind);
70    void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
71
72    AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
73    static Value *insertRMWCmpXchgLoop(
74        IRBuilder<> &Builder, Type *ResultType, Value *Addr,
75        AtomicOrdering MemOpOrder,
76        function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
77        CreateCmpXchgInstFun CreateCmpXchg);
78
79    bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
80    bool isIdempotentRMW(AtomicRMWInst *AI);
81    bool simplifyIdempotentRMW(AtomicRMWInst *AI);
82
83    bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,
84                                 Value *PointerOperand, Value *ValueOperand,
85                                 Value *CASExpected, AtomicOrdering Ordering,
86                                 AtomicOrdering Ordering2,
87                                 ArrayRef<RTLIB::Libcall> Libcalls);
88    void expandAtomicLoadToLibcall(LoadInst *LI);
89    void expandAtomicStoreToLibcall(StoreInst *LI);
90    void expandAtomicRMWToLibcall(AtomicRMWInst *I);
91    void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
92
93    friend bool
94    llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
95                                   CreateCmpXchgInstFun CreateCmpXchg);
96  };
97}
98
99char AtomicExpand::ID = 0;
100char &llvm::AtomicExpandID = AtomicExpand::ID;
101INITIALIZE_TM_PASS(AtomicExpand, "atomic-expand", "Expand Atomic instructions",
102                   false, false)
103
104FunctionPass *llvm::createAtomicExpandPass(const TargetMachine *TM) {
105  return new AtomicExpand(TM);
106}
107
108namespace {
109// Helper functions to retrieve the size of atomic instructions.
110unsigned getAtomicOpSize(LoadInst *LI) {
111  const DataLayout &DL = LI->getModule()->getDataLayout();
112  return DL.getTypeStoreSize(LI->getType());
113}
114
115unsigned getAtomicOpSize(StoreInst *SI) {
116  const DataLayout &DL = SI->getModule()->getDataLayout();
117  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
118}
119
120unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
121  const DataLayout &DL = RMWI->getModule()->getDataLayout();
122  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
123}
124
125unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
126  const DataLayout &DL = CASI->getModule()->getDataLayout();
127  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
128}
129
130// Helper functions to retrieve the alignment of atomic instructions.
131unsigned getAtomicOpAlign(LoadInst *LI) {
132  unsigned Align = LI->getAlignment();
133  // In the future, if this IR restriction is relaxed, we should
134  // return DataLayout::getABITypeAlignment when there's no align
135  // value.
136  assert(Align != 0 && "An atomic LoadInst always has an explicit alignment");
137  return Align;
138}
139
140unsigned getAtomicOpAlign(StoreInst *SI) {
141  unsigned Align = SI->getAlignment();
142  // In the future, if this IR restriction is relaxed, we should
143  // return DataLayout::getABITypeAlignment when there's no align
144  // value.
145  assert(Align != 0 && "An atomic StoreInst always has an explicit alignment");
146  return Align;
147}
148
149unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
150  // TODO(PR27168): This instruction has no alignment attribute, but unlike the
151  // default alignment for load/store, the default here is to assume
152  // it has NATURAL alignment, not DataLayout-specified alignment.
153  const DataLayout &DL = RMWI->getModule()->getDataLayout();
154  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
155}
156
157unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
158  // TODO(PR27168): same comment as above.
159  const DataLayout &DL = CASI->getModule()->getDataLayout();
160  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
161}
162
163// Determine if a particular atomic operation has a supported size,
164// and is of appropriate alignment, to be passed through for target
165// lowering. (Versus turning into a __atomic libcall)
166template <typename Inst>
167bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
168  unsigned Size = getAtomicOpSize(I);
169  unsigned Align = getAtomicOpAlign(I);
170  return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
171}
172
173} // end anonymous namespace
174
175bool AtomicExpand::runOnFunction(Function &F) {
176  if (!TM || !TM->getSubtargetImpl(F)->enableAtomicExpand())
177    return false;
178  TLI = TM->getSubtargetImpl(F)->getTargetLowering();
179
180  SmallVector<Instruction *, 1> AtomicInsts;
181
182  // Changing control-flow while iterating through it is a bad idea, so gather a
183  // list of all atomic instructions before we start.
184  for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
185    Instruction *I = &*II;
186    if (I->isAtomic() && !isa<FenceInst>(I))
187      AtomicInsts.push_back(I);
188  }
189
190  bool MadeChange = false;
191  for (auto I : AtomicInsts) {
192    auto LI = dyn_cast<LoadInst>(I);
193    auto SI = dyn_cast<StoreInst>(I);
194    auto RMWI = dyn_cast<AtomicRMWInst>(I);
195    auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
196    assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
197
198    // If the Size/Alignment is not supported, replace with a libcall.
199    if (LI) {
200      if (!atomicSizeSupported(TLI, LI)) {
201        expandAtomicLoadToLibcall(LI);
202        MadeChange = true;
203        continue;
204      }
205    } else if (SI) {
206      if (!atomicSizeSupported(TLI, SI)) {
207        expandAtomicStoreToLibcall(SI);
208        MadeChange = true;
209        continue;
210      }
211    } else if (RMWI) {
212      if (!atomicSizeSupported(TLI, RMWI)) {
213        expandAtomicRMWToLibcall(RMWI);
214        MadeChange = true;
215        continue;
216      }
217    } else if (CASI) {
218      if (!atomicSizeSupported(TLI, CASI)) {
219        expandAtomicCASToLibcall(CASI);
220        MadeChange = true;
221        continue;
222      }
223    }
224
225    if (TLI->shouldInsertFencesForAtomic(I)) {
226      auto FenceOrdering = AtomicOrdering::Monotonic;
227      bool IsStore, IsLoad;
228      if (LI && isAcquireOrStronger(LI->getOrdering())) {
229        FenceOrdering = LI->getOrdering();
230        LI->setOrdering(AtomicOrdering::Monotonic);
231        IsStore = false;
232        IsLoad = true;
233      } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
234        FenceOrdering = SI->getOrdering();
235        SI->setOrdering(AtomicOrdering::Monotonic);
236        IsStore = true;
237        IsLoad = false;
238      } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
239                          isAcquireOrStronger(RMWI->getOrdering()))) {
240        FenceOrdering = RMWI->getOrdering();
241        RMWI->setOrdering(AtomicOrdering::Monotonic);
242        IsStore = IsLoad = true;
243      } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
244                 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
245                  isAcquireOrStronger(CASI->getSuccessOrdering()))) {
246        // If a compare and swap is lowered to LL/SC, we can do smarter fence
247        // insertion, with a stronger one on the success path than on the
248        // failure path. As a result, fence insertion is directly done by
249        // expandAtomicCmpXchg in that case.
250        FenceOrdering = CASI->getSuccessOrdering();
251        CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
252        CASI->setFailureOrdering(AtomicOrdering::Monotonic);
253        IsStore = IsLoad = true;
254      }
255
256      if (FenceOrdering != AtomicOrdering::Monotonic) {
257        MadeChange |= bracketInstWithFences(I, FenceOrdering, IsStore, IsLoad);
258      }
259    }
260
261    if (LI) {
262      if (LI->getType()->isFloatingPointTy()) {
263        // TODO: add a TLI hook to control this so that each target can
264        // convert to lowering the original type one at a time.
265        LI = convertAtomicLoadToIntegerType(LI);
266        assert(LI->getType()->isIntegerTy() && "invariant broken");
267        MadeChange = true;
268      }
269
270      MadeChange |= tryExpandAtomicLoad(LI);
271    } else if (SI) {
272      if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
273        // TODO: add a TLI hook to control this so that each target can
274        // convert to lowering the original type one at a time.
275        SI = convertAtomicStoreToIntegerType(SI);
276        assert(SI->getValueOperand()->getType()->isIntegerTy() &&
277               "invariant broken");
278        MadeChange = true;
279      }
280
281      if (TLI->shouldExpandAtomicStoreInIR(SI))
282        MadeChange |= expandAtomicStore(SI);
283    } else if (RMWI) {
284      // There are two different ways of expanding RMW instructions:
285      // - into a load if it is idempotent
286      // - into a Cmpxchg/LL-SC loop otherwise
287      // we try them in that order.
288
289      if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
290        MadeChange = true;
291      } else {
292        MadeChange |= tryExpandAtomicRMW(RMWI);
293      }
294    } else if (CASI) {
295      // TODO: when we're ready to make the change at the IR level, we can
296      // extend convertCmpXchgToInteger for floating point too.
297      assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
298             "unimplemented - floating point not legal at IR level");
299      if (CASI->getCompareOperand()->getType()->isPointerTy() ) {
300        // TODO: add a TLI hook to control this so that each target can
301        // convert to lowering the original type one at a time.
302        CASI = convertCmpXchgToIntegerType(CASI);
303        assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
304               "invariant broken");
305        MadeChange = true;
306      }
307
308      unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
309      unsigned ValueSize = getAtomicOpSize(CASI);
310      if (ValueSize < MinCASSize) {
311        assert(!TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
312               "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
313        expandPartwordCmpXchg(CASI);
314      } else {
315        if (TLI->shouldExpandAtomicCmpXchgInIR(CASI))
316          MadeChange |= expandAtomicCmpXchg(CASI);
317      }
318    }
319  }
320  return MadeChange;
321}
322
323bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order,
324                                         bool IsStore, bool IsLoad) {
325  IRBuilder<> Builder(I);
326
327  auto LeadingFence = TLI->emitLeadingFence(Builder, Order, IsStore, IsLoad);
328
329  auto TrailingFence = TLI->emitTrailingFence(Builder, Order, IsStore, IsLoad);
330  // The trailing fence is emitted before the instruction instead of after
331  // because there is no easy way of setting Builder insertion point after
332  // an instruction. So we must erase it from the BB, and insert it back
333  // in the right place.
334  // We have a guard here because not every atomic operation generates a
335  // trailing fence.
336  if (TrailingFence) {
337    TrailingFence->removeFromParent();
338    TrailingFence->insertAfter(I);
339  }
340
341  return (LeadingFence || TrailingFence);
342}
343
344/// Get the iX type with the same bitwidth as T.
345IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
346                                                       const DataLayout &DL) {
347  EVT VT = TLI->getValueType(DL, T);
348  unsigned BitWidth = VT.getStoreSizeInBits();
349  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
350  return IntegerType::get(T->getContext(), BitWidth);
351}
352
353/// Convert an atomic load of a non-integral type to an integer load of the
354/// equivalent bitwidth.  See the function comment on
355/// convertAtomicStoreToIntegerType for background.
356LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
357  auto *M = LI->getModule();
358  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
359                                            M->getDataLayout());
360
361  IRBuilder<> Builder(LI);
362
363  Value *Addr = LI->getPointerOperand();
364  Type *PT = PointerType::get(NewTy,
365                              Addr->getType()->getPointerAddressSpace());
366  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
367
368  auto *NewLI = Builder.CreateLoad(NewAddr);
369  NewLI->setAlignment(LI->getAlignment());
370  NewLI->setVolatile(LI->isVolatile());
371  NewLI->setAtomic(LI->getOrdering(), LI->getSynchScope());
372  DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
373
374  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
375  LI->replaceAllUsesWith(NewVal);
376  LI->eraseFromParent();
377  return NewLI;
378}
379
380bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
381  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
382  case TargetLoweringBase::AtomicExpansionKind::None:
383    return false;
384  case TargetLoweringBase::AtomicExpansionKind::LLSC:
385    expandAtomicOpToLLSC(
386        LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(),
387        [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
388    return true;
389  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
390    return expandAtomicLoadToLL(LI);
391  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
392    return expandAtomicLoadToCmpXchg(LI);
393  }
394  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
395}
396
397bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
398  IRBuilder<> Builder(LI);
399
400  // On some architectures, load-linked instructions are atomic for larger
401  // sizes than normal loads. For example, the only 64-bit load guaranteed
402  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
403  Value *Val =
404      TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
405  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
406
407  LI->replaceAllUsesWith(Val);
408  LI->eraseFromParent();
409
410  return true;
411}
412
413bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
414  IRBuilder<> Builder(LI);
415  AtomicOrdering Order = LI->getOrdering();
416  Value *Addr = LI->getPointerOperand();
417  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
418  Constant *DummyVal = Constant::getNullValue(Ty);
419
420  Value *Pair = Builder.CreateAtomicCmpXchg(
421      Addr, DummyVal, DummyVal, Order,
422      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
423  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
424
425  LI->replaceAllUsesWith(Loaded);
426  LI->eraseFromParent();
427
428  return true;
429}
430
431/// Convert an atomic store of a non-integral type to an integer store of the
432/// equivalent bitwidth.  We used to not support floating point or vector
433/// atomics in the IR at all.  The backends learned to deal with the bitcast
434/// idiom because that was the only way of expressing the notion of a atomic
435/// float or vector store.  The long term plan is to teach each backend to
436/// instruction select from the original atomic store, but as a migration
437/// mechanism, we convert back to the old format which the backends understand.
438/// Each backend will need individual work to recognize the new format.
439StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
440  IRBuilder<> Builder(SI);
441  auto *M = SI->getModule();
442  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
443                                            M->getDataLayout());
444  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
445
446  Value *Addr = SI->getPointerOperand();
447  Type *PT = PointerType::get(NewTy,
448                              Addr->getType()->getPointerAddressSpace());
449  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
450
451  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
452  NewSI->setAlignment(SI->getAlignment());
453  NewSI->setVolatile(SI->isVolatile());
454  NewSI->setAtomic(SI->getOrdering(), SI->getSynchScope());
455  DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
456  SI->eraseFromParent();
457  return NewSI;
458}
459
460bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
461  // This function is only called on atomic stores that are too large to be
462  // atomic if implemented as a native store. So we replace them by an
463  // atomic swap, that can be implemented for example as a ldrex/strex on ARM
464  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
465  // It is the responsibility of the target to only signal expansion via
466  // shouldExpandAtomicRMW in cases where this is required and possible.
467  IRBuilder<> Builder(SI);
468  AtomicRMWInst *AI =
469      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
470                              SI->getValueOperand(), SI->getOrdering());
471  SI->eraseFromParent();
472
473  // Now we have an appropriate swap instruction, lower it as usual.
474  return tryExpandAtomicRMW(AI);
475}
476
477static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
478                                 Value *Loaded, Value *NewVal,
479                                 AtomicOrdering MemOpOrder,
480                                 Value *&Success, Value *&NewLoaded) {
481  Value* Pair = Builder.CreateAtomicCmpXchg(
482      Addr, Loaded, NewVal, MemOpOrder,
483      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
484  Success = Builder.CreateExtractValue(Pair, 1, "success");
485  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
486}
487
488/// Emit IR to implement the given atomicrmw operation on values in registers,
489/// returning the new value.
490static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
491                              Value *Loaded, Value *Inc) {
492  Value *NewVal;
493  switch (Op) {
494  case AtomicRMWInst::Xchg:
495    return Inc;
496  case AtomicRMWInst::Add:
497    return Builder.CreateAdd(Loaded, Inc, "new");
498  case AtomicRMWInst::Sub:
499    return Builder.CreateSub(Loaded, Inc, "new");
500  case AtomicRMWInst::And:
501    return Builder.CreateAnd(Loaded, Inc, "new");
502  case AtomicRMWInst::Nand:
503    return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
504  case AtomicRMWInst::Or:
505    return Builder.CreateOr(Loaded, Inc, "new");
506  case AtomicRMWInst::Xor:
507    return Builder.CreateXor(Loaded, Inc, "new");
508  case AtomicRMWInst::Max:
509    NewVal = Builder.CreateICmpSGT(Loaded, Inc);
510    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
511  case AtomicRMWInst::Min:
512    NewVal = Builder.CreateICmpSLE(Loaded, Inc);
513    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
514  case AtomicRMWInst::UMax:
515    NewVal = Builder.CreateICmpUGT(Loaded, Inc);
516    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
517  case AtomicRMWInst::UMin:
518    NewVal = Builder.CreateICmpULE(Loaded, Inc);
519    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
520  default:
521    llvm_unreachable("Unknown atomic op");
522  }
523}
524
525bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
526  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
527  case TargetLoweringBase::AtomicExpansionKind::None:
528    return false;
529  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
530    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
531    unsigned ValueSize = getAtomicOpSize(AI);
532    if (ValueSize < MinCASSize) {
533      llvm_unreachable(
534          "MinCmpXchgSizeInBits not yet supported for LL/SC architectures.");
535    } else {
536      auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
537        return performAtomicOp(AI->getOperation(), Builder, Loaded,
538                               AI->getValOperand());
539      };
540      expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
541                           AI->getOrdering(), PerformOp);
542    }
543    return true;
544  }
545  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
546    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
547    unsigned ValueSize = getAtomicOpSize(AI);
548    if (ValueSize < MinCASSize) {
549      expandPartwordAtomicRMW(AI,
550                              TargetLoweringBase::AtomicExpansionKind::CmpXChg);
551    } else {
552      expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
553    }
554    return true;
555  }
556  default:
557    llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
558  }
559}
560
561namespace {
562
563/// Result values from createMaskInstrs helper.
564struct PartwordMaskValues {
565  Type *WordType;
566  Type *ValueType;
567  Value *AlignedAddr;
568  Value *ShiftAmt;
569  Value *Mask;
570  Value *Inv_Mask;
571};
572} // end anonymous namespace
573
574/// This is a helper function which builds instructions to provide
575/// values necessary for partword atomic operations. It takes an
576/// incoming address, Addr, and ValueType, and constructs the address,
577/// shift-amounts and masks needed to work with a larger value of size
578/// WordSize.
579///
580/// AlignedAddr: Addr rounded down to a multiple of WordSize
581///
582/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
583///           from AlignAddr for it to have the same value as if
584///           ValueType was loaded from Addr.
585///
586/// Mask: Value to mask with the value loaded from AlignAddr to
587///       include only the part that would've been loaded from Addr.
588///
589/// Inv_Mask: The inverse of Mask.
590
591static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
592                                           Type *ValueType, Value *Addr,
593                                           unsigned WordSize) {
594  PartwordMaskValues Ret;
595
596  BasicBlock *BB = I->getParent();
597  Function *F = BB->getParent();
598  Module *M = I->getModule();
599
600  LLVMContext &Ctx = F->getContext();
601  const DataLayout &DL = M->getDataLayout();
602
603  unsigned ValueSize = DL.getTypeStoreSize(ValueType);
604
605  assert(ValueSize < WordSize);
606
607  Ret.ValueType = ValueType;
608  Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);
609
610  Type *WordPtrType =
611      Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
612
613  Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
614  Ret.AlignedAddr = Builder.CreateIntToPtr(
615      Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
616      "AlignedAddr");
617
618  Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
619  if (DL.isLittleEndian()) {
620    // turn bytes into bits
621    Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
622  } else {
623    // turn bytes into bits, and count from the other side.
624    Ret.ShiftAmt =
625        Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
626  }
627
628  Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
629  Ret.Mask = Builder.CreateShl(
630      ConstantInt::get(Ret.WordType, (1 << ValueSize * 8) - 1), Ret.ShiftAmt,
631      "Mask");
632  Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
633
634  return Ret;
635}
636
637/// Emit IR to implement a masked version of a given atomicrmw
638/// operation. (That is, only the bits under the Mask should be
639/// affected by the operation)
640static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
641                                    IRBuilder<> &Builder, Value *Loaded,
642                                    Value *Shifted_Inc, Value *Inc,
643                                    const PartwordMaskValues &PMV) {
644  switch (Op) {
645  case AtomicRMWInst::Xchg: {
646    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
647    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
648    return FinalVal;
649  }
650  case AtomicRMWInst::Or:
651  case AtomicRMWInst::Xor:
652    // Or/Xor won't affect any other bits, so can just be done
653    // directly.
654    return performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
655  case AtomicRMWInst::Add:
656  case AtomicRMWInst::Sub:
657  case AtomicRMWInst::And:
658  case AtomicRMWInst::Nand: {
659    // The other arithmetic ops need to be masked into place.
660    Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
661    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
662    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
663    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
664    return FinalVal;
665  }
666  case AtomicRMWInst::Max:
667  case AtomicRMWInst::Min:
668  case AtomicRMWInst::UMax:
669  case AtomicRMWInst::UMin: {
670    // Finally, comparison ops will operate on the full value, so
671    // truncate down to the original size, and expand out again after
672    // doing the operation.
673    Value *Loaded_Shiftdown = Builder.CreateTrunc(
674        Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
675    Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
676    Value *NewVal_Shiftup = Builder.CreateShl(
677        Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
678    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
679    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
680    return FinalVal;
681  }
682  default:
683    llvm_unreachable("Unknown atomic op");
684  }
685}
686
687/// Expand a sub-word atomicrmw operation into an appropriate
688/// word-sized operation.
689///
690/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
691/// way as a typical atomicrmw expansion. The only difference here is
692/// that the operation inside of the loop must operate only upon a
693/// part of the value.
694void AtomicExpand::expandPartwordAtomicRMW(
695    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
696
697  assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);
698
699  AtomicOrdering MemOpOrder = AI->getOrdering();
700
701  IRBuilder<> Builder(AI);
702
703  PartwordMaskValues PMV =
704      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
705                       TLI->getMinCmpXchgSizeInBits() / 8);
706
707  Value *ValOperand_Shifted =
708      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
709                        PMV.ShiftAmt, "ValOperand_Shifted");
710
711  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
712    return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
713                                 ValOperand_Shifted, AI->getValOperand(), PMV);
714  };
715
716  // TODO: When we're ready to support LLSC conversions too, use
717  // insertRMWLLSCLoop here for ExpansionKind==LLSC.
718  Value *OldResult =
719      insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
720                           PerformPartwordOp, createCmpXchgInstFun);
721  Value *FinalOldResult = Builder.CreateTrunc(
722      Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
723  AI->replaceAllUsesWith(FinalOldResult);
724  AI->eraseFromParent();
725}
726
727void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
728  // The basic idea here is that we're expanding a cmpxchg of a
729  // smaller memory size up to a word-sized cmpxchg. To do this, we
730  // need to add a retry-loop for strong cmpxchg, so that
731  // modifications to other parts of the word don't cause a spurious
732  // failure.
733
734  // This generates code like the following:
735  //     [[Setup mask values PMV.*]]
736  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
737  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
738  //     %InitLoaded = load i32* %addr
739  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
740  //     br partword.cmpxchg.loop
741  // partword.cmpxchg.loop:
742  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
743  //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
744  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
745  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
746  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
747  //        i32 %FullWord_NewVal success_ordering failure_ordering
748  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
749  //     %Success = extractvalue { i32, i1 } %NewCI, 1
750  //     br i1 %Success, label %partword.cmpxchg.end,
751  //        label %partword.cmpxchg.failure
752  // partword.cmpxchg.failure:
753  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
754  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
755  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
756  //         label %partword.cmpxchg.end
757  // partword.cmpxchg.end:
758  //    %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
759  //    %FinalOldVal = trunc i32 %tmp1 to i8
760  //    %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
761  //    %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
762
763  Value *Addr = CI->getPointerOperand();
764  Value *Cmp = CI->getCompareOperand();
765  Value *NewVal = CI->getNewValOperand();
766
767  BasicBlock *BB = CI->getParent();
768  Function *F = BB->getParent();
769  IRBuilder<> Builder(CI);
770  LLVMContext &Ctx = Builder.getContext();
771
772  const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8;
773
774  BasicBlock *EndBB =
775      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
776  auto FailureBB =
777      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
778  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
779
780  // The split call above "helpfully" added a branch at the end of BB
781  // (to the wrong place).
782  std::prev(BB->end())->eraseFromParent();
783  Builder.SetInsertPoint(BB);
784
785  PartwordMaskValues PMV = createMaskInstrs(
786      Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize);
787
788  // Shift the incoming values over, into the right location in the word.
789  Value *NewVal_Shifted =
790      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
791  Value *Cmp_Shifted =
792      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
793
794  // Load the entire current word, and mask into place the expected and new
795  // values
796  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
797  InitLoaded->setVolatile(CI->isVolatile());
798  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
799  Builder.CreateBr(LoopBB);
800
801  // partword.cmpxchg.loop:
802  Builder.SetInsertPoint(LoopBB);
803  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
804  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
805
806  // Mask/Or the expected and new values into place in the loaded word.
807  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
808  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
809  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
810      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(),
811      CI->getFailureOrdering(), CI->getSynchScope());
812  NewCI->setVolatile(CI->isVolatile());
813  // When we're building a strong cmpxchg, we need a loop, so you
814  // might think we could use a weak cmpxchg inside. But, using strong
815  // allows the below comparison for ShouldContinue, and we're
816  // expecting the underlying cmpxchg to be a machine instruction,
817  // which is strong anyways.
818  NewCI->setWeak(CI->isWeak());
819
820  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
821  Value *Success = Builder.CreateExtractValue(NewCI, 1);
822
823  if (CI->isWeak())
824    Builder.CreateBr(EndBB);
825  else
826    Builder.CreateCondBr(Success, EndBB, FailureBB);
827
828  // partword.cmpxchg.failure:
829  Builder.SetInsertPoint(FailureBB);
830  // Upon failure, verify that the masked-out part of the loaded value
831  // has been modified.  If it didn't, abort the cmpxchg, since the
832  // masked-in part must've.
833  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
834  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
835  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
836
837  // Add the second value to the phi from above
838  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
839
840  // partword.cmpxchg.end:
841  Builder.SetInsertPoint(CI);
842
843  Value *FinalOldVal = Builder.CreateTrunc(
844      Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
845  Value *Res = UndefValue::get(CI->getType());
846  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
847  Res = Builder.CreateInsertValue(Res, Success, 1);
848
849  CI->replaceAllUsesWith(Res);
850  CI->eraseFromParent();
851}
852
853void AtomicExpand::expandAtomicOpToLLSC(
854    Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder,
855    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
856  IRBuilder<> Builder(I);
857  Value *Loaded =
858      insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp);
859
860  I->replaceAllUsesWith(Loaded);
861  I->eraseFromParent();
862}
863
864Value *AtomicExpand::insertRMWLLSCLoop(
865    IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
866    AtomicOrdering MemOpOrder,
867    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
868  LLVMContext &Ctx = Builder.getContext();
869  BasicBlock *BB = Builder.GetInsertBlock();
870  Function *F = BB->getParent();
871
872  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
873  //
874  // The standard expansion we produce is:
875  //     [...]
876  // atomicrmw.start:
877  //     %loaded = @load.linked(%addr)
878  //     %new = some_op iN %loaded, %incr
879  //     %stored = @store_conditional(%new, %addr)
880  //     %try_again = icmp i32 ne %stored, 0
881  //     br i1 %try_again, label %loop, label %atomicrmw.end
882  // atomicrmw.end:
883  //     [...]
884  BasicBlock *ExitBB =
885      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
886  BasicBlock *LoopBB =  BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
887
888  // The split call above "helpfully" added a branch at the end of BB (to the
889  // wrong place).
890  std::prev(BB->end())->eraseFromParent();
891  Builder.SetInsertPoint(BB);
892  Builder.CreateBr(LoopBB);
893
894  // Start the main loop block now that we've taken care of the preliminaries.
895  Builder.SetInsertPoint(LoopBB);
896  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
897
898  Value *NewVal = PerformOp(Builder, Loaded);
899
900  Value *StoreSuccess =
901      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
902  Value *TryAgain = Builder.CreateICmpNE(
903      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
904  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
905
906  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
907  return Loaded;
908}
909
910/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
911/// the equivalent bitwidth.  We used to not support pointer cmpxchg in the
912/// IR.  As a migration step, we convert back to what use to be the standard
913/// way to represent a pointer cmpxchg so that we can update backends one by
914/// one.
915AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
916  auto *M = CI->getModule();
917  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
918                                            M->getDataLayout());
919
920  IRBuilder<> Builder(CI);
921
922  Value *Addr = CI->getPointerOperand();
923  Type *PT = PointerType::get(NewTy,
924                              Addr->getType()->getPointerAddressSpace());
925  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
926
927  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
928  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
929
930
931  auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
932                                            CI->getSuccessOrdering(),
933                                            CI->getFailureOrdering(),
934                                            CI->getSynchScope());
935  NewCI->setVolatile(CI->isVolatile());
936  NewCI->setWeak(CI->isWeak());
937  DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
938
939  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
940  Value *Succ = Builder.CreateExtractValue(NewCI, 1);
941
942  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
943
944  Value *Res = UndefValue::get(CI->getType());
945  Res = Builder.CreateInsertValue(Res, OldVal, 0);
946  Res = Builder.CreateInsertValue(Res, Succ, 1);
947
948  CI->replaceAllUsesWith(Res);
949  CI->eraseFromParent();
950  return NewCI;
951}
952
953
954bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
955  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
956  AtomicOrdering FailureOrder = CI->getFailureOrdering();
957  Value *Addr = CI->getPointerOperand();
958  BasicBlock *BB = CI->getParent();
959  Function *F = BB->getParent();
960  LLVMContext &Ctx = F->getContext();
961  // If shouldInsertFencesForAtomic() returns true, then the target does not
962  // want to deal with memory orders, and emitLeading/TrailingFence should take
963  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
964  // should preserve the ordering.
965  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
966  AtomicOrdering MemOpOrder =
967      ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;
968
969  // In implementations which use a barrier to achieve release semantics, we can
970  // delay emitting this barrier until we know a store is actually going to be
971  // attempted. The cost of this delay is that we need 2 copies of the block
972  // emitting the load-linked, affecting code size.
973  //
974  // Ideally, this logic would be unconditional except for the minsize check
975  // since in other cases the extra blocks naturally collapse down to the
976  // minimal loop. Unfortunately, this puts too much stress on later
977  // optimisations so we avoid emitting the extra logic in those cases too.
978  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
979                           SuccessOrder != AtomicOrdering::Monotonic &&
980                           SuccessOrder != AtomicOrdering::Acquire &&
981                           !F->optForMinSize();
982
983  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
984  // do it even on minsize.
985  bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak();
986
987  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
988  //
989  // The full expansion we produce is:
990  //     [...]
991  // cmpxchg.start:
992  //     %unreleasedload = @load.linked(%addr)
993  //     %should_store = icmp eq %unreleasedload, %desired
994  //     br i1 %should_store, label %cmpxchg.fencedstore,
995  //                          label %cmpxchg.nostore
996  // cmpxchg.releasingstore:
997  //     fence?
998  //     br label cmpxchg.trystore
999  // cmpxchg.trystore:
1000  //     %loaded.trystore = phi [%unreleasedload, %releasingstore],
1001  //                            [%releasedload, %cmpxchg.releasedload]
1002  //     %stored = @store_conditional(%new, %addr)
1003  //     %success = icmp eq i32 %stored, 0
1004  //     br i1 %success, label %cmpxchg.success,
1005  //                     label %cmpxchg.releasedload/%cmpxchg.failure
1006  // cmpxchg.releasedload:
1007  //     %releasedload = @load.linked(%addr)
1008  //     %should_store = icmp eq %releasedload, %desired
1009  //     br i1 %should_store, label %cmpxchg.trystore,
1010  //                          label %cmpxchg.failure
1011  // cmpxchg.success:
1012  //     fence?
1013  //     br label %cmpxchg.end
1014  // cmpxchg.nostore:
1015  //     %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1016  //                           [%releasedload,
1017  //                               %cmpxchg.releasedload/%cmpxchg.trystore]
1018  //     @load_linked_fail_balance()?
1019  //     br label %cmpxchg.failure
1020  // cmpxchg.failure:
1021  //     fence?
1022  //     br label %cmpxchg.end
1023  // cmpxchg.end:
1024  //     %loaded = phi [%loaded.nostore, %cmpxchg.failure],
1025  //                   [%loaded.trystore, %cmpxchg.trystore]
1026  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1027  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1028  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1029  //     [...]
1030  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1031  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1032  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1033  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1034  auto ReleasedLoadBB =
1035      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1036  auto TryStoreBB =
1037      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1038  auto ReleasingStoreBB =
1039      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1040  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1041
1042  // This grabs the DebugLoc from CI
1043  IRBuilder<> Builder(CI);
1044
1045  // The split call above "helpfully" added a branch at the end of BB (to the
1046  // wrong place), but we might want a fence too. It's easiest to just remove
1047  // the branch entirely.
1048  std::prev(BB->end())->eraseFromParent();
1049  Builder.SetInsertPoint(BB);
1050  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1051    TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
1052                          /*IsLoad=*/true);
1053  Builder.CreateBr(StartBB);
1054
1055  // Start the main loop block now that we've taken care of the preliminaries.
1056  Builder.SetInsertPoint(StartBB);
1057  Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1058  Value *ShouldStore = Builder.CreateICmpEQ(
1059      UnreleasedLoad, CI->getCompareOperand(), "should_store");
1060
1061  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1062  // jump straight past that fence instruction (if it exists).
1063  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1064
1065  Builder.SetInsertPoint(ReleasingStoreBB);
1066  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1067    TLI->emitLeadingFence(Builder, SuccessOrder, /*IsStore=*/true,
1068                          /*IsLoad=*/true);
1069  Builder.CreateBr(TryStoreBB);
1070
1071  Builder.SetInsertPoint(TryStoreBB);
1072  Value *StoreSuccess = TLI->emitStoreConditional(
1073      Builder, CI->getNewValOperand(), Addr, MemOpOrder);
1074  StoreSuccess = Builder.CreateICmpEQ(
1075      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1076  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1077  Builder.CreateCondBr(StoreSuccess, SuccessBB,
1078                       CI->isWeak() ? FailureBB : RetryBB);
1079
1080  Builder.SetInsertPoint(ReleasedLoadBB);
1081  Value *SecondLoad;
1082  if (HasReleasedLoadBB) {
1083    SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1084    ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
1085                                       "should_store");
1086
1087    // If the cmpxchg doesn't actually need any ordering when it fails, we can
1088    // jump straight past that fence instruction (if it exists).
1089    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1090  } else
1091    Builder.CreateUnreachable();
1092
1093  // Make sure later instructions don't get reordered with a fence if
1094  // necessary.
1095  Builder.SetInsertPoint(SuccessBB);
1096  if (ShouldInsertFencesForAtomic)
1097    TLI->emitTrailingFence(Builder, SuccessOrder, /*IsStore=*/true,
1098                           /*IsLoad=*/true);
1099  Builder.CreateBr(ExitBB);
1100
1101  Builder.SetInsertPoint(NoStoreBB);
1102  // In the failing case, where we don't execute the store-conditional, the
1103  // target might want to balance out the load-linked with a dedicated
1104  // instruction (e.g., on ARM, clearing the exclusive monitor).
1105  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1106  Builder.CreateBr(FailureBB);
1107
1108  Builder.SetInsertPoint(FailureBB);
1109  if (ShouldInsertFencesForAtomic)
1110    TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
1111                           /*IsLoad=*/true);
1112  Builder.CreateBr(ExitBB);
1113
1114  // Finally, we have control-flow based knowledge of whether the cmpxchg
1115  // succeeded or not. We expose this to later passes by converting any
1116  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1117  // PHI.
1118  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1119  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
1120  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1121  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1122
1123  // Setup the builder so we can create any PHIs we need.
1124  Value *Loaded;
1125  if (!HasReleasedLoadBB)
1126    Loaded = UnreleasedLoad;
1127  else {
1128    Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
1129    PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1130    TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1131    TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
1132
1133    Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
1134    PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1135    NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
1136    NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
1137
1138    Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
1139    PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1140    ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
1141    ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);
1142
1143    Loaded = ExitLoaded;
1144  }
1145
1146  // Look for any users of the cmpxchg that are just comparing the loaded value
1147  // against the desired one, and replace them with the CFG-derived version.
1148  SmallVector<ExtractValueInst *, 2> PrunedInsts;
1149  for (auto User : CI->users()) {
1150    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1151    if (!EV)
1152      continue;
1153
1154    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1155           "weird extraction from { iN, i1 }");
1156
1157    if (EV->getIndices()[0] == 0)
1158      EV->replaceAllUsesWith(Loaded);
1159    else
1160      EV->replaceAllUsesWith(Success);
1161
1162    PrunedInsts.push_back(EV);
1163  }
1164
1165  // We can remove the instructions now we're no longer iterating through them.
1166  for (auto EV : PrunedInsts)
1167    EV->eraseFromParent();
1168
1169  if (!CI->use_empty()) {
1170    // Some use of the full struct return that we don't understand has happened,
1171    // so we've got to reconstruct it properly.
1172    Value *Res;
1173    Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
1174    Res = Builder.CreateInsertValue(Res, Success, 1);
1175
1176    CI->replaceAllUsesWith(Res);
1177  }
1178
1179  CI->eraseFromParent();
1180  return true;
1181}
1182
1183bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
1184  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1185  if(!C)
1186    return false;
1187
1188  AtomicRMWInst::BinOp Op = RMWI->getOperation();
1189  switch(Op) {
1190    case AtomicRMWInst::Add:
1191    case AtomicRMWInst::Sub:
1192    case AtomicRMWInst::Or:
1193    case AtomicRMWInst::Xor:
1194      return C->isZero();
1195    case AtomicRMWInst::And:
1196      return C->isMinusOne();
1197    // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1198    default:
1199      return false;
1200  }
1201}
1202
1203bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
1204  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1205    tryExpandAtomicLoad(ResultingLoad);
1206    return true;
1207  }
1208  return false;
1209}
1210
1211Value *AtomicExpand::insertRMWCmpXchgLoop(
1212    IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
1213    AtomicOrdering MemOpOrder,
1214    function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
1215    CreateCmpXchgInstFun CreateCmpXchg) {
1216  LLVMContext &Ctx = Builder.getContext();
1217  BasicBlock *BB = Builder.GetInsertBlock();
1218  Function *F = BB->getParent();
1219
1220  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1221  //
1222  // The standard expansion we produce is:
1223  //     [...]
1224  //     %init_loaded = load atomic iN* %addr
1225  //     br label %loop
1226  // loop:
1227  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1228  //     %new = some_op iN %loaded, %incr
1229  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1230  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
1231  //     %success = extractvalue { iN, i1 } %pair, 1
1232  //     br i1 %success, label %atomicrmw.end, label %loop
1233  // atomicrmw.end:
1234  //     [...]
1235  BasicBlock *ExitBB =
1236      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1237  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1238
1239  // The split call above "helpfully" added a branch at the end of BB (to the
1240  // wrong place), but we want a load. It's easiest to just remove
1241  // the branch entirely.
1242  std::prev(BB->end())->eraseFromParent();
1243  Builder.SetInsertPoint(BB);
1244  LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
1245  // Atomics require at least natural alignment.
1246  InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8);
1247  Builder.CreateBr(LoopBB);
1248
1249  // Start the main loop block now that we've taken care of the preliminaries.
1250  Builder.SetInsertPoint(LoopBB);
1251  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1252  Loaded->addIncoming(InitLoaded, BB);
1253
1254  Value *NewVal = PerformOp(Builder, Loaded);
1255
1256  Value *NewLoaded = nullptr;
1257  Value *Success = nullptr;
1258
1259  CreateCmpXchg(Builder, Addr, Loaded, NewVal,
1260                MemOpOrder == AtomicOrdering::Unordered
1261                    ? AtomicOrdering::Monotonic
1262                    : MemOpOrder,
1263                Success, NewLoaded);
1264  assert(Success && NewLoaded);
1265
1266  Loaded->addIncoming(NewLoaded, LoopBB);
1267
1268  Builder.CreateCondBr(Success, ExitBB, LoopBB);
1269
1270  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1271  return NewLoaded;
1272}
1273
1274// Note: This function is exposed externally by AtomicExpandUtils.h
1275bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1276                                    CreateCmpXchgInstFun CreateCmpXchg) {
1277  IRBuilder<> Builder(AI);
1278  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
1279      Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(),
1280      [&](IRBuilder<> &Builder, Value *Loaded) {
1281        return performAtomicOp(AI->getOperation(), Builder, Loaded,
1282                               AI->getValOperand());
1283      },
1284      CreateCmpXchg);
1285
1286  AI->replaceAllUsesWith(Loaded);
1287  AI->eraseFromParent();
1288  return true;
1289}
1290
1291// In order to use one of the sized library calls such as
1292// __atomic_fetch_add_4, the alignment must be sufficient, the size
1293// must be one of the potentially-specialized sizes, and the value
1294// type must actually exist in C on the target (otherwise, the
1295// function wouldn't actually be defined.)
1296static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
1297                                  const DataLayout &DL) {
1298  // TODO: "LargestSize" is an approximation for "largest type that
1299  // you can express in C". It seems to be the case that int128 is
1300  // supported on all 64-bit platforms, otherwise only up to 64-bit
1301  // integers are supported. If we get this wrong, then we'll try to
1302  // call a sized libcall that doesn't actually exist. There should
1303  // really be some more reliable way in LLVM of determining integer
1304  // sizes which are valid in the target's C ABI...
1305  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1306  return Align >= Size &&
1307         (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1308         Size <= LargestSize;
1309}

void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_LOAD,   RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
      RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, Align, I->getPointerOperand(), nullptr, nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  (void)expanded;
  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Load");
}
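
// For illustration, assuming the sized call is usable (sufficient size and
// alignment per canUseSizedAtomicCall), an 8-byte seq_cst atomic load ends up
// roughly as (value names illustrative):
//   %1 = bitcast i64* %ptr to i8*
//   %2 = call i64 @__atomic_load_8(i8* %1, i32 5)   ; 5 == seq_cst in the C ABI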

void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_STORE,   RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
      RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  (void)expanded;
  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Store");
}

void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_COMPARE_EXCHANGE,   RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, Align, I->getPointerOperand(), I->getNewValOperand(),
      I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
      Libcalls);
  (void)expanded;
  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for CAS");
}

static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE,   RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL,   RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL,     RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};

  switch (Op) {
  case AtomicRMWInst::BAD_BINOP:
    llvm_unreachable("Should not have BAD_BINOP.");
  case AtomicRMWInst::Xchg:
    return makeArrayRef(LibcallsXchg);
  case AtomicRMWInst::Add:
    return makeArrayRef(LibcallsAdd);
  case AtomicRMWInst::Sub:
    return makeArrayRef(LibcallsSub);
  case AtomicRMWInst::And:
    return makeArrayRef(LibcallsAnd);
  case AtomicRMWInst::Or:
    return makeArrayRef(LibcallsOr);
  case AtomicRMWInst::Xor:
    return makeArrayRef(LibcallsXor);
  case AtomicRMWInst::Nand:
    return makeArrayRef(LibcallsNand);
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
    // No atomic libcalls are available for max/min/umax/umin.
    return {};
  }
  llvm_unreachable("Unexpected AtomicRMW operation.");
}
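
// Layout note: in each table above, index 0 is the generic (size_t-based)
// variant, or UNKNOWN_LIBCALL when no generic form exists for the operation,
// and indices 1..5 are the 1/2/4/8/16-byte specializations. This matches the
// indexing performed in expandAtomicOpToLibcall below.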

void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());

  unsigned Size = getAtomicOpSize(I);
  unsigned Align = getAtomicOpAlign(I);

  bool Success = false;
  if (!Libcalls.empty())
    Success = expandAtomicOpToLibcall(
        I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr,
        I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);

  // The expansion failed: either there were no libcalls at all for
  // the operation (min/max), or there were only size-specialized
  // libcalls (add/sub/etc) and we needed a generic one. So, expand to
  // a CAS libcall, via a CAS loop, instead.
  if (!Success) {
    expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr,
                                       Value *Loaded, Value *NewVal,
                                       AtomicOrdering MemOpOrder,
                                       Value *&Success, Value *&NewLoaded) {
      // Create the CAS instruction normally...
      AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
          Addr, Loaded, NewVal, MemOpOrder,
          AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
      Success = Builder.CreateExtractValue(Pair, 1, "success");
      NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

      // ...and then expand the CAS into a libcall.
      expandAtomicCASToLibcall(Pair);
    });
  }
}
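
// Sketch of the fallback, assuming a naturally aligned seq_cst
// 'atomicrmw max i32' (value names, and the ordering constant 5, are
// illustrative; bitcasts and lifetime markers are omitted): performAtomicOp
// computes the new value with an icmp/select, and the cmpxchg created above
// is itself expanded to the sized CAS libcall, so each loop iteration ends up
// roughly as:
//   %cmp = icmp sgt i32 %loaded, %val
//   %new = select i1 %cmp, i32 %loaded, i32 %val
//   store i32 %loaded, i32* %expected
//   %ok = call zeroext i1 @__atomic_compare_exchange_4(i8* %ptr.i8,
//             i8* %expected.i8, i32 %new, i32 5, i32 5)
//   %newloaded = load i32, i32* %expected
//   br i1 %ok, label %exit, label %loop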

// A helper routine for the above expandAtomic*ToLibcall functions.
//
// 'Libcalls' contains an array of enum values for the particular
// ATOMIC libcalls to be emitted. All of the other arguments besides
// 'I' are extracted from the Instruction subclass by the
// caller. Depending on the particular call, some will be null.
bool AtomicExpand::expandAtomicOpToLibcall(
    Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand,
    Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
    AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
  assert(Libcalls.size() == 6);

  LLVMContext &Ctx = I->getContext();
  Module *M = I->getModule();
  const DataLayout &DL = M->getDataLayout();
  IRBuilder<> Builder(I);
  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());

  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL);
  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);

  unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy);

  // TODO: the "order" argument type is "int", not int32. So
  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
  Constant *OrderingVal =
      ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
  Constant *Ordering2Val = nullptr;
  if (CASExpected) {
    assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
    Ordering2Val =
        ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
  }
  bool HasResult = I->getType() != Type::getVoidTy(Ctx);

  RTLIB::Libcall RTLibType;
  if (UseSizedLibcall) {
    switch (Size) {
    case 1: RTLibType = Libcalls[1]; break;
    case 2: RTLibType = Libcalls[2]; break;
    case 4: RTLibType = Libcalls[3]; break;
    case 8: RTLibType = Libcalls[4]; break;
    case 16: RTLibType = Libcalls[5]; break;
    }
  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
    RTLibType = Libcalls[0];
  } else {
    // Can't use sized function, and there's no generic for this
    // operation, so give up.
    return false;
  }

  // Build up the function call. There are two kinds. First, the sized
  // variants. These calls are going to be one of the following (with
  // N=1,2,4,8,16):
  //  iN    __atomic_load_N(iN *ptr, int ordering)
  //  void  __atomic_store_N(iN *ptr, iN val, int ordering)
  //  iN    __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
  //  bool  __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
  //                                    int success_order, int failure_order)
  //
  // Note that these functions can be used for non-integer atomic
  // operations; the values just need to be bitcast to integers on the
  // way in and out.
  //
  // And, then, the generic variants. They look like the following:
  //  void  __atomic_load(size_t size, void *ptr, void *ret, int ordering)
  //  void  __atomic_store(size_t size, void *ptr, void *val, int ordering)
  //  void  __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
  //                          int ordering)
  //  bool  __atomic_compare_exchange(size_t size, void *ptr, void *expected,
  //                                  void *desired, int success_order,
  //                                  int failure_order)
  //
  // The different signatures are built up depending on the
  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
  // variables.

  AllocaInst *AllocaCASExpected = nullptr;
  Value *AllocaCASExpected_i8 = nullptr;
  AllocaInst *AllocaValue = nullptr;
  Value *AllocaValue_i8 = nullptr;
  AllocaInst *AllocaResult = nullptr;
  Value *AllocaResult_i8 = nullptr;

  Type *ResultTy;
  SmallVector<Value *, 6> Args;
  AttributeSet Attr;

  // 'size' argument.
  if (!UseSizedLibcall) {
    // Note, getIntPtrType is assumed equivalent to size_t.
    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
  }

  // 'ptr' argument.
  Value *PtrVal =
      Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx));
  Args.push_back(PtrVal);

  // 'expected' argument, if present.
  if (CASExpected) {
    AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
    AllocaCASExpected->setAlignment(AllocaAlignment);
    AllocaCASExpected_i8 =
        Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx));
    Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
    Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
    Args.push_back(AllocaCASExpected_i8);
  }

  // 'val' argument ('desired' for cas), if present.
  if (ValueOperand) {
    if (UseSizedLibcall) {
      Value *IntValue =
          Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
      Args.push_back(IntValue);
    } else {
      AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
      AllocaValue->setAlignment(AllocaAlignment);
      AllocaValue_i8 =
          Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
      Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
      Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
      Args.push_back(AllocaValue_i8);
    }
  }

  // 'ret' argument.
  if (!CASExpected && HasResult && !UseSizedLibcall) {
    AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
    AllocaResult->setAlignment(AllocaAlignment);
    AllocaResult_i8 =
        Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx));
    Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
    Args.push_back(AllocaResult_i8);
  }

  // 'ordering' ('success_order' for cas) argument.
  Args.push_back(OrderingVal);

  // 'failure_order' argument, if present.
  if (Ordering2Val)
    Args.push_back(Ordering2Val);

  // Now, the return type.
  if (CASExpected) {
    ResultTy = Type::getInt1Ty(Ctx);
    Attr = Attr.addAttribute(Ctx, AttributeSet::ReturnIndex, Attribute::ZExt);
  } else if (HasResult && UseSizedLibcall)
    ResultTy = SizedIntTy;
  else
    ResultTy = Type::getVoidTy(Ctx);

  // Done with setting up arguments and return types, create the call:
  SmallVector<Type *, 6> ArgTys;
  for (Value *Arg : Args)
    ArgTys.push_back(Arg->getType());
  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
  Constant *LibcallFn =
      M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
  Call->setAttributes(Attr);
  Value *Result = Call;

  // And then, extract the results...
  if (ValueOperand && !UseSizedLibcall)
    Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);

  if (CASExpected) {
    // The final result from the CAS is {load of 'expected' alloca, bool result
    // from call}
    Type *FinalResultTy = I->getType();
    Value *V = UndefValue::get(FinalResultTy);
    Value *ExpectedOut =
        Builder.CreateAlignedLoad(AllocaCASExpected, AllocaAlignment);
    Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
    V = Builder.CreateInsertValue(V, ExpectedOut, 0);
    V = Builder.CreateInsertValue(V, Result, 1);
    I->replaceAllUsesWith(V);
  } else if (HasResult) {
    Value *V;
    if (UseSizedLibcall)
      V = Builder.CreateBitOrPointerCast(Result, I->getType());
    else {
      V = Builder.CreateAlignedLoad(AllocaResult, AllocaAlignment);
      Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
    }
    I->replaceAllUsesWith(V);
  }
  I->eraseFromParent();
  return true;
}
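
// As a worked example (value names and constants illustrative), assume a
// seq_cst i128 cmpxchg on a 64-bit target where canUseSizedAtomicCall fails
// for Size == 16. The code above produces roughly:
//   %expected = alloca i128            ; in the entry block
//   %desired  = alloca i128            ; in the entry block
//   store i128 %cmp, i128* %expected
//   store i128 %new, i128* %desired
//   %ok = call zeroext i1 @__atomic_compare_exchange(i64 16, i8* %ptr.i8,
//            i8* %expected.i8, i8* %desired.i8, i32 5, i32 5)
//   %old = load i128, i128* %expected
//   ; uses of the original cmpxchg are replaced with the pair {%old, %ok}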