//===-- X86AtomicExpandPass.cpp - Expand illegal atomic instructions -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions which
// cannot be implemented as a single instruction with cmpxchg-based loops.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

#define DEBUG_TYPE "x86-atomic-expand"

namespace {
  class X86AtomicExpandPass : public FunctionPass {
    const X86TargetMachine *TM;
  public:
    static char ID; // Pass identification, replacement for typeid
    explicit X86AtomicExpandPass(const X86TargetMachine *TM)
      : FunctionPass(ID), TM(TM) {}

    bool runOnFunction(Function &F) override;
    bool expandAtomicInsts(Function &F);

    bool needsCmpXchgNb(Type *MemType);

    /// There are four kinds of atomic operations. Two never need expanding:
    /// cmpxchg is what we expand the others *to*, and loads are easily handled
    /// by ISelLowering. Atomicrmw and store can need expanding in some
    /// circumstances.
    bool shouldExpand(Instruction *Inst);

    /// 128-bit atomic stores (64-bit stores on i686) need to be implemented in
    /// terms of trivial cmpxchg16b (or cmpxchg8b) loops. A simple store isn't
    /// necessarily atomic at these widths.
    bool shouldExpandStore(StoreInst *SI);

    /// Only some atomicrmw instructions need expanding -- some operations
    /// (e.g. max) have absolutely no architectural support; some (e.g. or)
    /// have limited support but can't return the previous value; some
    /// (e.g. add) have complete support in the instruction set.
    ///
    /// Also, naturally, 128-bit operations always need to be expanded.
    bool shouldExpandAtomicRMW(AtomicRMWInst *AI);

    bool expandAtomicRMW(AtomicRMWInst *AI);
    bool expandAtomicStore(StoreInst *SI);
  };
}

char X86AtomicExpandPass::ID = 0;

FunctionPass *llvm::createX86AtomicExpandPass(const X86TargetMachine *TM) {
  return new X86AtomicExpandPass(TM);
}
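
// For context (this lives in X86TargetMachine.cpp, not in this file): the
// factory above is used to add the pass to the backend pipeline before
// instruction selection, roughly as in this sketch (hook name assumed):
//
//   bool X86PassConfig::addPreISel() {
//     addPass(createX86AtomicExpandPass(&getX86TargetMachine()));
//     return true;
//   }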

bool X86AtomicExpandPass::runOnFunction(Function &F) {
  SmallVector<Instruction *, 1> AtomicInsts;

  // Changing control-flow while iterating through it is a bad idea, so gather
  // a list of all atomic instructions before we start.
  for (BasicBlock &BB : F)
    for (Instruction &Inst : BB) {
      if (isa<AtomicRMWInst>(&Inst) ||
          (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic()))
        AtomicInsts.push_back(&Inst);
    }

  bool MadeChange = false;
  for (Instruction *Inst : AtomicInsts) {
    if (!shouldExpand(Inst))
      continue;

    if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
      MadeChange |= expandAtomicRMW(AI);
    if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
      MadeChange |= expandAtomicStore(SI);

    assert(MadeChange && "Atomic inst not expanded when it should be?");
    Inst->eraseFromParent();
  }

  return MadeChange;
}

/// Returns true if operations on the given type will need to use either
/// cmpxchg8b or cmpxchg16b. This occurs if the type is 1 step up from the
/// native width, and the instructions are available (otherwise we leave them
/// alone to become __sync_fetch_and_... calls).
bool X86AtomicExpandPass::needsCmpXchgNb(llvm::Type *MemType) {
  const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
  if (!Subtarget.hasCmpxchg16b())
    return false;

  unsigned CmpXchgNbWidth = Subtarget.is64Bit() ? 128 : 64;

  unsigned OpWidth = MemType->getPrimitiveSizeInBits();
  if (OpWidth == CmpXchgNbWidth)
    return true;

  return false;
}

bool X86AtomicExpandPass::shouldExpandAtomicRMW(AtomicRMWInst *AI) {
  const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>();
  unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;

  if (needsCmpXchgNb(AI->getType()))
    return true;

  if (AI->getType()->getPrimitiveSizeInBits() > NativeWidth)
    return false;

  AtomicRMWInst::BinOp Op = AI->getOperation();
  switch (Op) {
  default:
    llvm_unreachable("Unknown atomic operation");
  case AtomicRMWInst::Xchg:
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
    // It's better to use xadd or xchg for these in all cases (a sub is simply
    // an xadd of the negated operand).
    return false;
  case AtomicRMWInst::Or:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Xor:
    // If the atomicrmw's result isn't actually used, we can just add a "lock"
    // prefix to a normal instruction for these operations.
    return !AI->use_empty();
  case AtomicRMWInst::Nand:
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
    // These always require a non-trivial set of data operations on x86. We
    // must use a cmpxchg loop.
    return true;
  }
}

bool X86AtomicExpandPass::shouldExpandStore(StoreInst *SI) {
  if (needsCmpXchgNb(SI->getValueOperand()->getType()))
    return true;

  return false;
}

bool X86AtomicExpandPass::shouldExpand(Instruction *Inst) {
  if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst))
    return shouldExpandAtomicRMW(AI);
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
    return shouldExpandStore(SI);
  return false;
}

/// Emit IR to implement the given atomicrmw operation on values in registers,
/// returning the new value.
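/// For example, a signed max (which has no single x86 instruction) becomes a
/// compare-and-select; a sketch of the IR this builds, with illustrative
/// value names:
///   %cmp = icmp sgt iN %loaded, %incr
///   %new = select i1 %cmp, iN %loaded, iN %incr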
static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
                              Value *Loaded, Value *Inc) {
  Value *NewVal;
  switch (Op) {
  case AtomicRMWInst::Xchg:
    return Inc;
  case AtomicRMWInst::Add:
    return Builder.CreateAdd(Loaded, Inc, "new");
  case AtomicRMWInst::Sub:
    return Builder.CreateSub(Loaded, Inc, "new");
  case AtomicRMWInst::And:
    return Builder.CreateAnd(Loaded, Inc, "new");
  case AtomicRMWInst::Nand:
    return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
  case AtomicRMWInst::Or:
    return Builder.CreateOr(Loaded, Inc, "new");
  case AtomicRMWInst::Xor:
    return Builder.CreateXor(Loaded, Inc, "new");
  case AtomicRMWInst::Max:
    NewVal = Builder.CreateICmpSGT(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::Min:
    NewVal = Builder.CreateICmpSLE(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::UMax:
    NewVal = Builder.CreateICmpUGT(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  case AtomicRMWInst::UMin:
    NewVal = Builder.CreateICmpULE(Loaded, Inc);
    return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
  default:
    break;
  }
  llvm_unreachable("Unknown atomic op");
}

bool X86AtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) {
  AtomicOrdering Order =
      AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering();
  Value *Addr = AI->getPointerOperand();
  BasicBlock *BB = AI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // This grabs the DebugLoc from AI.
  IRBuilder<> Builder(AI);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove the branch
  // entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateLoad(Addr);
  // Atomics require at least natural alignment; setAlignment takes bytes, so
  // convert from the type's size in bits.
  InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits() / 8);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
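  // The PHI carries the most recently observed memory value: %init_loaded
  // from the setup block on the first iteration, and the value returned by
  // the failed cmpxchg on each retry.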
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal =
      performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, Order,
      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
  Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
  Loaded->addIncoming(NewLoaded, LoopBB);

  Value *Success = Builder.CreateExtractValue(Pair, 1, "success");
  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  AI->replaceAllUsesWith(NewLoaded);

  return true;
}

bool X86AtomicExpandPass::expandAtomicStore(StoreInst *SI) {
  // An atomic store might need cmpxchg16b (or cmpxchg8b on 32-bit x86) to
  // execute. Express this in terms of the usual expansion to "atomicrmw xchg".
  IRBuilder<> Builder(SI);
  AtomicOrdering Order =
      SI->getOrdering() == Unordered ? Monotonic : SI->getOrdering();
  AtomicRMWInst *AI =
      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
                              SI->getValueOperand(), Order);

  // Now we have an appropriate swap instruction, lower it as usual.
  if (shouldExpandAtomicRMW(AI)) {
    expandAtomicRMW(AI);
    AI->eraseFromParent();
  }

  // Whether or not the swap itself needed further expansion, the store has
  // been replaced, so report a change; the caller erases the original store.
  return true;
}
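
// An end-to-end illustration (a sketch, not code in this pass): on x86-64 a
// 128-bit
//   store atomic i128 %v, i128* %p seq_cst, align 16
// is rewritten by expandAtomicStore into "atomicrmw xchg i128* %p, i128 %v
// seq_cst", which expandAtomicRMW turns into the cmpxchg loop shown above;
// ISel can then select the i128 cmpxchg as cmpxchg16b, and runOnFunction
// erases the original store.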