IntrinsicLowering.cpp revision dc770929cb2f97397970e2942b746839fc387992
1//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the IntrinsicLowering class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Constants.h"
15#include "llvm/DerivedTypes.h"
16#include "llvm/Module.h"
17#include "llvm/Instructions.h"
18#include "llvm/Type.h"
19#include "llvm/CodeGen/IntrinsicLowering.h"
20#include "llvm/Support/Streams.h"
21#include "llvm/Target/TargetData.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/STLExtras.h"
24using namespace llvm;
25
26template <class ArgIt>
27static void EnsureFunctionExists(Module &M, const char *Name,
28                                 ArgIt ArgBegin, ArgIt ArgEnd,
29                                 const Type *RetTy) {
30  // Insert a correctly-typed definition now.
31  std::vector<const Type *> ParamTys;
32  for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
33    ParamTys.push_back(I->getType());
34  M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
35}
36
37static void EnsureFPIntrinsicsExist(Module &M, Module::iterator I,
38                                 const char *FName,
39                                 const char *DName, const char *LDName) {
40  // Insert definitions for all the floating point types.
41  switch((int)I->arg_begin()->getType()->getTypeID()) {
42  case Type::FloatTyID:
43    EnsureFunctionExists(M, FName, I->arg_begin(), I->arg_end(),
44                         Type::FloatTy);
45  case Type::DoubleTyID:
46    EnsureFunctionExists(M, DName, I->arg_begin(), I->arg_end(),
47                         Type::DoubleTy);
48  case Type::X86_FP80TyID:
49  case Type::FP128TyID:
50  case Type::PPC_FP128TyID:
51    EnsureFunctionExists(M, LDName, I->arg_begin(), I->arg_end(),
52                         I->arg_begin()->getType());
53  }
54}
55
56/// ReplaceCallWith - This function is used when we want to lower an intrinsic
57/// call to a call of an external function.  This handles hard cases such as
58/// when there was already a prototype for the external function, and if that
59/// prototype doesn't match the arguments we expect to pass in.
60template <class ArgIt>
61static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
62                                 ArgIt ArgBegin, ArgIt ArgEnd,
63                                 const Type *RetTy, Constant *&FCache) {
64  if (!FCache) {
65    // If we haven't already looked up this function, check to see if the
66    // program already contains a function with this name.
67    Module *M = CI->getParent()->getParent()->getParent();
68    // Get or insert the definition now.
69    std::vector<const Type *> ParamTys;
70    for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
71      ParamTys.push_back((*I)->getType());
72    FCache = M->getOrInsertFunction(NewFn,
73                                    FunctionType::get(RetTy, ParamTys, false));
74  }
75
76  SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
77  CallInst *NewCI = CallInst::Create(FCache, Args.begin(), Args.end(),
78                                     CI->getName(), CI);
79  if (!CI->use_empty())
80    CI->replaceAllUsesWith(NewCI);
81  return NewCI;
82}
83
84void IntrinsicLowering::AddPrototypes(Module &M) {
85  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
86    if (I->isDeclaration() && !I->use_empty())
87      switch (I->getIntrinsicID()) {
88      default: break;
89      case Intrinsic::setjmp:
90        EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
91                             Type::Int32Ty);
92        break;
93      case Intrinsic::longjmp:
94        EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
95                             Type::VoidTy);
96        break;
97      case Intrinsic::siglongjmp:
98        EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
99                             Type::VoidTy);
100        break;
101      case Intrinsic::memcpy_i32:
102      case Intrinsic::memcpy_i64:
103        M.getOrInsertFunction("memcpy", PointerType::getUnqual(Type::Int8Ty),
104                              PointerType::getUnqual(Type::Int8Ty),
105                              PointerType::getUnqual(Type::Int8Ty),
106                              TD.getIntPtrType(), (Type *)0);
107        break;
108      case Intrinsic::memmove_i32:
109      case Intrinsic::memmove_i64:
110        M.getOrInsertFunction("memmove", PointerType::getUnqual(Type::Int8Ty),
111                              PointerType::getUnqual(Type::Int8Ty),
112                              PointerType::getUnqual(Type::Int8Ty),
113                              TD.getIntPtrType(), (Type *)0);
114        break;
115      case Intrinsic::memset_i32:
116      case Intrinsic::memset_i64:
117        M.getOrInsertFunction("memset", PointerType::getUnqual(Type::Int8Ty),
118                              PointerType::getUnqual(Type::Int8Ty),
119                              Type::Int32Ty,
120                              TD.getIntPtrType(), (Type *)0);
121        break;
122      case Intrinsic::sqrt:
123        EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
124        break;
125      case Intrinsic::sin:
126        EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl");
127        break;
128      case Intrinsic::cos:
129        EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl");
130        break;
131      case Intrinsic::pow:
132        EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl");
133        break;
134      case Intrinsic::log:
135        EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl");
136        break;
137      case Intrinsic::log2:
138        EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l");
139        break;
140      case Intrinsic::log10:
141        EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l");
142        break;
143      case Intrinsic::exp:
144        EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl");
145        break;
146      case Intrinsic::exp2:
147        EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l");
148        break;
149      }
150}
151
152/// LowerBSWAP - Emit the code to lower bswap of V before the specified
153/// instruction IP.
154static Value *LowerBSWAP(Value *V, Instruction *IP) {
155  assert(V->getType()->isInteger() && "Can't bswap a non-integer type!");
156
157  unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
158
159  switch(BitSize) {
160  default: assert(0 && "Unhandled type size of value to byteswap!");
161  case 16: {
162    Value *Tmp1 = BinaryOperator::CreateShl(V,
163                                ConstantInt::get(V->getType(),8),"bswap.2",IP);
164    Value *Tmp2 = BinaryOperator::CreateLShr(V,
165                                ConstantInt::get(V->getType(),8),"bswap.1",IP);
166    V = BinaryOperator::CreateOr(Tmp1, Tmp2, "bswap.i16", IP);
167    break;
168  }
169  case 32: {
170    Value *Tmp4 = BinaryOperator::CreateShl(V,
171                              ConstantInt::get(V->getType(),24),"bswap.4", IP);
172    Value *Tmp3 = BinaryOperator::CreateShl(V,
173                              ConstantInt::get(V->getType(),8),"bswap.3",IP);
174    Value *Tmp2 = BinaryOperator::CreateLShr(V,
175                              ConstantInt::get(V->getType(),8),"bswap.2",IP);
176    Value *Tmp1 = BinaryOperator::CreateLShr(V,
177                              ConstantInt::get(V->getType(),24),"bswap.1", IP);
178    Tmp3 = BinaryOperator::CreateAnd(Tmp3,
179                                     ConstantInt::get(Type::Int32Ty, 0xFF0000),
180                                     "bswap.and3", IP);
181    Tmp2 = BinaryOperator::CreateAnd(Tmp2,
182                                     ConstantInt::get(Type::Int32Ty, 0xFF00),
183                                     "bswap.and2", IP);
184    Tmp4 = BinaryOperator::CreateOr(Tmp4, Tmp3, "bswap.or1", IP);
185    Tmp2 = BinaryOperator::CreateOr(Tmp2, Tmp1, "bswap.or2", IP);
186    V = BinaryOperator::CreateOr(Tmp4, Tmp2, "bswap.i32", IP);
187    break;
188  }
189  case 64: {
190    Value *Tmp8 = BinaryOperator::CreateShl(V,
191                              ConstantInt::get(V->getType(),56),"bswap.8", IP);
192    Value *Tmp7 = BinaryOperator::CreateShl(V,
193                              ConstantInt::get(V->getType(),40),"bswap.7", IP);
194    Value *Tmp6 = BinaryOperator::CreateShl(V,
195                              ConstantInt::get(V->getType(),24),"bswap.6", IP);
196    Value *Tmp5 = BinaryOperator::CreateShl(V,
197                              ConstantInt::get(V->getType(),8),"bswap.5", IP);
198    Value* Tmp4 = BinaryOperator::CreateLShr(V,
199                              ConstantInt::get(V->getType(),8),"bswap.4", IP);
200    Value* Tmp3 = BinaryOperator::CreateLShr(V,
201                              ConstantInt::get(V->getType(),24),"bswap.3", IP);
202    Value* Tmp2 = BinaryOperator::CreateLShr(V,
203                              ConstantInt::get(V->getType(),40),"bswap.2", IP);
204    Value* Tmp1 = BinaryOperator::CreateLShr(V,
205                              ConstantInt::get(V->getType(),56),"bswap.1", IP);
206    Tmp7 = BinaryOperator::CreateAnd(Tmp7,
207                             ConstantInt::get(Type::Int64Ty,
208                               0xFF000000000000ULL),
209                             "bswap.and7", IP);
210    Tmp6 = BinaryOperator::CreateAnd(Tmp6,
211                             ConstantInt::get(Type::Int64Ty, 0xFF0000000000ULL),
212                             "bswap.and6", IP);
213    Tmp5 = BinaryOperator::CreateAnd(Tmp5,
214                             ConstantInt::get(Type::Int64Ty, 0xFF00000000ULL),
215                             "bswap.and5", IP);
216    Tmp4 = BinaryOperator::CreateAnd(Tmp4,
217                             ConstantInt::get(Type::Int64Ty, 0xFF000000ULL),
218                             "bswap.and4", IP);
219    Tmp3 = BinaryOperator::CreateAnd(Tmp3,
220                             ConstantInt::get(Type::Int64Ty, 0xFF0000ULL),
221                             "bswap.and3", IP);
222    Tmp2 = BinaryOperator::CreateAnd(Tmp2,
223                             ConstantInt::get(Type::Int64Ty, 0xFF00ULL),
224                             "bswap.and2", IP);
225    Tmp8 = BinaryOperator::CreateOr(Tmp8, Tmp7, "bswap.or1", IP);
226    Tmp6 = BinaryOperator::CreateOr(Tmp6, Tmp5, "bswap.or2", IP);
227    Tmp4 = BinaryOperator::CreateOr(Tmp4, Tmp3, "bswap.or3", IP);
228    Tmp2 = BinaryOperator::CreateOr(Tmp2, Tmp1, "bswap.or4", IP);
229    Tmp8 = BinaryOperator::CreateOr(Tmp8, Tmp6, "bswap.or5", IP);
230    Tmp4 = BinaryOperator::CreateOr(Tmp4, Tmp2, "bswap.or6", IP);
231    V = BinaryOperator::CreateOr(Tmp8, Tmp4, "bswap.i64", IP);
232    break;
233  }
234  }
235  return V;
236}
237
238/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
239/// instruction IP.
240static Value *LowerCTPOP(Value *V, Instruction *IP) {
241  assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!");
242
243  static const uint64_t MaskValues[6] = {
244    0x5555555555555555ULL, 0x3333333333333333ULL,
245    0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
246    0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
247  };
248
249  unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
250  unsigned WordSize = (BitSize + 63) / 64;
251  Value *Count = ConstantInt::get(V->getType(), 0);
252
253  for (unsigned n = 0; n < WordSize; ++n) {
254    Value *PartValue = V;
255    for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
256         i <<= 1, ++ct) {
257      Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
258      Value *LHS = BinaryOperator::CreateAnd(
259                     PartValue, MaskCst, "cppop.and1", IP);
260      Value *VShift = BinaryOperator::CreateLShr(PartValue,
261                        ConstantInt::get(V->getType(), i), "ctpop.sh", IP);
262      Value *RHS = BinaryOperator::CreateAnd(VShift, MaskCst, "cppop.and2", IP);
263      PartValue = BinaryOperator::CreateAdd(LHS, RHS, "ctpop.step", IP);
264    }
265    Count = BinaryOperator::CreateAdd(PartValue, Count, "ctpop.part", IP);
266    if (BitSize > 64) {
267      V = BinaryOperator::CreateLShr(V, ConstantInt::get(V->getType(), 64),
268                                     "ctpop.part.sh", IP);
269      BitSize -= 64;
270    }
271  }
272
273  return Count;
274}
275
276/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
277/// instruction IP.
278static Value *LowerCTLZ(Value *V, Instruction *IP) {
279
280  unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
281  for (unsigned i = 1; i < BitSize; i <<= 1) {
282    Value *ShVal = ConstantInt::get(V->getType(), i);
283    ShVal = BinaryOperator::CreateLShr(V, ShVal, "ctlz.sh", IP);
284    V = BinaryOperator::CreateOr(V, ShVal, "ctlz.step", IP);
285  }
286
287  V = BinaryOperator::CreateNot(V, "", IP);
288  return LowerCTPOP(V, IP);
289}
290
291/// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes
292/// three integer arguments. The first argument is the Value from which the
293/// bits will be selected. It may be of any bit width. The second and third
294/// arguments specify a range of bits to select with the second argument
295/// specifying the low bit and the third argument specifying the high bit. Both
296/// must be type i32. The result is the corresponding selected bits from the
297/// Value in the same width as the Value (first argument). If the low bit index
298/// is higher than the high bit index then the inverse selection is done and
299/// the bits are returned in inverse order.
300/// @brief Lowering of llvm.part.select intrinsic.
301static Instruction *LowerPartSelect(CallInst *CI) {
302  // Make sure we're dealing with a part select intrinsic here
303  Function *F = CI->getCalledFunction();
304  const FunctionType *FT = F->getFunctionType();
305  if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
306      FT->getNumParams() != 3 || !FT->getParamType(0)->isInteger() ||
307      !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger())
308    return CI;
309
310  // Get the intrinsic implementation function by converting all the . to _
311  // in the intrinsic's function name and then reconstructing the function
312  // declaration.
313  std::string Name(F->getName());
314  for (unsigned i = 4; i < Name.length(); ++i)
315    if (Name[i] == '.')
316      Name[i] = '_';
317  Module* M = F->getParent();
318  F = cast<Function>(M->getOrInsertFunction(Name, FT));
319  F->setLinkage(GlobalValue::WeakLinkage);
320
321  // If we haven't defined the impl function yet, do so now
322  if (F->isDeclaration()) {
323
324    // Get the arguments to the function
325    Function::arg_iterator args = F->arg_begin();
326    Value* Val = args++; Val->setName("Val");
327    Value* Lo = args++; Lo->setName("Lo");
328    Value* Hi = args++; Hi->setName("High");
329
330    // We want to select a range of bits here such that [Hi, Lo] is shifted
331    // down to the low bits. However, it is quite possible that Hi is smaller
332    // than Lo in which case the bits have to be reversed.
333
334    // Create the blocks we will need for the two cases (forward, reverse)
335    BasicBlock* CurBB   = BasicBlock::Create("entry", F);
336    BasicBlock *RevSize = BasicBlock::Create("revsize", CurBB->getParent());
337    BasicBlock *FwdSize = BasicBlock::Create("fwdsize", CurBB->getParent());
338    BasicBlock *Compute = BasicBlock::Create("compute", CurBB->getParent());
339    BasicBlock *Reverse = BasicBlock::Create("reverse", CurBB->getParent());
340    BasicBlock *RsltBlk = BasicBlock::Create("result",  CurBB->getParent());
341
342    // Cast Hi and Lo to the size of Val so the widths are all the same
343    if (Hi->getType() != Val->getType())
344      Hi = CastInst::CreateIntegerCast(Hi, Val->getType(), false,
345                                         "tmp", CurBB);
346    if (Lo->getType() != Val->getType())
347      Lo = CastInst::CreateIntegerCast(Lo, Val->getType(), false,
348                                          "tmp", CurBB);
349
350    // Compute a few things that both cases will need, up front.
351    Constant* Zero = ConstantInt::get(Val->getType(), 0);
352    Constant* One = ConstantInt::get(Val->getType(), 1);
353    Constant* AllOnes = ConstantInt::getAllOnesValue(Val->getType());
354
355    // Compare the Hi and Lo bit positions. This is used to determine
356    // which case we have (forward or reverse)
357    ICmpInst *Cmp = new ICmpInst(ICmpInst::ICMP_ULT, Hi, Lo, "less",CurBB);
358    BranchInst::Create(RevSize, FwdSize, Cmp, CurBB);
359
360    // First, copmute the number of bits in the forward case.
361    Instruction* FBitSize =
362      BinaryOperator::CreateSub(Hi, Lo,"fbits", FwdSize);
363    BranchInst::Create(Compute, FwdSize);
364
365    // Second, compute the number of bits in the reverse case.
366    Instruction* RBitSize =
367      BinaryOperator::CreateSub(Lo, Hi, "rbits", RevSize);
368    BranchInst::Create(Compute, RevSize);
369
370    // Now, compute the bit range. Start by getting the bitsize and the shift
371    // amount (either Hi or Lo) from PHI nodes. Then we compute a mask for
372    // the number of bits we want in the range. We shift the bits down to the
373    // least significant bits, apply the mask to zero out unwanted high bits,
374    // and we have computed the "forward" result. It may still need to be
375    // reversed.
376
377    // Get the BitSize from one of the two subtractions
378    PHINode *BitSize = PHINode::Create(Val->getType(), "bits", Compute);
379    BitSize->reserveOperandSpace(2);
380    BitSize->addIncoming(FBitSize, FwdSize);
381    BitSize->addIncoming(RBitSize, RevSize);
382
383    // Get the ShiftAmount as the smaller of Hi/Lo
384    PHINode *ShiftAmt = PHINode::Create(Val->getType(), "shiftamt", Compute);
385    ShiftAmt->reserveOperandSpace(2);
386    ShiftAmt->addIncoming(Lo, FwdSize);
387    ShiftAmt->addIncoming(Hi, RevSize);
388
389    // Increment the bit size
390    Instruction *BitSizePlusOne =
391      BinaryOperator::CreateAdd(BitSize, One, "bits", Compute);
392
393    // Create a Mask to zero out the high order bits.
394    Instruction* Mask =
395      BinaryOperator::CreateShl(AllOnes, BitSizePlusOne, "mask", Compute);
396    Mask = BinaryOperator::CreateNot(Mask, "mask", Compute);
397
398    // Shift the bits down and apply the mask
399    Instruction* FRes =
400      BinaryOperator::CreateLShr(Val, ShiftAmt, "fres", Compute);
401    FRes = BinaryOperator::CreateAnd(FRes, Mask, "fres", Compute);
402    BranchInst::Create(Reverse, RsltBlk, Cmp, Compute);
403
404    // In the Reverse block we have the mask already in FRes but we must reverse
405    // it by shifting FRes bits right and putting them in RRes by shifting them
406    // in from left.
407
408    // First set up our loop counters
409    PHINode *Count = PHINode::Create(Val->getType(), "count", Reverse);
410    Count->reserveOperandSpace(2);
411    Count->addIncoming(BitSizePlusOne, Compute);
412
413    // Next, get the value that we are shifting.
414    PHINode *BitsToShift = PHINode::Create(Val->getType(), "val", Reverse);
415    BitsToShift->reserveOperandSpace(2);
416    BitsToShift->addIncoming(FRes, Compute);
417
418    // Finally, get the result of the last computation
419    PHINode *RRes = PHINode::Create(Val->getType(), "rres", Reverse);
420    RRes->reserveOperandSpace(2);
421    RRes->addIncoming(Zero, Compute);
422
423    // Decrement the counter
424    Instruction *Decr = BinaryOperator::CreateSub(Count, One, "decr", Reverse);
425    Count->addIncoming(Decr, Reverse);
426
427    // Compute the Bit that we want to move
428    Instruction *Bit =
429      BinaryOperator::CreateAnd(BitsToShift, One, "bit", Reverse);
430
431    // Compute the new value for next iteration.
432    Instruction *NewVal =
433      BinaryOperator::CreateLShr(BitsToShift, One, "rshift", Reverse);
434    BitsToShift->addIncoming(NewVal, Reverse);
435
436    // Shift the bit into the low bits of the result.
437    Instruction *NewRes =
438      BinaryOperator::CreateShl(RRes, One, "lshift", Reverse);
439    NewRes = BinaryOperator::CreateOr(NewRes, Bit, "addbit", Reverse);
440    RRes->addIncoming(NewRes, Reverse);
441
442    // Terminate loop if we've moved all the bits.
443    ICmpInst *Cond =
444      new ICmpInst(ICmpInst::ICMP_EQ, Decr, Zero, "cond", Reverse);
445    BranchInst::Create(RsltBlk, Reverse, Cond, Reverse);
446
447    // Finally, in the result block, select one of the two results with a PHI
448    // node and return the result;
449    CurBB = RsltBlk;
450    PHINode *BitSelect = PHINode::Create(Val->getType(), "part_select", CurBB);
451    BitSelect->reserveOperandSpace(2);
452    BitSelect->addIncoming(FRes, Compute);
453    BitSelect->addIncoming(NewRes, Reverse);
454    ReturnInst::Create(BitSelect, CurBB);
455  }
456
457  // Return a call to the implementation function
458  Value *Args[] = {
459    CI->getOperand(1),
460    CI->getOperand(2),
461    CI->getOperand(3)
462  };
463  return CallInst::Create(F, Args, array_endof(Args), CI->getName(), CI);
464}
465
466/// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes
467/// four integer arguments (iAny %Value, iAny %Replacement, i32 %Low, i32 %High)
468/// The first two arguments can be any bit width. The result is the same width
469/// as %Value. The operation replaces bits between %Low and %High with the value
470/// in %Replacement. If %Replacement is not the same width, it is truncated or
471/// zero extended as appropriate to fit the bits being replaced. If %Low is
472/// greater than %High then the inverse set of bits are replaced.
473/// @brief Lowering of llvm.bit.part.set intrinsic.
474static Instruction *LowerPartSet(CallInst *CI) {
475  // Make sure we're dealing with a part select intrinsic here
476  Function *F = CI->getCalledFunction();
477  const FunctionType *FT = F->getFunctionType();
478  if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
479      FT->getNumParams() != 4 || !FT->getParamType(0)->isInteger() ||
480      !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger() ||
481      !FT->getParamType(3)->isInteger())
482    return CI;
483
484  // Get the intrinsic implementation function by converting all the . to _
485  // in the intrinsic's function name and then reconstructing the function
486  // declaration.
487  std::string Name(F->getName());
488  for (unsigned i = 4; i < Name.length(); ++i)
489    if (Name[i] == '.')
490      Name[i] = '_';
491  Module* M = F->getParent();
492  F = cast<Function>(M->getOrInsertFunction(Name, FT));
493  F->setLinkage(GlobalValue::WeakLinkage);
494
495  // If we haven't defined the impl function yet, do so now
496  if (F->isDeclaration()) {
497    // Get the arguments for the function.
498    Function::arg_iterator args = F->arg_begin();
499    Value* Val = args++; Val->setName("Val");
500    Value* Rep = args++; Rep->setName("Rep");
501    Value* Lo  = args++; Lo->setName("Lo");
502    Value* Hi  = args++; Hi->setName("Hi");
503
504    // Get some types we need
505    const IntegerType* ValTy = cast<IntegerType>(Val->getType());
506    const IntegerType* RepTy = cast<IntegerType>(Rep->getType());
507    uint32_t ValBits = ValTy->getBitWidth();
508    uint32_t RepBits = RepTy->getBitWidth();
509
510    // Constant Definitions
511    ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits);
512    ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy);
513    ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy);
514    ConstantInt* One = ConstantInt::get(Type::Int32Ty, 1);
515    ConstantInt* ValOne = ConstantInt::get(ValTy, 1);
516    ConstantInt* Zero = ConstantInt::get(Type::Int32Ty, 0);
517    ConstantInt* ValZero = ConstantInt::get(ValTy, 0);
518
519    // Basic blocks we fill in below.
520    BasicBlock* entry = BasicBlock::Create("entry", F, 0);
521    BasicBlock* large = BasicBlock::Create("large", F, 0);
522    BasicBlock* small = BasicBlock::Create("small", F, 0);
523    BasicBlock* reverse = BasicBlock::Create("reverse", F, 0);
524    BasicBlock* result = BasicBlock::Create("result", F, 0);
525
526    // BASIC BLOCK: entry
527    // First, get the number of bits that we're placing as an i32
528    ICmpInst* is_forward =
529      new ICmpInst(ICmpInst::ICMP_ULT, Lo, Hi, "", entry);
530    SelectInst* Hi_pn = SelectInst::Create(is_forward, Hi, Lo, "", entry);
531    SelectInst* Lo_pn = SelectInst::Create(is_forward, Lo, Hi, "", entry);
532    BinaryOperator* NumBits = BinaryOperator::CreateSub(Hi_pn, Lo_pn, "",entry);
533    NumBits = BinaryOperator::CreateAdd(NumBits, One, "", entry);
534    // Now, convert Lo and Hi to ValTy bit width
535    if (ValBits > 32) {
536      Lo = new ZExtInst(Lo_pn, ValTy, "", entry);
537    } else if (ValBits < 32) {
538      Lo = new TruncInst(Lo_pn, ValTy, "", entry);
539    }
540    // Determine if the replacement bits are larger than the number of bits we
541    // are replacing and deal with it.
542    ICmpInst* is_large =
543      new ICmpInst(ICmpInst::ICMP_ULT, NumBits, RepBitWidth, "", entry);
544    BranchInst::Create(large, small, is_large, entry);
545
546    // BASIC BLOCK: large
547    Instruction* MaskBits =
548      BinaryOperator::CreateSub(RepBitWidth, NumBits, "", large);
549    MaskBits = CastInst::CreateIntegerCast(MaskBits, RepMask->getType(),
550                                           false, "", large);
551    BinaryOperator* Mask1 =
552      BinaryOperator::CreateLShr(RepMask, MaskBits, "", large);
553    BinaryOperator* Rep2 = BinaryOperator::CreateAnd(Mask1, Rep, "", large);
554    BranchInst::Create(small, large);
555
556    // BASIC BLOCK: small
557    PHINode* Rep3 = PHINode::Create(RepTy, "", small);
558    Rep3->reserveOperandSpace(2);
559    Rep3->addIncoming(Rep2, large);
560    Rep3->addIncoming(Rep, entry);
561    Value* Rep4 = Rep3;
562    if (ValBits > RepBits)
563      Rep4 = new ZExtInst(Rep3, ValTy, "", small);
564    else if (ValBits < RepBits)
565      Rep4 = new TruncInst(Rep3, ValTy, "", small);
566    BranchInst::Create(result, reverse, is_forward, small);
567
568    // BASIC BLOCK: reverse (reverses the bits of the replacement)
569    // Set up our loop counter as a PHI so we can decrement on each iteration.
570    // We will loop for the number of bits in the replacement value.
571    PHINode *Count = PHINode::Create(Type::Int32Ty, "count", reverse);
572    Count->reserveOperandSpace(2);
573    Count->addIncoming(NumBits, small);
574
575    // Get the value that we are shifting bits out of as a PHI because
576    // we'll change this with each iteration.
577    PHINode *BitsToShift = PHINode::Create(Val->getType(), "val", reverse);
578    BitsToShift->reserveOperandSpace(2);
579    BitsToShift->addIncoming(Rep4, small);
580
581    // Get the result of the last computation or zero on first iteration
582    PHINode *RRes = PHINode::Create(Val->getType(), "rres", reverse);
583    RRes->reserveOperandSpace(2);
584    RRes->addIncoming(ValZero, small);
585
586    // Decrement the loop counter by one
587    Instruction *Decr = BinaryOperator::CreateSub(Count, One, "", reverse);
588    Count->addIncoming(Decr, reverse);
589
590    // Get the bit that we want to move into the result
591    Value *Bit = BinaryOperator::CreateAnd(BitsToShift, ValOne, "", reverse);
592
593    // Compute the new value of the bits to shift for the next iteration.
594    Value *NewVal = BinaryOperator::CreateLShr(BitsToShift, ValOne,"", reverse);
595    BitsToShift->addIncoming(NewVal, reverse);
596
597    // Shift the bit we extracted into the low bit of the result.
598    Instruction *NewRes = BinaryOperator::CreateShl(RRes, ValOne, "", reverse);
599    NewRes = BinaryOperator::CreateOr(NewRes, Bit, "", reverse);
600    RRes->addIncoming(NewRes, reverse);
601
602    // Terminate loop if we've moved all the bits.
603    ICmpInst *Cond = new ICmpInst(ICmpInst::ICMP_EQ, Decr, Zero, "", reverse);
604    BranchInst::Create(result, reverse, Cond, reverse);
605
606    // BASIC BLOCK: result
607    PHINode *Rplcmnt = PHINode::Create(Val->getType(), "", result);
608    Rplcmnt->reserveOperandSpace(2);
609    Rplcmnt->addIncoming(NewRes, reverse);
610    Rplcmnt->addIncoming(Rep4, small);
611    Value* t0   = CastInst::CreateIntegerCast(NumBits,ValTy,false,"",result);
612    Value* t1   = BinaryOperator::CreateShl(ValMask, Lo, "", result);
613    Value* t2   = BinaryOperator::CreateNot(t1, "", result);
614    Value* t3   = BinaryOperator::CreateShl(t1, t0, "", result);
615    Value* t4   = BinaryOperator::CreateOr(t2, t3, "", result);
616    Value* t5   = BinaryOperator::CreateAnd(t4, Val, "", result);
617    Value* t6   = BinaryOperator::CreateShl(Rplcmnt, Lo, "", result);
618    Value* Rslt = BinaryOperator::CreateOr(t5, t6, "part_set", result);
619    ReturnInst::Create(Rslt, result);
620  }
621
622  // Return a call to the implementation function
623  Value *Args[] = {
624    CI->getOperand(1),
625    CI->getOperand(2),
626    CI->getOperand(3),
627    CI->getOperand(4)
628  };
629  return CallInst::Create(F, Args, array_endof(Args), CI->getName(), CI);
630}
631
632static void ReplaceFPIntrinsicWithCall(CallInst *CI, Constant *FCache,
633                                       Constant *DCache, Constant *LDCache,
634                                       const char *Fname, const char *Dname,
635                                       const char *LDname) {
636  switch (CI->getOperand(1)->getType()->getTypeID()) {
637  default: assert(0 && "Invalid type in intrinsic"); abort();
638  case Type::FloatTyID:
639    ReplaceCallWith(Fname, CI, CI->op_begin()+1, CI->op_end(),
640                  Type::FloatTy, FCache);
641    break;
642  case Type::DoubleTyID:
643    ReplaceCallWith(Dname, CI, CI->op_begin()+1, CI->op_end(),
644                  Type::DoubleTy, DCache);
645    break;
646  case Type::X86_FP80TyID:
647  case Type::FP128TyID:
648  case Type::PPC_FP128TyID:
649    ReplaceCallWith(LDname, CI, CI->op_begin()+1, CI->op_end(),
650                  CI->getOperand(1)->getType(), LDCache);
651    break;
652  }
653}
654
655void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
656  Function *Callee = CI->getCalledFunction();
657  assert(Callee && "Cannot lower an indirect call!");
658
659  switch (Callee->getIntrinsicID()) {
660  case Intrinsic::not_intrinsic:
661    cerr << "Cannot lower a call to a non-intrinsic function '"
662         << Callee->getName() << "'!\n";
663    abort();
664  default:
665    cerr << "Error: Code generator does not support intrinsic function '"
666         << Callee->getName() << "'!\n";
667    abort();
668
669    // The setjmp/longjmp intrinsics should only exist in the code if it was
670    // never optimized (ie, right out of the CFE), or if it has been hacked on
671    // by the lowerinvoke pass.  In both cases, the right thing to do is to
672    // convert the call to an explicit setjmp or longjmp call.
673  case Intrinsic::setjmp: {
674    static Constant *SetjmpFCache = 0;
675    Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin()+1, CI->op_end(),
676                               Type::Int32Ty, SetjmpFCache);
677    if (CI->getType() != Type::VoidTy)
678      CI->replaceAllUsesWith(V);
679    break;
680  }
681  case Intrinsic::sigsetjmp:
682     if (CI->getType() != Type::VoidTy)
683       CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
684     break;
685
686  case Intrinsic::longjmp: {
687    static Constant *LongjmpFCache = 0;
688    ReplaceCallWith("longjmp", CI, CI->op_begin()+1, CI->op_end(),
689                    Type::VoidTy, LongjmpFCache);
690    break;
691  }
692
693  case Intrinsic::siglongjmp: {
694    // Insert the call to abort
695    static Constant *AbortFCache = 0;
696    ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(),
697                    Type::VoidTy, AbortFCache);
698    break;
699  }
700  case Intrinsic::ctpop:
701    CI->replaceAllUsesWith(LowerCTPOP(CI->getOperand(1), CI));
702    break;
703
704  case Intrinsic::bswap:
705    CI->replaceAllUsesWith(LowerBSWAP(CI->getOperand(1), CI));
706    break;
707
708  case Intrinsic::ctlz:
709    CI->replaceAllUsesWith(LowerCTLZ(CI->getOperand(1), CI));
710    break;
711
712  case Intrinsic::cttz: {
713    // cttz(x) -> ctpop(~X & (X-1))
714    Value *Src = CI->getOperand(1);
715    Value *NotSrc = BinaryOperator::CreateNot(Src, Src->getName()+".not", CI);
716    Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
717    SrcM1 = BinaryOperator::CreateSub(Src, SrcM1, "", CI);
718    Src = LowerCTPOP(BinaryOperator::CreateAnd(NotSrc, SrcM1, "", CI), CI);
719    CI->replaceAllUsesWith(Src);
720    break;
721  }
722
723  case Intrinsic::part_select:
724    CI->replaceAllUsesWith(LowerPartSelect(CI));
725    break;
726
727  case Intrinsic::part_set:
728    CI->replaceAllUsesWith(LowerPartSet(CI));
729    break;
730
731  case Intrinsic::stacksave:
732  case Intrinsic::stackrestore: {
733    static bool Warned = false;
734    if (!Warned)
735      cerr << "WARNING: this target does not support the llvm.stack"
736           << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
737               "save" : "restore") << " intrinsic.\n";
738    Warned = true;
739    if (Callee->getIntrinsicID() == Intrinsic::stacksave)
740      CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
741    break;
742  }
743
744  case Intrinsic::returnaddress:
745  case Intrinsic::frameaddress:
746    cerr << "WARNING: this target does not support the llvm."
747         << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
748             "return" : "frame") << "address intrinsic.\n";
749    CI->replaceAllUsesWith(ConstantPointerNull::get(
750                                            cast<PointerType>(CI->getType())));
751    break;
752
753  case Intrinsic::prefetch:
754    break;    // Simply strip out prefetches on unsupported architectures
755
756  case Intrinsic::pcmarker:
757    break;    // Simply strip out pcmarker on unsupported architectures
758  case Intrinsic::readcyclecounter: {
759    cerr << "WARNING: this target does not support the llvm.readcyclecoun"
760         << "ter intrinsic.  It is being lowered to a constant 0\n";
761    CI->replaceAllUsesWith(ConstantInt::get(Type::Int64Ty, 0));
762    break;
763  }
764
765  case Intrinsic::dbg_stoppoint:
766  case Intrinsic::dbg_region_start:
767  case Intrinsic::dbg_region_end:
768  case Intrinsic::dbg_func_start:
769  case Intrinsic::dbg_declare:
770    break;    // Simply strip out debugging intrinsics
771
772  case Intrinsic::eh_exception:
773  case Intrinsic::eh_selector_i32:
774  case Intrinsic::eh_selector_i64:
775    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
776    break;
777
778  case Intrinsic::eh_typeid_for_i32:
779  case Intrinsic::eh_typeid_for_i64:
780    // Return something different to eh_selector.
781    CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
782    break;
783
784  case Intrinsic::var_annotation:
785    break;   // Strip out annotate intrinsic
786
787  case Intrinsic::memcpy_i32:
788  case Intrinsic::memcpy_i64: {
789    static Constant *MemcpyFCache = 0;
790    Value *Size = CI->getOperand(3);
791    const Type *IntPtr = TD.getIntPtrType();
792    if (Size->getType()->getPrimitiveSizeInBits() <
793        IntPtr->getPrimitiveSizeInBits())
794      Size = new ZExtInst(Size, IntPtr, "", CI);
795    else if (Size->getType()->getPrimitiveSizeInBits() >
796             IntPtr->getPrimitiveSizeInBits())
797      Size = new TruncInst(Size, IntPtr, "", CI);
798    Value *Ops[3];
799    Ops[0] = CI->getOperand(1);
800    Ops[1] = CI->getOperand(2);
801    Ops[2] = Size;
802    ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
803                    MemcpyFCache);
804    break;
805  }
806  case Intrinsic::memmove_i32:
807  case Intrinsic::memmove_i64: {
808    static Constant *MemmoveFCache = 0;
809    Value *Size = CI->getOperand(3);
810    const Type *IntPtr = TD.getIntPtrType();
811    if (Size->getType()->getPrimitiveSizeInBits() <
812        IntPtr->getPrimitiveSizeInBits())
813      Size = new ZExtInst(Size, IntPtr, "", CI);
814    else if (Size->getType()->getPrimitiveSizeInBits() >
815             IntPtr->getPrimitiveSizeInBits())
816      Size = new TruncInst(Size, IntPtr, "", CI);
817    Value *Ops[3];
818    Ops[0] = CI->getOperand(1);
819    Ops[1] = CI->getOperand(2);
820    Ops[2] = Size;
821    ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
822                    MemmoveFCache);
823    break;
824  }
825  case Intrinsic::memset_i32:
826  case Intrinsic::memset_i64: {
827    static Constant *MemsetFCache = 0;
828    Value *Size = CI->getOperand(3);
829    const Type *IntPtr = TD.getIntPtrType();
830    if (Size->getType()->getPrimitiveSizeInBits() <
831        IntPtr->getPrimitiveSizeInBits())
832      Size = new ZExtInst(Size, IntPtr, "", CI);
833    else if (Size->getType()->getPrimitiveSizeInBits() >
834             IntPtr->getPrimitiveSizeInBits())
835      Size = new TruncInst(Size, IntPtr, "", CI);
836    Value *Ops[3];
837    Ops[0] = CI->getOperand(1);
838    // Extend the amount to i32.
839    Ops[1] = new ZExtInst(CI->getOperand(2), Type::Int32Ty, "", CI);
840    Ops[2] = Size;
841    ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
842                    MemsetFCache);
843    break;
844  }
845  case Intrinsic::sqrt: {
846    static Constant *sqrtFCache = 0;
847    static Constant *sqrtDCache = 0;
848    static Constant *sqrtLDCache = 0;
849    ReplaceFPIntrinsicWithCall(CI, sqrtFCache, sqrtDCache, sqrtLDCache,
850                               "sqrtf", "sqrt", "sqrtl");
851    break;
852  }
853  case Intrinsic::log: {
854    static Constant *logFCache = 0;
855    static Constant *logDCache = 0;
856    static Constant *logLDCache = 0;
857    ReplaceFPIntrinsicWithCall(CI, logFCache, logDCache, logLDCache,
858                               "logf", "log", "logl");
859    break;
860  }
861  case Intrinsic::log2: {
862    static Constant *log2FCache = 0;
863    static Constant *log2DCache = 0;
864    static Constant *log2LDCache = 0;
865    ReplaceFPIntrinsicWithCall(CI, log2FCache, log2DCache, log2LDCache,
866                               "log2f", "log2", "log2l");
867    break;
868  }
869  case Intrinsic::log10: {
870    static Constant *log10FCache = 0;
871    static Constant *log10DCache = 0;
872    static Constant *log10LDCache = 0;
873    ReplaceFPIntrinsicWithCall(CI, log10FCache, log10DCache, log10LDCache,
874                               "log10f", "log10", "log10l");
875    break;
876  }
877  case Intrinsic::exp: {
878    static Constant *expFCache = 0;
879    static Constant *expDCache = 0;
880    static Constant *expLDCache = 0;
881    ReplaceFPIntrinsicWithCall(CI, expFCache, expDCache, expLDCache,
882                               "expf", "exp", "expl");
883    break;
884  }
885  case Intrinsic::exp2: {
886    static Constant *exp2FCache = 0;
887    static Constant *exp2DCache = 0;
888    static Constant *exp2LDCache = 0;
889    ReplaceFPIntrinsicWithCall(CI, exp2FCache, exp2DCache, exp2LDCache,
890                               "exp2f", "exp2", "exp2l");
891    break;
892  }
893  case Intrinsic::pow: {
894    static Constant *powFCache = 0;
895    static Constant *powDCache = 0;
896    static Constant *powLDCache = 0;
897    ReplaceFPIntrinsicWithCall(CI, powFCache, powDCache, powLDCache,
898                               "powf", "pow", "powl");
899    break;
900  }
901  case Intrinsic::flt_rounds:
902     // Lower to "round to the nearest"
903     if (CI->getType() != Type::VoidTy)
904       CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
905     break;
906  }
907
908  assert(CI->use_empty() &&
909         "Lowering should have eliminated any uses of the intrinsic call!");
910  CI->eraseFromParent();
911}
912