IntrinsicLowering.cpp revision 556b4a6385d34b77e58ff5a3ce51ddae5ae6112c
1//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the IntrinsicLowering class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Constants.h"
15#include "llvm/DerivedTypes.h"
16#include "llvm/Module.h"
17#include "llvm/Instructions.h"
18#include "llvm/Type.h"
19#include "llvm/CodeGen/IntrinsicLowering.h"
20#include "llvm/Support/Streams.h"
21#include "llvm/Target/TargetData.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/STLExtras.h"
24using namespace llvm;
25
26template <class ArgIt>
27static void EnsureFunctionExists(Module &M, const char *Name,
28                                 ArgIt ArgBegin, ArgIt ArgEnd,
29                                 const Type *RetTy) {
30  // Insert a correctly-typed definition now.
31  std::vector<const Type *> ParamTys;
32  for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
33    ParamTys.push_back(I->getType());
34  M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
35}
36
37static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
38                                    const char *FName,
39                                    const char *DName, const char *LDName) {
40  // Insert definitions for all the floating point types.
41  switch((int)Fn->arg_begin()->getType()->getTypeID()) {
42  case Type::FloatTyID:
43    EnsureFunctionExists(M, FName, Fn->arg_begin(), Fn->arg_end(),
44                         Type::FloatTy);
45    break;
46  case Type::DoubleTyID:
47    EnsureFunctionExists(M, DName, Fn->arg_begin(), Fn->arg_end(),
48                         Type::DoubleTy);
49    break;
50  case Type::X86_FP80TyID:
51  case Type::FP128TyID:
52  case Type::PPC_FP128TyID:
53    EnsureFunctionExists(M, LDName, Fn->arg_begin(), Fn->arg_end(),
54                         Fn->arg_begin()->getType());
55    break;
56  }
57}
58
59/// ReplaceCallWith - This function is used when we want to lower an intrinsic
60/// call to a call of an external function.  This handles hard cases such as
61/// when there was already a prototype for the external function, and if that
62/// prototype doesn't match the arguments we expect to pass in.
63template <class ArgIt>
64static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
65                                 ArgIt ArgBegin, ArgIt ArgEnd,
66                                 const Type *RetTy, Constant *&FCache) {
67  if (!FCache) {
68    // If we haven't already looked up this function, check to see if the
69    // program already contains a function with this name.
70    Module *M = CI->getParent()->getParent()->getParent();
71    // Get or insert the definition now.
72    std::vector<const Type *> ParamTys;
73    for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
74      ParamTys.push_back((*I)->getType());
75    FCache = M->getOrInsertFunction(NewFn,
76                                    FunctionType::get(RetTy, ParamTys, false));
77  }
78
79  SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
80  CallInst *NewCI = CallInst::Create(FCache, Args.begin(), Args.end(),
81                                     CI->getName(), CI);
82  if (!CI->use_empty())
83    CI->replaceAllUsesWith(NewCI);
84  return NewCI;
85}
86
87void IntrinsicLowering::AddPrototypes(Module &M) {
88  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
89    if (I->isDeclaration() && !I->use_empty())
90      switch (I->getIntrinsicID()) {
91      default: break;
92      case Intrinsic::setjmp:
93        EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
94                             Type::Int32Ty);
95        break;
96      case Intrinsic::longjmp:
97        EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
98                             Type::VoidTy);
99        break;
100      case Intrinsic::siglongjmp:
101        EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
102                             Type::VoidTy);
103        break;
104      case Intrinsic::memcpy:
105        M.getOrInsertFunction("memcpy", PointerType::getUnqual(Type::Int8Ty),
106                              PointerType::getUnqual(Type::Int8Ty),
107                              PointerType::getUnqual(Type::Int8Ty),
108                              TD.getIntPtrType(), (Type *)0);
109        break;
110      case Intrinsic::memmove:
111        M.getOrInsertFunction("memmove", PointerType::getUnqual(Type::Int8Ty),
112                              PointerType::getUnqual(Type::Int8Ty),
113                              PointerType::getUnqual(Type::Int8Ty),
114                              TD.getIntPtrType(), (Type *)0);
115        break;
116      case Intrinsic::memset:
117        M.getOrInsertFunction("memset", PointerType::getUnqual(Type::Int8Ty),
118                              PointerType::getUnqual(Type::Int8Ty),
119                              Type::Int32Ty,
120                              TD.getIntPtrType(), (Type *)0);
121        break;
122      case Intrinsic::sqrt:
123        EnsureFPIntrinsicsExist(M, I, "sqrtf", "sqrt", "sqrtl");
124        break;
125      case Intrinsic::sin:
126        EnsureFPIntrinsicsExist(M, I, "sinf", "sin", "sinl");
127        break;
128      case Intrinsic::cos:
129        EnsureFPIntrinsicsExist(M, I, "cosf", "cos", "cosl");
130        break;
131      case Intrinsic::pow:
132        EnsureFPIntrinsicsExist(M, I, "powf", "pow", "powl");
133        break;
134      case Intrinsic::log:
135        EnsureFPIntrinsicsExist(M, I, "logf", "log", "logl");
136        break;
137      case Intrinsic::log2:
138        EnsureFPIntrinsicsExist(M, I, "log2f", "log2", "log2l");
139        break;
140      case Intrinsic::log10:
141        EnsureFPIntrinsicsExist(M, I, "log10f", "log10", "log10l");
142        break;
143      case Intrinsic::exp:
144        EnsureFPIntrinsicsExist(M, I, "expf", "exp", "expl");
145        break;
146      case Intrinsic::exp2:
147        EnsureFPIntrinsicsExist(M, I, "exp2f", "exp2", "exp2l");
148        break;
149      }
150}
151
152/// LowerBSWAP - Emit the code to lower bswap of V before the specified
153/// instruction IP.
154static Value *LowerBSWAP(Value *V, Instruction *IP) {
155  assert(V->getType()->isInteger() && "Can't bswap a non-integer type!");
156
157  unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
158
159  switch(BitSize) {
160  default: assert(0 && "Unhandled type size of value to byteswap!");
161  case 16: {
162    Value *Tmp1 = BinaryOperator::CreateShl(V,
163                                ConstantInt::get(V->getType(),8),"bswap.2",IP);
164    Value *Tmp2 = BinaryOperator::CreateLShr(V,
165                                ConstantInt::get(V->getType(),8),"bswap.1",IP);
166    V = BinaryOperator::CreateOr(Tmp1, Tmp2, "bswap.i16", IP);
167    break;
168  }
169  case 32: {
170    Value *Tmp4 = BinaryOperator::CreateShl(V,
171                              ConstantInt::get(V->getType(),24),"bswap.4", IP);
172    Value *Tmp3 = BinaryOperator::CreateShl(V,
173                              ConstantInt::get(V->getType(),8),"bswap.3",IP);
174    Value *Tmp2 = BinaryOperator::CreateLShr(V,
175                              ConstantInt::get(V->getType(),8),"bswap.2",IP);
176    Value *Tmp1 = BinaryOperator::CreateLShr(V,
177                              ConstantInt::get(V->getType(),24),"bswap.1", IP);
178    Tmp3 = BinaryOperator::CreateAnd(Tmp3,
179                                     ConstantInt::get(Type::Int32Ty, 0xFF0000),
180                                     "bswap.and3", IP);
181    Tmp2 = BinaryOperator::CreateAnd(Tmp2,
182                                     ConstantInt::get(Type::Int32Ty, 0xFF00),
183                                     "bswap.and2", IP);
184    Tmp4 = BinaryOperator::CreateOr(Tmp4, Tmp3, "bswap.or1", IP);
185    Tmp2 = BinaryOperator::CreateOr(Tmp2, Tmp1, "bswap.or2", IP);
186    V = BinaryOperator::CreateOr(Tmp4, Tmp2, "bswap.i32", IP);
187    break;
188  }
189  case 64: {
190    Value *Tmp8 = BinaryOperator::CreateShl(V,
191                              ConstantInt::get(V->getType(),56),"bswap.8", IP);
192    Value *Tmp7 = BinaryOperator::CreateShl(V,
193                              ConstantInt::get(V->getType(),40),"bswap.7", IP);
194    Value *Tmp6 = BinaryOperator::CreateShl(V,
195                              ConstantInt::get(V->getType(),24),"bswap.6", IP);
196    Value *Tmp5 = BinaryOperator::CreateShl(V,
197                              ConstantInt::get(V->getType(),8),"bswap.5", IP);
198    Value* Tmp4 = BinaryOperator::CreateLShr(V,
199                              ConstantInt::get(V->getType(),8),"bswap.4", IP);
200    Value* Tmp3 = BinaryOperator::CreateLShr(V,
201                              ConstantInt::get(V->getType(),24),"bswap.3", IP);
202    Value* Tmp2 = BinaryOperator::CreateLShr(V,
203                              ConstantInt::get(V->getType(),40),"bswap.2", IP);
204    Value* Tmp1 = BinaryOperator::CreateLShr(V,
205                              ConstantInt::get(V->getType(),56),"bswap.1", IP);
206    Tmp7 = BinaryOperator::CreateAnd(Tmp7,
207                             ConstantInt::get(Type::Int64Ty,
208                               0xFF000000000000ULL),
209                             "bswap.and7", IP);
210    Tmp6 = BinaryOperator::CreateAnd(Tmp6,
211                             ConstantInt::get(Type::Int64Ty, 0xFF0000000000ULL),
212                             "bswap.and6", IP);
213    Tmp5 = BinaryOperator::CreateAnd(Tmp5,
214                             ConstantInt::get(Type::Int64Ty, 0xFF00000000ULL),
215                             "bswap.and5", IP);
216    Tmp4 = BinaryOperator::CreateAnd(Tmp4,
217                             ConstantInt::get(Type::Int64Ty, 0xFF000000ULL),
218                             "bswap.and4", IP);
219    Tmp3 = BinaryOperator::CreateAnd(Tmp3,
220                             ConstantInt::get(Type::Int64Ty, 0xFF0000ULL),
221                             "bswap.and3", IP);
222    Tmp2 = BinaryOperator::CreateAnd(Tmp2,
223                             ConstantInt::get(Type::Int64Ty, 0xFF00ULL),
224                             "bswap.and2", IP);
225    Tmp8 = BinaryOperator::CreateOr(Tmp8, Tmp7, "bswap.or1", IP);
226    Tmp6 = BinaryOperator::CreateOr(Tmp6, Tmp5, "bswap.or2", IP);
227    Tmp4 = BinaryOperator::CreateOr(Tmp4, Tmp3, "bswap.or3", IP);
228    Tmp2 = BinaryOperator::CreateOr(Tmp2, Tmp1, "bswap.or4", IP);
229    Tmp8 = BinaryOperator::CreateOr(Tmp8, Tmp6, "bswap.or5", IP);
230    Tmp4 = BinaryOperator::CreateOr(Tmp4, Tmp2, "bswap.or6", IP);
231    V = BinaryOperator::CreateOr(Tmp8, Tmp4, "bswap.i64", IP);
232    break;
233  }
234  }
235  return V;
236}
237
238/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
239/// instruction IP.
240static Value *LowerCTPOP(Value *V, Instruction *IP) {
241  assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!");
242
243  static const uint64_t MaskValues[6] = {
244    0x5555555555555555ULL, 0x3333333333333333ULL,
245    0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
246    0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
247  };
248
249  unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
250  unsigned WordSize = (BitSize + 63) / 64;
251  Value *Count = ConstantInt::get(V->getType(), 0);
252
253  for (unsigned n = 0; n < WordSize; ++n) {
254    Value *PartValue = V;
255    for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
256         i <<= 1, ++ct) {
257      Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
258      Value *LHS = BinaryOperator::CreateAnd(
259                     PartValue, MaskCst, "cppop.and1", IP);
260      Value *VShift = BinaryOperator::CreateLShr(PartValue,
261                        ConstantInt::get(V->getType(), i), "ctpop.sh", IP);
262      Value *RHS = BinaryOperator::CreateAnd(VShift, MaskCst, "cppop.and2", IP);
263      PartValue = BinaryOperator::CreateAdd(LHS, RHS, "ctpop.step", IP);
264    }
265    Count = BinaryOperator::CreateAdd(PartValue, Count, "ctpop.part", IP);
266    if (BitSize > 64) {
267      V = BinaryOperator::CreateLShr(V, ConstantInt::get(V->getType(), 64),
268                                     "ctpop.part.sh", IP);
269      BitSize -= 64;
270    }
271  }
272
273  return Count;
274}
275
276/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
277/// instruction IP.
278static Value *LowerCTLZ(Value *V, Instruction *IP) {
279
280  unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
281  for (unsigned i = 1; i < BitSize; i <<= 1) {
282    Value *ShVal = ConstantInt::get(V->getType(), i);
283    ShVal = BinaryOperator::CreateLShr(V, ShVal, "ctlz.sh", IP);
284    V = BinaryOperator::CreateOr(V, ShVal, "ctlz.step", IP);
285  }
286
287  V = BinaryOperator::CreateNot(V, "", IP);
288  return LowerCTPOP(V, IP);
289}
290
291/// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes
292/// three integer arguments. The first argument is the Value from which the
293/// bits will be selected. It may be of any bit width. The second and third
294/// arguments specify a range of bits to select with the second argument
295/// specifying the low bit and the third argument specifying the high bit. Both
296/// must be type i32. The result is the corresponding selected bits from the
297/// Value in the same width as the Value (first argument). If the low bit index
298/// is higher than the high bit index then the inverse selection is done and
299/// the bits are returned in inverse order.
300/// @brief Lowering of llvm.part.select intrinsic.
301static Instruction *LowerPartSelect(CallInst *CI) {
302  // Make sure we're dealing with a part select intrinsic here
303  Function *F = CI->getCalledFunction();
304  const FunctionType *FT = F->getFunctionType();
305  if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
306      FT->getNumParams() != 3 || !FT->getParamType(0)->isInteger() ||
307      !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger())
308    return CI;
309
310  // Get the intrinsic implementation function by converting all the . to _
311  // in the intrinsic's function name and then reconstructing the function
312  // declaration.
313  std::string Name(F->getName());
314  for (unsigned i = 4; i < Name.length(); ++i)
315    if (Name[i] == '.')
316      Name[i] = '_';
317  Module* M = F->getParent();
318  F = cast<Function>(M->getOrInsertFunction(Name, FT));
319  F->setLinkage(GlobalValue::WeakLinkage);
320
321  // If we haven't defined the impl function yet, do so now
322  if (F->isDeclaration()) {
323
324    // Get the arguments to the function
325    Function::arg_iterator args = F->arg_begin();
326    Value* Val = args++; Val->setName("Val");
327    Value* Lo = args++; Lo->setName("Lo");
328    Value* Hi = args++; Hi->setName("High");
329
330    // We want to select a range of bits here such that [Hi, Lo] is shifted
331    // down to the low bits. However, it is quite possible that Hi is smaller
332    // than Lo in which case the bits have to be reversed.
333
334    // Create the blocks we will need for the two cases (forward, reverse)
335    BasicBlock* CurBB   = BasicBlock::Create("entry", F);
336    BasicBlock *RevSize = BasicBlock::Create("revsize", CurBB->getParent());
337    BasicBlock *FwdSize = BasicBlock::Create("fwdsize", CurBB->getParent());
338    BasicBlock *Compute = BasicBlock::Create("compute", CurBB->getParent());
339    BasicBlock *Reverse = BasicBlock::Create("reverse", CurBB->getParent());
340    BasicBlock *RsltBlk = BasicBlock::Create("result",  CurBB->getParent());
341
342    // Cast Hi and Lo to the size of Val so the widths are all the same
343    if (Hi->getType() != Val->getType())
344      Hi = CastInst::CreateIntegerCast(Hi, Val->getType(), false,
345                                         "tmp", CurBB);
346    if (Lo->getType() != Val->getType())
347      Lo = CastInst::CreateIntegerCast(Lo, Val->getType(), false,
348                                          "tmp", CurBB);
349
350    // Compute a few things that both cases will need, up front.
351    Constant* Zero = ConstantInt::get(Val->getType(), 0);
352    Constant* One = ConstantInt::get(Val->getType(), 1);
353    Constant* AllOnes = ConstantInt::getAllOnesValue(Val->getType());
354
355    // Compare the Hi and Lo bit positions. This is used to determine
356    // which case we have (forward or reverse)
357    ICmpInst *Cmp = new ICmpInst(ICmpInst::ICMP_ULT, Hi, Lo, "less",CurBB);
358    BranchInst::Create(RevSize, FwdSize, Cmp, CurBB);
359
360    // First, copmute the number of bits in the forward case.
361    Instruction* FBitSize =
362      BinaryOperator::CreateSub(Hi, Lo,"fbits", FwdSize);
363    BranchInst::Create(Compute, FwdSize);
364
365    // Second, compute the number of bits in the reverse case.
366    Instruction* RBitSize =
367      BinaryOperator::CreateSub(Lo, Hi, "rbits", RevSize);
368    BranchInst::Create(Compute, RevSize);
369
370    // Now, compute the bit range. Start by getting the bitsize and the shift
371    // amount (either Hi or Lo) from PHI nodes. Then we compute a mask for
372    // the number of bits we want in the range. We shift the bits down to the
373    // least significant bits, apply the mask to zero out unwanted high bits,
374    // and we have computed the "forward" result. It may still need to be
375    // reversed.
376
377    // Get the BitSize from one of the two subtractions
378    PHINode *BitSize = PHINode::Create(Val->getType(), "bits", Compute);
379    BitSize->reserveOperandSpace(2);
380    BitSize->addIncoming(FBitSize, FwdSize);
381    BitSize->addIncoming(RBitSize, RevSize);
382
383    // Get the ShiftAmount as the smaller of Hi/Lo
384    PHINode *ShiftAmt = PHINode::Create(Val->getType(), "shiftamt", Compute);
385    ShiftAmt->reserveOperandSpace(2);
386    ShiftAmt->addIncoming(Lo, FwdSize);
387    ShiftAmt->addIncoming(Hi, RevSize);
388
389    // Increment the bit size
390    Instruction *BitSizePlusOne =
391      BinaryOperator::CreateAdd(BitSize, One, "bits", Compute);
392
393    // Create a Mask to zero out the high order bits.
394    Instruction* Mask =
395      BinaryOperator::CreateShl(AllOnes, BitSizePlusOne, "mask", Compute);
396    Mask = BinaryOperator::CreateNot(Mask, "mask", Compute);
397
398    // Shift the bits down and apply the mask
399    Instruction* FRes =
400      BinaryOperator::CreateLShr(Val, ShiftAmt, "fres", Compute);
401    FRes = BinaryOperator::CreateAnd(FRes, Mask, "fres", Compute);
402    BranchInst::Create(Reverse, RsltBlk, Cmp, Compute);
403
404    // In the Reverse block we have the mask already in FRes but we must reverse
405    // it by shifting FRes bits right and putting them in RRes by shifting them
406    // in from left.
407
408    // First set up our loop counters
409    PHINode *Count = PHINode::Create(Val->getType(), "count", Reverse);
410    Count->reserveOperandSpace(2);
411    Count->addIncoming(BitSizePlusOne, Compute);
412
413    // Next, get the value that we are shifting.
414    PHINode *BitsToShift = PHINode::Create(Val->getType(), "val", Reverse);
415    BitsToShift->reserveOperandSpace(2);
416    BitsToShift->addIncoming(FRes, Compute);
417
418    // Finally, get the result of the last computation
419    PHINode *RRes = PHINode::Create(Val->getType(), "rres", Reverse);
420    RRes->reserveOperandSpace(2);
421    RRes->addIncoming(Zero, Compute);
422
423    // Decrement the counter
424    Instruction *Decr = BinaryOperator::CreateSub(Count, One, "decr", Reverse);
425    Count->addIncoming(Decr, Reverse);
426
427    // Compute the Bit that we want to move
428    Instruction *Bit =
429      BinaryOperator::CreateAnd(BitsToShift, One, "bit", Reverse);
430
431    // Compute the new value for next iteration.
432    Instruction *NewVal =
433      BinaryOperator::CreateLShr(BitsToShift, One, "rshift", Reverse);
434    BitsToShift->addIncoming(NewVal, Reverse);
435
436    // Shift the bit into the low bits of the result.
437    Instruction *NewRes =
438      BinaryOperator::CreateShl(RRes, One, "lshift", Reverse);
439    NewRes = BinaryOperator::CreateOr(NewRes, Bit, "addbit", Reverse);
440    RRes->addIncoming(NewRes, Reverse);
441
442    // Terminate loop if we've moved all the bits.
443    ICmpInst *Cond =
444      new ICmpInst(ICmpInst::ICMP_EQ, Decr, Zero, "cond", Reverse);
445    BranchInst::Create(RsltBlk, Reverse, Cond, Reverse);
446
447    // Finally, in the result block, select one of the two results with a PHI
448    // node and return the result;
449    CurBB = RsltBlk;
450    PHINode *BitSelect = PHINode::Create(Val->getType(), "part_select", CurBB);
451    BitSelect->reserveOperandSpace(2);
452    BitSelect->addIncoming(FRes, Compute);
453    BitSelect->addIncoming(NewRes, Reverse);
454    ReturnInst::Create(BitSelect, CurBB);
455  }
456
457  // Return a call to the implementation function
458  Value *Args[] = {
459    CI->getOperand(1),
460    CI->getOperand(2),
461    CI->getOperand(3)
462  };
463  return CallInst::Create(F, Args, array_endof(Args), CI->getName(), CI);
464}
465
466/// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes
467/// four integer arguments (iAny %Value, iAny %Replacement, i32 %Low, i32 %High)
468/// The first two arguments can be any bit width. The result is the same width
469/// as %Value. The operation replaces bits between %Low and %High with the value
470/// in %Replacement. If %Replacement is not the same width, it is truncated or
471/// zero extended as appropriate to fit the bits being replaced. If %Low is
472/// greater than %High then the inverse set of bits are replaced.
473/// @brief Lowering of llvm.bit.part.set intrinsic.
474static Instruction *LowerPartSet(CallInst *CI) {
475  // Make sure we're dealing with a part select intrinsic here
476  Function *F = CI->getCalledFunction();
477  const FunctionType *FT = F->getFunctionType();
478  if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
479      FT->getNumParams() != 4 || !FT->getParamType(0)->isInteger() ||
480      !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger() ||
481      !FT->getParamType(3)->isInteger())
482    return CI;
483
484  // Get the intrinsic implementation function by converting all the . to _
485  // in the intrinsic's function name and then reconstructing the function
486  // declaration.
487  std::string Name(F->getName());
488  for (unsigned i = 4; i < Name.length(); ++i)
489    if (Name[i] == '.')
490      Name[i] = '_';
491  Module* M = F->getParent();
492  F = cast<Function>(M->getOrInsertFunction(Name, FT));
493  F->setLinkage(GlobalValue::WeakLinkage);
494
495  // If we haven't defined the impl function yet, do so now
496  if (F->isDeclaration()) {
497    // Get the arguments for the function.
498    Function::arg_iterator args = F->arg_begin();
499    Value* Val = args++; Val->setName("Val");
500    Value* Rep = args++; Rep->setName("Rep");
501    Value* Lo  = args++; Lo->setName("Lo");
502    Value* Hi  = args++; Hi->setName("Hi");
503
504    // Get some types we need
505    const IntegerType* ValTy = cast<IntegerType>(Val->getType());
506    const IntegerType* RepTy = cast<IntegerType>(Rep->getType());
507    uint32_t ValBits = ValTy->getBitWidth();
508    uint32_t RepBits = RepTy->getBitWidth();
509
510    // Constant Definitions
511    ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits);
512    ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy);
513    ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy);
514    ConstantInt* One = ConstantInt::get(Type::Int32Ty, 1);
515    ConstantInt* ValOne = ConstantInt::get(ValTy, 1);
516    ConstantInt* Zero = ConstantInt::get(Type::Int32Ty, 0);
517    ConstantInt* ValZero = ConstantInt::get(ValTy, 0);
518
519    // Basic blocks we fill in below.
520    BasicBlock* entry = BasicBlock::Create("entry", F, 0);
521    BasicBlock* large = BasicBlock::Create("large", F, 0);
522    BasicBlock* small = BasicBlock::Create("small", F, 0);
523    BasicBlock* reverse = BasicBlock::Create("reverse", F, 0);
524    BasicBlock* result = BasicBlock::Create("result", F, 0);
525
526    // BASIC BLOCK: entry
527    // First, get the number of bits that we're placing as an i32
528    ICmpInst* is_forward =
529      new ICmpInst(ICmpInst::ICMP_ULT, Lo, Hi, "", entry);
530    SelectInst* Hi_pn = SelectInst::Create(is_forward, Hi, Lo, "", entry);
531    SelectInst* Lo_pn = SelectInst::Create(is_forward, Lo, Hi, "", entry);
532    BinaryOperator* NumBits = BinaryOperator::CreateSub(Hi_pn, Lo_pn, "",entry);
533    NumBits = BinaryOperator::CreateAdd(NumBits, One, "", entry);
534    // Now, convert Lo and Hi to ValTy bit width
535    if (ValBits > 32) {
536      Lo = new ZExtInst(Lo_pn, ValTy, "", entry);
537    } else if (ValBits < 32) {
538      Lo = new TruncInst(Lo_pn, ValTy, "", entry);
539    } else {
540      Lo = Lo_pn;
541    }
542    // Determine if the replacement bits are larger than the number of bits we
543    // are replacing and deal with it.
544    ICmpInst* is_large =
545      new ICmpInst(ICmpInst::ICMP_ULT, NumBits, RepBitWidth, "", entry);
546    BranchInst::Create(large, small, is_large, entry);
547
548    // BASIC BLOCK: large
549    Instruction* MaskBits =
550      BinaryOperator::CreateSub(RepBitWidth, NumBits, "", large);
551    MaskBits = CastInst::CreateIntegerCast(MaskBits, RepMask->getType(),
552                                           false, "", large);
553    BinaryOperator* Mask1 =
554      BinaryOperator::CreateLShr(RepMask, MaskBits, "", large);
555    BinaryOperator* Rep2 = BinaryOperator::CreateAnd(Mask1, Rep, "", large);
556    BranchInst::Create(small, large);
557
558    // BASIC BLOCK: small
559    PHINode* Rep3 = PHINode::Create(RepTy, "", small);
560    Rep3->reserveOperandSpace(2);
561    Rep3->addIncoming(Rep2, large);
562    Rep3->addIncoming(Rep, entry);
563    Value* Rep4 = Rep3;
564    if (ValBits > RepBits)
565      Rep4 = new ZExtInst(Rep3, ValTy, "", small);
566    else if (ValBits < RepBits)
567      Rep4 = new TruncInst(Rep3, ValTy, "", small);
568    BranchInst::Create(result, reverse, is_forward, small);
569
570    // BASIC BLOCK: reverse (reverses the bits of the replacement)
571    // Set up our loop counter as a PHI so we can decrement on each iteration.
572    // We will loop for the number of bits in the replacement value.
573    PHINode *Count = PHINode::Create(Type::Int32Ty, "count", reverse);
574    Count->reserveOperandSpace(2);
575    Count->addIncoming(NumBits, small);
576
577    // Get the value that we are shifting bits out of as a PHI because
578    // we'll change this with each iteration.
579    PHINode *BitsToShift = PHINode::Create(Val->getType(), "val", reverse);
580    BitsToShift->reserveOperandSpace(2);
581    BitsToShift->addIncoming(Rep4, small);
582
583    // Get the result of the last computation or zero on first iteration
584    PHINode *RRes = PHINode::Create(Val->getType(), "rres", reverse);
585    RRes->reserveOperandSpace(2);
586    RRes->addIncoming(ValZero, small);
587
588    // Decrement the loop counter by one
589    Instruction *Decr = BinaryOperator::CreateSub(Count, One, "", reverse);
590    Count->addIncoming(Decr, reverse);
591
592    // Get the bit that we want to move into the result
593    Value *Bit = BinaryOperator::CreateAnd(BitsToShift, ValOne, "", reverse);
594
595    // Compute the new value of the bits to shift for the next iteration.
596    Value *NewVal = BinaryOperator::CreateLShr(BitsToShift, ValOne,"", reverse);
597    BitsToShift->addIncoming(NewVal, reverse);
598
599    // Shift the bit we extracted into the low bit of the result.
600    Instruction *NewRes = BinaryOperator::CreateShl(RRes, ValOne, "", reverse);
601    NewRes = BinaryOperator::CreateOr(NewRes, Bit, "", reverse);
602    RRes->addIncoming(NewRes, reverse);
603
604    // Terminate loop if we've moved all the bits.
605    ICmpInst *Cond = new ICmpInst(ICmpInst::ICMP_EQ, Decr, Zero, "", reverse);
606    BranchInst::Create(result, reverse, Cond, reverse);
607
608    // BASIC BLOCK: result
609    PHINode *Rplcmnt = PHINode::Create(Val->getType(), "", result);
610    Rplcmnt->reserveOperandSpace(2);
611    Rplcmnt->addIncoming(NewRes, reverse);
612    Rplcmnt->addIncoming(Rep4, small);
613    Value* t0   = CastInst::CreateIntegerCast(NumBits,ValTy,false,"",result);
614    Value* t1   = BinaryOperator::CreateShl(ValMask, Lo, "", result);
615    Value* t2   = BinaryOperator::CreateNot(t1, "", result);
616    Value* t3   = BinaryOperator::CreateShl(t1, t0, "", result);
617    Value* t4   = BinaryOperator::CreateOr(t2, t3, "", result);
618    Value* t5   = BinaryOperator::CreateAnd(t4, Val, "", result);
619    Value* t6   = BinaryOperator::CreateShl(Rplcmnt, Lo, "", result);
620    Value* Rslt = BinaryOperator::CreateOr(t5, t6, "part_set", result);
621    ReturnInst::Create(Rslt, result);
622  }
623
624  // Return a call to the implementation function
625  Value *Args[] = {
626    CI->getOperand(1),
627    CI->getOperand(2),
628    CI->getOperand(3),
629    CI->getOperand(4)
630  };
631  return CallInst::Create(F, Args, array_endof(Args), CI->getName(), CI);
632}
633
634static void ReplaceFPIntrinsicWithCall(CallInst *CI, Constant *FCache,
635                                       Constant *DCache, Constant *LDCache,
636                                       const char *Fname, const char *Dname,
637                                       const char *LDname) {
638  switch (CI->getOperand(1)->getType()->getTypeID()) {
639  default: assert(0 && "Invalid type in intrinsic"); abort();
640  case Type::FloatTyID:
641    ReplaceCallWith(Fname, CI, CI->op_begin()+1, CI->op_end(),
642                  Type::FloatTy, FCache);
643    break;
644  case Type::DoubleTyID:
645    ReplaceCallWith(Dname, CI, CI->op_begin()+1, CI->op_end(),
646                  Type::DoubleTy, DCache);
647    break;
648  case Type::X86_FP80TyID:
649  case Type::FP128TyID:
650  case Type::PPC_FP128TyID:
651    ReplaceCallWith(LDname, CI, CI->op_begin()+1, CI->op_end(),
652                  CI->getOperand(1)->getType(), LDCache);
653    break;
654  }
655}
656
657void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
658  Function *Callee = CI->getCalledFunction();
659  assert(Callee && "Cannot lower an indirect call!");
660
661  switch (Callee->getIntrinsicID()) {
662  case Intrinsic::not_intrinsic:
663    cerr << "Cannot lower a call to a non-intrinsic function '"
664         << Callee->getName() << "'!\n";
665    abort();
666  default:
667    cerr << "Error: Code generator does not support intrinsic function '"
668         << Callee->getName() << "'!\n";
669    abort();
670
671    // The setjmp/longjmp intrinsics should only exist in the code if it was
672    // never optimized (ie, right out of the CFE), or if it has been hacked on
673    // by the lowerinvoke pass.  In both cases, the right thing to do is to
674    // convert the call to an explicit setjmp or longjmp call.
675  case Intrinsic::setjmp: {
676    static Constant *SetjmpFCache = 0;
677    Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin()+1, CI->op_end(),
678                               Type::Int32Ty, SetjmpFCache);
679    if (CI->getType() != Type::VoidTy)
680      CI->replaceAllUsesWith(V);
681    break;
682  }
683  case Intrinsic::sigsetjmp:
684     if (CI->getType() != Type::VoidTy)
685       CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
686     break;
687
688  case Intrinsic::longjmp: {
689    static Constant *LongjmpFCache = 0;
690    ReplaceCallWith("longjmp", CI, CI->op_begin()+1, CI->op_end(),
691                    Type::VoidTy, LongjmpFCache);
692    break;
693  }
694
695  case Intrinsic::siglongjmp: {
696    // Insert the call to abort
697    static Constant *AbortFCache = 0;
698    ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(),
699                    Type::VoidTy, AbortFCache);
700    break;
701  }
702  case Intrinsic::ctpop:
703    CI->replaceAllUsesWith(LowerCTPOP(CI->getOperand(1), CI));
704    break;
705
706  case Intrinsic::bswap:
707    CI->replaceAllUsesWith(LowerBSWAP(CI->getOperand(1), CI));
708    break;
709
710  case Intrinsic::ctlz:
711    CI->replaceAllUsesWith(LowerCTLZ(CI->getOperand(1), CI));
712    break;
713
714  case Intrinsic::cttz: {
715    // cttz(x) -> ctpop(~X & (X-1))
716    Value *Src = CI->getOperand(1);
717    Value *NotSrc = BinaryOperator::CreateNot(Src, Src->getName()+".not", CI);
718    Value *SrcM1 = ConstantInt::get(Src->getType(), 1);
719    SrcM1 = BinaryOperator::CreateSub(Src, SrcM1, "", CI);
720    Src = LowerCTPOP(BinaryOperator::CreateAnd(NotSrc, SrcM1, "", CI), CI);
721    CI->replaceAllUsesWith(Src);
722    break;
723  }
724
725  case Intrinsic::part_select:
726    CI->replaceAllUsesWith(LowerPartSelect(CI));
727    break;
728
729  case Intrinsic::part_set:
730    CI->replaceAllUsesWith(LowerPartSet(CI));
731    break;
732
733  case Intrinsic::stacksave:
734  case Intrinsic::stackrestore: {
735    static bool Warned = false;
736    if (!Warned)
737      cerr << "WARNING: this target does not support the llvm.stack"
738           << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
739               "save" : "restore") << " intrinsic.\n";
740    Warned = true;
741    if (Callee->getIntrinsicID() == Intrinsic::stacksave)
742      CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
743    break;
744  }
745
746  case Intrinsic::returnaddress:
747  case Intrinsic::frameaddress:
748    cerr << "WARNING: this target does not support the llvm."
749         << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
750             "return" : "frame") << "address intrinsic.\n";
751    CI->replaceAllUsesWith(ConstantPointerNull::get(
752                                            cast<PointerType>(CI->getType())));
753    break;
754
755  case Intrinsic::prefetch:
756    break;    // Simply strip out prefetches on unsupported architectures
757
758  case Intrinsic::pcmarker:
759    break;    // Simply strip out pcmarker on unsupported architectures
760  case Intrinsic::readcyclecounter: {
761    cerr << "WARNING: this target does not support the llvm.readcyclecoun"
762         << "ter intrinsic.  It is being lowered to a constant 0\n";
763    CI->replaceAllUsesWith(ConstantInt::get(Type::Int64Ty, 0));
764    break;
765  }
766
767  case Intrinsic::dbg_stoppoint:
768  case Intrinsic::dbg_region_start:
769  case Intrinsic::dbg_region_end:
770  case Intrinsic::dbg_func_start:
771  case Intrinsic::dbg_declare:
772    break;    // Simply strip out debugging intrinsics
773
774  case Intrinsic::eh_exception:
775  case Intrinsic::eh_selector_i32:
776  case Intrinsic::eh_selector_i64:
777    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
778    break;
779
780  case Intrinsic::eh_typeid_for_i32:
781  case Intrinsic::eh_typeid_for_i64:
782    // Return something different to eh_selector.
783    CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
784    break;
785
786  case Intrinsic::var_annotation:
787    break;   // Strip out annotate intrinsic
788
789  case Intrinsic::memcpy: {
790    static Constant *MemcpyFCache = 0;
791    Value *Size = CI->getOperand(3);
792    const Type *IntPtr = TD.getIntPtrType();
793    if (Size->getType()->getPrimitiveSizeInBits() <
794        IntPtr->getPrimitiveSizeInBits())
795      Size = new ZExtInst(Size, IntPtr, "", CI);
796    else if (Size->getType()->getPrimitiveSizeInBits() >
797             IntPtr->getPrimitiveSizeInBits())
798      Size = new TruncInst(Size, IntPtr, "", CI);
799    Value *Ops[3];
800    Ops[0] = CI->getOperand(1);
801    Ops[1] = CI->getOperand(2);
802    Ops[2] = Size;
803    ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
804                    MemcpyFCache);
805    break;
806  }
807  case Intrinsic::memmove: {
808    static Constant *MemmoveFCache = 0;
809    Value *Size = CI->getOperand(3);
810    const Type *IntPtr = TD.getIntPtrType();
811    if (Size->getType()->getPrimitiveSizeInBits() <
812        IntPtr->getPrimitiveSizeInBits())
813      Size = new ZExtInst(Size, IntPtr, "", CI);
814    else if (Size->getType()->getPrimitiveSizeInBits() >
815             IntPtr->getPrimitiveSizeInBits())
816      Size = new TruncInst(Size, IntPtr, "", CI);
817    Value *Ops[3];
818    Ops[0] = CI->getOperand(1);
819    Ops[1] = CI->getOperand(2);
820    Ops[2] = Size;
821    ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
822                    MemmoveFCache);
823    break;
824  }
825  case Intrinsic::memset: {
826    static Constant *MemsetFCache = 0;
827    Value *Size = CI->getOperand(3);
828    const Type *IntPtr = TD.getIntPtrType();
829    if (Size->getType()->getPrimitiveSizeInBits() <
830        IntPtr->getPrimitiveSizeInBits())
831      Size = new ZExtInst(Size, IntPtr, "", CI);
832    else if (Size->getType()->getPrimitiveSizeInBits() >
833             IntPtr->getPrimitiveSizeInBits())
834      Size = new TruncInst(Size, IntPtr, "", CI);
835    Value *Ops[3];
836    Ops[0] = CI->getOperand(1);
837    // Extend the amount to i32.
838    Ops[1] = new ZExtInst(CI->getOperand(2), Type::Int32Ty, "", CI);
839    Ops[2] = Size;
840    ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
841                    MemsetFCache);
842    break;
843  }
844  case Intrinsic::sqrt: {
845    static Constant *sqrtFCache = 0;
846    static Constant *sqrtDCache = 0;
847    static Constant *sqrtLDCache = 0;
848    ReplaceFPIntrinsicWithCall(CI, sqrtFCache, sqrtDCache, sqrtLDCache,
849                               "sqrtf", "sqrt", "sqrtl");
850    break;
851  }
852  case Intrinsic::log: {
853    static Constant *logFCache = 0;
854    static Constant *logDCache = 0;
855    static Constant *logLDCache = 0;
856    ReplaceFPIntrinsicWithCall(CI, logFCache, logDCache, logLDCache,
857                               "logf", "log", "logl");
858    break;
859  }
860  case Intrinsic::log2: {
861    static Constant *log2FCache = 0;
862    static Constant *log2DCache = 0;
863    static Constant *log2LDCache = 0;
864    ReplaceFPIntrinsicWithCall(CI, log2FCache, log2DCache, log2LDCache,
865                               "log2f", "log2", "log2l");
866    break;
867  }
868  case Intrinsic::log10: {
869    static Constant *log10FCache = 0;
870    static Constant *log10DCache = 0;
871    static Constant *log10LDCache = 0;
872    ReplaceFPIntrinsicWithCall(CI, log10FCache, log10DCache, log10LDCache,
873                               "log10f", "log10", "log10l");
874    break;
875  }
876  case Intrinsic::exp: {
877    static Constant *expFCache = 0;
878    static Constant *expDCache = 0;
879    static Constant *expLDCache = 0;
880    ReplaceFPIntrinsicWithCall(CI, expFCache, expDCache, expLDCache,
881                               "expf", "exp", "expl");
882    break;
883  }
884  case Intrinsic::exp2: {
885    static Constant *exp2FCache = 0;
886    static Constant *exp2DCache = 0;
887    static Constant *exp2LDCache = 0;
888    ReplaceFPIntrinsicWithCall(CI, exp2FCache, exp2DCache, exp2LDCache,
889                               "exp2f", "exp2", "exp2l");
890    break;
891  }
892  case Intrinsic::pow: {
893    static Constant *powFCache = 0;
894    static Constant *powDCache = 0;
895    static Constant *powLDCache = 0;
896    ReplaceFPIntrinsicWithCall(CI, powFCache, powDCache, powLDCache,
897                               "powf", "pow", "powl");
898    break;
899  }
900  case Intrinsic::flt_rounds:
901     // Lower to "round to the nearest"
902     if (CI->getType() != Type::VoidTy)
903       CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
904     break;
905  }
906
907  assert(CI->use_empty() &&
908         "Lowering should have eliminated any uses of the intrinsic call!");
909  CI->eraseFromParent();
910}
911