IntrinsicLowering.cpp revision f664e41b201bad27ed3661bf50cd71f54242c114
1//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by the LLVM research group and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the IntrinsicLowering class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Constants.h"
15#include "llvm/DerivedTypes.h"
16#include "llvm/Module.h"
17#include "llvm/Instructions.h"
18#include "llvm/Type.h"
19#include "llvm/CodeGen/IntrinsicLowering.h"
20#include "llvm/Support/Streams.h"
21#include "llvm/Target/TargetData.h"
22#include "llvm/ADT/SmallVector.h"
23using namespace llvm;
24
25template <class ArgIt>
26static void EnsureFunctionExists(Module &M, const char *Name,
27                                 ArgIt ArgBegin, ArgIt ArgEnd,
28                                 const Type *RetTy) {
29  // Insert a correctly-typed definition now.
30  std::vector<const Type *> ParamTys;
31  for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
32    ParamTys.push_back(I->getType());
33  M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
34}
35
36/// ReplaceCallWith - This function is used when we want to lower an intrinsic
37/// call to a call of an external function.  This handles hard cases such as
38/// when there was already a prototype for the external function, and if that
39/// prototype doesn't match the arguments we expect to pass in.
40template <class ArgIt>
41static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
42                                 ArgIt ArgBegin, ArgIt ArgEnd,
43                                 const Type *RetTy, Constant *&FCache) {
44  if (!FCache) {
45    // If we haven't already looked up this function, check to see if the
46    // program already contains a function with this name.
47    Module *M = CI->getParent()->getParent()->getParent();
48    // Get or insert the definition now.
49    std::vector<const Type *> ParamTys;
50    for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
51      ParamTys.push_back((*I)->getType());
52    FCache = M->getOrInsertFunction(NewFn,
53                                    FunctionType::get(RetTy, ParamTys, false));
54  }
55
56  SmallVector<Value*, 8> Operands(ArgBegin, ArgEnd);
57  CallInst *NewCI = new CallInst(FCache, &Operands[0], Operands.size(),
58                                 CI->getName(), CI);
59  if (!CI->use_empty())
60    CI->replaceAllUsesWith(NewCI);
61  return NewCI;
62}
63
64void IntrinsicLowering::AddPrototypes(Module &M) {
65  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
66    if (I->isDeclaration() && !I->use_empty())
67      switch (I->getIntrinsicID()) {
68      default: break;
69      case Intrinsic::setjmp:
70        EnsureFunctionExists(M, "setjmp", I->arg_begin(), I->arg_end(),
71                             Type::Int32Ty);
72        break;
73      case Intrinsic::longjmp:
74        EnsureFunctionExists(M, "longjmp", I->arg_begin(), I->arg_end(),
75                             Type::VoidTy);
76        break;
77      case Intrinsic::siglongjmp:
78        EnsureFunctionExists(M, "abort", I->arg_end(), I->arg_end(),
79                             Type::VoidTy);
80        break;
81      case Intrinsic::memcpy_i32:
82      case Intrinsic::memcpy_i64:
83        M.getOrInsertFunction("memcpy", PointerType::get(Type::Int8Ty),
84                              PointerType::get(Type::Int8Ty),
85                              PointerType::get(Type::Int8Ty),
86                              TD.getIntPtrType(), (Type *)0);
87        break;
88      case Intrinsic::memmove_i32:
89      case Intrinsic::memmove_i64:
90        M.getOrInsertFunction("memmove", PointerType::get(Type::Int8Ty),
91                              PointerType::get(Type::Int8Ty),
92                              PointerType::get(Type::Int8Ty),
93                              TD.getIntPtrType(), (Type *)0);
94        break;
95      case Intrinsic::memset_i32:
96      case Intrinsic::memset_i64:
97        M.getOrInsertFunction("memset", PointerType::get(Type::Int8Ty),
98                              PointerType::get(Type::Int8Ty), Type::Int32Ty,
99                              TD.getIntPtrType(), (Type *)0);
100        break;
101      case Intrinsic::sqrt_f32:
102      case Intrinsic::sqrt_f64:
103        if(I->arg_begin()->getType() == Type::FloatTy)
104          EnsureFunctionExists(M, "sqrtf", I->arg_begin(), I->arg_end(),
105                               Type::FloatTy);
106        else
107          EnsureFunctionExists(M, "sqrt", I->arg_begin(), I->arg_end(),
108                               Type::DoubleTy);
109        break;
110      }
111}
112
113/// LowerBSWAP - Emit the code to lower bswap of V before the specified
114/// instruction IP.
115static Value *LowerBSWAP(Value *V, Instruction *IP) {
116  assert(V->getType()->isInteger() && "Can't bswap a non-integer type!");
117
118  unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
119
120  switch(BitSize) {
121  default: assert(0 && "Unhandled type size of value to byteswap!");
122  case 16: {
123    Value *Tmp1 = BinaryOperator::createShl(V,
124                                ConstantInt::get(V->getType(),8),"bswap.2",IP);
125    Value *Tmp2 = BinaryOperator::createLShr(V,
126                                ConstantInt::get(V->getType(),8),"bswap.1",IP);
127    V = BinaryOperator::createOr(Tmp1, Tmp2, "bswap.i16", IP);
128    break;
129  }
130  case 32: {
131    Value *Tmp4 = BinaryOperator::createShl(V,
132                              ConstantInt::get(V->getType(),24),"bswap.4", IP);
133    Value *Tmp3 = BinaryOperator::createShl(V,
134                              ConstantInt::get(V->getType(),8),"bswap.3",IP);
135    Value *Tmp2 = BinaryOperator::createLShr(V,
136                              ConstantInt::get(V->getType(),8),"bswap.2",IP);
137    Value *Tmp1 = BinaryOperator::createLShr(V,
138                              ConstantInt::get(V->getType(),24),"bswap.1", IP);
139    Tmp3 = BinaryOperator::createAnd(Tmp3,
140                                     ConstantInt::get(Type::Int32Ty, 0xFF0000),
141                                     "bswap.and3", IP);
142    Tmp2 = BinaryOperator::createAnd(Tmp2,
143                                     ConstantInt::get(Type::Int32Ty, 0xFF00),
144                                     "bswap.and2", IP);
145    Tmp4 = BinaryOperator::createOr(Tmp4, Tmp3, "bswap.or1", IP);
146    Tmp2 = BinaryOperator::createOr(Tmp2, Tmp1, "bswap.or2", IP);
147    V = BinaryOperator::createOr(Tmp4, Tmp2, "bswap.i32", IP);
148    break;
149  }
150  case 64: {
151    Value *Tmp8 = BinaryOperator::createShl(V,
152                              ConstantInt::get(V->getType(),56),"bswap.8", IP);
153    Value *Tmp7 = BinaryOperator::createShl(V,
154                              ConstantInt::get(V->getType(),40),"bswap.7", IP);
155    Value *Tmp6 = BinaryOperator::createShl(V,
156                              ConstantInt::get(V->getType(),24),"bswap.6", IP);
157    Value *Tmp5 = BinaryOperator::createShl(V,
158                              ConstantInt::get(V->getType(),8),"bswap.5", IP);
159    Value* Tmp4 = BinaryOperator::createLShr(V,
160                              ConstantInt::get(V->getType(),8),"bswap.4", IP);
161    Value* Tmp3 = BinaryOperator::createLShr(V,
162                              ConstantInt::get(V->getType(),24),"bswap.3", IP);
163    Value* Tmp2 = BinaryOperator::createLShr(V,
164                              ConstantInt::get(V->getType(),40),"bswap.2", IP);
165    Value* Tmp1 = BinaryOperator::createLShr(V,
166                              ConstantInt::get(V->getType(),56),"bswap.1", IP);
167    Tmp7 = BinaryOperator::createAnd(Tmp7,
168                             ConstantInt::get(Type::Int64Ty,
169                               0xFF000000000000ULL),
170                             "bswap.and7", IP);
171    Tmp6 = BinaryOperator::createAnd(Tmp6,
172                             ConstantInt::get(Type::Int64Ty, 0xFF0000000000ULL),
173                             "bswap.and6", IP);
174    Tmp5 = BinaryOperator::createAnd(Tmp5,
175                             ConstantInt::get(Type::Int64Ty, 0xFF00000000ULL),
176                             "bswap.and5", IP);
177    Tmp4 = BinaryOperator::createAnd(Tmp4,
178                             ConstantInt::get(Type::Int64Ty, 0xFF000000ULL),
179                             "bswap.and4", IP);
180    Tmp3 = BinaryOperator::createAnd(Tmp3,
181                             ConstantInt::get(Type::Int64Ty, 0xFF0000ULL),
182                             "bswap.and3", IP);
183    Tmp2 = BinaryOperator::createAnd(Tmp2,
184                             ConstantInt::get(Type::Int64Ty, 0xFF00ULL),
185                             "bswap.and2", IP);
186    Tmp8 = BinaryOperator::createOr(Tmp8, Tmp7, "bswap.or1", IP);
187    Tmp6 = BinaryOperator::createOr(Tmp6, Tmp5, "bswap.or2", IP);
188    Tmp4 = BinaryOperator::createOr(Tmp4, Tmp3, "bswap.or3", IP);
189    Tmp2 = BinaryOperator::createOr(Tmp2, Tmp1, "bswap.or4", IP);
190    Tmp8 = BinaryOperator::createOr(Tmp8, Tmp6, "bswap.or5", IP);
191    Tmp4 = BinaryOperator::createOr(Tmp4, Tmp2, "bswap.or6", IP);
192    V = BinaryOperator::createOr(Tmp8, Tmp4, "bswap.i64", IP);
193    break;
194  }
195  }
196  return V;
197}
198
199/// LowerCTPOP - Emit the code to lower ctpop of V before the specified
200/// instruction IP.
201static Value *LowerCTPOP(Value *V, Instruction *IP) {
202  assert(V->getType()->isInteger() && "Can't ctpop a non-integer type!");
203
204  static const uint64_t MaskValues[6] = {
205    0x5555555555555555ULL, 0x3333333333333333ULL,
206    0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
207    0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
208  };
209
210  unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
211  unsigned WordSize = (BitSize + 63) / 64;
212  Value *Count = ConstantInt::get(V->getType(), 0);
213
214  for (unsigned n = 0; n < WordSize; ++n) {
215    Value *PartValue = V;
216    for (unsigned i = 1, ct = 0; i < (BitSize>64 ? 64 : BitSize);
217         i <<= 1, ++ct) {
218      Value *MaskCst = ConstantInt::get(V->getType(), MaskValues[ct]);
219      Value *LHS = BinaryOperator::createAnd(
220                     PartValue, MaskCst, "cppop.and1", IP);
221      Value *VShift = BinaryOperator::createLShr(PartValue,
222                        ConstantInt::get(V->getType(), i), "ctpop.sh", IP);
223      Value *RHS = BinaryOperator::createAnd(VShift, MaskCst, "cppop.and2", IP);
224      PartValue = BinaryOperator::createAdd(LHS, RHS, "ctpop.step", IP);
225    }
226    Count = BinaryOperator::createAdd(PartValue, Count, "ctpop.part", IP);
227    if (BitSize > 64) {
228      V = BinaryOperator::createLShr(V, ConstantInt::get(V->getType(), 64),
229                                     "ctpop.part.sh", IP);
230      BitSize -= 64;
231    }
232  }
233
234  return CastInst::createIntegerCast(Count, Type::Int32Ty, false, "ctpop", IP);
235}
236
237/// LowerCTLZ - Emit the code to lower ctlz of V before the specified
238/// instruction IP.
239static Value *LowerCTLZ(Value *V, Instruction *IP) {
240
241  unsigned BitSize = V->getType()->getPrimitiveSizeInBits();
242  for (unsigned i = 1; i < BitSize; i <<= 1) {
243    Value *ShVal = ConstantInt::get(V->getType(), i);
244    ShVal = BinaryOperator::createLShr(V, ShVal, "ctlz.sh", IP);
245    V = BinaryOperator::createOr(V, ShVal, "ctlz.step", IP);
246  }
247
248  V = BinaryOperator::createNot(V, "", IP);
249  return LowerCTPOP(V, IP);
250}
251
252/// Convert the llvm.part.select.iX.iY intrinsic. This intrinsic takes
253/// three integer arguments. The first argument is the Value from which the
254/// bits will be selected. It may be of any bit width. The second and third
255/// arguments specify a range of bits to select with the second argument
256/// specifying the low bit and the third argument specifying the high bit. Both
257/// must be type i32. The result is the corresponding selected bits from the
258/// Value in the same width as the Value (first argument). If the low bit index
259/// is higher than the high bit index then the inverse selection is done and
260/// the bits are returned in inverse order.
261/// @brief Lowering of llvm.part.select intrinsic.
262static Instruction *LowerPartSelect(CallInst *CI) {
263  // Make sure we're dealing with a part select intrinsic here
264  Function *F = CI->getCalledFunction();
265  const FunctionType *FT = F->getFunctionType();
266  if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
267      FT->getNumParams() != 3 || !FT->getParamType(0)->isInteger() ||
268      !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger())
269    return CI;
270
271  // Get the intrinsic implementation function by converting all the . to _
272  // in the intrinsic's function name and then reconstructing the function
273  // declaration.
274  std::string Name(F->getName());
275  for (unsigned i = 4; i < Name.length(); ++i)
276    if (Name[i] == '.')
277      Name[i] = '_';
278  Module* M = F->getParent();
279  F = cast<Function>(M->getOrInsertFunction(Name, FT));
280  F->setLinkage(GlobalValue::WeakLinkage);
281
282  // If we haven't defined the impl function yet, do so now
283  if (F->isDeclaration()) {
284
285    // Get the arguments to the function
286    Function::arg_iterator args = F->arg_begin();
287    Value* Val = args++; Val->setName("Val");
288    Value* Lo = args++; Lo->setName("Lo");
289    Value* Hi  = args++; Hi->setName("High");
290
291    // We want to select a range of bits here such that [Hi, Lo] is shifted
292    // down to the low bits. However, it is quite possible that Hi is smaller
293    // than Lo in which case the bits have to be reversed.
294
295    // Create the blocks we will need for the two cases (forward, reverse)
296    BasicBlock* CurBB   = new BasicBlock("entry", F);
297    BasicBlock *RevSize = new BasicBlock("revsize", CurBB->getParent());
298    BasicBlock *FwdSize = new BasicBlock("fwdsize", CurBB->getParent());
299    BasicBlock *Compute = new BasicBlock("compute", CurBB->getParent());
300    BasicBlock *Reverse = new BasicBlock("reverse", CurBB->getParent());
301    BasicBlock *RsltBlk = new BasicBlock("result",  CurBB->getParent());
302
303    // Cast Hi and Lo to the size of Val so the widths are all the same
304    if (Hi->getType() != Val->getType())
305      Hi = CastInst::createIntegerCast(Hi, Val->getType(), false,
306                                         "tmp", CurBB);
307    if (Lo->getType() != Val->getType())
308      Lo = CastInst::createIntegerCast(Lo, Val->getType(), false,
309                                          "tmp", CurBB);
310
311    // Compute a few things that both cases will need, up front.
312    Constant* Zero = ConstantInt::get(Val->getType(), 0);
313    Constant* One = ConstantInt::get(Val->getType(), 1);
314    Constant* AllOnes = ConstantInt::getAllOnesValue(Val->getType());
315
316    // Compare the Hi and Lo bit positions. This is used to determine
317    // which case we have (forward or reverse)
318    ICmpInst *Cmp = new ICmpInst(ICmpInst::ICMP_ULT, Hi, Lo, "less",CurBB);
319    new BranchInst(RevSize, FwdSize, Cmp, CurBB);
320
321    // First, copmute the number of bits in the forward case.
322    Instruction* FBitSize =
323      BinaryOperator::createSub(Hi, Lo,"fbits", FwdSize);
324    new BranchInst(Compute, FwdSize);
325
326    // Second, compute the number of bits in the reverse case.
327    Instruction* RBitSize =
328      BinaryOperator::createSub(Lo, Hi, "rbits", RevSize);
329    new BranchInst(Compute, RevSize);
330
331    // Now, compute the bit range. Start by getting the bitsize and the shift
332    // amount (either Hi or Lo) from PHI nodes. Then we compute a mask for
333    // the number of bits we want in the range. We shift the bits down to the
334    // least significant bits, apply the mask to zero out unwanted high bits,
335    // and we have computed the "forward" result. It may still need to be
336    // reversed.
337
338    // Get the BitSize from one of the two subtractions
339    PHINode *BitSize = new PHINode(Val->getType(), "bits", Compute);
340    BitSize->reserveOperandSpace(2);
341    BitSize->addIncoming(FBitSize, FwdSize);
342    BitSize->addIncoming(RBitSize, RevSize);
343
344    // Get the ShiftAmount as the smaller of Hi/Lo
345    PHINode *ShiftAmt = new PHINode(Val->getType(), "shiftamt", Compute);
346    ShiftAmt->reserveOperandSpace(2);
347    ShiftAmt->addIncoming(Lo, FwdSize);
348    ShiftAmt->addIncoming(Hi, RevSize);
349
350    // Increment the bit size
351    Instruction *BitSizePlusOne =
352      BinaryOperator::createAdd(BitSize, One, "bits", Compute);
353
354    // Create a Mask to zero out the high order bits.
355    Instruction* Mask =
356      BinaryOperator::createShl(AllOnes, BitSizePlusOne, "mask", Compute);
357    Mask = BinaryOperator::createNot(Mask, "mask", Compute);
358
359    // Shift the bits down and apply the mask
360    Instruction* FRes =
361      BinaryOperator::createLShr(Val, ShiftAmt, "fres", Compute);
362    FRes = BinaryOperator::createAnd(FRes, Mask, "fres", Compute);
363    new BranchInst(Reverse, RsltBlk, Cmp, Compute);
364
365    // In the Reverse block we have the mask already in FRes but we must reverse
366    // it by shifting FRes bits right and putting them in RRes by shifting them
367    // in from left.
368
369    // First set up our loop counters
370    PHINode *Count = new PHINode(Val->getType(), "count", Reverse);
371    Count->reserveOperandSpace(2);
372    Count->addIncoming(BitSizePlusOne, Compute);
373
374    // Next, get the value that we are shifting.
375    PHINode *BitsToShift   = new PHINode(Val->getType(), "val", Reverse);
376    BitsToShift->reserveOperandSpace(2);
377    BitsToShift->addIncoming(FRes, Compute);
378
379    // Finally, get the result of the last computation
380    PHINode *RRes  = new PHINode(Val->getType(), "rres", Reverse);
381    RRes->reserveOperandSpace(2);
382    RRes->addIncoming(Zero, Compute);
383
384    // Decrement the counter
385    Instruction *Decr = BinaryOperator::createSub(Count, One, "decr", Reverse);
386    Count->addIncoming(Decr, Reverse);
387
388    // Compute the Bit that we want to move
389    Instruction *Bit =
390      BinaryOperator::createAnd(BitsToShift, One, "bit", Reverse);
391
392    // Compute the new value for next iteration.
393    Instruction *NewVal =
394      BinaryOperator::createLShr(BitsToShift, One, "rshift", Reverse);
395    BitsToShift->addIncoming(NewVal, Reverse);
396
397    // Shift the bit into the low bits of the result.
398    Instruction *NewRes =
399      BinaryOperator::createShl(RRes, One, "lshift", Reverse);
400    NewRes = BinaryOperator::createOr(NewRes, Bit, "addbit", Reverse);
401    RRes->addIncoming(NewRes, Reverse);
402
403    // Terminate loop if we've moved all the bits.
404    ICmpInst *Cond =
405      new ICmpInst(ICmpInst::ICMP_EQ, Decr, Zero, "cond", Reverse);
406    new BranchInst(RsltBlk, Reverse, Cond, Reverse);
407
408    // Finally, in the result block, select one of the two results with a PHI
409    // node and return the result;
410    CurBB = RsltBlk;
411    PHINode *BitSelect = new PHINode(Val->getType(), "part_select", CurBB);
412    BitSelect->reserveOperandSpace(2);
413    BitSelect->addIncoming(FRes, Compute);
414    BitSelect->addIncoming(NewRes, Reverse);
415    new ReturnInst(BitSelect, CurBB);
416  }
417
418  // Return a call to the implementation function
419  Value *Args[] = {
420    CI->getOperand(1),
421    CI->getOperand(2),
422    CI->getOperand(3)
423  };
424  return new CallInst(F, Args, sizeof(Args)/sizeof(Args[0]), CI->getName(), CI);
425}
426
427/// Convert the llvm.part.set.iX.iY.iZ intrinsic. This intrinsic takes
428/// four integer arguments (iAny %Value, iAny %Replacement, i32 %Low, i32 %High)
429/// The first two arguments can be any bit width. The result is the same width
430/// as %Value. The operation replaces bits between %Low and %High with the value
431/// in %Replacement. If %Replacement is not the same width, it is truncated or
432/// zero extended as appropriate to fit the bits being replaced. If %Low is
433/// greater than %High then the inverse set of bits are replaced.
434/// @brief Lowering of llvm.bit.part.set intrinsic.
435static Instruction *LowerPartSet(CallInst *CI) {
436  // Make sure we're dealing with a part select intrinsic here
437  Function *F = CI->getCalledFunction();
438  const FunctionType *FT = F->getFunctionType();
439  if (!F->isDeclaration() || !FT->getReturnType()->isInteger() ||
440      FT->getNumParams() != 4 || !FT->getParamType(0)->isInteger() ||
441      !FT->getParamType(1)->isInteger() || !FT->getParamType(2)->isInteger() ||
442      !FT->getParamType(3)->isInteger())
443    return CI;
444
445  // Get the intrinsic implementation function by converting all the . to _
446  // in the intrinsic's function name and then reconstructing the function
447  // declaration.
448  std::string Name(F->getName());
449  for (unsigned i = 4; i < Name.length(); ++i)
450    if (Name[i] == '.')
451      Name[i] = '_';
452  Module* M = F->getParent();
453  F = cast<Function>(M->getOrInsertFunction(Name, FT));
454  F->setLinkage(GlobalValue::WeakLinkage);
455
456  // If we haven't defined the impl function yet, do so now
457  if (F->isDeclaration()) {
458    // Get the arguments for the function.
459    Function::arg_iterator args = F->arg_begin();
460    Value* Val = args++; Val->setName("Val");
461    Value* Rep = args++; Rep->setName("Rep");
462    Value* Lo  = args++; Lo->setName("Lo");
463    Value* Hi  = args++; Hi->setName("Hi");
464
465    // Get some types we need
466    const IntegerType* ValTy = cast<IntegerType>(Val->getType());
467    const IntegerType* RepTy = cast<IntegerType>(Rep->getType());
468    uint32_t ValBits = ValTy->getBitWidth();
469    uint32_t RepBits = RepTy->getBitWidth();
470
471    // Constant Definitions
472    ConstantInt* RepBitWidth = ConstantInt::get(Type::Int32Ty, RepBits);
473    ConstantInt* RepMask = ConstantInt::getAllOnesValue(RepTy);
474    ConstantInt* ValMask = ConstantInt::getAllOnesValue(ValTy);
475    ConstantInt* One = ConstantInt::get(Type::Int32Ty, 1);
476    ConstantInt* ValOne = ConstantInt::get(ValTy, 1);
477    ConstantInt* Zero = ConstantInt::get(Type::Int32Ty, 0);
478    ConstantInt* ValZero = ConstantInt::get(ValTy, 0);
479
480    // Basic blocks we fill in below.
481    BasicBlock* entry = new BasicBlock("entry", F, 0);
482    BasicBlock* large = new BasicBlock("large", F, 0);
483    BasicBlock* small = new BasicBlock("small", F, 0);
484    BasicBlock* reverse = new BasicBlock("reverse", F, 0);
485    BasicBlock* result = new BasicBlock("result", F, 0);
486
487    // BASIC BLOCK: entry
488    // First, get the number of bits that we're placing as an i32
489    ICmpInst* is_forward =
490      new ICmpInst(ICmpInst::ICMP_ULT, Lo, Hi, "", entry);
491    SelectInst* Hi_pn = new SelectInst(is_forward, Hi, Lo, "", entry);
492    SelectInst* Lo_pn = new SelectInst(is_forward, Lo, Hi, "", entry);
493    BinaryOperator* NumBits = BinaryOperator::createSub(Hi_pn, Lo_pn, "",entry);
494    NumBits = BinaryOperator::createAdd(NumBits, One, "", entry);
495    // Now, convert Lo and Hi to ValTy bit width
496    if (ValBits > 32) {
497      Lo = new ZExtInst(Lo_pn, ValTy, "", entry);
498    } else if (ValBits < 32) {
499      Lo = new TruncInst(Lo_pn, ValTy, "", entry);
500    }
501    // Determine if the replacement bits are larger than the number of bits we
502    // are replacing and deal with it.
503    ICmpInst* is_large =
504      new ICmpInst(ICmpInst::ICMP_ULT, NumBits, RepBitWidth, "", entry);
505    new BranchInst(large, small, is_large, entry);
506
507    // BASIC BLOCK: large
508    Instruction* MaskBits =
509      BinaryOperator::createSub(RepBitWidth, NumBits, "", large);
510    MaskBits = CastInst::createIntegerCast(MaskBits, RepMask->getType(),
511                                           false, "", large);
512    BinaryOperator* Mask1 =
513      BinaryOperator::createLShr(RepMask, MaskBits, "", large);
514    BinaryOperator* Rep2 = BinaryOperator::createAnd(Mask1, Rep, "", large);
515    new BranchInst(small, large);
516
517    // BASIC BLOCK: small
518    PHINode* Rep3 = new PHINode(RepTy, "", small);
519    Rep3->reserveOperandSpace(2);
520    Rep3->addIncoming(Rep2, large);
521    Rep3->addIncoming(Rep, entry);
522    Value* Rep4 = Rep3;
523    if (ValBits > RepBits)
524      Rep4 = new ZExtInst(Rep3, ValTy, "", small);
525    else if (ValBits < RepBits)
526      Rep4 = new TruncInst(Rep3, ValTy, "", small);
527    new BranchInst(result, reverse, is_forward, small);
528
529    // BASIC BLOCK: reverse (reverses the bits of the replacement)
530    // Set up our loop counter as a PHI so we can decrement on each iteration.
531    // We will loop for the number of bits in the replacement value.
532    PHINode *Count = new PHINode(Type::Int32Ty, "count", reverse);
533    Count->reserveOperandSpace(2);
534    Count->addIncoming(NumBits, small);
535
536    // Get the value that we are shifting bits out of as a PHI because
537    // we'll change this with each iteration.
538    PHINode *BitsToShift   = new PHINode(Val->getType(), "val", reverse);
539    BitsToShift->reserveOperandSpace(2);
540    BitsToShift->addIncoming(Rep4, small);
541
542    // Get the result of the last computation or zero on first iteration
543    PHINode *RRes  = new PHINode(Val->getType(), "rres", reverse);
544    RRes->reserveOperandSpace(2);
545    RRes->addIncoming(ValZero, small);
546
547    // Decrement the loop counter by one
548    Instruction *Decr = BinaryOperator::createSub(Count, One, "", reverse);
549    Count->addIncoming(Decr, reverse);
550
551    // Get the bit that we want to move into the result
552    Value *Bit = BinaryOperator::createAnd(BitsToShift, ValOne, "", reverse);
553
554    // Compute the new value of the bits to shift for the next iteration.
555    Value *NewVal = BinaryOperator::createLShr(BitsToShift, ValOne,"", reverse);
556    BitsToShift->addIncoming(NewVal, reverse);
557
558    // Shift the bit we extracted into the low bit of the result.
559    Instruction *NewRes = BinaryOperator::createShl(RRes, ValOne, "", reverse);
560    NewRes = BinaryOperator::createOr(NewRes, Bit, "", reverse);
561    RRes->addIncoming(NewRes, reverse);
562
563    // Terminate loop if we've moved all the bits.
564    ICmpInst *Cond = new ICmpInst(ICmpInst::ICMP_EQ, Decr, Zero, "", reverse);
565    new BranchInst(result, reverse, Cond, reverse);
566
567    // BASIC BLOCK: result
568    PHINode *Rplcmnt  = new PHINode(Val->getType(), "", result);
569    Rplcmnt->reserveOperandSpace(2);
570    Rplcmnt->addIncoming(NewRes, reverse);
571    Rplcmnt->addIncoming(Rep4, small);
572    Value* t0   = CastInst::createIntegerCast(NumBits,ValTy,false,"",result);
573    Value* t1   = BinaryOperator::createShl(ValMask, Lo, "", result);
574    Value* t2   = BinaryOperator::createNot(t1, "", result);
575    Value* t3   = BinaryOperator::createShl(t1, t0, "", result);
576    Value* t4   = BinaryOperator::createOr(t2, t3, "", result);
577    Value* t5   = BinaryOperator::createAnd(t4, Val, "", result);
578    Value* t6   = BinaryOperator::createShl(Rplcmnt, Lo, "", result);
579    Value* Rslt = BinaryOperator::createOr(t5, t6, "part_set", result);
580    new ReturnInst(Rslt, result);
581  }
582
583  // Return a call to the implementation function
584  Value *Args[] = {
585    CI->getOperand(1),
586    CI->getOperand(2),
587    CI->getOperand(3),
588    CI->getOperand(4)
589  };
590  return new CallInst(F, Args, sizeof(Args)/sizeof(Args[0]), CI->getName(), CI);
591}
592
593
594void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
595  Function *Callee = CI->getCalledFunction();
596  assert(Callee && "Cannot lower an indirect call!");
597
598  switch (Callee->getIntrinsicID()) {
599  case Intrinsic::not_intrinsic:
600    cerr << "Cannot lower a call to a non-intrinsic function '"
601         << Callee->getName() << "'!\n";
602    abort();
603  default:
604    cerr << "Error: Code generator does not support intrinsic function '"
605         << Callee->getName() << "'!\n";
606    abort();
607
608    // The setjmp/longjmp intrinsics should only exist in the code if it was
609    // never optimized (ie, right out of the CFE), or if it has been hacked on
610    // by the lowerinvoke pass.  In both cases, the right thing to do is to
611    // convert the call to an explicit setjmp or longjmp call.
612  case Intrinsic::setjmp: {
613    static Constant *SetjmpFCache = 0;
614    Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin()+1, CI->op_end(),
615                               Type::Int32Ty, SetjmpFCache);
616    if (CI->getType() != Type::VoidTy)
617      CI->replaceAllUsesWith(V);
618    break;
619  }
620  case Intrinsic::sigsetjmp:
621     if (CI->getType() != Type::VoidTy)
622       CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
623     break;
624
625  case Intrinsic::longjmp: {
626    static Constant *LongjmpFCache = 0;
627    ReplaceCallWith("longjmp", CI, CI->op_begin()+1, CI->op_end(),
628                    Type::VoidTy, LongjmpFCache);
629    break;
630  }
631
632  case Intrinsic::siglongjmp: {
633    // Insert the call to abort
634    static Constant *AbortFCache = 0;
635    ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(),
636                    Type::VoidTy, AbortFCache);
637    break;
638  }
639  case Intrinsic::ctpop:
640    CI->replaceAllUsesWith(LowerCTPOP(CI->getOperand(1), CI));
641    break;
642
643  case Intrinsic::bswap:
644    CI->replaceAllUsesWith(LowerBSWAP(CI->getOperand(1), CI));
645    break;
646
647  case Intrinsic::ctlz:
648    CI->replaceAllUsesWith(LowerCTLZ(CI->getOperand(1), CI));
649    break;
650
651  case Intrinsic::cttz: {
652    // cttz(x) -> ctpop(~X & (X-1))
653    Value *Src = CI->getOperand(1);
654    Value *NotSrc = BinaryOperator::createNot(Src, Src->getName()+".not", CI);
655    Value *SrcM1  = ConstantInt::get(Src->getType(), 1);
656    SrcM1 = BinaryOperator::createSub(Src, SrcM1, "", CI);
657    Src = LowerCTPOP(BinaryOperator::createAnd(NotSrc, SrcM1, "", CI), CI);
658    CI->replaceAllUsesWith(Src);
659    break;
660  }
661
662  case Intrinsic::part_select:
663    CI->replaceAllUsesWith(LowerPartSelect(CI));
664    break;
665
666  case Intrinsic::part_set:
667    CI->replaceAllUsesWith(LowerPartSet(CI));
668    break;
669
670  case Intrinsic::stacksave:
671  case Intrinsic::stackrestore: {
672    static bool Warned = false;
673    if (!Warned)
674      cerr << "WARNING: this target does not support the llvm.stack"
675           << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
676               "save" : "restore") << " intrinsic.\n";
677    Warned = true;
678    if (Callee->getIntrinsicID() == Intrinsic::stacksave)
679      CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
680    break;
681  }
682
683  case Intrinsic::returnaddress:
684  case Intrinsic::frameaddress:
685    cerr << "WARNING: this target does not support the llvm."
686         << (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
687             "return" : "frame") << "address intrinsic.\n";
688    CI->replaceAllUsesWith(ConstantPointerNull::get(
689                                            cast<PointerType>(CI->getType())));
690    break;
691
692  case Intrinsic::prefetch:
693    break;    // Simply strip out prefetches on unsupported architectures
694
695  case Intrinsic::pcmarker:
696    break;    // Simply strip out pcmarker on unsupported architectures
697  case Intrinsic::readcyclecounter: {
698    cerr << "WARNING: this target does not support the llvm.readcyclecoun"
699         << "ter intrinsic.  It is being lowered to a constant 0\n";
700    CI->replaceAllUsesWith(ConstantInt::get(Type::Int64Ty, 0));
701    break;
702  }
703
704  case Intrinsic::dbg_stoppoint:
705  case Intrinsic::dbg_region_start:
706  case Intrinsic::dbg_region_end:
707  case Intrinsic::dbg_func_start:
708  case Intrinsic::dbg_declare:
709    break;    // Simply strip out debugging intrinsics
710
711  case Intrinsic::eh_exception:
712  case Intrinsic::eh_selector:
713    CI->replaceAllUsesWith(Constant::getNullValue(CI->getType()));
714    break;
715
716  case Intrinsic::eh_typeid_for:
717    // Return something different to eh_selector.
718    CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
719    break;
720
721  case Intrinsic::var_annotation:
722    break;   // Strip out annotate intrinsic
723
724  case Intrinsic::memcpy_i32:
725  case Intrinsic::memcpy_i64: {
726    static Constant *MemcpyFCache = 0;
727    Value *Size = CI->getOperand(3);
728    const Type *IntPtr = TD.getIntPtrType();
729    if (Size->getType()->getPrimitiveSizeInBits() <
730        IntPtr->getPrimitiveSizeInBits())
731      Size = new ZExtInst(Size, IntPtr, "", CI);
732    else if (Size->getType()->getPrimitiveSizeInBits() >
733             IntPtr->getPrimitiveSizeInBits())
734      Size = new TruncInst(Size, IntPtr, "", CI);
735    Value *Ops[3];
736    Ops[0] = CI->getOperand(1);
737    Ops[1] = CI->getOperand(2);
738    Ops[2] = Size;
739    ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
740                    MemcpyFCache);
741    break;
742  }
743  case Intrinsic::memmove_i32:
744  case Intrinsic::memmove_i64: {
745    static Constant *MemmoveFCache = 0;
746    Value *Size = CI->getOperand(3);
747    const Type *IntPtr = TD.getIntPtrType();
748    if (Size->getType()->getPrimitiveSizeInBits() <
749        IntPtr->getPrimitiveSizeInBits())
750      Size = new ZExtInst(Size, IntPtr, "", CI);
751    else if (Size->getType()->getPrimitiveSizeInBits() >
752             IntPtr->getPrimitiveSizeInBits())
753      Size = new TruncInst(Size, IntPtr, "", CI);
754    Value *Ops[3];
755    Ops[0] = CI->getOperand(1);
756    Ops[1] = CI->getOperand(2);
757    Ops[2] = Size;
758    ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
759                    MemmoveFCache);
760    break;
761  }
762  case Intrinsic::memset_i32:
763  case Intrinsic::memset_i64: {
764    static Constant *MemsetFCache = 0;
765    Value *Size = CI->getOperand(3);
766    const Type *IntPtr = TD.getIntPtrType();
767    if (Size->getType()->getPrimitiveSizeInBits() <
768        IntPtr->getPrimitiveSizeInBits())
769      Size = new ZExtInst(Size, IntPtr, "", CI);
770    else if (Size->getType()->getPrimitiveSizeInBits() >
771             IntPtr->getPrimitiveSizeInBits())
772      Size = new TruncInst(Size, IntPtr, "", CI);
773    Value *Ops[3];
774    Ops[0] = CI->getOperand(1);
775    // Extend the amount to i32.
776    Ops[1] = new ZExtInst(CI->getOperand(2), Type::Int32Ty, "", CI);
777    Ops[2] = Size;
778    ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
779                    MemsetFCache);
780    break;
781  }
782  case Intrinsic::sqrt_f32: {
783    static Constant *sqrtfFCache = 0;
784    ReplaceCallWith("sqrtf", CI, CI->op_begin()+1, CI->op_end(),
785                    Type::FloatTy, sqrtfFCache);
786    break;
787  }
788  case Intrinsic::sqrt_f64: {
789    static Constant *sqrtFCache = 0;
790    ReplaceCallWith("sqrt", CI, CI->op_begin()+1, CI->op_end(),
791                    Type::DoubleTy, sqrtFCache);
792    break;
793  }
794  }
795
796  assert(CI->use_empty() &&
797         "Lowering should have eliminated any uses of the intrinsic call!");
798  CI->eraseFromParent();
799}
800