SelectionDAGBuilder.cpp revision c05d30601ced172b55be81bb529df6be91d6ae15
1//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements routines for translating from LLVM IR into SelectionDAG IR.
11//
12//===----------------------------------------------------------------------===//
13
14#define DEBUG_TYPE "isel"
15#include "SDNodeDbgValue.h"
16#include "SelectionDAGBuilder.h"
17#include "llvm/ADT/BitVector.h"
18#include "llvm/ADT/PostOrderIterator.h"
19#include "llvm/ADT/SmallSet.h"
20#include "llvm/Analysis/AliasAnalysis.h"
21#include "llvm/Analysis/ConstantFolding.h"
22#include "llvm/Analysis/ValueTracking.h"
23#include "llvm/Constants.h"
24#include "llvm/CallingConv.h"
25#include "llvm/DebugInfo.h"
26#include "llvm/DerivedTypes.h"
27#include "llvm/Function.h"
28#include "llvm/GlobalVariable.h"
29#include "llvm/InlineAsm.h"
30#include "llvm/Instructions.h"
31#include "llvm/Intrinsics.h"
32#include "llvm/IntrinsicInst.h"
33#include "llvm/LLVMContext.h"
34#include "llvm/Module.h"
35#include "llvm/CodeGen/Analysis.h"
36#include "llvm/CodeGen/FastISel.h"
37#include "llvm/CodeGen/FunctionLoweringInfo.h"
38#include "llvm/CodeGen/GCStrategy.h"
39#include "llvm/CodeGen/GCMetadata.h"
40#include "llvm/CodeGen/MachineFunction.h"
41#include "llvm/CodeGen/MachineFrameInfo.h"
42#include "llvm/CodeGen/MachineInstrBuilder.h"
43#include "llvm/CodeGen/MachineJumpTableInfo.h"
44#include "llvm/CodeGen/MachineModuleInfo.h"
45#include "llvm/CodeGen/MachineRegisterInfo.h"
46#include "llvm/CodeGen/SelectionDAG.h"
47#include "llvm/Target/TargetData.h"
48#include "llvm/Target/TargetFrameLowering.h"
49#include "llvm/Target/TargetInstrInfo.h"
50#include "llvm/Target/TargetIntrinsicInfo.h"
51#include "llvm/Target/TargetLibraryInfo.h"
52#include "llvm/Target/TargetLowering.h"
53#include "llvm/Target/TargetOptions.h"
54#include "llvm/Support/CommandLine.h"
55#include "llvm/Support/IntegersSubsetMapping.h"
56#include "llvm/Support/Debug.h"
57#include "llvm/Support/ErrorHandling.h"
58#include "llvm/Support/MathExtras.h"
59#include "llvm/Support/raw_ostream.h"
60#include <algorithm>
61using namespace llvm;
62
/// LimitFloatPrecision - Generate low-precision inline sequences for
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;

// Command-line option "limit-float-precision".  The parsed value is written
// straight into LimitFloatPrecision through cl::location, and the option is
// initialized to 0.
static cl::opt<unsigned, true>
LimitFPPrecision("limit-float-precision",
                 cl::desc("Generate low-precision inline sequences "
                          "for some float libcalls"),
                 cl::location(LimitFloatPrecision),
                 cl::init(0));
73
// Limit the width of DAG chains. This is important in general to
// prevent DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
// the safe approach, and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
// frontend. It is easy to induce this behavior with .ll code such as:
// %buffer = alloca [4096 x i8]
// %data = load [4096 x i8]* %argPtr
// store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;
89
// Forward declaration: getCopyFromParts hands every vector-typed value to this
// routine, which is defined after it.
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      EVT PartVT, EVT ValueVT);
93
94/// getCopyFromParts - Create a value that contains the specified legal parts
95/// combined into the value they represent.  If the parts combine to a type
96/// larger then ValueVT then AssertOp can be used to specify whether the extra
97/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
98/// (ISD::AssertSext).
99static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
100                                const SDValue *Parts,
101                                unsigned NumParts, EVT PartVT, EVT ValueVT,
102                                ISD::NodeType AssertOp = ISD::DELETED_NODE) {
103  if (ValueVT.isVector())
104    return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
105
106  assert(NumParts > 0 && "No parts to assemble!");
107  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
108  SDValue Val = Parts[0];
109
110  if (NumParts > 1) {
111    // Assemble the value from multiple parts.
112    if (ValueVT.isInteger()) {
113      unsigned PartBits = PartVT.getSizeInBits();
114      unsigned ValueBits = ValueVT.getSizeInBits();
115
116      // Assemble the power of 2 part.
117      unsigned RoundParts = NumParts & (NumParts - 1) ?
118        1 << Log2_32(NumParts) : NumParts;
119      unsigned RoundBits = PartBits * RoundParts;
120      EVT RoundVT = RoundBits == ValueBits ?
121        ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
122      SDValue Lo, Hi;
123
124      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
125
126      if (RoundParts > 2) {
127        Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
128                              PartVT, HalfVT);
129        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
130                              RoundParts / 2, PartVT, HalfVT);
131      } else {
132        Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
133        Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
134      }
135
136      if (TLI.isBigEndian())
137        std::swap(Lo, Hi);
138
139      Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
140
141      if (RoundParts < NumParts) {
142        // Assemble the trailing non-power-of-2 part.
143        unsigned OddParts = NumParts - RoundParts;
144        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
145        Hi = getCopyFromParts(DAG, DL,
146                              Parts + RoundParts, OddParts, PartVT, OddVT);
147
148        // Combine the round and odd parts.
149        Lo = Val;
150        if (TLI.isBigEndian())
151          std::swap(Lo, Hi);
152        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
153        Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
154        Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
155                         DAG.getConstant(Lo.getValueType().getSizeInBits(),
156                                         TLI.getPointerTy()));
157        Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
158        Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
159      }
160    } else if (PartVT.isFloatingPoint()) {
161      // FP split into multiple FP parts (for ppcf128)
162      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
163             "Unexpected split");
164      SDValue Lo, Hi;
165      Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
166      Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
167      if (TLI.isBigEndian())
168        std::swap(Lo, Hi);
169      Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
170    } else {
171      // FP split into integer parts (soft fp)
172      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
173             !PartVT.isVector() && "Unexpected split");
174      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
175      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT);
176    }
177  }
178
179  // There is now one part, held in Val.  Correct it to match ValueVT.
180  PartVT = Val.getValueType();
181
182  if (PartVT == ValueVT)
183    return Val;
184
185  if (PartVT.isInteger() && ValueVT.isInteger()) {
186    if (ValueVT.bitsLT(PartVT)) {
187      // For a truncate, see if we have any information to
188      // indicate whether the truncated bits will always be
189      // zero or sign-extension.
190      if (AssertOp != ISD::DELETED_NODE)
191        Val = DAG.getNode(AssertOp, DL, PartVT, Val,
192                          DAG.getValueType(ValueVT));
193      return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
194    }
195    return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
196  }
197
198  if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
199    // FP_ROUND's are always exact here.
200    if (ValueVT.bitsLT(Val.getValueType()))
201      return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val,
202                         DAG.getTargetConstant(1, TLI.getPointerTy()));
203
204    return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
205  }
206
207  if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
208    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
209
210  llvm_unreachable("Unknown mismatch!");
211}
212
213/// getCopyFromParts - Create a value that contains the specified legal parts
214/// combined into the value they represent.  If the parts combine to a type
215/// larger then ValueVT then AssertOp can be used to specify whether the extra
216/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
217/// (ISD::AssertSext).
218static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
219                                      const SDValue *Parts, unsigned NumParts,
220                                      EVT PartVT, EVT ValueVT) {
221  assert(ValueVT.isVector() && "Not a vector value");
222  assert(NumParts > 0 && "No parts to assemble!");
223  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
224  SDValue Val = Parts[0];
225
226  // Handle a multi-element vector.
227  if (NumParts > 1) {
228    EVT IntermediateVT, RegisterVT;
229    unsigned NumIntermediates;
230    unsigned NumRegs =
231    TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
232                               NumIntermediates, RegisterVT);
233    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
234    NumParts = NumRegs; // Silence a compiler warning.
235    assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
236    assert(RegisterVT == Parts[0].getValueType() &&
237           "Part type doesn't match part!");
238
239    // Assemble the parts into intermediate operands.
240    SmallVector<SDValue, 8> Ops(NumIntermediates);
241    if (NumIntermediates == NumParts) {
242      // If the register was not expanded, truncate or copy the value,
243      // as appropriate.
244      for (unsigned i = 0; i != NumParts; ++i)
245        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
246                                  PartVT, IntermediateVT);
247    } else if (NumParts > 0) {
248      // If the intermediate type was expanded, build the intermediate
249      // operands from the parts.
250      assert(NumParts % NumIntermediates == 0 &&
251             "Must expand into a divisible number of parts!");
252      unsigned Factor = NumParts / NumIntermediates;
253      for (unsigned i = 0; i != NumIntermediates; ++i)
254        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
255                                  PartVT, IntermediateVT);
256    }
257
258    // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
259    // intermediate operands.
260    Val = DAG.getNode(IntermediateVT.isVector() ?
261                      ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL,
262                      ValueVT, &Ops[0], NumIntermediates);
263  }
264
265  // There is now one part, held in Val.  Correct it to match ValueVT.
266  PartVT = Val.getValueType();
267
268  if (PartVT == ValueVT)
269    return Val;
270
271  if (PartVT.isVector()) {
272    // If the element type of the source/dest vectors are the same, but the
273    // parts vector has more elements than the value vector, then we have a
274    // vector widening case (e.g. <2 x float> -> <4 x float>).  Extract the
275    // elements we want.
276    if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
277      assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
278             "Cannot narrow, it would be a lossy transformation");
279      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
280                         DAG.getIntPtrConstant(0));
281    }
282
283    // Vector/Vector bitcast.
284    if (ValueVT.getSizeInBits() == PartVT.getSizeInBits())
285      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
286
287    assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
288      "Cannot handle this kind of promotion");
289    // Promoted vector extract
290    bool Smaller = ValueVT.bitsLE(PartVT);
291    return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
292                       DL, ValueVT, Val);
293
294  }
295
296  // Trivial bitcast if the types are the same size and the destination
297  // vector type is legal.
298  if (PartVT.getSizeInBits() == ValueVT.getSizeInBits() &&
299      TLI.isTypeLegal(ValueVT))
300    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
301
302  // Handle cases such as i8 -> <1 x i1>
303  assert(ValueVT.getVectorNumElements() == 1 &&
304         "Only trivial scalar-to-vector conversions should get here!");
305
306  if (ValueVT.getVectorNumElements() == 1 &&
307      ValueVT.getVectorElementType() != PartVT) {
308    bool Smaller = ValueVT.bitsLE(PartVT);
309    Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
310                       DL, ValueVT.getScalarType(), Val);
311  }
312
313  return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
314}
315
316
317
318
// Forward declaration: getCopyToParts hands every vector-typed value to this
// routine, which is defined after it.
static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 EVT PartVT);
322
323/// getCopyToParts - Create a series of nodes that contain the specified value
324/// split into legal parts.  If the parts contain more bits than Val, then, for
325/// integers, ExtendKind can be used to specify how to generate the extra bits.
326static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
327                           SDValue Val, SDValue *Parts, unsigned NumParts,
328                           EVT PartVT,
329                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
330  EVT ValueVT = Val.getValueType();
331
332  // Handle the vector case separately.
333  if (ValueVT.isVector())
334    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
335
336  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
337  unsigned PartBits = PartVT.getSizeInBits();
338  unsigned OrigNumParts = NumParts;
339  assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
340
341  if (NumParts == 0)
342    return;
343
344  assert(!ValueVT.isVector() && "Vector case handled elsewhere");
345  if (PartVT == ValueVT) {
346    assert(NumParts == 1 && "No-op copy with multiple parts!");
347    Parts[0] = Val;
348    return;
349  }
350
351  if (NumParts * PartBits > ValueVT.getSizeInBits()) {
352    // If the parts cover more bits than the value has, promote the value.
353    if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
354      assert(NumParts == 1 && "Do not know what to promote to!");
355      Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
356    } else {
357      assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
358             ValueVT.isInteger() &&
359             "Unknown mismatch!");
360      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
361      Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
362      if (PartVT == MVT::x86mmx)
363        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
364    }
365  } else if (PartBits == ValueVT.getSizeInBits()) {
366    // Different types of the same size.
367    assert(NumParts == 1 && PartVT != ValueVT);
368    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
369  } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
370    // If the parts cover less bits than value has, truncate the value.
371    assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
372           ValueVT.isInteger() &&
373           "Unknown mismatch!");
374    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
375    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
376    if (PartVT == MVT::x86mmx)
377      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
378  }
379
380  // The value may have changed - recompute ValueVT.
381  ValueVT = Val.getValueType();
382  assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
383         "Failed to tile the value with PartVT!");
384
385  if (NumParts == 1) {
386    assert(PartVT == ValueVT && "Type conversion failed!");
387    Parts[0] = Val;
388    return;
389  }
390
391  // Expand the value into multiple parts.
392  if (NumParts & (NumParts - 1)) {
393    // The number of parts is not a power of 2.  Split off and copy the tail.
394    assert(PartVT.isInteger() && ValueVT.isInteger() &&
395           "Do not know what to expand to!");
396    unsigned RoundParts = 1 << Log2_32(NumParts);
397    unsigned RoundBits = RoundParts * PartBits;
398    unsigned OddParts = NumParts - RoundParts;
399    SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
400                                 DAG.getIntPtrConstant(RoundBits));
401    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT);
402
403    if (TLI.isBigEndian())
404      // The odd parts were reversed by getCopyToParts - unreverse them.
405      std::reverse(Parts + RoundParts, Parts + NumParts);
406
407    NumParts = RoundParts;
408    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
409    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
410  }
411
412  // The number of parts is a power of 2.  Repeatedly bisect the value using
413  // EXTRACT_ELEMENT.
414  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
415                         EVT::getIntegerVT(*DAG.getContext(),
416                                           ValueVT.getSizeInBits()),
417                         Val);
418
419  for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
420    for (unsigned i = 0; i < NumParts; i += StepSize) {
421      unsigned ThisBits = StepSize * PartBits / 2;
422      EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
423      SDValue &Part0 = Parts[i];
424      SDValue &Part1 = Parts[i+StepSize/2];
425
426      Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
427                          ThisVT, Part0, DAG.getIntPtrConstant(1));
428      Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
429                          ThisVT, Part0, DAG.getIntPtrConstant(0));
430
431      if (ThisBits == PartBits && ThisVT != PartVT) {
432        Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
433        Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
434      }
435    }
436  }
437
438  if (TLI.isBigEndian())
439    std::reverse(Parts, Parts + OrigNumParts);
440}
441
442
443/// getCopyToPartsVector - Create a series of nodes that contain the specified
444/// value split into legal parts.
445static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
446                                 SDValue Val, SDValue *Parts, unsigned NumParts,
447                                 EVT PartVT) {
448  EVT ValueVT = Val.getValueType();
449  assert(ValueVT.isVector() && "Not a vector");
450  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
451
452  if (NumParts == 1) {
453    if (PartVT == ValueVT) {
454      // Nothing to do.
455    } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
456      // Bitconvert vector->vector case.
457      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
458    } else if (PartVT.isVector() &&
459               PartVT.getVectorElementType() == ValueVT.getVectorElementType() &&
460               PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
461      EVT ElementVT = PartVT.getVectorElementType();
462      // Vector widening case, e.g. <2 x float> -> <4 x float>.  Shuffle in
463      // undef elements.
464      SmallVector<SDValue, 16> Ops;
465      for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
466        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
467                                  ElementVT, Val, DAG.getIntPtrConstant(i)));
468
469      for (unsigned i = ValueVT.getVectorNumElements(),
470           e = PartVT.getVectorNumElements(); i != e; ++i)
471        Ops.push_back(DAG.getUNDEF(ElementVT));
472
473      Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
474
475      // FIXME: Use CONCAT for 2x -> 4x.
476
477      //SDValue UndefElts = DAG.getUNDEF(VectorTy);
478      //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
479    } else if (PartVT.isVector() &&
480               PartVT.getVectorElementType().bitsGE(
481                 ValueVT.getVectorElementType()) &&
482               PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
483
484      // Promoted vector extract
485      bool Smaller = PartVT.bitsLE(ValueVT);
486      Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
487                        DL, PartVT, Val);
488    } else{
489      // Vector -> scalar conversion.
490      assert(ValueVT.getVectorNumElements() == 1 &&
491             "Only trivial vector-to-scalar conversions should get here!");
492      Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
493                        PartVT, Val, DAG.getIntPtrConstant(0));
494
495      bool Smaller = ValueVT.bitsLE(PartVT);
496      Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
497                         DL, PartVT, Val);
498    }
499
500    Parts[0] = Val;
501    return;
502  }
503
504  // Handle a multi-element vector.
505  EVT IntermediateVT, RegisterVT;
506  unsigned NumIntermediates;
507  unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
508                                                IntermediateVT,
509                                                NumIntermediates, RegisterVT);
510  unsigned NumElements = ValueVT.getVectorNumElements();
511
512  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
513  NumParts = NumRegs; // Silence a compiler warning.
514  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
515
516  // Split the vector into intermediate operands.
517  SmallVector<SDValue, 8> Ops(NumIntermediates);
518  for (unsigned i = 0; i != NumIntermediates; ++i) {
519    if (IntermediateVT.isVector())
520      Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
521                           IntermediateVT, Val,
522                   DAG.getIntPtrConstant(i * (NumElements / NumIntermediates)));
523    else
524      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
525                           IntermediateVT, Val, DAG.getIntPtrConstant(i));
526  }
527
528  // Split the intermediate operands into legal parts.
529  if (NumParts == NumIntermediates) {
530    // If the register was not expanded, promote or copy the value,
531    // as appropriate.
532    for (unsigned i = 0; i != NumParts; ++i)
533      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT);
534  } else if (NumParts > 0) {
535    // If the intermediate type was expanded, split each the value into
536    // legal parts.
537    assert(NumParts % NumIntermediates == 0 &&
538           "Must expand into a divisible number of parts!");
539    unsigned Factor = NumParts / NumIntermediates;
540    for (unsigned i = 0; i != NumIntermediates; ++i)
541      getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT);
542  }
543}
544
545
546
547
548namespace {
549  /// RegsForValue - This struct represents the registers (physical or virtual)
550  /// that a particular set of values is assigned, and the type information
551  /// about the value. The most common situation is to represent one value at a
552  /// time, but struct or array values are handled element-wise as multiple
553  /// values.  The splitting of aggregates is performed recursively, so that we
554  /// never have aggregate-typed registers. The values at this point do not
555  /// necessarily have legal types, so each value may require one or more
556  /// registers of some legal type.
557  ///
558  struct RegsForValue {
559    /// ValueVTs - The value types of the values, which may not be legal, and
560    /// may need be promoted or synthesized from one or more registers.
561    ///
562    SmallVector<EVT, 4> ValueVTs;
563
564    /// RegVTs - The value types of the registers. This is the same size as
565    /// ValueVTs and it records, for each value, what the type of the assigned
566    /// register or registers are. (Individual values are never synthesized
567    /// from more than one type of register.)
568    ///
569    /// With virtual registers, the contents of RegVTs is redundant with TLI's
570    /// getRegisterType member function, however when with physical registers
571    /// it is necessary to have a separate record of the types.
572    ///
573    SmallVector<EVT, 4> RegVTs;
574
575    /// Regs - This list holds the registers assigned to the values.
576    /// Each legal or promoted value requires one register, and each
577    /// expanded value requires multiple registers.
578    ///
579    SmallVector<unsigned, 4> Regs;
580
581    RegsForValue() {}
582
583    RegsForValue(const SmallVector<unsigned, 4> &regs,
584                 EVT regvt, EVT valuevt)
585      : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
586
587    RegsForValue(LLVMContext &Context, const TargetLowering &tli,
588                 unsigned Reg, Type *Ty) {
589      ComputeValueVTs(tli, Ty, ValueVTs);
590
591      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
592        EVT ValueVT = ValueVTs[Value];
593        unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
594        EVT RegisterVT = tli.getRegisterType(Context, ValueVT);
595        for (unsigned i = 0; i != NumRegs; ++i)
596          Regs.push_back(Reg + i);
597        RegVTs.push_back(RegisterVT);
598        Reg += NumRegs;
599      }
600    }
601
602    /// areValueTypesLegal - Return true if types of all the values are legal.
603    bool areValueTypesLegal(const TargetLowering &TLI) {
604      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
605        EVT RegisterVT = RegVTs[Value];
606        if (!TLI.isTypeLegal(RegisterVT))
607          return false;
608      }
609      return true;
610    }
611
612    /// append - Add the specified values to this one.
613    void append(const RegsForValue &RHS) {
614      ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
615      RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
616      Regs.append(RHS.Regs.begin(), RHS.Regs.end());
617    }
618
619    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
620    /// this value and returns the result as a ValueVTs value.  This uses
621    /// Chain/Flag as the input and updates them for the output Chain/Flag.
622    /// If the Flag pointer is NULL, no flag is used.
623    SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
624                            DebugLoc dl,
625                            SDValue &Chain, SDValue *Flag) const;
626
627    /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
628    /// specified value into the registers specified by this object.  This uses
629    /// Chain/Flag as the input and updates them for the output Chain/Flag.
630    /// If the Flag pointer is NULL, no flag is used.
631    void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
632                       SDValue &Chain, SDValue *Flag) const;
633
634    /// AddInlineAsmOperands - Add this value to the specified inlineasm node
635    /// operand list.  This adds the code marker, matching input operand index
636    /// (if applicable), and includes the number of values added into it.
637    void AddInlineAsmOperands(unsigned Kind,
638                              bool HasMatching, unsigned MatchingIdx,
639                              SelectionDAG &DAG,
640                              std::vector<SDValue> &Ops) const;
641  };
642}
643
644/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
645/// this value and returns the result as a ValueVT value.  This uses
646/// Chain/Flag as the input and updates them for the output Chain/Flag.
647/// If the Flag pointer is NULL, no flag is used.
648SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
649                                      FunctionLoweringInfo &FuncInfo,
650                                      DebugLoc dl,
651                                      SDValue &Chain, SDValue *Flag) const {
652  // A Value with type {} or [0 x %t] needs no registers.
653  if (ValueVTs.empty())
654    return SDValue();
655
656  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
657
658  // Assemble the legal parts into the final values.
659  SmallVector<SDValue, 4> Values(ValueVTs.size());
660  SmallVector<SDValue, 8> Parts;
661  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
662    // Copy the legal parts from the registers.
663    EVT ValueVT = ValueVTs[Value];
664    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
665    EVT RegisterVT = RegVTs[Value];
666
667    Parts.resize(NumRegs);
668    for (unsigned i = 0; i != NumRegs; ++i) {
669      SDValue P;
670      if (Flag == 0) {
671        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
672      } else {
673        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
674        *Flag = P.getValue(2);
675      }
676
677      Chain = P.getValue(1);
678      Parts[i] = P;
679
680      // If the source register was virtual and if we know something about it,
681      // add an assert node.
682      if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
683          !RegisterVT.isInteger() || RegisterVT.isVector())
684        continue;
685
686      const FunctionLoweringInfo::LiveOutInfo *LOI =
687        FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
688      if (!LOI)
689        continue;
690
691      unsigned RegSize = RegisterVT.getSizeInBits();
692      unsigned NumSignBits = LOI->NumSignBits;
693      unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes();
694
695      // FIXME: We capture more information than the dag can represent.  For
696      // now, just use the tightest assertzext/assertsext possible.
697      bool isSExt = true;
698      EVT FromVT(MVT::Other);
699      if (NumSignBits == RegSize)
700        isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1
701      else if (NumZeroBits >= RegSize-1)
702        isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1
703      else if (NumSignBits > RegSize-8)
704        isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8
705      else if (NumZeroBits >= RegSize-8)
706        isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8
707      else if (NumSignBits > RegSize-16)
708        isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16
709      else if (NumZeroBits >= RegSize-16)
710        isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
711      else if (NumSignBits > RegSize-32)
712        isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32
713      else if (NumZeroBits >= RegSize-32)
714        isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
715      else
716        continue;
717
718      // Add an assertion node.
719      assert(FromVT != MVT::Other);
720      Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
721                             RegisterVT, P, DAG.getValueType(FromVT));
722    }
723
724    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
725                                     NumRegs, RegisterVT, ValueVT);
726    Part += NumRegs;
727    Parts.clear();
728  }
729
730  return DAG.getNode(ISD::MERGE_VALUES, dl,
731                     DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
732                     &Values[0], ValueVTs.size());
733}
734
735/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
736/// specified value into the registers specified by this object.  This uses
737/// Chain/Flag as the input and updates them for the output Chain/Flag.
738/// If the Flag pointer is NULL, no flag is used.
739void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
740                                 SDValue &Chain, SDValue *Flag) const {
741  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
742
743  // Get the list of the values's legal parts.
744  unsigned NumRegs = Regs.size();
745  SmallVector<SDValue, 8> Parts(NumRegs);
746  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
747    EVT ValueVT = ValueVTs[Value];
748    unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
749    EVT RegisterVT = RegVTs[Value];
750
751    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
752                   &Parts[Part], NumParts, RegisterVT);
753    Part += NumParts;
754  }
755
756  // Copy the parts into the registers.
757  SmallVector<SDValue, 8> Chains(NumRegs);
758  for (unsigned i = 0; i != NumRegs; ++i) {
759    SDValue Part;
760    if (Flag == 0) {
761      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
762    } else {
763      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
764      *Flag = Part.getValue(1);
765    }
766
767    Chains[i] = Part.getValue(0);
768  }
769
770  if (NumRegs == 1 || Flag)
771    // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
772    // flagged to it. That is the CopyToReg nodes and the user are considered
773    // a single scheduling unit. If we create a TokenFactor and return it as
774    // chain, then the TokenFactor is both a predecessor (operand) of the
775    // user as well as a successor (the TF operands are flagged to the user).
776    // c1, f1 = CopyToReg
777    // c2, f2 = CopyToReg
778    // c3     = TokenFactor c1, c2
779    // ...
780    //        = op c3, ..., f2
781    Chain = Chains[NumRegs-1];
782  else
783    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
784}
785
786/// AddInlineAsmOperands - Add this value to the specified inlineasm node
787/// operand list.  This adds the code marker and includes the number of
788/// values added into it.
789void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
790                                        unsigned MatchingIdx,
791                                        SelectionDAG &DAG,
792                                        std::vector<SDValue> &Ops) const {
793  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
794
795  unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
796  if (HasMatching)
797    Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
798  else if (!Regs.empty() &&
799           TargetRegisterInfo::isVirtualRegister(Regs.front())) {
800    // Put the register class of the virtual registers in the flag word.  That
801    // way, later passes can recompute register class constraints for inline
802    // assembly as well as normal instructions.
803    // Don't do this for tied operands that can use the regclass information
804    // from the def.
805    const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
806    const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
807    Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
808  }
809
810  SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
811  Ops.push_back(Res);
812
813  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
814    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
815    EVT RegisterVT = RegVTs[Value];
816    for (unsigned i = 0; i != NumRegs; ++i) {
817      assert(Reg < Regs.size() && "Mismatch in # registers expected");
818      Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
819    }
820  }
821}
822
823void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa,
824                               const TargetLibraryInfo *li) {
825  AA = &aa;
826  GFI = gfi;
827  LibInfo = li;
828  TD = DAG.getTarget().getTargetData();
829  Context = DAG.getContext();
830  LPadToCallSiteMap.clear();
831}
832
833/// clear - Clear out the current SelectionDAG and the associated
834/// state and prepare this SelectionDAGBuilder object to be used
835/// for a new block. This doesn't clear out information about
836/// additional blocks that are needed to complete switch lowering
837/// or PHI node updating; that information is cleared out as it is
838/// consumed.
839void SelectionDAGBuilder::clear() {
840  NodeMap.clear();
841  UnusedArgNodeMap.clear();
842  PendingLoads.clear();
843  PendingExports.clear();
844  CurDebugLoc = DebugLoc();
845  HasTailCall = false;
846}
847
848/// clearDanglingDebugInfo - Clear the dangling debug information
849/// map. This function is separated from the clear so that debug
850/// information that is dangling in a basic block can be properly
851/// resolved in a different basic block. This allows the
852/// SelectionDAG to resolve dangling debug information attached
853/// to PHI nodes.
854void SelectionDAGBuilder::clearDanglingDebugInfo() {
855  DanglingDebugInfoMap.clear();
856}
857
858/// getRoot - Return the current virtual root of the Selection DAG,
859/// flushing any PendingLoad items. This must be done before emitting
860/// a store or any other node that may need to be ordered after any
861/// prior load instructions.
862///
863SDValue SelectionDAGBuilder::getRoot() {
864  if (PendingLoads.empty())
865    return DAG.getRoot();
866
867  if (PendingLoads.size() == 1) {
868    SDValue Root = PendingLoads[0];
869    DAG.setRoot(Root);
870    PendingLoads.clear();
871    return Root;
872  }
873
874  // Otherwise, we have to make a token factor node.
875  SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
876                               &PendingLoads[0], PendingLoads.size());
877  PendingLoads.clear();
878  DAG.setRoot(Root);
879  return Root;
880}
881
882/// getControlRoot - Similar to getRoot, but instead of flushing all the
883/// PendingLoad items, flush all the PendingExports items. It is necessary
884/// to do this before emitting a terminator instruction.
885///
886SDValue SelectionDAGBuilder::getControlRoot() {
887  SDValue Root = DAG.getRoot();
888
889  if (PendingExports.empty())
890    return Root;
891
892  // Turn all of the CopyToReg chains into one factored node.
893  if (Root.getOpcode() != ISD::EntryToken) {
894    unsigned i = 0, e = PendingExports.size();
895    for (; i != e; ++i) {
896      assert(PendingExports[i].getNode()->getNumOperands() > 1);
897      if (PendingExports[i].getNode()->getOperand(0) == Root)
898        break;  // Don't add the root if we already indirectly depend on it.
899    }
900
901    if (i == e)
902      PendingExports.push_back(Root);
903  }
904
905  Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
906                     &PendingExports[0],
907                     PendingExports.size());
908  PendingExports.clear();
909  DAG.setRoot(Root);
910  return Root;
911}
912
913void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) {
914  if (DAG.GetOrdering(Node) != 0) return; // Already has ordering.
915  DAG.AssignOrdering(Node, SDNodeOrder);
916
917  for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
918    AssignOrderingToNode(Node->getOperand(I).getNode());
919}
920
921void SelectionDAGBuilder::visit(const Instruction &I) {
922  // Set up outgoing PHI node register values before emitting the terminator.
923  if (isa<TerminatorInst>(&I))
924    HandlePHINodesInSuccessorBlocks(I.getParent());
925
926  CurDebugLoc = I.getDebugLoc();
927
928  visit(I.getOpcode(), I);
929
930  if (!isa<TerminatorInst>(&I) && !HasTailCall)
931    CopyToExportRegsIfNeeded(&I);
932
933  CurDebugLoc = DebugLoc();
934}
935
936void SelectionDAGBuilder::visitPHI(const PHINode &) {
937  llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
938}
939
940void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
941  // Note: this doesn't use InstVisitor, because it has to work with
942  // ConstantExpr's in addition to instructions.
943  switch (Opcode) {
944  default: llvm_unreachable("Unknown instruction type encountered!");
945    // Build the switch statement using the Instruction.def file.
946#define HANDLE_INST(NUM, OPCODE, CLASS) \
947    case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
948#include "llvm/Instruction.def"
949  }
950
951  // Assign the ordering to the freshly created DAG nodes.
952  if (NodeMap.count(&I)) {
953    ++SDNodeOrder;
954    AssignOrderingToNode(getValue(&I).getNode());
955  }
956}
957
958// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
959// generate the debug data structures now that we've seen its definition.
960void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
961                                                   SDValue Val) {
962  DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
963  if (DDI.getDI()) {
964    const DbgValueInst *DI = DDI.getDI();
965    DebugLoc dl = DDI.getdl();
966    unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
967    MDNode *Variable = DI->getVariable();
968    uint64_t Offset = DI->getOffset();
969    SDDbgValue *SDV;
970    if (Val.getNode()) {
971      if (!EmitFuncArgumentDbgValue(V, Variable, Offset, Val)) {
972        SDV = DAG.getDbgValue(Variable, Val.getNode(),
973                              Val.getResNo(), Offset, dl, DbgSDNodeOrder);
974        DAG.AddDbgValue(SDV, Val.getNode(), false);
975      }
976    } else
977      DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
978    DanglingDebugInfoMap[V] = DanglingDebugInfo();
979  }
980}
981
982/// getValue - Return an SDValue for the given Value.
983SDValue SelectionDAGBuilder::getValue(const Value *V) {
984  // If we already have an SDValue for this value, use it. It's important
985  // to do this first, so that we don't create a CopyFromReg if we already
986  // have a regular SDValue.
987  SDValue &N = NodeMap[V];
988  if (N.getNode()) return N;
989
990  // If there's a virtual register allocated and initialized for this
991  // value, use it.
992  DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
993  if (It != FuncInfo.ValueMap.end()) {
994    unsigned InReg = It->second;
995    RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
996    SDValue Chain = DAG.getEntryNode();
997    N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
998    resolveDanglingDebugInfo(V, N);
999    return N;
1000  }
1001
1002  // Otherwise create a new SDValue and remember it.
1003  SDValue Val = getValueImpl(V);
1004  NodeMap[V] = Val;
1005  resolveDanglingDebugInfo(V, Val);
1006  return Val;
1007}
1008
1009/// getNonRegisterValue - Return an SDValue for the given Value, but
1010/// don't look in FuncInfo.ValueMap for a virtual register.
1011SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
1012  // If we already have an SDValue for this value, use it.
1013  SDValue &N = NodeMap[V];
1014  if (N.getNode()) return N;
1015
1016  // Otherwise create a new SDValue and remember it.
1017  SDValue Val = getValueImpl(V);
1018  NodeMap[V] = Val;
1019  resolveDanglingDebugInfo(V, Val);
1020  return Val;
1021}
1022
1023/// getValueImpl - Helper function for getValue and getNonRegisterValue.
1024/// Create an SDValue for the given value.
1025SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
1026  if (const Constant *C = dyn_cast<Constant>(V)) {
1027    EVT VT = TLI.getValueType(V->getType(), true);
1028
1029    if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
1030      return DAG.getConstant(*CI, VT);
1031
1032    if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
1033      return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT);
1034
1035    if (isa<ConstantPointerNull>(C))
1036      return DAG.getConstant(0, TLI.getPointerTy());
1037
1038    if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
1039      return DAG.getConstantFP(*CFP, VT);
1040
1041    if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
1042      return DAG.getUNDEF(VT);
1043
1044    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
1045      visit(CE->getOpcode(), *CE);
1046      SDValue N1 = NodeMap[V];
1047      assert(N1.getNode() && "visit didn't populate the NodeMap!");
1048      return N1;
1049    }
1050
1051    if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
1052      SmallVector<SDValue, 4> Constants;
1053      for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
1054           OI != OE; ++OI) {
1055        SDNode *Val = getValue(*OI).getNode();
1056        // If the operand is an empty aggregate, there are no values.
1057        if (!Val) continue;
1058        // Add each leaf value from the operand to the Constants list
1059        // to form a flattened list of all the values.
1060        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
1061          Constants.push_back(SDValue(Val, i));
1062      }
1063
1064      return DAG.getMergeValues(&Constants[0], Constants.size(),
1065                                getCurDebugLoc());
1066    }
1067
1068    if (const ConstantDataSequential *CDS =
1069          dyn_cast<ConstantDataSequential>(C)) {
1070      SmallVector<SDValue, 4> Ops;
1071      for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
1072        SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
1073        // Add each leaf value from the operand to the Constants list
1074        // to form a flattened list of all the values.
1075        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
1076          Ops.push_back(SDValue(Val, i));
1077      }
1078
1079      if (isa<ArrayType>(CDS->getType()))
1080        return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc());
1081      return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
1082                                      VT, &Ops[0], Ops.size());
1083    }
1084
1085    if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
1086      assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
1087             "Unknown struct or array constant!");
1088
1089      SmallVector<EVT, 4> ValueVTs;
1090      ComputeValueVTs(TLI, C->getType(), ValueVTs);
1091      unsigned NumElts = ValueVTs.size();
1092      if (NumElts == 0)
1093        return SDValue(); // empty struct
1094      SmallVector<SDValue, 4> Constants(NumElts);
1095      for (unsigned i = 0; i != NumElts; ++i) {
1096        EVT EltVT = ValueVTs[i];
1097        if (isa<UndefValue>(C))
1098          Constants[i] = DAG.getUNDEF(EltVT);
1099        else if (EltVT.isFloatingPoint())
1100          Constants[i] = DAG.getConstantFP(0, EltVT);
1101        else
1102          Constants[i] = DAG.getConstant(0, EltVT);
1103      }
1104
1105      return DAG.getMergeValues(&Constants[0], NumElts,
1106                                getCurDebugLoc());
1107    }
1108
1109    if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
1110      return DAG.getBlockAddress(BA, VT);
1111
1112    VectorType *VecTy = cast<VectorType>(V->getType());
1113    unsigned NumElements = VecTy->getNumElements();
1114
1115    // Now that we know the number and type of the elements, get that number of
1116    // elements into the Ops array based on what kind of constant it is.
1117    SmallVector<SDValue, 16> Ops;
1118    if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
1119      for (unsigned i = 0; i != NumElements; ++i)
1120        Ops.push_back(getValue(CV->getOperand(i)));
1121    } else {
1122      assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
1123      EVT EltVT = TLI.getValueType(VecTy->getElementType());
1124
1125      SDValue Op;
1126      if (EltVT.isFloatingPoint())
1127        Op = DAG.getConstantFP(0, EltVT);
1128      else
1129        Op = DAG.getConstant(0, EltVT);
1130      Ops.assign(NumElements, Op);
1131    }
1132
1133    // Create a BUILD_VECTOR node.
1134    return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
1135                                    VT, &Ops[0], Ops.size());
1136  }
1137
1138  // If this is a static alloca, generate it as the frameindex instead of
1139  // computation.
1140  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
1141    DenseMap<const AllocaInst*, int>::iterator SI =
1142      FuncInfo.StaticAllocaMap.find(AI);
1143    if (SI != FuncInfo.StaticAllocaMap.end())
1144      return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
1145  }
1146
1147  // If this is an instruction which fast-isel has deferred, select it now.
1148  if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
1149    unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
1150    RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
1151    SDValue Chain = DAG.getEntryNode();
1152    return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
1153  }
1154
1155  llvm_unreachable("Can't get register for value!");
1156}
1157
1158void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
1159  SDValue Chain = getControlRoot();
1160  SmallVector<ISD::OutputArg, 8> Outs;
1161  SmallVector<SDValue, 8> OutVals;
1162
1163  if (!FuncInfo.CanLowerReturn) {
1164    unsigned DemoteReg = FuncInfo.DemoteRegister;
1165    const Function *F = I.getParent()->getParent();
1166
1167    // Emit a store of the return value through the virtual register.
1168    // Leave Outs empty so that LowerReturn won't try to load return
1169    // registers the usual way.
1170    SmallVector<EVT, 1> PtrValueVTs;
1171    ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
1172                    PtrValueVTs);
1173
1174    SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
1175    SDValue RetOp = getValue(I.getOperand(0));
1176
1177    SmallVector<EVT, 4> ValueVTs;
1178    SmallVector<uint64_t, 4> Offsets;
1179    ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
1180    unsigned NumValues = ValueVTs.size();
1181
1182    SmallVector<SDValue, 4> Chains(NumValues);
1183    for (unsigned i = 0; i != NumValues; ++i) {
1184      SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(),
1185                                RetPtr.getValueType(), RetPtr,
1186                                DAG.getIntPtrConstant(Offsets[i]));
1187      Chains[i] =
1188        DAG.getStore(Chain, getCurDebugLoc(),
1189                     SDValue(RetOp.getNode(), RetOp.getResNo() + i),
1190                     // FIXME: better loc info would be nice.
1191                     Add, MachinePointerInfo(), false, false, 0);
1192    }
1193
1194    Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
1195                        MVT::Other, &Chains[0], NumValues);
1196  } else if (I.getNumOperands() != 0) {
1197    SmallVector<EVT, 4> ValueVTs;
1198    ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs);
1199    unsigned NumValues = ValueVTs.size();
1200    if (NumValues) {
1201      SDValue RetOp = getValue(I.getOperand(0));
1202      for (unsigned j = 0, f = NumValues; j != f; ++j) {
1203        EVT VT = ValueVTs[j];
1204
1205        ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
1206
1207        const Function *F = I.getParent()->getParent();
1208        if (F->paramHasAttr(0, Attribute::SExt))
1209          ExtendKind = ISD::SIGN_EXTEND;
1210        else if (F->paramHasAttr(0, Attribute::ZExt))
1211          ExtendKind = ISD::ZERO_EXTEND;
1212
1213        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
1214          VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind);
1215
1216        unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
1217        EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
1218        SmallVector<SDValue, 4> Parts(NumParts);
1219        getCopyToParts(DAG, getCurDebugLoc(),
1220                       SDValue(RetOp.getNode(), RetOp.getResNo() + j),
1221                       &Parts[0], NumParts, PartVT, ExtendKind);
1222
1223        // 'inreg' on function refers to return value
1224        ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1225        if (F->paramHasAttr(0, Attribute::InReg))
1226          Flags.setInReg();
1227
1228        // Propagate extension type if any
1229        if (ExtendKind == ISD::SIGN_EXTEND)
1230          Flags.setSExt();
1231        else if (ExtendKind == ISD::ZERO_EXTEND)
1232          Flags.setZExt();
1233
1234        for (unsigned i = 0; i < NumParts; ++i) {
1235          Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
1236                                        /*isfixed=*/true));
1237          OutVals.push_back(Parts[i]);
1238        }
1239      }
1240    }
1241  }
1242
1243  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1244  CallingConv::ID CallConv =
1245    DAG.getMachineFunction().getFunction()->getCallingConv();
1246  Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
1247                          Outs, OutVals, getCurDebugLoc(), DAG);
1248
1249  // Verify that the target's LowerReturn behaved as expected.
1250  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
1251         "LowerReturn didn't return a valid chain!");
1252
1253  // Update the DAG with the new chain value resulting from return lowering.
1254  DAG.setRoot(Chain);
1255}
1256
1257/// CopyToExportRegsIfNeeded - If the given value has virtual registers
1258/// created for it, emit nodes to copy the value into the virtual
1259/// registers.
1260void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
1261  // Skip empty types
1262  if (V->getType()->isEmptyTy())
1263    return;
1264
1265  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
1266  if (VMI != FuncInfo.ValueMap.end()) {
1267    assert(!V->use_empty() && "Unused value assigned virtual registers!");
1268    CopyValueToVirtualRegister(V, VMI->second);
1269  }
1270}
1271
1272/// ExportFromCurrentBlock - If this condition isn't known to be exported from
1273/// the current basic block, add it to ValueMap now so that we'll get a
1274/// CopyTo/FromReg.
1275void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
1276  // No need to export constants.
1277  if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
1278
1279  // Already exported?
1280  if (FuncInfo.isExportedInst(V)) return;
1281
1282  unsigned Reg = FuncInfo.InitializeRegForValue(V);
1283  CopyValueToVirtualRegister(V, Reg);
1284}
1285
1286bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
1287                                                     const BasicBlock *FromBB) {
1288  // The operands of the setcc have to be in this block.  We don't know
1289  // how to export them from some other block.
1290  if (const Instruction *VI = dyn_cast<Instruction>(V)) {
1291    // Can export from current BB.
1292    if (VI->getParent() == FromBB)
1293      return true;
1294
1295    // Is already exported, noop.
1296    return FuncInfo.isExportedInst(V);
1297  }
1298
1299  // If this is an argument, we can export it if the BB is the entry block or
1300  // if it is already exported.
1301  if (isa<Argument>(V)) {
1302    if (FromBB == &FromBB->getParent()->getEntryBlock())
1303      return true;
1304
1305    // Otherwise, can only export this if it is already exported.
1306    return FuncInfo.isExportedInst(V);
1307  }
1308
1309  // Otherwise, constants can always be exported.
1310  return true;
1311}
1312
1313/// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
1314uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src,
1315                                            const MachineBasicBlock *Dst) const {
1316  BranchProbabilityInfo *BPI = FuncInfo.BPI;
1317  if (!BPI)
1318    return 0;
1319  const BasicBlock *SrcBB = Src->getBasicBlock();
1320  const BasicBlock *DstBB = Dst->getBasicBlock();
1321  return BPI->getEdgeWeight(SrcBB, DstBB);
1322}
1323
1324void SelectionDAGBuilder::
1325addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst,
1326                       uint32_t Weight /* = 0 */) {
1327  if (!Weight)
1328    Weight = getEdgeWeight(Src, Dst);
1329  Src->addSuccessor(Dst, Weight);
1330}
1331
1332
1333static bool InBlock(const Value *V, const BasicBlock *BB) {
1334  if (const Instruction *I = dyn_cast<Instruction>(V))
1335    return I->getParent() == BB;
1336  return true;
1337}
1338
1339/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
1340/// This function emits a branch and is used at the leaves of an OR or an
1341/// AND operator tree.
1342///
1343void
1344SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
1345                                                  MachineBasicBlock *TBB,
1346                                                  MachineBasicBlock *FBB,
1347                                                  MachineBasicBlock *CurBB,
1348                                                  MachineBasicBlock *SwitchBB) {
1349  const BasicBlock *BB = CurBB->getBasicBlock();
1350
1351  // If the leaf of the tree is a comparison, merge the condition into
1352  // the caseblock.
1353  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
1354    // The operands of the cmp have to be in this block.  We don't know
1355    // how to export them from some other block.  If this is the first block
1356    // of the sequence, no exporting is needed.
1357    if (CurBB == SwitchBB ||
1358        (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
1359         isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
1360      ISD::CondCode Condition;
1361      if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
1362        Condition = getICmpCondCode(IC->getPredicate());
1363      } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
1364        Condition = getFCmpCondCode(FC->getPredicate());
1365        if (TM.Options.NoNaNsFPMath)
1366          Condition = getFCmpCodeWithoutNaN(Condition);
1367      } else {
1368        Condition = ISD::SETEQ; // silence warning.
1369        llvm_unreachable("Unknown compare instruction");
1370      }
1371
1372      CaseBlock CB(Condition, BOp->getOperand(0),
1373                   BOp->getOperand(1), NULL, TBB, FBB, CurBB);
1374      SwitchCases.push_back(CB);
1375      return;
1376    }
1377  }
1378
1379  // Create a CaseBlock record representing this branch.
1380  CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
1381               NULL, TBB, FBB, CurBB);
1382  SwitchCases.push_back(CB);
1383}
1384
1385/// FindMergedConditions - If Cond is an expression like
1386void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
1387                                               MachineBasicBlock *TBB,
1388                                               MachineBasicBlock *FBB,
1389                                               MachineBasicBlock *CurBB,
1390                                               MachineBasicBlock *SwitchBB,
1391                                               unsigned Opc) {
1392  // If this node is not part of the or/and tree, emit it as a branch.
1393  const Instruction *BOp = dyn_cast<Instruction>(Cond);
1394  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
1395      (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
1396      BOp->getParent() != CurBB->getBasicBlock() ||
1397      !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
1398      !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
1399    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB);
1400    return;
1401  }
1402
1403  //  Create TmpBB after CurBB.
1404  MachineFunction::iterator BBI = CurBB;
1405  MachineFunction &MF = DAG.getMachineFunction();
1406  MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
1407  CurBB->getParent()->insert(++BBI, TmpBB);
1408
1409  if (Opc == Instruction::Or) {
1410    // Codegen X | Y as:
1411    //   jmp_if_X TBB
1412    //   jmp TmpBB
1413    // TmpBB:
1414    //   jmp_if_Y TBB
1415    //   jmp FBB
1416    //
1417
1418    // Emit the LHS condition.
1419    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc);
1420
1421    // Emit the RHS condition into TmpBB.
1422    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
1423  } else {
1424    assert(Opc == Instruction::And && "Unknown merge op!");
1425    // Codegen X & Y as:
1426    //   jmp_if_X TmpBB
1427    //   jmp FBB
1428    // TmpBB:
1429    //   jmp_if_Y TBB
1430    //   jmp FBB
1431    //
1432    //  This requires creation of TmpBB after CurBB.
1433
1434    // Emit the LHS condition.
1435    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc);
1436
1437    // Emit the RHS condition into TmpBB.
1438    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
1439  }
1440}
1441
1442/// If the set of cases should be emitted as a series of branches, return true.
1443/// If we should emit this as a bunch of and/or'd together conditions, return
1444/// false.
1445bool
1446SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
1447  if (Cases.size() != 2) return true;
1448
1449  // If this is two comparisons of the same values or'd or and'd together, they
1450  // will get folded into a single comparison, so don't emit two blocks.
1451  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
1452       Cases[0].CmpRHS == Cases[1].CmpRHS) ||
1453      (Cases[0].CmpRHS == Cases[1].CmpLHS &&
1454       Cases[0].CmpLHS == Cases[1].CmpRHS)) {
1455    return false;
1456  }
1457
1458  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
1459  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
1460  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
1461      Cases[0].CC == Cases[1].CC &&
1462      isa<Constant>(Cases[0].CmpRHS) &&
1463      cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
1464    if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
1465      return false;
1466    if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
1467      return false;
1468  }
1469
1470  return true;
1471}
1472
1473void SelectionDAGBuilder::visitBr(const BranchInst &I) {
1474  MachineBasicBlock *BrMBB = FuncInfo.MBB;
1475
1476  // Update machine-CFG edges.
1477  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
1478
1479  // Figure out which block is immediately after the current one.
1480  MachineBasicBlock *NextBlock = 0;
1481  MachineFunction::iterator BBI = BrMBB;
1482  if (++BBI != FuncInfo.MF->end())
1483    NextBlock = BBI;
1484
1485  if (I.isUnconditional()) {
1486    // Update machine-CFG edges.
1487    BrMBB->addSuccessor(Succ0MBB);
1488
1489    // If this is not a fall-through branch, emit the branch.
1490    if (Succ0MBB != NextBlock)
1491      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
1492                              MVT::Other, getControlRoot(),
1493                              DAG.getBasicBlock(Succ0MBB)));
1494
1495    return;
1496  }
1497
1498  // If this condition is one of the special cases we handle, do special stuff
1499  // now.
1500  const Value *CondVal = I.getCondition();
1501  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
1502
1503  // If this is a series of conditions that are or'd or and'd together, emit
1504  // this as a sequence of branches instead of setcc's with and/or operations.
1505  // As long as jumps are not expensive, this should improve performance.
1506  // For example, instead of something like:
1507  //     cmp A, B
1508  //     C = seteq
1509  //     cmp D, E
1510  //     F = setle
1511  //     or C, F
1512  //     jnz foo
1513  // Emit:
1514  //     cmp A, B
1515  //     je foo
1516  //     cmp D, E
1517  //     jle foo
1518  //
1519  if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
1520    if (!TLI.isJumpExpensive() &&
1521        BOp->hasOneUse() &&
1522        (BOp->getOpcode() == Instruction::And ||
1523         BOp->getOpcode() == Instruction::Or)) {
1524      FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
1525                           BOp->getOpcode());
1526      // If the compares in later blocks need to use values not currently
1527      // exported from this block, export them now.  This block should always
1528      // be the first entry.
1529      assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
1530
1531      // Allow some cases to be rejected.
1532      if (ShouldEmitAsBranches(SwitchCases)) {
1533        for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
1534          ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
1535          ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
1536        }
1537
1538        // Emit the branch for this block.
1539        visitSwitchCase(SwitchCases[0], BrMBB);
1540        SwitchCases.erase(SwitchCases.begin());
1541        return;
1542      }
1543
1544      // Okay, we decided not to do this, remove any inserted MBB's and clear
1545      // SwitchCases.
1546      for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
1547        FuncInfo.MF->erase(SwitchCases[i].ThisBB);
1548
1549      SwitchCases.clear();
1550    }
1551  }
1552
1553  // Create a CaseBlock record representing this branch.
1554  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
1555               NULL, Succ0MBB, Succ1MBB, BrMBB);
1556
1557  // Use visitSwitchCase to actually insert the fast branch sequence for this
1558  // cond branch.
1559  visitSwitchCase(CB, BrMBB);
1560}
1561
1562/// visitSwitchCase - Emits the necessary code to represent a single node in
1563/// the binary search tree resulting from lowering a switch instruction.
1564void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
1565                                          MachineBasicBlock *SwitchBB) {
1566  SDValue Cond;
1567  SDValue CondLHS = getValue(CB.CmpLHS);
1568  DebugLoc dl = getCurDebugLoc();
1569
1570  // Build the setcc now.
1571  if (CB.CmpMHS == NULL) {
1572    // Fold "(X == true)" to X and "(X == false)" to !X to
1573    // handle common cases produced by branch lowering.
1574    if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
1575        CB.CC == ISD::SETEQ)
1576      Cond = CondLHS;
1577    else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
1578             CB.CC == ISD::SETEQ) {
1579      SDValue True = DAG.getConstant(1, CondLHS.getValueType());
1580      Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
1581    } else
1582      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
1583  } else {
1584    assert(CB.CC == ISD::SETCC_INVALID &&
1585           "Condition is undefined for to-the-range belonging check.");
1586
1587    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
1588    const APInt& High  = cast<ConstantInt>(CB.CmpRHS)->getValue();
1589
1590    SDValue CmpOp = getValue(CB.CmpMHS);
1591    EVT VT = CmpOp.getValueType();
1592
1593    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) {
1594      Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
1595                          ISD::SETULE);
1596    } else {
1597      SDValue SUB = DAG.getNode(ISD::SUB, dl,
1598                                VT, CmpOp, DAG.getConstant(Low, VT));
1599      Cond = DAG.getSetCC(dl, MVT::i1, SUB,
1600                          DAG.getConstant(High-Low, VT), ISD::SETULE);
1601    }
1602  }
1603
1604  // Update successor info
1605  addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight);
1606  // TrueBB and FalseBB are always different unless the incoming IR is
1607  // degenerate. This only happens when running llc on weird IR.
1608  if (CB.TrueBB != CB.FalseBB)
1609    addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight);
1610
1611  // Set NextBlock to be the MBB immediately after the current one, if any.
1612  // This is used to avoid emitting unnecessary branches to the next block.
1613  MachineBasicBlock *NextBlock = 0;
1614  MachineFunction::iterator BBI = SwitchBB;
1615  if (++BBI != FuncInfo.MF->end())
1616    NextBlock = BBI;
1617
1618  // If the lhs block is the next block, invert the condition so that we can
1619  // fall through to the lhs instead of the rhs block.
1620  if (CB.TrueBB == NextBlock) {
1621    std::swap(CB.TrueBB, CB.FalseBB);
1622    SDValue True = DAG.getConstant(1, Cond.getValueType());
1623    Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
1624  }
1625
1626  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
1627                               MVT::Other, getControlRoot(), Cond,
1628                               DAG.getBasicBlock(CB.TrueBB));
1629
1630  // Insert the false branch. Do this even if it's a fall through branch,
1631  // this makes it easier to do DAG optimizations which require inverting
1632  // the branch condition.
1633  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
1634                       DAG.getBasicBlock(CB.FalseBB));
1635
1636  DAG.setRoot(BrCond);
1637}
1638
1639/// visitJumpTable - Emit JumpTable node in the current MBB
1640void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
1641  // Emit the code for the jump table
1642  assert(JT.Reg != -1U && "Should lower JT Header first!");
1643  EVT PTy = TLI.getPointerTy();
1644  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
1645                                     JT.Reg, PTy);
1646  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
1647  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
1648                                    MVT::Other, Index.getValue(1),
1649                                    Table, Index);
1650  DAG.setRoot(BrJumpTable);
1651}
1652
1653/// visitJumpTableHeader - This function emits necessary code to produce index
1654/// in the JumpTable from switch case.
1655void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
1656                                               JumpTableHeader &JTH,
1657                                               MachineBasicBlock *SwitchBB) {
1658  // Subtract the lowest switch case value from the value being switched on and
1659  // conditional branch to default mbb if the result is greater than the
1660  // difference between smallest and largest cases.
1661  SDValue SwitchOp = getValue(JTH.SValue);
1662  EVT VT = SwitchOp.getValueType();
1663  SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
1664                            DAG.getConstant(JTH.First, VT));
1665
1666  // The SDNode we just created, which holds the value being switched on minus
1667  // the smallest case value, needs to be copied to a virtual register so it
1668  // can be used as an index into the jump table in a subsequent basic block.
1669  // This value may be smaller or larger than the target's pointer type, and
1670  // therefore require extension or truncating.
1671  SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy());
1672
1673  unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy());
1674  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
1675                                    JumpTableReg, SwitchOp);
1676  JT.Reg = JumpTableReg;
1677
1678  // Emit the range check for the jump table, and branch to the default block
1679  // for the switch statement if the value being switched on exceeds the largest
1680  // case in the switch.
1681  SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
1682                             TLI.getSetCCResultType(Sub.getValueType()), Sub,
1683                             DAG.getConstant(JTH.Last-JTH.First,VT),
1684                             ISD::SETUGT);
1685
1686  // Set NextBlock to be the MBB immediately after the current one, if any.
1687  // This is used to avoid emitting unnecessary branches to the next block.
1688  MachineBasicBlock *NextBlock = 0;
1689  MachineFunction::iterator BBI = SwitchBB;
1690
1691  if (++BBI != FuncInfo.MF->end())
1692    NextBlock = BBI;
1693
1694  SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
1695                               MVT::Other, CopyTo, CMP,
1696                               DAG.getBasicBlock(JT.Default));
1697
1698  if (JT.MBB != NextBlock)
1699    BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
1700                         DAG.getBasicBlock(JT.MBB));
1701
1702  DAG.setRoot(BrCond);
1703}
1704
1705/// visitBitTestHeader - This function emits necessary code to produce value
1706/// suitable for "bit tests"
1707void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
1708                                             MachineBasicBlock *SwitchBB) {
1709  // Subtract the minimum value
1710  SDValue SwitchOp = getValue(B.SValue);
1711  EVT VT = SwitchOp.getValueType();
1712  SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
1713                            DAG.getConstant(B.First, VT));
1714
1715  // Check range
1716  SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
1717                                  TLI.getSetCCResultType(Sub.getValueType()),
1718                                  Sub, DAG.getConstant(B.Range, VT),
1719                                  ISD::SETUGT);
1720
1721  // Determine the type of the test operands.
1722  bool UsePtrType = false;
1723  if (!TLI.isTypeLegal(VT))
1724    UsePtrType = true;
1725  else {
1726    for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
1727      if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
1728        // Switch table case range are encoded into series of masks.
1729        // Just use pointer type, it's guaranteed to fit.
1730        UsePtrType = true;
1731        break;
1732      }
1733  }
1734  if (UsePtrType) {
1735    VT = TLI.getPointerTy();
1736    Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT);
1737  }
1738
1739  B.RegVT = VT;
1740  B.Reg = FuncInfo.CreateReg(VT);
1741  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
1742                                    B.Reg, Sub);
1743
1744  // Set NextBlock to be the MBB immediately after the current one, if any.
1745  // This is used to avoid emitting unnecessary branches to the next block.
1746  MachineBasicBlock *NextBlock = 0;
1747  MachineFunction::iterator BBI = SwitchBB;
1748  if (++BBI != FuncInfo.MF->end())
1749    NextBlock = BBI;
1750
1751  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
1752
1753  addSuccessorWithWeight(SwitchBB, B.Default);
1754  addSuccessorWithWeight(SwitchBB, MBB);
1755
1756  SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
1757                                MVT::Other, CopyTo, RangeCmp,
1758                                DAG.getBasicBlock(B.Default));
1759
1760  if (MBB != NextBlock)
1761    BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo,
1762                          DAG.getBasicBlock(MBB));
1763
1764  DAG.setRoot(BrRange);
1765}
1766
1767/// visitBitTestCase - this function produces one "bit test"
1768void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
1769                                           MachineBasicBlock* NextMBB,
1770                                           uint32_t BranchWeightToNext,
1771                                           unsigned Reg,
1772                                           BitTestCase &B,
1773                                           MachineBasicBlock *SwitchBB) {
1774  EVT VT = BB.RegVT;
1775  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
1776                                       Reg, VT);
1777  SDValue Cmp;
1778  unsigned PopCount = CountPopulation_64(B.Mask);
1779  if (PopCount == 1) {
1780    // Testing for a single bit; just compare the shift count with what it
1781    // would need to be to shift a 1 bit in that position.
1782    Cmp = DAG.getSetCC(getCurDebugLoc(),
1783                       TLI.getSetCCResultType(VT),
1784                       ShiftOp,
1785                       DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),
1786                       ISD::SETEQ);
1787  } else if (PopCount == BB.Range) {
1788    // There is only one zero bit in the range, test for it directly.
1789    Cmp = DAG.getSetCC(getCurDebugLoc(),
1790                       TLI.getSetCCResultType(VT),
1791                       ShiftOp,
1792                       DAG.getConstant(CountTrailingOnes_64(B.Mask), VT),
1793                       ISD::SETNE);
1794  } else {
1795    // Make desired shift
1796    SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT,
1797                                    DAG.getConstant(1, VT), ShiftOp);
1798
1799    // Emit bit tests and jumps
1800    SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
1801                                VT, SwitchVal, DAG.getConstant(B.Mask, VT));
1802    Cmp = DAG.getSetCC(getCurDebugLoc(),
1803                       TLI.getSetCCResultType(VT),
1804                       AndOp, DAG.getConstant(0, VT),
1805                       ISD::SETNE);
1806  }
1807
1808  // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight.
1809  addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight);
1810  // The branch weight from SwitchBB to NextMBB is BranchWeightToNext.
1811  addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext);
1812
1813  SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
1814                              MVT::Other, getControlRoot(),
1815                              Cmp, DAG.getBasicBlock(B.TargetBB));
1816
1817  // Set NextBlock to be the MBB immediately after the current one, if any.
1818  // This is used to avoid emitting unnecessary branches to the next block.
1819  MachineBasicBlock *NextBlock = 0;
1820  MachineFunction::iterator BBI = SwitchBB;
1821  if (++BBI != FuncInfo.MF->end())
1822    NextBlock = BBI;
1823
1824  if (NextMBB != NextBlock)
1825    BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
1826                        DAG.getBasicBlock(NextMBB));
1827
1828  DAG.setRoot(BrAnd);
1829}
1830
1831void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
1832  MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
1833
1834  // Retrieve successors.
1835  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
1836  MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
1837
1838  const Value *Callee(I.getCalledValue());
1839  const Function *Fn = dyn_cast<Function>(Callee);
1840  if (isa<InlineAsm>(Callee))
1841    visitInlineAsm(&I);
1842  else if (Fn && Fn->isIntrinsic()) {
1843    assert(Fn->getIntrinsicID() == Intrinsic::donothing);
1844    // Ignore invokes to @llvm.donothing: jump directly to the next BB.
1845  } else
1846    LowerCallTo(&I, getValue(Callee), false, LandingPad);
1847
1848  // If the value of the invoke is used outside of its defining block, make it
1849  // available as a virtual register.
1850  CopyToExportRegsIfNeeded(&I);
1851
1852  // Update successor info
1853  addSuccessorWithWeight(InvokeMBB, Return);
1854  addSuccessorWithWeight(InvokeMBB, LandingPad);
1855
1856  // Drop into normal successor.
1857  DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
1858                          MVT::Other, getControlRoot(),
1859                          DAG.getBasicBlock(Return)));
1860}
1861
1862void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
1863  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
1864}
1865
1866void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
1867  assert(FuncInfo.MBB->isLandingPad() &&
1868         "Call to landingpad not in landing pad!");
1869
1870  MachineBasicBlock *MBB = FuncInfo.MBB;
1871  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
1872  AddLandingPadInfo(LP, MMI, MBB);
1873
1874  // If there aren't registers to copy the values into (e.g., during SjLj
1875  // exceptions), then don't bother to create these DAG nodes.
1876  if (TLI.getExceptionPointerRegister() == 0 &&
1877      TLI.getExceptionSelectorRegister() == 0)
1878    return;
1879
1880  SmallVector<EVT, 2> ValueVTs;
1881  ComputeValueVTs(TLI, LP.getType(), ValueVTs);
1882
1883  // Insert the EXCEPTIONADDR instruction.
1884  assert(FuncInfo.MBB->isLandingPad() &&
1885         "Call to eh.exception not in landing pad!");
1886  SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
1887  SDValue Ops[2];
1888  Ops[0] = DAG.getRoot();
1889  SDValue Op1 = DAG.getNode(ISD::EXCEPTIONADDR, getCurDebugLoc(), VTs, Ops, 1);
1890  SDValue Chain = Op1.getValue(1);
1891
1892  // Insert the EHSELECTION instruction.
1893  VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
1894  Ops[0] = Op1;
1895  Ops[1] = Chain;
1896  SDValue Op2 = DAG.getNode(ISD::EHSELECTION, getCurDebugLoc(), VTs, Ops, 2);
1897  Chain = Op2.getValue(1);
1898  Op2 = DAG.getSExtOrTrunc(Op2, getCurDebugLoc(), MVT::i32);
1899
1900  Ops[0] = Op1;
1901  Ops[1] = Op2;
1902  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
1903                            DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
1904                            &Ops[0], 2);
1905
1906  std::pair<SDValue, SDValue> RetPair = std::make_pair(Res, Chain);
1907  setValue(&LP, RetPair.first);
1908  DAG.setRoot(RetPair.second);
1909}
1910
1911/// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for
1912/// small case ranges).
1913bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
1914                                                 CaseRecVector& WorkList,
1915                                                 const Value* SV,
1916                                                 MachineBasicBlock *Default,
1917                                                 MachineBasicBlock *SwitchBB) {
1918  // Size is the number of Cases represented by this range.
1919  size_t Size = CR.Range.second - CR.Range.first;
1920  if (Size > 3)
1921    return false;
1922
1923  // Get the MachineFunction which holds the current MBB.  This is used when
1924  // inserting any additional MBBs necessary to represent the switch.
1925  MachineFunction *CurMF = FuncInfo.MF;
1926
1927  // Figure out which block is immediately after the current one.
1928  MachineBasicBlock *NextBlock = 0;
1929  MachineFunction::iterator BBI = CR.CaseBB;
1930
1931  if (++BBI != FuncInfo.MF->end())
1932    NextBlock = BBI;
1933
1934  BranchProbabilityInfo *BPI = FuncInfo.BPI;
1935  // If any two of the cases has the same destination, and if one value
1936  // is the same as the other, but has one bit unset that the other has set,
1937  // use bit manipulation to do two compares at once.  For example:
1938  // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
1939  // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
1940  // TODO: Handle cases where CR.CaseBB != SwitchBB.
1941  if (Size == 2 && CR.CaseBB == SwitchBB) {
1942    Case &Small = *CR.Range.first;
1943    Case &Big = *(CR.Range.second-1);
1944
1945    if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
1946      const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
1947      const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();
1948
1949      // Check that there is only one bit different.
1950      if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
1951          (SmallValue | BigValue) == BigValue) {
1952        // Isolate the common bit.
1953        APInt CommonBit = BigValue & ~SmallValue;
1954        assert((SmallValue | CommonBit) == BigValue &&
1955               CommonBit.countPopulation() == 1 && "Not a common bit?");
1956
1957        SDValue CondLHS = getValue(SV);
1958        EVT VT = CondLHS.getValueType();
1959        DebugLoc DL = getCurDebugLoc();
1960
1961        SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
1962                                 DAG.getConstant(CommonBit, VT));
1963        SDValue Cond = DAG.getSetCC(DL, MVT::i1,
1964                                    Or, DAG.getConstant(BigValue, VT),
1965                                    ISD::SETEQ);
1966
1967        // Update successor info.
1968        // Both Small and Big will jump to Small.BB, so we sum up the weights.
1969        addSuccessorWithWeight(SwitchBB, Small.BB,
1970                               Small.ExtraWeight + Big.ExtraWeight);
1971        addSuccessorWithWeight(SwitchBB, Default,
1972          // The default destination is the first successor in IR.
1973          BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0);
1974
1975        // Insert the true branch.
1976        SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
1977                                     getControlRoot(), Cond,
1978                                     DAG.getBasicBlock(Small.BB));
1979
1980        // Insert the false branch.
1981        BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
1982                             DAG.getBasicBlock(Default));
1983
1984        DAG.setRoot(BrCond);
1985        return true;
1986      }
1987    }
1988  }
1989
1990  // Order cases by weight so the most likely case will be checked first.
1991  uint32_t UnhandledWeights = 0;
1992  if (BPI) {
1993    for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) {
1994      uint32_t IWeight = I->ExtraWeight;
1995      UnhandledWeights += IWeight;
1996      for (CaseItr J = CR.Range.first; J < I; ++J) {
1997        uint32_t JWeight = J->ExtraWeight;
1998        if (IWeight > JWeight)
1999          std::swap(*I, *J);
2000      }
2001    }
2002  }
2003  // Rearrange the case blocks so that the last one falls through if possible.
2004  Case &BackCase = *(CR.Range.second-1);
2005  if (Size > 1 &&
2006      NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
2007    // The last case block won't fall through into 'NextBlock' if we emit the
2008    // branches in this order.  See if rearranging a case value would help.
2009    // We start at the bottom as it's the case with the least weight.
2010    for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I){
2011      if (I->BB == NextBlock) {
2012        std::swap(*I, BackCase);
2013        break;
2014      }
2015    }
2016  }
2017
2018  // Create a CaseBlock record representing a conditional branch to
2019  // the Case's target mbb if the value being switched on SV is equal
2020  // to C.
2021  MachineBasicBlock *CurBlock = CR.CaseBB;
2022  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
2023    MachineBasicBlock *FallThrough;
2024    if (I != E-1) {
2025      FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
2026      CurMF->insert(BBI, FallThrough);
2027
2028      // Put SV in a virtual register to make it available from the new blocks.
2029      ExportFromCurrentBlock(SV);
2030    } else {
2031      // If the last case doesn't match, go to the default block.
2032      FallThrough = Default;
2033    }
2034
2035    const Value *RHS, *LHS, *MHS;
2036    ISD::CondCode CC;
2037    if (I->High == I->Low) {
2038      // This is just small small case range :) containing exactly 1 case
2039      CC = ISD::SETEQ;
2040      LHS = SV; RHS = I->High; MHS = NULL;
2041    } else {
2042      CC = ISD::SETCC_INVALID;
2043      LHS = I->Low; MHS = SV; RHS = I->High;
2044    }
2045
2046    // The false weight should be sum of all un-handled cases.
2047    UnhandledWeights -= I->ExtraWeight;
2048    CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough,
2049                 /* me */ CurBlock,
2050                 /* trueweight */ I->ExtraWeight,
2051                 /* falseweight */ UnhandledWeights);
2052
2053    // If emitting the first comparison, just call visitSwitchCase to emit the
2054    // code into the current block.  Otherwise, push the CaseBlock onto the
2055    // vector to be later processed by SDISel, and insert the node's MBB
2056    // before the next MBB.
2057    if (CurBlock == SwitchBB)
2058      visitSwitchCase(CB, SwitchBB);
2059    else
2060      SwitchCases.push_back(CB);
2061
2062    CurBlock = FallThrough;
2063  }
2064
2065  return true;
2066}
2067
2068static inline bool areJTsAllowed(const TargetLowering &TLI) {
2069  return TLI.supportJumpTables() &&
2070          (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
2071           TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
2072}
2073
2074static APInt ComputeRange(const APInt &First, const APInt &Last) {
2075  uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
2076  APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth);
2077  return (LastExt - FirstExt + 1ULL);
2078}
2079
2080/// handleJTSwitchCase - Emit jumptable for current switch case range
2081bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR,
2082                                             CaseRecVector &WorkList,
2083                                             const Value *SV,
2084                                             MachineBasicBlock *Default,
2085                                             MachineBasicBlock *SwitchBB) {
2086  Case& FrontCase = *CR.Range.first;
2087  Case& BackCase  = *(CR.Range.second-1);
2088
2089  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
2090  const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
2091
2092  APInt TSize(First.getBitWidth(), 0);
2093  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I)
2094    TSize += I->size();
2095
2096  if (!areJTsAllowed(TLI) || TSize.ult(4))
2097    return false;
2098
2099  APInt Range = ComputeRange(First, Last);
2100  // The density is TSize / Range. Require at least 40%.
2101  // It should not be possible for IntTSize to saturate for sane code, but make
2102  // sure we handle Range saturation correctly.
2103  uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10);
2104  uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10);
2105  if (IntTSize * 10 < IntRange * 4)
2106    return false;
2107
2108  DEBUG(dbgs() << "Lowering jump table\n"
2109               << "First entry: " << First << ". Last entry: " << Last << '\n'
2110               << "Range: " << Range << ". Size: " << TSize << ".\n\n");
2111
2112  // Get the MachineFunction which holds the current MBB.  This is used when
2113  // inserting any additional MBBs necessary to represent the switch.
2114  MachineFunction *CurMF = FuncInfo.MF;
2115
2116  // Figure out which block is immediately after the current one.
2117  MachineFunction::iterator BBI = CR.CaseBB;
2118  ++BBI;
2119
2120  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
2121
2122  // Create a new basic block to hold the code for loading the address
2123  // of the jump table, and jumping to it.  Update successor information;
2124  // we will either branch to the default case for the switch, or the jump
2125  // table.
2126  MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2127  CurMF->insert(BBI, JumpTableBB);
2128
2129  addSuccessorWithWeight(CR.CaseBB, Default);
2130  addSuccessorWithWeight(CR.CaseBB, JumpTableBB);
2131
2132  // Build a vector of destination BBs, corresponding to each target
2133  // of the jump table. If the value of the jump table slot corresponds to
2134  // a case statement, push the case's BB onto the vector, otherwise, push
2135  // the default BB.
2136  std::vector<MachineBasicBlock*> DestBBs;
2137  APInt TEI = First;
2138  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
2139    const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
2140    const APInt &High = cast<ConstantInt>(I->High)->getValue();
2141
2142    if (Low.ule(TEI) && TEI.ule(High)) {
2143      DestBBs.push_back(I->BB);
2144      if (TEI==High)
2145        ++I;
2146    } else {
2147      DestBBs.push_back(Default);
2148    }
2149  }
2150
2151  // Calculate weight for each unique destination in CR.
2152  DenseMap<MachineBasicBlock*, uint32_t> DestWeights;
2153  if (FuncInfo.BPI)
2154    for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
2155      DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
2156          DestWeights.find(I->BB);
2157      if (Itr != DestWeights.end())
2158        Itr->second += I->ExtraWeight;
2159      else
2160        DestWeights[I->BB] = I->ExtraWeight;
2161    }
2162
2163  // Update successor info. Add one edge to each unique successor.
2164  BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
2165  for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
2166         E = DestBBs.end(); I != E; ++I) {
2167    if (!SuccsHandled[(*I)->getNumber()]) {
2168      SuccsHandled[(*I)->getNumber()] = true;
2169      DenseMap<MachineBasicBlock*, uint32_t>::iterator Itr =
2170          DestWeights.find(*I);
2171      addSuccessorWithWeight(JumpTableBB, *I,
2172                             Itr != DestWeights.end() ? Itr->second : 0);
2173    }
2174  }
2175
2176  // Create a jump table index for this jump table.
2177  unsigned JTEncoding = TLI.getJumpTableEncoding();
2178  unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
2179                       ->createJumpTableIndex(DestBBs);
2180
2181  // Set the jump table information so that we can codegen it as a second
2182  // MachineBasicBlock
2183  JumpTable JT(-1U, JTI, JumpTableBB, Default);
2184  JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
2185  if (CR.CaseBB == SwitchBB)
2186    visitJumpTableHeader(JT, JTH, SwitchBB);
2187
2188  JTCases.push_back(JumpTableBlock(JTH, JT));
2189  return true;
2190}
2191
2192/// handleBTSplitSwitchCase - emit comparison and split binary search tree into
2193/// 2 subtrees.
2194bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
2195                                                  CaseRecVector& WorkList,
2196                                                  const Value* SV,
2197                                                  MachineBasicBlock *Default,
2198                                                  MachineBasicBlock *SwitchBB) {
2199  // Get the MachineFunction which holds the current MBB.  This is used when
2200  // inserting any additional MBBs necessary to represent the switch.
2201  MachineFunction *CurMF = FuncInfo.MF;
2202
2203  // Figure out which block is immediately after the current one.
2204  MachineFunction::iterator BBI = CR.CaseBB;
2205  ++BBI;
2206
2207  Case& FrontCase = *CR.Range.first;
2208  Case& BackCase  = *(CR.Range.second-1);
2209  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
2210
2211  // Size is the number of Cases represented by this range.
2212  unsigned Size = CR.Range.second - CR.Range.first;
2213
2214  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
2215  const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
2216  double FMetric = 0;
2217  CaseItr Pivot = CR.Range.first + Size/2;
2218
2219  // Select optimal pivot, maximizing sum density of LHS and RHS. This will
2220  // (heuristically) allow us to emit JumpTable's later.
2221  APInt TSize(First.getBitWidth(), 0);
2222  for (CaseItr I = CR.Range.first, E = CR.Range.second;
2223       I!=E; ++I)
2224    TSize += I->size();
2225
2226  APInt LSize = FrontCase.size();
2227  APInt RSize = TSize-LSize;
2228  DEBUG(dbgs() << "Selecting best pivot: \n"
2229               << "First: " << First << ", Last: " << Last <<'\n'
2230               << "LSize: " << LSize << ", RSize: " << RSize << '\n');
2231  for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
2232       J!=E; ++I, ++J) {
2233    const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
2234    const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
2235    APInt Range = ComputeRange(LEnd, RBegin);
2236    assert((Range - 2ULL).isNonNegative() &&
2237           "Invalid case distance");
2238    // Use volatile double here to avoid excess precision issues on some hosts,
2239    // e.g. that use 80-bit X87 registers.
2240    volatile double LDensity =
2241       (double)LSize.roundToDouble() /
2242                           (LEnd - First + 1ULL).roundToDouble();
2243    volatile double RDensity =
2244      (double)RSize.roundToDouble() /
2245                           (Last - RBegin + 1ULL).roundToDouble();
2246    double Metric = Range.logBase2()*(LDensity+RDensity);
2247    // Should always split in some non-trivial place
2248    DEBUG(dbgs() <<"=>Step\n"
2249                 << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
2250                 << "LDensity: " << LDensity
2251                 << ", RDensity: " << RDensity << '\n'
2252                 << "Metric: " << Metric << '\n');
2253    if (FMetric < Metric) {
2254      Pivot = J;
2255      FMetric = Metric;
2256      DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
2257    }
2258
2259    LSize += J->size();
2260    RSize -= J->size();
2261  }
2262  if (areJTsAllowed(TLI)) {
2263    // If our case is dense we *really* should handle it earlier!
2264    assert((FMetric > 0) && "Should handle dense range earlier!");
2265  } else {
2266    Pivot = CR.Range.first + Size/2;
2267  }
2268
2269  CaseRange LHSR(CR.Range.first, Pivot);
2270  CaseRange RHSR(Pivot, CR.Range.second);
2271  const Constant *C = Pivot->Low;
2272  MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
2273
2274  // We know that we branch to the LHS if the Value being switched on is
2275  // less than the Pivot value, C.  We use this to optimize our binary
2276  // tree a bit, by recognizing that if SV is greater than or equal to the
2277  // LHS's Case Value, and that Case Value is exactly one less than the
2278  // Pivot's Value, then we can branch directly to the LHS's Target,
2279  // rather than creating a leaf node for it.
2280  if ((LHSR.second - LHSR.first) == 1 &&
2281      LHSR.first->High == CR.GE &&
2282      cast<ConstantInt>(C)->getValue() ==
2283      (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
2284    TrueBB = LHSR.first->BB;
2285  } else {
2286    TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2287    CurMF->insert(BBI, TrueBB);
2288    WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
2289
2290    // Put SV in a virtual register to make it available from the new blocks.
2291    ExportFromCurrentBlock(SV);
2292  }
2293
2294  // Similar to the optimization above, if the Value being switched on is
2295  // known to be less than the Constant CR.LT, and the current Case Value
2296  // is CR.LT - 1, then we can branch directly to the target block for
2297  // the current Case Value, rather than emitting a RHS leaf node for it.
2298  if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
2299      cast<ConstantInt>(RHSR.first->Low)->getValue() ==
2300      (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
2301    FalseBB = RHSR.first->BB;
2302  } else {
2303    FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2304    CurMF->insert(BBI, FalseBB);
2305    WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
2306
2307    // Put SV in a virtual register to make it available from the new blocks.
2308    ExportFromCurrentBlock(SV);
2309  }
2310
2311  // Create a CaseBlock record representing a conditional branch to
2312  // the LHS node if the value being switched on SV is less than C.
2313  // Otherwise, branch to LHS.
2314  CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
2315
2316  if (CR.CaseBB == SwitchBB)
2317    visitSwitchCase(CB, SwitchBB);
2318  else
2319    SwitchCases.push_back(CB);
2320
2321  return true;
2322}
2323
2324/// handleBitTestsSwitchCase - if current case range has few destination and
2325/// range span less, than machine word bitwidth, encode case range into series
2326/// of masks and emit bit tests with these masks.
2327bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
2328                                                   CaseRecVector& WorkList,
2329                                                   const Value* SV,
2330                                                   MachineBasicBlock* Default,
2331                                                   MachineBasicBlock *SwitchBB){
2332  EVT PTy = TLI.getPointerTy();
2333  unsigned IntPtrBits = PTy.getSizeInBits();
2334
2335  Case& FrontCase = *CR.Range.first;
2336  Case& BackCase  = *(CR.Range.second-1);
2337
2338  // Get the MachineFunction which holds the current MBB.  This is used when
2339  // inserting any additional MBBs necessary to represent the switch.
2340  MachineFunction *CurMF = FuncInfo.MF;
2341
2342  // If target does not have legal shift left, do not emit bit tests at all.
2343  if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
2344    return false;
2345
2346  size_t numCmps = 0;
2347  for (CaseItr I = CR.Range.first, E = CR.Range.second;
2348       I!=E; ++I) {
2349    // Single case counts one, case range - two.
2350    numCmps += (I->Low == I->High ? 1 : 2);
2351  }
2352
2353  // Count unique destinations
2354  SmallSet<MachineBasicBlock*, 4> Dests;
2355  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
2356    Dests.insert(I->BB);
2357    if (Dests.size() > 3)
2358      // Don't bother the code below, if there are too much unique destinations
2359      return false;
2360  }
2361  DEBUG(dbgs() << "Total number of unique destinations: "
2362        << Dests.size() << '\n'
2363        << "Total number of comparisons: " << numCmps << '\n');
2364
2365  // Compute span of values.
2366  const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
2367  const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
2368  APInt cmpRange = maxValue - minValue;
2369
2370  DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
2371               << "Low bound: " << minValue << '\n'
2372               << "High bound: " << maxValue << '\n');
2373
2374  if (cmpRange.uge(IntPtrBits) ||
2375      (!(Dests.size() == 1 && numCmps >= 3) &&
2376       !(Dests.size() == 2 && numCmps >= 5) &&
2377       !(Dests.size() >= 3 && numCmps >= 6)))
2378    return false;
2379
2380  DEBUG(dbgs() << "Emitting bit tests\n");
2381  APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
2382
2383  // Optimize the case where all the case values fit in a
2384  // word without having to subtract minValue. In this case,
2385  // we can optimize away the subtraction.
2386  if (maxValue.ult(IntPtrBits)) {
2387    cmpRange = maxValue;
2388  } else {
2389    lowBound = minValue;
2390  }
2391
2392  CaseBitsVector CasesBits;
2393  unsigned i, count = 0;
2394
2395  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
2396    MachineBasicBlock* Dest = I->BB;
2397    for (i = 0; i < count; ++i)
2398      if (Dest == CasesBits[i].BB)
2399        break;
2400
2401    if (i == count) {
2402      assert((count < 3) && "Too much destinations to test!");
2403      CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/));
2404      count++;
2405    }
2406
2407    const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
2408    const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
2409
2410    uint64_t lo = (lowValue - lowBound).getZExtValue();
2411    uint64_t hi = (highValue - lowBound).getZExtValue();
2412    CasesBits[i].ExtraWeight += I->ExtraWeight;
2413
2414    for (uint64_t j = lo; j <= hi; j++) {
2415      CasesBits[i].Mask |=  1ULL << j;
2416      CasesBits[i].Bits++;
2417    }
2418
2419  }
2420  std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
2421
2422  BitTestInfo BTC;
2423
2424  // Figure out which block is immediately after the current one.
2425  MachineFunction::iterator BBI = CR.CaseBB;
2426  ++BBI;
2427
2428  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
2429
2430  DEBUG(dbgs() << "Cases:\n");
2431  for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
2432    DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
2433                 << ", Bits: " << CasesBits[i].Bits
2434                 << ", BB: " << CasesBits[i].BB << '\n');
2435
2436    MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2437    CurMF->insert(BBI, CaseBB);
2438    BTC.push_back(BitTestCase(CasesBits[i].Mask,
2439                              CaseBB,
2440                              CasesBits[i].BB, CasesBits[i].ExtraWeight));
2441
2442    // Put SV in a virtual register to make it available from the new blocks.
2443    ExportFromCurrentBlock(SV);
2444  }
2445
2446  BitTestBlock BTB(lowBound, cmpRange, SV,
2447                   -1U, MVT::Other, (CR.CaseBB == SwitchBB),
2448                   CR.CaseBB, Default, BTC);
2449
2450  if (CR.CaseBB == SwitchBB)
2451    visitBitTestHeader(BTB, SwitchBB);
2452
2453  BitTestCases.push_back(BTB);
2454
2455  return true;
2456}
2457
2458/// Clusterify - Transform simple list of Cases into list of CaseRange's
2459size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
2460                                       const SwitchInst& SI) {
2461
2462  /// Use a shorter form of declaration, and also
2463  /// show the we want to use CRSBuilder as Clusterifier.
2464  typedef IntegersSubsetMapping<MachineBasicBlock> Clusterifier;
2465
2466  Clusterifier TheClusterifier;
2467
2468  BranchProbabilityInfo *BPI = FuncInfo.BPI;
2469  // Start with "simple" cases
2470  for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end();
2471       i != e; ++i) {
2472    const BasicBlock *SuccBB = i.getCaseSuccessor();
2473    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB];
2474
2475    TheClusterifier.add(i.getCaseValueEx(), SMBB,
2476        BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0);
2477  }
2478
2479  TheClusterifier.optimize();
2480
2481  size_t numCmps = 0;
2482  for (Clusterifier::RangeIterator i = TheClusterifier.begin(),
2483       e = TheClusterifier.end(); i != e; ++i, ++numCmps) {
2484    Clusterifier::Cluster &C = *i;
2485    // Update edge weight for the cluster.
2486    unsigned W = C.first.Weight;
2487
2488    // FIXME: Currently work with ConstantInt based numbers.
2489    // Changing it to APInt based is a pretty heavy for this commit.
2490    Cases.push_back(Case(C.first.getLow().toConstantInt(),
2491                         C.first.getHigh().toConstantInt(), C.second, W));
2492
2493    if (C.first.getLow() != C.first.getHigh())
2494    // A range counts double, since it requires two compares.
2495    ++numCmps;
2496  }
2497
2498  return numCmps;
2499}
2500
2501void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
2502                                           MachineBasicBlock *Last) {
2503  // Update JTCases.
2504  for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
2505    if (JTCases[i].first.HeaderBB == First)
2506      JTCases[i].first.HeaderBB = Last;
2507
2508  // Update BitTestCases.
2509  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
2510    if (BitTestCases[i].Parent == First)
2511      BitTestCases[i].Parent = Last;
2512}
2513
2514void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
2515  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
2516
2517  // Figure out which block is immediately after the current one.
2518  MachineBasicBlock *NextBlock = 0;
2519  MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
2520
2521  // If there is only the default destination, branch to it if it is not the
2522  // next basic block.  Otherwise, just fall through.
2523  if (!SI.getNumCases()) {
2524    // Update machine-CFG edges.
2525
2526    // If this is not a fall-through branch, emit the branch.
2527    SwitchMBB->addSuccessor(Default);
2528    if (Default != NextBlock)
2529      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
2530                              MVT::Other, getControlRoot(),
2531                              DAG.getBasicBlock(Default)));
2532
2533    return;
2534  }
2535
2536  // If there are any non-default case statements, create a vector of Cases
2537  // representing each one, and sort the vector so that we can efficiently
2538  // create a binary search tree from them.
2539  CaseVector Cases;
2540  size_t numCmps = Clusterify(Cases, SI);
2541  DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
2542               << ". Total compares: " << numCmps << '\n');
2543  (void)numCmps;
2544
2545  // Get the Value to be switched on and default basic blocks, which will be
2546  // inserted into CaseBlock records, representing basic blocks in the binary
2547  // search tree.
2548  const Value *SV = SI.getCondition();
2549
2550  // Push the initial CaseRec onto the worklist
2551  CaseRecVector WorkList;
2552  WorkList.push_back(CaseRec(SwitchMBB,0,0,
2553                             CaseRange(Cases.begin(),Cases.end())));
2554
2555  while (!WorkList.empty()) {
2556    // Grab a record representing a case range to process off the worklist
2557    CaseRec CR = WorkList.back();
2558    WorkList.pop_back();
2559
2560    if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
2561      continue;
2562
2563    // If the range has few cases (two or less) emit a series of specific
2564    // tests.
2565    if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
2566      continue;
2567
2568    // If the switch has more than 5 blocks, and at least 40% dense, and the
2569    // target supports indirect branches, then emit a jump table rather than
2570    // lowering the switch to a binary tree of conditional branches.
2571    if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
2572      continue;
2573
2574    // Emit binary tree. We need to pick a pivot, and push left and right ranges
2575    // onto the worklist. Leafs are handled via handleSmallSwitchRange() call.
2576    handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB);
2577  }
2578}
2579
2580void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
2581  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
2582
2583  // Update machine-CFG edges with unique successors.
2584  SmallVector<BasicBlock*, 32> succs;
2585  succs.reserve(I.getNumSuccessors());
2586  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
2587    succs.push_back(I.getSuccessor(i));
2588  array_pod_sort(succs.begin(), succs.end());
2589  succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
2590  for (unsigned i = 0, e = succs.size(); i != e; ++i) {
2591    MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]];
2592    addSuccessorWithWeight(IndirectBrMBB, Succ);
2593  }
2594
2595  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
2596                          MVT::Other, getControlRoot(),
2597                          getValue(I.getAddress())));
2598}
2599
2600void SelectionDAGBuilder::visitFSub(const User &I) {
2601  // -0.0 - X --> fneg
2602  Type *Ty = I.getType();
2603  if (isa<Constant>(I.getOperand(0)) &&
2604      I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
2605    SDValue Op2 = getValue(I.getOperand(1));
2606    setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
2607                             Op2.getValueType(), Op2));
2608    return;
2609  }
2610
2611  visitBinary(I, ISD::FSUB);
2612}
2613
2614void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
2615  SDValue Op1 = getValue(I.getOperand(0));
2616  SDValue Op2 = getValue(I.getOperand(1));
2617  setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
2618                           Op1.getValueType(), Op1, Op2));
2619}
2620
2621void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
2622  SDValue Op1 = getValue(I.getOperand(0));
2623  SDValue Op2 = getValue(I.getOperand(1));
2624
2625  MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType());
2626
2627  // Coerce the shift amount to the right type if we can.
2628  if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
2629    unsigned ShiftSize = ShiftTy.getSizeInBits();
2630    unsigned Op2Size = Op2.getValueType().getSizeInBits();
2631    DebugLoc DL = getCurDebugLoc();
2632
2633    // If the operand is smaller than the shift count type, promote it.
2634    if (ShiftSize > Op2Size)
2635      Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
2636
2637    // If the operand is larger than the shift count type but the shift
2638    // count type has enough bits to represent any shift value, truncate
2639    // it now. This is a common case and it exposes the truncate to
2640    // optimization early.
2641    else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
2642      Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
2643    // Otherwise we'll need to temporarily settle for some other convenient
2644    // type.  Type legalization will make adjustments once the shiftee is split.
2645    else
2646      Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
2647  }
2648
2649  setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
2650                           Op1.getValueType(), Op1, Op2));
2651}
2652
2653void SelectionDAGBuilder::visitSDiv(const User &I) {
2654  SDValue Op1 = getValue(I.getOperand(0));
2655  SDValue Op2 = getValue(I.getOperand(1));
2656
2657  // Turn exact SDivs into multiplications.
2658  // FIXME: This should be in DAGCombiner, but it doesn't have access to the
2659  // exact bit.
2660  if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() &&
2661      !isa<ConstantSDNode>(Op1) &&
2662      isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue())
2663    setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG));
2664  else
2665    setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(),
2666                             Op1, Op2));
2667}
2668
2669void SelectionDAGBuilder::visitICmp(const User &I) {
2670  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
2671  if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
2672    predicate = IC->getPredicate();
2673  else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
2674    predicate = ICmpInst::Predicate(IC->getPredicate());
2675  SDValue Op1 = getValue(I.getOperand(0));
2676  SDValue Op2 = getValue(I.getOperand(1));
2677  ISD::CondCode Opcode = getICmpCondCode(predicate);
2678
2679  EVT DestVT = TLI.getValueType(I.getType());
2680  setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
2681}
2682
2683void SelectionDAGBuilder::visitFCmp(const User &I) {
2684  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
2685  if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
2686    predicate = FC->getPredicate();
2687  else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
2688    predicate = FCmpInst::Predicate(FC->getPredicate());
2689  SDValue Op1 = getValue(I.getOperand(0));
2690  SDValue Op2 = getValue(I.getOperand(1));
2691  ISD::CondCode Condition = getFCmpCondCode(predicate);
2692  if (TM.Options.NoNaNsFPMath)
2693    Condition = getFCmpCodeWithoutNaN(Condition);
2694  EVT DestVT = TLI.getValueType(I.getType());
2695  setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
2696}
2697
2698void SelectionDAGBuilder::visitSelect(const User &I) {
2699  SmallVector<EVT, 4> ValueVTs;
2700  ComputeValueVTs(TLI, I.getType(), ValueVTs);
2701  unsigned NumValues = ValueVTs.size();
2702  if (NumValues == 0) return;
2703
2704  SmallVector<SDValue, 4> Values(NumValues);
2705  SDValue Cond     = getValue(I.getOperand(0));
2706  SDValue TrueVal  = getValue(I.getOperand(1));
2707  SDValue FalseVal = getValue(I.getOperand(2));
2708  ISD::NodeType OpCode = Cond.getValueType().isVector() ?
2709    ISD::VSELECT : ISD::SELECT;
2710
2711  for (unsigned i = 0; i != NumValues; ++i)
2712    Values[i] = DAG.getNode(OpCode, getCurDebugLoc(),
2713                            TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
2714                            Cond,
2715                            SDValue(TrueVal.getNode(),
2716                                    TrueVal.getResNo() + i),
2717                            SDValue(FalseVal.getNode(),
2718                                    FalseVal.getResNo() + i));
2719
2720  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
2721                           DAG.getVTList(&ValueVTs[0], NumValues),
2722                           &Values[0], NumValues));
2723}
2724
2725void SelectionDAGBuilder::visitTrunc(const User &I) {
2726  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
2727  SDValue N = getValue(I.getOperand(0));
2728  EVT DestVT = TLI.getValueType(I.getType());
2729  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
2730}
2731
2732void SelectionDAGBuilder::visitZExt(const User &I) {
2733  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2734  // ZExt also can't be a cast to bool for same reason. So, nothing much to do
2735  SDValue N = getValue(I.getOperand(0));
2736  EVT DestVT = TLI.getValueType(I.getType());
2737  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
2738}
2739
2740void SelectionDAGBuilder::visitSExt(const User &I) {
2741  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2742  // SExt also can't be a cast to bool for same reason. So, nothing much to do
2743  SDValue N = getValue(I.getOperand(0));
2744  EVT DestVT = TLI.getValueType(I.getType());
2745  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
2746}
2747
2748void SelectionDAGBuilder::visitFPTrunc(const User &I) {
2749  // FPTrunc is never a no-op cast, no need to check
2750  SDValue N = getValue(I.getOperand(0));
2751  EVT DestVT = TLI.getValueType(I.getType());
2752  setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
2753                           DestVT, N,
2754                           DAG.getTargetConstant(0, TLI.getPointerTy())));
2755}
2756
2757void SelectionDAGBuilder::visitFPExt(const User &I){
2758  // FPExt is never a no-op cast, no need to check
2759  SDValue N = getValue(I.getOperand(0));
2760  EVT DestVT = TLI.getValueType(I.getType());
2761  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
2762}
2763
2764void SelectionDAGBuilder::visitFPToUI(const User &I) {
2765  // FPToUI is never a no-op cast, no need to check
2766  SDValue N = getValue(I.getOperand(0));
2767  EVT DestVT = TLI.getValueType(I.getType());
2768  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
2769}
2770
2771void SelectionDAGBuilder::visitFPToSI(const User &I) {
2772  // FPToSI is never a no-op cast, no need to check
2773  SDValue N = getValue(I.getOperand(0));
2774  EVT DestVT = TLI.getValueType(I.getType());
2775  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
2776}
2777
2778void SelectionDAGBuilder::visitUIToFP(const User &I) {
2779  // UIToFP is never a no-op cast, no need to check
2780  SDValue N = getValue(I.getOperand(0));
2781  EVT DestVT = TLI.getValueType(I.getType());
2782  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
2783}
2784
2785void SelectionDAGBuilder::visitSIToFP(const User &I){
2786  // SIToFP is never a no-op cast, no need to check
2787  SDValue N = getValue(I.getOperand(0));
2788  EVT DestVT = TLI.getValueType(I.getType());
2789  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
2790}
2791
2792void SelectionDAGBuilder::visitPtrToInt(const User &I) {
2793  // What to do depends on the size of the integer and the size of the pointer.
2794  // We can either truncate, zero extend, or no-op, accordingly.
2795  SDValue N = getValue(I.getOperand(0));
2796  EVT DestVT = TLI.getValueType(I.getType());
2797  setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
2798}
2799
2800void SelectionDAGBuilder::visitIntToPtr(const User &I) {
2801  // What to do depends on the size of the integer and the size of the pointer.
2802  // We can either truncate, zero extend, or no-op, accordingly.
2803  SDValue N = getValue(I.getOperand(0));
2804  EVT DestVT = TLI.getValueType(I.getType());
2805  setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
2806}
2807
2808void SelectionDAGBuilder::visitBitCast(const User &I) {
2809  SDValue N = getValue(I.getOperand(0));
2810  EVT DestVT = TLI.getValueType(I.getType());
2811
2812  // BitCast assures us that source and destination are the same size so this is
2813  // either a BITCAST or a no-op.
2814  if (DestVT != N.getValueType())
2815    setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
2816                             DestVT, N)); // convert types.
2817  else
2818    setValue(&I, N);            // noop cast.
2819}
2820
2821void SelectionDAGBuilder::visitInsertElement(const User &I) {
2822  SDValue InVec = getValue(I.getOperand(0));
2823  SDValue InVal = getValue(I.getOperand(1));
2824  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
2825                              TLI.getPointerTy(),
2826                              getValue(I.getOperand(2)));
2827  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
2828                           TLI.getValueType(I.getType()),
2829                           InVec, InVal, InIdx));
2830}
2831
2832void SelectionDAGBuilder::visitExtractElement(const User &I) {
2833  SDValue InVec = getValue(I.getOperand(0));
2834  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
2835                              TLI.getPointerTy(),
2836                              getValue(I.getOperand(1)));
2837  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
2838                           TLI.getValueType(I.getType()), InVec, InIdx));
2839}
2840
2841// Utility for visitShuffleVector - Return true if every element in Mask,
2842// beginning from position Pos and ending in Pos+Size, falls within the
2843// specified sequential range [L, L+Pos). or is undef.
2844static bool isSequentialInRange(const SmallVectorImpl<int> &Mask,
2845                                unsigned Pos, unsigned Size, int Low) {
2846  for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
2847    if (Mask[i] >= 0 && Mask[i] != Low)
2848      return false;
2849  return true;
2850}
2851
2852void SelectionDAGBuilder::visitShuffleVector(const User &I) {
2853  SDValue Src1 = getValue(I.getOperand(0));
2854  SDValue Src2 = getValue(I.getOperand(1));
2855
2856  SmallVector<int, 8> Mask;
2857  ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
2858  unsigned MaskNumElts = Mask.size();
2859
2860  EVT VT = TLI.getValueType(I.getType());
2861  EVT SrcVT = Src1.getValueType();
2862  unsigned SrcNumElts = SrcVT.getVectorNumElements();
2863
2864  if (SrcNumElts == MaskNumElts) {
2865    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
2866                                      &Mask[0]));
2867    return;
2868  }
2869
2870  // Normalize the shuffle vector since mask and vector length don't match.
2871  if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
2872    // Mask is longer than the source vectors and is a multiple of the source
2873    // vectors.  We can use concatenate vector to make the mask and vectors
2874    // lengths match.
2875    if (SrcNumElts*2 == MaskNumElts) {
2876      // First check for Src1 in low and Src2 in high
2877      if (isSequentialInRange(Mask, 0, SrcNumElts, 0) &&
2878          isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) {
2879        // The shuffle is concatenating two vectors together.
2880        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
2881                                 VT, Src1, Src2));
2882        return;
2883      }
2884      // Then check for Src2 in low and Src1 in high
2885      if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) &&
2886          isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) {
2887        // The shuffle is concatenating two vectors together.
2888        setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
2889                                 VT, Src2, Src1));
2890        return;
2891      }
2892    }
2893
2894    // Pad both vectors with undefs to make them the same length as the mask.
2895    unsigned NumConcat = MaskNumElts / SrcNumElts;
2896    bool Src1U = Src1.getOpcode() == ISD::UNDEF;
2897    bool Src2U = Src2.getOpcode() == ISD::UNDEF;
2898    SDValue UndefVal = DAG.getUNDEF(SrcVT);
2899
2900    SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
2901    SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
2902    MOps1[0] = Src1;
2903    MOps2[0] = Src2;
2904
2905    Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
2906                                                  getCurDebugLoc(), VT,
2907                                                  &MOps1[0], NumConcat);
2908    Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
2909                                                  getCurDebugLoc(), VT,
2910                                                  &MOps2[0], NumConcat);
2911
2912    // Readjust mask for new input vector length.
2913    SmallVector<int, 8> MappedOps;
2914    for (unsigned i = 0; i != MaskNumElts; ++i) {
2915      int Idx = Mask[i];
2916      if (Idx >= (int)SrcNumElts)
2917        Idx -= SrcNumElts - MaskNumElts;
2918      MappedOps.push_back(Idx);
2919    }
2920
2921    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
2922                                      &MappedOps[0]));
2923    return;
2924  }
2925
2926  if (SrcNumElts > MaskNumElts) {
2927    // Analyze the access pattern of the vector to see if we can extract
2928    // two subvectors and do the shuffle. The analysis is done by calculating
2929    // the range of elements the mask access on both vectors.
2930    int MinRange[2] = { static_cast<int>(SrcNumElts),
2931                        static_cast<int>(SrcNumElts)};
2932    int MaxRange[2] = {-1, -1};
2933
2934    for (unsigned i = 0; i != MaskNumElts; ++i) {
2935      int Idx = Mask[i];
2936      unsigned Input = 0;
2937      if (Idx < 0)
2938        continue;
2939
2940      if (Idx >= (int)SrcNumElts) {
2941        Input = 1;
2942        Idx -= SrcNumElts;
2943      }
2944      if (Idx > MaxRange[Input])
2945        MaxRange[Input] = Idx;
2946      if (Idx < MinRange[Input])
2947        MinRange[Input] = Idx;
2948    }
2949
2950    // Check if the access is smaller than the vector size and can we find
2951    // a reasonable extract index.
2952    int RangeUse[2] = { -1, -1 };  // 0 = Unused, 1 = Extract, -1 = Can not
2953                                   // Extract.
2954    int StartIdx[2];  // StartIdx to extract from
2955    for (unsigned Input = 0; Input < 2; ++Input) {
2956      if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) {
2957        RangeUse[Input] = 0; // Unused
2958        StartIdx[Input] = 0;
2959        continue;
2960      }
2961
2962      // Find a good start index that is a multiple of the mask length. Then
2963      // see if the rest of the elements are in range.
2964      StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
2965      if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
2966          StartIdx[Input] + MaskNumElts <= SrcNumElts)
2967        RangeUse[Input] = 1; // Extract from a multiple of the mask length.
2968    }
2969
2970    if (RangeUse[0] == 0 && RangeUse[1] == 0) {
2971      setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
2972      return;
2973    }
2974    if (RangeUse[0] >= 0 && RangeUse[1] >= 0) {
2975      // Extract appropriate subvector and generate a vector shuffle
2976      for (unsigned Input = 0; Input < 2; ++Input) {
2977        SDValue &Src = Input == 0 ? Src1 : Src2;
2978        if (RangeUse[Input] == 0)
2979          Src = DAG.getUNDEF(VT);
2980        else
2981          Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
2982                            Src, DAG.getIntPtrConstant(StartIdx[Input]));
2983      }
2984
2985      // Calculate new mask.
2986      SmallVector<int, 8> MappedOps;
2987      for (unsigned i = 0; i != MaskNumElts; ++i) {
2988        int Idx = Mask[i];
2989        if (Idx >= 0) {
2990          if (Idx < (int)SrcNumElts)
2991            Idx -= StartIdx[0];
2992          else
2993            Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
2994        }
2995        MappedOps.push_back(Idx);
2996      }
2997
2998      setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
2999                                        &MappedOps[0]));
3000      return;
3001    }
3002  }
3003
3004  // We can't use either concat vectors or extract subvectors so fall back to
3005  // replacing the shuffle with extract and build vector.
3006  // to insert and build vector.
3007  EVT EltVT = VT.getVectorElementType();
3008  EVT PtrVT = TLI.getPointerTy();
3009  SmallVector<SDValue,8> Ops;
3010  for (unsigned i = 0; i != MaskNumElts; ++i) {
3011    int Idx = Mask[i];
3012    SDValue Res;
3013
3014    if (Idx < 0) {
3015      Res = DAG.getUNDEF(EltVT);
3016    } else {
3017      SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
3018      if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
3019
3020      Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
3021                        EltVT, Src, DAG.getConstant(Idx, PtrVT));
3022    }
3023
3024    Ops.push_back(Res);
3025  }
3026
3027  setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
3028                           VT, &Ops[0], Ops.size()));
3029}
3030
3031void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
3032  const Value *Op0 = I.getOperand(0);
3033  const Value *Op1 = I.getOperand(1);
3034  Type *AggTy = I.getType();
3035  Type *ValTy = Op1->getType();
3036  bool IntoUndef = isa<UndefValue>(Op0);
3037  bool FromUndef = isa<UndefValue>(Op1);
3038
3039  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
3040
3041  SmallVector<EVT, 4> AggValueVTs;
3042  ComputeValueVTs(TLI, AggTy, AggValueVTs);
3043  SmallVector<EVT, 4> ValValueVTs;
3044  ComputeValueVTs(TLI, ValTy, ValValueVTs);
3045
3046  unsigned NumAggValues = AggValueVTs.size();
3047  unsigned NumValValues = ValValueVTs.size();
3048  SmallVector<SDValue, 4> Values(NumAggValues);
3049
3050  SDValue Agg = getValue(Op0);
3051  unsigned i = 0;
3052  // Copy the beginning value(s) from the original aggregate.
3053  for (; i != LinearIndex; ++i)
3054    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3055                SDValue(Agg.getNode(), Agg.getResNo() + i);
3056  // Copy values from the inserted value(s).
3057  if (NumValValues) {
3058    SDValue Val = getValue(Op1);
3059    for (; i != LinearIndex + NumValValues; ++i)
3060      Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3061                  SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
3062  }
3063  // Copy remaining value(s) from the original aggregate.
3064  for (; i != NumAggValues; ++i)
3065    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3066                SDValue(Agg.getNode(), Agg.getResNo() + i);
3067
3068  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
3069                           DAG.getVTList(&AggValueVTs[0], NumAggValues),
3070                           &Values[0], NumAggValues));
3071}
3072
3073void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
3074  const Value *Op0 = I.getOperand(0);
3075  Type *AggTy = Op0->getType();
3076  Type *ValTy = I.getType();
3077  bool OutOfUndef = isa<UndefValue>(Op0);
3078
3079  unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
3080
3081  SmallVector<EVT, 4> ValValueVTs;
3082  ComputeValueVTs(TLI, ValTy, ValValueVTs);
3083
3084  unsigned NumValValues = ValValueVTs.size();
3085
3086  // Ignore a extractvalue that produces an empty object
3087  if (!NumValValues) {
3088    setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3089    return;
3090  }
3091
3092  SmallVector<SDValue, 4> Values(NumValValues);
3093
3094  SDValue Agg = getValue(Op0);
3095  // Copy out the selected value(s).
3096  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
3097    Values[i - LinearIndex] =
3098      OutOfUndef ?
3099        DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
3100        SDValue(Agg.getNode(), Agg.getResNo() + i);
3101
3102  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
3103                           DAG.getVTList(&ValValueVTs[0], NumValValues),
3104                           &Values[0], NumValValues));
3105}
3106
3107void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
3108  SDValue N = getValue(I.getOperand(0));
3109  // Note that the pointer operand may be a vector of pointers. Take the scalar
3110  // element which holds a pointer.
3111  Type *Ty = I.getOperand(0)->getType()->getScalarType();
3112
3113  for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
3114       OI != E; ++OI) {
3115    const Value *Idx = *OI;
3116    if (StructType *StTy = dyn_cast<StructType>(Ty)) {
3117      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
3118      if (Field) {
3119        // N = N + Offset
3120        uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
3121        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
3122                        DAG.getIntPtrConstant(Offset));
3123      }
3124
3125      Ty = StTy->getElementType(Field);
3126    } else {
3127      Ty = cast<SequentialType>(Ty)->getElementType();
3128
3129      // If this is a constant subscript, handle it quickly.
3130      if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
3131        if (CI->isZero()) continue;
3132        uint64_t Offs =
3133            TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
3134        SDValue OffsVal;
3135        EVT PTy = TLI.getPointerTy();
3136        unsigned PtrBits = PTy.getSizeInBits();
3137        if (PtrBits < 64)
3138          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
3139                                TLI.getPointerTy(),
3140                                DAG.getConstant(Offs, MVT::i64));
3141        else
3142          OffsVal = DAG.getIntPtrConstant(Offs);
3143
3144        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
3145                        OffsVal);
3146        continue;
3147      }
3148
3149      // N = N + Idx * ElementSize;
3150      APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
3151                                TD->getTypeAllocSize(Ty));
3152      SDValue IdxN = getValue(Idx);
3153
3154      // If the index is smaller or larger than intptr_t, truncate or extend
3155      // it.
3156      IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
3157
3158      // If this is a multiply by a power of two, turn it into a shl
3159      // immediately.  This is a very common case.
3160      if (ElementSize != 1) {
3161        if (ElementSize.isPowerOf2()) {
3162          unsigned Amt = ElementSize.logBase2();
3163          IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
3164                             N.getValueType(), IdxN,
3165                             DAG.getConstant(Amt, IdxN.getValueType()));
3166        } else {
3167          SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
3168          IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
3169                             N.getValueType(), IdxN, Scale);
3170        }
3171      }
3172
3173      N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
3174                      N.getValueType(), N, IdxN);
3175    }
3176  }
3177
3178  setValue(&I, N);
3179}
3180
3181void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
3182  // If this is a fixed sized alloca in the entry block of the function,
3183  // allocate it statically on the stack.
3184  if (FuncInfo.StaticAllocaMap.count(&I))
3185    return;   // getValue will auto-populate this.
3186
3187  Type *Ty = I.getAllocatedType();
3188  uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
3189  unsigned Align =
3190    std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
3191             I.getAlignment());
3192
3193  SDValue AllocSize = getValue(I.getArraySize());
3194
3195  EVT IntPtr = TLI.getPointerTy();
3196  if (AllocSize.getValueType() != IntPtr)
3197    AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
3198
3199  AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr,
3200                          AllocSize,
3201                          DAG.getConstant(TySize, IntPtr));
3202
3203  // Handle alignment.  If the requested alignment is less than or equal to
3204  // the stack alignment, ignore it.  If the size is greater than or equal to
3205  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
3206  unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
3207  if (Align <= StackAlign)
3208    Align = 0;
3209
3210  // Round the size of the allocation up to the stack alignment size
3211  // by add SA-1 to the size.
3212  AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
3213                          AllocSize.getValueType(), AllocSize,
3214                          DAG.getIntPtrConstant(StackAlign-1));
3215
3216  // Mask out the low bits for alignment purposes.
3217  AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
3218                          AllocSize.getValueType(), AllocSize,
3219                          DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
3220
3221  SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
3222  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
3223  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
3224                            VTs, Ops, 3);
3225  setValue(&I, DSA);
3226  DAG.setRoot(DSA.getValue(1));
3227
3228  // Inform the Frame Information that we have just allocated a variable-sized
3229  // object.
3230  FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1);
3231}
3232
3233void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
3234  if (I.isAtomic())
3235    return visitAtomicLoad(I);
3236
3237  const Value *SV = I.getOperand(0);
3238  SDValue Ptr = getValue(SV);
3239
3240  Type *Ty = I.getType();
3241
3242  bool isVolatile = I.isVolatile();
3243  bool isNonTemporal = I.getMetadata("nontemporal") != 0;
3244  bool isInvariant = I.getMetadata("invariant.load") != 0;
3245  unsigned Alignment = I.getAlignment();
3246  const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
3247  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3248
3249  SmallVector<EVT, 4> ValueVTs;
3250  SmallVector<uint64_t, 4> Offsets;
3251  ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
3252  unsigned NumValues = ValueVTs.size();
3253  if (NumValues == 0)
3254    return;
3255
3256  SDValue Root;
3257  bool ConstantMemory = false;
3258  if (I.isVolatile() || NumValues > MaxParallelChains)
3259    // Serialize volatile loads with other side effects.
3260    Root = getRoot();
3261  else if (AA->pointsToConstantMemory(
3262             AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) {
3263    // Do not serialize (non-volatile) loads of constant memory with anything.
3264    Root = DAG.getEntryNode();
3265    ConstantMemory = true;
3266  } else {
3267    // Do not serialize non-volatile loads against each other.
3268    Root = DAG.getRoot();
3269  }
3270
3271  SmallVector<SDValue, 4> Values(NumValues);
3272  SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
3273                                          NumValues));
3274  EVT PtrVT = Ptr.getValueType();
3275  unsigned ChainI = 0;
3276  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3277    // Serializing loads here may result in excessive register pressure, and
3278    // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
3279    // could recover a bit by hoisting nodes upward in the chain by recognizing
3280    // they are side-effect free or do not alias. The optimizer should really
3281    // avoid this case by converting large object/array copies to llvm.memcpy
3282    // (MaxParallelChains should always remain as failsafe).
3283    if (ChainI == MaxParallelChains) {
3284      assert(PendingLoads.empty() && "PendingLoads must be serialized first");
3285      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
3286                                  MVT::Other, &Chains[0], ChainI);
3287      Root = Chain;
3288      ChainI = 0;
3289    }
3290    SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
3291                            PtrVT, Ptr,
3292                            DAG.getConstant(Offsets[i], PtrVT));
3293    SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
3294                            A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
3295                            isNonTemporal, isInvariant, Alignment, TBAAInfo,
3296                            Ranges);
3297
3298    Values[i] = L;
3299    Chains[ChainI] = L.getValue(1);
3300  }
3301
3302  if (!ConstantMemory) {
3303    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
3304                                MVT::Other, &Chains[0], ChainI);
3305    if (isVolatile)
3306      DAG.setRoot(Chain);
3307    else
3308      PendingLoads.push_back(Chain);
3309  }
3310
3311  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
3312                           DAG.getVTList(&ValueVTs[0], NumValues),
3313                           &Values[0], NumValues));
3314}
3315
3316void SelectionDAGBuilder::visitStore(const StoreInst &I) {
3317  if (I.isAtomic())
3318    return visitAtomicStore(I);
3319
3320  const Value *SrcV = I.getOperand(0);
3321  const Value *PtrV = I.getOperand(1);
3322
3323  SmallVector<EVT, 4> ValueVTs;
3324  SmallVector<uint64_t, 4> Offsets;
3325  ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
3326  unsigned NumValues = ValueVTs.size();
3327  if (NumValues == 0)
3328    return;
3329
3330  // Get the lowered operands. Note that we do this after
3331  // checking if NumResults is zero, because with zero results
3332  // the operands won't have values in the map.
3333  SDValue Src = getValue(SrcV);
3334  SDValue Ptr = getValue(PtrV);
3335
3336  SDValue Root = getRoot();
3337  SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
3338                                          NumValues));
3339  EVT PtrVT = Ptr.getValueType();
3340  bool isVolatile = I.isVolatile();
3341  bool isNonTemporal = I.getMetadata("nontemporal") != 0;
3342  unsigned Alignment = I.getAlignment();
3343  const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
3344
3345  unsigned ChainI = 0;
3346  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3347    // See visitLoad comments.
3348    if (ChainI == MaxParallelChains) {
3349      SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
3350                                  MVT::Other, &Chains[0], ChainI);
3351      Root = Chain;
3352      ChainI = 0;
3353    }
3354    SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
3355                              DAG.getConstant(Offsets[i], PtrVT));
3356    SDValue St = DAG.getStore(Root, getCurDebugLoc(),
3357                              SDValue(Src.getNode(), Src.getResNo() + i),
3358                              Add, MachinePointerInfo(PtrV, Offsets[i]),
3359                              isVolatile, isNonTemporal, Alignment, TBAAInfo);
3360    Chains[ChainI] = St;
3361  }
3362
3363  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
3364                                  MVT::Other, &Chains[0], ChainI);
3365  ++SDNodeOrder;
3366  AssignOrderingToNode(StoreNode.getNode());
3367  DAG.setRoot(StoreNode);
3368}
3369
3370static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order,
3371                                    SynchronizationScope Scope,
3372                                    bool Before, DebugLoc dl,
3373                                    SelectionDAG &DAG,
3374                                    const TargetLowering &TLI) {
3375  // Fence, if necessary
3376  if (Before) {
3377    if (Order == AcquireRelease || Order == SequentiallyConsistent)
3378      Order = Release;
3379    else if (Order == Acquire || Order == Monotonic)
3380      return Chain;
3381  } else {
3382    if (Order == AcquireRelease)
3383      Order = Acquire;
3384    else if (Order == Release || Order == Monotonic)
3385      return Chain;
3386  }
3387  SDValue Ops[3];
3388  Ops[0] = Chain;
3389  Ops[1] = DAG.getConstant(Order, TLI.getPointerTy());
3390  Ops[2] = DAG.getConstant(Scope, TLI.getPointerTy());
3391  return DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3);
3392}
3393
3394void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
3395  DebugLoc dl = getCurDebugLoc();
3396  AtomicOrdering Order = I.getOrdering();
3397  SynchronizationScope Scope = I.getSynchScope();
3398
3399  SDValue InChain = getRoot();
3400
3401  if (TLI.getInsertFencesForAtomic())
3402    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
3403                                   DAG, TLI);
3404
3405  SDValue L =
3406    DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl,
3407                  getValue(I.getCompareOperand()).getValueType().getSimpleVT(),
3408                  InChain,
3409                  getValue(I.getPointerOperand()),
3410                  getValue(I.getCompareOperand()),
3411                  getValue(I.getNewValOperand()),
3412                  MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */,
3413                  TLI.getInsertFencesForAtomic() ? Monotonic : Order,
3414                  Scope);
3415
3416  SDValue OutChain = L.getValue(1);
3417
3418  if (TLI.getInsertFencesForAtomic())
3419    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
3420                                    DAG, TLI);
3421
3422  setValue(&I, L);
3423  DAG.setRoot(OutChain);
3424}
3425
3426void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
3427  DebugLoc dl = getCurDebugLoc();
3428  ISD::NodeType NT;
3429  switch (I.getOperation()) {
3430  default: llvm_unreachable("Unknown atomicrmw operation");
3431  case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
3432  case AtomicRMWInst::Add:  NT = ISD::ATOMIC_LOAD_ADD; break;
3433  case AtomicRMWInst::Sub:  NT = ISD::ATOMIC_LOAD_SUB; break;
3434  case AtomicRMWInst::And:  NT = ISD::ATOMIC_LOAD_AND; break;
3435  case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
3436  case AtomicRMWInst::Or:   NT = ISD::ATOMIC_LOAD_OR; break;
3437  case AtomicRMWInst::Xor:  NT = ISD::ATOMIC_LOAD_XOR; break;
3438  case AtomicRMWInst::Max:  NT = ISD::ATOMIC_LOAD_MAX; break;
3439  case AtomicRMWInst::Min:  NT = ISD::ATOMIC_LOAD_MIN; break;
3440  case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
3441  case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
3442  }
3443  AtomicOrdering Order = I.getOrdering();
3444  SynchronizationScope Scope = I.getSynchScope();
3445
3446  SDValue InChain = getRoot();
3447
3448  if (TLI.getInsertFencesForAtomic())
3449    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
3450                                   DAG, TLI);
3451
3452  SDValue L =
3453    DAG.getAtomic(NT, dl,
3454                  getValue(I.getValOperand()).getValueType().getSimpleVT(),
3455                  InChain,
3456                  getValue(I.getPointerOperand()),
3457                  getValue(I.getValOperand()),
3458                  I.getPointerOperand(), 0 /* Alignment */,
3459                  TLI.getInsertFencesForAtomic() ? Monotonic : Order,
3460                  Scope);
3461
3462  SDValue OutChain = L.getValue(1);
3463
3464  if (TLI.getInsertFencesForAtomic())
3465    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
3466                                    DAG, TLI);
3467
3468  setValue(&I, L);
3469  DAG.setRoot(OutChain);
3470}
3471
3472void SelectionDAGBuilder::visitFence(const FenceInst &I) {
3473  DebugLoc dl = getCurDebugLoc();
3474  SDValue Ops[3];
3475  Ops[0] = getRoot();
3476  Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy());
3477  Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy());
3478  DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3));
3479}
3480
3481void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
3482  DebugLoc dl = getCurDebugLoc();
3483  AtomicOrdering Order = I.getOrdering();
3484  SynchronizationScope Scope = I.getSynchScope();
3485
3486  SDValue InChain = getRoot();
3487
3488  EVT VT = TLI.getValueType(I.getType());
3489
3490  if (I.getAlignment() * 8 < VT.getSizeInBits())
3491    report_fatal_error("Cannot generate unaligned atomic load");
3492
3493  SDValue L =
3494    DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
3495                  getValue(I.getPointerOperand()),
3496                  I.getPointerOperand(), I.getAlignment(),
3497                  TLI.getInsertFencesForAtomic() ? Monotonic : Order,
3498                  Scope);
3499
3500  SDValue OutChain = L.getValue(1);
3501
3502  if (TLI.getInsertFencesForAtomic())
3503    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
3504                                    DAG, TLI);
3505
3506  setValue(&I, L);
3507  DAG.setRoot(OutChain);
3508}
3509
3510void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
3511  DebugLoc dl = getCurDebugLoc();
3512
3513  AtomicOrdering Order = I.getOrdering();
3514  SynchronizationScope Scope = I.getSynchScope();
3515
3516  SDValue InChain = getRoot();
3517
3518  EVT VT = TLI.getValueType(I.getValueOperand()->getType());
3519
3520  if (I.getAlignment() * 8 < VT.getSizeInBits())
3521    report_fatal_error("Cannot generate unaligned atomic store");
3522
3523  if (TLI.getInsertFencesForAtomic())
3524    InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl,
3525                                   DAG, TLI);
3526
3527  SDValue OutChain =
3528    DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
3529                  InChain,
3530                  getValue(I.getPointerOperand()),
3531                  getValue(I.getValueOperand()),
3532                  I.getPointerOperand(), I.getAlignment(),
3533                  TLI.getInsertFencesForAtomic() ? Monotonic : Order,
3534                  Scope);
3535
3536  if (TLI.getInsertFencesForAtomic())
3537    OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl,
3538                                    DAG, TLI);
3539
3540  DAG.setRoot(OutChain);
3541}
3542
3543/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
3544/// node.
3545void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
3546                                               unsigned Intrinsic) {
3547  bool HasChain = !I.doesNotAccessMemory();
3548  bool OnlyLoad = HasChain && I.onlyReadsMemory();
3549
3550  // Build the operand list.
3551  SmallVector<SDValue, 8> Ops;
3552  if (HasChain) {  // If this intrinsic has side-effects, chainify it.
3553    if (OnlyLoad) {
3554      // We don't need to serialize loads against other loads.
3555      Ops.push_back(DAG.getRoot());
3556    } else {
3557      Ops.push_back(getRoot());
3558    }
3559  }
3560
3561  // Info is set by getTgtMemInstrinsic
3562  TargetLowering::IntrinsicInfo Info;
3563  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
3564
3565  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
3566  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
3567      Info.opc == ISD::INTRINSIC_W_CHAIN)
3568    Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy()));
3569
3570  // Add all operands of the call to the operand list.
3571  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
3572    SDValue Op = getValue(I.getArgOperand(i));
3573    Ops.push_back(Op);
3574  }
3575
3576  SmallVector<EVT, 4> ValueVTs;
3577  ComputeValueVTs(TLI, I.getType(), ValueVTs);
3578
3579  if (HasChain)
3580    ValueVTs.push_back(MVT::Other);
3581
3582  SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
3583
3584  // Create the node.
3585  SDValue Result;
3586  if (IsTgtIntrinsic) {
3587    // This is target intrinsic that touches memory
3588    Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
3589                                     VTs, &Ops[0], Ops.size(),
3590                                     Info.memVT,
3591                                   MachinePointerInfo(Info.ptrVal, Info.offset),
3592                                     Info.align, Info.vol,
3593                                     Info.readMem, Info.writeMem);
3594  } else if (!HasChain) {
3595    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
3596                         VTs, &Ops[0], Ops.size());
3597  } else if (!I.getType()->isVoidTy()) {
3598    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
3599                         VTs, &Ops[0], Ops.size());
3600  } else {
3601    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
3602                         VTs, &Ops[0], Ops.size());
3603  }
3604
3605  if (HasChain) {
3606    SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
3607    if (OnlyLoad)
3608      PendingLoads.push_back(Chain);
3609    else
3610      DAG.setRoot(Chain);
3611  }
3612
3613  if (!I.getType()->isVoidTy()) {
3614    if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
3615      EVT VT = TLI.getValueType(PTy);
3616      Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
3617    }
3618
3619    setValue(&I, Result);
3620  } else {
3621    // Assign order to result here. If the intrinsic does not produce a result,
3622    // it won't be mapped to a SDNode and visit() will not assign it an order
3623    // number.
3624    ++SDNodeOrder;
3625    AssignOrderingToNode(Result.getNode());
3626  }
3627}
3628
3629/// GetSignificand - Get the significand and build it into a floating-point
3630/// number with exponent of 1:
3631///
3632///   Op = (Op & 0x007fffff) | 0x3f800000;
3633///
3634/// where Op is the hexidecimal representation of floating point value.
3635static SDValue
3636GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
3637  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
3638                           DAG.getConstant(0x007fffff, MVT::i32));
3639  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
3640                           DAG.getConstant(0x3f800000, MVT::i32));
3641  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
3642}
3643
3644/// GetExponent - Get the exponent:
3645///
3646///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
3647///
3648/// where Op is the hexidecimal representation of floating point value.
3649static SDValue
3650GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
3651            DebugLoc dl) {
3652  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
3653                           DAG.getConstant(0x7f800000, MVT::i32));
3654  SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
3655                           DAG.getConstant(23, TLI.getPointerTy()));
3656  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
3657                           DAG.getConstant(127, MVT::i32));
3658  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
3659}
3660
3661/// getF32Constant - Get 32-bit floating point constant.
3662static SDValue
3663getF32Constant(SelectionDAG &DAG, unsigned Flt) {
3664  return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
3665}
3666
3667/// visitExp - Lower an exp intrinsic. Handles the special sequences for
3668/// limited-precision mode.
3669void
3670SelectionDAGBuilder::visitExp(const CallInst &I) {
3671  SDValue result;
3672  DebugLoc dl = getCurDebugLoc();
3673
3674  if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
3675      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3676    SDValue Op = getValue(I.getArgOperand(0));
3677
3678    // Put the exponent in the right bit position for later addition to the
3679    // final result:
3680    //
3681    //   #define LOG2OFe 1.4426950f
3682    //   IntegerPartOfX = ((int32_t)(X * LOG2OFe));
3683    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
3684                             getF32Constant(DAG, 0x3fb8aa3b));
3685    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
3686
3687    //   FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
3688    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
3689    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
3690
3691    //   IntegerPartOfX <<= 23;
3692    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
3693                                 DAG.getConstant(23, TLI.getPointerTy()));
3694
3695    if (LimitFloatPrecision <= 6) {
3696      // For floating-point precision of 6:
3697      //
3698      //   TwoToFractionalPartOfX =
3699      //     0.997535578f +
3700      //       (0.735607626f + 0.252464424f * x) * x;
3701      //
3702      // error 0.0144103317, which is 6 bits
3703      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3704                               getF32Constant(DAG, 0x3e814304));
3705      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3706                               getF32Constant(DAG, 0x3f3c50c8));
3707      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3708      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3709                               getF32Constant(DAG, 0x3f7f5e7e));
3710      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5);
3711
3712      // Add the exponent into the result in integer domain.
3713      SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3714                               TwoToFracPartOfX, IntegerPartOfX);
3715
3716      result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6);
3717    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3718      // For floating-point precision of 12:
3719      //
3720      //   TwoToFractionalPartOfX =
3721      //     0.999892986f +
3722      //       (0.696457318f +
3723      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
3724      //
3725      // 0.000107046256 error, which is 13 to 14 bits
3726      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3727                               getF32Constant(DAG, 0x3da235e3));
3728      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3729                               getF32Constant(DAG, 0x3e65b8f3));
3730      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3731      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3732                               getF32Constant(DAG, 0x3f324b07));
3733      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3734      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3735                               getF32Constant(DAG, 0x3f7ff8fd));
3736      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7);
3737
3738      // Add the exponent into the result in integer domain.
3739      SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3740                               TwoToFracPartOfX, IntegerPartOfX);
3741
3742      result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8);
3743    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3744      // For floating-point precision of 18:
3745      //
3746      //   TwoToFractionalPartOfX =
3747      //     0.999999982f +
3748      //       (0.693148872f +
3749      //         (0.240227044f +
3750      //           (0.554906021e-1f +
3751      //             (0.961591928e-2f +
3752      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
3753      //
3754      // error 2.47208000*10^(-7), which is better than 18 bits
3755      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3756                               getF32Constant(DAG, 0x3924b03e));
3757      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3758                               getF32Constant(DAG, 0x3ab24b87));
3759      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3760      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3761                               getF32Constant(DAG, 0x3c1d8c17));
3762      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3763      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3764                               getF32Constant(DAG, 0x3d634a1d));
3765      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3766      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3767                               getF32Constant(DAG, 0x3e75fe14));
3768      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3769      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
3770                                getF32Constant(DAG, 0x3f317234));
3771      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
3772      SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
3773                                getF32Constant(DAG, 0x3f800000));
3774      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,
3775                                             MVT::i32, t13);
3776
3777      // Add the exponent into the result in integer domain.
3778      SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3779                                TwoToFracPartOfX, IntegerPartOfX);
3780
3781      result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14);
3782    }
3783  } else {
3784    // No special expansion.
3785    result = DAG.getNode(ISD::FEXP, dl,
3786                         getValue(I.getArgOperand(0)).getValueType(),
3787                         getValue(I.getArgOperand(0)));
3788  }
3789
3790  setValue(&I, result);
3791}
3792
3793/// visitLog - Lower a log intrinsic. Handles the special sequences for
3794/// limited-precision mode.
3795void
3796SelectionDAGBuilder::visitLog(const CallInst &I) {
3797  SDValue result;
3798  DebugLoc dl = getCurDebugLoc();
3799
3800  if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
3801      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3802    SDValue Op = getValue(I.getArgOperand(0));
3803    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
3804
3805    // Scale the exponent by log(2) [0.69314718f].
3806    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
3807    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
3808                                        getF32Constant(DAG, 0x3f317218));
3809
3810    // Get the significand and build it into a floating-point number with
3811    // exponent of 1.
3812    SDValue X = GetSignificand(DAG, Op1, dl);
3813
3814    if (LimitFloatPrecision <= 6) {
3815      // For floating-point precision of 6:
3816      //
3817      //   LogofMantissa =
3818      //     -1.1609546f +
3819      //       (1.4034025f - 0.23903021f * x) * x;
3820      //
3821      // error 0.0034276066, which is better than 8 bits
3822      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3823                               getF32Constant(DAG, 0xbe74c456));
3824      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3825                               getF32Constant(DAG, 0x3fb3a2b1));
3826      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3827      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3828                                          getF32Constant(DAG, 0x3f949a29));
3829
3830      result = DAG.getNode(ISD::FADD, dl,
3831                           MVT::f32, LogOfExponent, LogOfMantissa);
3832    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3833      // For floating-point precision of 12:
3834      //
3835      //   LogOfMantissa =
3836      //     -1.7417939f +
3837      //       (2.8212026f +
3838      //         (-1.4699568f +
3839      //           (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
3840      //
3841      // error 0.000061011436, which is 14 bits
3842      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3843                               getF32Constant(DAG, 0xbd67b6d6));
3844      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3845                               getF32Constant(DAG, 0x3ee4f4b8));
3846      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3847      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3848                               getF32Constant(DAG, 0x3fbc278b));
3849      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3850      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3851                               getF32Constant(DAG, 0x40348e95));
3852      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3853      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3854                                          getF32Constant(DAG, 0x3fdef31a));
3855
3856      result = DAG.getNode(ISD::FADD, dl,
3857                           MVT::f32, LogOfExponent, LogOfMantissa);
3858    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3859      // For floating-point precision of 18:
3860      //
3861      //   LogOfMantissa =
3862      //     -2.1072184f +
3863      //       (4.2372794f +
3864      //         (-3.7029485f +
3865      //           (2.2781945f +
3866      //             (-0.87823314f +
3867      //               (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
3868      //
3869      // error 0.0000023660568, which is better than 18 bits
3870      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3871                               getF32Constant(DAG, 0xbc91e5ac));
3872      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3873                               getF32Constant(DAG, 0x3e4350aa));
3874      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3875      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3876                               getF32Constant(DAG, 0x3f60d3e3));
3877      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3878      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3879                               getF32Constant(DAG, 0x4011cdf0));
3880      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3881      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3882                               getF32Constant(DAG, 0x406cfd1c));
3883      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3884      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3885                               getF32Constant(DAG, 0x408797cb));
3886      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3887      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
3888                                          getF32Constant(DAG, 0x4006dcab));
3889
3890      result = DAG.getNode(ISD::FADD, dl,
3891                           MVT::f32, LogOfExponent, LogOfMantissa);
3892    }
3893  } else {
3894    // No special expansion.
3895    result = DAG.getNode(ISD::FLOG, dl,
3896                         getValue(I.getArgOperand(0)).getValueType(),
3897                         getValue(I.getArgOperand(0)));
3898  }
3899
3900  setValue(&I, result);
3901}
3902
3903/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
3904/// limited-precision mode.
3905void
3906SelectionDAGBuilder::visitLog2(const CallInst &I) {
3907  SDValue result;
3908  DebugLoc dl = getCurDebugLoc();
3909
3910  if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
3911      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3912    SDValue Op = getValue(I.getArgOperand(0));
3913    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
3914
3915    // Get the exponent.
3916    SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
3917
3918    // Get the significand and build it into a floating-point number with
3919    // exponent of 1.
3920    SDValue X = GetSignificand(DAG, Op1, dl);
3921
3922    // Different possible minimax approximations of significand in
3923    // floating-point for various degrees of accuracy over [1,2].
3924    if (LimitFloatPrecision <= 6) {
3925      // For floating-point precision of 6:
3926      //
3927      //   Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
3928      //
3929      // error 0.0049451742, which is more than 7 bits
3930      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3931                               getF32Constant(DAG, 0xbeb08fe0));
3932      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3933                               getF32Constant(DAG, 0x40019463));
3934      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3935      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3936                                           getF32Constant(DAG, 0x3fd6633d));
3937
3938      result = DAG.getNode(ISD::FADD, dl,
3939                           MVT::f32, LogOfExponent, Log2ofMantissa);
3940    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3941      // For floating-point precision of 12:
3942      //
3943      //   Log2ofMantissa =
3944      //     -2.51285454f +
3945      //       (4.07009056f +
3946      //         (-2.12067489f +
3947      //           (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
3948      //
3949      // error 0.0000876136000, which is better than 13 bits
3950      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3951                               getF32Constant(DAG, 0xbda7262e));
3952      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3953                               getF32Constant(DAG, 0x3f25280b));
3954      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3955      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3956                               getF32Constant(DAG, 0x4007b923));
3957      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3958      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3959                               getF32Constant(DAG, 0x40823e2f));
3960      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3961      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3962                                           getF32Constant(DAG, 0x4020d29c));
3963
3964      result = DAG.getNode(ISD::FADD, dl,
3965                           MVT::f32, LogOfExponent, Log2ofMantissa);
3966    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3967      // For floating-point precision of 18:
3968      //
3969      //   Log2ofMantissa =
3970      //     -3.0400495f +
3971      //       (6.1129976f +
3972      //         (-5.3420409f +
3973      //           (3.2865683f +
3974      //             (-1.2669343f +
3975      //               (0.27515199f -
3976      //                 0.25691327e-1f * x) * x) * x) * x) * x) * x;
3977      //
3978      // error 0.0000018516, which is better than 18 bits
3979      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3980                               getF32Constant(DAG, 0xbcd2769e));
3981      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3982                               getF32Constant(DAG, 0x3e8ce0b9));
3983      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3984      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3985                               getF32Constant(DAG, 0x3fa22ae7));
3986      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3987      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3988                               getF32Constant(DAG, 0x40525723));
3989      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3990      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3991                               getF32Constant(DAG, 0x40aaf200));
3992      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3993      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3994                               getF32Constant(DAG, 0x40c39dad));
3995      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3996      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
3997                                           getF32Constant(DAG, 0x4042902c));
3998
3999      result = DAG.getNode(ISD::FADD, dl,
4000                           MVT::f32, LogOfExponent, Log2ofMantissa);
4001    }
4002  } else {
4003    // No special expansion.
4004    result = DAG.getNode(ISD::FLOG2, dl,
4005                         getValue(I.getArgOperand(0)).getValueType(),
4006                         getValue(I.getArgOperand(0)));
4007  }
4008
4009  setValue(&I, result);
4010}
4011
4012/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
4013/// limited-precision mode.
4014void
4015SelectionDAGBuilder::visitLog10(const CallInst &I) {
4016  SDValue result;
4017  DebugLoc dl = getCurDebugLoc();
4018
4019  if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
4020      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4021    SDValue Op = getValue(I.getArgOperand(0));
4022    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4023
4024    // Scale the exponent by log10(2) [0.30102999f].
4025    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4026    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4027                                        getF32Constant(DAG, 0x3e9a209a));
4028
4029    // Get the significand and build it into a floating-point number with
4030    // exponent of 1.
4031    SDValue X = GetSignificand(DAG, Op1, dl);
4032
4033    if (LimitFloatPrecision <= 6) {
4034      // For floating-point precision of 6:
4035      //
4036      //   Log10ofMantissa =
4037      //     -0.50419619f +
4038      //       (0.60948995f - 0.10380950f * x) * x;
4039      //
4040      // error 0.0014886165, which is 6 bits
4041      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4042                               getF32Constant(DAG, 0xbdd49a13));
4043      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4044                               getF32Constant(DAG, 0x3f1c0789));
4045      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4046      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4047                                            getF32Constant(DAG, 0x3f011300));
4048
4049      result = DAG.getNode(ISD::FADD, dl,
4050                           MVT::f32, LogOfExponent, Log10ofMantissa);
4051    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
4052      // For floating-point precision of 12:
4053      //
4054      //   Log10ofMantissa =
4055      //     -0.64831180f +
4056      //       (0.91751397f +
4057      //         (-0.31664806f + 0.47637168e-1f * x) * x) * x;
4058      //
4059      // error 0.00019228036, which is better than 12 bits
4060      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4061                               getF32Constant(DAG, 0x3d431f31));
4062      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4063                               getF32Constant(DAG, 0x3ea21fb2));
4064      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4065      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4066                               getF32Constant(DAG, 0x3f6ae232));
4067      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4068      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4069                                            getF32Constant(DAG, 0x3f25f7c3));
4070
4071      result = DAG.getNode(ISD::FADD, dl,
4072                           MVT::f32, LogOfExponent, Log10ofMantissa);
4073    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
4074      // For floating-point precision of 18:
4075      //
4076      //   Log10ofMantissa =
4077      //     -0.84299375f +
4078      //       (1.5327582f +
4079      //         (-1.0688956f +
4080      //           (0.49102474f +
4081      //             (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
4082      //
4083      // error 0.0000037995730, which is better than 18 bits
4084      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4085                               getF32Constant(DAG, 0x3c5d51ce));
4086      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4087                               getF32Constant(DAG, 0x3e00685a));
4088      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4089      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4090                               getF32Constant(DAG, 0x3efb6798));
4091      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4092      SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4093                               getF32Constant(DAG, 0x3f88d192));
4094      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4095      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4096                               getF32Constant(DAG, 0x3fc4316c));
4097      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4098      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
4099                                            getF32Constant(DAG, 0x3f57ce70));
4100
4101      result = DAG.getNode(ISD::FADD, dl,
4102                           MVT::f32, LogOfExponent, Log10ofMantissa);
4103    }
4104  } else {
4105    // No special expansion.
4106    result = DAG.getNode(ISD::FLOG10, dl,
4107                         getValue(I.getArgOperand(0)).getValueType(),
4108                         getValue(I.getArgOperand(0)));
4109  }
4110
4111  setValue(&I, result);
4112}
4113
4114/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
4115/// limited-precision mode.
4116void
4117SelectionDAGBuilder::visitExp2(const CallInst &I) {
4118  SDValue result;
4119  DebugLoc dl = getCurDebugLoc();
4120
4121  if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
4122      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4123    SDValue Op = getValue(I.getArgOperand(0));
4124
4125    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
4126
4127    //   FractionalPartOfX = x - (float)IntegerPartOfX;
4128    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
4129    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
4130
4131    //   IntegerPartOfX <<= 23;
4132    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
4133                                 DAG.getConstant(23, TLI.getPointerTy()));
4134
4135    if (LimitFloatPrecision <= 6) {
4136      // For floating-point precision of 6:
4137      //
4138      //   TwoToFractionalPartOfX =
4139      //     0.997535578f +
4140      //       (0.735607626f + 0.252464424f * x) * x;
4141      //
4142      // error 0.0144103317, which is 6 bits
4143      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4144                               getF32Constant(DAG, 0x3e814304));
4145      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4146                               getF32Constant(DAG, 0x3f3c50c8));
4147      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4148      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4149                               getF32Constant(DAG, 0x3f7f5e7e));
4150      SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
4151      SDValue TwoToFractionalPartOfX =
4152        DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
4153
4154      result = DAG.getNode(ISD::BITCAST, dl,
4155                           MVT::f32, TwoToFractionalPartOfX);
4156    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
4157      // For floating-point precision of 12:
4158      //
4159      //   TwoToFractionalPartOfX =
4160      //     0.999892986f +
4161      //       (0.696457318f +
4162      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
4163      //
4164      // error 0.000107046256, which is 13 to 14 bits
4165      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4166                               getF32Constant(DAG, 0x3da235e3));
4167      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4168                               getF32Constant(DAG, 0x3e65b8f3));
4169      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4170      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4171                               getF32Constant(DAG, 0x3f324b07));
4172      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4173      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4174                               getF32Constant(DAG, 0x3f7ff8fd));
4175      SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
4176      SDValue TwoToFractionalPartOfX =
4177        DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
4178
4179      result = DAG.getNode(ISD::BITCAST, dl,
4180                           MVT::f32, TwoToFractionalPartOfX);
4181    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
4182      // For floating-point precision of 18:
4183      //
4184      //   TwoToFractionalPartOfX =
4185      //     0.999999982f +
4186      //       (0.693148872f +
4187      //         (0.240227044f +
4188      //           (0.554906021e-1f +
4189      //             (0.961591928e-2f +
4190      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
4191      // error 2.47208000*10^(-7), which is better than 18 bits
4192      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4193                               getF32Constant(DAG, 0x3924b03e));
4194      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4195                               getF32Constant(DAG, 0x3ab24b87));
4196      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4197      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4198                               getF32Constant(DAG, 0x3c1d8c17));
4199      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4200      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4201                               getF32Constant(DAG, 0x3d634a1d));
4202      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4203      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4204                               getF32Constant(DAG, 0x3e75fe14));
4205      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4206      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
4207                                getF32Constant(DAG, 0x3f317234));
4208      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
4209      SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
4210                                getF32Constant(DAG, 0x3f800000));
4211      SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
4212      SDValue TwoToFractionalPartOfX =
4213        DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
4214
4215      result = DAG.getNode(ISD::BITCAST, dl,
4216                           MVT::f32, TwoToFractionalPartOfX);
4217    }
4218  } else {
4219    // No special expansion.
4220    result = DAG.getNode(ISD::FEXP2, dl,
4221                         getValue(I.getArgOperand(0)).getValueType(),
4222                         getValue(I.getArgOperand(0)));
4223  }
4224
4225  setValue(&I, result);
4226}
4227
4228/// visitPow - Lower a pow intrinsic. Handles the special sequences for
4229/// limited-precision mode with x == 10.0f.
4230void
4231SelectionDAGBuilder::visitPow(const CallInst &I) {
4232  SDValue result;
4233  const Value *Val = I.getArgOperand(0);
4234  DebugLoc dl = getCurDebugLoc();
4235  bool IsExp10 = false;
4236
4237  if (getValue(Val).getValueType() == MVT::f32 &&
4238      getValue(I.getArgOperand(1)).getValueType() == MVT::f32 &&
4239      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4240    if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) {
4241      if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
4242        APFloat Ten(10.0f);
4243        IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
4244      }
4245    }
4246  }
4247
4248  if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4249    SDValue Op = getValue(I.getArgOperand(1));
4250
4251    // Put the exponent in the right bit position for later addition to the
4252    // final result:
4253    //
4254    //   #define LOG2OF10 3.3219281f
4255    //   IntegerPartOfX = (int32_t)(x * LOG2OF10);
4256    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
4257                             getF32Constant(DAG, 0x40549a78));
4258    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
4259
4260    //   FractionalPartOfX = x - (float)IntegerPartOfX;
4261    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
4262    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
4263
4264    //   IntegerPartOfX <<= 23;
4265    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
4266                                 DAG.getConstant(23, TLI.getPointerTy()));
4267
4268    if (LimitFloatPrecision <= 6) {
4269      // For floating-point precision of 6:
4270      //
4271      //   twoToFractionalPartOfX =
4272      //     0.997535578f +
4273      //       (0.735607626f + 0.252464424f * x) * x;
4274      //
4275      // error 0.0144103317, which is 6 bits
4276      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4277                               getF32Constant(DAG, 0x3e814304));
4278      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4279                               getF32Constant(DAG, 0x3f3c50c8));
4280      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4281      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4282                               getF32Constant(DAG, 0x3f7f5e7e));
4283      SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
4284      SDValue TwoToFractionalPartOfX =
4285        DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
4286
4287      result = DAG.getNode(ISD::BITCAST, dl,
4288                           MVT::f32, TwoToFractionalPartOfX);
4289    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
4290      // For floating-point precision of 12:
4291      //
4292      //   TwoToFractionalPartOfX =
4293      //     0.999892986f +
4294      //       (0.696457318f +
4295      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
4296      //
4297      // error 0.000107046256, which is 13 to 14 bits
4298      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4299                               getF32Constant(DAG, 0x3da235e3));
4300      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4301                               getF32Constant(DAG, 0x3e65b8f3));
4302      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4303      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4304                               getF32Constant(DAG, 0x3f324b07));
4305      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4306      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4307                               getF32Constant(DAG, 0x3f7ff8fd));
4308      SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
4309      SDValue TwoToFractionalPartOfX =
4310        DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
4311
4312      result = DAG.getNode(ISD::BITCAST, dl,
4313                           MVT::f32, TwoToFractionalPartOfX);
4314    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
4315      // For floating-point precision of 18:
4316      //
4317      //   TwoToFractionalPartOfX =
4318      //     0.999999982f +
4319      //       (0.693148872f +
4320      //         (0.240227044f +
4321      //           (0.554906021e-1f +
4322      //             (0.961591928e-2f +
4323      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
4324      // error 2.47208000*10^(-7), which is better than 18 bits
4325      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4326                               getF32Constant(DAG, 0x3924b03e));
4327      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4328                               getF32Constant(DAG, 0x3ab24b87));
4329      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4330      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4331                               getF32Constant(DAG, 0x3c1d8c17));
4332      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4333      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4334                               getF32Constant(DAG, 0x3d634a1d));
4335      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4336      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4337                               getF32Constant(DAG, 0x3e75fe14));
4338      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4339      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
4340                                getF32Constant(DAG, 0x3f317234));
4341      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
4342      SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
4343                                getF32Constant(DAG, 0x3f800000));
4344      SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
4345      SDValue TwoToFractionalPartOfX =
4346        DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
4347
4348      result = DAG.getNode(ISD::BITCAST, dl,
4349                           MVT::f32, TwoToFractionalPartOfX);
4350    }
4351  } else {
4352    // No special expansion.
4353    result = DAG.getNode(ISD::FPOW, dl,
4354                         getValue(I.getArgOperand(0)).getValueType(),
4355                         getValue(I.getArgOperand(0)),
4356                         getValue(I.getArgOperand(1)));
4357  }
4358
4359  setValue(&I, result);
4360}
4361
4362
4363/// ExpandPowI - Expand a llvm.powi intrinsic.
4364static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
4365                          SelectionDAG &DAG) {
4366  // If RHS is a constant, we can expand this out to a multiplication tree,
4367  // otherwise we end up lowering to a call to __powidf2 (for example).  When
4368  // optimizing for size, we only want to do this if the expansion would produce
4369  // a small number of multiplies, otherwise we do the full expansion.
4370  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4371    // Get the exponent as a positive value.
4372    unsigned Val = RHSC->getSExtValue();
4373    if ((int)Val < 0) Val = -Val;
4374
4375    // powi(x, 0) -> 1.0
4376    if (Val == 0)
4377      return DAG.getConstantFP(1.0, LHS.getValueType());
4378
4379    const Function *F = DAG.getMachineFunction().getFunction();
4380    if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
4381        // If optimizing for size, don't insert too many multiplies.  This
4382        // inserts up to 5 multiplies.
4383        CountPopulation_32(Val)+Log2_32(Val) < 7) {
4384      // We use the simple binary decomposition method to generate the multiply
4385      // sequence.  There are more optimal ways to do this (for example,
4386      // powi(x,15) generates one more multiply than it should), but this has
4387      // the benefit of being both really simple and much better than a libcall.
4388      SDValue Res;  // Logically starts equal to 1.0
4389      SDValue CurSquare = LHS;
4390      while (Val) {
4391        if (Val & 1) {
4392          if (Res.getNode())
4393            Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
4394          else
4395            Res = CurSquare;  // 1.0*CurSquare.
4396        }
4397
4398        CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
4399                                CurSquare, CurSquare);
4400        Val >>= 1;
4401      }
4402
4403      // If the original was negative, invert the result, producing 1/(x*x*x).
4404      if (RHSC->getSExtValue() < 0)
4405        Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
4406                          DAG.getConstantFP(1.0, LHS.getValueType()), Res);
4407      return Res;
4408    }
4409  }
4410
4411  // Otherwise, expand to a libcall.
4412  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
4413}
4414
4415// getTruncatedArgReg - Find underlying register used for an truncated
4416// argument.
4417static unsigned getTruncatedArgReg(const SDValue &N) {
4418  if (N.getOpcode() != ISD::TRUNCATE)
4419    return 0;
4420
4421  const SDValue &Ext = N.getOperand(0);
4422  if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext){
4423    const SDValue &CFR = Ext.getOperand(0);
4424    if (CFR.getOpcode() == ISD::CopyFromReg)
4425      return cast<RegisterSDNode>(CFR.getOperand(1))->getReg();
4426    if (CFR.getOpcode() == ISD::TRUNCATE)
4427      return getTruncatedArgReg(CFR);
4428  }
4429  return 0;
4430}
4431
4432/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
4433/// argument, create the corresponding DBG_VALUE machine instruction for it now.
4434/// At the end of instruction selection, they will be inserted to the entry BB.
4435bool
4436SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
4437                                              int64_t Offset,
4438                                              const SDValue &N) {
4439  const Argument *Arg = dyn_cast<Argument>(V);
4440  if (!Arg)
4441    return false;
4442
4443  MachineFunction &MF = DAG.getMachineFunction();
4444  const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
4445  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
4446
4447  // Ignore inlined function arguments here.
4448  DIVariable DV(Variable);
4449  if (DV.isInlinedFnArgument(MF.getFunction()))
4450    return false;
4451
4452  unsigned Reg = 0;
4453  // Some arguments' frame index is recorded during argument lowering.
4454  Offset = FuncInfo.getArgumentFrameIndex(Arg);
4455  if (Offset)
4456    Reg = TRI->getFrameRegister(MF);
4457
4458  if (!Reg && N.getNode()) {
4459    if (N.getOpcode() == ISD::CopyFromReg)
4460      Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
4461    else
4462      Reg = getTruncatedArgReg(N);
4463    if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
4464      MachineRegisterInfo &RegInfo = MF.getRegInfo();
4465      unsigned PR = RegInfo.getLiveInPhysReg(Reg);
4466      if (PR)
4467        Reg = PR;
4468    }
4469  }
4470
4471  if (!Reg) {
4472    // Check if ValueMap has reg number.
4473    DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
4474    if (VMI != FuncInfo.ValueMap.end())
4475      Reg = VMI->second;
4476  }
4477
4478  if (!Reg && N.getNode()) {
4479    // Check if frame index is available.
4480    if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
4481      if (FrameIndexSDNode *FINode =
4482          dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) {
4483        Reg = TRI->getFrameRegister(MF);
4484        Offset = FINode->getIndex();
4485      }
4486  }
4487
4488  if (!Reg)
4489    return false;
4490
4491  MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),
4492                                    TII->get(TargetOpcode::DBG_VALUE))
4493    .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable);
4494  FuncInfo.ArgDbgValues.push_back(&*MIB);
4495  return true;
4496}
4497
4498// VisualStudio defines setjmp as _setjmp
4499#if defined(_MSC_VER) && defined(setjmp) && \
4500                         !defined(setjmp_undefined_for_msvc)
4501#  pragma push_macro("setjmp")
4502#  undef setjmp
4503#  define setjmp_undefined_for_msvc
4504#endif
4505
4506/// visitIntrinsicCall - Lower the call to the specified intrinsic function.  If
4507/// we want to emit this as a call to a named external function, return the name
4508/// otherwise lower it and return null.
4509const char *
4510SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
4511  DebugLoc dl = getCurDebugLoc();
4512  SDValue Res;
4513
4514  switch (Intrinsic) {
4515  default:
4516    // By default, turn this into a target intrinsic node.
4517    visitTargetIntrinsic(I, Intrinsic);
4518    return 0;
4519  case Intrinsic::vastart:  visitVAStart(I); return 0;
4520  case Intrinsic::vaend:    visitVAEnd(I); return 0;
4521  case Intrinsic::vacopy:   visitVACopy(I); return 0;
4522  case Intrinsic::returnaddress:
4523    setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
4524                             getValue(I.getArgOperand(0))));
4525    return 0;
4526  case Intrinsic::frameaddress:
4527    setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
4528                             getValue(I.getArgOperand(0))));
4529    return 0;
4530  case Intrinsic::setjmp:
4531    return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
4532  case Intrinsic::longjmp:
4533    return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
4534  case Intrinsic::memcpy: {
4535    // Assert for address < 256 since we support only user defined address
4536    // spaces.
4537    assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
4538           < 256 &&
4539           cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
4540           < 256 &&
4541           "Unknown address space");
4542    SDValue Op1 = getValue(I.getArgOperand(0));
4543    SDValue Op2 = getValue(I.getArgOperand(1));
4544    SDValue Op3 = getValue(I.getArgOperand(2));
4545    unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
4546    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
4547    DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
4548                              MachinePointerInfo(I.getArgOperand(0)),
4549                              MachinePointerInfo(I.getArgOperand(1))));
4550    return 0;
4551  }
4552  case Intrinsic::memset: {
4553    // Assert for address < 256 since we support only user defined address
4554    // spaces.
4555    assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
4556           < 256 &&
4557           "Unknown address space");
4558    SDValue Op1 = getValue(I.getArgOperand(0));
4559    SDValue Op2 = getValue(I.getArgOperand(1));
4560    SDValue Op3 = getValue(I.getArgOperand(2));
4561    unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
4562    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
4563    DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
4564                              MachinePointerInfo(I.getArgOperand(0))));
4565    return 0;
4566  }
4567  case Intrinsic::memmove: {
4568    // Assert for address < 256 since we support only user defined address
4569    // spaces.
4570    assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
4571           < 256 &&
4572           cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
4573           < 256 &&
4574           "Unknown address space");
4575    SDValue Op1 = getValue(I.getArgOperand(0));
4576    SDValue Op2 = getValue(I.getArgOperand(1));
4577    SDValue Op3 = getValue(I.getArgOperand(2));
4578    unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
4579    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
4580    DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
4581                               MachinePointerInfo(I.getArgOperand(0)),
4582                               MachinePointerInfo(I.getArgOperand(1))));
4583    return 0;
4584  }
4585  case Intrinsic::dbg_declare: {
4586    const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
4587    MDNode *Variable = DI.getVariable();
4588    const Value *Address = DI.getAddress();
4589    if (!Address || !DIVariable(Variable).Verify()) {
4590      DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
4591      return 0;
4592    }
4593
4594    // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
4595    // but do not always have a corresponding SDNode built.  The SDNodeOrder
4596    // absolute, but not relative, values are different depending on whether
4597    // debug info exists.
4598    ++SDNodeOrder;
4599
4600    // Check if address has undef value.
4601    if (isa<UndefValue>(Address) ||
4602        (Address->use_empty() && !isa<Argument>(Address))) {
4603      DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
4604      return 0;
4605    }
4606
4607    SDValue &N = NodeMap[Address];
4608    if (!N.getNode() && isa<Argument>(Address))
4609      // Check unused arguments map.
4610      N = UnusedArgNodeMap[Address];
4611    SDDbgValue *SDV;
4612    if (N.getNode()) {
4613      if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
4614        Address = BCI->getOperand(0);
4615      // Parameters are handled specially.
4616      bool isParameter =
4617        (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable ||
4618         isa<Argument>(Address));
4619
4620      const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
4621
4622      if (isParameter && !AI) {
4623        FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
4624        if (FINode)
4625          // Byval parameter.  We have a frame index at this point.
4626          SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
4627                                0, dl, SDNodeOrder);
4628        else {
4629          // Address is an argument, so try to emit its dbg value using
4630          // virtual register info from the FuncInfo.ValueMap.
4631          EmitFuncArgumentDbgValue(Address, Variable, 0, N);
4632          return 0;
4633        }
4634      } else if (AI)
4635        SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
4636                              0, dl, SDNodeOrder);
4637      else {
4638        // Can't do anything with other non-AI cases yet.
4639        DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
4640        DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t");
4641        DEBUG(Address->dump());
4642        return 0;
4643      }
4644      DAG.AddDbgValue(SDV, N.getNode(), isParameter);
4645    } else {
4646      // If Address is an argument then try to emit its dbg value using
4647      // virtual register info from the FuncInfo.ValueMap.
4648      if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
4649        // If variable is pinned by a alloca in dominating bb then
4650        // use StaticAllocaMap.
4651        if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
4652          if (AI->getParent() != DI.getParent()) {
4653            DenseMap<const AllocaInst*, int>::iterator SI =
4654              FuncInfo.StaticAllocaMap.find(AI);
4655            if (SI != FuncInfo.StaticAllocaMap.end()) {
4656              SDV = DAG.getDbgValue(Variable, SI->second,
4657                                    0, dl, SDNodeOrder);
4658              DAG.AddDbgValue(SDV, 0, false);
4659              return 0;
4660            }
4661          }
4662        }
4663        DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
4664      }
4665    }
4666    return 0;
4667  }
4668  case Intrinsic::dbg_value: {
4669    const DbgValueInst &DI = cast<DbgValueInst>(I);
4670    if (!DIVariable(DI.getVariable()).Verify())
4671      return 0;
4672
4673    MDNode *Variable = DI.getVariable();
4674    uint64_t Offset = DI.getOffset();
4675    const Value *V = DI.getValue();
4676    if (!V)
4677      return 0;
4678
4679    // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
4680    // but do not always have a corresponding SDNode built.  The SDNodeOrder
4681    // absolute, but not relative, values are different depending on whether
4682    // debug info exists.
4683    ++SDNodeOrder;
4684    SDDbgValue *SDV;
4685    if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) {
4686      SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
4687      DAG.AddDbgValue(SDV, 0, false);
4688    } else {
4689      // Do not use getValue() in here; we don't want to generate code at
4690      // this point if it hasn't been done yet.
4691      SDValue N = NodeMap[V];
4692      if (!N.getNode() && isa<Argument>(V))
4693        // Check unused arguments map.
4694        N = UnusedArgNodeMap[V];
4695      if (N.getNode()) {
4696        if (!EmitFuncArgumentDbgValue(V, Variable, Offset, N)) {
4697          SDV = DAG.getDbgValue(Variable, N.getNode(),
4698                                N.getResNo(), Offset, dl, SDNodeOrder);
4699          DAG.AddDbgValue(SDV, N.getNode(), false);
4700        }
4701      } else if (!V->use_empty() ) {
4702        // Do not call getValue(V) yet, as we don't want to generate code.
4703        // Remember it for later.
4704        DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
4705        DanglingDebugInfoMap[V] = DDI;
4706      } else {
4707        // We may expand this to cover more cases.  One case where we have no
4708        // data available is an unreferenced parameter.
4709        DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
4710      }
4711    }
4712
4713    // Build a debug info table entry.
4714    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
4715      V = BCI->getOperand(0);
4716    const AllocaInst *AI = dyn_cast<AllocaInst>(V);
4717    // Don't handle byval struct arguments or VLAs, for example.
4718    if (!AI) {
4719      DEBUG(dbgs() << "Dropping debug location info for:\n  " << DI << "\n");
4720      DEBUG(dbgs() << "  Last seen at:\n    " << *V << "\n");
4721      return 0;
4722    }
4723    DenseMap<const AllocaInst*, int>::iterator SI =
4724      FuncInfo.StaticAllocaMap.find(AI);
4725    if (SI == FuncInfo.StaticAllocaMap.end())
4726      return 0; // VLAs.
4727    int FI = SI->second;
4728
4729    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
4730    if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
4731      MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
4732    return 0;
4733  }
4734
4735  case Intrinsic::eh_typeid_for: {
4736    // Find the type id for the given typeinfo.
4737    GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0));
4738    unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
4739    Res = DAG.getConstant(TypeID, MVT::i32);
4740    setValue(&I, Res);
4741    return 0;
4742  }
4743
4744  case Intrinsic::eh_return_i32:
4745  case Intrinsic::eh_return_i64:
4746    DAG.getMachineFunction().getMMI().setCallsEHReturn(true);
4747    DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
4748                            MVT::Other,
4749                            getControlRoot(),
4750                            getValue(I.getArgOperand(0)),
4751                            getValue(I.getArgOperand(1))));
4752    return 0;
4753  case Intrinsic::eh_unwind_init:
4754    DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
4755    return 0;
4756  case Intrinsic::eh_dwarf_cfa: {
4757    SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl,
4758                                        TLI.getPointerTy());
4759    SDValue Offset = DAG.getNode(ISD::ADD, dl,
4760                                 TLI.getPointerTy(),
4761                                 DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
4762                                             TLI.getPointerTy()),
4763                                 CfaArg);
4764    SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl,
4765                             TLI.getPointerTy(),
4766                             DAG.getConstant(0, TLI.getPointerTy()));
4767    setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
4768                             FA, Offset));
4769    return 0;
4770  }
4771  case Intrinsic::eh_sjlj_callsite: {
4772    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
4773    ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
4774    assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
4775    assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
4776
4777    MMI.setCurrentCallSite(CI->getZExtValue());
4778    return 0;
4779  }
4780  case Intrinsic::eh_sjlj_functioncontext: {
4781    // Get and store the index of the function context.
4782    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
4783    AllocaInst *FnCtx =
4784      cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
4785    int FI = FuncInfo.StaticAllocaMap[FnCtx];
4786    MFI->setFunctionContextIndex(FI);
4787    return 0;
4788  }
4789  case Intrinsic::eh_sjlj_setjmp: {
4790    SDValue Ops[2];
4791    Ops[0] = getRoot();
4792    Ops[1] = getValue(I.getArgOperand(0));
4793    SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, dl,
4794                             DAG.getVTList(MVT::i32, MVT::Other),
4795                             Ops, 2);
4796    setValue(&I, Op.getValue(0));
4797    DAG.setRoot(Op.getValue(1));
4798    return 0;
4799  }
4800  case Intrinsic::eh_sjlj_longjmp: {
4801    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
4802                            getRoot(), getValue(I.getArgOperand(0))));
4803    return 0;
4804  }
4805
4806  case Intrinsic::x86_mmx_pslli_w:
4807  case Intrinsic::x86_mmx_pslli_d:
4808  case Intrinsic::x86_mmx_pslli_q:
4809  case Intrinsic::x86_mmx_psrli_w:
4810  case Intrinsic::x86_mmx_psrli_d:
4811  case Intrinsic::x86_mmx_psrli_q:
4812  case Intrinsic::x86_mmx_psrai_w:
4813  case Intrinsic::x86_mmx_psrai_d: {
4814    SDValue ShAmt = getValue(I.getArgOperand(1));
4815    if (isa<ConstantSDNode>(ShAmt)) {
4816      visitTargetIntrinsic(I, Intrinsic);
4817      return 0;
4818    }
4819    unsigned NewIntrinsic = 0;
4820    EVT ShAmtVT = MVT::v2i32;
4821    switch (Intrinsic) {
4822    case Intrinsic::x86_mmx_pslli_w:
4823      NewIntrinsic = Intrinsic::x86_mmx_psll_w;
4824      break;
4825    case Intrinsic::x86_mmx_pslli_d:
4826      NewIntrinsic = Intrinsic::x86_mmx_psll_d;
4827      break;
4828    case Intrinsic::x86_mmx_pslli_q:
4829      NewIntrinsic = Intrinsic::x86_mmx_psll_q;
4830      break;
4831    case Intrinsic::x86_mmx_psrli_w:
4832      NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
4833      break;
4834    case Intrinsic::x86_mmx_psrli_d:
4835      NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
4836      break;
4837    case Intrinsic::x86_mmx_psrli_q:
4838      NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
4839      break;
4840    case Intrinsic::x86_mmx_psrai_w:
4841      NewIntrinsic = Intrinsic::x86_mmx_psra_w;
4842      break;
4843    case Intrinsic::x86_mmx_psrai_d:
4844      NewIntrinsic = Intrinsic::x86_mmx_psra_d;
4845      break;
4846    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
4847    }
4848
4849    // The vector shift intrinsics with scalars uses 32b shift amounts but
4850    // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
4851    // to be zero.
4852    // We must do this early because v2i32 is not a legal type.
4853    DebugLoc dl = getCurDebugLoc();
4854    SDValue ShOps[2];
4855    ShOps[0] = ShAmt;
4856    ShOps[1] = DAG.getConstant(0, MVT::i32);
4857    ShAmt =  DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
4858    EVT DestVT = TLI.getValueType(I.getType());
4859    ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt);
4860    Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
4861                       DAG.getConstant(NewIntrinsic, MVT::i32),
4862                       getValue(I.getArgOperand(0)), ShAmt);
4863    setValue(&I, Res);
4864    return 0;
4865  }
4866  case Intrinsic::x86_avx_vinsertf128_pd_256:
4867  case Intrinsic::x86_avx_vinsertf128_ps_256:
4868  case Intrinsic::x86_avx_vinsertf128_si_256:
4869  case Intrinsic::x86_avx2_vinserti128: {
4870    DebugLoc dl = getCurDebugLoc();
4871    EVT DestVT = TLI.getValueType(I.getType());
4872    EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType());
4873    uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(2))->getZExtValue() & 1) *
4874                   ElVT.getVectorNumElements();
4875    Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT,
4876                      getValue(I.getArgOperand(0)),
4877                      getValue(I.getArgOperand(1)),
4878                      DAG.getIntPtrConstant(Idx));
4879    setValue(&I, Res);
4880    return 0;
4881  }
4882  case Intrinsic::x86_avx_vextractf128_pd_256:
4883  case Intrinsic::x86_avx_vextractf128_ps_256:
4884  case Intrinsic::x86_avx_vextractf128_si_256:
4885  case Intrinsic::x86_avx2_vextracti128: {
4886    DebugLoc dl = getCurDebugLoc();
4887    EVT DestVT = TLI.getValueType(I.getType());
4888    uint64_t Idx = (cast<ConstantInt>(I.getArgOperand(1))->getZExtValue() & 1) *
4889                   DestVT.getVectorNumElements();
4890    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT,
4891                      getValue(I.getArgOperand(0)),
4892                      DAG.getIntPtrConstant(Idx));
4893    setValue(&I, Res);
4894    return 0;
4895  }
4896  case Intrinsic::convertff:
4897  case Intrinsic::convertfsi:
4898  case Intrinsic::convertfui:
4899  case Intrinsic::convertsif:
4900  case Intrinsic::convertuif:
4901  case Intrinsic::convertss:
4902  case Intrinsic::convertsu:
4903  case Intrinsic::convertus:
4904  case Intrinsic::convertuu: {
4905    ISD::CvtCode Code = ISD::CVT_INVALID;
4906    switch (Intrinsic) {
4907    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
4908    case Intrinsic::convertff:  Code = ISD::CVT_FF; break;
4909    case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
4910    case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
4911    case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
4912    case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
4913    case Intrinsic::convertss:  Code = ISD::CVT_SS; break;
4914    case Intrinsic::convertsu:  Code = ISD::CVT_SU; break;
4915    case Intrinsic::convertus:  Code = ISD::CVT_US; break;
4916    case Intrinsic::convertuu:  Code = ISD::CVT_UU; break;
4917    }
4918    EVT DestVT = TLI.getValueType(I.getType());
4919    const Value *Op1 = I.getArgOperand(0);
4920    Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
4921                               DAG.getValueType(DestVT),
4922                               DAG.getValueType(getValue(Op1).getValueType()),
4923                               getValue(I.getArgOperand(1)),
4924                               getValue(I.getArgOperand(2)),
4925                               Code);
4926    setValue(&I, Res);
4927    return 0;
4928  }
4929  case Intrinsic::sqrt:
4930    setValue(&I, DAG.getNode(ISD::FSQRT, dl,
4931                             getValue(I.getArgOperand(0)).getValueType(),
4932                             getValue(I.getArgOperand(0))));
4933    return 0;
4934  case Intrinsic::powi:
4935    setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)),
4936                            getValue(I.getArgOperand(1)), DAG));
4937    return 0;
4938  case Intrinsic::sin:
4939    setValue(&I, DAG.getNode(ISD::FSIN, dl,
4940                             getValue(I.getArgOperand(0)).getValueType(),
4941                             getValue(I.getArgOperand(0))));
4942    return 0;
4943  case Intrinsic::cos:
4944    setValue(&I, DAG.getNode(ISD::FCOS, dl,
4945                             getValue(I.getArgOperand(0)).getValueType(),
4946                             getValue(I.getArgOperand(0))));
4947    return 0;
4948  case Intrinsic::log:
4949    visitLog(I);
4950    return 0;
4951  case Intrinsic::log2:
4952    visitLog2(I);
4953    return 0;
4954  case Intrinsic::log10:
4955    visitLog10(I);
4956    return 0;
4957  case Intrinsic::exp:
4958    visitExp(I);
4959    return 0;
4960  case Intrinsic::exp2:
4961    visitExp2(I);
4962    return 0;
4963  case Intrinsic::pow:
4964    visitPow(I);
4965    return 0;
4966  case Intrinsic::fabs:
4967    setValue(&I, DAG.getNode(ISD::FABS, dl,
4968                             getValue(I.getArgOperand(0)).getValueType(),
4969                             getValue(I.getArgOperand(0))));
4970    return 0;
4971  case Intrinsic::floor:
4972    setValue(&I, DAG.getNode(ISD::FFLOOR, dl,
4973                             getValue(I.getArgOperand(0)).getValueType(),
4974                             getValue(I.getArgOperand(0))));
4975    return 0;
4976  case Intrinsic::fma:
4977    setValue(&I, DAG.getNode(ISD::FMA, dl,
4978                             getValue(I.getArgOperand(0)).getValueType(),
4979                             getValue(I.getArgOperand(0)),
4980                             getValue(I.getArgOperand(1)),
4981                             getValue(I.getArgOperand(2))));
4982    return 0;
4983  case Intrinsic::fmuladd: {
4984    EVT VT = TLI.getValueType(I.getType());
4985    if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
4986        TLI.isOperationLegal(ISD::FMA, VT) &&
4987        TLI.isFMAFasterThanMulAndAdd(VT)){
4988      setValue(&I, DAG.getNode(ISD::FMA, dl,
4989                               getValue(I.getArgOperand(0)).getValueType(),
4990                               getValue(I.getArgOperand(0)),
4991                               getValue(I.getArgOperand(1)),
4992                               getValue(I.getArgOperand(2))));
4993    } else {
4994      SDValue Mul = DAG.getNode(ISD::FMUL, dl,
4995                                getValue(I.getArgOperand(0)).getValueType(),
4996                                getValue(I.getArgOperand(0)),
4997                                getValue(I.getArgOperand(1)));
4998      SDValue Add = DAG.getNode(ISD::FADD, dl,
4999                                getValue(I.getArgOperand(0)).getValueType(),
5000                                Mul,
5001                                getValue(I.getArgOperand(2)));
5002      setValue(&I, Add);
5003    }
5004    return 0;
5005  }
5006  case Intrinsic::convert_to_fp16:
5007    setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
5008                             MVT::i16, getValue(I.getArgOperand(0))));
5009    return 0;
5010  case Intrinsic::convert_from_fp16:
5011    setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl,
5012                             MVT::f32, getValue(I.getArgOperand(0))));
5013    return 0;
5014  case Intrinsic::pcmarker: {
5015    SDValue Tmp = getValue(I.getArgOperand(0));
5016    DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
5017    return 0;
5018  }
5019  case Intrinsic::readcyclecounter: {
5020    SDValue Op = getRoot();
5021    Res = DAG.getNode(ISD::READCYCLECOUNTER, dl,
5022                      DAG.getVTList(MVT::i64, MVT::Other),
5023                      &Op, 1);
5024    setValue(&I, Res);
5025    DAG.setRoot(Res.getValue(1));
5026    return 0;
5027  }
5028  case Intrinsic::bswap:
5029    setValue(&I, DAG.getNode(ISD::BSWAP, dl,
5030                             getValue(I.getArgOperand(0)).getValueType(),
5031                             getValue(I.getArgOperand(0))));
5032    return 0;
5033  case Intrinsic::cttz: {
5034    SDValue Arg = getValue(I.getArgOperand(0));
5035    ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
5036    EVT Ty = Arg.getValueType();
5037    setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
5038                             dl, Ty, Arg));
5039    return 0;
5040  }
5041  case Intrinsic::ctlz: {
5042    SDValue Arg = getValue(I.getArgOperand(0));
5043    ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
5044    EVT Ty = Arg.getValueType();
5045    setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
5046                             dl, Ty, Arg));
5047    return 0;
5048  }
5049  case Intrinsic::ctpop: {
5050    SDValue Arg = getValue(I.getArgOperand(0));
5051    EVT Ty = Arg.getValueType();
5052    setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg));
5053    return 0;
5054  }
5055  case Intrinsic::stacksave: {
5056    SDValue Op = getRoot();
5057    Res = DAG.getNode(ISD::STACKSAVE, dl,
5058                      DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
5059    setValue(&I, Res);
5060    DAG.setRoot(Res.getValue(1));
5061    return 0;
5062  }
5063  case Intrinsic::stackrestore: {
5064    Res = getValue(I.getArgOperand(0));
5065    DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res));
5066    return 0;
5067  }
5068  case Intrinsic::stackprotector: {
5069    // Emit code into the DAG to store the stack guard onto the stack.
5070    MachineFunction &MF = DAG.getMachineFunction();
5071    MachineFrameInfo *MFI = MF.getFrameInfo();
5072    EVT PtrTy = TLI.getPointerTy();
5073
5074    SDValue Src = getValue(I.getArgOperand(0));   // The guard's value.
5075    AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
5076
5077    int FI = FuncInfo.StaticAllocaMap[Slot];
5078    MFI->setStackProtectorIndex(FI);
5079
5080    SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
5081
5082    // Store the stack protector onto the stack.
5083    Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
5084                       MachinePointerInfo::getFixedStack(FI),
5085                       true, false, 0);
5086    setValue(&I, Res);
5087    DAG.setRoot(Res);
5088    return 0;
5089  }
5090  case Intrinsic::objectsize: {
5091    // If we don't know by now, we're never going to know.
5092    ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
5093
5094    assert(CI && "Non-constant type in __builtin_object_size?");
5095
5096    SDValue Arg = getValue(I.getCalledValue());
5097    EVT Ty = Arg.getValueType();
5098
5099    if (CI->isZero())
5100      Res = DAG.getConstant(-1ULL, Ty);
5101    else
5102      Res = DAG.getConstant(0, Ty);
5103
5104    setValue(&I, Res);
5105    return 0;
5106  }
5107  case Intrinsic::var_annotation:
5108    // Discard annotate attributes
5109    return 0;
5110
5111  case Intrinsic::init_trampoline: {
5112    const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
5113
5114    SDValue Ops[6];
5115    Ops[0] = getRoot();
5116    Ops[1] = getValue(I.getArgOperand(0));
5117    Ops[2] = getValue(I.getArgOperand(1));
5118    Ops[3] = getValue(I.getArgOperand(2));
5119    Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
5120    Ops[5] = DAG.getSrcValue(F);
5121
5122    Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6);
5123
5124    DAG.setRoot(Res);
5125    return 0;
5126  }
5127  case Intrinsic::adjust_trampoline: {
5128    setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl,
5129                             TLI.getPointerTy(),
5130                             getValue(I.getArgOperand(0))));
5131    return 0;
5132  }
5133  case Intrinsic::gcroot:
5134    if (GFI) {
5135      const Value *Alloca = I.getArgOperand(0)->stripPointerCasts();
5136      const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
5137
5138      FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
5139      GFI->addStackRoot(FI->getIndex(), TypeMap);
5140    }
5141    return 0;
5142  case Intrinsic::gcread:
5143  case Intrinsic::gcwrite:
5144    llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
5145  case Intrinsic::flt_rounds:
5146    setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
5147    return 0;
5148
5149  case Intrinsic::expect: {
5150    // Just replace __builtin_expect(exp, c) with EXP.
5151    setValue(&I, getValue(I.getArgOperand(0)));
5152    return 0;
5153  }
5154
5155  case Intrinsic::trap: {
5156    StringRef TrapFuncName = TM.Options.getTrapFunctionName();
5157    if (TrapFuncName.empty()) {
5158      DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
5159      return 0;
5160    }
5161    TargetLowering::ArgListTy Args;
5162    TargetLowering::
5163    CallLoweringInfo CLI(getRoot(), I.getType(),
5164                 false, false, false, false, 0, CallingConv::C,
5165                 /*isTailCall=*/false,
5166                 /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
5167                 DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()),
5168                 Args, DAG, getCurDebugLoc());
5169    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
5170    DAG.setRoot(Result.second);
5171    return 0;
5172  }
5173  case Intrinsic::debugtrap: {
5174    DAG.setRoot(DAG.getNode(ISD::DEBUGTRAP, dl,MVT::Other, getRoot()));
5175    return 0;
5176  }
5177  case Intrinsic::uadd_with_overflow:
5178  case Intrinsic::sadd_with_overflow:
5179  case Intrinsic::usub_with_overflow:
5180  case Intrinsic::ssub_with_overflow:
5181  case Intrinsic::umul_with_overflow:
5182  case Intrinsic::smul_with_overflow: {
5183    ISD::NodeType Op;
5184    switch (Intrinsic) {
5185    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
5186    case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break;
5187    case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break;
5188    case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break;
5189    case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break;
5190    case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break;
5191    case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break;
5192    }
5193    SDValue Op1 = getValue(I.getArgOperand(0));
5194    SDValue Op2 = getValue(I.getArgOperand(1));
5195
5196    SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
5197    setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
5198    return 0;
5199  }
5200  case Intrinsic::prefetch: {
5201    SDValue Ops[5];
5202    unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
5203    Ops[0] = getRoot();
5204    Ops[1] = getValue(I.getArgOperand(0));
5205    Ops[2] = getValue(I.getArgOperand(1));
5206    Ops[3] = getValue(I.getArgOperand(2));
5207    Ops[4] = getValue(I.getArgOperand(3));
5208    DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
5209                                        DAG.getVTList(MVT::Other),
5210                                        &Ops[0], 5,
5211                                        EVT::getIntegerVT(*Context, 8),
5212                                        MachinePointerInfo(I.getArgOperand(0)),
5213                                        0, /* align */
5214                                        false, /* volatile */
5215                                        rw==0, /* read */
5216                                        rw==1)); /* write */
5217    return 0;
5218  }
5219  case Intrinsic::lifetime_start:
5220  case Intrinsic::lifetime_end: {
5221    SDValue Ops[2];
5222    AllocaInst *LifetimeObject =dyn_cast_or_null<AllocaInst>(
5223                                   GetUnderlyingObject(I.getArgOperand(1), TD));
5224    // Could not find an Alloca.
5225    if (!LifetimeObject)
5226      return 0;
5227
5228    int FI = FuncInfo.StaticAllocaMap[LifetimeObject];
5229    Ops[0] = getRoot();
5230    Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true);
5231    bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
5232    unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
5233
5234    Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2);
5235    DAG.setRoot(Res);
5236    return 0;
5237  }
5238  case Intrinsic::invariant_start:
5239    // Discard region information.
5240    setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
5241    return 0;
5242  case Intrinsic::invariant_end:
5243    // Discard region information.
5244    return 0;
5245  case Intrinsic::donothing:
5246    // ignore
5247    return 0;
5248  }
5249}
5250
/// LowerCallTo - Lower the call or invoke site CS, whose callee has already
/// been translated to Callee, by building the argument list and handing it to
/// TLI.LowerCallTo.
///
///   isTailCall requests tail-call lowering; it is cleared below when the call
///   is not in tail-call position or when fast-isel is enabled.
///   LandingPad, when non-null, causes EH labels to be emitted before and after
///   the call and the resulting range to be registered with MachineModuleInfo.
///
/// If the target cannot lower the return value directly, a stack slot is
/// created and passed as a hidden sret argument, and the value of the call is
/// reloaded from that slot once the call has been lowered.
void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
                                      bool isTailCall,
                                      MachineBasicBlock *LandingPad) {
  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  Type *RetTy = FTy->getReturnType();
  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
  MCSymbol *BeginLabel = 0;

  TargetLowering::ArgListTy Args;
  // Entry is reused for the hidden sret argument and for every real argument;
  // each use below overwrites all of the fields it sets.
  TargetLowering::ArgListEntry Entry;
  Args.reserve(CS.arg_size());

  // Check whether the function can return without sret-demotion.
  SmallVector<ISD::OutputArg, 4> Outs;
  GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
                Outs, TLI);

  bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
                                           DAG.getMachineFunction(),
                                           FTy->isVarArg(), Outs,
                                           FTy->getContext());

  SDValue DemoteStackSlot;
  // Placeholder value: a real frame index is assigned, and the variable is
  // read, only on the !CanLowerReturn paths below.
  int DemoteStackIdx = -100;

  if (!CanLowerReturn) {
    // Create a stack object sized and aligned for the return type and pass its
    // address as the first (sret) argument.
    uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
                      FTy->getReturnType());
    unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(
                      FTy->getReturnType());
    MachineFunction &MF = DAG.getMachineFunction();
    DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
    Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());

    DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());
    Entry.Node = DemoteStackSlot;
    Entry.Ty = StackSlotPtrType;
    Entry.isSExt = false;
    Entry.isZExt = false;
    Entry.isInReg = false;
    Entry.isSRet = true;
    Entry.isNest = false;
    Entry.isByVal = false;
    Entry.Alignment = Align;
    Args.push_back(Entry);
    // The value now comes back through the stack slot, so the call itself is
    // lowered with a void return type.
    RetTy = Type::getVoidTy(FTy->getContext());
  }

  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    const Value *V = *i;

    // Skip empty types
    if (V->getType()->isEmptyTy())
      continue;

    SDValue ArgNode = getValue(V);
    Entry.Node = ArgNode; Entry.Ty = V->getType();

    // Parameter attributes are queried by argument position plus one.
    unsigned attrInd = i - CS.arg_begin() + 1;
    Entry.isSExt  = CS.paramHasAttr(attrInd, Attribute::SExt);
    Entry.isZExt  = CS.paramHasAttr(attrInd, Attribute::ZExt);
    Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
    Entry.isSRet  = CS.paramHasAttr(attrInd, Attribute::StructRet);
    Entry.isNest  = CS.paramHasAttr(attrInd, Attribute::Nest);
    Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
    Entry.Alignment = CS.getParamAlignment(attrInd);
    Args.push_back(Entry);
  }

  if (LandingPad) {
    // Insert a label before the invoke call to mark the try range.  This can be
    // used to detect deletion of the invoke via the MachineModuleInfo.
    BeginLabel = MMI.getContext().CreateTempSymbol();

    // For SjLj, keep track of which landing pads go with which invokes
    // so as to maintain the ordering of pads in the LSDA.
    unsigned CallSiteIndex = MMI.getCurrentCallSite();
    if (CallSiteIndex) {
      MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
      LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex);

      // Now that the call site is handled, stop tracking it.
      MMI.setCurrentCallSite(0);
    }

    // Both PendingLoads and PendingExports must be flushed here;
    // this call might not return.
    (void)getRoot();
    DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel));
  }

  // Check if target-independent constraints permit a tail call here.
  // Target-dependent constraints are checked within TLI.LowerCallTo.
  if (isTailCall &&
      !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI))
    isTailCall = false;

  // If there's a possibility that fast-isel has already selected some amount
  // of the current basic block, don't emit a tail call.
  if (isTailCall && TM.Options.EnableFastISel)
    isTailCall = false;

  // Result.first is the value produced by the call (if any); Result.second is
  // the output chain, which is null only when a tail call was emitted.
  TargetLowering::
  CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG,
                       getCurDebugLoc(), CS);
  std::pair<SDValue,SDValue> Result = TLI.LowerCallTo(CLI);
  assert((isTailCall || Result.second.getNode()) &&
         "Non-null chain expected with non-tail call!");
  assert((Result.second.getNode() || !Result.first.getNode()) &&
         "Null value expected with tail call!");
  if (Result.first.getNode()) {
    setValue(CS.getInstruction(), Result.first);
  } else if (!CanLowerReturn && Result.second.getNode()) {
    // The instruction result is the result of loading from the
    // hidden sret parameter.
    SmallVector<EVT, 1> PVTs;
    Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());

    ComputeValueVTs(TLI, PtrRetTy, PVTs);
    assert(PVTs.size() == 1 && "Pointers should fit in one register");
    EVT PtrVT = PVTs[0];

    // Split the original (pre-demotion) return type into its component value
    // types and their byte offsets within the stack slot.
    SmallVector<EVT, 4> RetTys;
    SmallVector<uint64_t, 4> Offsets;
    RetTy = FTy->getReturnType();
    ComputeValueVTs(TLI, RetTy, RetTys, &Offsets);

    unsigned NumValues = RetTys.size();
    SmallVector<SDValue, 4> Values(NumValues);
    SmallVector<SDValue, 4> Chains(NumValues);

    // Reload each component from its offset in the slot, chained after the
    // call, and remember each load's output chain.
    for (unsigned i = 0; i < NumValues; ++i) {
      SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
                                DemoteStackSlot,
                                DAG.getConstant(Offsets[i], PtrVT));
      SDValue L = DAG.getLoad(RetTys[i], getCurDebugLoc(), Result.second, Add,
                  MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
                              false, false, false, 1);
      Values[i] = L;
      Chains[i] = L.getValue(1);
    }

    // Join the load chains and queue the result with the pending loads.
    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
                                MVT::Other, &Chains[0], NumValues);
    PendingLoads.push_back(Chain);

    setValue(CS.getInstruction(),
             DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
                         DAG.getVTList(&RetTys[0], RetTys.size()),
                         &Values[0], Values.size()));
  }

  // Assign order to nodes here. If the call does not produce a result, it won't
  // be mapped to a SDNode and visit() will not assign it an order number.
  if (!Result.second.getNode()) {
    // As a special case, a null chain means that a tail call has been emitted and
    // the DAG root is already updated.
    HasTailCall = true;
    ++SDNodeOrder;
    AssignOrderingToNode(DAG.getRoot().getNode());
  } else {
    DAG.setRoot(Result.second);
    ++SDNodeOrder;
    AssignOrderingToNode(Result.second.getNode());
  }

  if (LandingPad) {
    // Insert a label at the end of the invoke call to mark the try range.  This
    // can be used to detect deletion of the invoke via the MachineModuleInfo.
    MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol();
    DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel));

    // Inform MachineModuleInfo of range.
    MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
  }
}
5429
5430/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
5431/// value is equal or not-equal to zero.
5432static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
5433  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
5434       UI != E; ++UI) {
5435    if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
5436      if (IC->isEquality())
5437        if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
5438          if (C->isNullValue())
5439            continue;
5440    // Unknown instruction.
5441    return false;
5442  }
5443  return true;
5444}
5445
5446static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
5447                             Type *LoadTy,
5448                             SelectionDAGBuilder &Builder) {
5449
5450  // Check to see if this load can be trivially constant folded, e.g. if the
5451  // input is from a string literal.
5452  if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
5453    // Cast pointer to the type we really want to load.
5454    LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
5455                                         PointerType::getUnqual(LoadTy));
5456
5457    if (const Constant *LoadCst =
5458          ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
5459                                       Builder.TD))
5460      return Builder.getValue(LoadCst);
5461  }
5462
5463  // Otherwise, we have to emit the load.  If the pointer is to unfoldable but
5464  // still constant memory, the input chain can be the entry node.
5465  SDValue Root;
5466  bool ConstantMemory = false;
5467
5468  // Do not serialize (non-volatile) loads of constant memory with anything.
5469  if (Builder.AA->pointsToConstantMemory(PtrVal)) {
5470    Root = Builder.DAG.getEntryNode();
5471    ConstantMemory = true;
5472  } else {
5473    // Do not serialize non-volatile loads against each other.
5474    Root = Builder.DAG.getRoot();
5475  }
5476
5477  SDValue Ptr = Builder.getValue(PtrVal);
5478  SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
5479                                        Ptr, MachinePointerInfo(PtrVal),
5480                                        false /*volatile*/,
5481                                        false /*nontemporal*/,
5482                                        false /*isinvariant*/, 1 /* align=1 */);
5483
5484  if (!ConstantMemory)
5485    Builder.PendingLoads.push_back(LoadVal.getValue(1));
5486  return LoadVal;
5487}
5488
5489
5490/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
5491/// If so, return true and lower it, otherwise return false and it will be
5492/// lowered like a normal call.
5493bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
5494  // Verify that the prototype makes sense.  int memcmp(void*,void*,size_t)
5495  if (I.getNumArgOperands() != 3)
5496    return false;
5497
5498  const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
5499  if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
5500      !I.getArgOperand(2)->getType()->isIntegerTy() ||
5501      !I.getType()->isIntegerTy())
5502    return false;
5503
5504  const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2));
5505
5506  // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0
5507  // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
5508  if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
5509    bool ActuallyDoIt = true;
5510    MVT LoadVT;
5511    Type *LoadTy;
5512    switch (Size->getZExtValue()) {
5513    default:
5514      LoadVT = MVT::Other;
5515      LoadTy = 0;
5516      ActuallyDoIt = false;
5517      break;
5518    case 2:
5519      LoadVT = MVT::i16;
5520      LoadTy = Type::getInt16Ty(Size->getContext());
5521      break;
5522    case 4:
5523      LoadVT = MVT::i32;
5524      LoadTy = Type::getInt32Ty(Size->getContext());
5525      break;
5526    case 8:
5527      LoadVT = MVT::i64;
5528      LoadTy = Type::getInt64Ty(Size->getContext());
5529      break;
5530        /*
5531    case 16:
5532      LoadVT = MVT::v4i32;
5533      LoadTy = Type::getInt32Ty(Size->getContext());
5534      LoadTy = VectorType::get(LoadTy, 4);
5535      break;
5536         */
5537    }
5538
5539    // This turns into unaligned loads.  We only do this if the target natively
5540    // supports the MVT we'll be loading or if it is small enough (<= 4) that
5541    // we'll only produce a small number of byte loads.
5542
5543    // Require that we can find a legal MVT, and only do this if the target
5544    // supports unaligned loads of that type.  Expanding into byte loads would
5545    // bloat the code.
5546    if (ActuallyDoIt && Size->getZExtValue() > 4) {
5547      // TODO: Handle 5 byte compare as 4-byte + 1 byte.
5548      // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
5549      if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT))
5550        ActuallyDoIt = false;
5551    }
5552
5553    if (ActuallyDoIt) {
5554      SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
5555      SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
5556
5557      SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
5558                                 ISD::SETNE);
5559      EVT CallVT = TLI.getValueType(I.getType(), true);
5560      setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));
5561      return true;
5562    }
5563  }
5564
5565
5566  return false;
5567}
5568
5569/// visitUnaryFloatCall - If a call instruction is a unary floating-point
5570/// operation (as expected), translate it to an SDNode with the specified opcode
5571/// and return true.
5572bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I,
5573                                              unsigned Opcode) {
5574  // Sanity check that it really is a unary floating-point call.
5575  if (I.getNumArgOperands() != 1 ||
5576      !I.getArgOperand(0)->getType()->isFloatingPointTy() ||
5577      I.getType() != I.getArgOperand(0)->getType() ||
5578      !I.onlyReadsMemory())
5579    return false;
5580
5581  SDValue Tmp = getValue(I.getArgOperand(0));
5582  setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp));
5583  return true;
5584}
5585
5586void SelectionDAGBuilder::visitCall(const CallInst &I) {
5587  // Handle inline assembly differently.
5588  if (isa<InlineAsm>(I.getCalledValue())) {
5589    visitInlineAsm(&I);
5590    return;
5591  }
5592
5593  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
5594  ComputeUsesVAFloatArgument(I, &MMI);
5595
5596  const char *RenameFn = 0;
5597  if (Function *F = I.getCalledFunction()) {
5598    if (F->isDeclaration()) {
5599      if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) {
5600        if (unsigned IID = II->getIntrinsicID(F)) {
5601          RenameFn = visitIntrinsicCall(I, IID);
5602          if (!RenameFn)
5603            return;
5604        }
5605      }
5606      if (unsigned IID = F->getIntrinsicID()) {
5607        RenameFn = visitIntrinsicCall(I, IID);
5608        if (!RenameFn)
5609          return;
5610      }
5611    }
5612
5613    // Check for well-known libc/libm calls.  If the function is internal, it
5614    // can't be a library call.
5615    LibFunc::Func Func;
5616    if (!F->hasLocalLinkage() && F->hasName() &&
5617        LibInfo->getLibFunc(F->getName(), Func) &&
5618        LibInfo->hasOptimizedCodeGen(Func)) {
5619      switch (Func) {
5620      default: break;
5621      case LibFunc::copysign:
5622      case LibFunc::copysignf:
5623      case LibFunc::copysignl:
5624        if (I.getNumArgOperands() == 2 &&   // Basic sanity checks.
5625            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
5626            I.getType() == I.getArgOperand(0)->getType() &&
5627            I.getType() == I.getArgOperand(1)->getType() &&
5628            I.onlyReadsMemory()) {
5629          SDValue LHS = getValue(I.getArgOperand(0));
5630          SDValue RHS = getValue(I.getArgOperand(1));
5631          setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
5632                                   LHS.getValueType(), LHS, RHS));
5633          return;
5634        }
5635        break;
5636      case LibFunc::fabs:
5637      case LibFunc::fabsf:
5638      case LibFunc::fabsl:
5639        if (visitUnaryFloatCall(I, ISD::FABS))
5640          return;
5641        break;
5642      case LibFunc::sin:
5643      case LibFunc::sinf:
5644      case LibFunc::sinl:
5645        if (visitUnaryFloatCall(I, ISD::FSIN))
5646          return;
5647        break;
5648      case LibFunc::cos:
5649      case LibFunc::cosf:
5650      case LibFunc::cosl:
5651        if (visitUnaryFloatCall(I, ISD::FCOS))
5652          return;
5653        break;
5654      case LibFunc::sqrt:
5655      case LibFunc::sqrtf:
5656      case LibFunc::sqrtl:
5657        if (visitUnaryFloatCall(I, ISD::FSQRT))
5658          return;
5659        break;
5660      case LibFunc::floor:
5661      case LibFunc::floorf:
5662      case LibFunc::floorl:
5663        if (visitUnaryFloatCall(I, ISD::FFLOOR))
5664          return;
5665        break;
5666      case LibFunc::nearbyint:
5667      case LibFunc::nearbyintf:
5668      case LibFunc::nearbyintl:
5669        if (visitUnaryFloatCall(I, ISD::FNEARBYINT))
5670          return;
5671        break;
5672      case LibFunc::ceil:
5673      case LibFunc::ceilf:
5674      case LibFunc::ceill:
5675        if (visitUnaryFloatCall(I, ISD::FCEIL))
5676          return;
5677        break;
5678      case LibFunc::rint:
5679      case LibFunc::rintf:
5680      case LibFunc::rintl:
5681        if (visitUnaryFloatCall(I, ISD::FRINT))
5682          return;
5683        break;
5684      case LibFunc::trunc:
5685      case LibFunc::truncf:
5686      case LibFunc::truncl:
5687        if (visitUnaryFloatCall(I, ISD::FTRUNC))
5688          return;
5689        break;
5690      case LibFunc::log2:
5691      case LibFunc::log2f:
5692      case LibFunc::log2l:
5693        if (visitUnaryFloatCall(I, ISD::FLOG2))
5694          return;
5695        break;
5696      case LibFunc::exp2:
5697      case LibFunc::exp2f:
5698      case LibFunc::exp2l:
5699        if (visitUnaryFloatCall(I, ISD::FEXP2))
5700          return;
5701        break;
5702      case LibFunc::memcmp:
5703        if (visitMemCmpCall(I))
5704          return;
5705        break;
5706      }
5707    }
5708  }
5709
5710  SDValue Callee;
5711  if (!RenameFn)
5712    Callee = getValue(I.getCalledValue());
5713  else
5714    Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
5715
5716  // Check if we can potentially perform a tail call. More detailed checking is
5717  // be done within LowerCallTo, after more information about the call is known.
5718  LowerCallTo(&I, Callee, I.isTailCall());
5719}
5720
5721namespace {
5722
5723/// AsmOperandInfo - This contains information for each constraint that we are
5724/// lowering.
5725class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo {
5726public:
5727  /// CallOperand - If this is the result output operand or a clobber
5728  /// this is null, otherwise it is the incoming operand to the CallInst.
5729  /// This gets modified as the asm is processed.
5730  SDValue CallOperand;
5731
5732  /// AssignedRegs - If this is a register or register class operand, this
5733  /// contains the set of register corresponding to the operand.
5734  RegsForValue AssignedRegs;
5735
5736  explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
5737    : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
5738  }
5739
5740  /// getCallOperandValEVT - Return the EVT of the Value* that this operand
5741  /// corresponds to.  If there is no Value* for this operand, it returns
5742  /// MVT::Other.
5743  EVT getCallOperandValEVT(LLVMContext &Context,
5744                           const TargetLowering &TLI,
5745                           const TargetData *TD) const {
5746    if (CallOperandVal == 0) return MVT::Other;
5747
5748    if (isa<BasicBlock>(CallOperandVal))
5749      return TLI.getPointerTy();
5750
5751    llvm::Type *OpTy = CallOperandVal->getType();
5752
5753    // FIXME: code duplicated from TargetLowering::ParseConstraints().
5754    // If this is an indirect operand, the operand is a pointer to the
5755    // accessed type.
5756    if (isIndirect) {
5757      llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
5758      if (!PtrTy)
5759        report_fatal_error("Indirect operand for inline asm not a pointer!");
5760      OpTy = PtrTy->getElementType();
5761    }
5762
5763    // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
5764    if (StructType *STy = dyn_cast<StructType>(OpTy))
5765      if (STy->getNumElements() == 1)
5766        OpTy = STy->getElementType(0);
5767
5768    // If OpTy is not a single value, it may be a struct/union that we
5769    // can tile with integers.
5770    if (!OpTy->isSingleValueType() && OpTy->isSized()) {
5771      unsigned BitSize = TD->getTypeSizeInBits(OpTy);
5772      switch (BitSize) {
5773      default: break;
5774      case 1:
5775      case 8:
5776      case 16:
5777      case 32:
5778      case 64:
5779      case 128:
5780        OpTy = IntegerType::get(Context, BitSize);
5781        break;
5782      }
5783    }
5784
5785    return TLI.getValueType(OpTy, true);
5786  }
5787};
5788
5789typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
5790
5791} // end anonymous namespace
5792
5793/// GetRegistersForValue - Assign registers (virtual or physical) for the
5794/// specified operand.  We prefer to assign virtual registers, to allow the
5795/// register allocator to handle the assignment process.  However, if the asm
5796/// uses features that we can't model on machineinstrs, we have SDISel do the
5797/// allocation.  This produces generally horrible, but correct, code.
5798///
5799///   OpInfo describes the operand.
5800///
5801static void GetRegistersForValue(SelectionDAG &DAG,
5802                                 const TargetLowering &TLI,
5803                                 DebugLoc DL,
5804                                 SDISelAsmOperandInfo &OpInfo) {
5805  LLVMContext &Context = *DAG.getContext();
5806
5807  MachineFunction &MF = DAG.getMachineFunction();
5808  SmallVector<unsigned, 4> Regs;
5809
5810  // If this is a constraint for a single physreg, or a constraint for a
5811  // register class, find it.
5812  std::pair<unsigned, const TargetRegisterClass*> PhysReg =
5813    TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
5814                                     OpInfo.ConstraintVT);
5815
5816  unsigned NumRegs = 1;
5817  if (OpInfo.ConstraintVT != MVT::Other) {
5818    // If this is a FP input in an integer register (or visa versa) insert a bit
5819    // cast of the input value.  More generally, handle any case where the input
5820    // value disagrees with the register class we plan to stick this in.
5821    if (OpInfo.Type == InlineAsm::isInput &&
5822        PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
5823      // Try to convert to the first EVT that the reg class contains.  If the
5824      // types are identical size, use a bitcast to convert (e.g. two differing
5825      // vector types).
5826      EVT RegVT = *PhysReg.second->vt_begin();
5827      if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
5828        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
5829                                         RegVT, OpInfo.CallOperand);
5830        OpInfo.ConstraintVT = RegVT;
5831      } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
5832        // If the input is a FP value and we want it in FP registers, do a
5833        // bitcast to the corresponding integer type.  This turns an f64 value
5834        // into i64, which can be passed with two i32 values on a 32-bit
5835        // machine.
5836        RegVT = EVT::getIntegerVT(Context,
5837                                  OpInfo.ConstraintVT.getSizeInBits());
5838        OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL,
5839                                         RegVT, OpInfo.CallOperand);
5840        OpInfo.ConstraintVT = RegVT;
5841      }
5842    }
5843
5844    NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
5845  }
5846
5847  EVT RegVT;
5848  EVT ValueVT = OpInfo.ConstraintVT;
5849
5850  // If this is a constraint for a specific physical register, like {r17},
5851  // assign it now.
5852  if (unsigned AssignedReg = PhysReg.first) {
5853    const TargetRegisterClass *RC = PhysReg.second;
5854    if (OpInfo.ConstraintVT == MVT::Other)
5855      ValueVT = *RC->vt_begin();
5856
5857    // Get the actual register value type.  This is important, because the user
5858    // may have asked for (e.g.) the AX register in i32 type.  We need to
5859    // remember that AX is actually i16 to get the right extension.
5860    RegVT = *RC->vt_begin();
5861
5862    // This is a explicit reference to a physical register.
5863    Regs.push_back(AssignedReg);
5864
5865    // If this is an expanded reference, add the rest of the regs to Regs.
5866    if (NumRegs != 1) {
5867      TargetRegisterClass::iterator I = RC->begin();
5868      for (; *I != AssignedReg; ++I)
5869        assert(I != RC->end() && "Didn't find reg!");
5870
5871      // Already added the first reg.
5872      --NumRegs; ++I;
5873      for (; NumRegs; --NumRegs, ++I) {
5874        assert(I != RC->end() && "Ran out of registers to allocate!");
5875        Regs.push_back(*I);
5876      }
5877    }
5878
5879    OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
5880    return;
5881  }
5882
5883  // Otherwise, if this was a reference to an LLVM register class, create vregs
5884  // for this reference.
5885  if (const TargetRegisterClass *RC = PhysReg.second) {
5886    RegVT = *RC->vt_begin();
5887    if (OpInfo.ConstraintVT == MVT::Other)
5888      ValueVT = RegVT;
5889
5890    // Create the appropriate number of virtual registers.
5891    MachineRegisterInfo &RegInfo = MF.getRegInfo();
5892    for (; NumRegs; --NumRegs)
5893      Regs.push_back(RegInfo.createVirtualRegister(RC));
5894
5895    OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
5896    return;
5897  }
5898
5899  // Otherwise, we couldn't allocate enough registers for this.
5900}
5901
5902/// visitInlineAsm - Handle a call to an InlineAsm object.
5903///
5904void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
5905  const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
5906
5907  /// ConstraintOperands - Information about all of the constraints.
5908  SDISelAsmOperandInfoVector ConstraintOperands;
5909
5910  TargetLowering::AsmOperandInfoVector
5911    TargetConstraints = TLI.ParseConstraints(CS);
5912
5913  bool hasMemory = false;
5914
5915  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
5916  unsigned ResNo = 0;   // ResNo - The result number of the next output.
5917  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
5918    ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
5919    SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
5920
5921    EVT OpVT = MVT::Other;
5922
5923    // Compute the value type for each operand.
5924    switch (OpInfo.Type) {
5925    case InlineAsm::isOutput:
5926      // Indirect outputs just consume an argument.
5927      if (OpInfo.isIndirect) {
5928        OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
5929        break;
5930      }
5931
5932      // The return value of the call is this value.  As such, there is no
5933      // corresponding argument.
5934      assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
5935      if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
5936        OpVT = TLI.getValueType(STy->getElementType(ResNo));
5937      } else {
5938        assert(ResNo == 0 && "Asm only has one result!");
5939        OpVT = TLI.getValueType(CS.getType());
5940      }
5941      ++ResNo;
5942      break;
5943    case InlineAsm::isInput:
5944      OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
5945      break;
5946    case InlineAsm::isClobber:
5947      // Nothing to do.
5948      break;
5949    }
5950
5951    // If this is an input or an indirect output, process the call argument.
5952    // BasicBlocks are labels, currently appearing only in asm's.
5953    if (OpInfo.CallOperandVal) {
5954      if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
5955        OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
5956      } else {
5957        OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
5958      }
5959
5960      OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD);
5961    }
5962
5963    OpInfo.ConstraintVT = OpVT;
5964
5965    // Indirect operand accesses access memory.
5966    if (OpInfo.isIndirect)
5967      hasMemory = true;
5968    else {
5969      for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
5970        TargetLowering::ConstraintType
5971          CType = TLI.getConstraintType(OpInfo.Codes[j]);
5972        if (CType == TargetLowering::C_Memory) {
5973          hasMemory = true;
5974          break;
5975        }
5976      }
5977    }
5978  }
5979
5980  SDValue Chain, Flag;
5981
5982  // We won't need to flush pending loads if this asm doesn't touch
5983  // memory and is nonvolatile.
5984  if (hasMemory || IA->hasSideEffects())
5985    Chain = getRoot();
5986  else
5987    Chain = DAG.getRoot();
5988
5989  // Second pass over the constraints: compute which constraint option to use
5990  // and assign registers to constraints that want a specific physreg.
5991  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
5992    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
5993
5994    // If this is an output operand with a matching input operand, look up the
5995    // matching input. If their types mismatch, e.g. one is an integer, the
5996    // other is floating point, or their sizes are different, flag it as an
5997    // error.
5998    if (OpInfo.hasMatchingInput()) {
5999      SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
6000
6001      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
6002        std::pair<unsigned, const TargetRegisterClass*> MatchRC =
6003          TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
6004                                           OpInfo.ConstraintVT);
6005        std::pair<unsigned, const TargetRegisterClass*> InputRC =
6006          TLI.getRegForInlineAsmConstraint(Input.ConstraintCode,
6007                                           Input.ConstraintVT);
6008        if ((OpInfo.ConstraintVT.isInteger() !=
6009             Input.ConstraintVT.isInteger()) ||
6010            (MatchRC.second != InputRC.second)) {
6011          report_fatal_error("Unsupported asm: input constraint"
6012                             " with a matching output constraint of"
6013                             " incompatible type!");
6014        }
6015        Input.ConstraintVT = OpInfo.ConstraintVT;
6016      }
6017    }
6018
6019    // Compute the constraint code and ConstraintType to use.
6020    TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
6021
6022    // If this is a memory input, and if the operand is not indirect, do what we
6023    // need to to provide an address for the memory input.
6024    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
6025        !OpInfo.isIndirect) {
6026      assert((OpInfo.isMultipleAlternative ||
6027              (OpInfo.Type == InlineAsm::isInput)) &&
6028             "Can only indirectify direct input operands!");
6029
6030      // Memory operands really want the address of the value.  If we don't have
6031      // an indirect input, put it in the constpool if we can, otherwise spill
6032      // it to a stack slot.
6033      // TODO: This isn't quite right. We need to handle these according to
6034      // the addressing mode that the constraint wants. Also, this may take
6035      // an additional register for the computation and we don't want that
6036      // either.
6037
6038      // If the operand is a float, integer, or vector constant, spill to a
6039      // constant pool entry to get its address.
6040      const Value *OpVal = OpInfo.CallOperandVal;
6041      if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
6042          isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
6043        OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
6044                                                 TLI.getPointerTy());
6045      } else {
6046        // Otherwise, create a stack slot and emit a store to it before the
6047        // asm.
6048        Type *Ty = OpVal->getType();
6049        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
6050        unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
6051        MachineFunction &MF = DAG.getMachineFunction();
6052        int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
6053        SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
6054        Chain = DAG.getStore(Chain, getCurDebugLoc(),
6055                             OpInfo.CallOperand, StackSlot,
6056                             MachinePointerInfo::getFixedStack(SSFI),
6057                             false, false, 0);
6058        OpInfo.CallOperand = StackSlot;
6059      }
6060
6061      // There is no longer a Value* corresponding to this operand.
6062      OpInfo.CallOperandVal = 0;
6063
6064      // It is now an indirect operand.
6065      OpInfo.isIndirect = true;
6066    }
6067
6068    // If this constraint is for a specific register, allocate it before
6069    // anything else.
6070    if (OpInfo.ConstraintType == TargetLowering::C_Register)
6071      GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
6072  }
6073
6074  // Second pass - Loop over all of the operands, assigning virtual or physregs
6075  // to register class operands.
6076  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
6077    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
6078
6079    // C_Register operands have already been allocated, Other/Memory don't need
6080    // to be.
6081    if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
6082      GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo);
6083  }
6084
6085  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
6086  std::vector<SDValue> AsmNodeOperands;
6087  AsmNodeOperands.push_back(SDValue());  // reserve space for input chain
6088  AsmNodeOperands.push_back(
6089          DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
6090                                      TLI.getPointerTy()));
6091
6092  // If we have a !srcloc metadata node associated with it, we want to attach
6093  // this to the ultimately generated inline asm machineinstr.  To do this, we
6094  // pass in the third operand as this (potentially null) inline asm MDNode.
6095  const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
6096  AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
6097
6098  // Remember the HasSideEffect, AlignStack and AsmDialect bits as operand 3.
6099  unsigned ExtraInfo = 0;
6100  if (IA->hasSideEffects())
6101    ExtraInfo |= InlineAsm::Extra_HasSideEffects;
6102  if (IA->isAlignStack())
6103    ExtraInfo |= InlineAsm::Extra_IsAlignStack;
6104  // Set the asm dialect.
6105  ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
6106  AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
6107                                                  TLI.getPointerTy()));
6108
6109  // Loop over all of the inputs, copying the operand values into the
6110  // appropriate registers and processing the output regs.
6111  RegsForValue RetValRegs;
6112
6113  // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
6114  std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
6115
6116  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
6117    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
6118
6119    switch (OpInfo.Type) {
6120    case InlineAsm::isOutput: {
6121      if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
6122          OpInfo.ConstraintType != TargetLowering::C_Register) {
6123        // Memory output, or 'other' output (e.g. 'X' constraint).
6124        assert(OpInfo.isIndirect && "Memory output must be indirect operand");
6125
6126        // Add information to the INLINEASM node to know about this output.
6127        unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
6128        AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags,
6129                                                        TLI.getPointerTy()));
6130        AsmNodeOperands.push_back(OpInfo.CallOperand);
6131        break;
6132      }
6133
6134      // Otherwise, this is a register or register class output.
6135
6136      // Copy the output from the appropriate register.  Find a register that
6137      // we can use.
6138      if (OpInfo.AssignedRegs.Regs.empty()) {
6139        LLVMContext &Ctx = *DAG.getContext();
6140        Ctx.emitError(CS.getInstruction(),
6141                      "couldn't allocate output register for constraint '" +
6142                           Twine(OpInfo.ConstraintCode) + "'");
6143        break;
6144      }
6145
6146      // If this is an indirect operand, store through the pointer after the
6147      // asm.
6148      if (OpInfo.isIndirect) {
6149        IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
6150                                                      OpInfo.CallOperandVal));
6151      } else {
6152        // This is the result value of the call.
6153        assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
6154        // Concatenate this output onto the outputs list.
6155        RetValRegs.append(OpInfo.AssignedRegs);
6156      }
6157
6158      // Add information to the INLINEASM node to know that this register is
6159      // set.
6160      OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
6161                                           InlineAsm::Kind_RegDefEarlyClobber :
6162                                               InlineAsm::Kind_RegDef,
6163                                               false,
6164                                               0,
6165                                               DAG,
6166                                               AsmNodeOperands);
6167      break;
6168    }
6169    case InlineAsm::isInput: {
6170      SDValue InOperandVal = OpInfo.CallOperand;
6171
6172      if (OpInfo.isMatchingInputConstraint()) {   // Matching constraint?
6173        // If this is required to match an output register we have already set,
6174        // just use its register.
6175        unsigned OperandNo = OpInfo.getMatchedOperand();
6176
6177        // Scan until we find the definition we already emitted of this operand.
6178        // When we find it, create a RegsForValue operand.
6179        unsigned CurOp = InlineAsm::Op_FirstOperand;
6180        for (; OperandNo; --OperandNo) {
6181          // Advance to the next operand.
6182          unsigned OpFlag =
6183            cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
6184          assert((InlineAsm::isRegDefKind(OpFlag) ||
6185                  InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
6186                  InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?");
6187          CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
6188        }
6189
6190        unsigned OpFlag =
6191          cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
6192        if (InlineAsm::isRegDefKind(OpFlag) ||
6193            InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
6194          // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
6195          if (OpInfo.isIndirect) {
6196            // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
6197            LLVMContext &Ctx = *DAG.getContext();
6198            Ctx.emitError(CS.getInstruction(),  "inline asm not supported yet:"
6199                          " don't know how to handle tied "
6200                          "indirect register inputs");
6201          }
6202
6203          RegsForValue MatchedRegs;
6204          MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
6205          EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
6206          MatchedRegs.RegVTs.push_back(RegVT);
6207          MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
6208          for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
6209               i != e; ++i)
6210            MatchedRegs.Regs.push_back
6211              (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
6212
6213          // Use the produced MatchedRegs object to
6214          MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
6215                                    Chain, &Flag);
6216          MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
6217                                           true, OpInfo.getMatchedOperand(),
6218                                           DAG, AsmNodeOperands);
6219          break;
6220        }
6221
6222        assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
6223        assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
6224               "Unexpected number of operands");
6225        // Add information to the INLINEASM node to know about this input.
6226        // See InlineAsm.h isUseOperandTiedToDef.
6227        OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
6228                                                    OpInfo.getMatchedOperand());
6229        AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
6230                                                        TLI.getPointerTy()));
6231        AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
6232        break;
6233      }
6234
6235      // Treat indirect 'X' constraint as memory.
6236      if (OpInfo.ConstraintType == TargetLowering::C_Other &&
6237          OpInfo.isIndirect)
6238        OpInfo.ConstraintType = TargetLowering::C_Memory;
6239
6240      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
6241        std::vector<SDValue> Ops;
6242        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
6243                                         Ops, DAG);
6244        if (Ops.empty()) {
6245          LLVMContext &Ctx = *DAG.getContext();
6246          Ctx.emitError(CS.getInstruction(),
6247                        "invalid operand for inline asm constraint '" +
6248                        Twine(OpInfo.ConstraintCode) + "'");
6249          break;
6250        }
6251
6252        // Add information to the INLINEASM node to know about this input.
6253        unsigned ResOpType =
6254          InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
6255        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
6256                                                        TLI.getPointerTy()));
6257        AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
6258        break;
6259      }
6260
6261      if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
6262        assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
6263        assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
6264               "Memory operands expect pointer values");
6265
6266        // Add information to the INLINEASM node to know about this input.
6267        unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
6268        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
6269                                                        TLI.getPointerTy()));
6270        AsmNodeOperands.push_back(InOperandVal);
6271        break;
6272      }
6273
6274      assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
6275              OpInfo.ConstraintType == TargetLowering::C_Register) &&
6276             "Unknown constraint type!");
6277
6278      // TODO: Support this.
6279      if (OpInfo.isIndirect) {
6280        LLVMContext &Ctx = *DAG.getContext();
6281        Ctx.emitError(CS.getInstruction(),
6282                      "Don't know how to handle indirect register inputs yet "
6283                      "for constraint '" + Twine(OpInfo.ConstraintCode) + "'");
6284        break;
6285      }
6286
6287      // Copy the input into the appropriate registers.
6288      if (OpInfo.AssignedRegs.Regs.empty()) {
6289        LLVMContext &Ctx = *DAG.getContext();
6290        Ctx.emitError(CS.getInstruction(),
6291                      "couldn't allocate input reg for constraint '" +
6292                           Twine(OpInfo.ConstraintCode) + "'");
6293        break;
6294      }
6295
6296      OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
6297                                        Chain, &Flag);
6298
6299      OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
6300                                               DAG, AsmNodeOperands);
6301      break;
6302    }
6303    case InlineAsm::isClobber: {
6304      // Add the clobbered value to the operand list, so that the register
6305      // allocator is aware that the physreg got clobbered.
6306      if (!OpInfo.AssignedRegs.Regs.empty())
6307        OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
6308                                                 false, 0, DAG,
6309                                                 AsmNodeOperands);
6310      break;
6311    }
6312    }
6313  }
6314
6315  // Finish up input operands.  Set the input chain and add the flag last.
6316  AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
6317  if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
6318
6319  Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
6320                      DAG.getVTList(MVT::Other, MVT::Glue),
6321                      &AsmNodeOperands[0], AsmNodeOperands.size());
6322  Flag = Chain.getValue(1);
6323
6324  // If this asm returns a register value, copy the result from that register
6325  // and set it as the value of the call.
6326  if (!RetValRegs.Regs.empty()) {
6327    SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
6328                                             Chain, &Flag);
6329
6330    // FIXME: Why don't we do this for inline asms with MRVs?
6331    if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
6332      EVT ResultType = TLI.getValueType(CS.getType());
6333
6334      // If any of the results of the inline asm is a vector, it may have the
6335      // wrong width/num elts.  This can happen for register classes that can
6336      // contain multiple different value types.  The preg or vreg allocated may
6337      // not have the same VT as was expected.  Convert it to the right type
6338      // with bit_convert.
6339      if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
6340        Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
6341                          ResultType, Val);
6342
6343      } else if (ResultType != Val.getValueType() &&
6344                 ResultType.isInteger() && Val.getValueType().isInteger()) {
6345        // If a result value was tied to an input value, the computed result may
6346        // have a wider width than the expected result.  Extract the relevant
6347        // portion.
6348        Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
6349      }
6350
6351      assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
6352    }
6353
6354    setValue(CS.getInstruction(), Val);
6355    // Don't need to use this as a chain in this case.
6356    if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
6357      return;
6358  }
6359
6360  std::vector<std::pair<SDValue, const Value *> > StoresToEmit;
6361
6362  // Process indirect outputs, first output all of the flagged copies out of
6363  // physregs.
6364  for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
6365    RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
6366    const Value *Ptr = IndirectStoresToEmit[i].second;
6367    SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
6368                                             Chain, &Flag);
6369    StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
6370  }
6371
6372  // Emit the non-flagged stores from the physregs.
6373  SmallVector<SDValue, 8> OutChains;
6374  for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
6375    SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
6376                               StoresToEmit[i].first,
6377                               getValue(StoresToEmit[i].second),
6378                               MachinePointerInfo(StoresToEmit[i].second),
6379                               false, false, 0);
6380    OutChains.push_back(Val);
6381  }
6382
6383  if (!OutChains.empty())
6384    Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
6385                        &OutChains[0], OutChains.size());
6386
6387  DAG.setRoot(Chain);
6388}
6389
6390void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
6391  DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
6392                          MVT::Other, getRoot(),
6393                          getValue(I.getArgOperand(0)),
6394                          DAG.getSrcValue(I.getArgOperand(0))));
6395}
6396
6397void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
6398  const TargetData &TD = *TLI.getTargetData();
6399  SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
6400                           getRoot(), getValue(I.getOperand(0)),
6401                           DAG.getSrcValue(I.getOperand(0)),
6402                           TD.getABITypeAlignment(I.getType()));
6403  setValue(&I, V);
6404  DAG.setRoot(V.getValue(1));
6405}
6406
6407void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
6408  DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
6409                          MVT::Other, getRoot(),
6410                          getValue(I.getArgOperand(0)),
6411                          DAG.getSrcValue(I.getArgOperand(0))));
6412}
6413
6414void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
6415  DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
6416                          MVT::Other, getRoot(),
6417                          getValue(I.getArgOperand(0)),
6418                          getValue(I.getArgOperand(1)),
6419                          DAG.getSrcValue(I.getArgOperand(0)),
6420                          DAG.getSrcValue(I.getArgOperand(1))));
6421}
6422
6423/// TargetLowering::LowerCallTo - This is the default LowerCallTo
6424/// implementation, which just calls LowerCall.
6425/// FIXME: When all targets are
6426/// migrated to using LowerCall, this hook should be integrated into SDISel.
6427std::pair<SDValue, SDValue>
6428TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
6429  // Handle all of the outgoing arguments.
6430  CLI.Outs.clear();
6431  CLI.OutVals.clear();
6432  ArgListTy &Args = CLI.Args;
6433  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
6434    SmallVector<EVT, 4> ValueVTs;
6435    ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
6436    for (unsigned Value = 0, NumValues = ValueVTs.size();
6437         Value != NumValues; ++Value) {
6438      EVT VT = ValueVTs[Value];
6439      Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
6440      SDValue Op = SDValue(Args[i].Node.getNode(),
6441                           Args[i].Node.getResNo() + Value);
6442      ISD::ArgFlagsTy Flags;
6443      unsigned OriginalAlignment =
6444        getTargetData()->getABITypeAlignment(ArgTy);
6445
6446      if (Args[i].isZExt)
6447        Flags.setZExt();
6448      if (Args[i].isSExt)
6449        Flags.setSExt();
6450      if (Args[i].isInReg)
6451        Flags.setInReg();
6452      if (Args[i].isSRet)
6453        Flags.setSRet();
6454      if (Args[i].isByVal) {
6455        Flags.setByVal();
6456        PointerType *Ty = cast<PointerType>(Args[i].Ty);
6457        Type *ElementTy = Ty->getElementType();
6458        Flags.setByValSize(getTargetData()->getTypeAllocSize(ElementTy));
6459        // For ByVal, alignment should come from FE.  BE will guess if this
6460        // info is not there but there are cases it cannot get right.
6461        unsigned FrameAlign;
6462        if (Args[i].Alignment)
6463          FrameAlign = Args[i].Alignment;
6464        else
6465          FrameAlign = getByValTypeAlignment(ElementTy);
6466        Flags.setByValAlign(FrameAlign);
6467      }
6468      if (Args[i].isNest)
6469        Flags.setNest();
6470      Flags.setOrigAlign(OriginalAlignment);
6471
6472      EVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
6473      unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT);
6474      SmallVector<SDValue, 4> Parts(NumParts);
6475      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
6476
6477      if (Args[i].isSExt)
6478        ExtendKind = ISD::SIGN_EXTEND;
6479      else if (Args[i].isZExt)
6480        ExtendKind = ISD::ZERO_EXTEND;
6481
6482      getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts,
6483                     PartVT, ExtendKind);
6484
6485      for (unsigned j = 0; j != NumParts; ++j) {
6486        // if it isn't first piece, alignment must be 1
6487        ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
6488                               i < CLI.NumFixedArgs);
6489        if (NumParts > 1 && j == 0)
6490          MyFlags.Flags.setSplit();
6491        else if (j != 0)
6492          MyFlags.Flags.setOrigAlign(1);
6493
6494        CLI.Outs.push_back(MyFlags);
6495        CLI.OutVals.push_back(Parts[j]);
6496      }
6497    }
6498  }
6499
6500  // Handle the incoming return values from the call.
6501  CLI.Ins.clear();
6502  SmallVector<EVT, 4> RetTys;
6503  ComputeValueVTs(*this, CLI.RetTy, RetTys);
6504  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
6505    EVT VT = RetTys[I];
6506    EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
6507    unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
6508    for (unsigned i = 0; i != NumRegs; ++i) {
6509      ISD::InputArg MyFlags;
6510      MyFlags.VT = RegisterVT.getSimpleVT();
6511      MyFlags.Used = CLI.IsReturnValueUsed;
6512      if (CLI.RetSExt)
6513        MyFlags.Flags.setSExt();
6514      if (CLI.RetZExt)
6515        MyFlags.Flags.setZExt();
6516      if (CLI.IsInReg)
6517        MyFlags.Flags.setInReg();
6518      CLI.Ins.push_back(MyFlags);
6519    }
6520  }
6521
6522  SmallVector<SDValue, 4> InVals;
6523  CLI.Chain = LowerCall(CLI, InVals);
6524
6525  // Verify that the target's LowerCall behaved as expected.
6526  assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other &&
6527         "LowerCall didn't return a valid chain!");
6528  assert((!CLI.IsTailCall || InVals.empty()) &&
6529         "LowerCall emitted a return value for a tail call!");
6530  assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) &&
6531         "LowerCall didn't emit the correct number of values!");
6532
6533  // For a tail call, the return value is merely live-out and there aren't
6534  // any nodes in the DAG representing it. Return a special value to
6535  // indicate that a tail call has been emitted and no more Instructions
6536  // should be processed in the current block.
6537  if (CLI.IsTailCall) {
6538    CLI.DAG.setRoot(CLI.Chain);
6539    return std::make_pair(SDValue(), SDValue());
6540  }
6541
6542  DEBUG(for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) {
6543          assert(InVals[i].getNode() &&
6544                 "LowerCall emitted a null value!");
6545          assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() &&
6546                 "LowerCall emitted a value with the wrong type!");
6547        });
6548
6549  // Collect the legal value parts into potentially illegal values
6550  // that correspond to the original function's return values.
6551  ISD::NodeType AssertOp = ISD::DELETED_NODE;
6552  if (CLI.RetSExt)
6553    AssertOp = ISD::AssertSext;
6554  else if (CLI.RetZExt)
6555    AssertOp = ISD::AssertZext;
6556  SmallVector<SDValue, 4> ReturnValues;
6557  unsigned CurReg = 0;
6558  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
6559    EVT VT = RetTys[I];
6560    EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT);
6561    unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT);
6562
6563    ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg],
6564                                            NumRegs, RegisterVT, VT,
6565                                            AssertOp));
6566    CurReg += NumRegs;
6567  }
6568
6569  // For a function returning void, there is no return value. We can't create
6570  // such a node, so we just return a null return value in that case. In
6571  // that case, nothing will actually look at the value.
6572  if (ReturnValues.empty())
6573    return std::make_pair(SDValue(), CLI.Chain);
6574
6575  SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL,
6576                                CLI.DAG.getVTList(&RetTys[0], RetTys.size()),
6577                            &ReturnValues[0], ReturnValues.size());
6578  return std::make_pair(Res, CLI.Chain);
6579}
6580
6581void TargetLowering::LowerOperationWrapper(SDNode *N,
6582                                           SmallVectorImpl<SDValue> &Results,
6583                                           SelectionDAG &DAG) const {
6584  SDValue Res = LowerOperation(SDValue(N, 0), DAG);
6585  if (Res.getNode())
6586    Results.push_back(Res);
6587}
6588
6589SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
6590  llvm_unreachable("LowerOperation not implemented for this target!");
6591}
6592
6593void
6594SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
6595  SDValue Op = getNonRegisterValue(V);
6596  assert((Op.getOpcode() != ISD::CopyFromReg ||
6597          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
6598         "Copy from a reg to the same reg!");
6599  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
6600
6601  RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
6602  SDValue Chain = DAG.getEntryNode();
6603  RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
6604  PendingExports.push_back(Chain);
6605}
6606
6607#include "llvm/CodeGen/SelectionDAGISel.h"
6608
6609/// isOnlyUsedInEntryBlock - If the specified argument is only used in the
6610/// entry block, return true.  This includes arguments used by switches, since
6611/// the switch may expand into multiple basic blocks.
6612static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) {
6613  // With FastISel active, we may be splitting blocks, so force creation
6614  // of virtual registers for all non-dead arguments.
6615  if (FastISel)
6616    return A->use_empty();
6617
6618  const BasicBlock *Entry = A->getParent()->begin();
6619  for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end();
6620       UI != E; ++UI) {
6621    const User *U = *UI;
6622    if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U))
6623      return false;  // Use not in entry block.
6624  }
6625  return true;
6626}
6627
6628void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
6629  // If this is the entry block, emit arguments.
6630  const Function &F = *LLVMBB->getParent();
6631  SelectionDAG &DAG = SDB->DAG;
6632  DebugLoc dl = SDB->getCurDebugLoc();
6633  const TargetData *TD = TLI.getTargetData();
6634  SmallVector<ISD::InputArg, 16> Ins;
6635
6636  // Check whether the function can return without sret-demotion.
6637  SmallVector<ISD::OutputArg, 4> Outs;
6638  GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
6639                Outs, TLI);
6640
6641  if (!FuncInfo->CanLowerReturn) {
6642    // Put in an sret pointer parameter before all the other parameters.
6643    SmallVector<EVT, 1> ValueVTs;
6644    ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
6645
6646    // NOTE: Assuming that a pointer will never break down to more than one VT
6647    // or one register.
6648    ISD::ArgFlagsTy Flags;
6649    Flags.setSRet();
6650    EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
6651    ISD::InputArg RetArg(Flags, RegisterVT, true);
6652    Ins.push_back(RetArg);
6653  }
6654
6655  // Set up the incoming argument description vector.
6656  unsigned Idx = 1;
6657  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
6658       I != E; ++I, ++Idx) {
6659    SmallVector<EVT, 4> ValueVTs;
6660    ComputeValueVTs(TLI, I->getType(), ValueVTs);
6661    bool isArgValueUsed = !I->use_empty();
6662    for (unsigned Value = 0, NumValues = ValueVTs.size();
6663         Value != NumValues; ++Value) {
6664      EVT VT = ValueVTs[Value];
6665      Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
6666      ISD::ArgFlagsTy Flags;
6667      unsigned OriginalAlignment =
6668        TD->getABITypeAlignment(ArgTy);
6669
6670      if (F.paramHasAttr(Idx, Attribute::ZExt))
6671        Flags.setZExt();
6672      if (F.paramHasAttr(Idx, Attribute::SExt))
6673        Flags.setSExt();
6674      if (F.paramHasAttr(Idx, Attribute::InReg))
6675        Flags.setInReg();
6676      if (F.paramHasAttr(Idx, Attribute::StructRet))
6677        Flags.setSRet();
6678      if (F.paramHasAttr(Idx, Attribute::ByVal)) {
6679        Flags.setByVal();
6680        PointerType *Ty = cast<PointerType>(I->getType());
6681        Type *ElementTy = Ty->getElementType();
6682        Flags.setByValSize(TD->getTypeAllocSize(ElementTy));
6683        // For ByVal, alignment should be passed from FE.  BE will guess if
6684        // this info is not there but there are cases it cannot get right.
6685        unsigned FrameAlign;
6686        if (F.getParamAlignment(Idx))
6687          FrameAlign = F.getParamAlignment(Idx);
6688        else
6689          FrameAlign = TLI.getByValTypeAlignment(ElementTy);
6690        Flags.setByValAlign(FrameAlign);
6691      }
6692      if (F.paramHasAttr(Idx, Attribute::Nest))
6693        Flags.setNest();
6694      Flags.setOrigAlign(OriginalAlignment);
6695
6696      EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
6697      unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
6698      for (unsigned i = 0; i != NumRegs; ++i) {
6699        ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
6700        if (NumRegs > 1 && i == 0)
6701          MyFlags.Flags.setSplit();
6702        // if it isn't first piece, alignment must be 1
6703        else if (i > 0)
6704          MyFlags.Flags.setOrigAlign(1);
6705        Ins.push_back(MyFlags);
6706      }
6707    }
6708  }
6709
6710  // Call the target to set up the argument values.
6711  SmallVector<SDValue, 8> InVals;
6712  SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
6713                                             F.isVarArg(), Ins,
6714                                             dl, DAG, InVals);
6715
6716  // Verify that the target's LowerFormalArguments behaved as expected.
6717  assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
6718         "LowerFormalArguments didn't return a valid chain!");
6719  assert(InVals.size() == Ins.size() &&
6720         "LowerFormalArguments didn't emit the correct number of values!");
6721  DEBUG({
6722      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
6723        assert(InVals[i].getNode() &&
6724               "LowerFormalArguments emitted a null value!");
6725        assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
6726               "LowerFormalArguments emitted a value with the wrong type!");
6727      }
6728    });
6729
6730  // Update the DAG with the new chain value resulting from argument lowering.
6731  DAG.setRoot(NewRoot);
6732
6733  // Set up the argument values.
6734  unsigned i = 0;
6735  Idx = 1;
6736  if (!FuncInfo->CanLowerReturn) {
6737    // Create a virtual register for the sret pointer, and put in a copy
6738    // from the sret argument into it.
6739    SmallVector<EVT, 1> ValueVTs;
6740    ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
6741    EVT VT = ValueVTs[0];
6742    EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
6743    ISD::NodeType AssertOp = ISD::DELETED_NODE;
6744    SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
6745                                        RegVT, VT, AssertOp);
6746
6747    MachineFunction& MF = SDB->DAG.getMachineFunction();
6748    MachineRegisterInfo& RegInfo = MF.getRegInfo();
6749    unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
6750    FuncInfo->DemoteRegister = SRetReg;
6751    NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(),
6752                                    SRetReg, ArgValue);
6753    DAG.setRoot(NewRoot);
6754
6755    // i indexes lowered arguments.  Bump it past the hidden sret argument.
6756    // Idx indexes LLVM arguments.  Don't touch it.
6757    ++i;
6758  }
6759
6760  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
6761      ++I, ++Idx) {
6762    SmallVector<SDValue, 4> ArgValues;
6763    SmallVector<EVT, 4> ValueVTs;
6764    ComputeValueVTs(TLI, I->getType(), ValueVTs);
6765    unsigned NumValues = ValueVTs.size();
6766
6767    // If this argument is unused then remember its value. It is used to generate
6768    // debugging information.
6769    if (I->use_empty() && NumValues)
6770      SDB->setUnusedArgValue(I, InVals[i]);
6771
6772    for (unsigned Val = 0; Val != NumValues; ++Val) {
6773      EVT VT = ValueVTs[Val];
6774      EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
6775      unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
6776
6777      if (!I->use_empty()) {
6778        ISD::NodeType AssertOp = ISD::DELETED_NODE;
6779        if (F.paramHasAttr(Idx, Attribute::SExt))
6780          AssertOp = ISD::AssertSext;
6781        else if (F.paramHasAttr(Idx, Attribute::ZExt))
6782          AssertOp = ISD::AssertZext;
6783
6784        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
6785                                             NumParts, PartVT, VT,
6786                                             AssertOp));
6787      }
6788
6789      i += NumParts;
6790    }
6791
6792    // We don't need to do anything else for unused arguments.
6793    if (ArgValues.empty())
6794      continue;
6795
6796    // Note down frame index.
6797    if (FrameIndexSDNode *FI =
6798        dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
6799      FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
6800
6801    SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
6802                                     SDB->getCurDebugLoc());
6803
6804    SDB->setValue(I, Res);
6805    if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) {
6806      if (LoadSDNode *LNode =
6807          dyn_cast<LoadSDNode>(Res.getOperand(0).getNode()))
6808        if (FrameIndexSDNode *FI =
6809            dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
6810        FuncInfo->setArgumentFrameIndex(I, FI->getIndex());
6811    }
6812
6813    // If this argument is live outside of the entry block, insert a copy from
6814    // wherever we got it to the vreg that other BB's will reference it as.
6815    if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
6816      // If we can, though, try to skip creating an unnecessary vreg.
6817      // FIXME: This isn't very clean... it would be nice to make this more
6818      // general.  It's also subtly incompatible with the hacks FastISel
6819      // uses with vregs.
6820      unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
6821      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
6822        FuncInfo->ValueMap[I] = Reg;
6823        continue;
6824      }
6825    }
6826    if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) {
6827      FuncInfo->InitializeRegForValue(I);
6828      SDB->CopyToExportRegsIfNeeded(I);
6829    }
6830  }
6831
6832  assert(i == InVals.size() && "Argument register count mismatch!");
6833
6834  // Finally, if the target has anything special to do, allow it to do so.
6835  // FIXME: this should insert code into the DAG!
6836  EmitFunctionEntryCode();
6837}
6838
6839/// Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
6840/// ensure constants are generated when needed.  Remember the virtual registers
6841/// that need to be added to the Machine PHI nodes as input.  We cannot just
6842/// directly add them, because expansion might result in multiple MBB's for one
6843/// BB.  As such, the start of the BB might correspond to a different MBB than
6844/// the end.
6845///
6846void
6847SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
6848  const TerminatorInst *TI = LLVMBB->getTerminator();
6849
6850  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
6851
6852  // Check successor nodes' PHI nodes that expect a constant to be available
6853  // from this block.
6854  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
6855    const BasicBlock *SuccBB = TI->getSuccessor(succ);
6856    if (!isa<PHINode>(SuccBB->begin())) continue;
6857    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
6858
6859    // If this terminator has multiple identical successors (common for
6860    // switches), only handle each succ once.
6861    if (!SuccsHandled.insert(SuccMBB)) continue;
6862
6863    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
6864
6865    // At this point we know that there is a 1-1 correspondence between LLVM PHI
6866    // nodes and Machine PHI nodes, but the incoming operands have not been
6867    // emitted yet.
6868    for (BasicBlock::const_iterator I = SuccBB->begin();
6869         const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
6870      // Ignore dead phi's.
6871      if (PN->use_empty()) continue;
6872
6873      // Skip empty types
6874      if (PN->getType()->isEmptyTy())
6875        continue;
6876
6877      unsigned Reg;
6878      const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
6879
6880      if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
6881        unsigned &RegOut = ConstantsOut[C];
6882        if (RegOut == 0) {
6883          RegOut = FuncInfo.CreateRegs(C->getType());
6884          CopyValueToVirtualRegister(C, RegOut);
6885        }
6886        Reg = RegOut;
6887      } else {
6888        DenseMap<const Value *, unsigned>::iterator I =
6889          FuncInfo.ValueMap.find(PHIOp);
6890        if (I != FuncInfo.ValueMap.end())
6891          Reg = I->second;
6892        else {
6893          assert(isa<AllocaInst>(PHIOp) &&
6894                 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
6895                 "Didn't codegen value into a register!??");
6896          Reg = FuncInfo.CreateRegs(PHIOp->getType());
6897          CopyValueToVirtualRegister(PHIOp, Reg);
6898        }
6899      }
6900
6901      // Remember that this register needs to added to the machine PHI node as
6902      // the input for this MBB.
6903      SmallVector<EVT, 4> ValueVTs;
6904      ComputeValueVTs(TLI, PN->getType(), ValueVTs);
6905      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
6906        EVT VT = ValueVTs[vti];
6907        unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
6908        for (unsigned i = 0, e = NumRegisters; i != e; ++i)
6909          FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
6910        Reg += NumRegisters;
6911      }
6912    }
6913  }
6914  ConstantsOut.clear();
6915}
6916