SelectionDAGBuilder.cpp revision e8aa0b417ca5d0fc33b6079aa11b81cf86667956
1//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements routines for translating from LLVM IR into SelectionDAG IR.
11//
12//===----------------------------------------------------------------------===//
13
14#define DEBUG_TYPE "isel"
15#include "SelectionDAGBuilder.h"
16#include "FunctionLoweringInfo.h"
17#include "llvm/ADT/BitVector.h"
18#include "llvm/ADT/SmallSet.h"
19#include "llvm/Analysis/AliasAnalysis.h"
20#include "llvm/Constants.h"
21#include "llvm/CallingConv.h"
22#include "llvm/DerivedTypes.h"
23#include "llvm/Function.h"
24#include "llvm/GlobalVariable.h"
25#include "llvm/InlineAsm.h"
26#include "llvm/Instructions.h"
27#include "llvm/Intrinsics.h"
28#include "llvm/IntrinsicInst.h"
29#include "llvm/LLVMContext.h"
30#include "llvm/Module.h"
31#include "llvm/CodeGen/FastISel.h"
32#include "llvm/CodeGen/GCStrategy.h"
33#include "llvm/CodeGen/GCMetadata.h"
34#include "llvm/CodeGen/MachineFunction.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineInstrBuilder.h"
37#include "llvm/CodeGen/MachineJumpTableInfo.h"
38#include "llvm/CodeGen/MachineModuleInfo.h"
39#include "llvm/CodeGen/MachineRegisterInfo.h"
40#include "llvm/CodeGen/PseudoSourceValue.h"
41#include "llvm/CodeGen/SelectionDAG.h"
42#include "llvm/CodeGen/DwarfWriter.h"
43#include "llvm/Analysis/DebugInfo.h"
44#include "llvm/Target/TargetRegisterInfo.h"
45#include "llvm/Target/TargetData.h"
46#include "llvm/Target/TargetFrameInfo.h"
47#include "llvm/Target/TargetInstrInfo.h"
48#include "llvm/Target/TargetIntrinsicInfo.h"
49#include "llvm/Target/TargetLowering.h"
50#include "llvm/Target/TargetOptions.h"
51#include "llvm/Support/Compiler.h"
52#include "llvm/Support/CommandLine.h"
53#include "llvm/Support/Debug.h"
54#include "llvm/Support/ErrorHandling.h"
55#include "llvm/Support/MathExtras.h"
56#include "llvm/Support/raw_ostream.h"
57#include <algorithm>
58using namespace llvm;
59
60/// LimitFloatPrecision - Generate low-precision inline sequences for
61/// some float libcalls (6, 8 or 12 bits).
62static unsigned LimitFloatPrecision;
63
64static cl::opt<unsigned, true>
65LimitFPPrecision("limit-float-precision",
66                 cl::desc("Generate low-precision inline sequences "
67                          "for some float libcalls"),
68                 cl::location(LimitFloatPrecision),
69                 cl::init(0));
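// Illustrative usage: passing -limit-float-precision=12 on the llc command
// line requests roughly 12-bit-accurate inline expansions for the affected
// libcalls, while the default of 0 is intended to leave the full-precision
// libcalls untouched.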
70
71namespace {
72  /// RegsForValue - This struct represents the registers (physical or virtual)
73  /// that a particular set of values is assigned, and the type information about
74  /// the value. The most common situation is to represent one value at a time,
75  /// but struct or array values are handled element-wise as multiple values.
76  /// The splitting of aggregates is performed recursively, so that we never
77  /// have aggregate-typed registers. The values at this point do not necessarily
78  /// have legal types, so each value may require one or more registers of some
79  /// legal type.
80  ///
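  /// Illustrative example (assuming a 32-bit target where i64 is not legal):
  /// a value of IR type {i64, float} is flattened to ValueVTs = { i64, f32 }
  /// and RegVTs = { i32, f32 }, with Regs holding three consecutive registers:
  /// two i32 registers carrying the i64 followed by one f32 register.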
81  struct RegsForValue {
82    /// TLI - The TargetLowering object.
83    ///
84    const TargetLowering *TLI;
85
86    /// ValueVTs - The value types of the values, which may not be legal, and
87    /// may need to be promoted or synthesized from one or more registers.
88    ///
89    SmallVector<EVT, 4> ValueVTs;
90
91    /// RegVTs - The value types of the registers. This is the same size as
92    /// ValueVTs and it records, for each value, what the type of the assigned
93    /// register or registers are. (Individual values are never synthesized
94    /// from more than one type of register.)
95    ///
96    /// With virtual registers, the contents of RegVTs are redundant with TLI's
97    /// getRegisterType member function; with physical registers, however, it is
98    /// necessary to have a separate record of the types.
99    ///
100    SmallVector<EVT, 4> RegVTs;
101
102    /// Regs - This list holds the registers assigned to the values.
103    /// Each legal or promoted value requires one register, and each
104    /// expanded value requires multiple registers.
105    ///
106    SmallVector<unsigned, 4> Regs;
107
108    RegsForValue() : TLI(0) {}
109
110    RegsForValue(const TargetLowering &tli,
111                 const SmallVector<unsigned, 4> &regs,
112                 EVT regvt, EVT valuevt)
113      : TLI(&tli),  ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
114    RegsForValue(const TargetLowering &tli,
115                 const SmallVector<unsigned, 4> &regs,
116                 const SmallVector<EVT, 4> &regvts,
117                 const SmallVector<EVT, 4> &valuevts)
118      : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
119    RegsForValue(LLVMContext &Context, const TargetLowering &tli,
120                 unsigned Reg, const Type *Ty) : TLI(&tli) {
121      ComputeValueVTs(tli, Ty, ValueVTs);
122
123      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
124        EVT ValueVT = ValueVTs[Value];
125        unsigned NumRegs = TLI->getNumRegisters(Context, ValueVT);
126        EVT RegisterVT = TLI->getRegisterType(Context, ValueVT);
127        for (unsigned i = 0; i != NumRegs; ++i)
128          Regs.push_back(Reg + i);
129        RegVTs.push_back(RegisterVT);
130        Reg += NumRegs;
131      }
132    }
133
134    /// append - Add the specified values to this one.
135    void append(const RegsForValue &RHS) {
136      TLI = RHS.TLI;
137      ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
138      RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
139      Regs.append(RHS.Regs.begin(), RHS.Regs.end());
140    }
141
142
143    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
144    /// this value and returns the result as a ValueVTs value.  This uses
145    /// Chain/Flag as the input and updates them for the output Chain/Flag.
146    /// If the Flag pointer is NULL, no flag is used.
147    SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
148                              SDValue &Chain, SDValue *Flag) const;
149
150    /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
151    /// specified value into the registers specified by this object.  This uses
152    /// Chain/Flag as the input and updates them for the output Chain/Flag.
153    /// If the Flag pointer is NULL, no flag is used.
154    void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
155                       SDValue &Chain, SDValue *Flag) const;
156
157    /// AddInlineAsmOperands - Add this value to the specified inlineasm node
158    /// operand list.  This adds the code marker, matching input operand index
159    /// (if applicable), and includes the number of values added into it.
160    void AddInlineAsmOperands(unsigned Code,
161                              bool HasMatching, unsigned MatchingIdx,
162                              SelectionDAG &DAG, std::vector<SDValue> &Ops) const;
163  };
164}
165
166/// getCopyFromParts - Create a value that contains the specified legal parts
167/// combined into the value they represent.  If the parts combine to a type
168/// larger than ValueVT, then AssertOp can be used to specify whether the extra
169/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
170/// (ISD::AssertSext).
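// Illustrative example: assembling an i48 value from three i16 parts on a
// little-endian target.  RoundParts is 2, so the first two parts are combined
// into an i32 with BUILD_PAIR; the remaining odd part is any-extended to i48,
// shifted left by 32, and OR'd with the zero-extended i32 to form the result.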
171static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
172                                const SDValue *Parts,
173                                unsigned NumParts, EVT PartVT, EVT ValueVT,
174                                ISD::NodeType AssertOp = ISD::DELETED_NODE) {
175  assert(NumParts > 0 && "No parts to assemble!");
176  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
177  SDValue Val = Parts[0];
178
179  if (NumParts > 1) {
180    // Assemble the value from multiple parts.
181    if (!ValueVT.isVector() && ValueVT.isInteger()) {
182      unsigned PartBits = PartVT.getSizeInBits();
183      unsigned ValueBits = ValueVT.getSizeInBits();
184
185      // Assemble the power of 2 part.
186      unsigned RoundParts = NumParts & (NumParts - 1) ?
187        1 << Log2_32(NumParts) : NumParts;
188      unsigned RoundBits = PartBits * RoundParts;
189      EVT RoundVT = RoundBits == ValueBits ?
190        ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
191      SDValue Lo, Hi;
192
193      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
194
195      if (RoundParts > 2) {
196        Lo = getCopyFromParts(DAG, dl, Parts, RoundParts/2, PartVT, HalfVT);
197        Hi = getCopyFromParts(DAG, dl, Parts+RoundParts/2, RoundParts/2,
198                              PartVT, HalfVT);
199      } else {
200        Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]);
201        Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]);
202      }
203      if (TLI.isBigEndian())
204        std::swap(Lo, Hi);
205      Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi);
206
207      if (RoundParts < NumParts) {
208        // Assemble the trailing non-power-of-2 part.
209        unsigned OddParts = NumParts - RoundParts;
210        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
211        Hi = getCopyFromParts(DAG, dl,
212                              Parts+RoundParts, OddParts, PartVT, OddVT);
213
214        // Combine the round and odd parts.
215        Lo = Val;
216        if (TLI.isBigEndian())
217          std::swap(Lo, Hi);
218        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
219        Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi);
220        Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi,
221                         DAG.getConstant(Lo.getValueType().getSizeInBits(),
222                                         TLI.getPointerTy()));
223        Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo);
224        Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi);
225      }
226    } else if (ValueVT.isVector()) {
227      // Handle a multi-element vector.
228      EVT IntermediateVT, RegisterVT;
229      unsigned NumIntermediates;
230      unsigned NumRegs =
231        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
232                                   NumIntermediates, RegisterVT);
233      assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
234      NumParts = NumRegs; // Silence a compiler warning.
235      assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
236      assert(RegisterVT == Parts[0].getValueType() &&
237             "Part type doesn't match part!");
238
239      // Assemble the parts into intermediate operands.
240      SmallVector<SDValue, 8> Ops(NumIntermediates);
241      if (NumIntermediates == NumParts) {
242        // If the register was not expanded, truncate or copy the value,
243        // as appropriate.
244        for (unsigned i = 0; i != NumParts; ++i)
245          Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1,
246                                    PartVT, IntermediateVT);
247      } else if (NumParts > 0) {
248        // If the intermediate type was expanded, build the intermediate operands
249        // from the parts.
250        assert(NumParts % NumIntermediates == 0 &&
251               "Must expand into a divisible number of parts!");
252        unsigned Factor = NumParts / NumIntermediates;
253        for (unsigned i = 0; i != NumIntermediates; ++i)
254          Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor,
255                                    PartVT, IntermediateVT);
256      }
257
258      // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the intermediate
259      // operands.
260      Val = DAG.getNode(IntermediateVT.isVector() ?
261                        ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl,
262                        ValueVT, &Ops[0], NumIntermediates);
263    } else if (PartVT.isFloatingPoint()) {
264      // FP split into multiple FP parts (for ppcf128)
265      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
266             "Unexpected split");
267      SDValue Lo, Hi;
268      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]);
269      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]);
270      if (TLI.isBigEndian())
271        std::swap(Lo, Hi);
272      Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi);
273    } else {
274      // FP split into integer parts (soft fp)
275      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
276             !PartVT.isVector() && "Unexpected split");
277      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
278      Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
279    }
280  }
281
282  // There is now one part, held in Val.  Correct it to match ValueVT.
283  PartVT = Val.getValueType();
284
285  if (PartVT == ValueVT)
286    return Val;
287
288  if (PartVT.isVector()) {
289    assert(ValueVT.isVector() && "Unknown vector conversion!");
290    return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
291  }
292
293  if (ValueVT.isVector()) {
294    assert(ValueVT.getVectorElementType() == PartVT &&
295           ValueVT.getVectorNumElements() == 1 &&
296           "Only trivial scalar-to-vector conversions should get here!");
297    return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val);
298  }
299
300  if (PartVT.isInteger() &&
301      ValueVT.isInteger()) {
302    if (ValueVT.bitsLT(PartVT)) {
303      // For a truncate, see if we have any information to
304      // indicate whether the truncated bits will always be
305      // zero or sign-extended.
306      if (AssertOp != ISD::DELETED_NODE)
307        Val = DAG.getNode(AssertOp, dl, PartVT, Val,
308                          DAG.getValueType(ValueVT));
309      return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
310    } else {
311      return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val);
312    }
313  }
314
315  if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
316    if (ValueVT.bitsLT(Val.getValueType()))
317      // FP_ROUND's are always exact here.
318      return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val,
319                         DAG.getIntPtrConstant(1));
320    return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val);
321  }
322
323  if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
324    return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
325
326  llvm_unreachable("Unknown mismatch!");
327  return SDValue();
328}
329
330/// getCopyToParts - Create a series of nodes that contain the specified value
331/// split into legal parts.  If the parts contain more bits than Val, then, for
332/// integers, ExtendKind can be used to specify how to generate the extra bits.
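// Illustrative example: splitting an i48 value into three i16 parts.  Because
// 3 is not a power of 2, the top 16 bits are shifted down and peeled off as
// the odd part first; the remaining i32 is then bisected with EXTRACT_ELEMENT
// into two i16 parts, and on big-endian targets the parts are reversed at the
// end so that Parts[0] holds the most significant piece.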
333static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, SDValue Val,
334                           SDValue *Parts, unsigned NumParts, EVT PartVT,
335                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
336  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
337  EVT PtrVT = TLI.getPointerTy();
338  EVT ValueVT = Val.getValueType();
339  unsigned PartBits = PartVT.getSizeInBits();
340  unsigned OrigNumParts = NumParts;
341  assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
342
343  if (!NumParts)
344    return;
345
346  if (!ValueVT.isVector()) {
347    if (PartVT == ValueVT) {
348      assert(NumParts == 1 && "No-op copy with multiple parts!");
349      Parts[0] = Val;
350      return;
351    }
352
353    if (NumParts * PartBits > ValueVT.getSizeInBits()) {
354      // If the parts cover more bits than the value has, promote the value.
355      if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
356        assert(NumParts == 1 && "Do not know what to promote to!");
357        Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val);
358      } else if (PartVT.isInteger() && ValueVT.isInteger()) {
359        ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
360        Val = DAG.getNode(ExtendKind, dl, ValueVT, Val);
361      } else {
362        llvm_unreachable("Unknown mismatch!");
363      }
364    } else if (PartBits == ValueVT.getSizeInBits()) {
365      // Different types of the same size.
366      assert(NumParts == 1 && PartVT != ValueVT);
367      Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
368    } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
369      // If the parts cover fewer bits than the value has, truncate the value.
370      if (PartVT.isInteger() && ValueVT.isInteger()) {
371        ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
372        Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
373      } else {
374        llvm_unreachable("Unknown mismatch!");
375      }
376    }
377
378    // The value may have changed - recompute ValueVT.
379    ValueVT = Val.getValueType();
380    assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
381           "Failed to tile the value with PartVT!");
382
383    if (NumParts == 1) {
384      assert(PartVT == ValueVT && "Type conversion failed!");
385      Parts[0] = Val;
386      return;
387    }
388
389    // Expand the value into multiple parts.
390    if (NumParts & (NumParts - 1)) {
391      // The number of parts is not a power of 2.  Split off and copy the tail.
392      assert(PartVT.isInteger() && ValueVT.isInteger() &&
393             "Do not know what to expand to!");
394      unsigned RoundParts = 1 << Log2_32(NumParts);
395      unsigned RoundBits = RoundParts * PartBits;
396      unsigned OddParts = NumParts - RoundParts;
397      SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val,
398                                   DAG.getConstant(RoundBits,
399                                                   TLI.getPointerTy()));
400      getCopyToParts(DAG, dl, OddVal, Parts + RoundParts, OddParts, PartVT);
401      if (TLI.isBigEndian())
402        // The odd parts were reversed by getCopyToParts - unreverse them.
403        std::reverse(Parts + RoundParts, Parts + NumParts);
404      NumParts = RoundParts;
405      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
406      Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
407    }
408
409    // The number of parts is a power of 2.  Repeatedly bisect the value using
410    // EXTRACT_ELEMENT.
411    Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl,
412                           EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()),
413                           Val);
414    for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
415      for (unsigned i = 0; i < NumParts; i += StepSize) {
416        unsigned ThisBits = StepSize * PartBits / 2;
417        EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
418        SDValue &Part0 = Parts[i];
419        SDValue &Part1 = Parts[i+StepSize/2];
420
421        Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
422                            ThisVT, Part0,
423                            DAG.getConstant(1, PtrVT));
424        Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
425                            ThisVT, Part0,
426                            DAG.getConstant(0, PtrVT));
427
428        if (ThisBits == PartBits && ThisVT != PartVT) {
429          Part0 = DAG.getNode(ISD::BIT_CONVERT, dl,
430                              PartVT, Part0);
431          Part1 = DAG.getNode(ISD::BIT_CONVERT, dl,
432                              PartVT, Part1);
433        }
434      }
435    }
436
437    if (TLI.isBigEndian())
438      std::reverse(Parts, Parts + OrigNumParts);
439
440    return;
441  }
442
443  // Vector ValueVT.
444  if (NumParts == 1) {
445    if (PartVT != ValueVT) {
446      if (PartVT.isVector()) {
447        Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
448      } else {
449        assert(ValueVT.getVectorElementType() == PartVT &&
450               ValueVT.getVectorNumElements() == 1 &&
451               "Only trivial vector-to-scalar conversions should get here!");
452        Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
453                          PartVT, Val,
454                          DAG.getConstant(0, PtrVT));
455      }
456    }
457
458    Parts[0] = Val;
459    return;
460  }
461
462  // Handle a multi-element vector.
463  EVT IntermediateVT, RegisterVT;
464  unsigned NumIntermediates;
465  unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
466                              IntermediateVT, NumIntermediates, RegisterVT);
467  unsigned NumElements = ValueVT.getVectorNumElements();
468
469  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
470  NumParts = NumRegs; // Silence a compiler warning.
471  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
472
473  // Split the vector into intermediate operands.
474  SmallVector<SDValue, 8> Ops(NumIntermediates);
475  for (unsigned i = 0; i != NumIntermediates; ++i)
476    if (IntermediateVT.isVector())
477      Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
478                           IntermediateVT, Val,
479                           DAG.getConstant(i * (NumElements / NumIntermediates),
480                                           PtrVT));
481    else
482      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
483                           IntermediateVT, Val,
484                           DAG.getConstant(i, PtrVT));
485
486  // Split the intermediate operands into legal parts.
487  if (NumParts == NumIntermediates) {
488    // If the register was not expanded, promote or copy the value,
489    // as appropriate.
490    for (unsigned i = 0; i != NumParts; ++i)
491      getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT);
492  } else if (NumParts > 0) {
493    // If the intermediate type was expanded, split each intermediate operand
494    // into legal parts.
495    assert(NumParts % NumIntermediates == 0 &&
496           "Must expand into a divisible number of parts!");
497    unsigned Factor = NumParts / NumIntermediates;
498    for (unsigned i = 0; i != NumIntermediates; ++i)
499      getCopyToParts(DAG, dl, Ops[i], &Parts[i * Factor], Factor, PartVT);
500  }
501}
502
503
504void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
505  AA = &aa;
506  GFI = gfi;
507  TD = DAG.getTarget().getTargetData();
508}
509
510/// clear - Clear out the current SelectionDAG and the associated
511/// state and prepare this SelectionDAGBuilder object to be used
512/// for a new block. This doesn't clear out information about
513/// additional blocks that are needed to complete switch lowering
514/// or PHI node updating; that information is cleared out as it is
515/// consumed.
516void SelectionDAGBuilder::clear() {
517  NodeMap.clear();
518  PendingLoads.clear();
519  PendingExports.clear();
520  EdgeMapping.clear();
521  DAG.clear();
522  CurDebugLoc = DebugLoc::getUnknownLoc();
523  HasTailCall = false;
524}
525
526/// getRoot - Return the current virtual root of the Selection DAG,
527/// flushing any PendingLoad items. This must be done before emitting
528/// a store or any other node that may need to be ordered after any
529/// prior load instructions.
530///
531SDValue SelectionDAGBuilder::getRoot() {
532  if (PendingLoads.empty())
533    return DAG.getRoot();
534
535  if (PendingLoads.size() == 1) {
536    SDValue Root = PendingLoads[0];
537    DAG.setRoot(Root);
538    PendingLoads.clear();
539    return Root;
540  }
541
542  // Otherwise, we have to make a token factor node.
543  SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
544                               &PendingLoads[0], PendingLoads.size());
545  PendingLoads.clear();
546  DAG.setRoot(Root);
547  return Root;
548}
549
550/// getControlRoot - Similar to getRoot, but instead of flushing all the
551/// PendingLoad items, flush all the PendingExports items. It is necessary
552/// to do this before emitting a terminator instruction.
553///
554SDValue SelectionDAGBuilder::getControlRoot() {
555  SDValue Root = DAG.getRoot();
556
557  if (PendingExports.empty())
558    return Root;
559
560  // Turn all of the CopyToReg chains into one factored node.
561  if (Root.getOpcode() != ISD::EntryToken) {
562    unsigned i = 0, e = PendingExports.size();
563    for (; i != e; ++i) {
564      assert(PendingExports[i].getNode()->getNumOperands() > 1);
565      if (PendingExports[i].getNode()->getOperand(0) == Root)
566        break;  // Don't add the root if we already indirectly depend on it.
567    }
568
569    if (i == e)
570      PendingExports.push_back(Root);
571  }
572
573  Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
574                     &PendingExports[0],
575                     PendingExports.size());
576  PendingExports.clear();
577  DAG.setRoot(Root);
578  return Root;
579}
580
581void SelectionDAGBuilder::visit(Instruction &I) {
582  visit(I.getOpcode(), I);
583}
584
585void SelectionDAGBuilder::visit(unsigned Opcode, User &I) {
586  // Tell the DAG that we're processing a new instruction.
587  DAG.NewInst();
588
589  // Note: this doesn't use InstVisitor, because it has to work with
590  // ConstantExpr's in addition to instructions.
591  switch (Opcode) {
592  default: llvm_unreachable("Unknown instruction type encountered!");
593    // Build the switch statement using the Instruction.def file.
594#define HANDLE_INST(NUM, OPCODE, CLASS) \
595  case Instruction::OPCODE:return visit##OPCODE((CLASS&)I);
596#include "llvm/Instruction.def"
597  }
598}
599
600SDValue SelectionDAGBuilder::getValue(const Value *V) {
601  SDValue &N = NodeMap[V];
602  if (N.getNode()) return N;
603
604  if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(V))) {
605    EVT VT = TLI.getValueType(V->getType(), true);
606
607    if (ConstantInt *CI = dyn_cast<ConstantInt>(C))
608      return N = DAG.getConstant(*CI, VT);
609
610    if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
611      return N = DAG.getGlobalAddress(GV, VT);
612
613    if (isa<ConstantPointerNull>(C))
614      return N = DAG.getConstant(0, TLI.getPointerTy());
615
616    if (ConstantFP *CFP = dyn_cast<ConstantFP>(C))
617      return N = DAG.getConstantFP(*CFP, VT);
618
619    if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
620      return N = DAG.getUNDEF(VT);
621
622    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
623      visit(CE->getOpcode(), *CE);
624      SDValue N1 = NodeMap[V];
625      assert(N1.getNode() && "visit didn't populate the ValueMap!");
626      return N1;
627    }
628
629    if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
630      SmallVector<SDValue, 4> Constants;
631      for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
632           OI != OE; ++OI) {
633        SDNode *Val = getValue(*OI).getNode();
634        // If the operand is an empty aggregate, there are no values.
635        if (!Val) continue;
636        // Add each leaf value from the operand to the Constants list
637        // to form a flattened list of all the values.
638        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
639          Constants.push_back(SDValue(Val, i));
640      }
641      return DAG.getMergeValues(&Constants[0], Constants.size(),
642                                getCurDebugLoc());
643    }
644
645    if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) {
646      assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
647             "Unknown struct or array constant!");
648
649      SmallVector<EVT, 4> ValueVTs;
650      ComputeValueVTs(TLI, C->getType(), ValueVTs);
651      unsigned NumElts = ValueVTs.size();
652      if (NumElts == 0)
653        return SDValue(); // empty struct
654      SmallVector<SDValue, 4> Constants(NumElts);
655      for (unsigned i = 0; i != NumElts; ++i) {
656        EVT EltVT = ValueVTs[i];
657        if (isa<UndefValue>(C))
658          Constants[i] = DAG.getUNDEF(EltVT);
659        else if (EltVT.isFloatingPoint())
660          Constants[i] = DAG.getConstantFP(0, EltVT);
661        else
662          Constants[i] = DAG.getConstant(0, EltVT);
663      }
664      return DAG.getMergeValues(&Constants[0], NumElts, getCurDebugLoc());
665    }
666
667    if (BlockAddress *BA = dyn_cast<BlockAddress>(C))
668      return DAG.getBlockAddress(BA, VT);
669
670    const VectorType *VecTy = cast<VectorType>(V->getType());
671    unsigned NumElements = VecTy->getNumElements();
672
673    // Now that we know the number and type of the elements, get that number of
674    // elements into the Ops array based on what kind of constant it is.
675    SmallVector<SDValue, 16> Ops;
676    if (ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
677      for (unsigned i = 0; i != NumElements; ++i)
678        Ops.push_back(getValue(CP->getOperand(i)));
679    } else {
680      assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
681      EVT EltVT = TLI.getValueType(VecTy->getElementType());
682
683      SDValue Op;
684      if (EltVT.isFloatingPoint())
685        Op = DAG.getConstantFP(0, EltVT);
686      else
687        Op = DAG.getConstant(0, EltVT);
688      Ops.assign(NumElements, Op);
689    }
690
691    // Create a BUILD_VECTOR node.
692    return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
693                                    VT, &Ops[0], Ops.size());
694  }
695
696  // If this is a static alloca, generate it as the frameindex instead of
697  // emitting the address computation.
698  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
699    DenseMap<const AllocaInst*, int>::iterator SI =
700      FuncInfo.StaticAllocaMap.find(AI);
701    if (SI != FuncInfo.StaticAllocaMap.end())
702      return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
703  }
704
705  unsigned InReg = FuncInfo.ValueMap[V];
706  assert(InReg && "Value not in map!");
707
708  RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
709  SDValue Chain = DAG.getEntryNode();
710  return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL);
711}
712
713/// Get the EVTs and ArgFlags collections that represent the return type
714/// of the given function.  This does not require a DAG or a return value, and
715/// is suitable for use before any DAGs for the function are constructed.
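// Illustrative example (assuming a target whose minimum promotion type is
// i32): for a function declared "zeroext i8 @f()", the i8 return value is
// widened to i32, producing a single i32 entry in OutVTs whose flags have
// ZExt (and, if the attribute is present, InReg) set.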
716static void getReturnInfo(const Type* ReturnType,
717                   Attributes attr, SmallVectorImpl<EVT> &OutVTs,
718                   SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags,
719                   TargetLowering &TLI,
720                   SmallVectorImpl<uint64_t> *Offsets = 0) {
721  SmallVector<EVT, 4> ValueVTs;
722  ComputeValueVTs(TLI, ReturnType, ValueVTs, Offsets);
723  unsigned NumValues = ValueVTs.size();
724  if (NumValues == 0) return;
725
726  for (unsigned j = 0, f = NumValues; j != f; ++j) {
727    EVT VT = ValueVTs[j];
728    ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
729
730    if (attr & Attribute::SExt)
731      ExtendKind = ISD::SIGN_EXTEND;
732    else if (attr & Attribute::ZExt)
733      ExtendKind = ISD::ZERO_EXTEND;
734
735    // FIXME: C calling convention requires the return type to be promoted to
736    // at least 32-bit. But this is not necessary for non-C calling
737    // conventions. The frontend should mark functions whose return values
738    // require promoting with signext or zeroext attributes.
739    if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
740      EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
741      if (VT.bitsLT(MinVT))
742        VT = MinVT;
743    }
744
745    unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
746    EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
747    // 'inreg' on function refers to return value
748    ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
749    if (attr & Attribute::InReg)
750      Flags.setInReg();
751
752    // Propagate extension type if any
753    if (attr & Attribute::SExt)
754      Flags.setSExt();
755    else if (attr & Attribute::ZExt)
756      Flags.setZExt();
757
758    for (unsigned i = 0; i < NumParts; ++i) {
759      OutVTs.push_back(PartVT);
760      OutFlags.push_back(Flags);
761    }
762  }
763}
764
765void SelectionDAGBuilder::visitRet(ReturnInst &I) {
766  SDValue Chain = getControlRoot();
767  SmallVector<ISD::OutputArg, 8> Outs;
768  FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
769
770  if (!FLI.CanLowerReturn) {
771    unsigned DemoteReg = FLI.DemoteRegister;
772    const Function *F = I.getParent()->getParent();
773
774    // Emit a store of the return value through the virtual register.
775    // Leave Outs empty so that LowerReturn won't try to load return
776    // registers the usual way.
777    SmallVector<EVT, 1> PtrValueVTs;
778    ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
779                    PtrValueVTs);
780
781    SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
782    SDValue RetOp = getValue(I.getOperand(0));
783
784    SmallVector<EVT, 4> ValueVTs;
785    SmallVector<uint64_t, 4> Offsets;
786    ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
787    unsigned NumValues = ValueVTs.size();
788
789    SmallVector<SDValue, 4> Chains(NumValues);
790    EVT PtrVT = PtrValueVTs[0];
791    for (unsigned i = 0; i != NumValues; ++i)
792      Chains[i] = DAG.getStore(Chain, getCurDebugLoc(),
793                  SDValue(RetOp.getNode(), RetOp.getResNo() + i),
794                  DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr,
795                  DAG.getConstant(Offsets[i], PtrVT)),
796                  NULL, Offsets[i], false, 0);
797    Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
798                        MVT::Other, &Chains[0], NumValues);
799  }
800  else {
801    for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
802      SmallVector<EVT, 4> ValueVTs;
803      ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
804      unsigned NumValues = ValueVTs.size();
805      if (NumValues == 0) continue;
806
807      SDValue RetOp = getValue(I.getOperand(i));
808      for (unsigned j = 0, f = NumValues; j != f; ++j) {
809        EVT VT = ValueVTs[j];
810
811        ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
812
813        const Function *F = I.getParent()->getParent();
814        if (F->paramHasAttr(0, Attribute::SExt))
815          ExtendKind = ISD::SIGN_EXTEND;
816        else if (F->paramHasAttr(0, Attribute::ZExt))
817          ExtendKind = ISD::ZERO_EXTEND;
818
819        // FIXME: C calling convention requires the return type to be promoted to
820        // at least 32-bit. But this is not necessary for non-C calling
821        // conventions. The frontend should mark functions whose return values
822        // require promoting with signext or zeroext attributes.
823        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
824          EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
825          if (VT.bitsLT(MinVT))
826            VT = MinVT;
827        }
828
829        unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
830        EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
831        SmallVector<SDValue, 4> Parts(NumParts);
832        getCopyToParts(DAG, getCurDebugLoc(),
833                       SDValue(RetOp.getNode(), RetOp.getResNo() + j),
834                       &Parts[0], NumParts, PartVT, ExtendKind);
835
836        // 'inreg' on function refers to return value
837        ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
838        if (F->paramHasAttr(0, Attribute::InReg))
839          Flags.setInReg();
840
841        // Propagate extension type if any
842        if (F->paramHasAttr(0, Attribute::SExt))
843          Flags.setSExt();
844        else if (F->paramHasAttr(0, Attribute::ZExt))
845          Flags.setZExt();
846
847        for (unsigned i = 0; i < NumParts; ++i)
848          Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true));
849      }
850    }
851  }
852
853  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
854  CallingConv::ID CallConv =
855    DAG.getMachineFunction().getFunction()->getCallingConv();
856  Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
857                          Outs, getCurDebugLoc(), DAG);
858
859  // Verify that the target's LowerReturn behaved as expected.
860  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
861         "LowerReturn didn't return a valid chain!");
862
863  // Update the DAG with the new chain value resulting from return lowering.
864  DAG.setRoot(Chain);
865}
866
867/// CopyToExportRegsIfNeeded - If the given value has virtual registers
868/// created for it, emit nodes to copy the value into the virtual
869/// registers.
870void SelectionDAGBuilder::CopyToExportRegsIfNeeded(Value *V) {
871  if (!V->use_empty()) {
872    DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
873    if (VMI != FuncInfo.ValueMap.end())
874      CopyValueToVirtualRegister(V, VMI->second);
875  }
876}
877
878/// ExportFromCurrentBlock - If this condition isn't known to be exported from
879/// the current basic block, add it to ValueMap now so that we'll get a
880/// CopyTo/FromReg.
881void SelectionDAGBuilder::ExportFromCurrentBlock(Value *V) {
882  // No need to export constants.
883  if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
884
885  // Already exported?
886  if (FuncInfo.isExportedInst(V)) return;
887
888  unsigned Reg = FuncInfo.InitializeRegForValue(V);
889  CopyValueToVirtualRegister(V, Reg);
890}
891
892bool SelectionDAGBuilder::isExportableFromCurrentBlock(Value *V,
893                                                     const BasicBlock *FromBB) {
894  // The operands of the setcc have to be in this block.  We don't know
895  // how to export them from some other block.
896  if (Instruction *VI = dyn_cast<Instruction>(V)) {
897    // Can export from current BB.
898    if (VI->getParent() == FromBB)
899      return true;
900
901    // Is already exported, noop.
902    return FuncInfo.isExportedInst(V);
903  }
904
905  // If this is an argument, we can export it if the BB is the entry block or
906  // if it is already exported.
907  if (isa<Argument>(V)) {
908    if (FromBB == &FromBB->getParent()->getEntryBlock())
909      return true;
910
911    // Otherwise, can only export this if it is already exported.
912    return FuncInfo.isExportedInst(V);
913  }
914
915  // Otherwise, constants can always be exported.
916  return true;
917}
918
919static bool InBlock(const Value *V, const BasicBlock *BB) {
920  if (const Instruction *I = dyn_cast<Instruction>(V))
921    return I->getParent() == BB;
922  return true;
923}
924
925/// getFCmpCondCode - Return the ISD condition code corresponding to
926/// the given LLVM IR floating-point condition code.  This includes
927/// consideration of global floating-point math flags.
928///
929static ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred) {
930  ISD::CondCode FPC, FOC;
931  switch (Pred) {
932  case FCmpInst::FCMP_FALSE: FOC = FPC = ISD::SETFALSE; break;
933  case FCmpInst::FCMP_OEQ:   FOC = ISD::SETEQ; FPC = ISD::SETOEQ; break;
934  case FCmpInst::FCMP_OGT:   FOC = ISD::SETGT; FPC = ISD::SETOGT; break;
935  case FCmpInst::FCMP_OGE:   FOC = ISD::SETGE; FPC = ISD::SETOGE; break;
936  case FCmpInst::FCMP_OLT:   FOC = ISD::SETLT; FPC = ISD::SETOLT; break;
937  case FCmpInst::FCMP_OLE:   FOC = ISD::SETLE; FPC = ISD::SETOLE; break;
938  case FCmpInst::FCMP_ONE:   FOC = ISD::SETNE; FPC = ISD::SETONE; break;
939  case FCmpInst::FCMP_ORD:   FOC = FPC = ISD::SETO;   break;
940  case FCmpInst::FCMP_UNO:   FOC = FPC = ISD::SETUO;  break;
941  case FCmpInst::FCMP_UEQ:   FOC = ISD::SETEQ; FPC = ISD::SETUEQ; break;
942  case FCmpInst::FCMP_UGT:   FOC = ISD::SETGT; FPC = ISD::SETUGT; break;
943  case FCmpInst::FCMP_UGE:   FOC = ISD::SETGE; FPC = ISD::SETUGE; break;
944  case FCmpInst::FCMP_ULT:   FOC = ISD::SETLT; FPC = ISD::SETULT; break;
945  case FCmpInst::FCMP_ULE:   FOC = ISD::SETLE; FPC = ISD::SETULE; break;
946  case FCmpInst::FCMP_UNE:   FOC = ISD::SETNE; FPC = ISD::SETUNE; break;
947  case FCmpInst::FCMP_TRUE:  FOC = FPC = ISD::SETTRUE; break;
948  default:
949    llvm_unreachable("Invalid FCmp predicate opcode!");
950    FOC = FPC = ISD::SETFALSE;
951    break;
952  }
953  if (FiniteOnlyFPMath())
954    return FOC;
955  else
956    return FPC;
957}
958
959/// getICmpCondCode - Return the ISD condition code corresponding to
960/// the given LLVM IR integer condition code.
961///
962static ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred) {
963  switch (Pred) {
964  case ICmpInst::ICMP_EQ:  return ISD::SETEQ;
965  case ICmpInst::ICMP_NE:  return ISD::SETNE;
966  case ICmpInst::ICMP_SLE: return ISD::SETLE;
967  case ICmpInst::ICMP_ULE: return ISD::SETULE;
968  case ICmpInst::ICMP_SGE: return ISD::SETGE;
969  case ICmpInst::ICMP_UGE: return ISD::SETUGE;
970  case ICmpInst::ICMP_SLT: return ISD::SETLT;
971  case ICmpInst::ICMP_ULT: return ISD::SETULT;
972  case ICmpInst::ICMP_SGT: return ISD::SETGT;
973  case ICmpInst::ICMP_UGT: return ISD::SETUGT;
974  default:
975    llvm_unreachable("Invalid ICmp predicate opcode!");
976    return ISD::SETNE;
977  }
978}
979
980/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
981/// This function emits a branch and is used at the leaves of an OR or an
982/// AND operator tree.
983///
984void
985SelectionDAGBuilder::EmitBranchForMergedCondition(Value *Cond,
986                                                  MachineBasicBlock *TBB,
987                                                  MachineBasicBlock *FBB,
988                                                  MachineBasicBlock *CurBB) {
989  const BasicBlock *BB = CurBB->getBasicBlock();
990
991  // If the leaf of the tree is a comparison, merge the condition into
992  // the caseblock.
993  if (CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
994    // The operands of the cmp have to be in this block.  We don't know
995    // how to export them from some other block.  If this is the first block
996    // of the sequence, no exporting is needed.
997    if (CurBB == CurMBB ||
998        (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
999         isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
1000      ISD::CondCode Condition;
1001      if (ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
1002        Condition = getICmpCondCode(IC->getPredicate());
1003      } else if (FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
1004        Condition = getFCmpCondCode(FC->getPredicate());
1005      } else {
1006        Condition = ISD::SETEQ; // silence warning.
1007        llvm_unreachable("Unknown compare instruction");
1008      }
1009
1010      CaseBlock CB(Condition, BOp->getOperand(0),
1011                   BOp->getOperand(1), NULL, TBB, FBB, CurBB);
1012      SwitchCases.push_back(CB);
1013      return;
1014    }
1015  }
1016
1017  // Create a CaseBlock record representing this branch.
1018  CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
1019               NULL, TBB, FBB, CurBB);
1020  SwitchCases.push_back(CB);
1021}
1022
1023/// FindMergedConditions - If Cond is an and/or expression tree, lower it as a sequence of conditional branches instead of a single combined setcc.
1024void SelectionDAGBuilder::FindMergedConditions(Value *Cond,
1025                                               MachineBasicBlock *TBB,
1026                                               MachineBasicBlock *FBB,
1027                                               MachineBasicBlock *CurBB,
1028                                               unsigned Opc) {
1029  // If this node is not part of the or/and tree, emit it as a branch.
1030  Instruction *BOp = dyn_cast<Instruction>(Cond);
1031  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
1032      (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
1033      BOp->getParent() != CurBB->getBasicBlock() ||
1034      !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
1035      !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
1036    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB);
1037    return;
1038  }
1039
1040  //  Create TmpBB after CurBB.
1041  MachineFunction::iterator BBI = CurBB;
1042  MachineFunction &MF = DAG.getMachineFunction();
1043  MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
1044  CurBB->getParent()->insert(++BBI, TmpBB);
1045
1046  if (Opc == Instruction::Or) {
1047    // Codegen X | Y as:
1048    //   jmp_if_X TBB
1049    //   jmp TmpBB
1050    // TmpBB:
1051    //   jmp_if_Y TBB
1052    //   jmp FBB
1053    //
1054
1055    // Emit the LHS condition.
1056    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, Opc);
1057
1058    // Emit the RHS condition into TmpBB.
1059    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
1060  } else {
1061    assert(Opc == Instruction::And && "Unknown merge op!");
1062    // Codegen X & Y as:
1063    //   jmp_if_X TmpBB
1064    //   jmp FBB
1065    // TmpBB:
1066    //   jmp_if_Y TBB
1067    //   jmp FBB
1068    //
1069    //  This requires creation of TmpBB after CurBB.
1070
1071    // Emit the LHS condition.
1072    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, Opc);
1073
1074    // Emit the RHS condition into TmpBB.
1075    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, Opc);
1076  }
1077}
1078
1079/// If the set of cases should be emitted as a series of branches, return true.
1080/// If we should emit this as a bunch of and/or'd together conditions, return
1081/// false.
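// Illustrative example: for "if (X < 3 || X == 3)" both CaseBlocks compare X
// against 3, so this returns false and the branch is emitted on the original
// or'd condition (which, as the comment below notes, will get folded into a
// single comparison) rather than as two separate branch blocks.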
1082bool
1083SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
1084  if (Cases.size() != 2) return true;
1085
1086  // If this is two comparisons of the same values or'd or and'd together, they
1087  // will get folded into a single comparison, so don't emit two blocks.
1088  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
1089       Cases[0].CmpRHS == Cases[1].CmpRHS) ||
1090      (Cases[0].CmpRHS == Cases[1].CmpLHS &&
1091       Cases[0].CmpLHS == Cases[1].CmpRHS)) {
1092    return false;
1093  }
1094
1095  return true;
1096}
1097
1098void SelectionDAGBuilder::visitBr(BranchInst &I) {
1099  // Update machine-CFG edges.
1100  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
1101
1102  // Figure out which block is immediately after the current one.
1103  MachineBasicBlock *NextBlock = 0;
1104  MachineFunction::iterator BBI = CurMBB;
1105  if (++BBI != FuncInfo.MF->end())
1106    NextBlock = BBI;
1107
1108  if (I.isUnconditional()) {
1109    // Update machine-CFG edges.
1110    CurMBB->addSuccessor(Succ0MBB);
1111
1112    // If this is not a fall-through branch, emit the branch.
1113    if (Succ0MBB != NextBlock)
1114      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
1115                              MVT::Other, getControlRoot(),
1116                              DAG.getBasicBlock(Succ0MBB)));
1117    return;
1118  }
1119
1120  // If this condition is one of the special cases we handle, do special stuff
1121  // now.
1122  Value *CondVal = I.getCondition();
1123  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
1124
1125  // If this is a series of conditions that are or'd or and'd together, emit
1126  // this as a sequence of branches instead of setcc's with and/or operations.
1127  // For example, instead of something like:
1128  //     cmp A, B
1129  //     C = seteq
1130  //     cmp D, E
1131  //     F = setle
1132  //     or C, F
1133  //     jnz foo
1134  // Emit:
1135  //     cmp A, B
1136  //     je foo
1137  //     cmp D, E
1138  //     jle foo
1139  //
1140  if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
1141    if (BOp->hasOneUse() &&
1142        (BOp->getOpcode() == Instruction::And ||
1143         BOp->getOpcode() == Instruction::Or)) {
1144      FindMergedConditions(BOp, Succ0MBB, Succ1MBB, CurMBB, BOp->getOpcode());
1145      // If the compares in later blocks need to use values not currently
1146      // exported from this block, export them now.  This block should always
1147      // be the first entry.
1148      assert(SwitchCases[0].ThisBB == CurMBB && "Unexpected lowering!");
1149
1150      // Allow some cases to be rejected.
1151      if (ShouldEmitAsBranches(SwitchCases)) {
1152        for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
1153          ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
1154          ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
1155        }
1156
1157        // Emit the branch for this block.
1158        visitSwitchCase(SwitchCases[0]);
1159        SwitchCases.erase(SwitchCases.begin());
1160        return;
1161      }
1162
1163      // Okay, we decided not to do this, remove any inserted MBB's and clear
1164      // SwitchCases.
1165      for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
1166        FuncInfo.MF->erase(SwitchCases[i].ThisBB);
1167
1168      SwitchCases.clear();
1169    }
1170  }
1171
1172  // Create a CaseBlock record representing this branch.
1173  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
1174               NULL, Succ0MBB, Succ1MBB, CurMBB);
1175  // Use visitSwitchCase to actually insert the fast branch sequence for this
1176  // cond branch.
1177  visitSwitchCase(CB);
1178}
1179
1180/// visitSwitchCase - Emits the necessary code to represent a single node in
1181/// the binary search tree resulting from lowering a switch instruction.
1182void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB) {
1183  SDValue Cond;
1184  SDValue CondLHS = getValue(CB.CmpLHS);
1185  DebugLoc dl = getCurDebugLoc();
1186
1187  // Build the setcc now.
1188  if (CB.CmpMHS == NULL) {
1189    // Fold "(X == true)" to X and "(X == false)" to !X to
1190    // handle common cases produced by branch lowering.
1191    if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
1192        CB.CC == ISD::SETEQ)
1193      Cond = CondLHS;
1194    else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
1195             CB.CC == ISD::SETEQ) {
1196      SDValue True = DAG.getConstant(1, CondLHS.getValueType());
1197      Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
1198    } else
1199      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
1200  } else {
1201    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
1202
1203    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
1204    const APInt& High  = cast<ConstantInt>(CB.CmpRHS)->getValue();
1205
1206    SDValue CmpOp = getValue(CB.CmpMHS);
1207    EVT VT = CmpOp.getValueType();
1208
1209    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
1210      Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
1211                          ISD::SETLE);
1212    } else {
1213      SDValue SUB = DAG.getNode(ISD::SUB, dl,
1214                                VT, CmpOp, DAG.getConstant(Low, VT));
1215      Cond = DAG.getSetCC(dl, MVT::i1, SUB,
1216                          DAG.getConstant(High-Low, VT), ISD::SETULE);
1217    }
1218  }
1219
1220  // Update successor info
1221  CurMBB->addSuccessor(CB.TrueBB);
1222  CurMBB->addSuccessor(CB.FalseBB);
1223
1224  // Set NextBlock to be the MBB immediately after the current one, if any.
1225  // This is used to avoid emitting unnecessary branches to the next block.
1226  MachineBasicBlock *NextBlock = 0;
1227  MachineFunction::iterator BBI = CurMBB;
1228  if (++BBI != FuncInfo.MF->end())
1229    NextBlock = BBI;
1230
1231  // If the lhs block is the next block, invert the condition so that we can
1232  // fall through to the lhs instead of the rhs block.
1233  if (CB.TrueBB == NextBlock) {
1234    std::swap(CB.TrueBB, CB.FalseBB);
1235    SDValue True = DAG.getConstant(1, Cond.getValueType());
1236    Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
1237  }
1238  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
1239                               MVT::Other, getControlRoot(), Cond,
1240                               DAG.getBasicBlock(CB.TrueBB));
1241
1242  // If the branch was constant folded, fix up the CFG.
1243  if (BrCond.getOpcode() == ISD::BR) {
1244    CurMBB->removeSuccessor(CB.FalseBB);
1245    DAG.setRoot(BrCond);
1246  } else {
1247    // Otherwise, go ahead and insert the false branch.
1248    if (BrCond == getControlRoot())
1249      CurMBB->removeSuccessor(CB.TrueBB);
1250
1251    if (CB.FalseBB == NextBlock)
1252      DAG.setRoot(BrCond);
1253    else
1254      DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
1255                              DAG.getBasicBlock(CB.FalseBB)));
1256  }
1257}
1258
1259/// visitJumpTable - Emit JumpTable node in the current MBB
1260void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
1261  // Emit the code for the jump table
1262  assert(JT.Reg != -1U && "Should lower JT Header first!");
1263  EVT PTy = TLI.getPointerTy();
1264  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
1265                                     JT.Reg, PTy);
1266  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
1267  DAG.setRoot(DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
1268                          MVT::Other, Index.getValue(1),
1269                          Table, Index));
1270}
1271
1272/// visitJumpTableHeader - This function emits the necessary code to produce an
1273/// index into the JumpTable from the value being switched on.
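// Illustrative example: for a switch whose cases span the values 10..14,
// JTH.First is 10 and JTH.Last is 14.  The code below computes
// Index = SValue - 10, branches to JT.Default if Index is (unsigned) greater
// than 4, and otherwise leaves Index in a virtual register for the jump-table
// dispatch emitted later by visitJumpTable.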
1274void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
1275                                               JumpTableHeader &JTH) {
1276  // Subtract the lowest switch case value from the value being switched on and
1277  // conditionally branch to the default MBB if the result is greater than the
1278  // difference between smallest and largest cases.
1279  SDValue SwitchOp = getValue(JTH.SValue);
1280  EVT VT = SwitchOp.getValueType();
1281  SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
1282                            DAG.getConstant(JTH.First, VT));
1283
1284  // The SDNode we just created, which holds the value being switched on minus
1285  // the smallest case value, needs to be copied to a virtual register so it
1286  // can be used as an index into the jump table in a subsequent basic block.
1287  // This value may be smaller or larger than the target's pointer type, and
1288  // therefore may require extension or truncation.
1289  SwitchOp = DAG.getZExtOrTrunc(SUB, getCurDebugLoc(), TLI.getPointerTy());
1290
1291  unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy());
1292  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
1293                                    JumpTableReg, SwitchOp);
1294  JT.Reg = JumpTableReg;
1295
1296  // Emit the range check for the jump table, and branch to the default block
1297  // for the switch statement if the value being switched on exceeds the largest
1298  // case in the switch.
1299  SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
1300                             TLI.getSetCCResultType(SUB.getValueType()), SUB,
1301                             DAG.getConstant(JTH.Last-JTH.First,VT),
1302                             ISD::SETUGT);
1303
1304  // Set NextBlock to be the MBB immediately after the current one, if any.
1305  // This is used to avoid emitting unnecessary branches to the next block.
1306  MachineBasicBlock *NextBlock = 0;
1307  MachineFunction::iterator BBI = CurMBB;
1308  if (++BBI != FuncInfo.MF->end())
1309    NextBlock = BBI;
1310
1311  SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
1312                               MVT::Other, CopyTo, CMP,
1313                               DAG.getBasicBlock(JT.Default));
1314
1315  if (JT.MBB == NextBlock)
1316    DAG.setRoot(BrCond);
1317  else
1318    DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
1319                            DAG.getBasicBlock(JT.MBB)));
1320}
1321
1322/// visitBitTestHeader - This function emits the necessary code to produce a
1323/// value suitable for "bit tests".
1324void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B) {
1325  // Subtract the minimum value
1326  SDValue SwitchOp = getValue(B.SValue);
1327  EVT VT = SwitchOp.getValueType();
1328  SDValue SUB = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
1329                            DAG.getConstant(B.First, VT));
1330
1331  // Check range
1332  SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
1333                                  TLI.getSetCCResultType(SUB.getValueType()),
1334                                  SUB, DAG.getConstant(B.Range, VT),
1335                                  ISD::SETUGT);
1336
1337  SDValue ShiftOp = DAG.getZExtOrTrunc(SUB, getCurDebugLoc(), TLI.getPointerTy());
1338
1339  B.Reg = FuncInfo.MakeReg(TLI.getPointerTy());
1340  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
1341                                    B.Reg, ShiftOp);
1342
1343  // Set NextBlock to be the MBB immediately after the current one, if any.
1344  // This is used to avoid emitting unnecessary branches to the next block.
1345  MachineBasicBlock *NextBlock = 0;
1346  MachineFunction::iterator BBI = CurMBB;
1347  if (++BBI != FuncInfo.MF->end())
1348    NextBlock = BBI;
1349
1350  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
1351
1352  CurMBB->addSuccessor(B.Default);
1353  CurMBB->addSuccessor(MBB);
1354
1355  SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
1356                                MVT::Other, CopyTo, RangeCmp,
1357                                DAG.getBasicBlock(B.Default));
1358
1359  if (MBB == NextBlock)
1360    DAG.setRoot(BrRange);
1361  else
1362    DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo,
1363                            DAG.getBasicBlock(MBB)));
1364}
1365
1366/// visitBitTestCase - This function produces one "bit test".
1367void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
1368                                           unsigned Reg,
1369                                           BitTestCase &B) {
1370  // Make desired shift
1371  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
1372                                       TLI.getPointerTy());
1373  SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
1374                                  TLI.getPointerTy(),
1375                                  DAG.getConstant(1, TLI.getPointerTy()),
1376                                  ShiftOp);
1377
1378  // Emit bit tests and jumps
1379  SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
1380                              TLI.getPointerTy(), SwitchVal,
1381                              DAG.getConstant(B.Mask, TLI.getPointerTy()));
1382  SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(),
1383                                TLI.getSetCCResultType(AndOp.getValueType()),
1384                                AndOp, DAG.getConstant(0, TLI.getPointerTy()),
1385                                ISD::SETNE);
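  // The test computed above is ((1 << (SValue - First)) & B.Mask) != 0. For
  // example (values for illustration only), if this BitTestCase covers the
  // relative case values 1, 3 and 4, then B.Mask is 0b11010 and any in-range
  // value whose bit is set in the mask branches to B.TargetBB.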
1386
1387  CurMBB->addSuccessor(B.TargetBB);
1388  CurMBB->addSuccessor(NextMBB);
1389
1390  SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
1391                              MVT::Other, getControlRoot(),
1392                              AndCmp, DAG.getBasicBlock(B.TargetBB));
1393
1394  // Set NextBlock to be the MBB immediately after the current one, if any.
1395  // This is used to avoid emitting unnecessary branches to the next block.
1396  MachineBasicBlock *NextBlock = 0;
1397  MachineFunction::iterator BBI = CurMBB;
1398  if (++BBI != FuncInfo.MF->end())
1399    NextBlock = BBI;
1400
1401  if (NextMBB == NextBlock)
1402    DAG.setRoot(BrAnd);
1403  else
1404    DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
1405                            DAG.getBasicBlock(NextMBB)));
1406}
1407
1408void SelectionDAGBuilder::visitInvoke(InvokeInst &I) {
1409  // Retrieve successors.
1410  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
1411  MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
1412
1413  const Value *Callee(I.getCalledValue());
1414  if (isa<InlineAsm>(Callee))
1415    visitInlineAsm(&I);
1416  else
1417    LowerCallTo(&I, getValue(Callee), false, LandingPad);
1418
1419  // If the value of the invoke is used outside of its defining block, make it
1420  // available as a virtual register.
1421  CopyToExportRegsIfNeeded(&I);
1422
1423  // Update successor info
1424  CurMBB->addSuccessor(Return);
1425  CurMBB->addSuccessor(LandingPad);
1426
1427  // Drop into normal successor.
1428  DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
1429                          MVT::Other, getControlRoot(),
1430                          DAG.getBasicBlock(Return)));
1431}
1432
1433void SelectionDAGBuilder::visitUnwind(UnwindInst &I) {
1434}
1435
1436/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
1437/// small case ranges).
1438bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
1439                                                 CaseRecVector& WorkList,
1440                                                 Value* SV,
1441                                                 MachineBasicBlock* Default) {
1442  Case& BackCase  = *(CR.Range.second-1);
1443
1444  // Size is the number of Cases represented by this range.
1445  size_t Size = CR.Range.second - CR.Range.first;
1446  if (Size > 3)
1447    return false;
1448
1449  // Get the MachineFunction which holds the current MBB.  This is used when
1450  // inserting any additional MBBs necessary to represent the switch.
1451  MachineFunction *CurMF = FuncInfo.MF;
1452
1453  // Figure out which block is immediately after the current one.
1454  MachineBasicBlock *NextBlock = 0;
1455  MachineFunction::iterator BBI = CR.CaseBB;
1456
1457  if (++BBI != FuncInfo.MF->end())
1458    NextBlock = BBI;
1459
1460  // TODO: If any two of the cases has the same destination, and if one value
1461  // is the same as the other, but has one bit unset that the other has set,
1462  // use bit manipulation to do two compares at once.  For example:
1463  // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
1464
1465  // Rearrange the case blocks so that the last one falls through if possible.
1466  if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
1467    // The last case block won't fall through into 'NextBlock' if we emit the
1468    // branches in this order.  See if rearranging a case value would help.
1469    for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
1470      if (I->BB == NextBlock) {
1471        std::swap(*I, BackCase);
1472        break;
1473      }
1474    }
1475  }
1476
1477  // Create a CaseBlock record representing a conditional branch to
1478  // the Case's target mbb if the value being switched on SV is equal
1479  // to C.
1480  MachineBasicBlock *CurBlock = CR.CaseBB;
1481  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
1482    MachineBasicBlock *FallThrough;
1483    if (I != E-1) {
1484      FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
1485      CurMF->insert(BBI, FallThrough);
1486
1487      // Put SV in a virtual register to make it available from the new blocks.
1488      ExportFromCurrentBlock(SV);
1489    } else {
1490      // If the last case doesn't match, go to the default block.
1491      FallThrough = Default;
1492    }
1493
1494    Value *RHS, *LHS, *MHS;
1495    ISD::CondCode CC;
1496    if (I->High == I->Low) {
1497      // This is just a small case range containing exactly one case.
1498      CC = ISD::SETEQ;
1499      LHS = SV; RHS = I->High; MHS = NULL;
1500    } else {
1501      CC = ISD::SETLE;
1502      LHS = I->Low; MHS = SV; RHS = I->High;
1503    }
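    // With all three operands set, the CaseBlock is lowered as a range check,
    // Low <= SV <= High, whereas the single-case form above is a plain
    // equality test of SV against I->High.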
1504    CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
1505
1506    // If emitting the first comparison, just call visitSwitchCase to emit the
1507    // code into the current block.  Otherwise, push the CaseBlock onto the
1508    // vector to be later processed by SDISel, and insert the node's MBB
1509    // before the next MBB.
1510    if (CurBlock == CurMBB)
1511      visitSwitchCase(CB);
1512    else
1513      SwitchCases.push_back(CB);
1514
1515    CurBlock = FallThrough;
1516  }
1517
1518  return true;
1519}
1520
1521static inline bool areJTsAllowed(const TargetLowering &TLI) {
1522  return !DisableJumpTables &&
1523          (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
1524           TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
1525}
1526
1527static APInt ComputeRange(const APInt &First, const APInt &Last) {
1528  APInt LastExt(Last), FirstExt(First);
1529  uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
1530  LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
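  // Sign-extending by one extra bit guarantees the subtraction below cannot
  // overflow; e.g. First = -2, Last = 5 yields a range of 5 - (-2) + 1 = 8.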
1531  return (LastExt - FirstExt + 1ULL);
1532}
1533
1534/// handleJTSwitchCase - Emit a jump table for the current switch case range.
1535bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
1536                                             CaseRecVector& WorkList,
1537                                             Value* SV,
1538                                             MachineBasicBlock* Default) {
1539  Case& FrontCase = *CR.Range.first;
1540  Case& BackCase  = *(CR.Range.second-1);
1541
1542  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
1543  const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
1544
1545  APInt TSize(First.getBitWidth(), 0);
1546  for (CaseItr I = CR.Range.first, E = CR.Range.second;
1547       I!=E; ++I)
1548    TSize += I->size();
1549
1550  if (!areJTsAllowed(TLI) || TSize.ult(APInt(First.getBitWidth(), 4)))
1551    return false;
1552
1553  APInt Range = ComputeRange(First, Last);
1554  double Density = TSize.roundToDouble() / Range.roundToDouble();
1555  if (Density < 0.4)
1556    return false;
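  // For illustration: four case values covering the span [0, 9] give
  // TSize = 4 and Range = 10, so Density = 0.4 meets the threshold and a jump
  // table with ten entries (six of them pointing at the default block) is
  // emitted.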
1557
1558  DEBUG(errs() << "Lowering jump table\n"
1559               << "First entry: " << First << ". Last entry: " << Last << '\n'
1560               << "Range: " << Range
1561               << ". Size: " << TSize << ". Density: " << Density << "\n\n");
1562
1563  // Get the MachineFunction which holds the current MBB.  This is used when
1564  // inserting any additional MBBs necessary to represent the switch.
1565  MachineFunction *CurMF = FuncInfo.MF;
1566
1567  // Figure out which block is immediately after the current one.
1568  MachineFunction::iterator BBI = CR.CaseBB;
1569  ++BBI;
1570
1571  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
1572
1573  // Create a new basic block to hold the code for loading the address
1574  // of the jump table, and jumping to it.  Update successor information;
1575  // we will either branch to the default case for the switch, or the jump
1576  // table.
1577  MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
1578  CurMF->insert(BBI, JumpTableBB);
1579  CR.CaseBB->addSuccessor(Default);
1580  CR.CaseBB->addSuccessor(JumpTableBB);
1581
1582  // Build a vector of destination BBs, corresponding to each target
1583  // of the jump table. If the value of the jump table slot corresponds to
1584  // a case statement, push the case's BB onto the vector, otherwise, push
1585  // the default BB.
1586  std::vector<MachineBasicBlock*> DestBBs;
1587  APInt TEI = First;
1588  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
1589    const APInt& Low = cast<ConstantInt>(I->Low)->getValue();
1590    const APInt& High = cast<ConstantInt>(I->High)->getValue();
1591
1592    if (Low.sle(TEI) && TEI.sle(High)) {
1593      DestBBs.push_back(I->BB);
1594      if (TEI==High)
1595        ++I;
1596    } else {
1597      DestBBs.push_back(Default);
1598    }
1599  }
1600
1601  // Update successor info. Add one edge to each unique successor.
1602  BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
1603  for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
1604         E = DestBBs.end(); I != E; ++I) {
1605    if (!SuccsHandled[(*I)->getNumber()]) {
1606      SuccsHandled[(*I)->getNumber()] = true;
1607      JumpTableBB->addSuccessor(*I);
1608    }
1609  }
1610
1611  // Create a jump table index for this jump table, or return an existing
1612  // one.
1613  unsigned JTI = CurMF->getJumpTableInfo()->getJumpTableIndex(DestBBs);
1614
1615  // Set the jump table information so that we can codegen it as a second
1616  // MachineBasicBlock
1617  JumpTable JT(-1U, JTI, JumpTableBB, Default);
1618  JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == CurMBB));
1619  if (CR.CaseBB == CurMBB)
1620    visitJumpTableHeader(JT, JTH);
1621
1622  JTCases.push_back(JumpTableBlock(JTH, JT));
1623
1624  return true;
1625}
1626
1627/// handleBTSplitSwitchCase - Emit a comparison and split the binary search
1628/// tree into two subtrees.
1629bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
1630                                                  CaseRecVector& WorkList,
1631                                                  Value* SV,
1632                                                  MachineBasicBlock* Default) {
1633  // Get the MachineFunction which holds the current MBB.  This is used when
1634  // inserting any additional MBBs necessary to represent the switch.
1635  MachineFunction *CurMF = FuncInfo.MF;
1636
1637  // Figure out which block is immediately after the current one.
1638  MachineFunction::iterator BBI = CR.CaseBB;
1639  ++BBI;
1640
1641  Case& FrontCase = *CR.Range.first;
1642  Case& BackCase  = *(CR.Range.second-1);
1643  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
1644
1645  // Size is the number of Cases represented by this range.
1646  unsigned Size = CR.Range.second - CR.Range.first;
1647
1648  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
1649  const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
1650  double FMetric = 0;
1651  CaseItr Pivot = CR.Range.first + Size/2;
1652
1653  // Select optimal pivot, maximizing sum density of LHS and RHS. This will
1654  // (heuristically) allow us to emit jump tables later.
1655  APInt TSize(First.getBitWidth(), 0);
1656  for (CaseItr I = CR.Range.first, E = CR.Range.second;
1657       I!=E; ++I)
1658    TSize += I->size();
1659
1660  APInt LSize = FrontCase.size();
1661  APInt RSize = TSize-LSize;
1662  DEBUG(errs() << "Selecting best pivot: \n"
1663               << "First: " << First << ", Last: " << Last <<'\n'
1664               << "LSize: " << LSize << ", RSize: " << RSize << '\n');
1665  for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
1666       J!=E; ++I, ++J) {
1667    const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
1668    const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
1669    APInt Range = ComputeRange(LEnd, RBegin);
1670    assert((Range - 2ULL).isNonNegative() &&
1671           "Invalid case distance");
1672    double LDensity = (double)LSize.roundToDouble() /
1673                           (LEnd - First + 1ULL).roundToDouble();
1674    double RDensity = (double)RSize.roundToDouble() /
1675                           (Last - RBegin + 1ULL).roundToDouble();
1676    double Metric = Range.logBase2()*(LDensity+RDensity);
1677    // Should always split in some non-trivial place
1678    DEBUG(errs() <<"=>Step\n"
1679                 << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
1680                 << "LDensity: " << LDensity
1681                 << ", RDensity: " << RDensity << '\n'
1682                 << "Metric: " << Metric << '\n');
1683    if (FMetric < Metric) {
1684      Pivot = J;
1685      FMetric = Metric;
1686      DEBUG(errs() << "Current metric set to: " << FMetric << '\n');
1687    }
1688
1689    LSize += J->size();
1690    RSize -= J->size();
1691  }
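  // The metric favors pivots that leave a large gap between the two halves
  // (Range.logBase2()) while keeping each half dense. For example, splitting
  // {0,1,2,3, 100,101,102,103} between 3 and 100 yields two perfectly dense
  // halves separated by a wide gap, and each half can later become a jump
  // table.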
1692  if (areJTsAllowed(TLI)) {
1693    // If our case is dense we *really* should handle it earlier!
1694    assert((FMetric > 0) && "Should handle dense range earlier!");
1695  } else {
1696    Pivot = CR.Range.first + Size/2;
1697  }
1698
1699  CaseRange LHSR(CR.Range.first, Pivot);
1700  CaseRange RHSR(Pivot, CR.Range.second);
1701  Constant *C = Pivot->Low;
1702  MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
1703
1704  // We know that we branch to the LHS if the Value being switched on is
1705  // less than the Pivot value, C.  We use this to optimize our binary
1706  // tree a bit, by recognizing that if SV is greater than or equal to the
1707  // LHS's Case Value, and that Case Value is exactly one less than the
1708  // Pivot's Value, then we can branch directly to the LHS's Target,
1709  // rather than creating a leaf node for it.
1710  if ((LHSR.second - LHSR.first) == 1 &&
1711      LHSR.first->High == CR.GE &&
1712      cast<ConstantInt>(C)->getValue() ==
1713      (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
1714    TrueBB = LHSR.first->BB;
1715  } else {
1716    TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
1717    CurMF->insert(BBI, TrueBB);
1718    WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
1719
1720    // Put SV in a virtual register to make it available from the new blocks.
1721    ExportFromCurrentBlock(SV);
1722  }
1723
1724  // Similar to the optimization above, if the Value being switched on is
1725  // known to be less than the Constant CR.LT, and the current Case Value
1726  // is CR.LT - 1, then we can branch directly to the target block for
1727  // the current Case Value, rather than emitting a RHS leaf node for it.
1728  if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
1729      cast<ConstantInt>(RHSR.first->Low)->getValue() ==
1730      (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
1731    FalseBB = RHSR.first->BB;
1732  } else {
1733    FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
1734    CurMF->insert(BBI, FalseBB);
1735    WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
1736
1737    // Put SV in a virtual register to make it available from the new blocks.
1738    ExportFromCurrentBlock(SV);
1739  }
1740
1741  // Create a CaseBlock record representing a conditional branch to
1742  // the LHS node if the value being switched on SV is less than C.
1743  // Otherwise, branch to RHS.
1744  CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
1745
1746  if (CR.CaseBB == CurMBB)
1747    visitSwitchCase(CB);
1748  else
1749    SwitchCases.push_back(CB);
1750
1751  return true;
1752}
1753
1754/// handleBitTestsSwitchCase - If the current case range has few destinations
1755/// and spans less than the machine word bitwidth, encode the case range into
1756/// a series of masks and emit bit tests with these masks.
1757bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
1758                                                   CaseRecVector& WorkList,
1759                                                   Value* SV,
1760                                                   MachineBasicBlock* Default){
1761  EVT PTy = TLI.getPointerTy();
1762  unsigned IntPtrBits = PTy.getSizeInBits();
1763
1764  Case& FrontCase = *CR.Range.first;
1765  Case& BackCase  = *(CR.Range.second-1);
1766
1767  // Get the MachineFunction which holds the current MBB.  This is used when
1768  // inserting any additional MBBs necessary to represent the switch.
1769  MachineFunction *CurMF = FuncInfo.MF;
1770
1771  // If target does not have legal shift left, do not emit bit tests at all.
1772  if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
1773    return false;
1774
1775  size_t numCmps = 0;
1776  for (CaseItr I = CR.Range.first, E = CR.Range.second;
1777       I!=E; ++I) {
1778    // A single case counts as one comparison, a case range as two.
1779    numCmps += (I->Low == I->High ? 1 : 2);
1780  }
1781
1782  // Count unique destinations
1783  SmallSet<MachineBasicBlock*, 4> Dests;
1784  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
1785    Dests.insert(I->BB);
1786    if (Dests.size() > 3)
1787      // Don't bother with the code below if there are too many unique destinations.
1788      return false;
1789  }
1790  DEBUG(errs() << "Total number of unique destinations: " << Dests.size() << '\n'
1791               << "Total number of comparisons: " << numCmps << '\n');
1792
1793  // Compute span of values.
1794  const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
1795  const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
1796  APInt cmpRange = maxValue - minValue;
1797
1798  DEBUG(errs() << "Compare range: " << cmpRange << '\n'
1799               << "Low bound: " << minValue << '\n'
1800               << "High bound: " << maxValue << '\n');
1801
1802  if (cmpRange.uge(APInt(cmpRange.getBitWidth(), IntPtrBits)) ||
1803      (!(Dests.size() == 1 && numCmps >= 3) &&
1804       !(Dests.size() == 2 && numCmps >= 5) &&
1805       !(Dests.size() >= 3 && numCmps >= 6)))
1806    return false;
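  // In other words, bit tests are only used when the span of case values is
  // smaller than the pointer width in bits and enough comparisons are saved:
  // a single destination needing at least three comparisons, two destinations
  // needing at least five, or three or more destinations needing at least six.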
1807
1808  DEBUG(errs() << "Emitting bit tests\n");
1809  APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
1810
1811  // Optimize the case where all the case values fit in a word
1812  // without having to subtract minValue; the subtraction of zero
1813  // then folds away.
1814  if (minValue.isNonNegative() &&
1815      maxValue.slt(APInt(maxValue.getBitWidth(), IntPtrBits))) {
1816    cmpRange = maxValue;
1817  } else {
1818    lowBound = minValue;
1819  }
1820
1821  CaseBitsVector CasesBits;
1822  unsigned i, count = 0;
1823
1824  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
1825    MachineBasicBlock* Dest = I->BB;
1826    for (i = 0; i < count; ++i)
1827      if (Dest == CasesBits[i].BB)
1828        break;
1829
1830    if (i == count) {
1831      assert((count < 3) && "Too many destinations to test!");
1832      CasesBits.push_back(CaseBits(0, Dest, 0));
1833      count++;
1834    }
1835
1836    const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
1837    const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
1838
1839    uint64_t lo = (lowValue - lowBound).getZExtValue();
1840    uint64_t hi = (highValue - lowBound).getZExtValue();
1841
1842    for (uint64_t j = lo; j <= hi; j++) {
1843      CasesBits[i].Mask |=  1ULL << j;
1844      CasesBits[i].Bits++;
1845    }
1846
1847  }
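  // Example (values for illustration): with lowBound = 0, cases {1, 3, 4}
  // branching to BB_A and case {2} branching to BB_B produce CasesBits entries
  // with Mask 0b11010 / Bits 3 for BB_A and Mask 0b00100 / Bits 1 for BB_B.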
1848  std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
1849
1850  BitTestInfo BTC;
1851
1852  // Figure out which block is immediately after the current one.
1853  MachineFunction::iterator BBI = CR.CaseBB;
1854  ++BBI;
1855
1856  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
1857
1858  DEBUG(errs() << "Cases:\n");
1859  for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
1860    DEBUG(errs() << "Mask: " << CasesBits[i].Mask
1861                 << ", Bits: " << CasesBits[i].Bits
1862                 << ", BB: " << CasesBits[i].BB << '\n');
1863
1864    MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
1865    CurMF->insert(BBI, CaseBB);
1866    BTC.push_back(BitTestCase(CasesBits[i].Mask,
1867                              CaseBB,
1868                              CasesBits[i].BB));
1869
1870    // Put SV in a virtual register to make it available from the new blocks.
1871    ExportFromCurrentBlock(SV);
1872  }
1873
1874  BitTestBlock BTB(lowBound, cmpRange, SV,
1875                   -1U, (CR.CaseBB == CurMBB),
1876                   CR.CaseBB, Default, BTC);
1877
1878  if (CR.CaseBB == CurMBB)
1879    visitBitTestHeader(BTB);
1880
1881  BitTestCases.push_back(BTB);
1882
1883  return true;
1884}
1885
1886
1887/// Clusterify - Transform a simple list of Cases into a list of clustered case ranges.
1888size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
1889                                       const SwitchInst& SI) {
1890  size_t numCmps = 0;
1891
1892  // Start with "simple" cases
1893  for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
1894    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
1895    Cases.push_back(Case(SI.getSuccessorValue(i),
1896                         SI.getSuccessorValue(i),
1897                         SMBB));
1898  }
1899  std::sort(Cases.begin(), Cases.end(), CaseCmp());
1900
1901  // Merge cases into clusters.
1902  if (Cases.size() >= 2)
1903    // Must recompute end() each iteration because it may be
1904    // invalidated by erase if we hold on to it
1905    for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
1906      const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
1907      const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
1908      MachineBasicBlock* nextBB = J->BB;
1909      MachineBasicBlock* currentBB = I->BB;
1910
1911      // If the two neighboring cases go to the same destination, merge them
1912      // into a single case.
1913      if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
1914        I->High = J->High;
1915        J = Cases.erase(J);
1916      } else {
1917        I = J++;
1918      }
1919    }
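    // For example, cases 1 -> BB_A, 2 -> BB_A, 3 -> BB_A, 7 -> BB_B are
    // clustered into the ranges [1, 3] -> BB_A and [7, 7] -> BB_B; the range
    // later counts as two comparisons and the singleton as one.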
1920
1921  for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
1922    if (I->Low != I->High)
1923      // A range counts double, since it requires two compares.
1924      ++numCmps;
1925  }
1926
1927  return numCmps;
1928}
1929
1930void SelectionDAGBuilder::visitSwitch(SwitchInst &SI) {
1931  // Figure out which block is immediately after the current one.
1932  MachineBasicBlock *NextBlock = 0;
  MachineFunction::iterator BBI = CurMBB;
  if (++BBI != FuncInfo.MF->end())
    NextBlock = BBI;
1933
1934  MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
1935
1936  // If there is only the default destination, branch to it if it is not the
1937  // next basic block.  Otherwise, just fall through.
1938  if (SI.getNumOperands() == 2) {
1939    // Update machine-CFG edges.
1940
1941    // If this is not a fall-through branch, emit the branch.
1942    CurMBB->addSuccessor(Default);
1943    if (Default != NextBlock)
1944      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
1945                              MVT::Other, getControlRoot(),
1946                              DAG.getBasicBlock(Default)));
1947    return;
1948  }
1949
1950  // If there are any non-default case statements, create a vector of Cases
1951  // representing each one, and sort the vector so that we can efficiently
1952  // create a binary search tree from them.
1953  CaseVector Cases;
1954  size_t numCmps = Clusterify(Cases, SI);
1955  DEBUG(errs() << "Clusterify finished. Total clusters: " << Cases.size()
1956               << ". Total compares: " << numCmps << '\n');
1957  numCmps = 0;
1958
1959  // Get the Value to be switched on. It will be inserted into the CaseBlock
1960  // records representing the basic blocks of the binary search tree.
1962  Value *SV = SI.getOperand(0);
1963
1964  // Push the initial CaseRec onto the worklist
1965  CaseRecVector WorkList;
1966  WorkList.push_back(CaseRec(CurMBB,0,0,CaseRange(Cases.begin(),Cases.end())));
1967
1968  while (!WorkList.empty()) {
1969    // Grab a record representing a case range to process off the worklist
1970    CaseRec CR = WorkList.back();
1971    WorkList.pop_back();
1972
1973    if (handleBitTestsSwitchCase(CR, WorkList, SV, Default))
1974      continue;
1975
1976    // If the range has few cases (three or fewer) emit a series of specific
1977    // tests.
1978    if (handleSmallSwitchRange(CR, WorkList, SV, Default))
1979      continue;
1980
1981    // If the switch has at least four case values, is at least 40% dense, and the
1982    // target supports indirect branches, then emit a jump table rather than
1983    // lowering the switch to a binary tree of conditional branches.
1984    if (handleJTSwitchCase(CR, WorkList, SV, Default))
1985      continue;
1986
1987    // Emit a binary tree. We need to pick a pivot and push the left and right
1988    // ranges onto the worklist. Leaves are handled via handleSmallSwitchRange().
1989    handleBTSplitSwitchCase(CR, WorkList, SV, Default);
1990  }
1991}
1992
1993void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) {
1994  // Update machine-CFG edges.
1995  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
1996    CurMBB->addSuccessor(FuncInfo.MBBMap[I.getSuccessor(i)]);
1997
1998  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
1999                          MVT::Other, getControlRoot(),
2000                          getValue(I.getAddress())));
2001}
2002
2003
2004void SelectionDAGBuilder::visitFSub(User &I) {
2005  // -0.0 - X --> fneg
2006  const Type *Ty = I.getType();
2007  if (isa<VectorType>(Ty)) {
2008    if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
2009      const VectorType *DestTy = cast<VectorType>(I.getType());
2010      const Type *ElTy = DestTy->getElementType();
2011      unsigned VL = DestTy->getNumElements();
2012      std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
2013      Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
2014      if (CV == CNZ) {
2015        SDValue Op2 = getValue(I.getOperand(1));
2016        setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
2017                                 Op2.getValueType(), Op2));
2018        return;
2019      }
2020    }
2021  }
2022  if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
2023    if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
2024      SDValue Op2 = getValue(I.getOperand(1));
2025      setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
2026                               Op2.getValueType(), Op2));
2027      return;
2028    }
2029
2030  visitBinary(I, ISD::FSUB);
2031}
2032
2033void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) {
2034  SDValue Op1 = getValue(I.getOperand(0));
2035  SDValue Op2 = getValue(I.getOperand(1));
2036
2037  setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
2038                           Op1.getValueType(), Op1, Op2));
2039}
2040
2041void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) {
2042  SDValue Op1 = getValue(I.getOperand(0));
2043  SDValue Op2 = getValue(I.getOperand(1));
2044  if (!isa<VectorType>(I.getType()) &&
2045      Op2.getValueType() != TLI.getShiftAmountTy()) {
2046    // If the operand is smaller than the shift count type, promote it.
2047    EVT PTy = TLI.getPointerTy();
2048    EVT STy = TLI.getShiftAmountTy();
2049    if (STy.bitsGT(Op2.getValueType()))
2050      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
2051                        TLI.getShiftAmountTy(), Op2);
2052    // If the operand is larger than the shift count type but the shift
2053    // count type has enough bits to represent any shift value, truncate
2054    // it now. This is a common case and it exposes the truncate to
2055    // optimization early.
2056    else if (STy.getSizeInBits() >=
2057             Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
2058      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
2059                        TLI.getShiftAmountTy(), Op2);
2060    // Otherwise we'll need to temporarily settle for some other
2061    // convenient type; type legalization will make adjustments as
2062    // needed.
2063    else if (PTy.bitsLT(Op2.getValueType()))
2064      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
2065                        TLI.getPointerTy(), Op2);
2066    else if (PTy.bitsGT(Op2.getValueType()))
2067      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
2068                        TLI.getPointerTy(), Op2);
2069  }
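  // For example, an i8 shift amount on a target whose shift-amount type is
  // i32 is ANY_EXTENDed here, while a 64-bit amount with an 8-bit shift-amount
  // type (which can still encode any legal shift value) is truncated early so
  // the truncate is visible to later optimizations.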
2070
2071  setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
2072                           Op1.getValueType(), Op1, Op2));
2073}
2074
2075void SelectionDAGBuilder::visitICmp(User &I) {
2076  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
2077  if (ICmpInst *IC = dyn_cast<ICmpInst>(&I))
2078    predicate = IC->getPredicate();
2079  else if (ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
2080    predicate = ICmpInst::Predicate(IC->getPredicate());
2081  SDValue Op1 = getValue(I.getOperand(0));
2082  SDValue Op2 = getValue(I.getOperand(1));
2083  ISD::CondCode Opcode = getICmpCondCode(predicate);
2084
2085  EVT DestVT = TLI.getValueType(I.getType());
2086  setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
2087}
2088
2089void SelectionDAGBuilder::visitFCmp(User &I) {
2090  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
2091  if (FCmpInst *FC = dyn_cast<FCmpInst>(&I))
2092    predicate = FC->getPredicate();
2093  else if (ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
2094    predicate = FCmpInst::Predicate(FC->getPredicate());
2095  SDValue Op1 = getValue(I.getOperand(0));
2096  SDValue Op2 = getValue(I.getOperand(1));
2097  ISD::CondCode Condition = getFCmpCondCode(predicate);
2098  EVT DestVT = TLI.getValueType(I.getType());
2099  setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
2100}
2101
2102void SelectionDAGBuilder::visitSelect(User &I) {
2103  SmallVector<EVT, 4> ValueVTs;
2104  ComputeValueVTs(TLI, I.getType(), ValueVTs);
2105  unsigned NumValues = ValueVTs.size();
2106  if (NumValues != 0) {
2107    SmallVector<SDValue, 4> Values(NumValues);
2108    SDValue Cond     = getValue(I.getOperand(0));
2109    SDValue TrueVal  = getValue(I.getOperand(1));
2110    SDValue FalseVal = getValue(I.getOperand(2));
2111
2112    for (unsigned i = 0; i != NumValues; ++i)
2113      Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
2114                              TrueVal.getNode()->getValueType(i), Cond,
2115                              SDValue(TrueVal.getNode(), TrueVal.getResNo() + i),
2116                              SDValue(FalseVal.getNode(), FalseVal.getResNo() + i));
2117
2118    setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
2119                             DAG.getVTList(&ValueVTs[0], NumValues),
2120                             &Values[0], NumValues));
2121  }
2122}
2123
2124
2125void SelectionDAGBuilder::visitTrunc(User &I) {
2126  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
2127  SDValue N = getValue(I.getOperand(0));
2128  EVT DestVT = TLI.getValueType(I.getType());
2129  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
2130}
2131
2132void SelectionDAGBuilder::visitZExt(User &I) {
2133  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2134  // ZExt also can't be a cast to bool for same reason. So, nothing much to do
2135  SDValue N = getValue(I.getOperand(0));
2136  EVT DestVT = TLI.getValueType(I.getType());
2137  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
2138}
2139
2140void SelectionDAGBuilder::visitSExt(User &I) {
2141  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2142  // SExt also can't be a cast to bool for same reason. So, nothing much to do
2143  SDValue N = getValue(I.getOperand(0));
2144  EVT DestVT = TLI.getValueType(I.getType());
2145  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
2146}
2147
2148void SelectionDAGBuilder::visitFPTrunc(User &I) {
2149  // FPTrunc is never a no-op cast, no need to check
2150  SDValue N = getValue(I.getOperand(0));
2151  EVT DestVT = TLI.getValueType(I.getType());
2152  setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
2153                           DestVT, N, DAG.getIntPtrConstant(0)));
2154}
2155
2156void SelectionDAGBuilder::visitFPExt(User &I) {
2157  // FPExt is never a no-op cast, no need to check
2158  SDValue N = getValue(I.getOperand(0));
2159  EVT DestVT = TLI.getValueType(I.getType());
2160  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
2161}
2162
2163void SelectionDAGBuilder::visitFPToUI(User &I) {
2164  // FPToUI is never a no-op cast, no need to check
2165  SDValue N = getValue(I.getOperand(0));
2166  EVT DestVT = TLI.getValueType(I.getType());
2167  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
2168}
2169
2170void SelectionDAGBuilder::visitFPToSI(User &I) {
2171  // FPToSI is never a no-op cast, no need to check
2172  SDValue N = getValue(I.getOperand(0));
2173  EVT DestVT = TLI.getValueType(I.getType());
2174  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
2175}
2176
2177void SelectionDAGBuilder::visitUIToFP(User &I) {
2178  // UIToFP is never a no-op cast, no need to check
2179  SDValue N = getValue(I.getOperand(0));
2180  EVT DestVT = TLI.getValueType(I.getType());
2181  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
2182}
2183
2184void SelectionDAGBuilder::visitSIToFP(User &I){
2185  // SIToFP is never a no-op cast, no need to check
2186  SDValue N = getValue(I.getOperand(0));
2187  EVT DestVT = TLI.getValueType(I.getType());
2188  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
2189}
2190
2191void SelectionDAGBuilder::visitPtrToInt(User &I) {
2192  // What to do depends on the size of the integer and the size of the pointer.
2193  // We can either truncate, zero extend, or no-op, accordingly.
2194  SDValue N = getValue(I.getOperand(0));
2195  EVT SrcVT = N.getValueType();
2196  EVT DestVT = TLI.getValueType(I.getType());
2197  SDValue Result = DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT);
2198  setValue(&I, Result);
2199}
2200
2201void SelectionDAGBuilder::visitIntToPtr(User &I) {
2202  // What to do depends on the size of the integer and the size of the pointer.
2203  // We can either truncate, zero extend, or no-op, accordingly.
2204  SDValue N = getValue(I.getOperand(0));
2205  EVT SrcVT = N.getValueType();
2206  EVT DestVT = TLI.getValueType(I.getType());
2207  setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
2208}
2209
2210void SelectionDAGBuilder::visitBitCast(User &I) {
2211  SDValue N = getValue(I.getOperand(0));
2212  EVT DestVT = TLI.getValueType(I.getType());
2213
2214  // BitCast assures us that source and destination are the same size so this
2215  // is either a BIT_CONVERT or a no-op.
2216  if (DestVT != N.getValueType())
2217    setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
2218                             DestVT, N)); // convert types
2219  else
2220    setValue(&I, N); // noop cast.
2221}
2222
2223void SelectionDAGBuilder::visitInsertElement(User &I) {
2224  SDValue InVec = getValue(I.getOperand(0));
2225  SDValue InVal = getValue(I.getOperand(1));
2226  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
2227                                TLI.getPointerTy(),
2228                                getValue(I.getOperand(2)));
2229
2230  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
2231                           TLI.getValueType(I.getType()),
2232                           InVec, InVal, InIdx));
2233}
2234
2235void SelectionDAGBuilder::visitExtractElement(User &I) {
2236  SDValue InVec = getValue(I.getOperand(0));
2237  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
2238                                TLI.getPointerTy(),
2239                                getValue(I.getOperand(1)));
2240  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
2241                           TLI.getValueType(I.getType()), InVec, InIdx));
2242}
2243
2244
2245// Utility for visitShuffleVector - Returns true if the mask is a sequential
2246// mask starting from SIndx and increasing to the element length (undefs are allowed).
2247static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
2248  unsigned MaskNumElts = Mask.size();
2249  for (unsigned i = 0; i != MaskNumElts; ++i)
2250    if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
2251      return false;
2252  return true;
2253}
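// For example, the mask <0, 1, 2, 3> (or <0, -1, 2, -1> with undefs) is
// sequential from index 0; visitShuffleVector uses this to recognize a shuffle
// that simply concatenates its two source vectors.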
2254
2255void SelectionDAGBuilder::visitShuffleVector(User &I) {
2256  SmallVector<int, 8> Mask;
2257  SDValue Src1 = getValue(I.getOperand(0));
2258  SDValue Src2 = getValue(I.getOperand(1));
2259
2260  // Convert the ConstantVector mask operand into an array of ints, with -1
2261  // representing undef values.
2262  SmallVector<Constant*, 8> MaskElts;
2263  cast<Constant>(I.getOperand(2))->getVectorElements(*DAG.getContext(),
2264                                                     MaskElts);
2265  unsigned MaskNumElts = MaskElts.size();
2266  for (unsigned i = 0; i != MaskNumElts; ++i) {
2267    if (isa<UndefValue>(MaskElts[i]))
2268      Mask.push_back(-1);
2269    else
2270      Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
2271  }
2272
2273  EVT VT = TLI.getValueType(I.getType());
2274  EVT SrcVT = Src1.getValueType();
2275  unsigned SrcNumElts = SrcVT.getVectorNumElements();
2276
2277  if (SrcNumElts == MaskNumElts) {
2278    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
2279                                      &Mask[0]));
2280    return;
2281  }
2282
2283  // Normalize the shuffle vector since mask and vector length don't match.
2284  if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
2285    // Mask is longer than the source vectors and is a multiple of the source
2286    // vectors.  We can use concatenate vector to make the mask and vectors
2287    // lengths match.
2288    if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
2289      // The shuffle is concatenating two vectors together.
2290      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
2291                               VT, Src1, Src2));
2292      return;
2293    }
2294
2295    // Pad both vectors with undefs to make them the same length as the mask.
2296    unsigned NumConcat = MaskNumElts / SrcNumElts;
2297    bool Src1U = Src1.getOpcode() == ISD::UNDEF;
2298    bool Src2U = Src2.getOpcode() == ISD::UNDEF;
2299    SDValue UndefVal = DAG.getUNDEF(SrcVT);
2300
2301    SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
2302    SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
2303    MOps1[0] = Src1;
2304    MOps2[0] = Src2;
2305
2306    Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
2307                                                  getCurDebugLoc(), VT,
2308                                                  &MOps1[0], NumConcat);
2309    Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
2310                                                  getCurDebugLoc(), VT,
2311                                                  &MOps2[0], NumConcat);
2312
2313    // Readjust mask for new input vector length.
2314    SmallVector<int, 8> MappedOps;
2315    for (unsigned i = 0; i != MaskNumElts; ++i) {
2316      int Idx = Mask[i];
2317      if (Idx < (int)SrcNumElts)
2318        MappedOps.push_back(Idx);
2319      else
2320        MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
2321    }
2322    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
2323                                      &MappedOps[0]));
2324    return;
2325  }
2326
2327  if (SrcNumElts > MaskNumElts) {
2328    // Analyze the access pattern of the vector to see if we can extract
2329    // two subvectors and do the shuffle. The analysis is done by calculating
2330    // the range of elements the mask accesses on both input vectors.
2331    int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
2332    int MaxRange[2] = {-1, -1};
2333
2334    for (unsigned i = 0; i != MaskNumElts; ++i) {
2335      int Idx = Mask[i];
2336      int Input = 0;
2337      if (Idx < 0)
2338        continue;
2339
2340      if (Idx >= (int)SrcNumElts) {
2341        Input = 1;
2342        Idx -= SrcNumElts;
2343      }
2344      if (Idx > MaxRange[Input])
2345        MaxRange[Input] = Idx;
2346      if (Idx < MinRange[Input])
2347        MinRange[Input] = Idx;
2348    }
2349
2350    // Check if the access is smaller than the vector size and whether we can
2351    // find a reasonable extract index.
2352    int RangeUse[2] = { 2, 2 };  // 0 = Unused, 1 = Extract, 2 = Cannot extract.
2353    int StartIdx[2];  // StartIdx to extract from
2354    for (int Input=0; Input < 2; ++Input) {
2355      if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
2356        RangeUse[Input] = 0; // Unused
2357        StartIdx[Input] = 0;
2358      } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
2359        // Fits within range but we should see if we can find a good
2360        // start index that is a multiple of the mask length.
2361        if (MaxRange[Input] < (int)MaskNumElts) {
2362          RangeUse[Input] = 1; // Extract from beginning of the vector
2363          StartIdx[Input] = 0;
2364        } else {
2365          StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
2366          if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
2367              StartIdx[Input] + MaskNumElts < SrcNumElts)
2368            RangeUse[Input] = 1; // Extract from a multiple of the mask length.
2369        }
2370      }
2371    }
2372
2373    if (RangeUse[0] == 0 && RangeUse[1] == 0) {
2374      setValue(&I, DAG.getUNDEF(VT));  // Vectors are not used.
2375      return;
2376    }
2377    else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
2378      // Extract appropriate subvector and generate a vector shuffle
2379      for (int Input=0; Input < 2; ++Input) {
2380        SDValue& Src = Input == 0 ? Src1 : Src2;
2381        if (RangeUse[Input] == 0) {
2382          Src = DAG.getUNDEF(VT);
2383        } else {
2384          Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
2385                            Src, DAG.getIntPtrConstant(StartIdx[Input]));
2386        }
2387      }
2388      // Calculate new mask.
2389      SmallVector<int, 8> MappedOps;
2390      for (unsigned i = 0; i != MaskNumElts; ++i) {
2391        int Idx = Mask[i];
2392        if (Idx < 0)
2393          MappedOps.push_back(Idx);
2394        else if (Idx < (int)SrcNumElts)
2395          MappedOps.push_back(Idx - StartIdx[0]);
2396        else
2397          MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
2398      }
2399      setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
2400                                        &MappedOps[0]));
2401      return;
2402    }
2403  }
2404
2405  // We can't use either concat vectors or extract subvectors, so fall back
2406  // to replacing the shuffle with per-element extracts and a build vector.
2408  EVT EltVT = VT.getVectorElementType();
2409  EVT PtrVT = TLI.getPointerTy();
2410  SmallVector<SDValue,8> Ops;
2411  for (unsigned i = 0; i != MaskNumElts; ++i) {
2412    if (Mask[i] < 0) {
2413      Ops.push_back(DAG.getUNDEF(EltVT));
2414    } else {
2415      int Idx = Mask[i];
2416      if (Idx < (int)SrcNumElts)
2417        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
2418                                  EltVT, Src1, DAG.getConstant(Idx, PtrVT)));
2419      else
2420        Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
2421                                  EltVT, Src2,
2422                                  DAG.getConstant(Idx - SrcNumElts, PtrVT)));
2423    }
2424  }
2425  setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
2426                           VT, &Ops[0], Ops.size()));
2427}
2428
2429void SelectionDAGBuilder::visitInsertValue(InsertValueInst &I) {
2430  const Value *Op0 = I.getOperand(0);
2431  const Value *Op1 = I.getOperand(1);
2432  const Type *AggTy = I.getType();
2433  const Type *ValTy = Op1->getType();
2434  bool IntoUndef = isa<UndefValue>(Op0);
2435  bool FromUndef = isa<UndefValue>(Op1);
2436
2437  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
2438                                            I.idx_begin(), I.idx_end());
2439
2440  SmallVector<EVT, 4> AggValueVTs;
2441  ComputeValueVTs(TLI, AggTy, AggValueVTs);
2442  SmallVector<EVT, 4> ValValueVTs;
2443  ComputeValueVTs(TLI, ValTy, ValValueVTs);
2444
2445  unsigned NumAggValues = AggValueVTs.size();
2446  unsigned NumValValues = ValValueVTs.size();
2447  SmallVector<SDValue, 4> Values(NumAggValues);
2448
2449  SDValue Agg = getValue(Op0);
2450  SDValue Val = getValue(Op1);
2451  unsigned i = 0;
2452  // Copy the beginning value(s) from the original aggregate.
2453  for (; i != LinearIndex; ++i)
2454    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
2455                SDValue(Agg.getNode(), Agg.getResNo() + i);
2456  // Copy values from the inserted value(s).
2457  for (; i != LinearIndex + NumValValues; ++i)
2458    Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
2459                SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
2460  // Copy remaining value(s) from the original aggregate.
2461  for (; i != NumAggValues; ++i)
2462    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
2463                SDValue(Agg.getNode(), Agg.getResNo() + i);
2464
2465  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
2466                           DAG.getVTList(&AggValueVTs[0], NumAggValues),
2467                           &Values[0], NumAggValues));
2468}
2469
2470void SelectionDAGBuilder::visitExtractValue(ExtractValueInst &I) {
2471  const Value *Op0 = I.getOperand(0);
2472  const Type *AggTy = Op0->getType();
2473  const Type *ValTy = I.getType();
2474  bool OutOfUndef = isa<UndefValue>(Op0);
2475
2476  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
2477                                            I.idx_begin(), I.idx_end());
2478
2479  SmallVector<EVT, 4> ValValueVTs;
2480  ComputeValueVTs(TLI, ValTy, ValValueVTs);
2481
2482  unsigned NumValValues = ValValueVTs.size();
2483  SmallVector<SDValue, 4> Values(NumValValues);
2484
2485  SDValue Agg = getValue(Op0);
2486  // Copy out the selected value(s).
2487  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
2488    Values[i - LinearIndex] =
2489      OutOfUndef ?
2490        DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
2491        SDValue(Agg.getNode(), Agg.getResNo() + i);
2492
2493  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
2494                           DAG.getVTList(&ValValueVTs[0], NumValValues),
2495                           &Values[0], NumValValues));
2496}
2497
2498
2499void SelectionDAGBuilder::visitGetElementPtr(User &I) {
2500  SDValue N = getValue(I.getOperand(0));
2501  const Type *Ty = I.getOperand(0)->getType();
2502
2503  for (GetElementPtrInst::op_iterator OI = I.op_begin()+1, E = I.op_end();
2504       OI != E; ++OI) {
2505    Value *Idx = *OI;
2506    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
2507      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
2508      if (Field) {
2509        // N = N + Offset
2510        uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
2511        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
2512                        DAG.getIntPtrConstant(Offset));
2513      }
2514      Ty = StTy->getElementType(Field);
2515    } else {
2516      Ty = cast<SequentialType>(Ty)->getElementType();
2517
2518      // If this is a constant subscript, handle it quickly.
2519      if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
2520        if (CI->getZExtValue() == 0) continue;
2521        uint64_t Offs =
2522            TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
2523        SDValue OffsVal;
2524        EVT PTy = TLI.getPointerTy();
2525        unsigned PtrBits = PTy.getSizeInBits();
2526        if (PtrBits < 64) {
2527          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
2528                                TLI.getPointerTy(),
2529                                DAG.getConstant(Offs, MVT::i64));
2530        } else
2531          OffsVal = DAG.getIntPtrConstant(Offs);
2532        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
2533                        OffsVal);
2534        continue;
2535      }
2536
2537      // N = N + Idx * ElementSize;
2538      APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
2539                                TD->getTypeAllocSize(Ty));
2540      SDValue IdxN = getValue(Idx);
2541
2542      // If the index is smaller or larger than intptr_t, truncate or extend
2543      // it.
2544      IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
2545
2546      // If this is a multiply by a power of two, turn it into a shl
2547      // immediately.  This is a very common case.
2548      if (ElementSize != 1) {
2549        if (ElementSize.isPowerOf2()) {
2550          unsigned Amt = ElementSize.logBase2();
2551          IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
2552                             N.getValueType(), IdxN,
2553                             DAG.getConstant(Amt, TLI.getPointerTy()));
2554        } else {
2555          SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
2556          IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
2557                             N.getValueType(), IdxN, Scale);
2558        }
2559      }
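      // For example, indexing into an array of i32 (ElementSize = 4) becomes
      // "IdxN << 2", while an element size of 12 falls back to an explicit
      // multiply by the Scale constant.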
2560
2561      N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
2562                      N.getValueType(), N, IdxN);
2563    }
2564  }
2565  setValue(&I, N);
2566}
2567
2568void SelectionDAGBuilder::visitAlloca(AllocaInst &I) {
2569  // If this is a fixed sized alloca in the entry block of the function,
2570  // allocate it statically on the stack.
2571  if (FuncInfo.StaticAllocaMap.count(&I))
2572    return;   // getValue will auto-populate this.
2573
2574  const Type *Ty = I.getAllocatedType();
2575  uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
2576  unsigned Align =
2577    std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
2578             I.getAlignment());
2579
2580  SDValue AllocSize = getValue(I.getArraySize());
2581
2582  AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(),
2583                          AllocSize,
2584                          DAG.getConstant(TySize, AllocSize.getValueType()));
2585
2588  EVT IntPtr = TLI.getPointerTy();
2589  AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
2590
2591  // Handle alignment.  If the requested alignment is less than or equal to
2592  // the stack alignment, ignore it.  If the requested alignment is greater
2593  // than the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
2594  unsigned StackAlign =
2595    TLI.getTargetMachine().getFrameInfo()->getStackAlignment();
2596  if (Align <= StackAlign)
2597    Align = 0;
2598
2599  // Round the size of the allocation up to the stack alignment size
2600  // by adding StackAlign-1 to the size.
2601  AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
2602                          AllocSize.getValueType(), AllocSize,
2603                          DAG.getIntPtrConstant(StackAlign-1));
2604  // Mask out the low bits for alignment purposes.
2605  AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
2606                          AllocSize.getValueType(), AllocSize,
2607                          DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
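  // For example (illustrative), with a 10-byte allocation and a 16-byte stack
  // alignment the two nodes above compute (10 + 15) & ~15 = 16, the smallest
  // multiple of the stack alignment that holds the requested size.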
2608
2609  SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
2610  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
2611  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
2612                            VTs, Ops, 3);
2613  setValue(&I, DSA);
2614  DAG.setRoot(DSA.getValue(1));
2615
2616  // Inform the Frame Information that we have just allocated a variable-sized
2617  // object.
2618  FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject();
2619}
2620
2621void SelectionDAGBuilder::visitLoad(LoadInst &I) {
2622  const Value *SV = I.getOperand(0);
2623  SDValue Ptr = getValue(SV);
2624
2625  const Type *Ty = I.getType();
2626  bool isVolatile = I.isVolatile();
2627  unsigned Alignment = I.getAlignment();
2628
2629  SmallVector<EVT, 4> ValueVTs;
2630  SmallVector<uint64_t, 4> Offsets;
2631  ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
2632  unsigned NumValues = ValueVTs.size();
2633  if (NumValues == 0)
2634    return;
2635
2636  SDValue Root;
2637  bool ConstantMemory = false;
2638  if (I.isVolatile())
2639    // Serialize volatile loads with other side effects.
2640    Root = getRoot();
2641  else if (AA->pointsToConstantMemory(SV)) {
2642    // Do not serialize (non-volatile) loads of constant memory with anything.
2643    Root = DAG.getEntryNode();
2644    ConstantMemory = true;
2645  } else {
2646    // Do not serialize non-volatile loads against each other.
2647    Root = DAG.getRoot();
2648  }
2649
2650  SmallVector<SDValue, 4> Values(NumValues);
2651  SmallVector<SDValue, 4> Chains(NumValues);
2652  EVT PtrVT = Ptr.getValueType();
2653  for (unsigned i = 0; i != NumValues; ++i) {
2654    SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
2655                            DAG.getNode(ISD::ADD, getCurDebugLoc(),
2656                                        PtrVT, Ptr,
2657                                        DAG.getConstant(Offsets[i], PtrVT)),
2658                            SV, Offsets[i], isVolatile, Alignment);
2659    Values[i] = L;
2660    Chains[i] = L.getValue(1);
2661  }
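  // Each member of an aggregate-typed load gets its own load node at
  // Ptr + Offsets[i]; e.g. on a typical 64-bit target (illustrative), a
  // {i32, i64} value becomes two loads at offsets 0 and 8 whose chains are
  // token-factored together below.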
2662
2663  if (!ConstantMemory) {
2664    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
2665                                  MVT::Other,
2666                                  &Chains[0], NumValues);
2667    if (isVolatile)
2668      DAG.setRoot(Chain);
2669    else
2670      PendingLoads.push_back(Chain);
2671  }
2672
2673  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
2674                           DAG.getVTList(&ValueVTs[0], NumValues),
2675                           &Values[0], NumValues));
2676}
2677
2678
2679void SelectionDAGBuilder::visitStore(StoreInst &I) {
2680  Value *SrcV = I.getOperand(0);
2681  Value *PtrV = I.getOperand(1);
2682
2683  SmallVector<EVT, 4> ValueVTs;
2684  SmallVector<uint64_t, 4> Offsets;
2685  ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
2686  unsigned NumValues = ValueVTs.size();
2687  if (NumValues == 0)
2688    return;
2689
2690  // Get the lowered operands. Note that we do this after
2691  // checking if NumValues is zero, because with zero results
2692  // the operands won't have values in the map.
2693  SDValue Src = getValue(SrcV);
2694  SDValue Ptr = getValue(PtrV);
2695
2696  SDValue Root = getRoot();
2697  SmallVector<SDValue, 4> Chains(NumValues);
2698  EVT PtrVT = Ptr.getValueType();
2699  bool isVolatile = I.isVolatile();
2700  unsigned Alignment = I.getAlignment();
2701  for (unsigned i = 0; i != NumValues; ++i)
2702    Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
2703                             SDValue(Src.getNode(), Src.getResNo() + i),
2704                             DAG.getNode(ISD::ADD, getCurDebugLoc(),
2705                                         PtrVT, Ptr,
2706                                         DAG.getConstant(Offsets[i], PtrVT)),
2707                             PtrV, Offsets[i], isVolatile, Alignment);
2708
2709  DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
2710                          MVT::Other, &Chains[0], NumValues));
2711}
2712
2713/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
2714/// node.
2715void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I,
2716                                               unsigned Intrinsic) {
2717  bool HasChain = !I.doesNotAccessMemory();
2718  bool OnlyLoad = HasChain && I.onlyReadsMemory();
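  // An intrinsic that only reads memory is treated like a load: it is chained
  // after the existing root below and its output chain is merely added to
  // PendingLoads, whereas an intrinsic that may write memory is serialized
  // through the DAG root on both sides.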
2719
2720  // Build the operand list.
2721  SmallVector<SDValue, 8> Ops;
2722  if (HasChain) {  // If this intrinsic has side-effects, chainify it.
2723    if (OnlyLoad) {
2724      // We don't need to serialize loads against other loads.
2725      Ops.push_back(DAG.getRoot());
2726    } else {
2727      Ops.push_back(getRoot());
2728    }
2729  }
2730
2731  // Info is set by getTgtMemIntrinsic.
2732  TargetLowering::IntrinsicInfo Info;
2733  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
2734
2735  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
2736  if (!IsTgtIntrinsic)
2737    Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
2738
2739  // Add all operands of the call to the operand list.
2740  for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) {
2741    SDValue Op = getValue(I.getOperand(i));
2742    assert(TLI.isTypeLegal(Op.getValueType()) &&
2743           "Intrinsic uses a non-legal type?");
2744    Ops.push_back(Op);
2745  }
2746
2747  SmallVector<EVT, 4> ValueVTs;
2748  ComputeValueVTs(TLI, I.getType(), ValueVTs);
2749#ifndef NDEBUG
2750  for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
2751    assert(TLI.isTypeLegal(ValueVTs[Val]) &&
2752           "Intrinsic uses a non-legal type?");
2753  }
2754#endif // NDEBUG
2755  if (HasChain)
2756    ValueVTs.push_back(MVT::Other);
2757
2758  SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
2759
2760  // Create the node.
2761  SDValue Result;
2762  if (IsTgtIntrinsic) {
2763    // This is a target intrinsic that touches memory.
2764    Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
2765                                     VTs, &Ops[0], Ops.size(),
2766                                     Info.memVT, Info.ptrVal, Info.offset,
2767                                     Info.align, Info.vol,
2768                                     Info.readMem, Info.writeMem);
2769  }
2770  else if (!HasChain)
2771    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
2772                         VTs, &Ops[0], Ops.size());
2773  else if (I.getType() != Type::getVoidTy(*DAG.getContext()))
2774    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
2775                         VTs, &Ops[0], Ops.size());
2776  else
2777    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
2778                         VTs, &Ops[0], Ops.size());
2779
2780  if (HasChain) {
2781    SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
2782    if (OnlyLoad)
2783      PendingLoads.push_back(Chain);
2784    else
2785      DAG.setRoot(Chain);
2786  }
2787  if (I.getType() != Type::getVoidTy(*DAG.getContext())) {
2788    if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
2789      EVT VT = TLI.getValueType(PTy);
2790      Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
2791    }
2792    setValue(&I, Result);
2793  }
2794}
2795
2796/// GetSignificand - Get the significand and build it into a floating-point
2797/// number with exponent of 1:
2798///
2799///   Op = (Op & 0x007fffff) | 0x3f800000;
2800///
2801/// where Op is the i32 bit pattern of the floating-point value.
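///
/// For example, Op = 0x40490fdb (3.14159265f) yields 0x3fc90fdb, i.e.
/// roughly 1.5707964f: the significand rescaled into [1,2).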
2802static SDValue
2803GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
2804  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
2805                           DAG.getConstant(0x007fffff, MVT::i32));
2806  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
2807                           DAG.getConstant(0x3f800000, MVT::i32));
2808  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
2809}
2810
2811/// GetExponent - Get the exponent:
2812///
2813///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
2814///
2815/// where Op is the i32 bit pattern of the floating-point value.
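///
/// For example, Op = 0x40490fdb (3.14159265f) has a biased exponent field of
/// 128, so this returns (float)(128 - 127) = 1.0f.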
2816static SDValue
2817GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
2818            DebugLoc dl) {
2819  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
2820                           DAG.getConstant(0x7f800000, MVT::i32));
2821  SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
2822                           DAG.getConstant(23, TLI.getPointerTy()));
2823  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
2824                           DAG.getConstant(127, MVT::i32));
2825  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
2826}
2827
2828/// getF32Constant - Get 32-bit floating point constant.
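/// For example, getF32Constant(DAG, 0x3f800000) is 1.0f and
/// getF32Constant(DAG, 0x40490fdb) is approximately 3.14159265f.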
2829static SDValue
2830getF32Constant(SelectionDAG &DAG, unsigned Flt) {
2831  return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
2832}
2833
2834/// Inlined utility function to implement binary input atomic intrinsics for
2835/// visitIntrinsicCall: I is a call instruction
2836///                     Op is the associated NodeType for I
2837const char *
2838SelectionDAGBuilder::implVisitBinaryAtomic(CallInst& I, ISD::NodeType Op) {
2839  SDValue Root = getRoot();
2840  SDValue L =
2841    DAG.getAtomic(Op, getCurDebugLoc(),
2842                  getValue(I.getOperand(2)).getValueType().getSimpleVT(),
2843                  Root,
2844                  getValue(I.getOperand(1)),
2845                  getValue(I.getOperand(2)),
2846                  I.getOperand(1));
2847  setValue(&I, L);
2848  DAG.setRoot(L.getValue(1));
2849  return 0;
2850}
2851
2852// implVisitAluOverflow - Lower arithmetic overflow intrinsics.
2853const char *
2854SelectionDAGBuilder::implVisitAluOverflow(CallInst &I, ISD::NodeType Op) {
2855  SDValue Op1 = getValue(I.getOperand(1));
2856  SDValue Op2 = getValue(I.getOperand(2));
2857
2858  SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
2859  SDValue Result = DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2);
2860
2861  setValue(&I, Result);
2862  return 0;
2863}
2864
2865/// visitExp - Lower an exp intrinsic. Handles the special sequences for
2866/// limited-precision mode.
2867void
2868SelectionDAGBuilder::visitExp(CallInst &I) {
2869  SDValue result;
2870  DebugLoc dl = getCurDebugLoc();
2871
2872  if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
2873      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
2874    SDValue Op = getValue(I.getOperand(1));
2875
2876    // Put the exponent in the right bit position for later addition to the
2877    // final result:
2878    //
2879    //   #define LOG2OFe 1.4426950f
2880    //   IntegerPartOfX = ((int32_t)(X * LOG2OFe));
2881    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
2882                             getF32Constant(DAG, 0x3fb8aa3b));
2883    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
2884
2885    //   FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
2886    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
2887    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
2888
2889    //   IntegerPartOfX <<= 23;
2890    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
2891                                 DAG.getConstant(23, TLI.getPointerTy()));
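    // In effect, exp(X) is evaluated as 2^(X*log2(e)) =
    // 2^IntegerPartOfX * 2^FractionalPartOfX: the fractional factor is
    // approximated below by a minimax polynomial, and the integer factor is
    // folded in by adding IntegerPartOfX into the exponent field of the
    // result's IEEE-754 bit pattern (hence the shift by 23).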
2892
2893    if (LimitFloatPrecision <= 6) {
2894      // For floating-point precision of 6:
2895      //
2896      //   TwoToFractionalPartOfX =
2897      //     0.997535578f +
2898      //       (0.735607626f + 0.252464424f * x) * x;
2899      //
2900      // error 0.0144103317, which is 6 bits
2901      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
2902                               getF32Constant(DAG, 0x3e814304));
2903      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
2904                               getF32Constant(DAG, 0x3f3c50c8));
2905      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
2906      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
2907                               getF32Constant(DAG, 0x3f7f5e7e));
2908      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5);
2909
2910      // Add the exponent into the result in integer domain.
2911      SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
2912                               TwoToFracPartOfX, IntegerPartOfX);
2913
2914      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6);
2915    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
2916      // For floating-point precision of 12:
2917      //
2918      //   TwoToFractionalPartOfX =
2919      //     0.999892986f +
2920      //       (0.696457318f +
2921      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
2922      //
2923      // 0.000107046256 error, which is 13 to 14 bits
2924      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
2925                               getF32Constant(DAG, 0x3da235e3));
2926      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
2927                               getF32Constant(DAG, 0x3e65b8f3));
2928      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
2929      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
2930                               getF32Constant(DAG, 0x3f324b07));
2931      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
2932      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
2933                               getF32Constant(DAG, 0x3f7ff8fd));
2934      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7);
2935
2936      // Add the exponent into the result in integer domain.
2937      SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
2938                               TwoToFracPartOfX, IntegerPartOfX);
2939
2940      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8);
2941    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
2942      // For floating-point precision of 18:
2943      //
2944      //   TwoToFractionalPartOfX =
2945      //     0.999999982f +
2946      //       (0.693148872f +
2947      //         (0.240227044f +
2948      //           (0.554906021e-1f +
2949      //             (0.961591928e-2f +
2950      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
2951      //
2952      // error 2.47208000*10^(-7), which is better than 18 bits
2953      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
2954                               getF32Constant(DAG, 0x3924b03e));
2955      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
2956                               getF32Constant(DAG, 0x3ab24b87));
2957      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
2958      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
2959                               getF32Constant(DAG, 0x3c1d8c17));
2960      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
2961      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
2962                               getF32Constant(DAG, 0x3d634a1d));
2963      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
2964      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
2965                               getF32Constant(DAG, 0x3e75fe14));
2966      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
2967      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
2968                                getF32Constant(DAG, 0x3f317234));
2969      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
2970      SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
2971                                getF32Constant(DAG, 0x3f800000));
2972      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,
2973                                             MVT::i32, t13);
2974
2975      // Add the exponent into the result in integer domain.
2976      SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
2977                                TwoToFracPartOfX, IntegerPartOfX);
2978
2979      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14);
2980    }
2981  } else {
2982    // No special expansion.
2983    result = DAG.getNode(ISD::FEXP, dl,
2984                         getValue(I.getOperand(1)).getValueType(),
2985                         getValue(I.getOperand(1)));
2986  }
2987
2988  setValue(&I, result);
2989}
2990
2991/// visitLog - Lower a log intrinsic. Handles the special sequences for
2992/// limited-precision mode.
2993void
2994SelectionDAGBuilder::visitLog(CallInst &I) {
2995  SDValue result;
2996  DebugLoc dl = getCurDebugLoc();
2997
2998  if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
2999      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3000    SDValue Op = getValue(I.getOperand(1));
3001    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
3002
3003    // Scale the exponent by log(2) [0.69314718f].
3004    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
3005    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
3006                                        getF32Constant(DAG, 0x3f317218));
3007
3008    // Get the significand and build it into a floating-point number with
3009    // exponent of 1.
3010    SDValue X = GetSignificand(DAG, Op1, dl);
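    // With the input written as 2^Exp * X (X in [1,2)), log(input) is
    // Exp*log(2) + log(X); log(X) is approximated below by a minimax
    // polynomial over [1,2).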
3011
3012    if (LimitFloatPrecision <= 6) {
3013      // For floating-point precision of 6:
3014      //
3015      //   LogofMantissa =
3016      //     -1.1609546f +
3017      //       (1.4034025f - 0.23903021f * x) * x;
3018      //
3019      // error 0.0034276066, which is better than 8 bits
3020      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3021                               getF32Constant(DAG, 0xbe74c456));
3022      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3023                               getF32Constant(DAG, 0x3fb3a2b1));
3024      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3025      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3026                                          getF32Constant(DAG, 0x3f949a29));
3027
3028      result = DAG.getNode(ISD::FADD, dl,
3029                           MVT::f32, LogOfExponent, LogOfMantissa);
3030    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3031      // For floating-point precision of 12:
3032      //
3033      //   LogOfMantissa =
3034      //     -1.7417939f +
3035      //       (2.8212026f +
3036      //         (-1.4699568f +
3037      //           (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
3038      //
3039      // error 0.000061011436, which is 14 bits
3040      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3041                               getF32Constant(DAG, 0xbd67b6d6));
3042      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3043                               getF32Constant(DAG, 0x3ee4f4b8));
3044      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3045      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3046                               getF32Constant(DAG, 0x3fbc278b));
3047      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3048      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3049                               getF32Constant(DAG, 0x40348e95));
3050      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3051      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3052                                          getF32Constant(DAG, 0x3fdef31a));
3053
3054      result = DAG.getNode(ISD::FADD, dl,
3055                           MVT::f32, LogOfExponent, LogOfMantissa);
3056    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3057      // For floating-point precision of 18:
3058      //
3059      //   LogOfMantissa =
3060      //     -2.1072184f +
3061      //       (4.2372794f +
3062      //         (-3.7029485f +
3063      //           (2.2781945f +
3064      //             (-0.87823314f +
3065      //               (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
3066      //
3067      // error 0.0000023660568, which is better than 18 bits
3068      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3069                               getF32Constant(DAG, 0xbc91e5ac));
3070      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3071                               getF32Constant(DAG, 0x3e4350aa));
3072      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3073      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3074                               getF32Constant(DAG, 0x3f60d3e3));
3075      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3076      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3077                               getF32Constant(DAG, 0x4011cdf0));
3078      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3079      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3080                               getF32Constant(DAG, 0x406cfd1c));
3081      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3082      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3083                               getF32Constant(DAG, 0x408797cb));
3084      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3085      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
3086                                          getF32Constant(DAG, 0x4006dcab));
3087
3088      result = DAG.getNode(ISD::FADD, dl,
3089                           MVT::f32, LogOfExponent, LogOfMantissa);
3090    }
3091  } else {
3092    // No special expansion.
3093    result = DAG.getNode(ISD::FLOG, dl,
3094                         getValue(I.getOperand(1)).getValueType(),
3095                         getValue(I.getOperand(1)));
3096  }
3097
3098  setValue(&I, result);
3099}
3100
3101/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
3102/// limited-precision mode.
3103void
3104SelectionDAGBuilder::visitLog2(CallInst &I) {
3105  SDValue result;
3106  DebugLoc dl = getCurDebugLoc();
3107
3108  if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
3109      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3110    SDValue Op = getValue(I.getOperand(1));
3111    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
3112
3113    // Get the exponent.
3114    SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
3115
3116    // Get the significand and build it into a floating-point number with
3117    // exponent of 1.
3118    SDValue X = GetSignificand(DAG, Op1, dl);
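    // With the input written as 2^Exp * X (X in [1,2)), log2(input) is simply
    // Exp + log2(X).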
3119
3120    // Different possible minimax approximations of significand in
3121    // floating-point for various degrees of accuracy over [1,2].
3122    if (LimitFloatPrecision <= 6) {
3123      // For floating-point precision of 6:
3124      //
3125      //   Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
3126      //
3127      // error 0.0049451742, which is more than 7 bits
3128      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3129                               getF32Constant(DAG, 0xbeb08fe0));
3130      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3131                               getF32Constant(DAG, 0x40019463));
3132      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3133      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3134                                           getF32Constant(DAG, 0x3fd6633d));
3135
3136      result = DAG.getNode(ISD::FADD, dl,
3137                           MVT::f32, LogOfExponent, Log2ofMantissa);
3138    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3139      // For floating-point precision of 12:
3140      //
3141      //   Log2ofMantissa =
3142      //     -2.51285454f +
3143      //       (4.07009056f +
3144      //         (-2.12067489f +
3145      //           (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
3146      //
3147      // error 0.0000876136000, which is better than 13 bits
3148      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3149                               getF32Constant(DAG, 0xbda7262e));
3150      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3151                               getF32Constant(DAG, 0x3f25280b));
3152      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3153      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3154                               getF32Constant(DAG, 0x4007b923));
3155      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3156      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3157                               getF32Constant(DAG, 0x40823e2f));
3158      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3159      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3160                                           getF32Constant(DAG, 0x4020d29c));
3161
3162      result = DAG.getNode(ISD::FADD, dl,
3163                           MVT::f32, LogOfExponent, Log2ofMantissa);
3164    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3165      // For floating-point precision of 18:
3166      //
3167      //   Log2ofMantissa =
3168      //     -3.0400495f +
3169      //       (6.1129976f +
3170      //         (-5.3420409f +
3171      //           (3.2865683f +
3172      //             (-1.2669343f +
3173      //               (0.27515199f -
3174      //                 0.25691327e-1f * x) * x) * x) * x) * x) * x;
3175      //
3176      // error 0.0000018516, which is better than 18 bits
3177      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3178                               getF32Constant(DAG, 0xbcd2769e));
3179      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3180                               getF32Constant(DAG, 0x3e8ce0b9));
3181      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3182      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3183                               getF32Constant(DAG, 0x3fa22ae7));
3184      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3185      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3186                               getF32Constant(DAG, 0x40525723));
3187      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3188      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3189                               getF32Constant(DAG, 0x40aaf200));
3190      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3191      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3192                               getF32Constant(DAG, 0x40c39dad));
3193      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3194      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
3195                                           getF32Constant(DAG, 0x4042902c));
3196
3197      result = DAG.getNode(ISD::FADD, dl,
3198                           MVT::f32, LogOfExponent, Log2ofMantissa);
3199    }
3200  } else {
3201    // No special expansion.
3202    result = DAG.getNode(ISD::FLOG2, dl,
3203                         getValue(I.getOperand(1)).getValueType(),
3204                         getValue(I.getOperand(1)));
3205  }
3206
3207  setValue(&I, result);
3208}
3209
3210/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
3211/// limited-precision mode.
3212void
3213SelectionDAGBuilder::visitLog10(CallInst &I) {
3214  SDValue result;
3215  DebugLoc dl = getCurDebugLoc();
3216
3217  if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
3218      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3219    SDValue Op = getValue(I.getOperand(1));
3220    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
3221
3222    // Scale the exponent by log10(2) [0.30102999f].
3223    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
3224    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
3225                                        getF32Constant(DAG, 0x3e9a209a));
3226
3227    // Get the significand and build it into a floating-point number with
3228    // exponent of 1.
3229    SDValue X = GetSignificand(DAG, Op1, dl);
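    // As above, log10(input) = Exp*log10(2) + log10(X) with X in [1,2);
    // log10(X) is approximated below by a minimax polynomial.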
3230
3231    if (LimitFloatPrecision <= 6) {
3232      // For floating-point precision of 6:
3233      //
3234      //   Log10ofMantissa =
3235      //     -0.50419619f +
3236      //       (0.60948995f - 0.10380950f * x) * x;
3237      //
3238      // error 0.0014886165, which is 6 bits
3239      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3240                               getF32Constant(DAG, 0xbdd49a13));
3241      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3242                               getF32Constant(DAG, 0x3f1c0789));
3243      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3244      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3245                                            getF32Constant(DAG, 0x3f011300));
3246
3247      result = DAG.getNode(ISD::FADD, dl,
3248                           MVT::f32, LogOfExponent, Log10ofMantissa);
3249    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3250      // For floating-point precision of 12:
3251      //
3252      //   Log10ofMantissa =
3253      //     -0.64831180f +
3254      //       (0.91751397f +
3255      //         (-0.31664806f + 0.47637168e-1f * x) * x) * x;
3256      //
3257      // error 0.00019228036, which is better than 12 bits
3258      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3259                               getF32Constant(DAG, 0x3d431f31));
3260      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
3261                               getF32Constant(DAG, 0x3ea21fb2));
3262      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3263      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3264                               getF32Constant(DAG, 0x3f6ae232));
3265      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3266      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
3267                                            getF32Constant(DAG, 0x3f25f7c3));
3268
3269      result = DAG.getNode(ISD::FADD, dl,
3270                           MVT::f32, LogOfExponent, Log10ofMantissa);
3271    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3272      // For floating-point precision of 18:
3273      //
3274      //   Log10ofMantissa =
3275      //     -0.84299375f +
3276      //       (1.5327582f +
3277      //         (-1.0688956f +
3278      //           (0.49102474f +
3279      //             (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
3280      //
3281      // error 0.0000037995730, which is better than 18 bits
3282      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3283                               getF32Constant(DAG, 0x3c5d51ce));
3284      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
3285                               getF32Constant(DAG, 0x3e00685a));
3286      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3287      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3288                               getF32Constant(DAG, 0x3efb6798));
3289      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3290      SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
3291                               getF32Constant(DAG, 0x3f88d192));
3292      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3293      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3294                               getF32Constant(DAG, 0x3fc4316c));
3295      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3296      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
3297                                            getF32Constant(DAG, 0x3f57ce70));
3298
3299      result = DAG.getNode(ISD::FADD, dl,
3300                           MVT::f32, LogOfExponent, Log10ofMantissa);
3301    }
3302  } else {
3303    // No special expansion.
3304    result = DAG.getNode(ISD::FLOG10, dl,
3305                         getValue(I.getOperand(1)).getValueType(),
3306                         getValue(I.getOperand(1)));
3307  }
3308
3309  setValue(&I, result);
3310}
3311
3312/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
3313/// limited-precision mode.
3314void
3315SelectionDAGBuilder::visitExp2(CallInst &I) {
3316  SDValue result;
3317  DebugLoc dl = getCurDebugLoc();
3318
3319  if (getValue(I.getOperand(1)).getValueType() == MVT::f32 &&
3320      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3321    SDValue Op = getValue(I.getOperand(1));
3322
3323    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
3324
3325    //   FractionalPartOfX = x - (float)IntegerPartOfX;
3326    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
3327    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
3328
3329    //   IntegerPartOfX <<= 23;
3330    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
3331                                 DAG.getConstant(23, TLI.getPointerTy()));
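    // 2^Op = 2^IntegerPartOfX * 2^FractionalPartOfX; the fractional factor is
    // approximated below, and the integer factor is added directly into the
    // exponent field of the result's bit pattern (the shift by 23 above).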
3332
3333    if (LimitFloatPrecision <= 6) {
3334      // For floating-point precision of 6:
3335      //
3336      //   TwoToFractionalPartOfX =
3337      //     0.997535578f +
3338      //       (0.735607626f + 0.252464424f * x) * x;
3339      //
3340      // error 0.0144103317, which is 6 bits
3341      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3342                               getF32Constant(DAG, 0x3e814304));
3343      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3344                               getF32Constant(DAG, 0x3f3c50c8));
3345      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3346      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3347                               getF32Constant(DAG, 0x3f7f5e7e));
3348      SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
3349      SDValue TwoToFractionalPartOfX =
3350        DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
3351
3352      result = DAG.getNode(ISD::BIT_CONVERT, dl,
3353                           MVT::f32, TwoToFractionalPartOfX);
3354    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3355      // For floating-point precision of 12:
3356      //
3357      //   TwoToFractionalPartOfX =
3358      //     0.999892986f +
3359      //       (0.696457318f +
3360      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
3361      //
3362      // error 0.000107046256, which is 13 to 14 bits
3363      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3364                               getF32Constant(DAG, 0x3da235e3));
3365      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3366                               getF32Constant(DAG, 0x3e65b8f3));
3367      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3368      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3369                               getF32Constant(DAG, 0x3f324b07));
3370      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3371      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3372                               getF32Constant(DAG, 0x3f7ff8fd));
3373      SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
3374      SDValue TwoToFractionalPartOfX =
3375        DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
3376
3377      result = DAG.getNode(ISD::BIT_CONVERT, dl,
3378                           MVT::f32, TwoToFractionalPartOfX);
3379    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3380      // For floating-point precision of 18:
3381      //
3382      //   TwoToFractionalPartOfX =
3383      //     0.999999982f +
3384      //       (0.693148872f +
3385      //         (0.240227044f +
3386      //           (0.554906021e-1f +
3387      //             (0.961591928e-2f +
3388      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
3389      // error 2.47208000*10^(-7), which is better than 18 bits
3390      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3391                               getF32Constant(DAG, 0x3924b03e));
3392      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3393                               getF32Constant(DAG, 0x3ab24b87));
3394      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3395      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3396                               getF32Constant(DAG, 0x3c1d8c17));
3397      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3398      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3399                               getF32Constant(DAG, 0x3d634a1d));
3400      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3401      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3402                               getF32Constant(DAG, 0x3e75fe14));
3403      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3404      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
3405                                getF32Constant(DAG, 0x3f317234));
3406      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
3407      SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
3408                                getF32Constant(DAG, 0x3f800000));
3409      SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
3410      SDValue TwoToFractionalPartOfX =
3411        DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
3412
3413      result = DAG.getNode(ISD::BIT_CONVERT, dl,
3414                           MVT::f32, TwoToFractionalPartOfX);
3415    }
3416  } else {
3417    // No special expansion.
3418    result = DAG.getNode(ISD::FEXP2, dl,
3419                         getValue(I.getOperand(1)).getValueType(),
3420                         getValue(I.getOperand(1)));
3421  }
3422
3423  setValue(&I, result);
3424}
3425
3426/// visitPow - Lower a pow intrinsic. Handles the special sequences for
3427/// limited-precision mode when the base is exactly 10.0f.
3428void
3429SelectionDAGBuilder::visitPow(CallInst &I) {
3430  SDValue result;
3431  Value *Val = I.getOperand(1);
3432  DebugLoc dl = getCurDebugLoc();
3433  bool IsExp10 = false;
3434
3435  if (getValue(Val).getValueType() == MVT::f32 &&
3436      getValue(I.getOperand(2)).getValueType() == MVT::f32 &&
3437      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3438    if (Constant *C = dyn_cast<Constant>(Val)) {
3439      if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
3440        APFloat Ten(10.0f);
3441        IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
3442      }
3443    }
3444  }
3445
3446  if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3447    SDValue Op = getValue(I.getOperand(2));
3448
3449    // Put the exponent in the right bit position for later addition to the
3450    // final result:
3451    //
3452    //   #define LOG2OF10 3.3219281f
3453    //   IntegerPartOfX = (int32_t)(x * LOG2OF10);
3454    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
3455                             getF32Constant(DAG, 0x40549a78));
3456    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
3457
3458    //   FractionalPartOfX = (x * LOG2OF10) - (float)IntegerPartOfX;
3459    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
3460    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
3461
3462    //   IntegerPartOfX <<= 23;
3463    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
3464                                 DAG.getConstant(23, TLI.getPointerTy()));
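    // 10^Op is evaluated as 2^(Op*log2(10)) = 2^IntegerPartOfX *
    // 2^FractionalPartOfX, using the same polynomial approximations as
    // visitExp and visitExp2.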
3465
3466    if (LimitFloatPrecision <= 6) {
3467      // For floating-point precision of 6:
3468      //
3469      //   twoToFractionalPartOfX =
3470      //     0.997535578f +
3471      //       (0.735607626f + 0.252464424f * x) * x;
3472      //
3473      // error 0.0144103317, which is 6 bits
3474      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3475                               getF32Constant(DAG, 0x3e814304));
3476      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3477                               getF32Constant(DAG, 0x3f3c50c8));
3478      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3479      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3480                               getF32Constant(DAG, 0x3f7f5e7e));
3481      SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
3482      SDValue TwoToFractionalPartOfX =
3483        DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
3484
3485      result = DAG.getNode(ISD::BIT_CONVERT, dl,
3486                           MVT::f32, TwoToFractionalPartOfX);
3487    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3488      // For floating-point precision of 12:
3489      //
3490      //   TwoToFractionalPartOfX =
3491      //     0.999892986f +
3492      //       (0.696457318f +
3493      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
3494      //
3495      // error 0.000107046256, which is 13 to 14 bits
3496      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3497                               getF32Constant(DAG, 0x3da235e3));
3498      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3499                               getF32Constant(DAG, 0x3e65b8f3));
3500      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3501      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3502                               getF32Constant(DAG, 0x3f324b07));
3503      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3504      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3505                               getF32Constant(DAG, 0x3f7ff8fd));
3506      SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
3507      SDValue TwoToFractionalPartOfX =
3508        DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
3509
3510      result = DAG.getNode(ISD::BIT_CONVERT, dl,
3511                           MVT::f32, TwoToFractionalPartOfX);
3512    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3513      // For floating-point precision of 18:
3514      //
3515      //   TwoToFractionalPartOfX =
3516      //     0.999999982f +
3517      //       (0.693148872f +
3518      //         (0.240227044f +
3519      //           (0.554906021e-1f +
3520      //             (0.961591928e-2f +
3521      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
3522      // error 2.47208000*10^(-7), which is better than 18 bits
3523      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3524                               getF32Constant(DAG, 0x3924b03e));
3525      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3526                               getF32Constant(DAG, 0x3ab24b87));
3527      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3528      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3529                               getF32Constant(DAG, 0x3c1d8c17));
3530      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3531      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3532                               getF32Constant(DAG, 0x3d634a1d));
3533      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3534      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3535                               getF32Constant(DAG, 0x3e75fe14));
3536      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3537      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
3538                                getF32Constant(DAG, 0x3f317234));
3539      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
3540      SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
3541                                getF32Constant(DAG, 0x3f800000));
3542      SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
3543      SDValue TwoToFractionalPartOfX =
3544        DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
3545
3546      result = DAG.getNode(ISD::BIT_CONVERT, dl,
3547                           MVT::f32, TwoToFractionalPartOfX);
3548    }
3549  } else {
3550    // No special expansion.
3551    result = DAG.getNode(ISD::FPOW, dl,
3552                         getValue(I.getOperand(1)).getValueType(),
3553                         getValue(I.getOperand(1)),
3554                         getValue(I.getOperand(2)));
3555  }
3556
3557  setValue(&I, result);
3558}
3559
3560/// visitIntrinsicCall - Lower the call to the specified intrinsic function.  If
3561/// we want to emit this as a call to a named external function, return the name
3562/// otherwise lower it and return null.
3563const char *
3564SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
3565  DebugLoc dl = getCurDebugLoc();
3566  switch (Intrinsic) {
3567  default:
3568    // By default, turn this into a target intrinsic node.
3569    visitTargetIntrinsic(I, Intrinsic);
3570    return 0;
3571  case Intrinsic::vastart:  visitVAStart(I); return 0;
3572  case Intrinsic::vaend:    visitVAEnd(I); return 0;
3573  case Intrinsic::vacopy:   visitVACopy(I); return 0;
3574  case Intrinsic::returnaddress:
3575    setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
3576                             getValue(I.getOperand(1))));
3577    return 0;
3578  case Intrinsic::frameaddress:
3579    setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
3580                             getValue(I.getOperand(1))));
3581    return 0;
3582  case Intrinsic::setjmp:
3583    return "_setjmp"+!TLI.usesUnderscoreSetJmp();
3584    break;
3585  case Intrinsic::longjmp:
3586    return "_longjmp"+!TLI.usesUnderscoreLongJmp();
3587    break;
3588  case Intrinsic::memcpy: {
3589    SDValue Op1 = getValue(I.getOperand(1));
3590    SDValue Op2 = getValue(I.getOperand(2));
3591    SDValue Op3 = getValue(I.getOperand(3));
3592    unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
3593    DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
3594                              I.getOperand(1), 0, I.getOperand(2), 0));
3595    return 0;
3596  }
3597  case Intrinsic::memset: {
3598    SDValue Op1 = getValue(I.getOperand(1));
3599    SDValue Op2 = getValue(I.getOperand(2));
3600    SDValue Op3 = getValue(I.getOperand(3));
3601    unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
3602    DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align,
3603                              I.getOperand(1), 0));
3604    return 0;
3605  }
3606  case Intrinsic::memmove: {
3607    SDValue Op1 = getValue(I.getOperand(1));
3608    SDValue Op2 = getValue(I.getOperand(2));
3609    SDValue Op3 = getValue(I.getOperand(3));
3610    unsigned Align = cast<ConstantInt>(I.getOperand(4))->getZExtValue();
3611
3612    // If the source and destination are known to not be aliases, we can
3613    // lower memmove as memcpy.
3614    uint64_t Size = -1ULL;
3615    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
3616      Size = C->getZExtValue();
3617    if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) ==
3618        AliasAnalysis::NoAlias) {
3619      DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, false,
3620                                I.getOperand(1), 0, I.getOperand(2), 0));
3621      return 0;
3622    }
3623
3624    DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align,
3625                               I.getOperand(1), 0, I.getOperand(2), 0));
3626    return 0;
3627  }
3628  case Intrinsic::dbg_stoppoint:
3629  case Intrinsic::dbg_region_start:
3630  case Intrinsic::dbg_region_end:
3631  case Intrinsic::dbg_func_start:
3632    // FIXME - Remove these intrinsics once the dust settles.
3633    return 0;
3634  case Intrinsic::dbg_declare: {
3635    if (OptLevel != CodeGenOpt::None)
3636      // FIXME: Variable debug info is not supported here.
3637      return 0;
3638    DwarfWriter *DW = DAG.getDwarfWriter();
3639    if (!DW)
3640      return 0;
3641    DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
3642    if (!isValidDebugInfoIntrinsic(DI, CodeGenOpt::None))
3643      return 0;
3644
3645    MDNode *Variable = DI.getVariable();
3646    Value *Address = DI.getAddress();
3647    if (BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
3648      Address = BCI->getOperand(0);
3649    AllocaInst *AI = dyn_cast<AllocaInst>(Address);
3650    // Don't handle byval struct arguments or VLAs, for example.
3651    if (!AI)
3652      return 0;
3653    DenseMap<const AllocaInst*, int>::iterator SI =
3654      FuncInfo.StaticAllocaMap.find(AI);
3655    if (SI == FuncInfo.StaticAllocaMap.end())
3656      return 0; // VLAs.
3657    int FI = SI->second;
3658
3659    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
3660    if (MMI) {
3661      MetadataContext &TheMetadata =
3662        DI.getParent()->getContext().getMetadata();
3663      unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
3664      MDNode *Dbg = TheMetadata.getMD(MDDbgKind, &DI);
3665      MMI->setVariableDbgInfo(Variable, FI, Dbg);
3666    }
3667    return 0;
3668  }
3669  case Intrinsic::eh_exception: {
3670    // Insert the EXCEPTIONADDR instruction.
3671    assert(CurMBB->isLandingPad() &&"Call to eh.exception not in landing pad!");
3672    SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
3673    SDValue Ops[1];
3674    Ops[0] = DAG.getRoot();
3675    SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
3676    setValue(&I, Op);
3677    DAG.setRoot(Op.getValue(1));
3678    return 0;
3679  }
3680
3681  case Intrinsic::eh_selector: {
3682    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
3683
3684    if (CurMBB->isLandingPad())
3685      AddCatchInfo(I, MMI, CurMBB);
3686    else {
3687#ifndef NDEBUG
3688      FuncInfo.CatchInfoLost.insert(&I);
3689#endif
3690      // FIXME: Mark exception selector register as live in.  Hack for PR1508.
3691      unsigned Reg = TLI.getExceptionSelectorRegister();
3692      if (Reg) CurMBB->addLiveIn(Reg);
3693    }
3694
3695    // Insert the EHSELECTION instruction.
3696    SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
3697    SDValue Ops[2];
3698    Ops[0] = getValue(I.getOperand(1));
3699    Ops[1] = getRoot();
3700    SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
3701
3702    DAG.setRoot(Op.getValue(1));
3703
3704    setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
3705    return 0;
3706  }
3707
3708  case Intrinsic::eh_typeid_for: {
3709    MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
3710
3711    if (MMI) {
3712      // Find the type id for the given typeinfo.
3713      GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1));
3714
3715      unsigned TypeID = MMI->getTypeIDFor(GV);
3716      setValue(&I, DAG.getConstant(TypeID, MVT::i32));
3717    } else {
3718      // Return something different to eh_selector.
3719      setValue(&I, DAG.getConstant(1, MVT::i32));
3720    }
3721
3722    return 0;
3723  }
3724
3725  case Intrinsic::eh_return_i32:
3726  case Intrinsic::eh_return_i64:
3727    if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
3728      MMI->setCallsEHReturn(true);
3729      DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
3730                              MVT::Other,
3731                              getControlRoot(),
3732                              getValue(I.getOperand(1)),
3733                              getValue(I.getOperand(2))));
3734    } else {
3735      setValue(&I, DAG.getConstant(0, TLI.getPointerTy()));
3736    }
3737
3738    return 0;
3739  case Intrinsic::eh_unwind_init:
3740    if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo()) {
3741      MMI->setCallsUnwindInit(true);
3742    }
3743
3744    return 0;
3745
3746  case Intrinsic::eh_dwarf_cfa: {
3747    EVT VT = getValue(I.getOperand(1)).getValueType();
3748    SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), dl,
3749                                        TLI.getPointerTy());
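    // eh.dwarf.cfa is lowered as FRAMEADDR(0) + FRAME_TO_ARGS_OFFSET plus the
    // intrinsic's operand, i.e. the canonical frame address adjusted by the
    // given offset.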
3750
3751    SDValue Offset = DAG.getNode(ISD::ADD, dl,
3752                                 TLI.getPointerTy(),
3753                                 DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
3754                                             TLI.getPointerTy()),
3755                                 CfaArg);
3756    setValue(&I, DAG.getNode(ISD::ADD, dl,
3757                             TLI.getPointerTy(),
3758                             DAG.getNode(ISD::FRAMEADDR, dl,
3759                                         TLI.getPointerTy(),
3760                                         DAG.getConstant(0,
3761                                                         TLI.getPointerTy())),
3762                             Offset));
3763    return 0;
3764  }
3765  case Intrinsic::convertff:
3766  case Intrinsic::convertfsi:
3767  case Intrinsic::convertfui:
3768  case Intrinsic::convertsif:
3769  case Intrinsic::convertuif:
3770  case Intrinsic::convertss:
3771  case Intrinsic::convertsu:
3772  case Intrinsic::convertus:
3773  case Intrinsic::convertuu: {
3774    ISD::CvtCode Code = ISD::CVT_INVALID;
3775    switch (Intrinsic) {
3776    case Intrinsic::convertff:  Code = ISD::CVT_FF; break;
3777    case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
3778    case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
3779    case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
3780    case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
3781    case Intrinsic::convertss:  Code = ISD::CVT_SS; break;
3782    case Intrinsic::convertsu:  Code = ISD::CVT_SU; break;
3783    case Intrinsic::convertus:  Code = ISD::CVT_US; break;
3784    case Intrinsic::convertuu:  Code = ISD::CVT_UU; break;
3785    }
3786    EVT DestVT = TLI.getValueType(I.getType());
3787    Value* Op1 = I.getOperand(1);
3788    setValue(&I, DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
3789                                DAG.getValueType(DestVT),
3790                                DAG.getValueType(getValue(Op1).getValueType()),
3791                                getValue(I.getOperand(2)),
3792                                getValue(I.getOperand(3)),
3793                                Code));
3794    return 0;
3795  }
3796
3797  case Intrinsic::sqrt:
3798    setValue(&I, DAG.getNode(ISD::FSQRT, dl,
3799                             getValue(I.getOperand(1)).getValueType(),
3800                             getValue(I.getOperand(1))));
3801    return 0;
3802  case Intrinsic::powi:
3803    setValue(&I, DAG.getNode(ISD::FPOWI, dl,
3804                             getValue(I.getOperand(1)).getValueType(),
3805                             getValue(I.getOperand(1)),
3806                             getValue(I.getOperand(2))));
3807    return 0;
3808  case Intrinsic::sin:
3809    setValue(&I, DAG.getNode(ISD::FSIN, dl,
3810                             getValue(I.getOperand(1)).getValueType(),
3811                             getValue(I.getOperand(1))));
3812    return 0;
3813  case Intrinsic::cos:
3814    setValue(&I, DAG.getNode(ISD::FCOS, dl,
3815                             getValue(I.getOperand(1)).getValueType(),
3816                             getValue(I.getOperand(1))));
3817    return 0;
3818  case Intrinsic::log:
3819    visitLog(I);
3820    return 0;
3821  case Intrinsic::log2:
3822    visitLog2(I);
3823    return 0;
3824  case Intrinsic::log10:
3825    visitLog10(I);
3826    return 0;
3827  case Intrinsic::exp:
3828    visitExp(I);
3829    return 0;
3830  case Intrinsic::exp2:
3831    visitExp2(I);
3832    return 0;
3833  case Intrinsic::pow:
3834    visitPow(I);
3835    return 0;
3836  case Intrinsic::pcmarker: {
3837    SDValue Tmp = getValue(I.getOperand(1));
3838    DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
3839    return 0;
3840  }
3841  case Intrinsic::readcyclecounter: {
3842    SDValue Op = getRoot();
3843    SDValue Tmp = DAG.getNode(ISD::READCYCLECOUNTER, dl,
3844                              DAG.getVTList(MVT::i64, MVT::Other),
3845                              &Op, 1);
3846    setValue(&I, Tmp);
3847    DAG.setRoot(Tmp.getValue(1));
3848    return 0;
3849  }
3850  case Intrinsic::bswap:
3851    setValue(&I, DAG.getNode(ISD::BSWAP, dl,
3852                             getValue(I.getOperand(1)).getValueType(),
3853                             getValue(I.getOperand(1))));
3854    return 0;
3855  case Intrinsic::cttz: {
3856    SDValue Arg = getValue(I.getOperand(1));
3857    EVT Ty = Arg.getValueType();
3858    SDValue result = DAG.getNode(ISD::CTTZ, dl, Ty, Arg);
3859    setValue(&I, result);
3860    return 0;
3861  }
3862  case Intrinsic::ctlz: {
3863    SDValue Arg = getValue(I.getOperand(1));
3864    EVT Ty = Arg.getValueType();
3865    SDValue result = DAG.getNode(ISD::CTLZ, dl, Ty, Arg);
3866    setValue(&I, result);
3867    return 0;
3868  }
3869  case Intrinsic::ctpop: {
3870    SDValue Arg = getValue(I.getOperand(1));
3871    EVT Ty = Arg.getValueType();
3872    SDValue result = DAG.getNode(ISD::CTPOP, dl, Ty, Arg);
3873    setValue(&I, result);
3874    return 0;
3875  }
3876  case Intrinsic::stacksave: {
3877    SDValue Op = getRoot();
3878    SDValue Tmp = DAG.getNode(ISD::STACKSAVE, dl,
3879              DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
3880    setValue(&I, Tmp);
3881    DAG.setRoot(Tmp.getValue(1));
3882    return 0;
3883  }
3884  case Intrinsic::stackrestore: {
3885    SDValue Tmp = getValue(I.getOperand(1));
3886    DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Tmp));
3887    return 0;
3888  }
3889  case Intrinsic::stackprotector: {
3890    // Emit code into the DAG to store the stack guard onto the stack.
3891    MachineFunction &MF = DAG.getMachineFunction();
3892    MachineFrameInfo *MFI = MF.getFrameInfo();
3893    EVT PtrTy = TLI.getPointerTy();
3894
3895    SDValue Src = getValue(I.getOperand(1));   // The guard's value.
3896    AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2));
3897
3898    int FI = FuncInfo.StaticAllocaMap[Slot];
3899    MFI->setStackProtectorIndex(FI);
3900
3901    SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
3902
3903    // Store the stack protector onto the stack.
3904    SDValue Result = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
3905                                  PseudoSourceValue::getFixedStack(FI),
3906                                  0, true);
3907    setValue(&I, Result);
3908    DAG.setRoot(Result);
3909    return 0;
3910  }
3911  case Intrinsic::objectsize: {
3912    // If we don't know by now, we're never going to know.
3913    ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2));
3914
3915    assert(CI && "Non-constant type in __builtin_object_size?");
3916
3917    // Use the intrinsic's result type; getOperand(0) is the callee, not an argument.
3918    EVT Ty = TLI.getValueType(I.getType());
3919
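    // __builtin_object_size semantics: when the size is unknown, types 0 and 1
    // return (size_t)-1 while types 2 and 3 return 0.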
3920    if (CI->getZExtValue() < 2)
3921      setValue(&I, DAG.getConstant(-1ULL, Ty));
3922    else
3923      setValue(&I, DAG.getConstant(0, Ty));
3924    return 0;
3925  }
3926  case Intrinsic::var_annotation:
3927    // Discard annotate attributes
3928    return 0;
3929
3930  case Intrinsic::init_trampoline: {
3931    const Function *F = cast<Function>(I.getOperand(2)->stripPointerCasts());
3932
3933    SDValue Ops[6];
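    // Operands: the chain, the trampoline memory, the nested function, and its
    // static chain ('nest') value, plus source-value info for the trampoline
    // and the function.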
3934    Ops[0] = getRoot();
3935    Ops[1] = getValue(I.getOperand(1));
3936    Ops[2] = getValue(I.getOperand(2));
3937    Ops[3] = getValue(I.getOperand(3));
3938    Ops[4] = DAG.getSrcValue(I.getOperand(1));
3939    Ops[5] = DAG.getSrcValue(F);
3940
3941    SDValue Tmp = DAG.getNode(ISD::TRAMPOLINE, dl,
3942                              DAG.getVTList(TLI.getPointerTy(), MVT::Other),
3943                              Ops, 6);
3944
3945    setValue(&I, Tmp);
3946    DAG.setRoot(Tmp.getValue(1));
3947    return 0;
3948  }
3949
3950  case Intrinsic::gcroot:
3951    if (GFI) {
3952      Value *Alloca = I.getOperand(1);
3953      Constant *TypeMap = cast<Constant>(I.getOperand(2));
3954
3955      FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
3956      GFI->addStackRoot(FI->getIndex(), TypeMap);
3957    }
3958    return 0;
3959
3960  case Intrinsic::gcread:
3961  case Intrinsic::gcwrite:
3962    llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
3963    return 0;
3964
3965  case Intrinsic::flt_rounds: {
3966    setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
3967    return 0;
3968  }
3969
3970  case Intrinsic::trap: {
3971    DAG.setRoot(DAG.getNode(ISD::TRAP, dl,MVT::Other, getRoot()));
3972    return 0;
3973  }
3974
3975  case Intrinsic::uadd_with_overflow:
3976    return implVisitAluOverflow(I, ISD::UADDO);
3977  case Intrinsic::sadd_with_overflow:
3978    return implVisitAluOverflow(I, ISD::SADDO);
3979  case Intrinsic::usub_with_overflow:
3980    return implVisitAluOverflow(I, ISD::USUBO);
3981  case Intrinsic::ssub_with_overflow:
3982    return implVisitAluOverflow(I, ISD::SSUBO);
3983  case Intrinsic::umul_with_overflow:
3984    return implVisitAluOverflow(I, ISD::UMULO);
3985  case Intrinsic::smul_with_overflow:
3986    return implVisitAluOverflow(I, ISD::SMULO);
3987
3988  case Intrinsic::prefetch: {
3989    SDValue Ops[4];
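    // Operands: the chain, the address being prefetched, the read/write
    // specifier, and the locality hint.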
3990    Ops[0] = getRoot();
3991    Ops[1] = getValue(I.getOperand(1));
3992    Ops[2] = getValue(I.getOperand(2));
3993    Ops[3] = getValue(I.getOperand(3));
3994    DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4));
3995    return 0;
3996  }
3997
3998  case Intrinsic::memory_barrier: {
3999    SDValue Ops[6];
4000    Ops[0] = getRoot();
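    // Operands 1-5 are the five i1 ordering flags: load-load, load-store,
    // store-load, store-store, and device.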
4001    for (int x = 1; x < 6; ++x)
4002      Ops[x] = getValue(I.getOperand(x));
4003
4004    DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
4005    return 0;
4006  }
4007  case Intrinsic::atomic_cmp_swap: {
4008    SDValue Root = getRoot();
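    // Operands: the pointer, the value to compare against, and the new value;
    // the result is the value originally loaded from memory.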
4009    SDValue L =
4010      DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
4011                    getValue(I.getOperand(2)).getValueType().getSimpleVT(),
4012                    Root,
4013                    getValue(I.getOperand(1)),
4014                    getValue(I.getOperand(2)),
4015                    getValue(I.getOperand(3)),
4016                    I.getOperand(1));
4017    setValue(&I, L);
4018    DAG.setRoot(L.getValue(1));
4019    return 0;
4020  }
4021  case Intrinsic::atomic_load_add:
4022    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD);
4023  case Intrinsic::atomic_load_sub:
4024    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB);
4025  case Intrinsic::atomic_load_or:
4026    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
4027  case Intrinsic::atomic_load_xor:
4028    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
4029  case Intrinsic::atomic_load_and:
4030    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
4031  case Intrinsic::atomic_load_nand:
4032    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND);
4033  case Intrinsic::atomic_load_max:
4034    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
4035  case Intrinsic::atomic_load_min:
4036    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
4037  case Intrinsic::atomic_load_umin:
4038    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
4039  case Intrinsic::atomic_load_umax:
4040    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
4041  case Intrinsic::atomic_swap:
4042    return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
4043
4044  case Intrinsic::invariant_start:
4045  case Intrinsic::lifetime_start:
4046    // Discard region information.
4047    setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
4048    return 0;
4049  case Intrinsic::invariant_end:
4050  case Intrinsic::lifetime_end:
4051    // Discard region information.
4052    return 0;
4053  }
4054}
4055
4056/// Test if the given instruction is in a position to be optimized
4057/// with a tail-call. This roughly means that it's in a block with
4058/// a return and there's nothing that needs to be scheduled
4059/// between it and the return.
4060///
4061/// This function only tests target-independent requirements.
4062/// For target-dependent requirements, a target should override
4063/// TargetLowering::IsEligibleForTailCallOptimization.
4064///
4065static bool
4066isInTailCallPosition(const Instruction *I, Attributes CalleeRetAttr,
4067                     const TargetLowering &TLI) {
4068  const BasicBlock *ExitBB = I->getParent();
4069  const TerminatorInst *Term = ExitBB->getTerminator();
4070  const ReturnInst *Ret = dyn_cast<ReturnInst>(Term);
4071  const Function *F = ExitBB->getParent();
4072
4073  // The block must end in a return statement or an unreachable.
4074  if (!Ret && !isa<UnreachableInst>(Term)) return false;
4075
4076  // If I will have a chain, make sure no other instruction that will have a
4077  // chain interposes between I and the return.
4078  if (I->mayHaveSideEffects() || I->mayReadFromMemory() ||
4079      !I->isSafeToSpeculativelyExecute())
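    // Walk backwards from the instruction just before the terminator; any
    // intervening instruction that needs a chain blocks the tail call.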
4080    for (BasicBlock::const_iterator BBI = prior(prior(ExitBB->end())); ;
4081         --BBI) {
4082      if (&*BBI == I)
4083        break;
4084      if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
4085          !BBI->isSafeToSpeculativelyExecute())
4086        return false;
4087    }
4088
4089  // If the block ends with a void return or unreachable, it doesn't matter
4090  // what the call's return type is.
4091  if (!Ret || Ret->getNumOperands() == 0) return true;
4092
4093  // If the return value is undef, it doesn't matter what the call's
4094  // return type is.
4095  if (isa<UndefValue>(Ret->getOperand(0))) return true;
4096
4097  // Conservatively require the attributes of the call to match those of
4098  // the return. Ignore noalias because it doesn't affect the call sequence.
4099  unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
4100  if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
4101    return false;
4102
4103  // Otherwise, make sure the unmodified return value of I is the return value.
4104  for (const Instruction *U = dyn_cast<Instruction>(Ret->getOperand(0)); ;
4105       U = dyn_cast<Instruction>(U->getOperand(0))) {
4106    if (!U)
4107      return false;
4108    if (!U->hasOneUse())
4109      return false;
4110    if (U == I)
4111      break;
4112    // Check for a truly no-op truncate.
4113    if (isa<TruncInst>(U) &&
4114        TLI.isTruncateFree(U->getOperand(0)->getType(), U->getType()))
4115      continue;
4116    // Check for a truly no-op bitcast.
4117    if (isa<BitCastInst>(U) &&
4118        (U->getOperand(0)->getType() == U->getType() ||
4119         (isa<PointerType>(U->getOperand(0)->getType()) &&
4120          isa<PointerType>(U->getType()))))
4121      continue;
4122    // Otherwise it's not a true no-op.
4123    return false;
4124  }
4125
4126  return true;
4127}
4128
4129void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
4130                                      bool isTailCall,
4131                                      MachineBasicBlock *LandingPad) {
4132  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
4133  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
4134  const Type *RetTy = FTy->getReturnType();
4135  MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
4136  unsigned BeginLabel = 0, EndLabel = 0;
4137
4138  TargetLowering::ArgListTy Args;
4139  TargetLowering::ArgListEntry Entry;
4140  Args.reserve(CS.arg_size());
4141
4142  // Check whether the function can return without sret-demotion.
4143  SmallVector<EVT, 4> OutVTs;
4144  SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
4145  SmallVector<uint64_t, 4> Offsets;
4146  getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
4147    OutVTs, OutsFlags, TLI, &Offsets);
4148
4149
4150  bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
4151                        FTy->isVarArg(), OutVTs, OutsFlags, DAG);
4152
4153  SDValue DemoteStackSlot;
4154
4155  if (!CanLowerReturn) {
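    // The target cannot return this value in registers, so demote it: allocate
    // a stack slot, pass its address as a hidden sret argument, and reload the
    // result from the slot after the call.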
4156    uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
4157                      FTy->getReturnType());
4158    unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(
4159                      FTy->getReturnType());
4160    MachineFunction &MF = DAG.getMachineFunction();
4161    int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
4162    const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
4163
4164    DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
4165    Entry.Node = DemoteStackSlot;
4166    Entry.Ty = StackSlotPtrType;
4167    Entry.isSExt = false;
4168    Entry.isZExt = false;
4169    Entry.isInReg = false;
4170    Entry.isSRet = true;
4171    Entry.isNest = false;
4172    Entry.isByVal = false;
4173    Entry.Alignment = Align;
4174    Args.push_back(Entry);
4175    RetTy = Type::getVoidTy(FTy->getContext());
4176  }
4177
4178  for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
4179       i != e; ++i) {
4180    SDValue ArgNode = getValue(*i);
4181    Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
4182
4183    unsigned attrInd = i - CS.arg_begin() + 1;
4184    Entry.isSExt  = CS.paramHasAttr(attrInd, Attribute::SExt);
4185    Entry.isZExt  = CS.paramHasAttr(attrInd, Attribute::ZExt);
4186    Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
4187    Entry.isSRet  = CS.paramHasAttr(attrInd, Attribute::StructRet);
4188    Entry.isNest  = CS.paramHasAttr(attrInd, Attribute::Nest);
4189    Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
4190    Entry.Alignment = CS.getParamAlignment(attrInd);
4191    Args.push_back(Entry);
4192  }
4193
4194  if (LandingPad && MMI) {
4195    // Insert a label before the invoke call to mark the try range.  This can be
4196    // used to detect deletion of the invoke via the MachineModuleInfo.
4197    BeginLabel = MMI->NextLabelID();
4198
4199    // Both PendingLoads and PendingExports must be flushed here;
4200    // this call might not return.
4201    (void)getRoot();
4202    DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
4203                             getControlRoot(), BeginLabel));
4204  }
4205
4206  // Check if target-independent constraints permit a tail call here.
4207  // Target-dependent constraints are checked within TLI.LowerCallTo.
4208  if (isTailCall &&
4209      !isInTailCallPosition(CS.getInstruction(),
4210                            CS.getAttributes().getRetAttributes(),
4211                            TLI))
4212    isTailCall = false;
4213
4214  std::pair<SDValue,SDValue> Result =
4215    TLI.LowerCallTo(getRoot(), RetTy,
4216                    CS.paramHasAttr(0, Attribute::SExt),
4217                    CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
4218                    CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
4219                    CS.getCallingConv(),
4220                    isTailCall,
4221                    !CS.getInstruction()->use_empty(),
4222                    Callee, Args, DAG, getCurDebugLoc());
4223  assert((isTailCall || Result.second.getNode()) &&
4224         "Non-null chain expected with non-tail call!");
4225  assert((Result.second.getNode() || !Result.first.getNode()) &&
4226         "Null value expected with tail call!");
4227  if (Result.first.getNode())
4228    setValue(CS.getInstruction(), Result.first);
4229  else if (!CanLowerReturn && Result.second.getNode()) {
4230    // The instruction result is the result of loading from the
4231    // hidden sret parameter.
4232    SmallVector<EVT, 1> PVTs;
4233    const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
4234
4235    ComputeValueVTs(TLI, PtrRetTy, PVTs);
4236    assert(PVTs.size() == 1 && "Pointers should fit in one register");
4237    EVT PtrVT = PVTs[0];
4238    unsigned NumValues = OutVTs.size();
4239    SmallVector<SDValue, 4> Values(NumValues);
4240    SmallVector<SDValue, 4> Chains(NumValues);
4241
4242    for (unsigned i = 0; i < NumValues; ++i) {
4243      SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second,
4244        DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, DemoteStackSlot,
4245        DAG.getConstant(Offsets[i], PtrVT)),
4246        NULL, Offsets[i], false, 1);
4247      Values[i] = L;
4248      Chains[i] = L.getValue(1);
4249    }
4250    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
4251                                MVT::Other, &Chains[0], NumValues);
4252    PendingLoads.push_back(Chain);
4253
4254    setValue(CS.getInstruction(), DAG.getNode(ISD::MERGE_VALUES,
4255             getCurDebugLoc(), DAG.getVTList(&OutVTs[0], NumValues),
4256             &Values[0], NumValues));
4257  }
4258  // As a special case, a null chain means that a tail call has
4259  // been emitted and the DAG root is already updated.
4260  if (Result.second.getNode())
4261    DAG.setRoot(Result.second);
4262  else
4263    HasTailCall = true;
4264
4265  if (LandingPad && MMI) {
4266    // Insert a label at the end of the invoke call to mark the try range.  This
4267    // can be used to detect deletion of the invoke via the MachineModuleInfo.
4268    EndLabel = MMI->NextLabelID();
4269    DAG.setRoot(DAG.getLabel(ISD::EH_LABEL, getCurDebugLoc(),
4270                             getRoot(), EndLabel));
4271
4272    // Inform MachineModuleInfo of range.
4273    MMI->addInvoke(LandingPad, BeginLabel, EndLabel);
4274  }
4275}
4276
4277
4278void SelectionDAGBuilder::visitCall(CallInst &I) {
4279  const char *RenameFn = 0;
4280  if (Function *F = I.getCalledFunction()) {
4281    if (F->isDeclaration()) {
4282      const TargetIntrinsicInfo *II = TLI.getTargetMachine().getIntrinsicInfo();
4283      if (II) {
4284        if (unsigned IID = II->getIntrinsicID(F)) {
4285          RenameFn = visitIntrinsicCall(I, IID);
4286          if (!RenameFn)
4287            return;
4288        }
4289      }
4290      if (unsigned IID = F->getIntrinsicID()) {
4291        RenameFn = visitIntrinsicCall(I, IID);
4292        if (!RenameFn)
4293          return;
4294      }
4295    }
4296
4297    // Check for well-known libc/libm calls.  If the function is internal, it
4298    // can't be a library call.
4299    if (!F->hasLocalLinkage() && F->hasName()) {
4300      StringRef Name = F->getName();
4301      if (Name == "copysign" || Name == "copysignf") {
4302        if (I.getNumOperands() == 3 &&   // Basic sanity checks.
4303            I.getOperand(1)->getType()->isFloatingPoint() &&
4304            I.getType() == I.getOperand(1)->getType() &&
4305            I.getType() == I.getOperand(2)->getType()) {
4306          SDValue LHS = getValue(I.getOperand(1));
4307          SDValue RHS = getValue(I.getOperand(2));
4308          setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
4309                                   LHS.getValueType(), LHS, RHS));
4310          return;
4311        }
4312      } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
4313        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
4314            I.getOperand(1)->getType()->isFloatingPoint() &&
4315            I.getType() == I.getOperand(1)->getType()) {
4316          SDValue Tmp = getValue(I.getOperand(1));
4317          setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
4318                                   Tmp.getValueType(), Tmp));
4319          return;
4320        }
4321      } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
4322        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
4323            I.getOperand(1)->getType()->isFloatingPoint() &&
4324            I.getType() == I.getOperand(1)->getType() &&
4325            I.onlyReadsMemory()) {
4326          SDValue Tmp = getValue(I.getOperand(1));
4327          setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
4328                                   Tmp.getValueType(), Tmp));
4329          return;
4330        }
4331      } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
4332        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
4333            I.getOperand(1)->getType()->isFloatingPoint() &&
4334            I.getType() == I.getOperand(1)->getType() &&
4335            I.onlyReadsMemory()) {
4336          SDValue Tmp = getValue(I.getOperand(1));
4337          setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
4338                                   Tmp.getValueType(), Tmp));
4339          return;
4340        }
4341      } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
4342        if (I.getNumOperands() == 2 &&   // Basic sanity checks.
4343            I.getOperand(1)->getType()->isFloatingPoint() &&
4344            I.getType() == I.getOperand(1)->getType() &&
4345            I.onlyReadsMemory()) {
4346          SDValue Tmp = getValue(I.getOperand(1));
4347          setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
4348                                   Tmp.getValueType(), Tmp));
4349          return;
4350        }
4351      }
4352    }
4353  } else if (isa<InlineAsm>(I.getOperand(0))) {
4354    visitInlineAsm(&I);
4355    return;
4356  }
4357
4358  SDValue Callee;
4359  if (!RenameFn)
4360    Callee = getValue(I.getOperand(0));
4361  else
4362    Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
4363
4364  // Check if we can potentially perform a tail call. More detailed
4365  // checking is done within LowerCallTo, after more information
4366  // about the call is known.
4367  bool isTailCall = PerformTailCallOpt && I.isTailCall();
4368
4369  LowerCallTo(&I, Callee, isTailCall);
4370}
4371
4372
4373/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copy from
4374/// this value and returns the result as a ValueVT value.  This uses
4375/// Chain/Flag as the input and updates them for the output Chain/Flag.
4376/// If the Flag pointer is NULL, no flag is used.
4377SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
4378                                      SDValue &Chain,
4379                                      SDValue *Flag) const {
4380  // Assemble the legal parts into the final values.
4381  SmallVector<SDValue, 4> Values(ValueVTs.size());
4382  SmallVector<SDValue, 8> Parts;
4383  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
4384    // Copy the legal parts from the registers.
4385    EVT ValueVT = ValueVTs[Value];
4386    unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
4387    EVT RegisterVT = RegVTs[Value];
4388
4389    Parts.resize(NumRegs);
4390    for (unsigned i = 0; i != NumRegs; ++i) {
4391      SDValue P;
4392      if (Flag == 0)
4393        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
4394      else {
4395        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
4396        *Flag = P.getValue(2);
4397      }
4398      Chain = P.getValue(1);
4399
4400      // If the source register was virtual and if we know something about it,
4401      // add an assert node.
4402      if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
4403          RegisterVT.isInteger() && !RegisterVT.isVector()) {
4404        unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
4405        FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
4406        if (FLI.LiveOutRegInfo.size() > SlotNo) {
4407          FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo];
4408
4409          unsigned RegSize = RegisterVT.getSizeInBits();
4410          unsigned NumSignBits = LOI.NumSignBits;
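          // countLeadingOnes() on KnownZero gives the number of high bits known
          // to be zero.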
4411          unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
4412
4413          // FIXME: We capture more information than the dag can represent.  For
4414          // now, just use the tightest assertzext/assertsext possible.
4415          bool isSExt = true;
4416          EVT FromVT(MVT::Other);
4417          if (NumSignBits == RegSize)
4418            isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1
4419          else if (NumZeroBits >= RegSize-1)
4420            isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1
4421          else if (NumSignBits > RegSize-8)
4422            isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8
4423          else if (NumZeroBits >= RegSize-8)
4424            isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8
4425          else if (NumSignBits > RegSize-16)
4426            isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16
4427          else if (NumZeroBits >= RegSize-16)
4428            isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
4429          else if (NumSignBits > RegSize-32)
4430            isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32
4431          else if (NumZeroBits >= RegSize-32)
4432            isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
4433
4434          if (FromVT != MVT::Other) {
4435            P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
4436                            RegisterVT, P, DAG.getValueType(FromVT));
4437
4438          }
4439        }
4440      }
4441
4442      Parts[i] = P;
4443    }
4444
4445    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
4446                                     NumRegs, RegisterVT, ValueVT);
4447    Part += NumRegs;
4448    Parts.clear();
4449  }
4450
4451  return DAG.getNode(ISD::MERGE_VALUES, dl,
4452                     DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
4453                     &Values[0], ValueVTs.size());
4454}
4455
4456/// getCopyToRegs - Emit a series of CopyToReg nodes that copy the
4457/// specified value into the registers specified by this object.  This uses
4458/// Chain/Flag as the input and updates them for the output Chain/Flag.
4459/// If the Flag pointer is NULL, no flag is used.
4460void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
4461                                 SDValue &Chain, SDValue *Flag) const {
4462  // Get the list of the value's legal parts.
4463  unsigned NumRegs = Regs.size();
4464  SmallVector<SDValue, 8> Parts(NumRegs);
4465  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
4466    EVT ValueVT = ValueVTs[Value];
4467    unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
4468    EVT RegisterVT = RegVTs[Value];
4469
4470    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
4471                   &Parts[Part], NumParts, RegisterVT);
4472    Part += NumParts;
4473  }
4474
4475  // Copy the parts into the registers.
4476  SmallVector<SDValue, 8> Chains(NumRegs);
4477  for (unsigned i = 0; i != NumRegs; ++i) {
4478    SDValue Part;
4479    if (Flag == 0)
4480      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
4481    else {
4482      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
4483      *Flag = Part.getValue(1);
4484    }
4485    Chains[i] = Part.getValue(0);
4486  }
4487
4488  if (NumRegs == 1 || Flag)
4489    // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
4490    // flagged to it. That is, the CopyToReg nodes and the user are considered
4491    // a single scheduling unit. If we create a TokenFactor and return it as
4492    // chain, then the TokenFactor is both a predecessor (operand) of the
4493    // user as well as a successor (the TF operands are flagged to the user).
4494    // c1, f1 = CopyToReg
4495    // c2, f2 = CopyToReg
4496    // c3     = TokenFactor c1, c2
4497    // ...
4498    //        = op c3, ..., f2
4499    Chain = Chains[NumRegs-1];
4500  else
4501    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
4502}
4503
4504/// AddInlineAsmOperands - Add this value to the specified inlineasm node
4505/// operand list.  This adds the code marker and includes the number of
4506/// values added into it.
4507void RegsForValue::AddInlineAsmOperands(unsigned Code,
4508                                        bool HasMatching,unsigned MatchingIdx,
4509                                        SelectionDAG &DAG,
4510                                        std::vector<SDValue> &Ops) const {
4511  EVT IntPtrTy = DAG.getTargetLoweringInfo().getPointerTy();
4512  assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!");
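  // The flag word encodes the operand: bits 0-2 hold the kind, bits 3-15 hold
  // the register count, and for tied operands bit 31 is set with the matched
  // operand index in bits 16-30.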
4513  unsigned Flag = Code | (Regs.size() << 3);
4514  if (HasMatching)
4515    Flag |= 0x80000000 | (MatchingIdx << 16);
4516  Ops.push_back(DAG.getTargetConstant(Flag, IntPtrTy));
4517  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
4518    unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
4519    EVT RegisterVT = RegVTs[Value];
4520    for (unsigned i = 0; i != NumRegs; ++i) {
4521      assert(Reg < Regs.size() && "Mismatch in # registers expected");
4522      Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
4523    }
4524  }
4525}
4526
4527/// isAllocatableRegister - If the specified register is safe to allocate,
4528/// i.e. it isn't a stack pointer or some other special register, return the
4529/// register class for the register.  Otherwise, return null.
4530static const TargetRegisterClass *
4531isAllocatableRegister(unsigned Reg, MachineFunction &MF,
4532                      const TargetLowering &TLI,
4533                      const TargetRegisterInfo *TRI) {
4534  EVT FoundVT = MVT::Other;
4535  const TargetRegisterClass *FoundRC = 0;
4536  for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
4537       E = TRI->regclass_end(); RCI != E; ++RCI) {
4538    EVT ThisVT = MVT::Other;
4539
4540    const TargetRegisterClass *RC = *RCI;
4541    // If none of the value types for this register class are valid, we
4542    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
4543    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
4544         I != E; ++I) {
4545      if (TLI.isTypeLegal(*I)) {
4546        // If we have already found this register in a different register class,
4547        // choose the one with the largest VT specified.  For example, on
4548        // PowerPC, we favor f64 register classes over f32.
4549        if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
4550          ThisVT = *I;
4551          break;
4552        }
4553      }
4554    }
4555
4556    if (ThisVT == MVT::Other) continue;
4557
4558    // NOTE: This isn't ideal.  In particular, this might allocate the
4559    // frame pointer in functions that need it (due to them not being taken
4560    // out of allocation, because a variable sized allocation hasn't been seen
4561    // yet).  This is a slight code pessimization, but should still work.
4562    for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
4563         E = RC->allocation_order_end(MF); I != E; ++I)
4564      if (*I == Reg) {
4565        // We found a matching register class.  Keep looking at others in case
4566        // we find one with larger registers that this physreg is also in.
4567        FoundRC = RC;
4568        FoundVT = ThisVT;
4569        break;
4570      }
4571  }
4572  return FoundRC;
4573}
4574
4575
4576namespace llvm {
4577/// AsmOperandInfo - This contains information for each constraint that we are
4578/// lowering.
4579class VISIBILITY_HIDDEN SDISelAsmOperandInfo :
4580    public TargetLowering::AsmOperandInfo {
4581public:
4582  /// CallOperand - If this is the result output operand or a clobber,
4583  /// this is null, otherwise it is the incoming operand to the CallInst.
4584  /// This gets modified as the asm is processed.
4585  SDValue CallOperand;
4586
4587  /// AssignedRegs - If this is a register or register class operand, this
4588  /// contains the set of registers corresponding to the operand.
4589  RegsForValue AssignedRegs;
4590
4591  explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
4592    : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
4593  }
4594
4595  /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
4596  /// busy in OutputRegs/InputRegs.
4597  void MarkAllocatedRegs(bool isOutReg, bool isInReg,
4598                         std::set<unsigned> &OutputRegs,
4599                         std::set<unsigned> &InputRegs,
4600                         const TargetRegisterInfo &TRI) const {
4601    if (isOutReg) {
4602      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
4603        MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
4604    }
4605    if (isInReg) {
4606      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
4607        MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
4608    }
4609  }
4610
4611  /// getCallOperandValEVT - Return the EVT of the Value* that this operand
4612  /// corresponds to.  If there is no Value* for this operand, it returns
4613  /// MVT::Other.
4614  EVT getCallOperandValEVT(LLVMContext &Context,
4615                           const TargetLowering &TLI,
4616                           const TargetData *TD) const {
4617    if (CallOperandVal == 0) return MVT::Other;
4618
4619    if (isa<BasicBlock>(CallOperandVal))
4620      return TLI.getPointerTy();
4621
4622    const llvm::Type *OpTy = CallOperandVal->getType();
4623
4624    // If this is an indirect operand, the operand is a pointer to the
4625    // accessed type.
4626    if (isIndirect)
4627      OpTy = cast<PointerType>(OpTy)->getElementType();
4628
4629    // If OpTy is not a single value, it may be a struct/union that we
4630    // can tile with integers.
4631    if (!OpTy->isSingleValueType() && OpTy->isSized()) {
4632      unsigned BitSize = TD->getTypeSizeInBits(OpTy);
4633      switch (BitSize) {
4634      default: break;
4635      case 1:
4636      case 8:
4637      case 16:
4638      case 32:
4639      case 64:
4640      case 128:
4641        OpTy = IntegerType::get(Context, BitSize);
4642        break;
4643      }
4644    }
4645
4646    return TLI.getValueType(OpTy, true);
4647  }
4648
4649private:
4650  /// MarkRegAndAliases - Mark the specified register and all aliases in the
4651  /// specified set.
4652  static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
4653                                const TargetRegisterInfo &TRI) {
4654    assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
4655    Regs.insert(Reg);
4656    if (const unsigned *Aliases = TRI.getAliasSet(Reg))
4657      for (; *Aliases; ++Aliases)
4658        Regs.insert(*Aliases);
4659  }
4660};
4661} // end llvm namespace.
4662
4663
4664/// GetRegistersForValue - Assign registers (virtual or physical) for the
4665/// specified operand.  We prefer to assign virtual registers, to allow the
4666/// register allocator to handle the assignment process.  However, if the asm uses
4667/// features that we can't model on machineinstrs, we have SDISel do the
4668/// allocation.  This produces generally horrible, but correct, code.
4669///
4670///   OpInfo describes the operand.
4671///   Input and OutputRegs are the set of already allocated physical registers.
4672///
4673void SelectionDAGBuilder::
4674GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
4675                     std::set<unsigned> &OutputRegs,
4676                     std::set<unsigned> &InputRegs) {
4677  LLVMContext &Context = FuncInfo.Fn->getContext();
4678
4679  // Compute whether this value requires an input register, an output register,
4680  // or both.
4681  bool isOutReg = false;
4682  bool isInReg = false;
4683  switch (OpInfo.Type) {
4684  case InlineAsm::isOutput:
4685    isOutReg = true;
4686
4687    // If there is an input constraint that matches this, we need to reserve
4688    // the input register so no other inputs allocate to it.
4689    isInReg = OpInfo.hasMatchingInput();
4690    break;
4691  case InlineAsm::isInput:
4692    isInReg = true;
4693    isOutReg = false;
4694    break;
4695  case InlineAsm::isClobber:
4696    isOutReg = true;
4697    isInReg = true;
4698    break;
4699  }
4700
4701
4702  MachineFunction &MF = DAG.getMachineFunction();
4703  SmallVector<unsigned, 4> Regs;
4704
4705  // If this is a constraint for a single physreg, or a constraint for a
4706  // register class, find it.
4707  std::pair<unsigned, const TargetRegisterClass*> PhysReg =
4708    TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
4709                                     OpInfo.ConstraintVT);
4710
4711  unsigned NumRegs = 1;
4712  if (OpInfo.ConstraintVT != MVT::Other) {
4713    // If this is an FP input in an integer register (or vice versa), insert a bit
4714    // cast of the input value.  More generally, handle any case where the input
4715    // value disagrees with the register class we plan to stick this in.
4716    if (OpInfo.Type == InlineAsm::isInput &&
4717        PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
4718      // Try to convert to the first EVT that the reg class contains.  If the
4719      // types are the same size, use a bitcast to convert (e.g. two differing
4720      // vector types).
4721      EVT RegVT = *PhysReg.second->vt_begin();
4722      if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
4723        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
4724                                         RegVT, OpInfo.CallOperand);
4725        OpInfo.ConstraintVT = RegVT;
4726      } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
4727        // If the input is an FP value and we want it in integer registers, do a
4728        // bitcast to the corresponding integer type.  This turns an f64 value
4729        // into i64, which can be passed with two i32 values on a 32-bit
4730        // machine.
4731        RegVT = EVT::getIntegerVT(Context,
4732                                  OpInfo.ConstraintVT.getSizeInBits());
4733        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
4734                                         RegVT, OpInfo.CallOperand);
4735        OpInfo.ConstraintVT = RegVT;
4736      }
4737    }
4738
4739    NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
4740  }
4741
4742  EVT RegVT;
4743  EVT ValueVT = OpInfo.ConstraintVT;
4744
4745  // If this is a constraint for a specific physical register, like {r17},
4746  // assign it now.
4747  if (unsigned AssignedReg = PhysReg.first) {
4748    const TargetRegisterClass *RC = PhysReg.second;
4749    if (OpInfo.ConstraintVT == MVT::Other)
4750      ValueVT = *RC->vt_begin();
4751
4752    // Get the actual register value type.  This is important, because the user
4753    // may have asked for (e.g.) the AX register in i32 type.  We need to
4754    // remember that AX is actually i16 to get the right extension.
4755    RegVT = *RC->vt_begin();
4756
4757    // This is an explicit reference to a physical register.
4758    Regs.push_back(AssignedReg);
4759
4760    // If this is an expanded reference, add the rest of the regs to Regs.
4761    if (NumRegs != 1) {
4762      TargetRegisterClass::iterator I = RC->begin();
4763      for (; *I != AssignedReg; ++I)
4764        assert(I != RC->end() && "Didn't find reg!");
4765
4766      // Already added the first reg.
4767      --NumRegs; ++I;
4768      for (; NumRegs; --NumRegs, ++I) {
4769        assert(I != RC->end() && "Ran out of registers to allocate!");
4770        Regs.push_back(*I);
4771      }
4772    }
4773    OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
4774    const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
4775    OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
4776    return;
4777  }
4778
4779  // Otherwise, if this was a reference to an LLVM register class, create vregs
4780  // for this reference.
4781  if (const TargetRegisterClass *RC = PhysReg.second) {
4782    RegVT = *RC->vt_begin();
4783    if (OpInfo.ConstraintVT == MVT::Other)
4784      ValueVT = RegVT;
4785
4786    // Create the appropriate number of virtual registers.
4787    MachineRegisterInfo &RegInfo = MF.getRegInfo();
4788    for (; NumRegs; --NumRegs)
4789      Regs.push_back(RegInfo.createVirtualRegister(RC));
4790
4791    OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT);
4792    return;
4793  }
4794
4795  // This is a reference to a register class that doesn't directly correspond
4796  // to an LLVM register class.  Allocate NumRegs consecutive, available,
4797  // registers from the class.
4798  std::vector<unsigned> RegClassRegs
4799    = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
4800                                            OpInfo.ConstraintVT);
4801
4802  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
4803  unsigned NumAllocated = 0;
4804  for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
4805    unsigned Reg = RegClassRegs[i];
4806    // See if this register is available.
4807    if ((isOutReg && OutputRegs.count(Reg)) ||   // Already used.
4808        (isInReg  && InputRegs.count(Reg))) {    // Already used.
4809      // Make sure we find consecutive registers.
4810      NumAllocated = 0;
4811      continue;
4812    }
4813
4814    // Check to see if this register is allocatable (i.e. don't give out the
4815    // stack pointer).
4816    const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
4817    if (!RC) {        // Couldn't allocate this register.
4818      // Reset NumAllocated to make sure we return consecutive registers.
4819      NumAllocated = 0;
4820      continue;
4821    }
4822
4823    // Okay, this register is good, we can use it.
4824    ++NumAllocated;
4825
4826    // If we allocated enough consecutive registers, succeed.
4827    if (NumAllocated == NumRegs) {
4828      unsigned RegStart = (i-NumAllocated)+1;
4829      unsigned RegEnd   = i+1;
4830      // Mark all of the allocated registers used.
4831      for (unsigned i = RegStart; i != RegEnd; ++i)
4832        Regs.push_back(RegClassRegs[i]);
4833
4834      OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(),
4835                                         OpInfo.ConstraintVT);
4836      OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
4837      return;
4838    }
4839  }
4840
4841  // Otherwise, we couldn't allocate enough registers for this.
4842}
4843
4844/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
4845/// processed uses a memory 'm' constraint.
4846static bool
4847hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
4848                          const TargetLowering &TLI) {
4849  for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
4850    InlineAsm::ConstraintInfo &CI = CInfos[i];
4851    for (unsigned j = 0, ee = CI.Codes.size(); j != ee; ++j) {
4852      TargetLowering::ConstraintType CType = TLI.getConstraintType(CI.Codes[j]);
4853      if (CType == TargetLowering::C_Memory)
4854        return true;
4855    }
4856
4857    // Indirect operands access memory.
4858    if (CI.isIndirect)
4859      return true;
4860  }
4861
4862  return false;
4863}
4864
4865/// visitInlineAsm - Handle a call to an InlineAsm object.
4866///
4867void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
4868  InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
4869
4870  /// ConstraintOperands - Information about all of the constraints.
4871  std::vector<SDISelAsmOperandInfo> ConstraintOperands;
4872
4873  std::set<unsigned> OutputRegs, InputRegs;
4874
4875  // Do a prepass over the constraints, canonicalizing them, and building up the
4876  // ConstraintOperands list.
4877  std::vector<InlineAsm::ConstraintInfo>
4878    ConstraintInfos = IA->ParseConstraints();
4879
4880  bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
4881
4882  SDValue Chain, Flag;
4883
4884  // We won't need to flush pending loads if this asm doesn't touch
4885  // memory and is nonvolatile.
4886  if (hasMemory || IA->hasSideEffects())
4887    Chain = getRoot();
4888  else
4889    Chain = DAG.getRoot();
4890
4891  unsigned ArgNo = 0;   // ArgNo - The argument of the CallInst.
4892  unsigned ResNo = 0;   // ResNo - The result number of the next output.
4893  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
4894    ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
4895    SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
4896
4897    EVT OpVT = MVT::Other;
4898
4899    // Compute the value type for each operand.
4900    switch (OpInfo.Type) {
4901    case InlineAsm::isOutput:
4902      // Indirect outputs just consume an argument.
4903      if (OpInfo.isIndirect) {
4904        OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
4905        break;
4906      }
4907
4908      // The return value of the call is this value.  As such, there is no
4909      // corresponding argument.
4910      assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) &&
4911             "Bad inline asm!");
4912      if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
4913        OpVT = TLI.getValueType(STy->getElementType(ResNo));
4914      } else {
4915        assert(ResNo == 0 && "Asm only has one result!");
4916        OpVT = TLI.getValueType(CS.getType());
4917      }
4918      ++ResNo;
4919      break;
4920    case InlineAsm::isInput:
4921      OpInfo.CallOperandVal = CS.getArgument(ArgNo++);
4922      break;
4923    case InlineAsm::isClobber:
4924      // Nothing to do.
4925      break;
4926    }
4927
4928    // If this is an input or an indirect output, process the call argument.
4929    // BasicBlocks are labels, currently appearing only in asm's.
4930    if (OpInfo.CallOperandVal) {
4931      // Strip bitcasts, if any.  This mostly comes up for functions.
4932      OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts();
4933
4934      if (BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
4935        OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
4936      } else {
4937        OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
4938      }
4939
4940      OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD);
4941    }
4942
4943    OpInfo.ConstraintVT = OpVT;
4944  }
4945
4946  // Second pass over the constraints: compute which constraint option to use
4947  // and assign registers to constraints that want a specific physreg.
4948  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
4949    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
4950
4951    // If this is an output operand with a matching input operand, look up the
4952    // matching input. If their types mismatch, e.g. one is an integer, the
4953    // other is floating point, or their sizes are different, flag it as an
4954    // error.
4955    if (OpInfo.hasMatchingInput()) {
4956      SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
4957      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
4958        if ((OpInfo.ConstraintVT.isInteger() !=
4959             Input.ConstraintVT.isInteger()) ||
4960            (OpInfo.ConstraintVT.getSizeInBits() !=
4961             Input.ConstraintVT.getSizeInBits())) {
4962          llvm_report_error("Unsupported asm: input constraint"
4963                            " with a matching output constraint of incompatible"
4964                            " type!");
4965        }
4966        Input.ConstraintVT = OpInfo.ConstraintVT;
4967      }
4968    }
4969
4970    // Compute the constraint code and ConstraintType to use.
4971    TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG);
4972
4973    // If this is a memory input, and if the operand is not indirect, do what we
4974    // need to provide an address for the memory input.
4975    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
4976        !OpInfo.isIndirect) {
4977      assert(OpInfo.Type == InlineAsm::isInput &&
4978             "Can only indirectify direct input operands!");
4979
4980      // Memory operands really want the address of the value.  If we don't have
4981      // an indirect input, put it in the constpool if we can, otherwise spill
4982      // it to a stack slot.
4983
4984      // If the operand is a float, integer, or vector constant, spill to a
4985      // constant pool entry to get its address.
4986      Value *OpVal = OpInfo.CallOperandVal;
4987      if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
4988          isa<ConstantVector>(OpVal)) {
4989        OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
4990                                                 TLI.getPointerTy());
4991      } else {
4992        // Otherwise, create a stack slot and emit a store to it before the
4993        // asm.
4994        const Type *Ty = OpVal->getType();
4995        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
4996        unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
4997        MachineFunction &MF = DAG.getMachineFunction();
4998        int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
4999        SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
5000        Chain = DAG.getStore(Chain, getCurDebugLoc(),
5001                             OpInfo.CallOperand, StackSlot, NULL, 0);
5002        OpInfo.CallOperand = StackSlot;
5003      }
5004
5005      // There is no longer a Value* corresponding to this operand.
5006      OpInfo.CallOperandVal = 0;
5007      // It is now an indirect operand.
5008      OpInfo.isIndirect = true;
5009    }
5010
5011    // If this constraint is for a specific register, allocate it before
5012    // anything else.
5013    if (OpInfo.ConstraintType == TargetLowering::C_Register)
5014      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
5015  }
5016  ConstraintInfos.clear();
5017
5018
5019  // Third pass - Loop over all of the operands, assigning virtual or physregs
5020  // to register class operands.
5021  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
5022    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
5023
5024    // C_Register operands have already been allocated, Other/Memory don't need
5025    // to be.
5026    if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
5027      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
5028  }
5029
5030  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
5031  std::vector<SDValue> AsmNodeOperands;
5032  AsmNodeOperands.push_back(SDValue());  // reserve space for input chain
5033  AsmNodeOperands.push_back(
5034          DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), MVT::Other));
5035
5036
5037  // Loop over all of the inputs, copying the operand values into the
5038  // appropriate registers and processing the output regs.
5039  RegsForValue RetValRegs;
5040
5041  // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
5042  std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
5043
5044  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
5045    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
5046
5047    switch (OpInfo.Type) {
5048    case InlineAsm::isOutput: {
5049      if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
5050          OpInfo.ConstraintType != TargetLowering::C_Register) {
5051        // Memory output, or 'other' output (e.g. 'X' constraint).
5052        assert(OpInfo.isIndirect && "Memory output must be indirect operand");
5053
5054        // Add information to the INLINEASM node to know about this output.
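        // 4 marks a memory operand and (1<<3) records that one operand (the
        // address) follows.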
5055        unsigned ResOpType = 4/*MEM*/ | (1<<3);
5056        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
5057                                                        TLI.getPointerTy()));
5058        AsmNodeOperands.push_back(OpInfo.CallOperand);
5059        break;
5060      }
5061
5062      // Otherwise, this is a register or register class output.
5063
5064      // Copy the output from the appropriate register.  Find a register that
5065      // we can use.
5066      if (OpInfo.AssignedRegs.Regs.empty()) {
5067        llvm_report_error("Couldn't allocate output reg for"
5068                          " constraint '" + OpInfo.ConstraintCode + "'!");
5069      }
5070
5071      // If this is an indirect operand, store through the pointer after the
5072      // asm.
5073      if (OpInfo.isIndirect) {
5074        IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
5075                                                      OpInfo.CallOperandVal));
5076      } else {
5077        // This is the result value of the call.
5078        assert(CS.getType() != Type::getVoidTy(*DAG.getContext()) &&
5079               "Bad inline asm!");
5080        // Concatenate this output onto the outputs list.
5081        RetValRegs.append(OpInfo.AssignedRegs);
5082      }
5083
5084      // Add information to the INLINEASM node to know that this register is
5085      // set.
5086      OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
5087                                               6 /* EARLYCLOBBER REGDEF */ :
5088                                               2 /* REGDEF */ ,
5089                                               false,
5090                                               0,
5091                                               DAG, AsmNodeOperands);
5092      break;
5093    }
5094    case InlineAsm::isInput: {
5095      SDValue InOperandVal = OpInfo.CallOperand;
5096
5097      if (OpInfo.isMatchingInputConstraint()) {   // Matching constraint?
5098        // If this is required to match an output register we have already set,
5099        // just use its register.
5100        unsigned OperandNo = OpInfo.getMatchedOperand();
5101
5102        // Scan until we find the definition of this operand that we already
5103        // emitted.  When we find it, create a RegsForValue operand.
5104        unsigned CurOp = 2;  // The first operand (0 is the chain, 1 the asm string).
5105        for (; OperandNo; --OperandNo) {
5106          // Advance to the next operand.
5107          unsigned OpFlag =
5108            cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
5109          assert(((OpFlag & 7) == 2 /*REGDEF*/ ||
5110                  (OpFlag & 7) == 6 /*EARLYCLOBBER REGDEF*/ ||
5111                  (OpFlag & 7) == 4 /*MEM*/) &&
5112                 "Skipped past definitions?");
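          // Each operand group is a flag word followed by its register (or
          // memory) operands, so step past the flag plus those operands.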
5113          CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
5114        }
5115
5116        unsigned OpFlag =
5117          cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
5118        if ((OpFlag & 7) == 2 /*REGDEF*/
5119            || (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) {
5120          // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
5121          if (OpInfo.isIndirect) {
5122            llvm_report_error("Don't know how to handle tied indirect "
5123                              "register inputs yet!");
5124          }
5125          RegsForValue MatchedRegs;
5126          MatchedRegs.TLI = &TLI;
5127          MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
5128          EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
5129          MatchedRegs.RegVTs.push_back(RegVT);
5130          MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
5131          for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
5132               i != e; ++i)
5133            MatchedRegs.Regs.
5134              push_back(RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
5135
5136          // Use the produced MatchedRegs object to copy the input into the new vregs.
5137          MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
5138                                    Chain, &Flag);
5139          MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/,
5140                                           true, OpInfo.getMatchedOperand(),
5141                                           DAG, AsmNodeOperands);
5142          break;
5143        } else {
5144          assert(((OpFlag & 7) == 4) && "Unknown matching constraint!");
5145          assert((InlineAsm::getNumOperandRegisters(OpFlag)) == 1 &&
5146                 "Unexpected number of operands");
5147          // Add information to the INLINEASM node to know about this input.
5148          // See InlineAsm.h isUseOperandTiedToDef.
5149          OpFlag |= 0x80000000 | (OpInfo.getMatchedOperand() << 16);
5150          AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
5151                                                          TLI.getPointerTy()));
5152          AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
5153          break;
5154        }
5155      }
5156
5157      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
5158        assert(!OpInfo.isIndirect &&
5159               "Don't know how to handle indirect other inputs yet!");
5160
5161        std::vector<SDValue> Ops;
5162        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
5163                                         hasMemory, Ops, DAG);
5164        if (Ops.empty()) {
5165          llvm_report_error("Invalid operand for inline asm"
5166                            " constraint '" + OpInfo.ConstraintCode + "'!");
5167        }
5168
5169        // Add information to the INLINEASM node to know about this input.
5170        unsigned ResOpType = 3 /*IMM*/ | (Ops.size() << 3);
5171        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
5172                                                        TLI.getPointerTy()));
5173        AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
5174        break;
5175      } else if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
5176        assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
5177        assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
5178               "Memory operands expect pointer values");
5179
5180        // Add information to the INLINEASM node to know about this input.
5181        unsigned ResOpType = 4/*MEM*/ | (1<<3);
5182        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
5183                                                        TLI.getPointerTy()));
5184        AsmNodeOperands.push_back(InOperandVal);
5185        break;
5186      }
5187
5188      assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
5189              OpInfo.ConstraintType == TargetLowering::C_Register) &&
5190             "Unknown constraint type!");
5191      assert(!OpInfo.isIndirect &&
5192             "Don't know how to handle indirect register inputs yet!");
5193
5194      // Copy the input into the appropriate registers.
5195      if (OpInfo.AssignedRegs.Regs.empty()) {
5196        llvm_report_error("Couldn't allocate input reg for"
5197                          " constraint '"+ OpInfo.ConstraintCode +"'!");
5198      }
5199
5200      OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
5201                                        Chain, &Flag);
5202
5203      OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0,
5204                                               DAG, AsmNodeOperands);
5205      break;
5206    }
5207    case InlineAsm::isClobber: {
5208      // Add the clobbered value to the operand list, so that the register
5209      // allocator is aware that the physreg got clobbered.
5210      if (!OpInfo.AssignedRegs.Regs.empty())
5211        OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */,
5212                                                 false, 0, DAG,AsmNodeOperands);
5213      break;
5214    }
5215    }
5216  }
5217
5218  // Finish up input operands.
5219  AsmNodeOperands[0] = Chain;
5220  if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
5221
5222  Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
5223                      DAG.getVTList(MVT::Other, MVT::Flag),
5224                      &AsmNodeOperands[0], AsmNodeOperands.size());
5225  Flag = Chain.getValue(1);
5226
5227  // If this asm returns a register value, copy the result from that register
5228  // and set it as the value of the call.
5229  if (!RetValRegs.Regs.empty()) {
5230    SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
5231                                             Chain, &Flag);
5232
5233    // FIXME: Why don't we do this for inline asms with MRVs?
5234    if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
5235      EVT ResultType = TLI.getValueType(CS.getType());
5236
5237      // If any of the results of the inline asm is a vector, it may have the
5238      // wrong width/num elts.  This can happen for register classes that can
5239      // contain multiple different value types.  The preg or vreg allocated may
5240      // not have the same VT as was expected.  Convert it to the right type
5241      // with bit_convert.
5242      if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
5243        Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
5244                          ResultType, Val);
5245
5246      } else if (ResultType != Val.getValueType() &&
5247                 ResultType.isInteger() && Val.getValueType().isInteger()) {
5248        // If a result value was tied to an input value, the computed result may
5249        // have a wider width than the expected result.  Extract the relevant
5250        // portion.
5251        Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
5252      }
5253
5254      assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
5255    }
5256
5257    setValue(CS.getInstruction(), Val);
5258    // Don't need to use this as a chain in this case.
5259    if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
5260      return;
5261  }
5262
5263  std::vector<std::pair<SDValue, Value*> > StoresToEmit;
5264
5265  // Process indirect outputs, first output all of the flagged copies out of
5266  // physregs.
5267  for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
5268    RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
5269    Value *Ptr = IndirectStoresToEmit[i].second;
5270    SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
5271                                             Chain, &Flag);
5272    StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
5273
5274  }
5275
5276  // Emit the non-flagged stores from the physregs.
5277  SmallVector<SDValue, 8> OutChains;
5278  for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i)
5279    OutChains.push_back(DAG.getStore(Chain, getCurDebugLoc(),
5280                                    StoresToEmit[i].first,
5281                                    getValue(StoresToEmit[i].second),
5282                                    StoresToEmit[i].second, 0));
5283  if (!OutChains.empty())
5284    Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
5285                        &OutChains[0], OutChains.size());
5286  DAG.setRoot(Chain);
5287}
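
// For illustration (example IR only; real constraint strings vary by target):
//
//   call void asm sideeffect "movl $0, $1", "i,*m"(i32 42, i32* %p)
//
// For each such operand the loop above appends a target-constant "flag word"
// followed by that operand's SDValues.  The flag word packs the operand kind
// into its low three bits and the number of following operands into the
// remaining bits, so one memory operand is encoded as 4/*MEM*/ | (1 << 3) and
// an immediate lowered to N values as 3/*IMM*/ | (N << 3).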
5288
5289void SelectionDAGBuilder::visitVAStart(CallInst &I) {
5290  DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
5291                          MVT::Other, getRoot(),
5292                          getValue(I.getOperand(1)),
5293                          DAG.getSrcValue(I.getOperand(1))));
5294}
5295
5296void SelectionDAGBuilder::visitVAArg(VAArgInst &I) {
5297  SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
5298                           getRoot(), getValue(I.getOperand(0)),
5299                           DAG.getSrcValue(I.getOperand(0)));
5300  setValue(&I, V);
5301  DAG.setRoot(V.getValue(1));
5302}
5303
5304void SelectionDAGBuilder::visitVAEnd(CallInst &I) {
5305  DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
5306                          MVT::Other, getRoot(),
5307                          getValue(I.getOperand(1)),
5308                          DAG.getSrcValue(I.getOperand(1))));
5309}
5310
5311void SelectionDAGBuilder::visitVACopy(CallInst &I) {
5312  DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
5313                          MVT::Other, getRoot(),
5314                          getValue(I.getOperand(1)),
5315                          getValue(I.getOperand(2)),
5316                          DAG.getSrcValue(I.getOperand(1)),
5317                          DAG.getSrcValue(I.getOperand(2))));
5318}
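
// For illustration, the four visitors above map varargs IR directly onto DAG
// nodes (example IR only):
//
//   call void @llvm.va_start(i8* %ap)            -->  ISD::VASTART
//   %v = va_arg i8** %ap, i32                    -->  ISD::VAARG (value, chain)
//   call void @llvm.va_end(i8* %ap)              -->  ISD::VAEND
//   call void @llvm.va_copy(i8* %dst, i8* %src)  -->  ISD::VACOPY
//
// Each node is chained through the current root so the target can later expand
// it into the ABI-specific va_list manipulation.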
5319
5320/// TargetLowering::LowerCallTo - This is the default LowerCallTo
5321/// implementation, which just calls LowerCall.
5322/// FIXME: When all targets are
5323/// migrated to using LowerCall, this hook should be integrated into SDISel.
5324std::pair<SDValue, SDValue>
5325TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
5326                            bool RetSExt, bool RetZExt, bool isVarArg,
5327                            bool isInreg, unsigned NumFixedArgs,
5328                            CallingConv::ID CallConv, bool isTailCall,
5329                            bool isReturnValueUsed,
5330                            SDValue Callee,
5331                            ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) {
5332
5333  assert((!isTailCall || PerformTailCallOpt) &&
5334         "isTailCall set when tail-call optimizations are disabled!");
5335
5336  // Handle all of the outgoing arguments.
5337  SmallVector<ISD::OutputArg, 32> Outs;
5338  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
5339    SmallVector<EVT, 4> ValueVTs;
5340    ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
5341    for (unsigned Value = 0, NumValues = ValueVTs.size();
5342         Value != NumValues; ++Value) {
5343      EVT VT = ValueVTs[Value];
5344      const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
5345      SDValue Op = SDValue(Args[i].Node.getNode(),
5346                           Args[i].Node.getResNo() + Value);
5347      ISD::ArgFlagsTy Flags;
5348      unsigned OriginalAlignment =
5349        getTargetData()->getABITypeAlignment(ArgTy);
5350
5351      if (Args[i].isZExt)
5352        Flags.setZExt();
5353      if (Args[i].isSExt)
5354        Flags.setSExt();
5355      if (Args[i].isInReg)
5356        Flags.setInReg();
5357      if (Args[i].isSRet)
5358        Flags.setSRet();
5359      if (Args[i].isByVal) {
5360        Flags.setByVal();
5361        const PointerType *Ty = cast<PointerType>(Args[i].Ty);
5362        const Type *ElementTy = Ty->getElementType();
5363        unsigned FrameAlign = getByValTypeAlignment(ElementTy);
5364        unsigned FrameSize  = getTargetData()->getTypeAllocSize(ElementTy);
5365        // For ByVal, the alignment should come from the front end; the back
5366        // end will guess when it is missing, but there are cases it cannot get right.
5367        if (Args[i].Alignment)
5368          FrameAlign = Args[i].Alignment;
5369        Flags.setByValAlign(FrameAlign);
5370        Flags.setByValSize(FrameSize);
5371      }
5372      if (Args[i].isNest)
5373        Flags.setNest();
5374      Flags.setOrigAlign(OriginalAlignment);
5375
5376      EVT PartVT = getRegisterType(RetTy->getContext(), VT);
5377      unsigned NumParts = getNumRegisters(RetTy->getContext(), VT);
5378      SmallVector<SDValue, 4> Parts(NumParts);
5379      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
5380
5381      if (Args[i].isSExt)
5382        ExtendKind = ISD::SIGN_EXTEND;
5383      else if (Args[i].isZExt)
5384        ExtendKind = ISD::ZERO_EXTEND;
5385
5386      getCopyToParts(DAG, dl, Op, &Parts[0], NumParts, PartVT, ExtendKind);
5387
5388      for (unsigned j = 0; j != NumParts; ++j) {
5389        // If this isn't the first piece, the alignment must be 1.
5390        ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs);
5391        if (NumParts > 1 && j == 0)
5392          MyFlags.Flags.setSplit();
5393        else if (j != 0)
5394          MyFlags.Flags.setOrigAlign(1);
5395
5396        Outs.push_back(MyFlags);
5397      }
5398    }
5399  }
5400
5401  // Handle the incoming return values from the call.
5402  SmallVector<ISD::InputArg, 32> Ins;
5403  SmallVector<EVT, 4> RetTys;
5404  ComputeValueVTs(*this, RetTy, RetTys);
5405  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
5406    EVT VT = RetTys[I];
5407    EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
5408    unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
5409    for (unsigned i = 0; i != NumRegs; ++i) {
5410      ISD::InputArg MyFlags;
5411      MyFlags.VT = RegisterVT;
5412      MyFlags.Used = isReturnValueUsed;
5413      if (RetSExt)
5414        MyFlags.Flags.setSExt();
5415      if (RetZExt)
5416        MyFlags.Flags.setZExt();
5417      if (isInreg)
5418        MyFlags.Flags.setInReg();
5419      Ins.push_back(MyFlags);
5420    }
5421  }
5422
5423  // Check if target-dependent constraints permit a tail call here.
5424  // Target-independent constraints should be checked by the caller.
5425  if (isTailCall &&
5426      !IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, Ins, DAG))
5427    isTailCall = false;
5428
5429  SmallVector<SDValue, 4> InVals;
5430  Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
5431                    Outs, Ins, dl, DAG, InVals);
5432
5433  // Verify that the target's LowerCall behaved as expected.
5434  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
5435         "LowerCall didn't return a valid chain!");
5436  assert((!isTailCall || InVals.empty()) &&
5437         "LowerCall emitted a return value for a tail call!");
5438  assert((isTailCall || InVals.size() == Ins.size()) &&
5439         "LowerCall didn't emit the correct number of values!");
5440  DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5441          assert(InVals[i].getNode() &&
5442                 "LowerCall emitted a null value!");
5443          assert(Ins[i].VT == InVals[i].getValueType() &&
5444                 "LowerCall emitted a value with the wrong type!");
5445        });
5446
5447  // For a tail call, the return value is merely live-out and there aren't
5448  // any nodes in the DAG representing it. Return a special value to
5449  // indicate that a tail call has been emitted and no more Instructions
5450  // should be processed in the current block.
5451  if (isTailCall) {
5452    DAG.setRoot(Chain);
5453    return std::make_pair(SDValue(), SDValue());
5454  }
5455
5456  // Collect the legal value parts into potentially illegal values
5457  // that correspond to the original function's return values.
5458  ISD::NodeType AssertOp = ISD::DELETED_NODE;
5459  if (RetSExt)
5460    AssertOp = ISD::AssertSext;
5461  else if (RetZExt)
5462    AssertOp = ISD::AssertZext;
5463  SmallVector<SDValue, 4> ReturnValues;
5464  unsigned CurReg = 0;
5465  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
5466    EVT VT = RetTys[I];
5467    EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
5468    unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
5469
5470    SDValue ReturnValue =
5471      getCopyFromParts(DAG, dl, &InVals[CurReg], NumRegs, RegisterVT, VT,
5472                       AssertOp);
5473    ReturnValues.push_back(ReturnValue);
5474    CurReg += NumRegs;
5475  }
5476
5477  // For a function returning void, there is no return value. We can't create
5478  // such a node, so we just return a null return value in that case; nothing
5479  // will actually look at the value.
5480  if (ReturnValues.empty())
5481    return std::make_pair(SDValue(), Chain);
5482
5483  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
5484                            DAG.getVTList(&RetTys[0], RetTys.size()),
5485                            &ReturnValues[0], ReturnValues.size());
5486
5487  return std::make_pair(Res, Chain);
5488}
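
// Hypothetical caller sketch (for exposition only; names and flag values are
// examples, not taken from this file):
//
//   TargetLowering::ArgListTy Args;
//   TargetLowering::ArgListEntry Entry;
//   Entry.Node = ArgValue;            // SDValue of the argument
//   Entry.Ty   = ArgIRType;           // its IR type
//   Args.push_back(Entry);
//   std::pair<SDValue, SDValue> Result =
//     TLI.LowerCallTo(Chain, RetTy, /*RetSExt=*/false, /*RetZExt=*/false,
//                     /*isVarArg=*/false, /*isInreg=*/false,
//                     /*NumFixedArgs=*/Args.size(), CallingConv::C,
//                     /*isTailCall=*/false, /*isReturnValueUsed=*/true,
//                     Callee, Args, DAG, dl);
//
// Result.first is the merged return value (null for void returns) and
// Result.second is the updated chain; for an emitted tail call both are null
// and the chain has already been installed as the DAG root.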
5489
5490void TargetLowering::LowerOperationWrapper(SDNode *N,
5491                                           SmallVectorImpl<SDValue> &Results,
5492                                           SelectionDAG &DAG) {
5493  SDValue Res = LowerOperation(SDValue(N, 0), DAG);
5494  if (Res.getNode())
5495    Results.push_back(Res);
5496}
5497
5498SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
5499  llvm_unreachable("LowerOperation not implemented for this target!");
5500  return SDValue();
5501}
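
// A target marking an operation as Custom is expected to override the hook
// above; a minimal hypothetical override might look like this (sketch only):
//
//   SDValue FooTargetLowering::LowerOperation(SDValue Op,
//                                             SelectionDAG &DAG) {
//     switch (Op.getOpcode()) {
//     case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
//     default: llvm_unreachable("unexpected custom lowering");
//     }
//     return SDValue();
//   }
//
// Reaching the default implementation above therefore indicates a target that
// requested custom lowering without providing it.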
5502
5503
5504void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
5505  SDValue Op = getValue(V);
5506  assert((Op.getOpcode() != ISD::CopyFromReg ||
5507          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
5508         "Copy from a reg to the same reg!");
5509  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
5510
5511  RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
5512  SDValue Chain = DAG.getEntryNode();
5513  RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
5514  PendingExports.push_back(Chain);
5515}
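
// For example, HandlePHINodesInSuccessorBlocks (later in this file) uses the
// routine above to materialize a constant PHI operand into a fresh virtual
// register in the predecessor block; the copy's chain is queued on
// PendingExports so it is emitted as part of this block.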
5516
5517#include "llvm/CodeGen/SelectionDAGISel.h"
5518
5519void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
5520  // If this is the entry block, emit arguments.
5521  Function &F = *LLVMBB->getParent();
5522  SelectionDAG &DAG = SDB->DAG;
5523  SDValue OldRoot = DAG.getRoot();
5524  DebugLoc dl = SDB->getCurDebugLoc();
5525  const TargetData *TD = TLI.getTargetData();
5526  SmallVector<ISD::InputArg, 16> Ins;
5527
5528  // Check whether the function can return without sret-demotion.
5529  SmallVector<EVT, 4> OutVTs;
5530  SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
5531  getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
5532                OutVTs, OutsFlags, TLI);
5533  FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
5534
5535  FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(),
5536    OutVTs, OutsFlags, DAG);
5537  if (!FLI.CanLowerReturn) {
5538    // Put in an sret pointer parameter before all the other parameters.
5539    SmallVector<EVT, 1> ValueVTs;
5540    ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
5541
5542    // NOTE: Assuming that a pointer will never break down into more than one VT
5543    // or more than one register.
5544    ISD::ArgFlagsTy Flags;
5545    Flags.setSRet();
5546    EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), ValueVTs[0]);
5547    ISD::InputArg RetArg(Flags, RegisterVT, true);
5548    Ins.push_back(RetArg);
5549  }
5550
5551  // Set up the incoming argument description vector.
5552  unsigned Idx = 1;
5553  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
5554       I != E; ++I, ++Idx) {
5555    SmallVector<EVT, 4> ValueVTs;
5556    ComputeValueVTs(TLI, I->getType(), ValueVTs);
5557    bool isArgValueUsed = !I->use_empty();
5558    for (unsigned Value = 0, NumValues = ValueVTs.size();
5559         Value != NumValues; ++Value) {
5560      EVT VT = ValueVTs[Value];
5561      const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
5562      ISD::ArgFlagsTy Flags;
5563      unsigned OriginalAlignment =
5564        TD->getABITypeAlignment(ArgTy);
5565
5566      if (F.paramHasAttr(Idx, Attribute::ZExt))
5567        Flags.setZExt();
5568      if (F.paramHasAttr(Idx, Attribute::SExt))
5569        Flags.setSExt();
5570      if (F.paramHasAttr(Idx, Attribute::InReg))
5571        Flags.setInReg();
5572      if (F.paramHasAttr(Idx, Attribute::StructRet))
5573        Flags.setSRet();
5574      if (F.paramHasAttr(Idx, Attribute::ByVal)) {
5575        Flags.setByVal();
5576        const PointerType *Ty = cast<PointerType>(I->getType());
5577        const Type *ElementTy = Ty->getElementType();
5578        unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy);
5579        unsigned FrameSize  = TD->getTypeAllocSize(ElementTy);
5580        // For ByVal, the alignment should be passed from the front end; the back
5581        // end will guess when it is missing, but there are cases it cannot get right.
5582        if (F.getParamAlignment(Idx))
5583          FrameAlign = F.getParamAlignment(Idx);
5584        Flags.setByValAlign(FrameAlign);
5585        Flags.setByValSize(FrameSize);
5586      }
5587      if (F.paramHasAttr(Idx, Attribute::Nest))
5588        Flags.setNest();
5589      Flags.setOrigAlign(OriginalAlignment);
5590
5591      EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
5592      unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
5593      for (unsigned i = 0; i != NumRegs; ++i) {
5594        ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
5595        if (NumRegs > 1 && i == 0)
5596          MyFlags.Flags.setSplit();
5597        // If this isn't the first piece, the alignment must be 1.
5598        else if (i > 0)
5599          MyFlags.Flags.setOrigAlign(1);
5600        Ins.push_back(MyFlags);
5601      }
5602    }
5603  }
5604
5605  // Call the target to set up the argument values.
5606  SmallVector<SDValue, 8> InVals;
5607  SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
5608                                             F.isVarArg(), Ins,
5609                                             dl, DAG, InVals);
5610
5611  // Verify that the target's LowerFormalArguments behaved as expected.
5612  assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
5613         "LowerFormalArguments didn't return a valid chain!");
5614  assert(InVals.size() == Ins.size() &&
5615         "LowerFormalArguments didn't emit the correct number of values!");
5616  DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5617          assert(InVals[i].getNode() &&
5618                 "LowerFormalArguments emitted a null value!");
5619          assert(Ins[i].VT == InVals[i].getValueType() &&
5620                 "LowerFormalArguments emitted a value with the wrong type!");
5621        });
5622
5623  // Update the DAG with the new chain value resulting from argument lowering.
5624  DAG.setRoot(NewRoot);
5625
5626  // Set up the argument values.
5627  unsigned i = 0;
5628  Idx = 1;
5629  if (!FLI.CanLowerReturn) {
5630    // Create a virtual register for the sret pointer, and put in a copy
5631    // from the sret argument into it.
5632    SmallVector<EVT, 1> ValueVTs;
5633    ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
5634    EVT VT = ValueVTs[0];
5635    EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
5636    ISD::NodeType AssertOp = ISD::DELETED_NODE;
5637    SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT,
5638                                        VT, AssertOp);
5639
5640    MachineFunction& MF = SDB->DAG.getMachineFunction();
5641    MachineRegisterInfo& RegInfo = MF.getRegInfo();
5642    unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
5643    FLI.DemoteRegister = SRetReg;
5644    NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(), SRetReg, ArgValue);
5645    DAG.setRoot(NewRoot);
5646
5647    // i indexes lowered arguments.  Bump it past the hidden sret argument.
5648    // Idx indexes LLVM arguments.  Don't touch it.
5649    ++i;
5650  }
5651  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
5652      ++I, ++Idx) {
5653    SmallVector<SDValue, 4> ArgValues;
5654    SmallVector<EVT, 4> ValueVTs;
5655    ComputeValueVTs(TLI, I->getType(), ValueVTs);
5656    unsigned NumValues = ValueVTs.size();
5657    for (unsigned Value = 0; Value != NumValues; ++Value) {
5658      EVT VT = ValueVTs[Value];
5659      EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
5660      unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
5661
5662      if (!I->use_empty()) {
5663        ISD::NodeType AssertOp = ISD::DELETED_NODE;
5664        if (F.paramHasAttr(Idx, Attribute::SExt))
5665          AssertOp = ISD::AssertSext;
5666        else if (F.paramHasAttr(Idx, Attribute::ZExt))
5667          AssertOp = ISD::AssertZext;
5668
5669        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
5670                                             PartVT, VT, AssertOp));
5671      }
5672      i += NumParts;
5673    }
5674    if (!I->use_empty()) {
5675      SDB->setValue(I, DAG.getMergeValues(&ArgValues[0], NumValues,
5676                                          SDB->getCurDebugLoc()));
5677      // If this argument is live outside of the entry block, insert a copy from
5678      // wherever we got it to the vreg that other BBs will reference it by.
5679      SDB->CopyToExportRegsIfNeeded(I);
5680    }
5681  }
5682  assert(i == InVals.size() && "Argument register count mismatch!");
5683
5684  // Finally, if the target has anything special to do, allow it to do so.
5685  // FIXME: this should insert code into the DAG!
5686  EmitFunctionEntryCode(F, SDB->DAG.getMachineFunction());
5687}
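
// Illustrative example (assuming a 32-bit target where i64 is not a legal
// register type): an IR argument "i64 %x" yields two MVT::i32 ISD::InputArg
// entries above.  The first part is marked with setSplit() and keeps the
// original ABI alignment; later parts get an OrigAlign of 1.  After the
// target's LowerFormalArguments returns, getCopyFromParts reassembles the two
// i32 pieces into the single i64 value the rest of the function refers to.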
5688
5689/// Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
5690/// ensure constants are generated when needed.  Remember the virtual registers
5691/// that need to be added to the Machine PHI nodes as input.  We cannot just
5692/// directly add them, because expansion might result in multiple MBBs for one
5693/// BB.  As such, the start of the BB might correspond to a different MBB than
5694/// the end.
5695///
5696void
5697SelectionDAGISel::HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB) {
5698  TerminatorInst *TI = LLVMBB->getTerminator();
5699
5700  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
5701
5702  // Check successor nodes' PHI nodes that expect a constant to be available
5703  // from this block.
5704  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
5705    BasicBlock *SuccBB = TI->getSuccessor(succ);
5706    if (!isa<PHINode>(SuccBB->begin())) continue;
5707    MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
5708
5709    // If this terminator has multiple identical successors (common for
5710    // switches), only handle each succ once.
5711    if (!SuccsHandled.insert(SuccMBB)) continue;
5712
5713    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
5714    PHINode *PN;
5715
5716    // At this point we know that there is a 1-1 correspondence between LLVM PHI
5717    // nodes and Machine PHI nodes, but the incoming operands have not been
5718    // emitted yet.
5719    for (BasicBlock::iterator I = SuccBB->begin();
5720         (PN = dyn_cast<PHINode>(I)); ++I) {
5721      // Ignore dead PHIs.
5722      if (PN->use_empty()) continue;
5723
5724      unsigned Reg;
5725      Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
5726
5727      if (Constant *C = dyn_cast<Constant>(PHIOp)) {
5728        unsigned &RegOut = SDB->ConstantsOut[C];
5729        if (RegOut == 0) {
5730          RegOut = FuncInfo->CreateRegForValue(C);
5731          SDB->CopyValueToVirtualRegister(C, RegOut);
5732        }
5733        Reg = RegOut;
5734      } else {
5735        Reg = FuncInfo->ValueMap[PHIOp];
5736        if (Reg == 0) {
5737          assert(isa<AllocaInst>(PHIOp) &&
5738                 FuncInfo->StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
5739                 "Didn't codegen value into a register!??");
5740          Reg = FuncInfo->CreateRegForValue(PHIOp);
5741          SDB->CopyValueToVirtualRegister(PHIOp, Reg);
5742        }
5743      }
5744
5745      // Remember that this register needs to be added to the machine PHI node as
5746      // the input for this MBB.
5747      SmallVector<EVT, 4> ValueVTs;
5748      ComputeValueVTs(TLI, PN->getType(), ValueVTs);
5749      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
5750        EVT VT = ValueVTs[vti];
5751        unsigned NumRegisters = TLI.getNumRegisters(*CurDAG->getContext(), VT);
5752        for (unsigned i = 0, e = NumRegisters; i != e; ++i)
5753          SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
5754        Reg += NumRegisters;
5755      }
5756    }
5757  }
5758  SDB->ConstantsOut.clear();
5759}
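
// Illustrative example (example IR only):
//
//   entry:
//     br i1 %c, label %then, label %join
//   then:
//     br label %join
//   join:
//     %p = phi i32 [ 7, %entry ], [ %v, %then ]
//
// While lowering "entry", the constant 7 is copied into a fresh virtual
// register via CopyValueToVirtualRegister, and a (machine PHI, register) pair
// is queued on PHINodesToUpdate for each register the PHI's type requires.
// The machine PHI's operands are filled in later, once it is known which MBB
// actually ends up as the predecessor.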
5760
5761/// This is the Fast-ISel version of HandlePHINodesInSuccessorBlocks. It only
5762/// supports legal types, and it emits MachineInstrs directly instead of
5763/// creating SelectionDAG nodes.
5764///
5765bool
5766SelectionDAGISel::HandlePHINodesInSuccessorBlocksFast(BasicBlock *LLVMBB,
5767                                                      FastISel *F) {
5768  TerminatorInst *TI = LLVMBB->getTerminator();
5769
5770  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
5771  unsigned OrigNumPHINodesToUpdate = SDB->PHINodesToUpdate.size();
5772
5773  // Check successor nodes' PHI nodes that expect a constant to be available
5774  // from this block.
5775  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
5776    BasicBlock *SuccBB = TI->getSuccessor(succ);
5777    if (!isa<PHINode>(SuccBB->begin())) continue;
5778    MachineBasicBlock *SuccMBB = FuncInfo->MBBMap[SuccBB];
5779
5780    // If this terminator has multiple identical successors (common for
5781    // switches), only handle each succ once.
5782    if (!SuccsHandled.insert(SuccMBB)) continue;
5783
5784    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
5785    PHINode *PN;
5786
5787    // At this point we know that there is a 1-1 correspondence between LLVM PHI
5788    // nodes and Machine PHI nodes, but the incoming operands have not been
5789    // emitted yet.
5790    for (BasicBlock::iterator I = SuccBB->begin();
5791         (PN = dyn_cast<PHINode>(I)); ++I) {
5792      // Ignore dead phi's.
5793      // Ignore dead PHIs.
5794
5795      // Only handle legal types. Two interesting things to note here. First,
5796      // by bailing out early, we may leave behind some dead instructions,
5797      // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its
5798      // own moves. Second, this check is necessary because FastISel doesn't
5799      // use CreateRegForValue to create registers, so it always creates
5800      // exactly one register for each non-void instruction.
5801      EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true);
5802      if (VT == MVT::Other || !TLI.isTypeLegal(VT)) {
5803        // Promote MVT::i1.
5804        if (VT == MVT::i1)
5805          VT = TLI.getTypeToTransformTo(*CurDAG->getContext(), VT);
5806        else {
5807          SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
5808          return false;
5809        }
5810      }
5811
5812      Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
5813
5814      unsigned Reg = F->getRegForValue(PHIOp);
5815      if (Reg == 0) {
5816        SDB->PHINodesToUpdate.resize(OrigNumPHINodesToUpdate);
5817        return false;
5818      }
5819      SDB->PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg));
5820    }
5821  }
5822
5823  return true;
5824}
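
// Example for the legality check above (illustrative): an i1 PHI is still
// handled on this fast path, since MVT::i1 is first promoted with
// getTypeToTransformTo.  A PHI of a type that remains illegal (say, i128 on a
// typical 32-bit target) makes the function discard its partial
// PHINodesToUpdate entries and return false, so the SelectionDAG-based
// handler above takes over for this block.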
5825