SelectionDAGBuilder.cpp revision 7da5d3fc657bf434a245d11ad91f22feb86296c5
1//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This implements routines for translating from LLVM IR into SelectionDAG IR.
11//
12//===----------------------------------------------------------------------===//
13
14#define DEBUG_TYPE "isel"
15#include "SDNodeDbgValue.h"
16#include "SelectionDAGBuilder.h"
17#include "llvm/ADT/BitVector.h"
18#include "llvm/ADT/SmallSet.h"
19#include "llvm/Analysis/AliasAnalysis.h"
20#include "llvm/Analysis/ConstantFolding.h"
21#include "llvm/Constants.h"
22#include "llvm/CallingConv.h"
23#include "llvm/DerivedTypes.h"
24#include "llvm/Function.h"
25#include "llvm/GlobalVariable.h"
26#include "llvm/InlineAsm.h"
27#include "llvm/Instructions.h"
28#include "llvm/Intrinsics.h"
29#include "llvm/IntrinsicInst.h"
30#include "llvm/LLVMContext.h"
31#include "llvm/Module.h"
32#include "llvm/CodeGen/Analysis.h"
33#include "llvm/CodeGen/FastISel.h"
34#include "llvm/CodeGen/FunctionLoweringInfo.h"
35#include "llvm/CodeGen/GCStrategy.h"
36#include "llvm/CodeGen/GCMetadata.h"
37#include "llvm/CodeGen/MachineFunction.h"
38#include "llvm/CodeGen/MachineFrameInfo.h"
39#include "llvm/CodeGen/MachineInstrBuilder.h"
40#include "llvm/CodeGen/MachineJumpTableInfo.h"
41#include "llvm/CodeGen/MachineModuleInfo.h"
42#include "llvm/CodeGen/MachineRegisterInfo.h"
43#include "llvm/CodeGen/PseudoSourceValue.h"
44#include "llvm/CodeGen/SelectionDAG.h"
45#include "llvm/Analysis/DebugInfo.h"
46#include "llvm/Target/TargetRegisterInfo.h"
47#include "llvm/Target/TargetData.h"
48#include "llvm/Target/TargetFrameInfo.h"
49#include "llvm/Target/TargetInstrInfo.h"
50#include "llvm/Target/TargetIntrinsicInfo.h"
51#include "llvm/Target/TargetLowering.h"
52#include "llvm/Target/TargetOptions.h"
53#include "llvm/Support/Compiler.h"
54#include "llvm/Support/CommandLine.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/MathExtras.h"
58#include "llvm/Support/raw_ostream.h"
59#include <algorithm>
60using namespace llvm;
61
62/// LimitFloatPrecision - Generate low-precision inline sequences for
63/// some float libcalls (6, 8 or 12 bits).
64static unsigned LimitFloatPrecision;
65
66static cl::opt<unsigned, true>
67LimitFPPrecision("limit-float-precision",
68                 cl::desc("Generate low-precision inline sequences "
69                          "for some float libcalls"),
70                 cl::location(LimitFloatPrecision),
71                 cl::init(0));
72
73/// getCopyFromParts - Create a value that contains the specified legal parts
74/// combined into the value they represent.  If the parts combine to a type
75/// larger than ValueVT then AssertOp can be used to specify whether the extra
76/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
77/// (ISD::AssertSext).
78static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
79                                const SDValue *Parts,
80                                unsigned NumParts, EVT PartVT, EVT ValueVT,
81                                ISD::NodeType AssertOp = ISD::DELETED_NODE) {
82  assert(NumParts > 0 && "No parts to assemble!");
83  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
84  SDValue Val = Parts[0];
85
86  if (NumParts > 1) {
87    // Assemble the value from multiple parts.
88    if (!ValueVT.isVector() && ValueVT.isInteger()) {
89      unsigned PartBits = PartVT.getSizeInBits();
90      unsigned ValueBits = ValueVT.getSizeInBits();
91
92      // Assemble the power-of-2 part.
93      unsigned RoundParts = NumParts & (NumParts - 1) ?
94        1 << Log2_32(NumParts) : NumParts;
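      // Illustrative example: with NumParts == 3 and 32-bit parts, RoundParts
      // is 2 (the largest power of 2 <= NumParts), so the low 64 bits are
      // assembled as a power-of-2 pair and the extra part is folded in as the
      // "odd" tail below.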
95      unsigned RoundBits = PartBits * RoundParts;
96      EVT RoundVT = RoundBits == ValueBits ?
97        ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
98      SDValue Lo, Hi;
99
100      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
101
102      if (RoundParts > 2) {
103        Lo = getCopyFromParts(DAG, dl, Parts, RoundParts / 2,
104                              PartVT, HalfVT);
105        Hi = getCopyFromParts(DAG, dl, Parts + RoundParts / 2,
106                              RoundParts / 2, PartVT, HalfVT);
107      } else {
108        Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]);
109        Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[1]);
110      }
111
112      if (TLI.isBigEndian())
113        std::swap(Lo, Hi);
114
115      Val = DAG.getNode(ISD::BUILD_PAIR, dl, RoundVT, Lo, Hi);
116
117      if (RoundParts < NumParts) {
118        // Assemble the trailing non-power-of-2 part.
119        unsigned OddParts = NumParts - RoundParts;
120        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
121        Hi = getCopyFromParts(DAG, dl,
122                              Parts + RoundParts, OddParts, PartVT, OddVT);
123
124        // Combine the round and odd parts.
125        Lo = Val;
126        if (TLI.isBigEndian())
127          std::swap(Lo, Hi);
128        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
129        Hi = DAG.getNode(ISD::ANY_EXTEND, dl, TotalVT, Hi);
130        Hi = DAG.getNode(ISD::SHL, dl, TotalVT, Hi,
131                         DAG.getConstant(Lo.getValueType().getSizeInBits(),
132                                         TLI.getPointerTy()));
133        Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, TotalVT, Lo);
134        Val = DAG.getNode(ISD::OR, dl, TotalVT, Lo, Hi);
135      }
136    } else if (ValueVT.isVector()) {
137      // Handle a multi-element vector.
138      EVT IntermediateVT, RegisterVT;
139      unsigned NumIntermediates;
140      unsigned NumRegs =
141        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
142                                   NumIntermediates, RegisterVT);
143      assert(NumRegs == NumParts
144             && "Part count doesn't match vector breakdown!");
145      NumParts = NumRegs; // Silence a compiler warning.
146      assert(RegisterVT == PartVT
147             && "Part type doesn't match vector breakdown!");
148      assert(RegisterVT == Parts[0].getValueType() &&
149             "Part type doesn't match part!");
150
151      // Assemble the parts into intermediate operands.
152      SmallVector<SDValue, 8> Ops(NumIntermediates);
153      if (NumIntermediates == NumParts) {
154        // If the register was not expanded, truncate or copy the value,
155        // as appropriate.
156        for (unsigned i = 0; i != NumParts; ++i)
157          Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1,
158                                    PartVT, IntermediateVT);
159      } else if (NumParts > 0) {
160        // If the intermediate type was expanded, build the intermediate
161        // operands from the parts.
162        assert(NumParts % NumIntermediates == 0 &&
163               "Must expand into a divisible number of parts!");
164        unsigned Factor = NumParts / NumIntermediates;
165        for (unsigned i = 0; i != NumIntermediates; ++i)
166          Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor,
167                                    PartVT, IntermediateVT);
168      }
169
170      // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
171      // intermediate operands.
172      Val = DAG.getNode(IntermediateVT.isVector() ?
173                        ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, dl,
174                        ValueVT, &Ops[0], NumIntermediates);
175    } else if (PartVT.isFloatingPoint()) {
176      // FP split into multiple FP parts (for ppcf128)
177      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
178             "Unexpected split");
179      SDValue Lo, Hi;
180      Lo = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[0]);
181      Hi = DAG.getNode(ISD::BIT_CONVERT, dl, EVT(MVT::f64), Parts[1]);
182      if (TLI.isBigEndian())
183        std::swap(Lo, Hi);
184      Val = DAG.getNode(ISD::BUILD_PAIR, dl, ValueVT, Lo, Hi);
185    } else {
186      // FP split into integer parts (soft fp)
187      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
188             !PartVT.isVector() && "Unexpected split");
189      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
190      Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
191    }
192  }
193
194  // There is now one part, held in Val.  Correct it to match ValueVT.
195  PartVT = Val.getValueType();
196
197  if (PartVT == ValueVT)
198    return Val;
199
200  if (PartVT.isVector()) {
201    assert(ValueVT.isVector() && "Unknown vector conversion!");
202    return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
203  }
204
205  if (ValueVT.isVector()) {
206    assert(ValueVT.getVectorElementType() == PartVT &&
207           ValueVT.getVectorNumElements() == 1 &&
208           "Only trivial scalar-to-vector conversions should get here!");
209    return DAG.getNode(ISD::BUILD_VECTOR, dl, ValueVT, Val);
210  }
211
212  if (PartVT.isInteger() &&
213      ValueVT.isInteger()) {
214    if (ValueVT.bitsLT(PartVT)) {
215      // For a truncate, see if we have any information to
216      // indicate whether the truncated bits will always be
217      // zero or sign-extension.
218      if (AssertOp != ISD::DELETED_NODE)
219        Val = DAG.getNode(AssertOp, dl, PartVT, Val,
220                          DAG.getValueType(ValueVT));
221      return DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
222    } else {
223      return DAG.getNode(ISD::ANY_EXTEND, dl, ValueVT, Val);
224    }
225  }
226
227  if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
228    if (ValueVT.bitsLT(Val.getValueType())) {
229      // FP_ROUND's are always exact here.
230      return DAG.getNode(ISD::FP_ROUND, dl, ValueVT, Val,
231                         DAG.getIntPtrConstant(1));
232    }
233
234    return DAG.getNode(ISD::FP_EXTEND, dl, ValueVT, Val);
235  }
236
237  if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
238    return DAG.getNode(ISD::BIT_CONVERT, dl, ValueVT, Val);
239
240  llvm_unreachable("Unknown mismatch!");
241  return SDValue();
242}
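// Illustrative example: on a 32-bit target, an i64 value arriving in two i32
// registers reaches getCopyFromParts above with NumParts == 2 and
// PartVT == MVT::i32; the two parts are joined with a BUILD_PAIR to form the
// i64 result.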
243
244/// getCopyToParts - Create a series of nodes that contain the specified value
245/// split into legal parts.  If the parts contain more bits than Val, then, for
246/// integers, ExtendKind can be used to specify how to generate the extra bits.
247static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
248                           SDValue Val, SDValue *Parts, unsigned NumParts,
249                           EVT PartVT,
250                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
251  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
252  EVT PtrVT = TLI.getPointerTy();
253  EVT ValueVT = Val.getValueType();
254  unsigned PartBits = PartVT.getSizeInBits();
255  unsigned OrigNumParts = NumParts;
256  assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!");
257
258  if (!NumParts)
259    return;
260
261  if (!ValueVT.isVector()) {
262    if (PartVT == ValueVT) {
263      assert(NumParts == 1 && "No-op copy with multiple parts!");
264      Parts[0] = Val;
265      return;
266    }
267
268    if (NumParts * PartBits > ValueVT.getSizeInBits()) {
269      // If the parts cover more bits than the value has, promote the value.
270      if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
271        assert(NumParts == 1 && "Do not know what to promote to!");
272        Val = DAG.getNode(ISD::FP_EXTEND, dl, PartVT, Val);
273      } else if (PartVT.isInteger() && ValueVT.isInteger()) {
274        ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
275        Val = DAG.getNode(ExtendKind, dl, ValueVT, Val);
276      } else {
277        llvm_unreachable("Unknown mismatch!");
278      }
279    } else if (PartBits == ValueVT.getSizeInBits()) {
280      // Different types of the same size.
281      assert(NumParts == 1 && PartVT != ValueVT);
282      Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
283    } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
284      // If the parts cover fewer bits than the value has, truncate the value.
285      if (PartVT.isInteger() && ValueVT.isInteger()) {
286        ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
287        Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
288      } else {
289        llvm_unreachable("Unknown mismatch!");
290      }
291    }
292
293    // The value may have changed - recompute ValueVT.
294    ValueVT = Val.getValueType();
295    assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
296           "Failed to tile the value with PartVT!");
297
298    if (NumParts == 1) {
299      assert(PartVT == ValueVT && "Type conversion failed!");
300      Parts[0] = Val;
301      return;
302    }
303
304    // Expand the value into multiple parts.
305    if (NumParts & (NumParts - 1)) {
306      // The number of parts is not a power of 2.  Split off and copy the tail.
307      assert(PartVT.isInteger() && ValueVT.isInteger() &&
308             "Do not know what to expand to!");
309      unsigned RoundParts = 1 << Log2_32(NumParts);
310      unsigned RoundBits = RoundParts * PartBits;
311      unsigned OddParts = NumParts - RoundParts;
312      SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val,
313                                   DAG.getConstant(RoundBits,
314                                                   TLI.getPointerTy()));
315      getCopyToParts(DAG, dl, OddVal, Parts + RoundParts,
316                     OddParts, PartVT);
317
318      if (TLI.isBigEndian())
319        // The odd parts were reversed by getCopyToParts - unreverse them.
320        std::reverse(Parts + RoundParts, Parts + NumParts);
321
322      NumParts = RoundParts;
323      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
324      Val = DAG.getNode(ISD::TRUNCATE, dl, ValueVT, Val);
325    }
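    // Illustrative example: copying an i96 value into three i32 parts takes
    // this path: RoundParts is 2, so the high 32 bits (Val >> 64) are written
    // to Parts[2], then Val is truncated to i64 and bisected into Parts[0]
    // and Parts[1] below (a final reversal handles big-endian targets).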
326
327    // The number of parts is a power of 2.  Repeatedly bisect the value using
328    // EXTRACT_ELEMENT.
329    Parts[0] = DAG.getNode(ISD::BIT_CONVERT, dl,
330                           EVT::getIntegerVT(*DAG.getContext(),
331                                             ValueVT.getSizeInBits()),
332                           Val);
333
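    // Illustrative example: with four 32-bit parts, the first pass
    // (StepSize == 4) splits the 128-bit value into 64-bit halves in Parts[0]
    // and Parts[2]; the second pass (StepSize == 2) splits each half again,
    // leaving the four i32 parts in little-endian order.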
334    for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
335      for (unsigned i = 0; i < NumParts; i += StepSize) {
336        unsigned ThisBits = StepSize * PartBits / 2;
337        EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
338        SDValue &Part0 = Parts[i];
339        SDValue &Part1 = Parts[i+StepSize/2];
340
341        Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
342                            ThisVT, Part0,
343                            DAG.getConstant(1, PtrVT));
344        Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
345                            ThisVT, Part0,
346                            DAG.getConstant(0, PtrVT));
347
348        if (ThisBits == PartBits && ThisVT != PartVT) {
349          Part0 = DAG.getNode(ISD::BIT_CONVERT, dl,
350                                                PartVT, Part0);
351          Part1 = DAG.getNode(ISD::BIT_CONVERT, dl,
352                                                PartVT, Part1);
353        }
354      }
355    }
356
357    if (TLI.isBigEndian())
358      std::reverse(Parts, Parts + OrigNumParts);
359
360    return;
361  }
362
363  // Vector ValueVT.
364  if (NumParts == 1) {
365    if (PartVT != ValueVT) {
366      if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
367        Val = DAG.getNode(ISD::BIT_CONVERT, dl, PartVT, Val);
368      } else {
369        assert(ValueVT.getVectorElementType() == PartVT &&
370               ValueVT.getVectorNumElements() == 1 &&
371               "Only trivial vector-to-scalar conversions should get here!");
372        Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
373                          PartVT, Val,
374                          DAG.getConstant(0, PtrVT));
375      }
376    }
377
378    Parts[0] = Val;
379    return;
380  }
381
382  // Handle a multi-element vector.
383  EVT IntermediateVT, RegisterVT;
384  unsigned NumIntermediates;
385  unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT,
386                              IntermediateVT, NumIntermediates, RegisterVT);
387  unsigned NumElements = ValueVT.getVectorNumElements();
388
389  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
390  NumParts = NumRegs; // Silence a compiler warning.
391  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
392
393  // Split the vector into intermediate operands.
394  SmallVector<SDValue, 8> Ops(NumIntermediates);
395  for (unsigned i = 0; i != NumIntermediates; ++i) {
396    if (IntermediateVT.isVector())
397      Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
398                           IntermediateVT, Val,
399                           DAG.getConstant(i * (NumElements / NumIntermediates),
400                                           PtrVT));
401    else
402      Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
403                           IntermediateVT, Val,
404                           DAG.getConstant(i, PtrVT));
405  }
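  // Illustrative example: splitting a v4f32 value into two v2f32
  // intermediates extracts subvectors starting at element indices 0 and 2.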
406
407  // Split the intermediate operands into legal parts.
408  if (NumParts == NumIntermediates) {
409    // If the register was not expanded, promote or copy the value,
410    // as appropriate.
411    for (unsigned i = 0; i != NumParts; ++i)
412      getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT);
413  } else if (NumParts > 0) {
414    // If the intermediate type was expanded, split each value into
415    // legal parts.
416    assert(NumParts % NumIntermediates == 0 &&
417           "Must expand into a divisible number of parts!");
418    unsigned Factor = NumParts / NumIntermediates;
419    for (unsigned i = 0; i != NumIntermediates; ++i)
420      getCopyToParts(DAG, dl, Ops[i], &Parts[i*Factor], Factor, PartVT);
421  }
422}
423
424namespace {
425  /// RegsForValue - This struct represents the registers (physical or virtual)
426  /// that a particular set of values is assigned, and the type information
427  /// about the value. The most common situation is to represent one value at a
428  /// time, but struct or array values are handled element-wise as multiple
429  /// values.  The splitting of aggregates is performed recursively, so that we
430  /// never have aggregate-typed registers. The values at this point do not
431  /// necessarily have legal types, so each value may require one or more
432  /// registers of some legal type.
433  ///
434  struct RegsForValue {
435    /// ValueVTs - The value types of the values, which may not be legal, and
436    /// may need to be promoted or synthesized from one or more registers.
437    ///
438    SmallVector<EVT, 4> ValueVTs;
439
440    /// RegVTs - The value types of the registers. This is the same size as
441    /// ValueVTs and it records, for each value, what the type of the assigned
442    /// register or registers are. (Individual values are never synthesized
443    /// from more than one type of register.)
444    ///
445    /// With virtual registers, the contents of RegVTs are redundant with TLI's
446    /// getRegisterType member function; however, with physical registers
447    /// it is necessary to have a separate record of the types.
448    ///
449    SmallVector<EVT, 4> RegVTs;
450
451    /// Regs - This list holds the registers assigned to the values.
452    /// Each legal or promoted value requires one register, and each
453    /// expanded value requires multiple registers.
454    ///
455    SmallVector<unsigned, 4> Regs;
456
457    RegsForValue() {}
458
459    RegsForValue(const SmallVector<unsigned, 4> &regs,
460                 EVT regvt, EVT valuevt)
461      : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {}
462
463    RegsForValue(const SmallVector<unsigned, 4> &regs,
464                 const SmallVector<EVT, 4> &regvts,
465                 const SmallVector<EVT, 4> &valuevts)
466      : ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {}
467
468    RegsForValue(LLVMContext &Context, const TargetLowering &tli,
469                 unsigned Reg, const Type *Ty) {
470      ComputeValueVTs(tli, Ty, ValueVTs);
471
472      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
473        EVT ValueVT = ValueVTs[Value];
474        unsigned NumRegs = tli.getNumRegisters(Context, ValueVT);
475        EVT RegisterVT = tli.getRegisterType(Context, ValueVT);
476        for (unsigned i = 0; i != NumRegs; ++i)
477          Regs.push_back(Reg + i);
478        RegVTs.push_back(RegisterVT);
479        Reg += NumRegs;
480      }
481    }
482
483    /// areValueTypesLegal - Return true if types of all the values are legal.
484    bool areValueTypesLegal(const TargetLowering &TLI) {
485      for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
486        EVT RegisterVT = RegVTs[Value];
487        if (!TLI.isTypeLegal(RegisterVT))
488          return false;
489      }
490      return true;
491    }
492
493    /// append - Add the specified values to this one.
494    void append(const RegsForValue &RHS) {
495      ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end());
496      RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end());
497      Regs.append(RHS.Regs.begin(), RHS.Regs.end());
498    }
499
500    /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
501    /// this value and returns the result as a ValueVTs value.  This uses
502    /// Chain/Flag as the input and updates them for the output Chain/Flag.
503    /// If the Flag pointer is NULL, no flag is used.
504    SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo,
505                            DebugLoc dl,
506                            SDValue &Chain, SDValue *Flag) const;
507
508    /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
509    /// specified value into the registers specified by this object.  This uses
510    /// Chain/Flag as the input and updates them for the output Chain/Flag.
511    /// If the Flag pointer is NULL, no flag is used.
512    void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
513                       SDValue &Chain, SDValue *Flag) const;
514
515    /// AddInlineAsmOperands - Add this value to the specified inlineasm node
516    /// operand list.  This adds the code marker, matching input operand index
517    /// (if applicable), and includes the number of values added into it.
518    void AddInlineAsmOperands(unsigned Kind,
519                              bool HasMatching, unsigned MatchingIdx,
520                              SelectionDAG &DAG,
521                              std::vector<SDValue> &Ops) const;
522  };
523}
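// Illustrative example: for an i64 value on a target whose widest legal
// integer register type is i32, the RegsForValue constructor above produces
// ValueVTs == {i64}, RegVTs == {i32}, and two consecutive registers in Regs.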
524
525/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from
526/// this value and returns the result as a ValueVT value.  This uses
527/// Chain/Flag as the input and updates them for the output Chain/Flag.
528/// If the Flag pointer is NULL, no flag is used.
529SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
530                                      FunctionLoweringInfo &FuncInfo,
531                                      DebugLoc dl,
532                                      SDValue &Chain, SDValue *Flag) const {
533  // A Value with type {} or [0 x %t] needs no registers.
534  if (ValueVTs.empty())
535    return SDValue();
536
537  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
538
539  // Assemble the legal parts into the final values.
540  SmallVector<SDValue, 4> Values(ValueVTs.size());
541  SmallVector<SDValue, 8> Parts;
542  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
543    // Copy the legal parts from the registers.
544    EVT ValueVT = ValueVTs[Value];
545    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
546    EVT RegisterVT = RegVTs[Value];
547
548    Parts.resize(NumRegs);
549    for (unsigned i = 0; i != NumRegs; ++i) {
550      SDValue P;
551      if (Flag == 0) {
552        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
553      } else {
554        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
555        *Flag = P.getValue(2);
556      }
557
558      Chain = P.getValue(1);
559
560      // If the source register was virtual and if we know something about it,
561      // add an assert node.
562      if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
563          RegisterVT.isInteger() && !RegisterVT.isVector()) {
564        unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
565        if (FuncInfo.LiveOutRegInfo.size() > SlotNo) {
566          const FunctionLoweringInfo::LiveOutInfo &LOI =
567            FuncInfo.LiveOutRegInfo[SlotNo];
568
569          unsigned RegSize = RegisterVT.getSizeInBits();
570          unsigned NumSignBits = LOI.NumSignBits;
571          unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
572
573          // FIXME: We capture more information than the dag can represent.  For
574          // now, just use the tightest assertzext/assertsext possible.
575          bool isSExt = true;
576          EVT FromVT(MVT::Other);
577          if (NumSignBits == RegSize)
578            isSExt = true, FromVT = MVT::i1;   // ASSERT SEXT 1
579          else if (NumZeroBits >= RegSize-1)
580            isSExt = false, FromVT = MVT::i1;  // ASSERT ZEXT 1
581          else if (NumSignBits > RegSize-8)
582            isSExt = true, FromVT = MVT::i8;   // ASSERT SEXT 8
583          else if (NumZeroBits >= RegSize-8)
584            isSExt = false, FromVT = MVT::i8;  // ASSERT ZEXT 8
585          else if (NumSignBits > RegSize-16)
586            isSExt = true, FromVT = MVT::i16;  // ASSERT SEXT 16
587          else if (NumZeroBits >= RegSize-16)
588            isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
589          else if (NumSignBits > RegSize-32)
590            isSExt = true, FromVT = MVT::i32;  // ASSERT SEXT 32
591          else if (NumZeroBits >= RegSize-32)
592            isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
593
594          if (FromVT != MVT::Other)
595            P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
596                            RegisterVT, P, DAG.getValueType(FromVT));
597        }
598      }
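      // Illustrative example: if the defining block proved that the upper 24
      // bits of a 32-bit virtual register are zero (NumZeroBits >= 24), the
      // copy is wrapped in an AssertZext with an i8 value type.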
599
600      Parts[i] = P;
601    }
602
603    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
604                                     NumRegs, RegisterVT, ValueVT);
605    Part += NumRegs;
606    Parts.clear();
607  }
608
609  return DAG.getNode(ISD::MERGE_VALUES, dl,
610                     DAG.getVTList(&ValueVTs[0], ValueVTs.size()),
611                     &Values[0], ValueVTs.size());
612}
613
614/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
615/// specified value into the registers specified by this object.  This uses
616/// Chain/Flag as the input and updates them for the output Chain/Flag.
617/// If the Flag pointer is NULL, no flag is used.
618void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
619                                 SDValue &Chain, SDValue *Flag) const {
620  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
621
622  // Get the list of the values' legal parts.
623  unsigned NumRegs = Regs.size();
624  SmallVector<SDValue, 8> Parts(NumRegs);
625  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
626    EVT ValueVT = ValueVTs[Value];
627    unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT);
628    EVT RegisterVT = RegVTs[Value];
629
630    getCopyToParts(DAG, dl,
631                   Val.getValue(Val.getResNo() + Value),
632                   &Parts[Part], NumParts, RegisterVT);
633    Part += NumParts;
634  }
635
636  // Copy the parts into the registers.
637  SmallVector<SDValue, 8> Chains(NumRegs);
638  for (unsigned i = 0; i != NumRegs; ++i) {
639    SDValue Part;
640    if (Flag == 0) {
641      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
642    } else {
643      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
644      *Flag = Part.getValue(1);
645    }
646
647    Chains[i] = Part.getValue(0);
648  }
649
650  if (NumRegs == 1 || Flag)
651    // If NumRegs > 1 && Flag is used, then the use of the last CopyToReg is
652    // flagged to it. That is, the CopyToReg nodes and the user are considered
653    // a single scheduling unit. If we create a TokenFactor and return it as
654    // chain, then the TokenFactor is both a predecessor (operand) of the
655    // user as well as a successor (the TF operands are flagged to the user).
656    // c1, f1 = CopyToReg
657    // c2, f2 = CopyToReg
658    // c3     = TokenFactor c1, c2
659    // ...
660    //        = op c3, ..., f2
661    Chain = Chains[NumRegs-1];
662  else
663    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs);
664}
665
666/// AddInlineAsmOperands - Add this value to the specified inlineasm node
667/// operand list.  This adds the code marker and includes the number of
668/// values added into it.
669void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
670                                        unsigned MatchingIdx,
671                                        SelectionDAG &DAG,
672                                        std::vector<SDValue> &Ops) const {
673  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
674
675  unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
676  if (HasMatching)
677    Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
678  SDValue Res = DAG.getTargetConstant(Flag, MVT::i32);
679  Ops.push_back(Res);
680
681  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
682    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
683    EVT RegisterVT = RegVTs[Value];
684    for (unsigned i = 0; i != NumRegs; ++i) {
685      assert(Reg < Regs.size() && "Mismatch in # registers expected");
686      Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT));
687    }
688  }
689}
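// Illustrative example: a register operand occupying two registers is emitted
// above as a single target-constant flag word (encoding the operand kind and
// a register count of 2) followed by the two register nodes.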
690
691void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) {
692  AA = &aa;
693  GFI = gfi;
694  TD = DAG.getTarget().getTargetData();
695}
696
697/// clear - Clear out the current SelectionDAG and the associated
698/// state and prepare this SelectionDAGBuilder object to be used
699/// for a new block. This doesn't clear out information about
700/// additional blocks that are needed to complete switch lowering
701/// or PHI node updating; that information is cleared out as it is
702/// consumed.
703void SelectionDAGBuilder::clear() {
704  NodeMap.clear();
705  UnusedArgNodeMap.clear();
706  PendingLoads.clear();
707  PendingExports.clear();
708  DanglingDebugInfoMap.clear();
709  CurDebugLoc = DebugLoc();
710  HasTailCall = false;
711}
712
713/// getRoot - Return the current virtual root of the Selection DAG,
714/// flushing any PendingLoad items. This must be done before emitting
715/// a store or any other node that may need to be ordered after any
716/// prior load instructions.
717///
718SDValue SelectionDAGBuilder::getRoot() {
719  if (PendingLoads.empty())
720    return DAG.getRoot();
721
722  if (PendingLoads.size() == 1) {
723    SDValue Root = PendingLoads[0];
724    DAG.setRoot(Root);
725    PendingLoads.clear();
726    return Root;
727  }
728
729  // Otherwise, we have to make a token factor node.
730  SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
731                               &PendingLoads[0], PendingLoads.size());
732  PendingLoads.clear();
733  DAG.setRoot(Root);
734  return Root;
735}
736
737/// getControlRoot - Similar to getRoot, but instead of flushing all the
738/// PendingLoad items, flush all the PendingExports items. It is necessary
739/// to do this before emitting a terminator instruction.
740///
741SDValue SelectionDAGBuilder::getControlRoot() {
742  SDValue Root = DAG.getRoot();
743
744  if (PendingExports.empty())
745    return Root;
746
747  // Turn all of the CopyToReg chains into one factored node.
748  if (Root.getOpcode() != ISD::EntryToken) {
749    unsigned i = 0, e = PendingExports.size();
750    for (; i != e; ++i) {
751      assert(PendingExports[i].getNode()->getNumOperands() > 1);
752      if (PendingExports[i].getNode()->getOperand(0) == Root)
753        break;  // Don't add the root if we already indirectly depend on it.
754    }
755
756    if (i == e)
757      PendingExports.push_back(Root);
758  }
759
760  Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
761                     &PendingExports[0],
762                     PendingExports.size());
763  PendingExports.clear();
764  DAG.setRoot(Root);
765  return Root;
766}
767
768void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) {
769  if (DAG.GetOrdering(Node) != 0) return; // Already has ordering.
770  DAG.AssignOrdering(Node, SDNodeOrder);
771
772  for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I)
773    AssignOrderingToNode(Node->getOperand(I).getNode());
774}
775
776void SelectionDAGBuilder::visit(const Instruction &I) {
777  // Set up outgoing PHI node register values before emitting the terminator.
778  if (isa<TerminatorInst>(&I))
779    HandlePHINodesInSuccessorBlocks(I.getParent());
780
781  CurDebugLoc = I.getDebugLoc();
782
783  visit(I.getOpcode(), I);
784
785  if (!isa<TerminatorInst>(&I) && !HasTailCall)
786    CopyToExportRegsIfNeeded(&I);
787
788  CurDebugLoc = DebugLoc();
789}
790
791void SelectionDAGBuilder::visitPHI(const PHINode &) {
792  llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
793}
794
795void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
796  // Note: this doesn't use InstVisitor, because it has to work with
797  // ConstantExpr's in addition to instructions.
798  switch (Opcode) {
799  default: llvm_unreachable("Unknown instruction type encountered!");
800    // Build the switch statement using the Instruction.def file.
801#define HANDLE_INST(NUM, OPCODE, CLASS) \
802    case Instruction::OPCODE: visit##OPCODE((CLASS&)I); break;
803#include "llvm/Instruction.def"
804  }
805
806  // Assign the ordering to the freshly created DAG nodes.
807  if (NodeMap.count(&I)) {
808    ++SDNodeOrder;
809    AssignOrderingToNode(getValue(&I).getNode());
810  }
811}
812
813// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
814// generate the debug data structures now that we've seen its definition.
815void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
816                                                   SDValue Val) {
817  DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
818  if (DDI.getDI()) {
819    const DbgValueInst *DI = DDI.getDI();
820    DebugLoc dl = DDI.getdl();
821    unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
822    MDNode *Variable = DI->getVariable();
823    uint64_t Offset = DI->getOffset();
824    SDDbgValue *SDV;
825    if (Val.getNode()) {
826      if (!EmitFuncArgumentDbgValue(*DI, V, Variable, Offset, Val)) {
827        SDV = DAG.getDbgValue(Variable, Val.getNode(),
828                              Val.getResNo(), Offset, dl, DbgSDNodeOrder);
829        DAG.AddDbgValue(SDV, Val.getNode(), false);
830      }
831    } else {
832      SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
833                            Offset, dl, SDNodeOrder);
834      DAG.AddDbgValue(SDV, 0, false);
835    }
836    DanglingDebugInfoMap[V] = DanglingDebugInfo();
837  }
838}
839
840// getValue - Return an SDValue for the given Value.
841SDValue SelectionDAGBuilder::getValue(const Value *V) {
842  // If we already have an SDValue for this value, use it. It's important
843  // to do this first, so that we don't create a CopyFromReg if we already
844  // have a regular SDValue.
845  SDValue &N = NodeMap[V];
846  if (N.getNode()) return N;
847
848  // If there's a virtual register allocated and initialized for this
849  // value, use it.
850  DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
851  if (It != FuncInfo.ValueMap.end()) {
852    unsigned InReg = It->second;
853    RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
854    SDValue Chain = DAG.getEntryNode();
855    return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL);
856  }
857
858  // Otherwise create a new SDValue and remember it.
859  SDValue Val = getValueImpl(V);
860  NodeMap[V] = Val;
861  resolveDanglingDebugInfo(V, Val);
862  return Val;
863}
864
865/// getNonRegisterValue - Return an SDValue for the given Value, but
866/// don't look in FuncInfo.ValueMap for a virtual register.
867SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
868  // If we already have an SDValue for this value, use it.
869  SDValue &N = NodeMap[V];
870  if (N.getNode()) return N;
871
872  // Otherwise create a new SDValue and remember it.
873  SDValue Val = getValueImpl(V);
874  NodeMap[V] = Val;
875  resolveDanglingDebugInfo(V, Val);
876  return Val;
877}
878
879/// getValueImpl - Helper function for getValue and getNonRegisterValue.
880/// Create an SDValue for the given value.
881SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
882  if (const Constant *C = dyn_cast<Constant>(V)) {
883    EVT VT = TLI.getValueType(V->getType(), true);
884
885    if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
886      return DAG.getConstant(*CI, VT);
887
888    if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
889      return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT);
890
891    if (isa<ConstantPointerNull>(C))
892      return DAG.getConstant(0, TLI.getPointerTy());
893
894    if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
895      return DAG.getConstantFP(*CFP, VT);
896
897    if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
898      return DAG.getUNDEF(VT);
899
900    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
901      visit(CE->getOpcode(), *CE);
902      SDValue N1 = NodeMap[V];
903      assert(N1.getNode() && "visit didn't populate the NodeMap!");
904      return N1;
905    }
906
907    if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
908      SmallVector<SDValue, 4> Constants;
909      for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
910           OI != OE; ++OI) {
911        SDNode *Val = getValue(*OI).getNode();
912        // If the operand is an empty aggregate, there are no values.
913        if (!Val) continue;
914        // Add each leaf value from the operand to the Constants list
915        // to form a flattened list of all the values.
916        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
917          Constants.push_back(SDValue(Val, i));
918      }
919
920      return DAG.getMergeValues(&Constants[0], Constants.size(),
921                                getCurDebugLoc());
922    }
923
924    if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
925      assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
926             "Unknown struct or array constant!");
927
928      SmallVector<EVT, 4> ValueVTs;
929      ComputeValueVTs(TLI, C->getType(), ValueVTs);
930      unsigned NumElts = ValueVTs.size();
931      if (NumElts == 0)
932        return SDValue(); // empty struct
933      SmallVector<SDValue, 4> Constants(NumElts);
934      for (unsigned i = 0; i != NumElts; ++i) {
935        EVT EltVT = ValueVTs[i];
936        if (isa<UndefValue>(C))
937          Constants[i] = DAG.getUNDEF(EltVT);
938        else if (EltVT.isFloatingPoint())
939          Constants[i] = DAG.getConstantFP(0, EltVT);
940        else
941          Constants[i] = DAG.getConstant(0, EltVT);
942      }
943
944      return DAG.getMergeValues(&Constants[0], NumElts,
945                                getCurDebugLoc());
946    }
947
948    if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
949      return DAG.getBlockAddress(BA, VT);
950
951    const VectorType *VecTy = cast<VectorType>(V->getType());
952    unsigned NumElements = VecTy->getNumElements();
953
954    // Now that we know the number and type of the elements, get that number of
955    // elements into the Ops array based on what kind of constant it is.
956    SmallVector<SDValue, 16> Ops;
957    if (const ConstantVector *CP = dyn_cast<ConstantVector>(C)) {
958      for (unsigned i = 0; i != NumElements; ++i)
959        Ops.push_back(getValue(CP->getOperand(i)));
960    } else {
961      assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
962      EVT EltVT = TLI.getValueType(VecTy->getElementType());
963
964      SDValue Op;
965      if (EltVT.isFloatingPoint())
966        Op = DAG.getConstantFP(0, EltVT);
967      else
968        Op = DAG.getConstant(0, EltVT);
969      Ops.assign(NumElements, Op);
970    }
971
972    // Create a BUILD_VECTOR node.
973    return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
974                                    VT, &Ops[0], Ops.size());
975  }
976
977  // If this is a static alloca, generate it as the frame index instead of
978  // a computation.
979  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
980    DenseMap<const AllocaInst*, int>::iterator SI =
981      FuncInfo.StaticAllocaMap.find(AI);
982    if (SI != FuncInfo.StaticAllocaMap.end())
983      return DAG.getFrameIndex(SI->second, TLI.getPointerTy());
984  }
985
986  // If this is an instruction which fast-isel has deferred, select it now.
987  if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
988    unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
989    RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType());
990    SDValue Chain = DAG.getEntryNode();
991    return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
992  }
993
994  llvm_unreachable("Can't get register for value!");
995  return SDValue();
996}
997
998void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
999  SDValue Chain = getControlRoot();
1000  SmallVector<ISD::OutputArg, 8> Outs;
1001  SmallVector<SDValue, 8> OutVals;
1002
1003  if (!FuncInfo.CanLowerReturn) {
1004    unsigned DemoteReg = FuncInfo.DemoteRegister;
1005    const Function *F = I.getParent()->getParent();
1006
1007    // Emit a store of the return value through the virtual register.
1008    // Leave Outs empty so that LowerReturn won't try to load return
1009    // registers the usual way.
1010    SmallVector<EVT, 1> PtrValueVTs;
1011    ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
1012                    PtrValueVTs);
1013
1014    SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
1015    SDValue RetOp = getValue(I.getOperand(0));
1016
1017    SmallVector<EVT, 4> ValueVTs;
1018    SmallVector<uint64_t, 4> Offsets;
1019    ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
1020    unsigned NumValues = ValueVTs.size();
1021
1022    SmallVector<SDValue, 4> Chains(NumValues);
1023    EVT PtrVT = PtrValueVTs[0];
1024    for (unsigned i = 0; i != NumValues; ++i) {
1025      SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr,
1026                                DAG.getConstant(Offsets[i], PtrVT));
1027      Chains[i] =
1028        DAG.getStore(Chain, getCurDebugLoc(),
1029                     SDValue(RetOp.getNode(), RetOp.getResNo() + i),
1030                     Add, NULL, Offsets[i], false, false, 0);
1031    }
1032
1033    Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
1034                        MVT::Other, &Chains[0], NumValues);
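    // Illustrative example: returning a {i32, i64} aggregate when the target
    // cannot lower it in registers: each member is stored through the demoted
    // sret pointer at its ComputeValueVTs offset, and the stores are joined
    // by the TokenFactor above.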
1035  } else if (I.getNumOperands() != 0) {
1036    SmallVector<EVT, 4> ValueVTs;
1037    ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs);
1038    unsigned NumValues = ValueVTs.size();
1039    if (NumValues) {
1040      SDValue RetOp = getValue(I.getOperand(0));
1041      for (unsigned j = 0, f = NumValues; j != f; ++j) {
1042        EVT VT = ValueVTs[j];
1043
1044        ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
1045
1046        const Function *F = I.getParent()->getParent();
1047        if (F->paramHasAttr(0, Attribute::SExt))
1048          ExtendKind = ISD::SIGN_EXTEND;
1049        else if (F->paramHasAttr(0, Attribute::ZExt))
1050          ExtendKind = ISD::ZERO_EXTEND;
1051
1052        // FIXME: C calling convention requires the return type to be promoted
1053        // to at least 32-bit. But this is not necessary for non-C calling
1054        // conventions. The frontend should mark functions whose return values
1055        // require promoting with signext or zeroext attributes.
1056        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
1057          EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
1058          if (VT.bitsLT(MinVT))
1059            VT = MinVT;
1060        }
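        // Illustrative example: a function returning i8 with the signext
        // attribute has VT widened here to the register type for i32, so the
        // value is sign-extended to at least 32 bits before being split into
        // parts.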
1061
1062        unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
1063        EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
1064        SmallVector<SDValue, 4> Parts(NumParts);
1065        getCopyToParts(DAG, getCurDebugLoc(),
1066                       SDValue(RetOp.getNode(), RetOp.getResNo() + j),
1067                       &Parts[0], NumParts, PartVT, ExtendKind);
1068
1069        // 'inreg' on the function refers to the return value.
1070        ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1071        if (F->paramHasAttr(0, Attribute::InReg))
1072          Flags.setInReg();
1073
1074        // Propagate extension type if any
1075        if (F->paramHasAttr(0, Attribute::SExt))
1076          Flags.setSExt();
1077        else if (F->paramHasAttr(0, Attribute::ZExt))
1078          Flags.setZExt();
1079
1080        for (unsigned i = 0; i < NumParts; ++i) {
1081          Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
1082                                        /*isfixed=*/true));
1083          OutVals.push_back(Parts[i]);
1084        }
1085      }
1086    }
1087  }
1088
1089  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1090  CallingConv::ID CallConv =
1091    DAG.getMachineFunction().getFunction()->getCallingConv();
1092  Chain = TLI.LowerReturn(Chain, CallConv, isVarArg,
1093                          Outs, OutVals, getCurDebugLoc(), DAG);
1094
1095  // Verify that the target's LowerReturn behaved as expected.
1096  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
1097         "LowerReturn didn't return a valid chain!");
1098
1099  // Update the DAG with the new chain value resulting from return lowering.
1100  DAG.setRoot(Chain);
1101}
1102
1103/// CopyToExportRegsIfNeeded - If the given value has virtual registers
1104/// created for it, emit nodes to copy the value into the virtual
1105/// registers.
1106void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
1107  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
1108  if (VMI != FuncInfo.ValueMap.end()) {
1109    assert(!V->use_empty() && "Unused value assigned virtual registers!");
1110    CopyValueToVirtualRegister(V, VMI->second);
1111  }
1112}
1113
1114/// ExportFromCurrentBlock - If this condition isn't known to be exported from
1115/// the current basic block, add it to ValueMap now so that we'll get a
1116/// CopyTo/FromReg.
1117void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
1118  // No need to export constants.
1119  if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
1120
1121  // Already exported?
1122  if (FuncInfo.isExportedInst(V)) return;
1123
1124  unsigned Reg = FuncInfo.InitializeRegForValue(V);
1125  CopyValueToVirtualRegister(V, Reg);
1126}
1127
1128bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
1129                                                     const BasicBlock *FromBB) {
1130  // The operands of the setcc have to be in this block.  We don't know
1131  // how to export them from some other block.
1132  if (const Instruction *VI = dyn_cast<Instruction>(V)) {
1133    // Can export from current BB.
1134    if (VI->getParent() == FromBB)
1135      return true;
1136
1137    // Otherwise, it is only exportable if it has already been exported.
1138    return FuncInfo.isExportedInst(V);
1139  }
1140
1141  // If this is an argument, we can export it if the BB is the entry block or
1142  // if it is already exported.
1143  if (isa<Argument>(V)) {
1144    if (FromBB == &FromBB->getParent()->getEntryBlock())
1145      return true;
1146
1147    // Otherwise, can only export this if it is already exported.
1148    return FuncInfo.isExportedInst(V);
1149  }
1150
1151  // Otherwise, constants can always be exported.
1152  return true;
1153}
1154
1155static bool InBlock(const Value *V, const BasicBlock *BB) {
1156  if (const Instruction *I = dyn_cast<Instruction>(V))
1157    return I->getParent() == BB;
1158  return true;
1159}
1160
1161/// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
1162/// This function emits a branch and is used at the leaves of an OR or an
1163/// AND operator tree.
1164///
1165void
1166SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
1167                                                  MachineBasicBlock *TBB,
1168                                                  MachineBasicBlock *FBB,
1169                                                  MachineBasicBlock *CurBB,
1170                                                  MachineBasicBlock *SwitchBB) {
1171  const BasicBlock *BB = CurBB->getBasicBlock();
1172
1173  // If the leaf of the tree is a comparison, merge the condition into
1174  // the caseblock.
1175  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
1176    // The operands of the cmp have to be in this block.  We don't know
1177    // how to export them from some other block.  If this is the first block
1178    // of the sequence, no exporting is needed.
1179    if (CurBB == SwitchBB ||
1180        (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
1181         isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
1182      ISD::CondCode Condition;
1183      if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
1184        Condition = getICmpCondCode(IC->getPredicate());
1185      } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) {
1186        Condition = getFCmpCondCode(FC->getPredicate());
1187      } else {
1188        Condition = ISD::SETEQ; // silence warning.
1189        llvm_unreachable("Unknown compare instruction");
1190      }
1191
1192      CaseBlock CB(Condition, BOp->getOperand(0),
1193                   BOp->getOperand(1), NULL, TBB, FBB, CurBB);
1194      SwitchCases.push_back(CB);
1195      return;
1196    }
1197  }
1198
1199  // Create a CaseBlock record representing this branch.
1200  CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()),
1201               NULL, TBB, FBB, CurBB);
1202  SwitchCases.push_back(CB);
1203}
1204
1205/// FindMergedConditions - If Cond is an expression like (X && Y) or (X || Y), emit it as a series of branches.
1206void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
1207                                               MachineBasicBlock *TBB,
1208                                               MachineBasicBlock *FBB,
1209                                               MachineBasicBlock *CurBB,
1210                                               MachineBasicBlock *SwitchBB,
1211                                               unsigned Opc) {
1212  // If this node is not part of the or/and tree, emit it as a branch.
1213  const Instruction *BOp = dyn_cast<Instruction>(Cond);
1214  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
1215      (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() ||
1216      BOp->getParent() != CurBB->getBasicBlock() ||
1217      !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
1218      !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
1219    EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB);
1220    return;
1221  }
1222
1223  //  Create TmpBB after CurBB.
1224  MachineFunction::iterator BBI = CurBB;
1225  MachineFunction &MF = DAG.getMachineFunction();
1226  MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
1227  CurBB->getParent()->insert(++BBI, TmpBB);
1228
1229  if (Opc == Instruction::Or) {
1230    // Codegen X | Y as:
1231    //   jmp_if_X TBB
1232    //   jmp TmpBB
1233    // TmpBB:
1234    //   jmp_if_Y TBB
1235    //   jmp FBB
1236    //
1237
1238    // Emit the LHS condition.
1239    FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc);
1240
1241    // Emit the RHS condition into TmpBB.
1242    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
1243  } else {
1244    assert(Opc == Instruction::And && "Unknown merge op!");
1245    // Codegen X & Y as:
1246    //   jmp_if_X TmpBB
1247    //   jmp FBB
1248    // TmpBB:
1249    //   jmp_if_Y TBB
1250    //   jmp FBB
1251    //
1252    //  This requires creation of TmpBB after CurBB.
1253
1254    // Emit the LHS condition.
1255    FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc);
1256
1257    // Emit the RHS condition into TmpBB.
1258    FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc);
1259  }
1260}
1261
1262/// If the set of cases should be emitted as a series of branches, return true.
1263/// If we should emit this as a bunch of and/or'd together conditions, return
1264/// false.
1265bool
1266SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
1267  if (Cases.size() != 2) return true;
1268
1269  // If this is two comparisons of the same values or'd or and'd together, they
1270  // will get folded into a single comparison, so don't emit two blocks.
1271  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
1272       Cases[0].CmpRHS == Cases[1].CmpRHS) ||
1273      (Cases[0].CmpRHS == Cases[1].CmpLHS &&
1274       Cases[0].CmpLHS == Cases[1].CmpRHS)) {
1275    return false;
1276  }
1277
1278  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
1279  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
1280  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
1281      Cases[0].CC == Cases[1].CC &&
1282      isa<Constant>(Cases[0].CmpRHS) &&
1283      cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
1284    if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
1285      return false;
1286    if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
1287      return false;
1288  }
1289
1290  return true;
1291}
1292
1293void SelectionDAGBuilder::visitBr(const BranchInst &I) {
1294  MachineBasicBlock *BrMBB = FuncInfo.MBB;
1295
1296  // Update machine-CFG edges.
1297  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
1298
1299  // Figure out which block is immediately after the current one.
1300  MachineBasicBlock *NextBlock = 0;
1301  MachineFunction::iterator BBI = BrMBB;
1302  if (++BBI != FuncInfo.MF->end())
1303    NextBlock = BBI;
1304
1305  if (I.isUnconditional()) {
1306    // Update machine-CFG edges.
1307    BrMBB->addSuccessor(Succ0MBB);
1308
1309    // If this is not a fall-through branch, emit the branch.
1310    if (Succ0MBB != NextBlock)
1311      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
1312                              MVT::Other, getControlRoot(),
1313                              DAG.getBasicBlock(Succ0MBB)));
1314
1315    return;
1316  }
1317
1318  // If this condition is one of the special cases we handle, do special stuff
1319  // now.
1320  const Value *CondVal = I.getCondition();
1321  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
1322
1323  // If this is a series of conditions that are or'd or and'd together, emit
1324  // this as a sequence of branches instead of setcc's with and/or operations.
1325  // For example, instead of something like:
1326  //     cmp A, B
1327  //     C = seteq
1328  //     cmp D, E
1329  //     F = setle
1330  //     or C, F
1331  //     jnz foo
1332  // Emit:
1333  //     cmp A, B
1334  //     je foo
1335  //     cmp D, E
1336  //     jle foo
1337  //
1338  if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
1339    if (BOp->hasOneUse() &&
1340        (BOp->getOpcode() == Instruction::And ||
1341         BOp->getOpcode() == Instruction::Or)) {
1342      FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
1343                           BOp->getOpcode());
1344      // If the compares in later blocks need to use values not currently
1345      // exported from this block, export them now.  This block should always
1346      // be the first entry.
1347      assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
1348
1349      // Allow some cases to be rejected.
1350      if (ShouldEmitAsBranches(SwitchCases)) {
1351        for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
1352          ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
1353          ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
1354        }
1355
1356        // Emit the branch for this block.
1357        visitSwitchCase(SwitchCases[0], BrMBB);
1358        SwitchCases.erase(SwitchCases.begin());
1359        return;
1360      }
1361
1362      // Okay, we decided not to do this, remove any inserted MBBs and clear
1363      // SwitchCases.
1364      for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
1365        FuncInfo.MF->erase(SwitchCases[i].ThisBB);
1366
1367      SwitchCases.clear();
1368    }
1369  }
1370
1371  // Create a CaseBlock record representing this branch.
1372  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
1373               NULL, Succ0MBB, Succ1MBB, BrMBB);
1374
1375  // Use visitSwitchCase to actually insert the fast branch sequence for this
1376  // cond branch.
1377  visitSwitchCase(CB, BrMBB);
1378}
1379
1380/// visitSwitchCase - Emits the necessary code to represent a single node in
1381/// the binary search tree resulting from lowering a switch instruction.
1382void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
1383                                          MachineBasicBlock *SwitchBB) {
1384  SDValue Cond;
1385  SDValue CondLHS = getValue(CB.CmpLHS);
1386  DebugLoc dl = getCurDebugLoc();
1387
1388  // Build the setcc now.
1389  if (CB.CmpMHS == NULL) {
1390    // Fold "(X == true)" to X and "(X == false)" to !X to
1391    // handle common cases produced by branch lowering.
1392    if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
1393        CB.CC == ISD::SETEQ)
1394      Cond = CondLHS;
1395    else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
1396             CB.CC == ISD::SETEQ) {
1397      SDValue True = DAG.getConstant(1, CondLHS.getValueType());
1398      Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
1399    } else
1400      Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
1401  } else {
1402    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
1403
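        // A range check "Low <= X && X <= High" is normally emitted below as a
        // single unsigned comparison: (X - Low) <=u (High - Low).  For example
        // (illustrative), for the range [10, 13] the value is in range iff
        // (X - 10) <=u 3.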
1404    const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
1405    const APInt& High  = cast<ConstantInt>(CB.CmpRHS)->getValue();
1406
1407    SDValue CmpOp = getValue(CB.CmpMHS);
1408    EVT VT = CmpOp.getValueType();
1409
1410    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
1411      Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
1412                          ISD::SETLE);
1413    } else {
1414      SDValue SUB = DAG.getNode(ISD::SUB, dl,
1415                                VT, CmpOp, DAG.getConstant(Low, VT));
1416      Cond = DAG.getSetCC(dl, MVT::i1, SUB,
1417                          DAG.getConstant(High-Low, VT), ISD::SETULE);
1418    }
1419  }
1420
1421  // Update successor info
1422  SwitchBB->addSuccessor(CB.TrueBB);
1423  SwitchBB->addSuccessor(CB.FalseBB);
1424
1425  // Set NextBlock to be the MBB immediately after the current one, if any.
1426  // This is used to avoid emitting unnecessary branches to the next block.
1427  MachineBasicBlock *NextBlock = 0;
1428  MachineFunction::iterator BBI = SwitchBB;
1429  if (++BBI != FuncInfo.MF->end())
1430    NextBlock = BBI;
1431
1432  // If the lhs block is the next block, invert the condition so that we can
1433  // fall through to the lhs instead of the rhs block.
1434  if (CB.TrueBB == NextBlock) {
1435    std::swap(CB.TrueBB, CB.FalseBB);
1436    SDValue True = DAG.getConstant(1, Cond.getValueType());
1437    Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
1438  }
1439
1440  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
1441                               MVT::Other, getControlRoot(), Cond,
1442                               DAG.getBasicBlock(CB.TrueBB));
1443
1444  // Insert the false branch.
1445  if (CB.FalseBB != NextBlock)
1446    BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
1447                         DAG.getBasicBlock(CB.FalseBB));
1448
1449  DAG.setRoot(BrCond);
1450}
1451
1452/// visitJumpTable - Emit JumpTable node in the current MBB
1453void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
1454  // Emit the code for the jump table
1455  assert(JT.Reg != -1U && "Should lower JT Header first!");
1456  EVT PTy = TLI.getPointerTy();
1457  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
1458                                     JT.Reg, PTy);
1459  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
1460  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
1461                                    MVT::Other, Index.getValue(1),
1462                                    Table, Index);
1463  DAG.setRoot(BrJumpTable);
1464}
1465
1466/// visitJumpTableHeader - This function emits the necessary code to produce
1467/// the index into the JumpTable from the switch case value.
1468void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
1469                                               JumpTableHeader &JTH,
1470                                               MachineBasicBlock *SwitchBB) {
1471  // Subtract the lowest switch case value from the value being switched on and
1472  // conditional branch to default mbb if the result is greater than the
1473  // difference between smallest and largest cases.
1474  SDValue SwitchOp = getValue(JTH.SValue);
1475  EVT VT = SwitchOp.getValueType();
1476  SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
1477                            DAG.getConstant(JTH.First, VT));
1478
1479  // The SDNode we just created, which holds the value being switched on minus
1480  // the smallest case value, needs to be copied to a virtual register so it
1481  // can be used as an index into the jump table in a subsequent basic block.
1482  // This value may be smaller or larger than the target's pointer type, and
1483  // may therefore require extension or truncation.
1484  SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy());
1485
1486  unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy());
1487  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
1488                                    JumpTableReg, SwitchOp);
1489  JT.Reg = JumpTableReg;
1490
1491  // Emit the range check for the jump table, and branch to the default block
1492  // for the switch statement if the value being switched on exceeds the largest
1493  // case in the switch.
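      // For example (illustrative values), if the smallest case is 5 and the
      // largest is 20, Sub is "X - 5" and we branch to the default block when
      // Sub >u 15; otherwise Sub is used to index into the jump table.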
1494  SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
1495                             TLI.getSetCCResultType(Sub.getValueType()), Sub,
1496                             DAG.getConstant(JTH.Last-JTH.First,VT),
1497                             ISD::SETUGT);
1498
1499  // Set NextBlock to be the MBB immediately after the current one, if any.
1500  // This is used to avoid emitting unnecessary branches to the next block.
1501  MachineBasicBlock *NextBlock = 0;
1502  MachineFunction::iterator BBI = SwitchBB;
1503
1504  if (++BBI != FuncInfo.MF->end())
1505    NextBlock = BBI;
1506
1507  SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
1508                               MVT::Other, CopyTo, CMP,
1509                               DAG.getBasicBlock(JT.Default));
1510
1511  if (JT.MBB != NextBlock)
1512    BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond,
1513                         DAG.getBasicBlock(JT.MBB));
1514
1515  DAG.setRoot(BrCond);
1516}
1517
1518/// visitBitTestHeader - This function emits the necessary code to produce a
1519/// value suitable for "bit tests".
1520void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
1521                                             MachineBasicBlock *SwitchBB) {
1522  // Subtract the minimum value
1523  SDValue SwitchOp = getValue(B.SValue);
1524  EVT VT = SwitchOp.getValueType();
1525  SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
1526                            DAG.getConstant(B.First, VT));
1527
1528  // Check range
1529  SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
1530                                  TLI.getSetCCResultType(Sub.getValueType()),
1531                                  Sub, DAG.getConstant(B.Range, VT),
1532                                  ISD::SETUGT);
1533
1534  SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(),
1535                                       TLI.getPointerTy());
1536
1537  B.Reg = FuncInfo.CreateReg(TLI.getPointerTy());
1538  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
1539                                    B.Reg, ShiftOp);
1540
1541  // Set NextBlock to be the MBB immediately after the current one, if any.
1542  // This is used to avoid emitting unnecessary branches to the next block.
1543  MachineBasicBlock *NextBlock = 0;
1544  MachineFunction::iterator BBI = SwitchBB;
1545  if (++BBI != FuncInfo.MF->end())
1546    NextBlock = BBI;
1547
1548  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
1549
1550  SwitchBB->addSuccessor(B.Default);
1551  SwitchBB->addSuccessor(MBB);
1552
1553  SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
1554                                MVT::Other, CopyTo, RangeCmp,
1555                                DAG.getBasicBlock(B.Default));
1556
1557  if (MBB != NextBlock)
1558    BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrRange,
1559                          DAG.getBasicBlock(MBB));
1560
1561  DAG.setRoot(BrRange);
1562}
1563
1564/// visitBitTestCase - This function produces one "bit test".
1565void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
1566                                           unsigned Reg,
1567                                           BitTestCase &B,
1568                                           MachineBasicBlock *SwitchBB) {
1569  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
1570                                       TLI.getPointerTy());
1571  SDValue Cmp;
1572  if (CountPopulation_64(B.Mask) == 1) {
1573    // Testing for a single bit; just compare the shift count with what it
1574    // would need to be to shift a 1 bit in that position.
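        // For example (illustrative), a mask of 0x10 (bit 4 only) becomes the
        // test "ShiftOp == 4" instead of materializing "1 << ShiftOp".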
1575    Cmp = DAG.getSetCC(getCurDebugLoc(),
1576                       TLI.getSetCCResultType(ShiftOp.getValueType()),
1577                       ShiftOp,
1578                       DAG.getConstant(CountTrailingZeros_64(B.Mask),
1579                                       TLI.getPointerTy()),
1580                       ISD::SETEQ);
1581  } else {
1582    // Make desired shift
1583    SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
1584                                    TLI.getPointerTy(),
1585                                    DAG.getConstant(1, TLI.getPointerTy()),
1586                                    ShiftOp);
1587
1588    // Emit bit tests and jumps
1589    SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
1590                                TLI.getPointerTy(), SwitchVal,
1591                                DAG.getConstant(B.Mask, TLI.getPointerTy()));
1592    Cmp = DAG.getSetCC(getCurDebugLoc(),
1593                       TLI.getSetCCResultType(AndOp.getValueType()),
1594                       AndOp, DAG.getConstant(0, TLI.getPointerTy()),
1595                       ISD::SETNE);
1596  }
1597
1598  SwitchBB->addSuccessor(B.TargetBB);
1599  SwitchBB->addSuccessor(NextMBB);
1600
1601  SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
1602                              MVT::Other, getControlRoot(),
1603                              Cmp, DAG.getBasicBlock(B.TargetBB));
1604
1605  // Set NextBlock to be the MBB immediately after the current one, if any.
1606  // This is used to avoid emitting unnecessary branches to the next block.
1607  MachineBasicBlock *NextBlock = 0;
1608  MachineFunction::iterator BBI = SwitchBB;
1609  if (++BBI != FuncInfo.MF->end())
1610    NextBlock = BBI;
1611
1612  if (NextMBB != NextBlock)
1613    BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd,
1614                        DAG.getBasicBlock(NextMBB));
1615
1616  DAG.setRoot(BrAnd);
1617}
1618
1619void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
1620  MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
1621
1622  // Retrieve successors.
1623  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
1624  MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)];
1625
1626  const Value *Callee(I.getCalledValue());
1627  if (isa<InlineAsm>(Callee))
1628    visitInlineAsm(&I);
1629  else
1630    LowerCallTo(&I, getValue(Callee), false, LandingPad);
1631
1632  // If the value of the invoke is used outside of its defining block, make it
1633  // available as a virtual register.
1634  CopyToExportRegsIfNeeded(&I);
1635
1636  // Update successor info
1637  InvokeMBB->addSuccessor(Return);
1638  InvokeMBB->addSuccessor(LandingPad);
1639
1640  // Drop into normal successor.
1641  DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
1642                          MVT::Other, getControlRoot(),
1643                          DAG.getBasicBlock(Return)));
1644}
1645
1646void SelectionDAGBuilder::visitUnwind(const UnwindInst &I) {
1647}
1648
1649/// handleSmallSwitchRange - Emit a series of specific tests (suitable for
1650/// small case ranges).
1651bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
1652                                                 CaseRecVector& WorkList,
1653                                                 const Value* SV,
1654                                                 MachineBasicBlock *Default,
1655                                                 MachineBasicBlock *SwitchBB) {
1656  Case& BackCase  = *(CR.Range.second-1);
1657
1658  // Size is the number of Cases represented by this range.
1659  size_t Size = CR.Range.second - CR.Range.first;
1660  if (Size > 3)
1661    return false;
1662
1663  // Get the MachineFunction which holds the current MBB.  This is used when
1664  // inserting any additional MBBs necessary to represent the switch.
1665  MachineFunction *CurMF = FuncInfo.MF;
1666
1667  // Figure out which block is immediately after the current one.
1668  MachineBasicBlock *NextBlock = 0;
1669  MachineFunction::iterator BBI = CR.CaseBB;
1670
1671  if (++BBI != FuncInfo.MF->end())
1672    NextBlock = BBI;
1673
1674  // TODO: If any two of the cases have the same destination, and if one value
1675  // is the same as the other, but has one bit unset that the other has set,
1676  // use bit manipulation to do two compares at once.  For example:
1677  // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
1678
1679  // Rearrange the case blocks so that the last one falls through if possible.
1680  if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
1681    // The last case block won't fall through into 'NextBlock' if we emit the
1682    // branches in this order.  See if rearranging a case value would help.
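        // For example (illustrative), with cases {1->A, 2->B, 3->C} and B laid
        // out as the next block, moving the case for B to the end lets its test
        // fall through to B rather than ending with an explicit branch.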
1683    for (CaseItr I = CR.Range.first, E = CR.Range.second-1; I != E; ++I) {
1684      if (I->BB == NextBlock) {
1685        std::swap(*I, BackCase);
1686        break;
1687      }
1688    }
1689  }
1690
1691  // Create a CaseBlock record representing a conditional branch to
1692  // the Case's target mbb if the value being switched on SV is equal
1693  // to C.
1694  MachineBasicBlock *CurBlock = CR.CaseBB;
1695  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) {
1696    MachineBasicBlock *FallThrough;
1697    if (I != E-1) {
1698      FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock());
1699      CurMF->insert(BBI, FallThrough);
1700
1701      // Put SV in a virtual register to make it available from the new blocks.
1702      ExportFromCurrentBlock(SV);
1703    } else {
1704      // If the last case doesn't match, go to the default block.
1705      FallThrough = Default;
1706    }
1707
1708    const Value *RHS, *LHS, *MHS;
1709    ISD::CondCode CC;
1710    if (I->High == I->Low) {
1711      // This is just a small case range containing exactly one case.
1712      CC = ISD::SETEQ;
1713      LHS = SV; RHS = I->High; MHS = NULL;
1714    } else {
1715      CC = ISD::SETLE;
1716      LHS = I->Low; MHS = SV; RHS = I->High;
1717    }
1718    CaseBlock CB(CC, LHS, RHS, MHS, I->BB, FallThrough, CurBlock);
1719
1720    // If emitting the first comparison, just call visitSwitchCase to emit the
1721    // code into the current block.  Otherwise, push the CaseBlock onto the
1722    // vector to be later processed by SDISel, and insert the node's MBB
1723    // before the next MBB.
1724    if (CurBlock == SwitchBB)
1725      visitSwitchCase(CB, SwitchBB);
1726    else
1727      SwitchCases.push_back(CB);
1728
1729    CurBlock = FallThrough;
1730  }
1731
1732  return true;
1733}
1734
1735static inline bool areJTsAllowed(const TargetLowering &TLI) {
1736  return !DisableJumpTables &&
1737          (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
1738           TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
1739}
1740
1741static APInt ComputeRange(const APInt &First, const APInt &Last) {
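      // Widen both endpoints by one bit so the subtraction below cannot
      // overflow, then return the number of values in the inclusive range
      // [First, Last].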
1742  APInt LastExt(Last), FirstExt(First);
1743  uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
1744  LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
1745  return (LastExt - FirstExt + 1ULL);
1746}
1747
1748/// handleJTSwitchCase - Emit a jump table for the current switch case range.
1749bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
1750                                             CaseRecVector& WorkList,
1751                                             const Value* SV,
1752                                             MachineBasicBlock* Default,
1753                                             MachineBasicBlock *SwitchBB) {
1754  Case& FrontCase = *CR.Range.first;
1755  Case& BackCase  = *(CR.Range.second-1);
1756
1757  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
1758  const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
1759
1760  APInt TSize(First.getBitWidth(), 0);
1761  for (CaseItr I = CR.Range.first, E = CR.Range.second;
1762       I!=E; ++I)
1763    TSize += I->size();
1764
1765  if (!areJTsAllowed(TLI) || TSize.ult(4))
1766    return false;
1767
1768  APInt Range = ComputeRange(First, Last);
1769  double Density = TSize.roundToDouble() / Range.roundToDouble();
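      // Density is the fraction of values in [First, Last] that have a case.
      // For example (illustrative), 40 case values spread over a range of 100
      // give a density of 0.4, the minimum accepted here.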
1770  if (Density < 0.4)
1771    return false;
1772
1773  DEBUG(dbgs() << "Lowering jump table\n"
1774               << "First entry: " << First << ". Last entry: " << Last << '\n'
1775               << "Range: " << Range
1776               << "Size: " << TSize << ". Density: " << Density << "\n\n");
1777
1778  // Get the MachineFunction which holds the current MBB.  This is used when
1779  // inserting any additional MBBs necessary to represent the switch.
1780  MachineFunction *CurMF = FuncInfo.MF;
1781
1782  // Figure out which block is immediately after the current one.
1783  MachineFunction::iterator BBI = CR.CaseBB;
1784  ++BBI;
1785
1786  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
1787
1788  // Create a new basic block to hold the code for loading the address
1789  // of the jump table, and jumping to it.  Update successor information;
1790  // we will either branch to the default case for the switch, or the jump
1791  // table.
1792  MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
1793  CurMF->insert(BBI, JumpTableBB);
1794  CR.CaseBB->addSuccessor(Default);
1795  CR.CaseBB->addSuccessor(JumpTableBB);
1796
1797  // Build a vector of destination BBs, corresponding to each target
1798  // of the jump table. If the value of the jump table slot corresponds to
1799  // a case statement, push the case's BB onto the vector, otherwise, push
1800  // the default BB.
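      // For example (illustrative), cases 1, 2 and 4 with default block D give
      // the table [BB(1), BB(2), D, BB(4)] covering the values 1 through 4.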
1801  std::vector<MachineBasicBlock*> DestBBs;
1802  APInt TEI = First;
1803  for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) {
1804    const APInt &Low = cast<ConstantInt>(I->Low)->getValue();
1805    const APInt &High = cast<ConstantInt>(I->High)->getValue();
1806
1807    if (Low.sle(TEI) && TEI.sle(High)) {
1808      DestBBs.push_back(I->BB);
1809      if (TEI==High)
1810        ++I;
1811    } else {
1812      DestBBs.push_back(Default);
1813    }
1814  }
1815
1816  // Update successor info. Add one edge to each unique successor.
1817  BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs());
1818  for (std::vector<MachineBasicBlock*>::iterator I = DestBBs.begin(),
1819         E = DestBBs.end(); I != E; ++I) {
1820    if (!SuccsHandled[(*I)->getNumber()]) {
1821      SuccsHandled[(*I)->getNumber()] = true;
1822      JumpTableBB->addSuccessor(*I);
1823    }
1824  }
1825
1826  // Create a jump table index for this jump table.
1827  unsigned JTEncoding = TLI.getJumpTableEncoding();
1828  unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding)
1829                       ->createJumpTableIndex(DestBBs);
1830
1831  // Set the jump table information so that we can codegen it as a second
1832  // MachineBasicBlock
1833  JumpTable JT(-1U, JTI, JumpTableBB, Default);
1834  JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB));
1835  if (CR.CaseBB == SwitchBB)
1836    visitJumpTableHeader(JT, JTH, SwitchBB);
1837
1838  JTCases.push_back(JumpTableBlock(JTH, JT));
1839
1840  return true;
1841}
1842
1843/// handleBTSplitSwitchCase - Emit a comparison and split the binary search
1844/// tree into two subtrees.
1845bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR,
1846                                                  CaseRecVector& WorkList,
1847                                                  const Value* SV,
1848                                                  MachineBasicBlock *Default,
1849                                                  MachineBasicBlock *SwitchBB) {
1850  // Get the MachineFunction which holds the current MBB.  This is used when
1851  // inserting any additional MBBs necessary to represent the switch.
1852  MachineFunction *CurMF = FuncInfo.MF;
1853
1854  // Figure out which block is immediately after the current one.
1855  MachineFunction::iterator BBI = CR.CaseBB;
1856  ++BBI;
1857
1858  Case& FrontCase = *CR.Range.first;
1859  Case& BackCase  = *(CR.Range.second-1);
1860  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
1861
1862  // Size is the number of Cases represented by this range.
1863  unsigned Size = CR.Range.second - CR.Range.first;
1864
1865  const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
1866  const APInt &Last  = cast<ConstantInt>(BackCase.High)->getValue();
1867  double FMetric = 0;
1868  CaseItr Pivot = CR.Range.first + Size/2;
1869
1870  // Select the optimal pivot, maximizing the summed density of the LHS and
1871  // RHS.  This will (heuristically) allow us to emit jump tables later.
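      // For each candidate split point we measure the case density on each side
      // and weight the sum by the log2 of the distance between the two halves,
      // so splits at wide gaps between dense clusters score highest.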
1872  APInt TSize(First.getBitWidth(), 0);
1873  for (CaseItr I = CR.Range.first, E = CR.Range.second;
1874       I!=E; ++I)
1875    TSize += I->size();
1876
1877  APInt LSize = FrontCase.size();
1878  APInt RSize = TSize-LSize;
1879  DEBUG(dbgs() << "Selecting best pivot: \n"
1880               << "First: " << First << ", Last: " << Last <<'\n'
1881               << "LSize: " << LSize << ", RSize: " << RSize << '\n');
1882  for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
1883       J!=E; ++I, ++J) {
1884    const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
1885    const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
1886    APInt Range = ComputeRange(LEnd, RBegin);
1887    assert((Range - 2ULL).isNonNegative() &&
1888           "Invalid case distance");
1889    double LDensity = (double)LSize.roundToDouble() /
1890                           (LEnd - First + 1ULL).roundToDouble();
1891    double RDensity = (double)RSize.roundToDouble() /
1892                           (Last - RBegin + 1ULL).roundToDouble();
1893    double Metric = Range.logBase2()*(LDensity+RDensity);
1894    // Should always split in some non-trivial place
1895    DEBUG(dbgs() <<"=>Step\n"
1896                 << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n'
1897                 << "LDensity: " << LDensity
1898                 << ", RDensity: " << RDensity << '\n'
1899                 << "Metric: " << Metric << '\n');
1900    if (FMetric < Metric) {
1901      Pivot = J;
1902      FMetric = Metric;
1903      DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n');
1904    }
1905
1906    LSize += J->size();
1907    RSize -= J->size();
1908  }
1909  if (areJTsAllowed(TLI)) {
1910    // If our case is dense we *really* should handle it earlier!
1911    assert((FMetric > 0) && "Should handle dense range earlier!");
1912  } else {
1913    Pivot = CR.Range.first + Size/2;
1914  }
1915
1916  CaseRange LHSR(CR.Range.first, Pivot);
1917  CaseRange RHSR(Pivot, CR.Range.second);
1918  Constant *C = Pivot->Low;
1919  MachineBasicBlock *FalseBB = 0, *TrueBB = 0;
1920
1921  // We know that we branch to the LHS if the Value being switched on is
1922  // less than the Pivot value, C.  We use this to optimize our binary
1923  // tree a bit, by recognizing that if SV is greater than or equal to the
1924  // LHS's Case Value, and that Case Value is exactly one less than the
1925  // Pivot's Value, then we can branch directly to the LHS's Target,
1926  // rather than creating a leaf node for it.
1927  if ((LHSR.second - LHSR.first) == 1 &&
1928      LHSR.first->High == CR.GE &&
1929      cast<ConstantInt>(C)->getValue() ==
1930      (cast<ConstantInt>(CR.GE)->getValue() + 1LL)) {
1931    TrueBB = LHSR.first->BB;
1932  } else {
1933    TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB);
1934    CurMF->insert(BBI, TrueBB);
1935    WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR));
1936
1937    // Put SV in a virtual register to make it available from the new blocks.
1938    ExportFromCurrentBlock(SV);
1939  }
1940
1941  // Similar to the optimization above, if the Value being switched on is
1942  // known to be less than the Constant CR.LT, and the current Case Value
1943  // is CR.LT - 1, then we can branch directly to the target block for
1944  // the current Case Value, rather than emitting a RHS leaf node for it.
1945  if ((RHSR.second - RHSR.first) == 1 && CR.LT &&
1946      cast<ConstantInt>(RHSR.first->Low)->getValue() ==
1947      (cast<ConstantInt>(CR.LT)->getValue() - 1LL)) {
1948    FalseBB = RHSR.first->BB;
1949  } else {
1950    FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
1951    CurMF->insert(BBI, FalseBB);
1952    WorkList.push_back(CaseRec(FalseBB,CR.LT,C,RHSR));
1953
1954    // Put SV in a virtual register to make it available from the new blocks.
1955    ExportFromCurrentBlock(SV);
1956  }
1957
1958  // Create a CaseBlock record representing a conditional branch to
1959  // the LHS node if the value being switched on SV is less than C.
1960  // Otherwise, branch to the RHS node.
1961  CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB);
1962
1963  if (CR.CaseBB == SwitchBB)
1964    visitSwitchCase(CB, SwitchBB);
1965  else
1966    SwitchCases.push_back(CB);
1967
1968  return true;
1969}
1970
1971/// handleBitTestsSwitchCase - If the current case range has few destinations
1972/// and its span is less than the machine word bitwidth, encode the case range
1973/// into a series of masks and emit bit tests with these masks.
1974bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
1975                                                   CaseRecVector& WorkList,
1976                                                   const Value* SV,
1977                                                   MachineBasicBlock* Default,
1978                                                   MachineBasicBlock *SwitchBB){
1979  EVT PTy = TLI.getPointerTy();
1980  unsigned IntPtrBits = PTy.getSizeInBits();
1981
1982  Case& FrontCase = *CR.Range.first;
1983  Case& BackCase  = *(CR.Range.second-1);
1984
1985  // Get the MachineFunction which holds the current MBB.  This is used when
1986  // inserting any additional MBBs necessary to represent the switch.
1987  MachineFunction *CurMF = FuncInfo.MF;
1988
1989  // If target does not have legal shift left, do not emit bit tests at all.
1990  if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy()))
1991    return false;
1992
1993  size_t numCmps = 0;
1994  for (CaseItr I = CR.Range.first, E = CR.Range.second;
1995       I!=E; ++I) {
1996    // A single case counts as one comparison; a case range counts as two.
1997    numCmps += (I->Low == I->High ? 1 : 2);
1998  }
1999
2000  // Count unique destinations
2001  SmallSet<MachineBasicBlock*, 4> Dests;
2002  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
2003    Dests.insert(I->BB);
2004    if (Dests.size() > 3)
2005      // Don't bother with the code below if there are too many unique destinations.
2006      return false;
2007  }
2008  DEBUG(dbgs() << "Total number of unique destinations: "
2009        << Dests.size() << '\n'
2010        << "Total number of comparisons: " << numCmps << '\n');
2011
2012  // Compute span of values.
2013  const APInt& minValue = cast<ConstantInt>(FrontCase.Low)->getValue();
2014  const APInt& maxValue = cast<ConstantInt>(BackCase.High)->getValue();
2015  APInt cmpRange = maxValue - minValue;
2016
2017  DEBUG(dbgs() << "Compare range: " << cmpRange << '\n'
2018               << "Low bound: " << minValue << '\n'
2019               << "High bound: " << maxValue << '\n');
2020
2021  if (cmpRange.uge(IntPtrBits) ||
2022      (!(Dests.size() == 1 && numCmps >= 3) &&
2023       !(Dests.size() == 2 && numCmps >= 5) &&
2024       !(Dests.size() >= 3 && numCmps >= 6)))
2025    return false;
2026
2027  DEBUG(dbgs() << "Emitting bit tests\n");
2028  APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth());
2029
2030  // If all the case values fit in a word without having to subtract
2031  // minValue, we can avoid the subtraction entirely.
2033  if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) {
2034    cmpRange = maxValue;
2035  } else {
2036    lowBound = minValue;
2037  }
2038
2039  CaseBitsVector CasesBits;
2040  unsigned i, count = 0;
2041
2042  for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) {
2043    MachineBasicBlock* Dest = I->BB;
2044    for (i = 0; i < count; ++i)
2045      if (Dest == CasesBits[i].BB)
2046        break;
2047
2048    if (i == count) {
2049      assert((count < 3) && "Too many destinations to test!");
2050      CasesBits.push_back(CaseBits(0, Dest, 0));
2051      count++;
2052    }
2053
2054    const APInt& lowValue = cast<ConstantInt>(I->Low)->getValue();
2055    const APInt& highValue = cast<ConstantInt>(I->High)->getValue();
2056
2057    uint64_t lo = (lowValue - lowBound).getZExtValue();
2058    uint64_t hi = (highValue - lowBound).getZExtValue();
2059
2060    for (uint64_t j = lo; j <= hi; j++) {
2061      CasesBits[i].Mask |=  1ULL << j;
2062      CasesBits[i].Bits++;
2063    }
2064
2065  }
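      // At this point each CasesBits entry pairs a destination block with a bit
      // mask of the rebased case values that branch to it.  For example
      // (illustrative), cases 1, 3, 5 to one block and 2, 4 to another, with
      // lowBound == 0, give the masks 0b101010 and 0b010100; each mask is later
      // tested in visitBitTestCase with a single "(1 << X) & Mask" (or a direct
      // compare when the mask has only one bit set).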
2066  std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp());
2067
2068  BitTestInfo BTC;
2069
2070  // Figure out which block is immediately after the current one.
2071  MachineFunction::iterator BBI = CR.CaseBB;
2072  ++BBI;
2073
2074  const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock();
2075
2076  DEBUG(dbgs() << "Cases:\n");
2077  for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) {
2078    DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask
2079                 << ", Bits: " << CasesBits[i].Bits
2080                 << ", BB: " << CasesBits[i].BB << '\n');
2081
2082    MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB);
2083    CurMF->insert(BBI, CaseBB);
2084    BTC.push_back(BitTestCase(CasesBits[i].Mask,
2085                              CaseBB,
2086                              CasesBits[i].BB));
2087
2088    // Put SV in a virtual register to make it available from the new blocks.
2089    ExportFromCurrentBlock(SV);
2090  }
2091
2092  BitTestBlock BTB(lowBound, cmpRange, SV,
2093                   -1U, (CR.CaseBB == SwitchBB),
2094                   CR.CaseBB, Default, BTC);
2095
2096  if (CR.CaseBB == SwitchBB)
2097    visitBitTestHeader(BTB, SwitchBB);
2098
2099  BitTestCases.push_back(BTB);
2100
2101  return true;
2102}
2103
2104/// Clusterify - Transform a simple list of Cases into a list of CaseRanges.
2105size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
2106                                       const SwitchInst& SI) {
2107  size_t numCmps = 0;
2108
2109  // Start with "simple" cases
2110  for (size_t i = 1; i < SI.getNumSuccessors(); ++i) {
2111    MachineBasicBlock *SMBB = FuncInfo.MBBMap[SI.getSuccessor(i)];
2112    Cases.push_back(Case(SI.getSuccessorValue(i),
2113                         SI.getSuccessorValue(i),
2114                         SMBB));
2115  }
2116  std::sort(Cases.begin(), Cases.end(), CaseCmp());
2117
2118  // Merge cases into clusters.
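      // For example (illustrative), adjacent cases 1, 2 and 3 that all branch to
      // the same block merge into a single [1, 3] cluster, while a case 5 going
      // to a different block remains its own [5, 5] cluster.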
2119  if (Cases.size() >= 2)
2120    // Must recompute end() each iteration because it may be
2121    // invalidated by erase if we hold on to it
2122    for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
2123      const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
2124      const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
2125      MachineBasicBlock* nextBB = J->BB;
2126      MachineBasicBlock* currentBB = I->BB;
2127
2128      // If the two neighboring cases go to the same destination, merge them
2129      // into a single case.
2130      if ((nextValue - currentValue == 1) && (currentBB == nextBB)) {
2131        I->High = J->High;
2132        J = Cases.erase(J);
2133      } else {
2134        I = J++;
2135      }
2136    }
2137
2138  for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) {
2139    if (I->Low != I->High)
2140      // A range counts double, since it requires two compares.
2141      ++numCmps;
2142  }
2143
2144  return numCmps;
2145}
2146
2147void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
2148  MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
2149
2150  // Figure out which block is immediately after the current one.
2151  MachineBasicBlock *NextBlock = 0;
2152  MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()];
2153
2154  // If there is only the default destination, branch to it if it is not the
2155  // next basic block.  Otherwise, just fall through.
2156  if (SI.getNumOperands() == 2) {
2157    // Update machine-CFG edges.
2158
2159    // If this is not a fall-through branch, emit the branch.
2160    SwitchMBB->addSuccessor(Default);
2161    if (Default != NextBlock)
2162      DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(),
2163                              MVT::Other, getControlRoot(),
2164                              DAG.getBasicBlock(Default)));
2165
2166    return;
2167  }
2168
2169  // If there are any non-default case statements, create a vector of Cases
2170  // representing each one, and sort the vector so that we can efficiently
2171  // create a binary search tree from them.
2172  CaseVector Cases;
2173  size_t numCmps = Clusterify(Cases, SI);
2174  DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size()
2175               << ". Total compares: " << numCmps << '\n');
2176  numCmps = 0;
2177
2178  // Get the Value to be switched on and default basic blocks, which will be
2179  // inserted into CaseBlock records, representing basic blocks in the binary
2180  // search tree.
2181  const Value *SV = SI.getOperand(0);
2182
2183  // Push the initial CaseRec onto the worklist
2184  CaseRecVector WorkList;
2185  WorkList.push_back(CaseRec(SwitchMBB,0,0,
2186                             CaseRange(Cases.begin(),Cases.end())));
2187
2188  while (!WorkList.empty()) {
2189    // Grab a record representing a case range to process off the worklist
2190    CaseRec CR = WorkList.back();
2191    WorkList.pop_back();
2192
2193    if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
2194      continue;
2195
2196    // If the range has few cases (three or fewer), emit a series of specific
2197    // tests.
2198    if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB))
2199      continue;
2200
2201    // If the range covers at least four case values, is at least 40% dense,
2202    // and the target supports jump tables, emit a jump table rather than
2203    // lowering the switch to a binary tree of conditional branches.
2204    if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB))
2205      continue;
2206
2207    // Emit a binary tree. We need to pick a pivot, and push left and right
2208    // ranges onto the worklist. Leaves are handled via handleSmallSwitchRange().
2209    handleBTSplitSwitchCase(CR, WorkList, SV, Default, SwitchMBB);
2210  }
2211}
2212
2213void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
2214  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
2215
2216  // Update machine-CFG edges with unique successors.
2217  SmallVector<BasicBlock*, 32> succs;
2218  succs.reserve(I.getNumSuccessors());
2219  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i)
2220    succs.push_back(I.getSuccessor(i));
2221  array_pod_sort(succs.begin(), succs.end());
2222  succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
2223  for (unsigned i = 0, e = succs.size(); i != e; ++i)
2224    IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]);
2225
2226  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
2227                          MVT::Other, getControlRoot(),
2228                          getValue(I.getAddress())));
2229}
2230
2231void SelectionDAGBuilder::visitFSub(const User &I) {
2232  // -0.0 - X --> fneg
2233  const Type *Ty = I.getType();
2234  if (Ty->isVectorTy()) {
2235    if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
2236      const VectorType *DestTy = cast<VectorType>(I.getType());
2237      const Type *ElTy = DestTy->getElementType();
2238      unsigned VL = DestTy->getNumElements();
2239      std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
2240      Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
2241      if (CV == CNZ) {
2242        SDValue Op2 = getValue(I.getOperand(1));
2243        setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
2244                                 Op2.getValueType(), Op2));
2245        return;
2246      }
2247    }
2248  }
2249
2250  if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
2251    if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
2252      SDValue Op2 = getValue(I.getOperand(1));
2253      setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
2254                               Op2.getValueType(), Op2));
2255      return;
2256    }
2257
2258  visitBinary(I, ISD::FSUB);
2259}
2260
2261void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
2262  SDValue Op1 = getValue(I.getOperand(0));
2263  SDValue Op2 = getValue(I.getOperand(1));
2264  setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(),
2265                           Op1.getValueType(), Op1, Op2));
2266}
2267
2268void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
2269  SDValue Op1 = getValue(I.getOperand(0));
2270  SDValue Op2 = getValue(I.getOperand(1));
2271  if (!I.getType()->isVectorTy() &&
2272      Op2.getValueType() != TLI.getShiftAmountTy()) {
2273    // If the operand is smaller than the shift count type, promote it.
2274    EVT PTy = TLI.getPointerTy();
2275    EVT STy = TLI.getShiftAmountTy();
2276    if (STy.bitsGT(Op2.getValueType()))
2277      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
2278                        TLI.getShiftAmountTy(), Op2);
2279    // If the operand is larger than the shift count type but the shift
2280    // count type has enough bits to represent any shift value, truncate
2281    // it now. This is a common case and it exposes the truncate to
2282    // optimization early.
2283    else if (STy.getSizeInBits() >=
2284             Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
2285      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
2286                        TLI.getShiftAmountTy(), Op2);
2287    // Otherwise we'll need to temporarily settle for some other
2288    // convenient type; type legalization will make adjustments as
2289    // needed.
2290    else if (PTy.bitsLT(Op2.getValueType()))
2291      Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
2292                        TLI.getPointerTy(), Op2);
2293    else if (PTy.bitsGT(Op2.getValueType()))
2294      Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
2295                        TLI.getPointerTy(), Op2);
2296  }
2297
2298  setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
2299                           Op1.getValueType(), Op1, Op2));
2300}
2301
2302void SelectionDAGBuilder::visitICmp(const User &I) {
2303  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
2304  if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
2305    predicate = IC->getPredicate();
2306  else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
2307    predicate = ICmpInst::Predicate(IC->getPredicate());
2308  SDValue Op1 = getValue(I.getOperand(0));
2309  SDValue Op2 = getValue(I.getOperand(1));
2310  ISD::CondCode Opcode = getICmpCondCode(predicate);
2311
2312  EVT DestVT = TLI.getValueType(I.getType());
2313  setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode));
2314}
2315
2316void SelectionDAGBuilder::visitFCmp(const User &I) {
2317  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
2318  if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
2319    predicate = FC->getPredicate();
2320  else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
2321    predicate = FCmpInst::Predicate(FC->getPredicate());
2322  SDValue Op1 = getValue(I.getOperand(0));
2323  SDValue Op2 = getValue(I.getOperand(1));
2324  ISD::CondCode Condition = getFCmpCondCode(predicate);
2325  EVT DestVT = TLI.getValueType(I.getType());
2326  setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition));
2327}
2328
2329void SelectionDAGBuilder::visitSelect(const User &I) {
2330  SmallVector<EVT, 4> ValueVTs;
2331  ComputeValueVTs(TLI, I.getType(), ValueVTs);
2332  unsigned NumValues = ValueVTs.size();
2333  if (NumValues == 0) return;
2334
2335  SmallVector<SDValue, 4> Values(NumValues);
2336  SDValue Cond     = getValue(I.getOperand(0));
2337  SDValue TrueVal  = getValue(I.getOperand(1));
2338  SDValue FalseVal = getValue(I.getOperand(2));
2339
2340  for (unsigned i = 0; i != NumValues; ++i)
2341    Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
2342                          TrueVal.getNode()->getValueType(TrueVal.getResNo()+i),
2343                            Cond,
2344                            SDValue(TrueVal.getNode(),
2345                                    TrueVal.getResNo() + i),
2346                            SDValue(FalseVal.getNode(),
2347                                    FalseVal.getResNo() + i));
2348
2349  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
2350                           DAG.getVTList(&ValueVTs[0], NumValues),
2351                           &Values[0], NumValues));
2352}
2353
2354void SelectionDAGBuilder::visitTrunc(const User &I) {
2355  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
2356  SDValue N = getValue(I.getOperand(0));
2357  EVT DestVT = TLI.getValueType(I.getType());
2358  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N));
2359}
2360
2361void SelectionDAGBuilder::visitZExt(const User &I) {
2362  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2363  // ZExt also can't be a cast to bool for the same reason, so nothing to do.
2364  SDValue N = getValue(I.getOperand(0));
2365  EVT DestVT = TLI.getValueType(I.getType());
2366  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N));
2367}
2368
2369void SelectionDAGBuilder::visitSExt(const User &I) {
2370  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2371  // SExt also can't be a cast to bool for the same reason, so nothing to do.
2372  SDValue N = getValue(I.getOperand(0));
2373  EVT DestVT = TLI.getValueType(I.getType());
2374  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N));
2375}
2376
2377void SelectionDAGBuilder::visitFPTrunc(const User &I) {
2378  // FPTrunc is never a no-op cast, no need to check
2379  SDValue N = getValue(I.getOperand(0));
2380  EVT DestVT = TLI.getValueType(I.getType());
2381  setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(),
2382                           DestVT, N, DAG.getIntPtrConstant(0)));
2383}
2384
2385void SelectionDAGBuilder::visitFPExt(const User &I){
2386  // FPExt is never a no-op cast, no need to check
2387  SDValue N = getValue(I.getOperand(0));
2388  EVT DestVT = TLI.getValueType(I.getType());
2389  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N));
2390}
2391
2392void SelectionDAGBuilder::visitFPToUI(const User &I) {
2393  // FPToUI is never a no-op cast, no need to check
2394  SDValue N = getValue(I.getOperand(0));
2395  EVT DestVT = TLI.getValueType(I.getType());
2396  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N));
2397}
2398
2399void SelectionDAGBuilder::visitFPToSI(const User &I) {
2400  // FPToSI is never a no-op cast, no need to check
2401  SDValue N = getValue(I.getOperand(0));
2402  EVT DestVT = TLI.getValueType(I.getType());
2403  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N));
2404}
2405
2406void SelectionDAGBuilder::visitUIToFP(const User &I) {
2407  // UIToFP is never a no-op cast, no need to check
2408  SDValue N = getValue(I.getOperand(0));
2409  EVT DestVT = TLI.getValueType(I.getType());
2410  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N));
2411}
2412
2413void SelectionDAGBuilder::visitSIToFP(const User &I){
2414  // SIToFP is never a no-op cast, no need to check
2415  SDValue N = getValue(I.getOperand(0));
2416  EVT DestVT = TLI.getValueType(I.getType());
2417  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N));
2418}
2419
2420void SelectionDAGBuilder::visitPtrToInt(const User &I) {
2421  // What to do depends on the size of the integer and the size of the pointer.
2422  // We can either truncate, zero extend, or no-op, accordingly.
2423  SDValue N = getValue(I.getOperand(0));
2424  EVT DestVT = TLI.getValueType(I.getType());
2425  setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
2426}
2427
2428void SelectionDAGBuilder::visitIntToPtr(const User &I) {
2429  // What to do depends on the size of the integer and the size of the pointer.
2430  // We can either truncate, zero extend, or no-op, accordingly.
2431  SDValue N = getValue(I.getOperand(0));
2432  EVT DestVT = TLI.getValueType(I.getType());
2433  setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT));
2434}
2435
2436void SelectionDAGBuilder::visitBitCast(const User &I) {
2437  SDValue N = getValue(I.getOperand(0));
2438  EVT DestVT = TLI.getValueType(I.getType());
2439
2440  // BitCast assures us that source and destination are the same size so this is
2441  // either a BIT_CONVERT or a no-op.
2442  if (DestVT != N.getValueType())
2443    setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
2444                             DestVT, N)); // convert types.
2445  else
2446    setValue(&I, N);            // noop cast.
2447}
2448
2449void SelectionDAGBuilder::visitInsertElement(const User &I) {
2450  SDValue InVec = getValue(I.getOperand(0));
2451  SDValue InVal = getValue(I.getOperand(1));
2452  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
2453                              TLI.getPointerTy(),
2454                              getValue(I.getOperand(2)));
2455  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(),
2456                           TLI.getValueType(I.getType()),
2457                           InVec, InVal, InIdx));
2458}
2459
2460void SelectionDAGBuilder::visitExtractElement(const User &I) {
2461  SDValue InVec = getValue(I.getOperand(0));
2462  SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(),
2463                              TLI.getPointerTy(),
2464                              getValue(I.getOperand(1)));
2465  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
2466                           TLI.getValueType(I.getType()), InVec, InIdx));
2467}
2468
2469// Utility for visitShuffleVector - Returns true if the mask is a sequential
2470// mask starting from SIndx and increasing up to the vector length (undefs
2471// are allowed).
2471static bool SequentialMask(SmallVectorImpl<int> &Mask, unsigned SIndx) {
2472  unsigned MaskNumElts = Mask.size();
2473  for (unsigned i = 0; i != MaskNumElts; ++i)
2474    if ((Mask[i] >= 0) && (Mask[i] != (int)(i + SIndx)))
2475      return false;
2476  return true;
2477}
2478
2479void SelectionDAGBuilder::visitShuffleVector(const User &I) {
2480  SmallVector<int, 8> Mask;
2481  SDValue Src1 = getValue(I.getOperand(0));
2482  SDValue Src2 = getValue(I.getOperand(1));
2483
2484  // Convert the ConstantVector mask operand into an array of ints, with -1
2485  // representing undef values.
2486  SmallVector<Constant*, 8> MaskElts;
2487  cast<Constant>(I.getOperand(2))->getVectorElements(MaskElts);
2488  unsigned MaskNumElts = MaskElts.size();
2489  for (unsigned i = 0; i != MaskNumElts; ++i) {
2490    if (isa<UndefValue>(MaskElts[i]))
2491      Mask.push_back(-1);
2492    else
2493      Mask.push_back(cast<ConstantInt>(MaskElts[i])->getSExtValue());
2494  }
2495
2496  EVT VT = TLI.getValueType(I.getType());
2497  EVT SrcVT = Src1.getValueType();
2498  unsigned SrcNumElts = SrcVT.getVectorNumElements();
2499
2500  if (SrcNumElts == MaskNumElts) {
2501    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
2502                                      &Mask[0]));
2503    return;
2504  }
2505
2506  // Normalize the shuffle vector since mask and vector length don't match.
2507  if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
2508    // The mask is longer than the source vectors and is a multiple of their
2509    // length.  We can concatenate the source vectors to make their length
2510    // match the mask's.
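        // For example (illustrative), shuffling two <2 x float> sources with the
        // mask <0, 1, 2, 3> is simply their concatenation into one <4 x float>.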
2511    if (SrcNumElts*2 == MaskNumElts && SequentialMask(Mask, 0)) {
2512      // The shuffle is concatenating two vectors together.
2513      setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(),
2514                               VT, Src1, Src2));
2515      return;
2516    }
2517
2518    // Pad both vectors with undefs to make them the same length as the mask.
2519    unsigned NumConcat = MaskNumElts / SrcNumElts;
2520    bool Src1U = Src1.getOpcode() == ISD::UNDEF;
2521    bool Src2U = Src2.getOpcode() == ISD::UNDEF;
2522    SDValue UndefVal = DAG.getUNDEF(SrcVT);
2523
2524    SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
2525    SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
2526    MOps1[0] = Src1;
2527    MOps2[0] = Src2;
2528
2529    Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
2530                                                  getCurDebugLoc(), VT,
2531                                                  &MOps1[0], NumConcat);
2532    Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS,
2533                                                  getCurDebugLoc(), VT,
2534                                                  &MOps2[0], NumConcat);
2535
2536    // Readjust mask for new input vector length.
2537    SmallVector<int, 8> MappedOps;
2538    for (unsigned i = 0; i != MaskNumElts; ++i) {
2539      int Idx = Mask[i];
2540      if (Idx < (int)SrcNumElts)
2541        MappedOps.push_back(Idx);
2542      else
2543        MappedOps.push_back(Idx + MaskNumElts - SrcNumElts);
2544    }
2545
2546    setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
2547                                      &MappedOps[0]));
2548    return;
2549  }
2550
2551  if (SrcNumElts > MaskNumElts) {
2552    // Analyze the access pattern of the vector to see if we can extract
2553    // two subvectors and do the shuffle. The analysis is done by calculating
2554    // the range of elements the mask accesses in both source vectors.
2555    int MinRange[2] = { SrcNumElts+1, SrcNumElts+1};
2556    int MaxRange[2] = {-1, -1};
2557
2558    for (unsigned i = 0; i != MaskNumElts; ++i) {
2559      int Idx = Mask[i];
2560      int Input = 0;
2561      if (Idx < 0)
2562        continue;
2563
2564      if (Idx >= (int)SrcNumElts) {
2565        Input = 1;
2566        Idx -= SrcNumElts;
2567      }
2568      if (Idx > MaxRange[Input])
2569        MaxRange[Input] = Idx;
2570      if (Idx < MinRange[Input])
2571        MinRange[Input] = Idx;
2572    }
2573
2574    // Check if the access is smaller than the vector size and whether we can
2575    // find a reasonable extract index.
2576    int RangeUse[2] = { 2, 2 };  // 0 = Unused, 1 = Extract, 2 = Can not
2577                                 // Extract.
2578    int StartIdx[2];  // StartIdx to extract from
2579    for (int Input=0; Input < 2; ++Input) {
2580      if (MinRange[Input] == (int)(SrcNumElts+1) && MaxRange[Input] == -1) {
2581        RangeUse[Input] = 0; // Unused
2582        StartIdx[Input] = 0;
2583      } else if (MaxRange[Input] - MinRange[Input] < (int)MaskNumElts) {
2584        // Fits within range but we should see if we can find a good
2585        // start index that is a multiple of the mask length.
2586        if (MaxRange[Input] < (int)MaskNumElts) {
2587          RangeUse[Input] = 1; // Extract from beginning of the vector
2588          StartIdx[Input] = 0;
2589        } else {
2590          StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
2591          if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
2592              StartIdx[Input] + MaskNumElts < SrcNumElts)
2593            RangeUse[Input] = 1; // Extract from a multiple of the mask length.
2594        }
2595      }
2596    }
2597
2598    if (RangeUse[0] == 0 && RangeUse[1] == 0) {
2599      setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
2600      return;
2601    }
2602    else if (RangeUse[0] < 2 && RangeUse[1] < 2) {
2603      // Extract appropriate subvector and generate a vector shuffle
2604      for (int Input=0; Input < 2; ++Input) {
2605        SDValue &Src = Input == 0 ? Src1 : Src2;
2606        if (RangeUse[Input] == 0)
2607          Src = DAG.getUNDEF(VT);
2608        else
2609          Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT,
2610                            Src, DAG.getIntPtrConstant(StartIdx[Input]));
2611      }
2612
2613      // Calculate new mask.
2614      SmallVector<int, 8> MappedOps;
2615      for (unsigned i = 0; i != MaskNumElts; ++i) {
2616        int Idx = Mask[i];
2617        if (Idx < 0)
2618          MappedOps.push_back(Idx);
2619        else if (Idx < (int)SrcNumElts)
2620          MappedOps.push_back(Idx - StartIdx[0]);
2621        else
2622          MappedOps.push_back(Idx - SrcNumElts - StartIdx[1] + MaskNumElts);
2623      }
2624
2625      setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2,
2626                                        &MappedOps[0]));
2627      return;
2628    }
2629  }
2630
2631  // We can't use either concat vectors or extract subvectors, so fall back
2632  // to replacing the shuffle with a series of extracts and a build_vector.
2634  EVT EltVT = VT.getVectorElementType();
2635  EVT PtrVT = TLI.getPointerTy();
2636  SmallVector<SDValue,8> Ops;
2637  for (unsigned i = 0; i != MaskNumElts; ++i) {
2638    if (Mask[i] < 0) {
2639      Ops.push_back(DAG.getUNDEF(EltVT));
2640    } else {
2641      int Idx = Mask[i];
2642      SDValue Res;
2643
2644      if (Idx < (int)SrcNumElts)
2645        Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
2646                          EltVT, Src1, DAG.getConstant(Idx, PtrVT));
2647      else
2648        Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(),
2649                          EltVT, Src2,
2650                          DAG.getConstant(Idx - SrcNumElts, PtrVT));
2651
2652      Ops.push_back(Res);
2653    }
2654  }
2655
2656  setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(),
2657                           VT, &Ops[0], Ops.size()));
2658}
2659
2660void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
2661  const Value *Op0 = I.getOperand(0);
2662  const Value *Op1 = I.getOperand(1);
2663  const Type *AggTy = I.getType();
2664  const Type *ValTy = Op1->getType();
2665  bool IntoUndef = isa<UndefValue>(Op0);
2666  bool FromUndef = isa<UndefValue>(Op1);
2667
2668  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
2669                                            I.idx_begin(), I.idx_end());
2670
2671  SmallVector<EVT, 4> AggValueVTs;
2672  ComputeValueVTs(TLI, AggTy, AggValueVTs);
2673  SmallVector<EVT, 4> ValValueVTs;
2674  ComputeValueVTs(TLI, ValTy, ValValueVTs);
2675
2676  unsigned NumAggValues = AggValueVTs.size();
2677  unsigned NumValValues = ValValueVTs.size();
2678  SmallVector<SDValue, 4> Values(NumAggValues);
2679
2680  SDValue Agg = getValue(Op0);
2681  SDValue Val = getValue(Op1);
2682  unsigned i = 0;
2683  // Copy the beginning value(s) from the original aggregate.
2684  for (; i != LinearIndex; ++i)
2685    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
2686                SDValue(Agg.getNode(), Agg.getResNo() + i);
2687  // Copy values from the inserted value(s).
2688  for (; i != LinearIndex + NumValValues; ++i)
2689    Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
2690                SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
2691  // Copy remaining value(s) from the original aggregate.
2692  for (; i != NumAggValues; ++i)
2693    Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
2694                SDValue(Agg.getNode(), Agg.getResNo() + i);
2695
2696  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
2697                           DAG.getVTList(&AggValueVTs[0], NumAggValues),
2698                           &Values[0], NumAggValues));
2699}
2700
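// Illustrative sketch only (guarded out of the build): how the three copy
// loops in visitInsertValue above splice the inserted value into the
// flattened aggregate.  The concrete type and indices are a hypothetical
// example of what ComputeValueVTs/ComputeLinearIndex produce.
#if 0
// For the aggregate type { i32, { float, i64 } } the flattened value list is
// [ i32, float, i64 ] (NumAggValues == 3).  Inserting at indices {1, 1}
// yields LinearIndex == 2 and NumValValues == 1, so Values[0..1] come from
// the original aggregate and Values[2] comes from the inserted value.
static void spliceExample() {
  const unsigned NumAggValues = 3, LinearIndex = 2, NumValValues = 1;
  int Agg[NumAggValues] = {10, 20, 30};
  int Val[NumValValues] = {99};
  int Values[NumAggValues];
  unsigned i = 0;
  for (; i != LinearIndex; ++i)                // prefix from the aggregate
    Values[i] = Agg[i];
  for (; i != LinearIndex + NumValValues; ++i) // the inserted value(s)
    Values[i] = Val[i - LinearIndex];
  for (; i != NumAggValues; ++i)               // suffix from the aggregate
    Values[i] = Agg[i];
  (void)Values;
}
#endif
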
2701void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
2702  const Value *Op0 = I.getOperand(0);
2703  const Type *AggTy = Op0->getType();
2704  const Type *ValTy = I.getType();
2705  bool OutOfUndef = isa<UndefValue>(Op0);
2706
2707  unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
2708                                            I.idx_begin(), I.idx_end());
2709
2710  SmallVector<EVT, 4> ValValueVTs;
2711  ComputeValueVTs(TLI, ValTy, ValValueVTs);
2712
2713  unsigned NumValValues = ValValueVTs.size();
2714  SmallVector<SDValue, 4> Values(NumValValues);
2715
2716  SDValue Agg = getValue(Op0);
2717  // Copy out the selected value(s).
2718  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
2719    Values[i - LinearIndex] =
2720      OutOfUndef ?
2721        DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
2722        SDValue(Agg.getNode(), Agg.getResNo() + i);
2723
2724  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
2725                           DAG.getVTList(&ValValueVTs[0], NumValValues),
2726                           &Values[0], NumValValues));
2727}
2728
2729void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
2730  SDValue N = getValue(I.getOperand(0));
2731  const Type *Ty = I.getOperand(0)->getType();
2732
2733  for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end();
2734       OI != E; ++OI) {
2735    const Value *Idx = *OI;
2736    if (const StructType *StTy = dyn_cast<StructType>(Ty)) {
2737      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
2738      if (Field) {
2739        // N = N + Offset
2740        uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field);
2741        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
2742                        DAG.getIntPtrConstant(Offset));
2743      }
2744
2745      Ty = StTy->getElementType(Field);
2746    } else if (const UnionType *UnTy = dyn_cast<UnionType>(Ty)) {
2747      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
2748
2749      // The offset is canonically 0 for unions, but the element type changes.
2750      Ty = UnTy->getElementType(Field);
2751    } else {
2752      Ty = cast<SequentialType>(Ty)->getElementType();
2753
2754      // If this is a constant subscript, handle it quickly.
2755      if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
2756        if (CI->isZero()) continue;
2757        uint64_t Offs =
2758            TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
2759        SDValue OffsVal;
2760        EVT PTy = TLI.getPointerTy();
2761        unsigned PtrBits = PTy.getSizeInBits();
2762        if (PtrBits < 64)
2763          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
2764                                TLI.getPointerTy(),
2765                                DAG.getConstant(Offs, MVT::i64));
2766        else
2767          OffsVal = DAG.getIntPtrConstant(Offs);
2768
2769        N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N,
2770                        OffsVal);
2771        continue;
2772      }
2773
2774      // N = N + Idx * ElementSize;
2775      APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(),
2776                                TD->getTypeAllocSize(Ty));
2777      SDValue IdxN = getValue(Idx);
2778
2779      // If the index is smaller or larger than intptr_t, truncate or extend
2780      // it.
2781      IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType());
2782
2783      // If this is a multiply by a power of two, turn it into a shl
2784      // immediately.  This is a very common case.
2785      if (ElementSize != 1) {
2786        if (ElementSize.isPowerOf2()) {
2787          unsigned Amt = ElementSize.logBase2();
2788          IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(),
2789                             N.getValueType(), IdxN,
2790                             DAG.getConstant(Amt, TLI.getPointerTy()));
2791        } else {
2792          SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy());
2793          IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(),
2794                             N.getValueType(), IdxN, Scale);
2795        }
2796      }
2797
2798      N = DAG.getNode(ISD::ADD, getCurDebugLoc(),
2799                      N.getValueType(), N, IdxN);
2800    }
2801  }
2802
2803  setValue(&I, N);
2804}
2805
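// Illustrative sketch only (guarded out of the build): the address arithmetic
// visitGetElementPtr above emits, written as plain integer math.  The helper
// name and the struct layout are hypothetical.
#if 0
#include <cstdint>
// For a GEP of the form &Base[Idx].Field over struct S { double A; int Field; },
// the lowering above computes
//   N = Base + Idx * sizeof(S) + offsetof(S, Field)
// where the multiply becomes a shift when sizeof(S) is a power of two.
static uint64_t gepAddress(uint64_t Base, int64_t Idx,
                           uint64_t ElementSize, uint64_t FieldOffset) {
  uint64_t N = Base;
  N += (uint64_t)Idx * ElementSize;  // array index scaled by the element size
  N += FieldOffset;                  // constant struct-field offset
  return N;
}
#endif
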
2806void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
2807  // If this is a fixed-sized alloca in the entry block of the function,
2808  // allocate it statically on the stack.
2809  if (FuncInfo.StaticAllocaMap.count(&I))
2810    return;   // getValue will auto-populate this.
2811
2812  const Type *Ty = I.getAllocatedType();
2813  uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
2814  unsigned Align =
2815    std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty),
2816             I.getAlignment());
2817
2818  SDValue AllocSize = getValue(I.getArraySize());
2819
2820  EVT IntPtr = TLI.getPointerTy();
2821  if (AllocSize.getValueType() != IntPtr)
2822    AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr);
2823
2824  AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr,
2825                          AllocSize,
2826                          DAG.getConstant(TySize, IntPtr));
2827
2828  // Handle alignment.  If the requested alignment is less than or equal to
2829  // the stack alignment, ignore it.  If the size is greater than or equal to
2830  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
2831  unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
2832  if (Align <= StackAlign)
2833    Align = 0;
2834
2835  // Round the size of the allocation up to the stack alignment size
2836  // by adding StackAlign-1 to the size.
2837  AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(),
2838                          AllocSize.getValueType(), AllocSize,
2839                          DAG.getIntPtrConstant(StackAlign-1));
2840
2841  // Mask out the low bits for alignment purposes.
2842  AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(),
2843                          AllocSize.getValueType(), AllocSize,
2844                          DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1)));
2845
2846  SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) };
2847  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
2848  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(),
2849                            VTs, Ops, 3);
2850  setValue(&I, DSA);
2851  DAG.setRoot(DSA.getValue(1));
2852
2853  // Inform the Frame Information that we have just allocated a variable-sized
2854  // object.
2855  FuncInfo.MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1);
2856}
2857
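// Illustrative sketch only (guarded out of the build): the size computation
// fed to DYNAMIC_STACKALLOC above, as plain integer math.  The helper name is
// hypothetical.
#if 0
#include <cstdint>
// The allocation size is ((ArraySize * TySize) + StackAlign - 1) masked with
// ~(StackAlign - 1), i.e. rounded up to the stack alignment.  For example,
// with StackAlign == 16 a 20-byte request becomes 32 bytes.
static uint64_t roundedAllocSize(uint64_t ArraySize, uint64_t TySize,
                                 uint64_t StackAlign) {
  uint64_t Size = ArraySize * TySize;
  Size += StackAlign - 1;            // add SA-1 ...
  Size &= ~(StackAlign - 1);         // ... then mask off the low bits
  return Size;
}
#endif
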
2858void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
2859  const Value *SV = I.getOperand(0);
2860  SDValue Ptr = getValue(SV);
2861
2862  const Type *Ty = I.getType();
2863
2864  bool isVolatile = I.isVolatile();
2865  bool isNonTemporal = I.getMetadata("nontemporal") != 0;
2866  unsigned Alignment = I.getAlignment();
2867
2868  SmallVector<EVT, 4> ValueVTs;
2869  SmallVector<uint64_t, 4> Offsets;
2870  ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets);
2871  unsigned NumValues = ValueVTs.size();
2872  if (NumValues == 0)
2873    return;
2874
2875  SDValue Root;
2876  bool ConstantMemory = false;
2877  if (I.isVolatile())
2878    // Serialize volatile loads with other side effects.
2879    Root = getRoot();
2880  else if (AA->pointsToConstantMemory(SV)) {
2881    // Do not serialize (non-volatile) loads of constant memory with anything.
2882    Root = DAG.getEntryNode();
2883    ConstantMemory = true;
2884  } else {
2885    // Do not serialize non-volatile loads against each other.
2886    Root = DAG.getRoot();
2887  }
2888
2889  SmallVector<SDValue, 4> Values(NumValues);
2890  SmallVector<SDValue, 4> Chains(NumValues);
2891  EVT PtrVT = Ptr.getValueType();
2892  for (unsigned i = 0; i != NumValues; ++i) {
2893    SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
2894                            PtrVT, Ptr,
2895                            DAG.getConstant(Offsets[i], PtrVT));
2896    SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
2897                            A, SV, Offsets[i], isVolatile,
2898                            isNonTemporal, Alignment);
2899
2900    Values[i] = L;
2901    Chains[i] = L.getValue(1);
2902  }
2903
2904  if (!ConstantMemory) {
2905    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
2906                                MVT::Other, &Chains[0], NumValues);
2907    if (isVolatile)
2908      DAG.setRoot(Chain);
2909    else
2910      PendingLoads.push_back(Chain);
2911  }
2912
2913  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
2914                           DAG.getVTList(&ValueVTs[0], NumValues),
2915                           &Values[0], NumValues));
2916}
2917
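// Illustrative sketch only (guarded out of the build): a scalar model of the
// per-value loads emitted by visitLoad above.  The offsets are a hypothetical
// example of what ComputeValueVTs returns for { i32, double }.
#if 0
#include <cstring>
// A first-class aggregate load is split into one load per member, each at
// Ptr + Offsets[i], and the pieces are glued back together with MERGE_VALUES;
// the individual chains are combined with a TokenFactor as above.
static void loadAggregateExample(const char *Ptr, int &IntPart, double &FPPart) {
  std::memcpy(&IntPart, Ptr + 0, sizeof(int));    // value 0 at Offsets[0] == 0
  std::memcpy(&FPPart,  Ptr + 8, sizeof(double)); // value 1 at Offsets[1] == 8
}
#endif
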
2918void SelectionDAGBuilder::visitStore(const StoreInst &I) {
2919  const Value *SrcV = I.getOperand(0);
2920  const Value *PtrV = I.getOperand(1);
2921
2922  SmallVector<EVT, 4> ValueVTs;
2923  SmallVector<uint64_t, 4> Offsets;
2924  ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets);
2925  unsigned NumValues = ValueVTs.size();
2926  if (NumValues == 0)
2927    return;
2928
2929  // Get the lowered operands. Note that we do this after
2930  // checking if NumValues is zero, because with zero values
2931  // the operands won't have values in the map.
2932  SDValue Src = getValue(SrcV);
2933  SDValue Ptr = getValue(PtrV);
2934
2935  SDValue Root = getRoot();
2936  SmallVector<SDValue, 4> Chains(NumValues);
2937  EVT PtrVT = Ptr.getValueType();
2938  bool isVolatile = I.isVolatile();
2939  bool isNonTemporal = I.getMetadata("nontemporal") != 0;
2940  unsigned Alignment = I.getAlignment();
2941
2942  for (unsigned i = 0; i != NumValues; ++i) {
2943    SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
2944                              DAG.getConstant(Offsets[i], PtrVT));
2945    Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
2946                             SDValue(Src.getNode(), Src.getResNo() + i),
2947                             Add, PtrV, Offsets[i], isVolatile,
2948                             isNonTemporal, Alignment);
2949  }
2950
2951  DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
2952                          MVT::Other, &Chains[0], NumValues));
2953}
2954
2955/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
2956/// node.
2957void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
2958                                               unsigned Intrinsic) {
2959  bool HasChain = !I.doesNotAccessMemory();
2960  bool OnlyLoad = HasChain && I.onlyReadsMemory();
2961
2962  // Build the operand list.
2963  SmallVector<SDValue, 8> Ops;
2964  if (HasChain) {  // If this intrinsic has side-effects, chainify it.
2965    if (OnlyLoad) {
2966      // We don't need to serialize loads against other loads.
2967      Ops.push_back(DAG.getRoot());
2968    } else {
2969      Ops.push_back(getRoot());
2970    }
2971  }
2972
2973  // Info is set by getTgtMemIntrinsic.
2974  TargetLowering::IntrinsicInfo Info;
2975  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
2976
2977  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
2978  if (!IsTgtIntrinsic)
2979    Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
2980
2981  // Add all operands of the call to the operand list.
2982  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
2983    SDValue Op = getValue(I.getArgOperand(i));
2984    assert(TLI.isTypeLegal(Op.getValueType()) &&
2985           "Intrinsic uses a non-legal type?");
2986    Ops.push_back(Op);
2987  }
2988
2989  SmallVector<EVT, 4> ValueVTs;
2990  ComputeValueVTs(TLI, I.getType(), ValueVTs);
2991#ifndef NDEBUG
2992  for (unsigned Val = 0, E = ValueVTs.size(); Val != E; ++Val) {
2993    assert(TLI.isTypeLegal(ValueVTs[Val]) &&
2994           "Intrinsic uses a non-legal type?");
2995  }
2996#endif // NDEBUG
2997
2998  if (HasChain)
2999    ValueVTs.push_back(MVT::Other);
3000
3001  SDVTList VTs = DAG.getVTList(ValueVTs.data(), ValueVTs.size());
3002
3003  // Create the node.
3004  SDValue Result;
3005  if (IsTgtIntrinsic) {
3006    // This is a target intrinsic that touches memory.
3007    Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
3008                                     VTs, &Ops[0], Ops.size(),
3009                                     Info.memVT, Info.ptrVal, Info.offset,
3010                                     Info.align, Info.vol,
3011                                     Info.readMem, Info.writeMem);
3012  } else if (!HasChain) {
3013    Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(),
3014                         VTs, &Ops[0], Ops.size());
3015  } else if (!I.getType()->isVoidTy()) {
3016    Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(),
3017                         VTs, &Ops[0], Ops.size());
3018  } else {
3019    Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(),
3020                         VTs, &Ops[0], Ops.size());
3021  }
3022
3023  if (HasChain) {
3024    SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
3025    if (OnlyLoad)
3026      PendingLoads.push_back(Chain);
3027    else
3028      DAG.setRoot(Chain);
3029  }
3030
3031  if (!I.getType()->isVoidTy()) {
3032    if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
3033      EVT VT = TLI.getValueType(PTy);
3034      Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
3035    }
3036
3037    setValue(&I, Result);
3038  }
3039}
3040
3041/// GetSignificand - Get the significand and build it into a floating-point
3042/// number with exponent of 1:
3043///
3044///   Op = (Op & 0x007fffff) | 0x3f800000;
3045///
3046/// where Op is the i32 bit pattern of the floating-point value.
3047static SDValue
3048GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
3049  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
3050                           DAG.getConstant(0x007fffff, MVT::i32));
3051  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
3052                           DAG.getConstant(0x3f800000, MVT::i32));
3053  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
3054}
3055
3056/// GetExponent - Get the exponent:
3057///
3058///   (float)(int)(((Op & 0x7f800000) >> 23) - 127);
3059///
3060/// where Op is the i32 bit pattern of the floating-point value.
3061static SDValue
3062GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
3063            DebugLoc dl) {
3064  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
3065                           DAG.getConstant(0x7f800000, MVT::i32));
3066  SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
3067                           DAG.getConstant(23, TLI.getPointerTy()));
3068  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
3069                           DAG.getConstant(127, MVT::i32));
3070  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
3071}
3072
3073/// getF32Constant - Get 32-bit floating point constant.
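// Illustrative sketch only (guarded out of the build): a worked scalar model
// of GetSignificand/GetExponent above for IEEE-754 single precision.  The
// helper name is hypothetical.
#if 0
#include <cstdint>
#include <cstring>
// For F == 6.0f (bits 0x40C00000) the significand becomes
// (0x40C00000 & 0x007fffff) | 0x3f800000 == 0x3FC00000 == 1.5f, and the
// exponent becomes ((0x40C00000 & 0x7f800000) >> 23) - 127 == 2,
// so F == 1.5f * 2^2.
static void significandAndExponent(float F, float &Sig, float &Exp) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  uint32_t SigBits = (Bits & 0x007fffffu) | 0x3f800000u;
  std::memcpy(&Sig, &SigBits, sizeof(Sig));            // mantissa, exponent 1
  Exp = (float)((int)((Bits & 0x7f800000u) >> 23) - 127); // unbiased exponent
}
#endif
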
3074static SDValue
3075getF32Constant(SelectionDAG &DAG, unsigned Flt) {
3076  return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32);
3077}
3078
3079/// Inlined utility function to implement binary input atomic intrinsics for
3080/// visitIntrinsicCall: I is a call instruction
3081///                     Op is the associated NodeType for I
3082const char *
3083SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I,
3084                                           ISD::NodeType Op) {
3085  SDValue Root = getRoot();
3086  SDValue L =
3087    DAG.getAtomic(Op, getCurDebugLoc(),
3088                  getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
3089                  Root,
3090                  getValue(I.getArgOperand(0)),
3091                  getValue(I.getArgOperand(1)),
3092                  I.getArgOperand(0));
3093  setValue(&I, L);
3094  DAG.setRoot(L.getValue(1));
3095  return 0;
3096}
3097
3098// implVisitAluOverflow - Lower arithmetic overflow intrinsics.
3099const char *
3100SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) {
3101  SDValue Op1 = getValue(I.getArgOperand(0));
3102  SDValue Op2 = getValue(I.getArgOperand(1));
3103
3104  SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
3105  setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2));
3106  return 0;
3107}
3108
3109/// visitExp - Lower an exp intrinsic. Handles the special sequences for
3110/// limited-precision mode.
3111void
3112SelectionDAGBuilder::visitExp(const CallInst &I) {
3113  SDValue result;
3114  DebugLoc dl = getCurDebugLoc();
3115
3116  if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
3117      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3118    SDValue Op = getValue(I.getArgOperand(0));
3119
3120    // Put the exponent in the right bit position for later addition to the
3121    // final result:
3122    //
3123    //   #define LOG2OFe 1.4426950f
3124    //   IntegerPartOfX = ((int32_t)(X * LOG2OFe));
3125    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
3126                             getF32Constant(DAG, 0x3fb8aa3b));
3127    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
3128
3129    //   FractionalPartOfX = (X * LOG2OFe) - (float)IntegerPartOfX;
3130    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
3131    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
3132
3133    //   IntegerPartOfX <<= 23;
3134    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
3135                                 DAG.getConstant(23, TLI.getPointerTy()));
3136
3137    if (LimitFloatPrecision <= 6) {
3138      // For floating-point precision of 6:
3139      //
3140      //   TwoToFractionalPartOfX =
3141      //     0.997535578f +
3142      //       (0.735607626f + 0.252464424f * x) * x;
3143      //
3144      // error 0.0144103317, which is 6 bits
3145      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3146                               getF32Constant(DAG, 0x3e814304));
3147      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3148                               getF32Constant(DAG, 0x3f3c50c8));
3149      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3150      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3151                               getF32Constant(DAG, 0x3f7f5e7e));
3152      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5);
3153
3154      // Add the exponent into the result in integer domain.
3155      SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3156                               TwoToFracPartOfX, IntegerPartOfX);
3157
3158      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6);
3159    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3160      // For floating-point precision of 12:
3161      //
3162      //   TwoToFractionalPartOfX =
3163      //     0.999892986f +
3164      //       (0.696457318f +
3165      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
3166      //
3167      // 0.000107046256 error, which is 13 to 14 bits
3168      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3169                               getF32Constant(DAG, 0x3da235e3));
3170      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3171                               getF32Constant(DAG, 0x3e65b8f3));
3172      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3173      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3174                               getF32Constant(DAG, 0x3f324b07));
3175      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3176      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3177                               getF32Constant(DAG, 0x3f7ff8fd));
3178      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7);
3179
3180      // Add the exponent into the result in integer domain.
3181      SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3182                               TwoToFracPartOfX, IntegerPartOfX);
3183
3184      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8);
3185    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3186      // For floating-point precision of 18:
3187      //
3188      //   TwoToFractionalPartOfX =
3189      //     0.999999982f +
3190      //       (0.693148872f +
3191      //         (0.240227044f +
3192      //           (0.554906021e-1f +
3193      //             (0.961591928e-2f +
3194      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
3195      //
3196      // error 2.47208000*10^(-7), which is better than 18 bits
3197      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3198                               getF32Constant(DAG, 0x3924b03e));
3199      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3200                               getF32Constant(DAG, 0x3ab24b87));
3201      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3202      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3203                               getF32Constant(DAG, 0x3c1d8c17));
3204      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3205      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3206                               getF32Constant(DAG, 0x3d634a1d));
3207      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3208      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3209                               getF32Constant(DAG, 0x3e75fe14));
3210      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3211      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
3212                                getF32Constant(DAG, 0x3f317234));
3213      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
3214      SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
3215                                getF32Constant(DAG, 0x3f800000));
3216      SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,
3217                                             MVT::i32, t13);
3218
3219      // Add the exponent into the result in integer domain.
3220      SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3221                                TwoToFracPartOfX, IntegerPartOfX);
3222
3223      result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14);
3224    }
3225  } else {
3226    // No special expansion.
3227    result = DAG.getNode(ISD::FEXP, dl,
3228                         getValue(I.getArgOperand(0)).getValueType(),
3229                         getValue(I.getArgOperand(0)));
3230  }
3231
3232  setValue(&I, result);
3233}
3234
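// Illustrative sketch only (guarded out of the build): a scalar model of the
// limited-precision expansion used by visitExp above (visitExp2 and the
// pow(10, x) case in visitPow follow the same shape).  The helper name is
// hypothetical; the polynomial is the 6-bit variant from the code above.
#if 0
#include <cstdint>
#include <cstring>
// exp(X) == 2^(X * log2(e)) == 2^IntegerPart * 2^FractionalPart.  The
// fractional power of two is approximated by a small polynomial, and the
// integer part is added straight into the exponent field of the result
// (the SHL by 23 followed by the integer ADD above).
static float expLimitedPrecision(float X) {
  float T0 = X * 1.4426950f;             // X * LOG2OFe (0x3fb8aa3b)
  int IntPart = (int)T0;
  float Frac = T0 - (float)IntPart;
  float TwoToFrac =
      0.997535578f + (0.735607626f + 0.252464424f * Frac) * Frac;
  uint32_t Bits;
  std::memcpy(&Bits, &TwoToFrac, sizeof(Bits));
  Bits += (uint32_t)IntPart << 23;       // scale by 2^IntPart via the exponent
  float Result;
  std::memcpy(&Result, &Bits, sizeof(Result));
  return Result;
}
#endif
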
3235/// visitLog - Lower a log intrinsic. Handles the special sequences for
3236/// limited-precision mode.
3237void
3238SelectionDAGBuilder::visitLog(const CallInst &I) {
3239  SDValue result;
3240  DebugLoc dl = getCurDebugLoc();
3241
3242  if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
3243      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3244    SDValue Op = getValue(I.getArgOperand(0));
3245    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
3246
3247    // Scale the exponent by log(2) [0.69314718f].
3248    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
3249    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
3250                                        getF32Constant(DAG, 0x3f317218));
3251
3252    // Get the significand and build it into a floating-point number with
3253    // exponent of 1.
3254    SDValue X = GetSignificand(DAG, Op1, dl);
3255
3256    if (LimitFloatPrecision <= 6) {
3257      // For floating-point precision of 6:
3258      //
3259      //   LogofMantissa =
3260      //     -1.1609546f +
3261      //       (1.4034025f - 0.23903021f * x) * x;
3262      //
3263      // error 0.0034276066, which is better than 8 bits
3264      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3265                               getF32Constant(DAG, 0xbe74c456));
3266      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3267                               getF32Constant(DAG, 0x3fb3a2b1));
3268      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3269      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3270                                          getF32Constant(DAG, 0x3f949a29));
3271
3272      result = DAG.getNode(ISD::FADD, dl,
3273                           MVT::f32, LogOfExponent, LogOfMantissa);
3274    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3275      // For floating-point precision of 12:
3276      //
3277      //   LogOfMantissa =
3278      //     -1.7417939f +
3279      //       (2.8212026f +
3280      //         (-1.4699568f +
3281      //           (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
3282      //
3283      // error 0.000061011436, which is 14 bits
3284      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3285                               getF32Constant(DAG, 0xbd67b6d6));
3286      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3287                               getF32Constant(DAG, 0x3ee4f4b8));
3288      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3289      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3290                               getF32Constant(DAG, 0x3fbc278b));
3291      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3292      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3293                               getF32Constant(DAG, 0x40348e95));
3294      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3295      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3296                                          getF32Constant(DAG, 0x3fdef31a));
3297
3298      result = DAG.getNode(ISD::FADD, dl,
3299                           MVT::f32, LogOfExponent, LogOfMantissa);
3300    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3301      // For floating-point precision of 18:
3302      //
3303      //   LogOfMantissa =
3304      //     -2.1072184f +
3305      //       (4.2372794f +
3306      //         (-3.7029485f +
3307      //           (2.2781945f +
3308      //             (-0.87823314f +
3309      //               (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
3310      //
3311      // error 0.0000023660568, which is better than 18 bits
3312      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3313                               getF32Constant(DAG, 0xbc91e5ac));
3314      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3315                               getF32Constant(DAG, 0x3e4350aa));
3316      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3317      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3318                               getF32Constant(DAG, 0x3f60d3e3));
3319      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3320      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3321                               getF32Constant(DAG, 0x4011cdf0));
3322      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3323      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3324                               getF32Constant(DAG, 0x406cfd1c));
3325      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3326      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3327                               getF32Constant(DAG, 0x408797cb));
3328      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3329      SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
3330                                          getF32Constant(DAG, 0x4006dcab));
3331
3332      result = DAG.getNode(ISD::FADD, dl,
3333                           MVT::f32, LogOfExponent, LogOfMantissa);
3334    }
3335  } else {
3336    // No special expansion.
3337    result = DAG.getNode(ISD::FLOG, dl,
3338                         getValue(I.getArgOperand(0)).getValueType(),
3339                         getValue(I.getArgOperand(0)));
3340  }
3341
3342  setValue(&I, result);
3343}
3344
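// Illustrative sketch only (guarded out of the build): a scalar model of the
// limited-precision decomposition used by visitLog above.  visitLog2 and
// visitLog10 follow the same shape with the exponent scaled by 1.0 and
// log10(2) respectively.  The helper name is hypothetical; the polynomial is
// the 6-bit variant from the code above.
#if 0
// With X == Mantissa * 2^Exponent (Mantissa in [1,2), as produced by
// GetExponent/GetSignificand), log(X) == Exponent * log(2) + log(Mantissa),
// where log(Mantissa) is approximated by a short minimax polynomial.
static float logLimitedPrecision(float Exponent, float Mantissa) {
  float LogOfExponent = Exponent * 0.69314718f;              // Exponent*ln(2)
  float LogOfMantissa =
      (1.4034025f - 0.23903021f * Mantissa) * Mantissa - 1.1609546f;
  return LogOfExponent + LogOfMantissa;
}
#endif
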
3345/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for
3346/// limited-precision mode.
3347void
3348SelectionDAGBuilder::visitLog2(const CallInst &I) {
3349  SDValue result;
3350  DebugLoc dl = getCurDebugLoc();
3351
3352  if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
3353      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3354    SDValue Op = getValue(I.getArgOperand(0));
3355    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
3356
3357    // Get the exponent.
3358    SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
3359
3360    // Get the significand and build it into a floating-point number with
3361    // exponent of 1.
3362    SDValue X = GetSignificand(DAG, Op1, dl);
3363
3364    // Different possible minimax approximations of significand in
3365    // floating-point for various degrees of accuracy over [1,2].
3366    if (LimitFloatPrecision <= 6) {
3367      // For floating-point precision of 6:
3368      //
3369      //   Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
3370      //
3371      // error 0.0049451742, which is more than 7 bits
3372      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3373                               getF32Constant(DAG, 0xbeb08fe0));
3374      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3375                               getF32Constant(DAG, 0x40019463));
3376      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3377      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3378                                           getF32Constant(DAG, 0x3fd6633d));
3379
3380      result = DAG.getNode(ISD::FADD, dl,
3381                           MVT::f32, LogOfExponent, Log2ofMantissa);
3382    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3383      // For floating-point precision of 12:
3384      //
3385      //   Log2ofMantissa =
3386      //     -2.51285454f +
3387      //       (4.07009056f +
3388      //         (-2.12067489f +
3389      //           (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
3390      //
3391      // error 0.0000876136000, which is better than 13 bits
3392      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3393                               getF32Constant(DAG, 0xbda7262e));
3394      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3395                               getF32Constant(DAG, 0x3f25280b));
3396      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3397      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3398                               getF32Constant(DAG, 0x4007b923));
3399      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3400      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3401                               getF32Constant(DAG, 0x40823e2f));
3402      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3403      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3404                                           getF32Constant(DAG, 0x4020d29c));
3405
3406      result = DAG.getNode(ISD::FADD, dl,
3407                           MVT::f32, LogOfExponent, Log2ofMantissa);
3408    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3409      // For floating-point precision of 18:
3410      //
3411      //   Log2ofMantissa =
3412      //     -3.0400495f +
3413      //       (6.1129976f +
3414      //         (-5.3420409f +
3415      //           (3.2865683f +
3416      //             (-1.2669343f +
3417      //               (0.27515199f -
3418      //                 0.25691327e-1f * x) * x) * x) * x) * x) * x;
3419      //
3420      // error 0.0000018516, which is better than 18 bits
3421      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3422                               getF32Constant(DAG, 0xbcd2769e));
3423      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3424                               getF32Constant(DAG, 0x3e8ce0b9));
3425      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3426      SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3427                               getF32Constant(DAG, 0x3fa22ae7));
3428      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3429      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3430                               getF32Constant(DAG, 0x40525723));
3431      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3432      SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
3433                               getF32Constant(DAG, 0x40aaf200));
3434      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3435      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3436                               getF32Constant(DAG, 0x40c39dad));
3437      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3438      SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
3439                                           getF32Constant(DAG, 0x4042902c));
3440
3441      result = DAG.getNode(ISD::FADD, dl,
3442                           MVT::f32, LogOfExponent, Log2ofMantissa);
3443    }
3444  } else {
3445    // No special expansion.
3446    result = DAG.getNode(ISD::FLOG2, dl,
3447                         getValue(I.getArgOperand(0)).getValueType(),
3448                         getValue(I.getArgOperand(0)));
3449  }
3450
3451  setValue(&I, result);
3452}
3453
3454/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for
3455/// limited-precision mode.
3456void
3457SelectionDAGBuilder::visitLog10(const CallInst &I) {
3458  SDValue result;
3459  DebugLoc dl = getCurDebugLoc();
3460
3461  if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
3462      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3463    SDValue Op = getValue(I.getArgOperand(0));
3464    SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
3465
3466    // Scale the exponent by log10(2) [0.30102999f].
3467    SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
3468    SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
3469                                        getF32Constant(DAG, 0x3e9a209a));
3470
3471    // Get the significand and build it into a floating-point number with
3472    // exponent of 1.
3473    SDValue X = GetSignificand(DAG, Op1, dl);
3474
3475    if (LimitFloatPrecision <= 6) {
3476      // For floating-point precision of 6:
3477      //
3478      //   Log10ofMantissa =
3479      //     -0.50419619f +
3480      //       (0.60948995f - 0.10380950f * x) * x;
3481      //
3482      // error 0.0014886165, which is 6 bits
3483      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3484                               getF32Constant(DAG, 0xbdd49a13));
3485      SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
3486                               getF32Constant(DAG, 0x3f1c0789));
3487      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3488      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
3489                                            getF32Constant(DAG, 0x3f011300));
3490
3491      result = DAG.getNode(ISD::FADD, dl,
3492                           MVT::f32, LogOfExponent, Log10ofMantissa);
3493    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3494      // For floating-point precision of 12:
3495      //
3496      //   Log10ofMantissa =
3497      //     -0.64831180f +
3498      //       (0.91751397f +
3499      //         (-0.31664806f + 0.47637168e-1f * x) * x) * x;
3500      //
3501      // error 0.00019228036, which is better than 12 bits
3502      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3503                               getF32Constant(DAG, 0x3d431f31));
3504      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
3505                               getF32Constant(DAG, 0x3ea21fb2));
3506      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3507      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3508                               getF32Constant(DAG, 0x3f6ae232));
3509      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3510      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
3511                                            getF32Constant(DAG, 0x3f25f7c3));
3512
3513      result = DAG.getNode(ISD::FADD, dl,
3514                           MVT::f32, LogOfExponent, Log10ofMantissa);
3515    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3516      // For floating-point precision of 18:
3517      //
3518      //   Log10ofMantissa =
3519      //     -0.84299375f +
3520      //       (1.5327582f +
3521      //         (-1.0688956f +
3522      //           (0.49102474f +
3523      //             (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
3524      //
3525      // error 0.0000037995730, which is better than 18 bits
3526      SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3527                               getF32Constant(DAG, 0x3c5d51ce));
3528      SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
3529                               getF32Constant(DAG, 0x3e00685a));
3530      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
3531      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3532                               getF32Constant(DAG, 0x3efb6798));
3533      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3534      SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
3535                               getF32Constant(DAG, 0x3f88d192));
3536      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3537      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3538                               getF32Constant(DAG, 0x3fc4316c));
3539      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3540      SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
3541                                            getF32Constant(DAG, 0x3f57ce70));
3542
3543      result = DAG.getNode(ISD::FADD, dl,
3544                           MVT::f32, LogOfExponent, Log10ofMantissa);
3545    }
3546  } else {
3547    // No special expansion.
3548    result = DAG.getNode(ISD::FLOG10, dl,
3549                         getValue(I.getArgOperand(0)).getValueType(),
3550                         getValue(I.getArgOperand(0)));
3551  }
3552
3553  setValue(&I, result);
3554}
3555
3556/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for
3557/// limited-precision mode.
3558void
3559SelectionDAGBuilder::visitExp2(const CallInst &I) {
3560  SDValue result;
3561  DebugLoc dl = getCurDebugLoc();
3562
3563  if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
3564      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3565    SDValue Op = getValue(I.getArgOperand(0));
3566
3567    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op);
3568
3569    //   FractionalPartOfX = x - (float)IntegerPartOfX;
3570    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
3571    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, Op, t1);
3572
3573    //   IntegerPartOfX <<= 23;
3574    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
3575                                 DAG.getConstant(23, TLI.getPointerTy()));
3576
3577    if (LimitFloatPrecision <= 6) {
3578      // For floating-point precision of 6:
3579      //
3580      //   TwoToFractionalPartOfX =
3581      //     0.997535578f +
3582      //       (0.735607626f + 0.252464424f * x) * x;
3583      //
3584      // error 0.0144103317, which is 6 bits
3585      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3586                               getF32Constant(DAG, 0x3e814304));
3587      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3588                               getF32Constant(DAG, 0x3f3c50c8));
3589      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3590      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3591                               getF32Constant(DAG, 0x3f7f5e7e));
3592      SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
3593      SDValue TwoToFractionalPartOfX =
3594        DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
3595
3596      result = DAG.getNode(ISD::BIT_CONVERT, dl,
3597                           MVT::f32, TwoToFractionalPartOfX);
3598    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3599      // For floating-point precision of 12:
3600      //
3601      //   TwoToFractionalPartOfX =
3602      //     0.999892986f +
3603      //       (0.696457318f +
3604      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
3605      //
3606      // error 0.000107046256, which is 13 to 14 bits
3607      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3608                               getF32Constant(DAG, 0x3da235e3));
3609      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3610                               getF32Constant(DAG, 0x3e65b8f3));
3611      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3612      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3613                               getF32Constant(DAG, 0x3f324b07));
3614      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3615      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3616                               getF32Constant(DAG, 0x3f7ff8fd));
3617      SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
3618      SDValue TwoToFractionalPartOfX =
3619        DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
3620
3621      result = DAG.getNode(ISD::BIT_CONVERT, dl,
3622                           MVT::f32, TwoToFractionalPartOfX);
3623    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3624      // For floating-point precision of 18:
3625      //
3626      //   TwoToFractionalPartOfX =
3627      //     0.999999982f +
3628      //       (0.693148872f +
3629      //         (0.240227044f +
3630      //           (0.554906021e-1f +
3631      //             (0.961591928e-2f +
3632      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
3633      // error 2.47208000*10^(-7), which is better than 18 bits
3634      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3635                               getF32Constant(DAG, 0x3924b03e));
3636      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3637                               getF32Constant(DAG, 0x3ab24b87));
3638      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3639      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3640                               getF32Constant(DAG, 0x3c1d8c17));
3641      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3642      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3643                               getF32Constant(DAG, 0x3d634a1d));
3644      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3645      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3646                               getF32Constant(DAG, 0x3e75fe14));
3647      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3648      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
3649                                getF32Constant(DAG, 0x3f317234));
3650      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
3651      SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
3652                                getF32Constant(DAG, 0x3f800000));
3653      SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
3654      SDValue TwoToFractionalPartOfX =
3655        DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
3656
3657      result = DAG.getNode(ISD::BIT_CONVERT, dl,
3658                           MVT::f32, TwoToFractionalPartOfX);
3659    }
3660  } else {
3661    // No special expansion.
3662    result = DAG.getNode(ISD::FEXP2, dl,
3663                         getValue(I.getArgOperand(0)).getValueType(),
3664                         getValue(I.getArgOperand(0)));
3665  }
3666
3667  setValue(&I, result);
3668}
3669
3670/// visitPow - Lower a pow intrinsic. Handles the special sequences for
3671/// limited-precision mode with x == 10.0f.
3672void
3673SelectionDAGBuilder::visitPow(const CallInst &I) {
3674  SDValue result;
3675  const Value *Val = I.getArgOperand(0);
3676  DebugLoc dl = getCurDebugLoc();
3677  bool IsExp10 = false;
3678
3679  if (getValue(Val).getValueType() == MVT::f32 &&
3680      getValue(I.getArgOperand(1)).getValueType() == MVT::f32 &&
3681      LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3682    if (Constant *C = const_cast<Constant*>(dyn_cast<Constant>(Val))) {
3683      if (ConstantFP *CFP = dyn_cast<ConstantFP>(C)) {
3684        APFloat Ten(10.0f);
3685        IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten);
3686      }
3687    }
3688  }
3689
3690  if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
3691    SDValue Op = getValue(I.getArgOperand(1));
3692
3693    // Put the exponent in the right bit position for later addition to the
3694    // final result:
3695    //
3696    //   #define LOG2OF10 3.3219281f
3697    //   IntegerPartOfX = (int32_t)(x * LOG2OF10);
3698    SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
3699                             getF32Constant(DAG, 0x40549a78));
3700    SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
3701
3702    //   FractionalPartOfX = x - (float)IntegerPartOfX;
3703    SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
3704    SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
3705
3706    //   IntegerPartOfX <<= 23;
3707    IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX,
3708                                 DAG.getConstant(23, TLI.getPointerTy()));
3709
3710    if (LimitFloatPrecision <= 6) {
3711      // For floating-point precision of 6:
3712      //
3713      //   twoToFractionalPartOfX =
3714      //     0.997535578f +
3715      //       (0.735607626f + 0.252464424f * x) * x;
3716      //
3717      // error 0.0144103317, which is 6 bits
3718      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3719                               getF32Constant(DAG, 0x3e814304));
3720      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3721                               getF32Constant(DAG, 0x3f3c50c8));
3722      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3723      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3724                               getF32Constant(DAG, 0x3f7f5e7e));
3725      SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
3726      SDValue TwoToFractionalPartOfX =
3727        DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
3728
3729      result = DAG.getNode(ISD::BIT_CONVERT, dl,
3730                           MVT::f32, TwoToFractionalPartOfX);
3731    } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
3732      // For floating-point precision of 12:
3733      //
3734      //   TwoToFractionalPartOfX =
3735      //     0.999892986f +
3736      //       (0.696457318f +
3737      //         (0.224338339f + 0.792043434e-1f * x) * x) * x;
3738      //
3739      // error 0.000107046256, which is 13 to 14 bits
3740      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3741                               getF32Constant(DAG, 0x3da235e3));
3742      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3743                               getF32Constant(DAG, 0x3e65b8f3));
3744      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3745      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3746                               getF32Constant(DAG, 0x3f324b07));
3747      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3748      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3749                               getF32Constant(DAG, 0x3f7ff8fd));
3750      SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
3751      SDValue TwoToFractionalPartOfX =
3752        DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
3753
3754      result = DAG.getNode(ISD::BIT_CONVERT, dl,
3755                           MVT::f32, TwoToFractionalPartOfX);
3756    } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
3757      // For floating-point precision of 18:
3758      //
3759      //   TwoToFractionalPartOfX =
3760      //     0.999999982f +
3761      //       (0.693148872f +
3762      //         (0.240227044f +
3763      //           (0.554906021e-1f +
3764      //             (0.961591928e-2f +
3765      //               (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
3766      // error 2.47208000*10^(-7), which is better than 18 bits
3767      SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
3768                               getF32Constant(DAG, 0x3924b03e));
3769      SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
3770                               getF32Constant(DAG, 0x3ab24b87));
3771      SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
3772      SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
3773                               getF32Constant(DAG, 0x3c1d8c17));
3774      SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
3775      SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
3776                               getF32Constant(DAG, 0x3d634a1d));
3777      SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
3778      SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
3779                               getF32Constant(DAG, 0x3e75fe14));
3780      SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
3781      SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
3782                                getF32Constant(DAG, 0x3f317234));
3783      SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
3784      SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
3785                                getF32Constant(DAG, 0x3f800000));
3786      SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
3787      SDValue TwoToFractionalPartOfX =
3788        DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
3789
3790      result = DAG.getNode(ISD::BIT_CONVERT, dl,
3791                           MVT::f32, TwoToFractionalPartOfX);
3792    }
3793  } else {
3794    // No special expansion.
3795    result = DAG.getNode(ISD::FPOW, dl,
3796                         getValue(I.getArgOperand(0)).getValueType(),
3797                         getValue(I.getArgOperand(0)),
3798                         getValue(I.getArgOperand(1)));
3799  }
3800
3801  setValue(&I, result);
3802}
3803
3804
3805/// ExpandPowI - Expand a llvm.powi intrinsic.
3806static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
3807                          SelectionDAG &DAG) {
3808  // If RHS is a constant, we can expand this out to a multiplication tree,
3809  // otherwise we end up lowering to a call to __powidf2 (for example).  When
3810  // optimizing for size, we only do the expansion if it produces a small
3811  // number of multiplies; when not optimizing for size we always expand.
3812  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
3813    // Get the exponent as a positive value.
3814    unsigned Val = RHSC->getSExtValue();
3815    if ((int)Val < 0) Val = -Val;
3816
3817    // powi(x, 0) -> 1.0
3818    if (Val == 0)
3819      return DAG.getConstantFP(1.0, LHS.getValueType());
3820
3821    const Function *F = DAG.getMachineFunction().getFunction();
3822    if (!F->hasFnAttr(Attribute::OptimizeForSize) ||
3823        // If optimizing for size, don't insert too many multiplies.  This
3824        // inserts up to 5 multiplies.
3825        CountPopulation_32(Val)+Log2_32(Val) < 7) {
3826      // We use the simple binary decomposition method to generate the multiply
3827      // sequence.  There are more optimal ways to do this (for example,
3828      // powi(x,15) generates one more multiply than it should), but this has
3829      // the benefit of being both really simple and much better than a libcall.
3830      SDValue Res;  // Logically starts equal to 1.0
3831      SDValue CurSquare = LHS;
3832      while (Val) {
3833        if (Val & 1) {
3834          if (Res.getNode())
3835            Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
3836          else
3837            Res = CurSquare;  // 1.0*CurSquare.
3838        }
3839
3840        CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
3841                                CurSquare, CurSquare);
3842        Val >>= 1;
3843      }
3844
3845      // If the original was negative, invert the result, producing 1/(x*x*x).
3846      if (RHSC->getSExtValue() < 0)
3847        Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
3848                          DAG.getConstantFP(1.0, LHS.getValueType()), Res);
3849      return Res;
3850    }
3851  }
3852
3853  // Otherwise, expand to a libcall.
3854  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
3855}
3856
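// Illustrative sketch only (guarded out of the build): a scalar model of the
// multiplication tree built by ExpandPowI above.  Binary decomposition needs
// Log2_32(Val) squarings plus CountPopulation_32(Val)-1 combining multiplies,
// which is why the size heuristic above requires
// CountPopulation_32(Val) + Log2_32(Val) < 7 (at most five multiplies).
// The helper name is hypothetical.
#if 0
// powi(x, 13): 13 == 0b1101, so the result is x * x^4 * x^8 after three
// squarings; a negative exponent inverts the result at the end.
static double powiByDecomposition(double X, int Exp) {
  unsigned Val = (unsigned)Exp;
  if (Exp < 0) Val = -Val;           // get the exponent as a positive value
  double Res = 1.0, CurSquare = X;
  while (Val) {
    if (Val & 1)
      Res *= CurSquare;              // fold in the current power-of-two factor
    CurSquare *= CurSquare;          // x, x^2, x^4, x^8, ...
    Val >>= 1;
  }
  return Exp < 0 ? 1.0 / Res : Res;
}
#endif
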
3857/// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function
3858/// argument, create the corresponding DBG_VALUE machine instruction for it now.
3859/// At the end of instruction selection, they will be inserted to the entry BB.
3860bool
3861SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI,
3862                                              const Value *V, MDNode *Variable,
3863                                              uint64_t Offset,
3864                                              const SDValue &N) {
3865  if (!isa<Argument>(V))
3866    return false;
3867
3868  MachineFunction &MF = DAG.getMachineFunction();
3869  // Ignore inlined function arguments here.
3870  DIVariable DV(Variable);
3871  if (DV.isInlinedFnArgument(MF.getFunction()))
3872    return false;
3873
3874  MachineBasicBlock *MBB = FuncInfo.MBB;
3875  if (MBB != &MF.front())
3876    return false;
3877
3878  unsigned Reg = 0;
3879  if (N.getOpcode() == ISD::CopyFromReg) {
3880    Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
3881    if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
3882      MachineRegisterInfo &RegInfo = MF.getRegInfo();
3883      unsigned PR = RegInfo.getLiveInPhysReg(Reg);
3884      if (PR)
3885        Reg = PR;
3886    }
3887  }
3888
3889  if (!Reg) {
3890    DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
3891    if (VMI == FuncInfo.ValueMap.end())
3892      return false;
3893    Reg = VMI->second;
3894  }
3895
3896  const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
3897  MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),
3898                                    TII->get(TargetOpcode::DBG_VALUE))
3899    .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable);
3900  FuncInfo.ArgDbgValues.push_back(&*MIB);
3901  return true;
3902}
3903
3904// VisualStudio defines setjmp as _setjmp
3905#if defined(_MSC_VER) && defined(setjmp)
3906#define setjmp_undefined_for_visual_studio
3907#undef setjmp
3908#endif
3909
3910/// visitIntrinsicCall - Lower the call to the specified intrinsic function.  If
3911/// we want to emit this as a call to a named external function, return the name;
3912/// otherwise, lower it and return null.
3913const char *
3914SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
3915  DebugLoc dl = getCurDebugLoc();
3916  SDValue Res;
3917
3918  switch (Intrinsic) {
3919  default:
3920    // By default, turn this into a target intrinsic node.
3921    visitTargetIntrinsic(I, Intrinsic);
3922    return 0;
3923  case Intrinsic::vastart:  visitVAStart(I); return 0;
3924  case Intrinsic::vaend:    visitVAEnd(I); return 0;
3925  case Intrinsic::vacopy:   visitVACopy(I); return 0;
3926  case Intrinsic::returnaddress:
3927    setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(),
3928                             getValue(I.getArgOperand(0))));
3929    return 0;
3930  case Intrinsic::frameaddress:
3931    setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(),
3932                             getValue(I.getArgOperand(0))));
3933    return 0;
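  // Note: the pointer arithmetic below picks between the underscore-prefixed
  // and plain names: "_setjmp" + 1 points at "setjmp", so the underscore is
  // kept only when the target actually uses it.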
3934  case Intrinsic::setjmp:
3935    return "_setjmp"+!TLI.usesUnderscoreSetJmp();
3936  case Intrinsic::longjmp:
3937    return "_longjmp"+!TLI.usesUnderscoreLongJmp();
3938  case Intrinsic::memcpy: {
3939    // Assert that the address spaces are < 256, since we only support
3940    // user-defined address spaces.
3941    assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
3942           < 256 &&
3943           cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
3944           < 256 &&
3945           "Unknown address space");
3946    SDValue Op1 = getValue(I.getArgOperand(0));
3947    SDValue Op2 = getValue(I.getArgOperand(1));
3948    SDValue Op3 = getValue(I.getArgOperand(2));
3949    unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
3950    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
3951    DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
3952                              I.getArgOperand(0), 0, I.getArgOperand(1), 0));
3953    return 0;
3954  }
3955  case Intrinsic::memset: {
3956    // Assert that the address space is < 256, since we only support
3957    // user-defined address spaces.
3958    assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
3959           < 256 &&
3960           "Unknown address space");
3961    SDValue Op1 = getValue(I.getArgOperand(0));
3962    SDValue Op2 = getValue(I.getArgOperand(1));
3963    SDValue Op3 = getValue(I.getArgOperand(2));
3964    unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
3965    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
3966    DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
3967                              I.getArgOperand(0), 0));
3968    return 0;
3969  }
3970  case Intrinsic::memmove: {
3971    // Assert that the address spaces are < 256, since we only support
3972    // user-defined address spaces.
3973    assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace()
3974           < 256 &&
3975           cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace()
3976           < 256 &&
3977           "Unknown address space");
3978    SDValue Op1 = getValue(I.getArgOperand(0));
3979    SDValue Op2 = getValue(I.getArgOperand(1));
3980    SDValue Op3 = getValue(I.getArgOperand(2));
3981    unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
3982    bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
3983
3984    // If the source and destination are known to not be aliases, we can
3985    // lower memmove as memcpy.
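    // For instance, if the two operands are distinct allocas that alias
    // analysis can prove disjoint, the copy can never overlap and the cheaper
    // memcpy lowering below is safe.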
3986    uint64_t Size = -1ULL;
3987    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
3988      Size = C->getZExtValue();
3989    if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) ==
3990        AliasAnalysis::NoAlias) {
3991      DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
3992                                false, I.getArgOperand(0), 0,
3993                                I.getArgOperand(1), 0));
3994      return 0;
3995    }
3996
3997    DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
3998                               I.getArgOperand(0), 0, I.getArgOperand(1), 0));
3999    return 0;
4000  }
4001  case Intrinsic::dbg_declare: {
4002    const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
4003    if (!DIVariable(DI.getVariable()).Verify())
4004      return 0;
4005
4006    MDNode *Variable = DI.getVariable();
4007    // Parameters are handled specially.
4008    bool isParameter =
4009      DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
4010    const Value *Address = DI.getAddress();
4011    if (!Address)
4012      return 0;
4013    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
4014      Address = BCI->getOperand(0);
4015    const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
4016
4017    // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
4018    // but do not always have a corresponding SDNode built.  The absolute
4019    // SDNodeOrder values (though not the relative ordering) differ depending
4020    // on whether debug info exists.
4021    ++SDNodeOrder;
4022    SDValue &N = NodeMap[Address];
4023    SDDbgValue *SDV;
4024    if (N.getNode()) {
4025      if (isParameter && !AI) {
4026        FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode());
4027        if (FINode)
4028          // Byval parameter.  We have a frame index at this point.
4029          SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
4030                                0, dl, SDNodeOrder);
4031        else
4032          // Can't do anything with other non-AI cases yet.  This might be a
4033          // parameter of a callee function that got inlined, for example.
4034          return 0;
4035      } else if (AI)
4036        SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
4037                              0, dl, SDNodeOrder);
4038      else
4039        // Can't do anything with other non-AI cases yet.
4040        return 0;
4041      DAG.AddDbgValue(SDV, N.getNode(), isParameter);
4042    } else {
4043      // This isn't useful, but it shows what we're missing.
4044      SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
4045                            0, dl, SDNodeOrder);
4046      DAG.AddDbgValue(SDV, 0, isParameter);
4047    }
4048    return 0;
4049  }
4050  case Intrinsic::dbg_value: {
4051    const DbgValueInst &DI = cast<DbgValueInst>(I);
4052    if (!DIVariable(DI.getVariable()).Verify())
4053      return 0;
4054
4055    MDNode *Variable = DI.getVariable();
4056    uint64_t Offset = DI.getOffset();
4057    const Value *V = DI.getValue();
4058    if (!V)
4059      return 0;
4060
4061    // Build an entry in DbgOrdering.  Debug info input nodes get an SDNodeOrder
4062    // but do not always have a corresponding SDNode built.  The absolute
4063    // SDNodeOrder values (though not the relative ordering) differ depending
4064    // on whether debug info exists.
4065    ++SDNodeOrder;
4066    SDDbgValue *SDV;
4067    if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) {
4068      SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder);
4069      DAG.AddDbgValue(SDV, 0, false);
4070    } else {
4071      bool createUndef = false;
4072      // Do not use getValue() in here; we don't want to generate code at
4073      // this point if it hasn't been done yet.
4074      SDValue N = NodeMap[V];
4075      if (!N.getNode() && isa<Argument>(V))
4076        // Check unused arguments map.
4077        N = UnusedArgNodeMap[V];
4078      if (N.getNode()) {
4079        if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) {
4080          SDV = DAG.getDbgValue(Variable, N.getNode(),
4081                                N.getResNo(), Offset, dl, SDNodeOrder);
4082          DAG.AddDbgValue(SDV, N.getNode(), false);
4083        }
4084      } else if (isa<PHINode>(V) && !V->use_empty() ) {
4085        // Do not call getValue(V) yet, as we don't want to generate code.
4086        // Remember it for later.
4087        DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
4088        DanglingDebugInfoMap[V] = DDI;
4089      } else
4090        createUndef = true;
4091      if (createUndef) {
4092        // We may expand this to cover more cases.  One case where we have no
4093        // data available is an unreferenced parameter; we need this fallback.
4094        SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
4095                              Offset, dl, SDNodeOrder);
4096        DAG.AddDbgValue(SDV, 0, false);
4097      }
4098    }
4099
4100    // Build a debug info table entry.
4101    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V))
4102      V = BCI->getOperand(0);
4103    const AllocaInst *AI = dyn_cast<AllocaInst>(V);
4104    // Don't handle byval struct arguments or VLAs, for example.
4105    if (!AI)
4106      return 0;
4107    DenseMap<const AllocaInst*, int>::iterator SI =
4108      FuncInfo.StaticAllocaMap.find(AI);
4109    if (SI == FuncInfo.StaticAllocaMap.end())
4110      return 0; // VLAs.
4111    int FI = SI->second;
4112
4113    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
4114    if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
4115      MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
4116    return 0;
4117  }
4118  case Intrinsic::eh_exception: {
4119    // Insert the EXCEPTIONADDR instruction.
4120    assert(FuncInfo.MBB->isLandingPad() &&
4121           "Call to eh.exception not in landing pad!");
4122    SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
4123    SDValue Ops[1];
4124    Ops[0] = DAG.getRoot();
4125    SDValue Op = DAG.getNode(ISD::EXCEPTIONADDR, dl, VTs, Ops, 1);
4126    setValue(&I, Op);
4127    DAG.setRoot(Op.getValue(1));
4128    return 0;
4129  }
4130
4131  case Intrinsic::eh_selector: {
4132    MachineBasicBlock *CallMBB = FuncInfo.MBB;
4133    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
4134    if (CallMBB->isLandingPad())
4135      AddCatchInfo(I, &MMI, CallMBB);
4136    else {
4137#ifndef NDEBUG
4138      FuncInfo.CatchInfoLost.insert(&I);
4139#endif
4140      // FIXME: Mark exception selector register as live in.  Hack for PR1508.
4141      unsigned Reg = TLI.getExceptionSelectorRegister();
4142      if (Reg) FuncInfo.MBB->addLiveIn(Reg);
4143    }
4144
4145    // Insert the EHSELECTION instruction.
4146    SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other);
4147    SDValue Ops[2];
4148    Ops[0] = getValue(I.getArgOperand(0));
4149    Ops[1] = getRoot();
4150    SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2);
4151    DAG.setRoot(Op.getValue(1));
4152    setValue(&I, DAG.getSExtOrTrunc(Op, dl, MVT::i32));
4153    return 0;
4154  }
4155
4156  case Intrinsic::eh_typeid_for: {
4157    // Find the type id for the given typeinfo.
4158    GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0));
4159    unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
4160    Res = DAG.getConstant(TypeID, MVT::i32);
4161    setValue(&I, Res);
4162    return 0;
4163  }
4164
4165  case Intrinsic::eh_return_i32:
4166  case Intrinsic::eh_return_i64:
4167    DAG.getMachineFunction().getMMI().setCallsEHReturn(true);
4168    DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl,
4169                            MVT::Other,
4170                            getControlRoot(),
4171                            getValue(I.getArgOperand(0)),
4172                            getValue(I.getArgOperand(1))));
4173    return 0;
4174  case Intrinsic::eh_unwind_init:
4175    DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
4176    return 0;
4177  case Intrinsic::eh_dwarf_cfa: {
4178    SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl,
4179                                        TLI.getPointerTy());
4180    SDValue Offset = DAG.getNode(ISD::ADD, dl,
4181                                 TLI.getPointerTy(),
4182                                 DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl,
4183                                             TLI.getPointerTy()),
4184                                 CfaArg);
4185    SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl,
4186                             TLI.getPointerTy(),
4187                             DAG.getConstant(0, TLI.getPointerTy()));
4188    setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(),
4189                             FA, Offset));
4190    return 0;
4191  }
4192  case Intrinsic::eh_sjlj_callsite: {
4193    MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
4194    ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
4195    assert(CI && "Non-constant call site value in eh.sjlj.callsite!");
4196    assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
4197
4198    MMI.setCurrentCallSite(CI->getZExtValue());
4199    return 0;
4200  }
4201  case Intrinsic::eh_sjlj_setjmp: {
4202    setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(),
4203                             getValue(I.getArgOperand(0))));
4204    return 0;
4205  }
4206  case Intrinsic::eh_sjlj_longjmp: {
4207    DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
4208                            getRoot(),
4209                            getValue(I.getArgOperand(0))));
4210    return 0;
4211  }
4212
4213  case Intrinsic::convertff:
4214  case Intrinsic::convertfsi:
4215  case Intrinsic::convertfui:
4216  case Intrinsic::convertsif:
4217  case Intrinsic::convertuif:
4218  case Intrinsic::convertss:
4219  case Intrinsic::convertsu:
4220  case Intrinsic::convertus:
4221  case Intrinsic::convertuu: {
4222    ISD::CvtCode Code = ISD::CVT_INVALID;
4223    switch (Intrinsic) {
4224    case Intrinsic::convertff:  Code = ISD::CVT_FF; break;
4225    case Intrinsic::convertfsi: Code = ISD::CVT_FS; break;
4226    case Intrinsic::convertfui: Code = ISD::CVT_FU; break;
4227    case Intrinsic::convertsif: Code = ISD::CVT_SF; break;
4228    case Intrinsic::convertuif: Code = ISD::CVT_UF; break;
4229    case Intrinsic::convertss:  Code = ISD::CVT_SS; break;
4230    case Intrinsic::convertsu:  Code = ISD::CVT_SU; break;
4231    case Intrinsic::convertus:  Code = ISD::CVT_US; break;
4232    case Intrinsic::convertuu:  Code = ISD::CVT_UU; break;
4233    }
4234    EVT DestVT = TLI.getValueType(I.getType());
4235    const Value *Op1 = I.getArgOperand(0);
4236    Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1),
4237                               DAG.getValueType(DestVT),
4238                               DAG.getValueType(getValue(Op1).getValueType()),
4239                               getValue(I.getArgOperand(1)),
4240                               getValue(I.getArgOperand(2)),
4241                               Code);
4242    setValue(&I, Res);
4243    return 0;
4244  }
4245  case Intrinsic::sqrt:
4246    setValue(&I, DAG.getNode(ISD::FSQRT, dl,
4247                             getValue(I.getArgOperand(0)).getValueType(),
4248                             getValue(I.getArgOperand(0))));
4249    return 0;
4250  case Intrinsic::powi:
4251    setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)),
4252                            getValue(I.getArgOperand(1)), DAG));
4253    return 0;
4254  case Intrinsic::sin:
4255    setValue(&I, DAG.getNode(ISD::FSIN, dl,
4256                             getValue(I.getArgOperand(0)).getValueType(),
4257                             getValue(I.getArgOperand(0))));
4258    return 0;
4259  case Intrinsic::cos:
4260    setValue(&I, DAG.getNode(ISD::FCOS, dl,
4261                             getValue(I.getArgOperand(0)).getValueType(),
4262                             getValue(I.getArgOperand(0))));
4263    return 0;
4264  case Intrinsic::log:
4265    visitLog(I);
4266    return 0;
4267  case Intrinsic::log2:
4268    visitLog2(I);
4269    return 0;
4270  case Intrinsic::log10:
4271    visitLog10(I);
4272    return 0;
4273  case Intrinsic::exp:
4274    visitExp(I);
4275    return 0;
4276  case Intrinsic::exp2:
4277    visitExp2(I);
4278    return 0;
4279  case Intrinsic::pow:
4280    visitPow(I);
4281    return 0;
4282  case Intrinsic::convert_to_fp16:
4283    setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
4284                             MVT::i16, getValue(I.getArgOperand(0))));
4285    return 0;
4286  case Intrinsic::convert_from_fp16:
4287    setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl,
4288                             MVT::f32, getValue(I.getArgOperand(0))));
4289    return 0;
4290  case Intrinsic::pcmarker: {
4291    SDValue Tmp = getValue(I.getArgOperand(0));
4292    DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp));
4293    return 0;
4294  }
4295  case Intrinsic::readcyclecounter: {
4296    SDValue Op = getRoot();
4297    Res = DAG.getNode(ISD::READCYCLECOUNTER, dl,
4298                      DAG.getVTList(MVT::i64, MVT::Other),
4299                      &Op, 1);
4300    setValue(&I, Res);
4301    DAG.setRoot(Res.getValue(1));
4302    return 0;
4303  }
4304  case Intrinsic::bswap:
4305    setValue(&I, DAG.getNode(ISD::BSWAP, dl,
4306                             getValue(I.getArgOperand(0)).getValueType(),
4307                             getValue(I.getArgOperand(0))));
4308    return 0;
4309  case Intrinsic::cttz: {
4310    SDValue Arg = getValue(I.getArgOperand(0));
4311    EVT Ty = Arg.getValueType();
4312    setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg));
4313    return 0;
4314  }
4315  case Intrinsic::ctlz: {
4316    SDValue Arg = getValue(I.getArgOperand(0));
4317    EVT Ty = Arg.getValueType();
4318    setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg));
4319    return 0;
4320  }
4321  case Intrinsic::ctpop: {
4322    SDValue Arg = getValue(I.getArgOperand(0));
4323    EVT Ty = Arg.getValueType();
4324    setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg));
4325    return 0;
4326  }
4327  case Intrinsic::stacksave: {
4328    SDValue Op = getRoot();
4329    Res = DAG.getNode(ISD::STACKSAVE, dl,
4330                      DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1);
4331    setValue(&I, Res);
4332    DAG.setRoot(Res.getValue(1));
4333    return 0;
4334  }
4335  case Intrinsic::stackrestore: {
4336    Res = getValue(I.getArgOperand(0));
4337    DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res));
4338    return 0;
4339  }
4340  case Intrinsic::stackprotector: {
4341    // Emit code into the DAG to store the stack guard onto the stack.
4342    MachineFunction &MF = DAG.getMachineFunction();
4343    MachineFrameInfo *MFI = MF.getFrameInfo();
4344    EVT PtrTy = TLI.getPointerTy();
4345
4346    SDValue Src = getValue(I.getArgOperand(0));   // The guard's value.
4347    AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
4348
4349    int FI = FuncInfo.StaticAllocaMap[Slot];
4350    MFI->setStackProtectorIndex(FI);
4351
4352    SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
4353
4354    // Store the stack protector onto the stack.
4355    Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
4356                       PseudoSourceValue::getFixedStack(FI),
4357                       0, true, false, 0);
4358    setValue(&I, Res);
4359    DAG.setRoot(Res);
4360    return 0;
4361  }
4362  case Intrinsic::objectsize: {
4363    // If we don't know by now, we're never going to know.
4364    ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
4365
4366    assert(CI && "Non-constant type in __builtin_object_size?");
4367
4368    SDValue Arg = getValue(I.getCalledValue());
4369    EVT Ty = Arg.getValueType();
4370
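    // The second argument selects the fallback result: per the llvm.objectsize
    // semantics, a false (zero) argument asks for the maximum size, so we
    // return -1 when it is unknown; a true argument asks for the minimum size,
    // so we return 0.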
4371    if (CI->isZero())
4372      Res = DAG.getConstant(-1ULL, Ty);
4373    else
4374      Res = DAG.getConstant(0, Ty);
4375
4376    setValue(&I, Res);
4377    return 0;
4378  }
4379  case Intrinsic::var_annotation:
4380    // Discard annotate attributes
4381    return 0;
4382
4383  case Intrinsic::init_trampoline: {
4384    const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts());
4385
4386    SDValue Ops[6];
4387    Ops[0] = getRoot();
4388    Ops[1] = getValue(I.getArgOperand(0));
4389    Ops[2] = getValue(I.getArgOperand(1));
4390    Ops[3] = getValue(I.getArgOperand(2));
4391    Ops[4] = DAG.getSrcValue(I.getArgOperand(0));
4392    Ops[5] = DAG.getSrcValue(F);
4393
4394    Res = DAG.getNode(ISD::TRAMPOLINE, dl,
4395                      DAG.getVTList(TLI.getPointerTy(), MVT::Other),
4396                      Ops, 6);
4397
4398    setValue(&I, Res);
4399    DAG.setRoot(Res.getValue(1));
4400    return 0;
4401  }
4402  case Intrinsic::gcroot:
4403    if (GFI) {
4404      const Value *Alloca = I.getArgOperand(0);
4405      const Constant *TypeMap = cast<Constant>(I.getArgOperand(1));
4406
4407      FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
4408      GFI->addStackRoot(FI->getIndex(), TypeMap);
4409    }
4410    return 0;
4411  case Intrinsic::gcread:
4412  case Intrinsic::gcwrite:
4413    llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
4414    return 0;
4415  case Intrinsic::flt_rounds:
4416    setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
4417    return 0;
4418  case Intrinsic::trap:
4419    DAG.setRoot(DAG.getNode(ISD::TRAP, dl, MVT::Other, getRoot()));
4420    return 0;
4421  case Intrinsic::uadd_with_overflow:
4422    return implVisitAluOverflow(I, ISD::UADDO);
4423  case Intrinsic::sadd_with_overflow:
4424    return implVisitAluOverflow(I, ISD::SADDO);
4425  case Intrinsic::usub_with_overflow:
4426    return implVisitAluOverflow(I, ISD::USUBO);
4427  case Intrinsic::ssub_with_overflow:
4428    return implVisitAluOverflow(I, ISD::SSUBO);
4429  case Intrinsic::umul_with_overflow:
4430    return implVisitAluOverflow(I, ISD::UMULO);
4431  case Intrinsic::smul_with_overflow:
4432    return implVisitAluOverflow(I, ISD::SMULO);
4433
4434  case Intrinsic::prefetch: {
4435    SDValue Ops[4];
4436    Ops[0] = getRoot();
4437    Ops[1] = getValue(I.getArgOperand(0));
4438    Ops[2] = getValue(I.getArgOperand(1));
4439    Ops[3] = getValue(I.getArgOperand(2));
4440    DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4));
4441    return 0;
4442  }
4443
4444  case Intrinsic::memory_barrier: {
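    // Operand 0 is the chain; operands 1-5 forward the intrinsic's five i1
    // ordering flags (load-load, load-store, store-load, store-store, device)
    // unchanged to the MEMBARRIER node.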
4445    SDValue Ops[6];
4446    Ops[0] = getRoot();
4447    for (int x = 1; x < 6; ++x)
4448      Ops[x] = getValue(I.getArgOperand(x - 1));
4449
4450    DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6));
4451    return 0;
4452  }
4453  case Intrinsic::atomic_cmp_swap: {
4454    SDValue Root = getRoot();
4455    SDValue L =
4456      DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(),
4457                    getValue(I.getArgOperand(1)).getValueType().getSimpleVT(),
4458                    Root,
4459                    getValue(I.getArgOperand(0)),
4460                    getValue(I.getArgOperand(1)),
4461                    getValue(I.getArgOperand(2)),
4462                    I.getArgOperand(0));
4463    setValue(&I, L);
4464    DAG.setRoot(L.getValue(1));
4465    return 0;
4466  }
4467  case Intrinsic::atomic_load_add:
4468    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_ADD);
4469  case Intrinsic::atomic_load_sub:
4470    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_SUB);
4471  case Intrinsic::atomic_load_or:
4472    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_OR);
4473  case Intrinsic::atomic_load_xor:
4474    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_XOR);
4475  case Intrinsic::atomic_load_and:
4476    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_AND);
4477  case Intrinsic::atomic_load_nand:
4478    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_NAND);
4479  case Intrinsic::atomic_load_max:
4480    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MAX);
4481  case Intrinsic::atomic_load_min:
4482    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_MIN);
4483  case Intrinsic::atomic_load_umin:
4484    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMIN);
4485  case Intrinsic::atomic_load_umax:
4486    return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
4487  case Intrinsic::atomic_swap:
4488    return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
4489
4490  case Intrinsic::invariant_start:
4491  case Intrinsic::lifetime_start:
4492    // Discard region information.
4493    setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
4494    return 0;
4495  case Intrinsic::invariant_end:
4496  case Intrinsic::lifetime_end:
4497    // Discard region information.
4498    return 0;
4499  }
4500}
4501
4502void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
4503                                      bool isTailCall,
4504                                      MachineBasicBlock *LandingPad) {
4505  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
4506  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
4507  const Type *RetTy = FTy->getReturnType();
4508  MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
4509  MCSymbol *BeginLabel = 0;
4510
4511  TargetLowering::ArgListTy Args;
4512  TargetLowering::ArgListEntry Entry;
4513  Args.reserve(CS.arg_size());
4514
4515  // Check whether the function can return without sret-demotion.
4516  SmallVector<ISD::OutputArg, 4> Outs;
4517  SmallVector<uint64_t, 4> Offsets;
4518  GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
4519                Outs, TLI, &Offsets);
4520
4521  bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
4522                        FTy->isVarArg(), Outs, FTy->getContext());
4523
4524  SDValue DemoteStackSlot;
4525
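  // When the return value cannot be lowered directly into registers, demote
  // it: create a stack slot, pass its address as a hidden sret-style first
  // argument, and reload the pieces from that slot after the call (see below).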
4526  if (!CanLowerReturn) {
4527    uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
4528                      FTy->getReturnType());
4529    unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(
4530                      FTy->getReturnType());
4531    MachineFunction &MF = DAG.getMachineFunction();
4532    int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
4533    const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
4534
4535    DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
4536    Entry.Node = DemoteStackSlot;
4537    Entry.Ty = StackSlotPtrType;
4538    Entry.isSExt = false;
4539    Entry.isZExt = false;
4540    Entry.isInReg = false;
4541    Entry.isSRet = true;
4542    Entry.isNest = false;
4543    Entry.isByVal = false;
4544    Entry.Alignment = Align;
4545    Args.push_back(Entry);
4546    RetTy = Type::getVoidTy(FTy->getContext());
4547  }
4548
4549  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
4550       i != e; ++i) {
4551    SDValue ArgNode = getValue(*i);
4552    Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
4553
4554    unsigned attrInd = i - CS.arg_begin() + 1;
4555    Entry.isSExt  = CS.paramHasAttr(attrInd, Attribute::SExt);
4556    Entry.isZExt  = CS.paramHasAttr(attrInd, Attribute::ZExt);
4557    Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg);
4558    Entry.isSRet  = CS.paramHasAttr(attrInd, Attribute::StructRet);
4559    Entry.isNest  = CS.paramHasAttr(attrInd, Attribute::Nest);
4560    Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal);
4561    Entry.Alignment = CS.getParamAlignment(attrInd);
4562    Args.push_back(Entry);
4563  }
4564
4565  if (LandingPad) {
4566    // Insert a label before the invoke call to mark the try range.  This can be
4567    // used to detect deletion of the invoke via the MachineModuleInfo.
4568    BeginLabel = MMI.getContext().CreateTempSymbol();
4569
4570    // For SjLj, keep track of which landing pads go with which invokes
4571    // so as to maintain the ordering of pads in the LSDA.
4572    unsigned CallSiteIndex = MMI.getCurrentCallSite();
4573    if (CallSiteIndex) {
4574      MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
4575      // Now that the call site is handled, stop tracking it.
4576      MMI.setCurrentCallSite(0);
4577    }
4578
4579    // Both PendingLoads and PendingExports must be flushed here;
4580    // this call might not return.
4581    (void)getRoot();
4582    DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel));
4583  }
4584
4585  // Check if target-independent constraints permit a tail call here.
4586  // Target-dependent constraints are checked within TLI.LowerCallTo.
4587  if (isTailCall &&
4588      !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI))
4589    isTailCall = false;
4590
4591  std::pair<SDValue,SDValue> Result =
4592    TLI.LowerCallTo(getRoot(), RetTy,
4593                    CS.paramHasAttr(0, Attribute::SExt),
4594                    CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
4595                    CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
4596                    CS.getCallingConv(),
4597                    isTailCall,
4598                    !CS.getInstruction()->use_empty(),
4599                    Callee, Args, DAG, getCurDebugLoc());
4600  assert((isTailCall || Result.second.getNode()) &&
4601         "Non-null chain expected with non-tail call!");
4602  assert((Result.second.getNode() || !Result.first.getNode()) &&
4603         "Null value expected with tail call!");
4604  if (Result.first.getNode()) {
4605    setValue(CS.getInstruction(), Result.first);
4606  } else if (!CanLowerReturn && Result.second.getNode()) {
4607    // The instruction result is the result of loading from the
4608    // hidden sret parameter.
4609    SmallVector<EVT, 1> PVTs;
4610    const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
4611
4612    ComputeValueVTs(TLI, PtrRetTy, PVTs);
4613    assert(PVTs.size() == 1 && "Pointers should fit in one register");
4614    EVT PtrVT = PVTs[0];
4615    unsigned NumValues = Outs.size();
4616    SmallVector<SDValue, 4> Values(NumValues);
4617    SmallVector<SDValue, 4> Chains(NumValues);
4618
4619    for (unsigned i = 0; i < NumValues; ++i) {
4620      SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT,
4621                                DemoteStackSlot,
4622                                DAG.getConstant(Offsets[i], PtrVT));
4623      SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
4624                              Add, NULL, Offsets[i], false, false, 1);
4625      Values[i] = L;
4626      Chains[i] = L.getValue(1);
4627    }
4628
4629    SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
4630                                MVT::Other, &Chains[0], NumValues);
4631    PendingLoads.push_back(Chain);
4632
4633    // Collect the legal value parts into potentially illegal values
4634    // that correspond to the original function's return values.
4635    SmallVector<EVT, 4> RetTys;
4636    RetTy = FTy->getReturnType();
4637    ComputeValueVTs(TLI, RetTy, RetTys);
4638    ISD::NodeType AssertOp = ISD::DELETED_NODE;
4639    SmallVector<SDValue, 4> ReturnValues;
4640    unsigned CurReg = 0;
4641    for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
4642      EVT VT = RetTys[I];
4643      EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
4644      unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
4645
4646      SDValue ReturnValue =
4647        getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
4648                         RegisterVT, VT, AssertOp);
4649      ReturnValues.push_back(ReturnValue);
4650      CurReg += NumRegs;
4651    }
4652
4653    setValue(CS.getInstruction(),
4654             DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(),
4655                         DAG.getVTList(&RetTys[0], RetTys.size()),
4656                         &ReturnValues[0], ReturnValues.size()));
4657
4658  }
4659
4660  // As a special case, a null chain means that a tail call has been emitted and
4661  // the DAG root is already updated.
4662  if (Result.second.getNode())
4663    DAG.setRoot(Result.second);
4664  else
4665    HasTailCall = true;
4666
4667  if (LandingPad) {
4668    // Insert a label at the end of the invoke call to mark the try range.  This
4669    // can be used to detect deletion of the invoke via the MachineModuleInfo.
4670    MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol();
4671    DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel));
4672
4673    // Inform MachineModuleInfo of range.
4674    MMI.addInvoke(LandingPad, BeginLabel, EndLabel);
4675  }
4676}
4677
4678/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the
4679/// value is equal or not-equal to zero.
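/// For example, a memcmp result whose only use is
///   %c = icmp eq i32 %res, 0
/// satisfies this predicate, while any other kind of use does not.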
4680static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) {
4681  for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end();
4682       UI != E; ++UI) {
4683    if (const ICmpInst *IC = dyn_cast<ICmpInst>(*UI))
4684      if (IC->isEquality())
4685        if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1)))
4686          if (C->isNullValue())
4687            continue;
4688    // Unknown instruction.
4689    return false;
4690  }
4691  return true;
4692}
4693
4694static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
4695                             const Type *LoadTy,
4696                             SelectionDAGBuilder &Builder) {
4697
4698  // Check to see if this load can be trivially constant folded, e.g. if the
4699  // input is from a string literal.
4700  if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) {
4701    // Cast pointer to the type we really want to load.
4702    LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput),
4703                                         PointerType::getUnqual(LoadTy));
4704
4705    if (const Constant *LoadCst =
4706          ConstantFoldLoadFromConstPtr(const_cast<Constant *>(LoadInput),
4707                                       Builder.TD))
4708      return Builder.getValue(LoadCst);
4709  }
4710
4711  // Otherwise, we have to emit the load.  If the pointer is to unfoldable but
4712  // still constant memory, the input chain can be the entry node.
4713  SDValue Root;
4714  bool ConstantMemory = false;
4715
4716  // Do not serialize (non-volatile) loads of constant memory with anything.
4717  if (Builder.AA->pointsToConstantMemory(PtrVal)) {
4718    Root = Builder.DAG.getEntryNode();
4719    ConstantMemory = true;
4720  } else {
4721    // Do not serialize non-volatile loads against each other.
4722    Root = Builder.DAG.getRoot();
4723  }
4724
4725  SDValue Ptr = Builder.getValue(PtrVal);
4726  SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
4727                                        Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/,
4728                                        false /*volatile*/,
4729                                        false /*nontemporal*/, 1 /* align=1 */);
4730
4731  if (!ConstantMemory)
4732    Builder.PendingLoads.push_back(LoadVal.getValue(1));
4733  return LoadVal;
4734}
4735
4736
4737/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form.
4738/// If so, return true and lower it, otherwise return false and it will be
4739/// lowered like a normal call.
4740bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
4741  // Verify that the prototype makes sense.  int memcmp(void*,void*,size_t)
4742  if (I.getNumArgOperands() != 3)
4743    return false;
4744
4745  const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1);
4746  if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
4747      !I.getArgOperand(2)->getType()->isIntegerTy() ||
4748      !I.getType()->isIntegerTy())
4749    return false;
4750
4751  const ConstantInt *Size = dyn_cast<ConstantInt>(I.getArgOperand(2));
4752
4753  // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS)  != 0
4754  // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS)  != 0
4755  if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) {
4756    bool ActuallyDoIt = true;
4757    MVT LoadVT;
4758    const Type *LoadTy;
4759    switch (Size->getZExtValue()) {
4760    default:
4761      LoadVT = MVT::Other;
4762      LoadTy = 0;
4763      ActuallyDoIt = false;
4764      break;
4765    case 2:
4766      LoadVT = MVT::i16;
4767      LoadTy = Type::getInt16Ty(Size->getContext());
4768      break;
4769    case 4:
4770      LoadVT = MVT::i32;
4771      LoadTy = Type::getInt32Ty(Size->getContext());
4772      break;
4773    case 8:
4774      LoadVT = MVT::i64;
4775      LoadTy = Type::getInt64Ty(Size->getContext());
4776      break;
4777        /*
4778    case 16:
4779      LoadVT = MVT::v4i32;
4780      LoadTy = Type::getInt32Ty(Size->getContext());
4781      LoadTy = VectorType::get(LoadTy, 4);
4782      break;
4783         */
4784    }
4785
4786    // This turns into unaligned loads.  We only do this if the target natively
4787    // supports the MVT we'll be loading or if it is small enough (<= 4) that
4788    // we'll only produce a small number of byte loads.
4789
4790    // Require that we can find a legal MVT, and only do this if the target
4791    // supports unaligned loads of that type.  Expanding into byte loads would
4792    // bloat the code.
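    // For instance, on a typical 64-bit target where i64 is legal and
    // unaligned accesses are allowed, memcmp(p, q, 8) == 0 becomes two i64
    // loads and a single SETNE, with no libcall.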
4793    if (ActuallyDoIt && Size->getZExtValue() > 4) {
4794      // TODO: Handle 5 byte compare as 4-byte + 1 byte.
4795      // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads.
4796      if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT))
4797        ActuallyDoIt = false;
4798    }
4799
4800    if (ActuallyDoIt) {
4801      SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this);
4802      SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this);
4803
4804      SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal,
4805                                 ISD::SETNE);
4806      EVT CallVT = TLI.getValueType(I.getType(), true);
4807      setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT));
4808      return true;
4809    }
4810  }
4811
4812
4813  return false;
4814}
4815
4816
4817void SelectionDAGBuilder::visitCall(const CallInst &I) {
4818  // Handle inline assembly differently.
4819  if (isa<InlineAsm>(I.getCalledValue())) {
4820    visitInlineAsm(&I);
4821    return;
4822  }
4823
4824  const char *RenameFn = 0;
4825  if (Function *F = I.getCalledFunction()) {
4826    if (F->isDeclaration()) {
4827      if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) {
4828        if (unsigned IID = II->getIntrinsicID(F)) {
4829          RenameFn = visitIntrinsicCall(I, IID);
4830          if (!RenameFn)
4831            return;
4832        }
4833      }
4834      if (unsigned IID = F->getIntrinsicID()) {
4835        RenameFn = visitIntrinsicCall(I, IID);
4836        if (!RenameFn)
4837          return;
4838      }
4839    }
4840
4841    // Check for well-known libc/libm calls.  If the function is internal, it
4842    // can't be a library call.
4843    if (!F->hasLocalLinkage() && F->hasName()) {
4844      StringRef Name = F->getName();
4845      if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") {
4846        if (I.getNumArgOperands() == 2 &&   // Basic sanity checks.
4847            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
4848            I.getType() == I.getArgOperand(0)->getType() &&
4849            I.getType() == I.getArgOperand(1)->getType()) {
4850          SDValue LHS = getValue(I.getArgOperand(0));
4851          SDValue RHS = getValue(I.getArgOperand(1));
4852          setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(),
4853                                   LHS.getValueType(), LHS, RHS));
4854          return;
4855        }
4856      } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") {
4857        if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
4858            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
4859            I.getType() == I.getArgOperand(0)->getType()) {
4860          SDValue Tmp = getValue(I.getArgOperand(0));
4861          setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(),
4862                                   Tmp.getValueType(), Tmp));
4863          return;
4864        }
4865      } else if (Name == "sin" || Name == "sinf" || Name == "sinl") {
4866        if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
4867            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
4868            I.getType() == I.getArgOperand(0)->getType() &&
4869            I.onlyReadsMemory()) {
4870          SDValue Tmp = getValue(I.getArgOperand(0));
4871          setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(),
4872                                   Tmp.getValueType(), Tmp));
4873          return;
4874        }
4875      } else if (Name == "cos" || Name == "cosf" || Name == "cosl") {
4876        if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
4877            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
4878            I.getType() == I.getArgOperand(0)->getType() &&
4879            I.onlyReadsMemory()) {
4880          SDValue Tmp = getValue(I.getArgOperand(0));
4881          setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(),
4882                                   Tmp.getValueType(), Tmp));
4883          return;
4884        }
4885      } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") {
4886        if (I.getNumArgOperands() == 1 &&   // Basic sanity checks.
4887            I.getArgOperand(0)->getType()->isFloatingPointTy() &&
4888            I.getType() == I.getArgOperand(0)->getType() &&
4889            I.onlyReadsMemory()) {
4890          SDValue Tmp = getValue(I.getArgOperand(0));
4891          setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(),
4892                                   Tmp.getValueType(), Tmp));
4893          return;
4894        }
4895      } else if (Name == "memcmp") {
4896        if (visitMemCmpCall(I))
4897          return;
4898      }
4899    }
4900  }
4901
4902  SDValue Callee;
4903  if (!RenameFn)
4904    Callee = getValue(I.getCalledValue());
4905  else
4906    Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy());
4907
4908  // Check if we can potentially perform a tail call. More detailed checking is
4909  // done within LowerCallTo, after more information about the call is known.
4910  LowerCallTo(&I, Callee, I.isTailCall());
4911}
4912
4913namespace llvm {
4914
4915/// AsmOperandInfo - This contains information for each constraint that we are
4916/// lowering.
4917class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo :
4918    public TargetLowering::AsmOperandInfo {
4919public:
4920  /// CallOperand - If this is the result output operand or a clobber,
4921  /// this is null, otherwise it is the incoming operand to the CallInst.
4922  /// This gets modified as the asm is processed.
4923  SDValue CallOperand;
4924
4925  /// AssignedRegs - If this is a register or register class operand, this
4926  /// contains the set of registers corresponding to the operand.
4927  RegsForValue AssignedRegs;
4928
4929  explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
4930    : TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
4931  }
4932
4933  /// MarkAllocatedRegs - Once AssignedRegs is set, mark the assigned registers
4934  /// busy in OutputRegs/InputRegs.
4935  void MarkAllocatedRegs(bool isOutReg, bool isInReg,
4936                         std::set<unsigned> &OutputRegs,
4937                         std::set<unsigned> &InputRegs,
4938                         const TargetRegisterInfo &TRI) const {
4939    if (isOutReg) {
4940      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
4941        MarkRegAndAliases(AssignedRegs.Regs[i], OutputRegs, TRI);
4942    }
4943    if (isInReg) {
4944      for (unsigned i = 0, e = AssignedRegs.Regs.size(); i != e; ++i)
4945        MarkRegAndAliases(AssignedRegs.Regs[i], InputRegs, TRI);
4946    }
4947  }
4948
4949  /// getCallOperandValEVT - Return the EVT of the Value* that this operand
4950  /// corresponds to.  If there is no Value* for this operand, it returns
4951  /// MVT::Other.
4952  EVT getCallOperandValEVT(LLVMContext &Context,
4953                           const TargetLowering &TLI,
4954                           const TargetData *TD) const {
4955    if (CallOperandVal == 0) return MVT::Other;
4956
4957    if (isa<BasicBlock>(CallOperandVal))
4958      return TLI.getPointerTy();
4959
4960    const llvm::Type *OpTy = CallOperandVal->getType();
4961
4962    // If this is an indirect operand, the operand is a pointer to the
4963    // accessed type.
4964    if (isIndirect) {
4965      const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
4966      if (!PtrTy)
4967        report_fatal_error("Indirect operand for inline asm not a pointer!");
4968      OpTy = PtrTy->getElementType();
4969    }
4970
4971    // If OpTy is not a single value, it may be a struct/union that we
4972    // can tile with integers.
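    // For example, under this rule an inline asm operand whose type is a
    // 64-bit struct is treated as an i64 for constraint-matching purposes.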
4973    if (!OpTy->isSingleValueType() && OpTy->isSized()) {
4974      unsigned BitSize = TD->getTypeSizeInBits(OpTy);
4975      switch (BitSize) {
4976      default: break;
4977      case 1:
4978      case 8:
4979      case 16:
4980      case 32:
4981      case 64:
4982      case 128:
4983        OpTy = IntegerType::get(Context, BitSize);
4984        break;
4985      }
4986    }
4987
4988    return TLI.getValueType(OpTy, true);
4989  }
4990
4991private:
4992  /// MarkRegAndAliases - Mark the specified register and all aliases in the
4993  /// specified set.
4994  static void MarkRegAndAliases(unsigned Reg, std::set<unsigned> &Regs,
4995                                const TargetRegisterInfo &TRI) {
4996    assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "Isn't a physreg");
4997    Regs.insert(Reg);
4998    if (const unsigned *Aliases = TRI.getAliasSet(Reg))
4999      for (; *Aliases; ++Aliases)
5000        Regs.insert(*Aliases);
5001  }
5002};
5003
5004} // end llvm namespace.
5005
5006/// isAllocatableRegister - If the specified register is safe to allocate,
5007/// i.e. it isn't a stack pointer or some other special register, return the
5008/// register class for the register.  Otherwise, return null.
5009static const TargetRegisterClass *
5010isAllocatableRegister(unsigned Reg, MachineFunction &MF,
5011                      const TargetLowering &TLI,
5012                      const TargetRegisterInfo *TRI) {
5013  EVT FoundVT = MVT::Other;
5014  const TargetRegisterClass *FoundRC = 0;
5015  for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
5016       E = TRI->regclass_end(); RCI != E; ++RCI) {
5017    EVT ThisVT = MVT::Other;
5018
5019    const TargetRegisterClass *RC = *RCI;
5020    // If none of the value types for this register class are valid, we
5021    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
5022    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
5023         I != E; ++I) {
5024      if (TLI.isTypeLegal(*I)) {
5025        // If we have already found this register in a different register class,
5026        // choose the one with the largest VT specified.  For example, on
5027        // PowerPC, we favor f64 register classes over f32.
5028        if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
5029          ThisVT = *I;
5030          break;
5031        }
5032      }
5033    }
5034
5035    if (ThisVT == MVT::Other) continue;
5036
5037    // NOTE: This isn't ideal.  In particular, this might allocate the
5038    // frame pointer in functions that need it (due to them not being taken
5039    // out of allocation, because a variable sized allocation hasn't been seen
5040    // yet).  This is a slight code pessimization, but should still work.
5041    for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
5042         E = RC->allocation_order_end(MF); I != E; ++I)
5043      if (*I == Reg) {
5044        // We found a matching register class.  Keep looking at others in case
5045        // we find one with larger registers that this physreg is also in.
5046        FoundRC = RC;
5047        FoundVT = ThisVT;
5048        break;
5049      }
5050  }
5051  return FoundRC;
5052}
5053
5054/// GetRegistersForValue - Assign registers (virtual or physical) for the
5055/// specified operand.  We prefer to assign virtual registers, to allow the
5056/// register allocator to handle the assignment process.  However, if the asm
5057/// uses features that we can't model on machineinstrs, we have SDISel do the
5058/// allocation.  This produces generally horrible, but correct, code.
5059///
5060///   OpInfo describes the operand.
5061///   Input and OutputRegs are the set of already allocated physical registers.
5062///
5063void SelectionDAGBuilder::
5064GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
5065                     std::set<unsigned> &OutputRegs,
5066                     std::set<unsigned> &InputRegs) {
5067  LLVMContext &Context = FuncInfo.Fn->getContext();
5068
5069  // Compute whether this value requires an input register, an output register,
5070  // or both.
5071  bool isOutReg = false;
5072  bool isInReg = false;
5073  switch (OpInfo.Type) {
5074  case InlineAsm::isOutput:
5075    isOutReg = true;
5076
5077    // If there is an input constraint that matches this, we need to reserve
5078    // the input register so no other inputs allocate to it.
5079    isInReg = OpInfo.hasMatchingInput();
5080    break;
5081  case InlineAsm::isInput:
5082    isInReg = true;
5083    isOutReg = false;
5084    break;
5085  case InlineAsm::isClobber:
5086    isOutReg = true;
5087    isInReg = true;
5088    break;
5089  }
5090
5091
5092  MachineFunction &MF = DAG.getMachineFunction();
5093  SmallVector<unsigned, 4> Regs;
5094
5095  // If this is a constraint for a single physreg, or a constraint for a
5096  // register class, find it.
5097  std::pair<unsigned, const TargetRegisterClass*> PhysReg =
5098    TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode,
5099                                     OpInfo.ConstraintVT);
5100
5101  unsigned NumRegs = 1;
5102  if (OpInfo.ConstraintVT != MVT::Other) {
5103    // If this is an FP input in an integer register (or vice versa), insert a bit
5104    // cast of the input value.  More generally, handle any case where the input
5105    // value disagrees with the register class we plan to stick this in.
5106    if (OpInfo.Type == InlineAsm::isInput &&
5107        PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) {
5108      // Try to convert to the first EVT that the reg class contains.  If the
5109      // types are of identical size, use a bitcast to convert (e.g. two differing
5110      // vector types).
5111      EVT RegVT = *PhysReg.second->vt_begin();
5112      if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
5113        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
5114                                         RegVT, OpInfo.CallOperand);
5115        OpInfo.ConstraintVT = RegVT;
5116      } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
5117        // If the input is an FP value and we want it in integer registers, do a
5118        // bitcast to the corresponding integer type.  This turns an f64 value
5119        // into i64, which can be passed with two i32 values on a 32-bit
5120        // machine.
5121        RegVT = EVT::getIntegerVT(Context,
5122                                  OpInfo.ConstraintVT.getSizeInBits());
5123        OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
5124                                         RegVT, OpInfo.CallOperand);
5125        OpInfo.ConstraintVT = RegVT;
5126      }
5127    }
5128
5129    NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT);
5130  }
5131
5132  EVT RegVT;
5133  EVT ValueVT = OpInfo.ConstraintVT;
5134
5135  // If this is a constraint for a specific physical register, like {r17},
5136  // assign it now.
5137  if (unsigned AssignedReg = PhysReg.first) {
5138    const TargetRegisterClass *RC = PhysReg.second;
5139    if (OpInfo.ConstraintVT == MVT::Other)
5140      ValueVT = *RC->vt_begin();
5141
5142    // Get the actual register value type.  This is important, because the user
5143    // may have asked for (e.g.) the AX register in i32 type.  We need to
5144    // remember that AX is actually i16 to get the right extension.
5145    RegVT = *RC->vt_begin();
5146
5147    // This is an explicit reference to a physical register.
5148    Regs.push_back(AssignedReg);
5149
5150    // If this is an expanded reference, add the rest of the regs to Regs.
5151    if (NumRegs != 1) {
5152      TargetRegisterClass::iterator I = RC->begin();
5153      for (; *I != AssignedReg; ++I)
5154        assert(I != RC->end() && "Didn't find reg!");
5155
5156      // Already added the first reg.
5157      --NumRegs; ++I;
5158      for (; NumRegs; --NumRegs, ++I) {
5159        assert(I != RC->end() && "Ran out of registers to allocate!");
5160        Regs.push_back(*I);
5161      }
5162    }
5163
5164    OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
5165    const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
5166    OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
5167    return;
5168  }
5169
5170  // Otherwise, if this was a reference to an LLVM register class, create vregs
5171  // for this reference.
5172  if (const TargetRegisterClass *RC = PhysReg.second) {
5173    RegVT = *RC->vt_begin();
5174    if (OpInfo.ConstraintVT == MVT::Other)
5175      ValueVT = RegVT;
5176
5177    // Create the appropriate number of virtual registers.
5178    MachineRegisterInfo &RegInfo = MF.getRegInfo();
5179    for (; NumRegs; --NumRegs)
5180      Regs.push_back(RegInfo.createVirtualRegister(RC));
5181
5182    OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT);
5183    return;
5184  }
5185
5186  // This is a reference to a register class that doesn't directly correspond
5187  // to an LLVM register class.  Allocate NumRegs consecutive, available,
5188  // registers from the class.
5189  std::vector<unsigned> RegClassRegs
5190    = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
5191                                            OpInfo.ConstraintVT);
5192
5193  const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
5194  unsigned NumAllocated = 0;
5195  for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
5196    unsigned Reg = RegClassRegs[i];
5197    // See if this register is available.
5198    if ((isOutReg && OutputRegs.count(Reg)) ||   // Already used.
5199        (isInReg  && InputRegs.count(Reg))) {    // Already used.
5200      // Make sure we find consecutive registers.
5201      NumAllocated = 0;
5202      continue;
5203    }
5204
5205    // Check to see if this register is allocatable (i.e. don't give out the
5206    // stack pointer).
5207    const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
5208    if (!RC) {        // Couldn't allocate this register.
5209      // Reset NumAllocated to make sure we return consecutive registers.
5210      NumAllocated = 0;
5211      continue;
5212    }
5213
5214    // Okay, this register is good, we can use it.
5215    ++NumAllocated;
5216
5217    // If we allocated enough consecutive registers, succeed.
5218    if (NumAllocated == NumRegs) {
5219      unsigned RegStart = (i-NumAllocated)+1;
5220      unsigned RegEnd   = i+1;
5221      // Mark all of the allocated registers used.
5222      for (unsigned i = RegStart; i != RegEnd; ++i)
5223        Regs.push_back(RegClassRegs[i]);
5224
5225      OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(),
5226                                         OpInfo.ConstraintVT);
5227      OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
5228      return;
5229    }
5230  }
5231
5232  // Otherwise, we couldn't allocate enough registers for this.
5233}
5234
5235/// visitInlineAsm - Handle a call to an InlineAsm object.
5236///
5237void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
5238  const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
5239
5240  /// ConstraintOperands - Information about all of the constraints.
5241  std::vector<SDISelAsmOperandInfo> ConstraintOperands;
5242
5243  std::set<unsigned> OutputRegs, InputRegs;
5244
5245  // Do a prepass over the constraints, canonicalizing them, and building up the
5246  // ConstraintOperands list.
5247  std::vector<InlineAsm::ConstraintInfo>
5248    ConstraintInfos = IA->ParseConstraints();
5249
5250  bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
5251
5252  SDValue Chain, Flag;
5253
5254  // We won't need to flush pending loads if this asm doesn't touch
5255  // memory and is nonvolatile.
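  // (getRoot() folds any pending loads into the chain so the asm is ordered
  // after them; DAG.getRoot() deliberately skips that flush.)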
5256  if (hasMemory || IA->hasSideEffects())
5257    Chain = getRoot();
5258  else
5259    Chain = DAG.getRoot();
5260
5261  unsigned ArgNo = 0;   // ArgNo - The next argument of the CallInst to process.
5262  unsigned ResNo = 0;   // ResNo - The result number of the next output.
5263  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
5264    ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
5265    SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
5266
5267    EVT OpVT = MVT::Other;
5268
5269    // Compute the value type for each operand.
5270    switch (OpInfo.Type) {
5271    case InlineAsm::isOutput:
5272      // Indirect outputs just consume an argument.
5273      if (OpInfo.isIndirect) {
5274        OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
5275        break;
5276      }
5277
5278      // The return value of the call is this value.  As such, there is no
5279      // corresponding argument.
5280      assert(!CS.getType()->isVoidTy() &&
5281             "Bad inline asm!");
5282      if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
5283        OpVT = TLI.getValueType(STy->getElementType(ResNo));
5284      } else {
5285        assert(ResNo == 0 && "Asm only has one result!");
5286        OpVT = TLI.getValueType(CS.getType());
5287      }
5288      ++ResNo;
5289      break;
5290    case InlineAsm::isInput:
5291      OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
5292      break;
5293    case InlineAsm::isClobber:
5294      // Nothing to do.
5295      break;
5296    }
5297
5298    // If this is an input or an indirect output, process the call argument.
5299    // BasicBlocks are labels, currently appearing only in asms.
5300    if (OpInfo.CallOperandVal) {
5301      // Strip bitcasts, if any.  This mostly comes up for functions.
5302      OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts();
5303
5304      if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
5305        OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
5306      } else {
5307        OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
5308      }
5309
5310      OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD);
5311    }
5312
5313    OpInfo.ConstraintVT = OpVT;
5314  }
5315
5316  // Second pass over the constraints: compute which constraint option to use
5317  // and assign registers to constraints that want a specific physreg.
5318  for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
5319    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
5320
5321    // If this is an output operand with a matching input operand, look up the
5322    // matching input. If their types mismatch, e.g. one is an integer, the
5323    // other is floating point, or their sizes are different, flag it as an
5324    // error.
5325    if (OpInfo.hasMatchingInput()) {
5326      SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
5327
5328      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
5329        if ((OpInfo.ConstraintVT.isInteger() !=
5330             Input.ConstraintVT.isInteger()) ||
5331            (OpInfo.ConstraintVT.getSizeInBits() !=
5332             Input.ConstraintVT.getSizeInBits())) {
5333          report_fatal_error("Unsupported asm: input constraint"
5334                             " with a matching output constraint of"
5335                             " incompatible type!");
5336        }
5337        Input.ConstraintVT = OpInfo.ConstraintVT;
5338      }
5339    }
5340
5341    // Compute the constraint code and ConstraintType to use.
5342    TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG);
5343
5344    // If this is a memory input, and if the operand is not indirect, do what we
5345    // need to do to provide an address for the memory input.
5346    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
5347        !OpInfo.isIndirect) {
5348      assert(OpInfo.Type == InlineAsm::isInput &&
5349             "Can only indirectify direct input operands!");
5350
5351      // Memory operands really want the address of the value.  If we don't have
5352      // an indirect input, put it in the constant pool if we can, otherwise spill
5353      // it to a stack slot.
5354
5355      // If the operand is a float, integer, or vector constant, spill to a
5356      // constant pool entry to get its address.
5357      const Value *OpVal = OpInfo.CallOperandVal;
5358      if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
5359          isa<ConstantVector>(OpVal)) {
5360        OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal),
5361                                                 TLI.getPointerTy());
5362      } else {
5363        // Otherwise, create a stack slot and emit a store to it before the
5364        // asm.
5365        const Type *Ty = OpVal->getType();
5366        uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
5367        unsigned Align  = TLI.getTargetData()->getPrefTypeAlignment(Ty);
5368        MachineFunction &MF = DAG.getMachineFunction();
5369        int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
5370        SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
5371        Chain = DAG.getStore(Chain, getCurDebugLoc(),
5372                             OpInfo.CallOperand, StackSlot, NULL, 0,
5373                             false, false, 0);
5374        OpInfo.CallOperand = StackSlot;
5375      }
5376
5377      // There is no longer a Value* corresponding to this operand.
5378      OpInfo.CallOperandVal = 0;
5379
5380      // It is now an indirect operand.
5381      OpInfo.isIndirect = true;
5382    }
5383
5384    // If this constraint is for a specific register, allocate it before
5385    // anything else.
5386    if (OpInfo.ConstraintType == TargetLowering::C_Register)
5387      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
5388  }
5389
5390  ConstraintInfos.clear();
5391
5392  // Third pass - Loop over all of the operands, assigning virtual or physregs
5393  // to register class operands.
5394  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
5395    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
5396
5397    // C_Register operands have already been allocated, Other/Memory don't need
5398    // to be.
5399    if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass)
5400      GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
5401  }
5402
5403  // AsmNodeOperands - The operands for the ISD::INLINEASM node.
5404  std::vector<SDValue> AsmNodeOperands;
5405  AsmNodeOperands.push_back(SDValue());  // reserve space for input chain
5406  AsmNodeOperands.push_back(
5407          DAG.getTargetExternalSymbol(IA->getAsmString().c_str(),
5408                                      TLI.getPointerTy()));
5409
5410  // If we have a !srcloc metadata node associated with it, we want to attach
5411  // this to the ultimately generated inline asm machineinstr.  To do this, we
5412  // pass in the third operand as this (potentially null) inline asm MDNode.
5413  const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
5414  AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
5415
5416  // Remember the AlignStack bit as operand 3.
5417  AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0,
5418                                            MVT::i1));
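  // At this point the fixed INLINEASM operands are laid out as: 0 = input
  // chain (filled in later), 1 = asm string, 2 = srcloc MDNode, 3 = the
  // AlignStack bit.  The loop below then appends, for each constraint it
  // lowers, a flag-word constant followed by that constraint's values or
  // registers (see InlineAsm.h).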
5419
5420  // Loop over all of the inputs, copying the operand values into the
5421  // appropriate registers and processing the output regs.
5422  RegsForValue RetValRegs;
5423
5424  // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
5425  std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit;
5426
5427  for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
5428    SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
5429
5430    switch (OpInfo.Type) {
5431    case InlineAsm::isOutput: {
5432      if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
5433          OpInfo.ConstraintType != TargetLowering::C_Register) {
5434        // Memory output, or 'other' output (e.g. 'X' constraint).
5435        assert(OpInfo.isIndirect && "Memory output must be indirect operand");
5436
5437        // Add information to the INLINEASM node to know about this output.
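        // A flag word (InlineAsm::getFlagWord) packs, roughly, the operand
        // kind into the low bits and the number of SDNode operands that follow
        // into the next bit field; getFlagWordForMatchingOp additionally
        // records the matched operand number (see InlineAsm.h for the exact
        // layout).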
5438        unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
5439        AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags,
5440                                                        TLI.getPointerTy()));
5441        AsmNodeOperands.push_back(OpInfo.CallOperand);
5442        break;
5443      }
5444
5445      // Otherwise, this is a register or register class output.
5446
5447      // Copy the output from the appropriate register.  Find a register that
5448      // we can use.
5449      if (OpInfo.AssignedRegs.Regs.empty())
5450        report_fatal_error("Couldn't allocate output reg for constraint '" +
5451                           Twine(OpInfo.ConstraintCode) + "'!");
5452
5453      // If this is an indirect operand, store through the pointer after the
5454      // asm.
5455      if (OpInfo.isIndirect) {
5456        IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
5457                                                      OpInfo.CallOperandVal));
5458      } else {
5459        // This is the result value of the call.
5460        assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
5461        // Concatenate this output onto the outputs list.
5462        RetValRegs.append(OpInfo.AssignedRegs);
5463      }
5464
5465      // Add information to the INLINEASM node to know that this register is
5466      // set.
5467      OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ?
5468                                           InlineAsm::Kind_RegDefEarlyClobber :
5469                                               InlineAsm::Kind_RegDef,
5470                                               false,
5471                                               0,
5472                                               DAG,
5473                                               AsmNodeOperands);
5474      break;
5475    }
5476    case InlineAsm::isInput: {
5477      SDValue InOperandVal = OpInfo.CallOperand;
5478
5479      if (OpInfo.isMatchingInputConstraint()) {   // Matching constraint?
5480        // If this is required to match an output register we have already set,
5481        // just use its register.
5482        unsigned OperandNo = OpInfo.getMatchedOperand();
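        // e.g. an input constraint of "0" ties this input to (output) operand
        // 0, so it must be placed in the same registers as that output.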
5483
5484        // Scan until we find the definition of this operand that we already emitted.
5485        // When we find it, create a RegsForValue operand.
5486        unsigned CurOp = InlineAsm::Op_FirstOperand;
5487        for (; OperandNo; --OperandNo) {
5488          // Advance to the next operand.
5489          unsigned OpFlag =
5490            cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
5491          assert((InlineAsm::isRegDefKind(OpFlag) ||
5492                  InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
5493                  InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?");
5494          CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
5495        }
5496
5497        unsigned OpFlag =
5498          cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
5499        if (InlineAsm::isRegDefKind(OpFlag) ||
5500            InlineAsm::isRegDefEarlyClobberKind(OpFlag)) {
5501          // Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
5502          if (OpInfo.isIndirect) {
5503            // This happens on gcc/testsuite/gcc.dg/pr8788-1.c
5504            LLVMContext &Ctx = *DAG.getContext();
5505            Ctx.emitError(CS.getInstruction(),  "inline asm not supported yet:"
5506                          " don't know how to handle tied "
5507                          "indirect register inputs");
5508          }
5509
5510          RegsForValue MatchedRegs;
5511          MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
5512          EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
5513          MatchedRegs.RegVTs.push_back(RegVT);
5514          MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
5515          for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
5516               i != e; ++i)
5517            MatchedRegs.Regs.push_back
5518              (RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)));
5519
5520          // Use the produced MatchedRegs to copy the input into the registers.
5521          MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
5522                                    Chain, &Flag);
5523          MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse,
5524                                           true, OpInfo.getMatchedOperand(),
5525                                           DAG, AsmNodeOperands);
5526          break;
5527        }
5528
5529        assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
5530        assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
5531               "Unexpected number of operands");
5532        // Add information to the INLINEASM node to know about this input.
5533        // See InlineAsm.h isUseOperandTiedToDef.
5534        OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag,
5535                                                    OpInfo.getMatchedOperand());
5536        AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag,
5537                                                        TLI.getPointerTy()));
5538        AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]);
5539        break;
5540      }
5541
5542      // Treat indirect 'X' constraint as memory.
5543      if (OpInfo.ConstraintType == TargetLowering::C_Other &&
5544          OpInfo.isIndirect)
5545        OpInfo.ConstraintType = TargetLowering::C_Memory;
5546
5547      if (OpInfo.ConstraintType == TargetLowering::C_Other) {
5548        std::vector<SDValue> Ops;
5549        TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
5550                                         Ops, DAG);
5551        if (Ops.empty())
5552          report_fatal_error("Invalid operand for inline asm constraint '" +
5553                             Twine(OpInfo.ConstraintCode) + "'!");
5554
5555        // Add information to the INLINEASM node to know about this input.
5556        unsigned ResOpType =
5557          InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size());
5558        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
5559                                                        TLI.getPointerTy()));
5560        AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
5561        break;
5562      }
5563
5564      if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
5565        assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
5566        assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
5567               "Memory operands expect pointer values");
5568
5569        // Add information to the INLINEASM node to know about this input.
5570        unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1);
5571        AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType,
5572                                                        TLI.getPointerTy()));
5573        AsmNodeOperands.push_back(InOperandVal);
5574        break;
5575      }
5576
5577      assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
5578              OpInfo.ConstraintType == TargetLowering::C_Register) &&
5579             "Unknown constraint type!");
5580      assert(!OpInfo.isIndirect &&
5581             "Don't know how to handle indirect register inputs yet!");
5582
5583      // Copy the input into the appropriate registers.
5584      if (OpInfo.AssignedRegs.Regs.empty() ||
5585          !OpInfo.AssignedRegs.areValueTypesLegal(TLI))
5586        report_fatal_error("Couldn't allocate input reg for constraint '" +
5587                           Twine(OpInfo.ConstraintCode) + "'!");
5588
5589      OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
5590                                        Chain, &Flag);
5591
5592      OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0,
5593                                               DAG, AsmNodeOperands);
5594      break;
5595    }
5596    case InlineAsm::isClobber: {
5597      // Add the clobbered value to the operand list, so that the register
5598      // allocator is aware that the physreg got clobbered.
5599      if (!OpInfo.AssignedRegs.Regs.empty())
5600        OpInfo.AssignedRegs.AddInlineAsmOperands(
5601                                            InlineAsm::Kind_RegDefEarlyClobber,
5602                                                 false, 0, DAG,
5603                                                 AsmNodeOperands);
5604      break;
5605    }
5606    }
5607  }
5608
5609  // Finish up input operands.  Set the input chain and add the flag last.
5610  AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
5611  if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
5612
5613  Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
5614                      DAG.getVTList(MVT::Other, MVT::Flag),
5615                      &AsmNodeOperands[0], AsmNodeOperands.size());
5616  Flag = Chain.getValue(1);
5617
5618  // If this asm returns a register value, copy the result from that register
5619  // and set it as the value of the call.
5620  if (!RetValRegs.Regs.empty()) {
5621    SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
5622                                             Chain, &Flag);
5623
5624    // FIXME: Why don't we do this for inline asms with MRVs?
5625    if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
5626      EVT ResultType = TLI.getValueType(CS.getType());
5627
5628      // If any of the results of the inline asm is a vector, it may have the
5629      // wrong width/num elts.  This can happen for register classes that can
5630      // contain multiple different value types.  The preg or vreg allocated may
5631      // not have the same VT as was expected.  Convert it to the right type
5632      // with bit_convert.
5633      if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
5634        Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
5635                          ResultType, Val);
5636
5637      } else if (ResultType != Val.getValueType() &&
5638                 ResultType.isInteger() && Val.getValueType().isInteger()) {
5639        // If a result value was tied to an input value, the computed result may
5640        // have a wider width than the expected result.  Extract the relevant
5641        // portion.
5642        Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val);
5643      }
5644
5645      assert(ResultType == Val.getValueType() && "Asm result value mismatch!");
5646    }
5647
5648    setValue(CS.getInstruction(), Val);
5649    // Don't need to use this as a chain in this case.
5650    if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
5651      return;
5652  }
5653
5654  std::vector<std::pair<SDValue, const Value *> > StoresToEmit;
5655
5656  // Process indirect outputs, first output all of the flagged copies out of
5657  // physregs.
5658  for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
5659    RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
5660    const Value *Ptr = IndirectStoresToEmit[i].second;
5661    SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(),
5662                                             Chain, &Flag);
5663    StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
5664  }
5665
5666  // Emit the non-flagged stores from the physregs.
5667  SmallVector<SDValue, 8> OutChains;
5668  for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
5669    SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
5670                               StoresToEmit[i].first,
5671                               getValue(StoresToEmit[i].second),
5672                               StoresToEmit[i].second, 0,
5673                               false, false, 0);
5674    OutChains.push_back(Val);
5675  }
5676
5677  if (!OutChains.empty())
5678    Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other,
5679                        &OutChains[0], OutChains.size());
5680
5681  DAG.setRoot(Chain);
5682}
5683
5684void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
5685  DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(),
5686                          MVT::Other, getRoot(),
5687                          getValue(I.getArgOperand(0)),
5688                          DAG.getSrcValue(I.getArgOperand(0))));
5689}
5690
5691void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
5692  const TargetData &TD = *TLI.getTargetData();
5693  SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
5694                           getRoot(), getValue(I.getOperand(0)),
5695                           DAG.getSrcValue(I.getOperand(0)),
5696                           TD.getABITypeAlignment(I.getType()));
5697  setValue(&I, V);
5698  DAG.setRoot(V.getValue(1));
5699}
5700
5701void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
5702  DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(),
5703                          MVT::Other, getRoot(),
5704                          getValue(I.getArgOperand(0)),
5705                          DAG.getSrcValue(I.getArgOperand(0))));
5706}
5707
5708void SelectionDAGBuilder::visitVACopy(const CallInst &I) {
5709  DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(),
5710                          MVT::Other, getRoot(),
5711                          getValue(I.getArgOperand(0)),
5712                          getValue(I.getArgOperand(1)),
5713                          DAG.getSrcValue(I.getArgOperand(0)),
5714                          DAG.getSrcValue(I.getArgOperand(1))));
5715}
5716
5717/// TargetLowering::LowerCallTo - This is the default LowerCallTo
5718/// implementation, which just calls LowerCall.
5719/// FIXME: When all targets are
5720/// migrated to using LowerCall, this hook should be integrated into SDISel.
5721std::pair<SDValue, SDValue>
5722TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
5723                            bool RetSExt, bool RetZExt, bool isVarArg,
5724                            bool isInreg, unsigned NumFixedArgs,
5725                            CallingConv::ID CallConv, bool isTailCall,
5726                            bool isReturnValueUsed,
5727                            SDValue Callee,
5728                            ArgListTy &Args, SelectionDAG &DAG,
5729                            DebugLoc dl) const {
5730  // Handle all of the outgoing arguments.
5731  SmallVector<ISD::OutputArg, 32> Outs;
5732  SmallVector<SDValue, 32> OutVals;
5733  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
5734    SmallVector<EVT, 4> ValueVTs;
5735    ComputeValueVTs(*this, Args[i].Ty, ValueVTs);
5736    for (unsigned Value = 0, NumValues = ValueVTs.size();
5737         Value != NumValues; ++Value) {
5738      EVT VT = ValueVTs[Value];
5739      const Type *ArgTy = VT.getTypeForEVT(RetTy->getContext());
5740      SDValue Op = SDValue(Args[i].Node.getNode(),
5741                           Args[i].Node.getResNo() + Value);
5742      ISD::ArgFlagsTy Flags;
5743      unsigned OriginalAlignment =
5744        getTargetData()->getABITypeAlignment(ArgTy);
5745
5746      if (Args[i].isZExt)
5747        Flags.setZExt();
5748      if (Args[i].isSExt)
5749        Flags.setSExt();
5750      if (Args[i].isInReg)
5751        Flags.setInReg();
5752      if (Args[i].isSRet)
5753        Flags.setSRet();
5754      if (Args[i].isByVal) {
5755        Flags.setByVal();
5756        const PointerType *Ty = cast<PointerType>(Args[i].Ty);
5757        const Type *ElementTy = Ty->getElementType();
5758        unsigned FrameAlign = getByValTypeAlignment(ElementTy);
5759        unsigned FrameSize  = getTargetData()->getTypeAllocSize(ElementTy);
5760        // For ByVal, alignment should come from the front end.  The back end
5761        // will guess if this info is not there, but there are cases it cannot get right.
5762        if (Args[i].Alignment)
5763          FrameAlign = Args[i].Alignment;
5764        Flags.setByValAlign(FrameAlign);
5765        Flags.setByValSize(FrameSize);
5766      }
5767      if (Args[i].isNest)
5768        Flags.setNest();
5769      Flags.setOrigAlign(OriginalAlignment);
5770
5771      EVT PartVT = getRegisterType(RetTy->getContext(), VT);
5772      unsigned NumParts = getNumRegisters(RetTy->getContext(), VT);
5773      SmallVector<SDValue, 4> Parts(NumParts);
5774      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
5775
5776      if (Args[i].isSExt)
5777        ExtendKind = ISD::SIGN_EXTEND;
5778      else if (Args[i].isZExt)
5779        ExtendKind = ISD::ZERO_EXTEND;
5780
5781      getCopyToParts(DAG, dl, Op, &Parts[0], NumParts,
5782                     PartVT, ExtendKind);
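      // e.g. on a target with 32-bit registers an i64 argument typically
      // yields NumParts == 2 i32 parts from the call above; the loop below
      // then emits one outgoing argument per part (illustrative; the exact
      // split is target-dependent).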
5783
5784      for (unsigned j = 0; j != NumParts; ++j) {
5785        // If it isn't the first piece, the alignment must be 1.
5786        ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(),
5787                               i < NumFixedArgs);
5788        if (NumParts > 1 && j == 0)
5789          MyFlags.Flags.setSplit();
5790        else if (j != 0)
5791          MyFlags.Flags.setOrigAlign(1);
5792
5793        Outs.push_back(MyFlags);
5794        OutVals.push_back(Parts[j]);
5795      }
5796    }
5797  }
5798
5799  // Handle the incoming return values from the call.
5800  SmallVector<ISD::InputArg, 32> Ins;
5801  SmallVector<EVT, 4> RetTys;
5802  ComputeValueVTs(*this, RetTy, RetTys);
5803  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
5804    EVT VT = RetTys[I];
5805    EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
5806    unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
5807    for (unsigned i = 0; i != NumRegs; ++i) {
5808      ISD::InputArg MyFlags;
5809      MyFlags.VT = RegisterVT;
5810      MyFlags.Used = isReturnValueUsed;
5811      if (RetSExt)
5812        MyFlags.Flags.setSExt();
5813      if (RetZExt)
5814        MyFlags.Flags.setZExt();
5815      if (isInreg)
5816        MyFlags.Flags.setInReg();
5817      Ins.push_back(MyFlags);
5818    }
5819  }
5820
5821  SmallVector<SDValue, 4> InVals;
5822  Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall,
5823                    Outs, OutVals, Ins, dl, DAG, InVals);
5824
5825  // Verify that the target's LowerCall behaved as expected.
5826  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
5827         "LowerCall didn't return a valid chain!");
5828  assert((!isTailCall || InVals.empty()) &&
5829         "LowerCall emitted a return value for a tail call!");
5830  assert((isTailCall || InVals.size() == Ins.size()) &&
5831         "LowerCall didn't emit the correct number of values!");
5832
5833  // For a tail call, the return value is merely live-out and there aren't
5834  // any nodes in the DAG representing it. Return a special value to
5835  // indicate that a tail call has been emitted and no more Instructions
5836  // should be processed in the current block.
5837  if (isTailCall) {
5838    DAG.setRoot(Chain);
5839    return std::make_pair(SDValue(), SDValue());
5840  }
5841
5842  DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5843          assert(InVals[i].getNode() &&
5844                 "LowerCall emitted a null value!");
5845          assert(Ins[i].VT == InVals[i].getValueType() &&
5846                 "LowerCall emitted a value with the wrong type!");
5847        });
5848
5849  // Collect the legal value parts into potentially illegal values
5850  // that correspond to the original function's return values.
5851  ISD::NodeType AssertOp = ISD::DELETED_NODE;
5852  if (RetSExt)
5853    AssertOp = ISD::AssertSext;
5854  else if (RetZExt)
5855    AssertOp = ISD::AssertZext;
5856  SmallVector<SDValue, 4> ReturnValues;
5857  unsigned CurReg = 0;
5858  for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
5859    EVT VT = RetTys[I];
5860    EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
5861    unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
5862
5863    ReturnValues.push_back(getCopyFromParts(DAG, dl, &InVals[CurReg],
5864                                            NumRegs, RegisterVT, VT,
5865                                            AssertOp));
5866    CurReg += NumRegs;
5867  }
5868
5869  // For a function returning void, there is no return value. We can't create
5870  // such a node, so we just return a null return value in that case; nothing
5871  // will actually look at the value.
5872  if (ReturnValues.empty())
5873    return std::make_pair(SDValue(), Chain);
5874
5875  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
5876                            DAG.getVTList(&RetTys[0], RetTys.size()),
5877                            &ReturnValues[0], ReturnValues.size());
5878  return std::make_pair(Res, Chain);
5879}
5880
5881void TargetLowering::LowerOperationWrapper(SDNode *N,
5882                                           SmallVectorImpl<SDValue> &Results,
5883                                           SelectionDAG &DAG) const {
5884  SDValue Res = LowerOperation(SDValue(N, 0), DAG);
5885  if (Res.getNode())
5886    Results.push_back(Res);
5887}
5888
5889SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
5890  llvm_unreachable("LowerOperation not implemented for this target!");
5891  return SDValue();
5892}
5893
5894void
5895SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
5896  SDValue Op = getNonRegisterValue(V);
5897  assert((Op.getOpcode() != ISD::CopyFromReg ||
5898          cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
5899         "Copy from a reg to the same reg!");
5900  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
5901
5902  RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
5903  SDValue Chain = DAG.getEntryNode();
5904  RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
5905  PendingExports.push_back(Chain);
5906}
5907
5908#include "llvm/CodeGen/SelectionDAGISel.h"
5909
5910void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
5911  // If this is the entry block, emit arguments.
5912  const Function &F = *LLVMBB->getParent();
5913  SelectionDAG &DAG = SDB->DAG;
5914  DebugLoc dl = SDB->getCurDebugLoc();
5915  const TargetData *TD = TLI.getTargetData();
5916  SmallVector<ISD::InputArg, 16> Ins;
5917
5918  // Check whether the function can return without sret-demotion.
5919  SmallVector<ISD::OutputArg, 4> Outs;
5920  GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
5921                Outs, TLI);
5922
5923  if (!FuncInfo->CanLowerReturn) {
5924    // Put in an sret pointer parameter before all the other parameters.
5925    SmallVector<EVT, 1> ValueVTs;
5926    ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
5927
5928    // NOTE: Assuming that a pointer will never break down to more than one VT
5929    // or one register.
5930    ISD::ArgFlagsTy Flags;
5931    Flags.setSRet();
5932    EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]);
5933    ISD::InputArg RetArg(Flags, RegisterVT, true);
5934    Ins.push_back(RetArg);
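    // e.g. a function whose aggregate return cannot be lowered in registers
    // receives a hidden sret pointer argument here; the actual return is later
    // emitted as stores through that pointer (hence "sret demotion").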
5935  }
5936
5937  // Set up the incoming argument description vector.
5938  unsigned Idx = 1;
5939  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end();
5940       I != E; ++I, ++Idx) {
5941    SmallVector<EVT, 4> ValueVTs;
5942    ComputeValueVTs(TLI, I->getType(), ValueVTs);
5943    bool isArgValueUsed = !I->use_empty();
5944    for (unsigned Value = 0, NumValues = ValueVTs.size();
5945         Value != NumValues; ++Value) {
5946      EVT VT = ValueVTs[Value];
5947      const Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
5948      ISD::ArgFlagsTy Flags;
5949      unsigned OriginalAlignment =
5950        TD->getABITypeAlignment(ArgTy);
5951
5952      if (F.paramHasAttr(Idx, Attribute::ZExt))
5953        Flags.setZExt();
5954      if (F.paramHasAttr(Idx, Attribute::SExt))
5955        Flags.setSExt();
5956      if (F.paramHasAttr(Idx, Attribute::InReg))
5957        Flags.setInReg();
5958      if (F.paramHasAttr(Idx, Attribute::StructRet))
5959        Flags.setSRet();
5960      if (F.paramHasAttr(Idx, Attribute::ByVal)) {
5961        Flags.setByVal();
5962        const PointerType *Ty = cast<PointerType>(I->getType());
5963        const Type *ElementTy = Ty->getElementType();
5964        unsigned FrameAlign = TLI.getByValTypeAlignment(ElementTy);
5965        unsigned FrameSize  = TD->getTypeAllocSize(ElementTy);
5966        // For ByVal, alignment should be passed from the front end.  The back end
5967        // will guess if this info is not there, but there are cases it cannot get right.
5968        if (F.getParamAlignment(Idx))
5969          FrameAlign = F.getParamAlignment(Idx);
5970        Flags.setByValAlign(FrameAlign);
5971        Flags.setByValSize(FrameSize);
5972      }
5973      if (F.paramHasAttr(Idx, Attribute::Nest))
5974        Flags.setNest();
5975      Flags.setOrigAlign(OriginalAlignment);
5976
5977      EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
5978      unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT);
5979      for (unsigned i = 0; i != NumRegs; ++i) {
5980        ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed);
5981        if (NumRegs > 1 && i == 0)
5982          MyFlags.Flags.setSplit();
5983        // If it isn't the first piece, the alignment must be 1.
5984        else if (i > 0)
5985          MyFlags.Flags.setOrigAlign(1);
5986        Ins.push_back(MyFlags);
5987      }
5988    }
5989  }
5990
5991  // Call the target to set up the argument values.
5992  SmallVector<SDValue, 8> InVals;
5993  SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(),
5994                                             F.isVarArg(), Ins,
5995                                             dl, DAG, InVals);
5996
5997  // Verify that the target's LowerFormalArguments behaved as expected.
5998  assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other &&
5999         "LowerFormalArguments didn't return a valid chain!");
6000  assert(InVals.size() == Ins.size() &&
6001         "LowerFormalArguments didn't emit the correct number of values!");
6002  DEBUG({
6003      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
6004        assert(InVals[i].getNode() &&
6005               "LowerFormalArguments emitted a null value!");
6006        assert(Ins[i].VT == InVals[i].getValueType() &&
6007               "LowerFormalArguments emitted a value with the wrong type!");
6008      }
6009    });
6010
6011  // Update the DAG with the new chain value resulting from argument lowering.
6012  DAG.setRoot(NewRoot);
6013
6014  // Set up the argument values.
6015  unsigned i = 0;
6016  Idx = 1;
6017  if (!FuncInfo->CanLowerReturn) {
6018    // Create a virtual register for the sret pointer, and copy the incoming
6019    // sret argument into it.
6020    SmallVector<EVT, 1> ValueVTs;
6021    ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
6022    EVT VT = ValueVTs[0];
6023    EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
6024    ISD::NodeType AssertOp = ISD::DELETED_NODE;
6025    SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
6026                                        RegVT, VT, AssertOp);
6027
6028    MachineFunction& MF = SDB->DAG.getMachineFunction();
6029    MachineRegisterInfo& RegInfo = MF.getRegInfo();
6030    unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
6031    FuncInfo->DemoteRegister = SRetReg;
6032    NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(),
6033                                    SRetReg, ArgValue);
6034    DAG.setRoot(NewRoot);
6035
6036    // i indexes lowered arguments.  Bump it past the hidden sret argument.
6037    // Idx indexes LLVM arguments.  Don't touch it.
6038    ++i;
6039  }
6040
6041  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
6042      ++I, ++Idx) {
6043    SmallVector<SDValue, 4> ArgValues;
6044    SmallVector<EVT, 4> ValueVTs;
6045    ComputeValueVTs(TLI, I->getType(), ValueVTs);
6046    unsigned NumValues = ValueVTs.size();
6047
6048    // If this argument is unused, remember its value anyway; it is used to
6049    // generate debugging information.
6050    if (I->use_empty() && NumValues)
6051      SDB->setUnusedArgValue(I, InVals[i]);
6052
6053    for (unsigned Value = 0; Value != NumValues; ++Value) {
6054      EVT VT = ValueVTs[Value];
6055      EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
6056      unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT);
6057
6058      if (!I->use_empty()) {
6059        ISD::NodeType AssertOp = ISD::DELETED_NODE;
6060        if (F.paramHasAttr(Idx, Attribute::SExt))
6061          AssertOp = ISD::AssertSext;
6062        else if (F.paramHasAttr(Idx, Attribute::ZExt))
6063          AssertOp = ISD::AssertZext;
6064
6065        ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
6066                                             NumParts, PartVT, VT,
6067                                             AssertOp));
6068      }
6069
6070      i += NumParts;
6071    }
6072
6073    if (!I->use_empty()) {
6074      SDValue Res;
6075      if (!ArgValues.empty())
6076        Res = DAG.getMergeValues(&ArgValues[0], NumValues,
6077                                 SDB->getCurDebugLoc());
6078      SDB->setValue(I, Res);
6079
6080      // If this argument is live outside of the entry block, insert a copy from
6081      // wherever we got it to the vreg that other BBs will reference it as.
6082      SDB->CopyToExportRegsIfNeeded(I);
6083    }
6084  }
6085
6086  assert(i == InVals.size() && "Argument register count mismatch!");
6087
6088  // Finally, if the target has anything special to do, allow it to do so.
6089  // FIXME: this should insert code into the DAG!
6090  EmitFunctionEntryCode();
6091}
6092
6093/// Handle PHI nodes in successor blocks.  Emit code into the SelectionDAG to
6094/// ensure constants are generated when needed.  Remember the virtual registers
6095/// that need to be added to the Machine PHI nodes as input.  We cannot just
6096/// directly add them, because expansion might result in multiple MBB's for one
6097/// BB.  As such, the start of the BB might correspond to a different MBB than
6098/// the end.
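/// For example, expanding a switch terminator may split LLVMBB into several
/// MBBs; recording the PHI inputs as virtual registers lets the right
/// predecessor MBB be filled in after expansion.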
6099///
6100void
6101SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
6102  const TerminatorInst *TI = LLVMBB->getTerminator();
6103
6104  SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled;
6105
6106  // Check successor nodes' PHI nodes that expect a constant to be available
6107  // from this block.
6108  for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) {
6109    const BasicBlock *SuccBB = TI->getSuccessor(succ);
6110    if (!isa<PHINode>(SuccBB->begin())) continue;
6111    MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB];
6112
6113    // If this terminator has multiple identical successors (common for
6114    // switches), only handle each succ once.
6115    if (!SuccsHandled.insert(SuccMBB)) continue;
6116
6117    MachineBasicBlock::iterator MBBI = SuccMBB->begin();
6118
6119    // At this point we know that there is a 1-1 correspondence between LLVM PHI
6120    // nodes and Machine PHI nodes, but the incoming operands have not been
6121    // emitted yet.
6122    for (BasicBlock::const_iterator I = SuccBB->begin();
6123         const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
6124      // Ignore dead PHIs.
6125      if (PN->use_empty()) continue;
6126
6127      unsigned Reg;
6128      const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB);
6129
6130      if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
6131        unsigned &RegOut = ConstantsOut[C];
6132        if (RegOut == 0) {
6133          RegOut = FuncInfo.CreateRegs(C->getType());
6134          CopyValueToVirtualRegister(C, RegOut);
6135        }
6136        Reg = RegOut;
6137      } else {
6138        DenseMap<const Value *, unsigned>::iterator I =
6139          FuncInfo.ValueMap.find(PHIOp);
6140        if (I != FuncInfo.ValueMap.end())
6141          Reg = I->second;
6142        else {
6143          assert(isa<AllocaInst>(PHIOp) &&
6144                 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
6145                 "Didn't codegen value into a register!??");
6146          Reg = FuncInfo.CreateRegs(PHIOp->getType());
6147          CopyValueToVirtualRegister(PHIOp, Reg);
6148        }
6149      }
6150
6151      // Remember that this register needs to be added to the machine PHI node as
6152      // the input for this MBB.
6153      SmallVector<EVT, 4> ValueVTs;
6154      ComputeValueVTs(TLI, PN->getType(), ValueVTs);
6155      for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
6156        EVT VT = ValueVTs[vti];
6157        unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT);
6158        for (unsigned i = 0, e = NumRegisters; i != e; ++i)
6159          FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i));
6160        Reg += NumRegisters;
6161      }
6162    }
6163  }
6164  ConstantsOut.clear();
6165}
6166