X86ISelLowering.cpp revision 59ed56b9ee4ac2f09cf5c95acbe05b5b91a77215
1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that X86 uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "X86.h"
16#include "X86InstrBuilder.h"
17#include "X86ISelLowering.h"
18#include "X86MachineFunctionInfo.h"
19#include "X86TargetMachine.h"
20#include "llvm/CallingConv.h"
21#include "llvm/Constants.h"
22#include "llvm/DerivedTypes.h"
23#include "llvm/Function.h"
24#include "llvm/Intrinsics.h"
25#include "llvm/ADT/VectorExtras.h"
26#include "llvm/Analysis/ScalarEvolutionExpressions.h"
27#include "llvm/CodeGen/CallingConvLower.h"
28#include "llvm/CodeGen/MachineFrameInfo.h"
29#include "llvm/CodeGen/MachineFunction.h"
30#include "llvm/CodeGen/MachineInstrBuilder.h"
31#include "llvm/CodeGen/SelectionDAG.h"
32#include "llvm/CodeGen/SSARegMap.h"
33#include "llvm/Support/MathExtras.h"
34#include "llvm/Target/TargetOptions.h"
35#include "llvm/Support/CommandLine.h"
36#include "llvm/ADT/StringExtras.h"
37using namespace llvm;
38
39// FIXME: temporary.
40static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
41                                  cl::desc("Enable fastcc on X86"));
42X86TargetLowering::X86TargetLowering(TargetMachine &TM)
43  : TargetLowering(TM) {
44  Subtarget = &TM.getSubtarget<X86Subtarget>();
45  X86ScalarSSE = Subtarget->hasSSE2();
46  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
47
48  // Set up the TargetLowering object.
49
50  // X86 is weird, it always uses i8 for shift amounts and setcc results.
51  setShiftAmountType(MVT::i8);
52  setSetCCResultType(MVT::i8);
53  setSetCCResultContents(ZeroOrOneSetCCResult);
54  setSchedulingPreference(SchedulingForRegPressure);
55  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
56  setStackPointerRegisterToSaveRestore(X86StackPtr);
57
58  if (Subtarget->isTargetDarwin()) {
59    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
60    setUseUnderscoreSetJmp(false);
61    setUseUnderscoreLongJmp(false);
62  } else if (Subtarget->isTargetMingw()) {
63    // MS runtime is weird: it exports _setjmp, but only plain longjmp.
64    setUseUnderscoreSetJmp(true);
65    setUseUnderscoreLongJmp(false);
66  } else {
67    setUseUnderscoreSetJmp(true);
68    setUseUnderscoreLongJmp(true);
69  }
70
71  // Add legal addressing mode scale values.
72  addLegalAddressScale(8);
73  addLegalAddressScale(4);
74  addLegalAddressScale(2);
75  // Enter the ones which require both scale + index last. These are more
76  // expensive.
77  addLegalAddressScale(9);
78  addLegalAddressScale(5);
79  addLegalAddressScale(3);
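  // For illustration: scales of 3, 5 and 9 are only encodable when the base
  // and index are the same register, e.g.
  //   leal (%eax,%eax,8), %ecx    // %ecx = %eax * 9
  // while 2, 4 and 8 fit the SIB scale field directly, which is why the
  // former are registered last as the more expensive forms.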
80
81  // Set up the register classes.
82  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
83  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
84  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
85  if (Subtarget->is64Bit())
86    addRegisterClass(MVT::i64, X86::GR64RegisterClass);
87
88  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);
89
90  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
91  // operation.
92  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
93  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
94  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);
95
96  if (Subtarget->is64Bit()) {
97    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
98    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
99  } else {
100    if (X86ScalarSSE)
101      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
102      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Expand);
103    else
104      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
105  }
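  // For illustration of the UINT_TO_FP promotions above: the small unsigned
  // types are widened with a zero extend and reuse the signed conversion,
  // roughly
  //   %w = zext i16 %x to i32
  //   %f = sitofp i32 %w to double
  // which is exact because the zero-extended value always lies in the
  // non-negative range of i32.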
106
107  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
108  // this operation.
109  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
110  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
111  // SSE has no i16 to fp conversion, only i32
112  if (X86ScalarSSE)
113    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
114  else {
115    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
116    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
117  }
118
119  if (!Subtarget->is64Bit()) {
120    // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
121    setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);
122    setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
123  }
124
125  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
126  // this operation.
127  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
128  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);
129
130  if (X86ScalarSSE) {
131    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
132  } else {
133    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
134    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
135  }
136
137  // Handle FP_TO_UINT by promoting the destination to a larger signed
138  // conversion.
139  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
140  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
141  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);
142
143  if (Subtarget->is64Bit()) {
144    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
145    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
146  } else {
147    if (X86ScalarSSE && !Subtarget->hasSSE3())
148      // Expand FP_TO_UINT into a select.
149      // FIXME: We would like to use a Custom expander here eventually to do
150      // the optimal thing for SSE vs. the default expansion in the legalizer.
151      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
152    else
153      // With SSE3 we can use fisttpll to convert to a signed i64.
154      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
155  }
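  // For illustration: with the promotions above, converting f64 to an
  // unsigned i16 is done as a signed conversion to i32 (e.g. cvttsd2si when
  // using SSE) followed by a truncate; every in-range u16 value also fits in
  // the signed i32 range, so no extra fixup is needed.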
156
157  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
158  if (!X86ScalarSSE) {
159    setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
160    setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);
161  }
162
163  setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
164  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
165  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
166  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
167  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
168  if (Subtarget->is64Bit())
169    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
170  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
171  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
172  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
173  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
174  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
175
176  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
177  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
178  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
179  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
180  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
181  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
182  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
183  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
184  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
185  if (Subtarget->is64Bit()) {
186    setOperationAction(ISD::CTPOP          , MVT::i64  , Expand);
187    setOperationAction(ISD::CTTZ           , MVT::i64  , Expand);
188    setOperationAction(ISD::CTLZ           , MVT::i64  , Expand);
189  }
190
191  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
192  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);
193
194  // These should be promoted to a larger select which is supported.
195  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
196  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
197  // X86 wants to expand cmov itself.
198  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
199  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
200  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
201  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
202  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
203  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
204  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
205  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
206  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
207  if (Subtarget->is64Bit()) {
208    setOperationAction(ISD::SELECT        , MVT::i64  , Custom);
209    setOperationAction(ISD::SETCC         , MVT::i64  , Custom);
210  }
211  // X86 ret instruction may pop stack.
212  setOperationAction(ISD::RET             , MVT::Other, Custom);
213  // Darwin ABI issue.
214  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
215  setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
216  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
217  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
218  if (Subtarget->is64Bit()) {
219    setOperationAction(ISD::ConstantPool  , MVT::i64  , Custom);
220    setOperationAction(ISD::JumpTable     , MVT::i64  , Custom);
221    setOperationAction(ISD::GlobalAddress , MVT::i64  , Custom);
222    setOperationAction(ISD::ExternalSymbol, MVT::i64  , Custom);
223  }
224  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
225  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
226  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
227  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
228  // X86 wants to expand memset / memcpy itself.
229  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
230  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);
231
232  // We don't have line number support yet.
233  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
234  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
235  // FIXME - use subtarget debug flags
236  if (!Subtarget->isTargetDarwin() &&
237      !Subtarget->isTargetELF() &&
238      !Subtarget->isTargetCygMing())
239    setOperationAction(ISD::LABEL, MVT::Other, Expand);
240
241  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
242  setOperationAction(ISD::VASTART           , MVT::Other, Custom);
243
244  // Use the default implementation.
245  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
246  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
247  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
248  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
249  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
250  if (Subtarget->is64Bit())
251    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
252  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
253
254  if (X86ScalarSSE) {
255    // Set up the FP register classes.
256    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
257    addRegisterClass(MVT::f64, X86::FR64RegisterClass);
258
259    // Use ANDPD to simulate FABS.
260    setOperationAction(ISD::FABS , MVT::f64, Custom);
261    setOperationAction(ISD::FABS , MVT::f32, Custom);
262
263    // Use XORP to simulate FNEG.
264    setOperationAction(ISD::FNEG , MVT::f64, Custom);
265    setOperationAction(ISD::FNEG , MVT::f32, Custom);
266
267    // Use ANDPD and ORPD to simulate FCOPYSIGN.
268    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
269    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
270
271    // We don't support sin/cos/fmod
272    setOperationAction(ISD::FSIN , MVT::f64, Expand);
273    setOperationAction(ISD::FCOS , MVT::f64, Expand);
274    setOperationAction(ISD::FREM , MVT::f64, Expand);
275    setOperationAction(ISD::FSIN , MVT::f32, Expand);
276    setOperationAction(ISD::FCOS , MVT::f32, Expand);
277    setOperationAction(ISD::FREM , MVT::f32, Expand);
278
279    // Expand FP immediates into loads from the stack, except for the special
280    // cases we handle.
281    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
282    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
283    addLegalFPImmediate(+0.0); // xorps / xorpd
284  } else {
285    // Set up the FP register classes.
286    addRegisterClass(MVT::f64, X86::RFPRegisterClass);
287
288    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
289    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
290    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
291
292    if (!UnsafeFPMath) {
293      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
294      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
295    }
296
297    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
298    addLegalFPImmediate(+0.0); // FLD0
299    addLegalFPImmediate(+1.0); // FLD1
300    addLegalFPImmediate(-0.0); // FLD0/FCHS
301    addLegalFPImmediate(-1.0); // FLD1/FCHS
302  }
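  // For illustration of the SSE cases above: FABS and FNEG become a bitwise
  // AND / XOR against a constant-pool mask (label names here are just for
  // illustration), roughly
  //   andpd .LCPI_absmask, %xmm0    // mask clears the sign bit
  //   xorpd .LCPI_negmask, %xmm0    // mask holds only the sign bit
  // and FCOPYSIGN combines the two masks with AND and OR.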
303
304  // First set operation action for all vector types to expand. Then we
305  // will selectively turn on ones that can be effectively codegen'd.
306  for (unsigned VT = (unsigned)MVT::Vector + 1;
307       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
308    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
309    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
310    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
311    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
312    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
313    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
314    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
315    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
316    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
317    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
318    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
319    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
320    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
321    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
322    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
323  }
324
325  if (Subtarget->hasMMX()) {
326    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
327    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
328    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
329
330    // FIXME: add MMX packed arithmetic
331    setOperationAction(ISD::BUILD_VECTOR,     MVT::v8i8,  Expand);
332    setOperationAction(ISD::BUILD_VECTOR,     MVT::v4i16, Expand);
333    setOperationAction(ISD::BUILD_VECTOR,     MVT::v2i32, Expand);
334  }
335
336  if (Subtarget->hasSSE1()) {
337    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
338
339    setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
340    setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
341    setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
342    setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
343    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
344    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
345    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
346    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
347    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
348  }
349
350  if (Subtarget->hasSSE2()) {
351    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
352    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
353    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
354    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
355    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
356
357    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
358    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
359    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
360    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
361    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
362    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
363    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
364    setOperationAction(ISD::FADD,               MVT::v2f64, Legal);
365    setOperationAction(ISD::FSUB,               MVT::v2f64, Legal);
366    setOperationAction(ISD::FMUL,               MVT::v2f64, Legal);
367    setOperationAction(ISD::FDIV,               MVT::v2f64, Legal);
368
369    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
370    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
371    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
372    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
373    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
374    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);
375
376    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
377    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
378      setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
379      setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
380      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
381    }
382    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
383    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
384    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
385    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
386    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
387    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
388
389    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
390    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
391      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
392      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
393      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
394      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
395      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
396      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
397      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
398      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
399      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
400      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
401    }
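    // For illustration: after this promotion a v4i32 AND is bitcast to
    // v2i64, selected as a single "pand", and bitcast back; the same 128-bit
    // instruction serves every integer element width.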
402
403    // Custom lower v2i64 and v2f64 selects.
404    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
405    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
406    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
407    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
408  }
409
410  // We want to custom lower some of our intrinsics.
411  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
412
413  // We have target-specific dag combine patterns for the following nodes:
414  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
415  setTargetDAGCombine(ISD::SELECT);
416
417  computeRegisterProperties();
418
419  // FIXME: These should be based on subtarget info. Plus, the values should
420  // be smaller when we are optimizing for size.
421  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
422  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
423  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
424  allowUnalignedMemoryAccesses = true; // x86 supports it!
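  // For illustration: with maxStoresPerMemset == 16, a memset of a known,
  // small size such as memset(p, 0, 64) is expanded inline into a short
  // sequence of stores (e.g. sixteen 4-byte stores of zero, or fewer, wider
  // stores when vector registers are usable) rather than a libc call.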
425}
426
427
428//===----------------------------------------------------------------------===//
429//               Return Value Calling Convention Implementation
430//===----------------------------------------------------------------------===//
431
432#include "X86GenCallingConv.inc"
433
434/// LowerRET - Lower an ISD::RET node.
435SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
436  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");
437
438  SmallVector<CCValAssign, 16> RVLocs;
439  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
440  CCState CCInfo(CC, getTargetMachine(), RVLocs);
441
442  // Determine which register each value should be copied into.
443  for (unsigned i = 0; i != Op.getNumOperands() / 2; ++i) {
444    MVT::ValueType VT = Op.getOperand(i*2+1).getValueType();
445    if (RetCC_X86(i, VT, VT, CCValAssign::Full,
446                  cast<ConstantSDNode>(Op.getOperand(i*2+2))->getValue(),
447                  CCInfo))
448      assert(0 && "Unhandled result type!");
449  }
450
451  // If this is the first return lowered for this function, add the regs to the
452  // liveout set for the function.
453  if (DAG.getMachineFunction().liveout_empty()) {
454    for (unsigned i = 0; i != RVLocs.size(); ++i)
455      if (RVLocs[i].isRegLoc())
456        DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
457  }
458
459  SDOperand Chain = Op.getOperand(0);
460  SDOperand Flag;
461
462  // Copy the result values into the output registers.
463  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
464      RVLocs[0].getLocReg() != X86::ST0) {
465    for (unsigned i = 0; i != RVLocs.size(); ++i) {
466      CCValAssign &VA = RVLocs[i];
467      assert(VA.isRegLoc() && "Can only return in registers!");
468      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
469                               Flag);
470      Flag = Chain.getValue(1);
471    }
472  } else {
473    // We need to handle a destination of ST0 specially, because it isn't really
474    // a register.
475    SDOperand Value = Op.getOperand(1);
476
477    // If this is an FP return with ScalarSSE, we need to move the value from
478    // an XMM register onto the fp-stack.
479    if (X86ScalarSSE) {
480      SDOperand MemLoc;
481
482      // If this is a load into a scalarsse value, don't store the loaded value
483      // back to the stack, only to reload it: just replace the scalar-sse load.
484      if (ISD::isNON_EXTLoad(Value.Val) &&
485          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
486        Chain  = Value.getOperand(0);
487        MemLoc = Value.getOperand(1);
488      } else {
489        // Spill the value to memory and reload it into top of stack.
490        unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
491        MachineFunction &MF = DAG.getMachineFunction();
492        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
493        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
494        Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
495      }
496      SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other);
497      SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
498      Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
499      Chain = Value.getValue(1);
500    }
501
502    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
503    SDOperand Ops[] = { Chain, Value };
504    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
505    Flag = Chain.getValue(1);
506  }
507
508  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
509  if (Flag.Val)
510    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
511  else
512    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
513}
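// For illustration: for an x86 stdcall function taking 12 bytes of arguments,
// getBytesToPopOnReturn() is 12, so the X86ISD::RET_FLAG built above carries
// a BytesToPop operand of 12 and is eventually emitted as "ret $12"; for the
// plain C convention it is normally 0 and a bare "ret" is emitted.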
514
515
516/// LowerCallResult - Lower the result values of an ISD::CALL into the
517/// appropriate copies out of physical registers.  This assumes that
518/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
519/// being lowered.  This returns an SDNode with the same number of values as the
520/// ISD::CALL.
521SDNode *X86TargetLowering::
522LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
523                unsigned CallingConv, SelectionDAG &DAG) {
524  SmallVector<SDOperand, 8> ResultVals;
525
526  SmallVector<CCValAssign, 16> RVLocs;
527  CCState CCInfo(CallingConv, getTargetMachine(), RVLocs);
528
529  for (unsigned i = 0, e = TheCall->getNumValues() - 1; i != e; ++i) {
530    MVT::ValueType VT = TheCall->getValueType(i);
531    if (RetCC_X86(i, VT, VT, CCValAssign::Full, 0, CCInfo))
532      assert(0 && "Unhandled result type!");
533  }
534
535  // Copy all of the result registers out of their specified physreg.
536  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
537    for (unsigned i = 0; i != RVLocs.size(); ++i) {
538      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
539                                 RVLocs[i].getValVT(), InFlag).getValue(1);
540      InFlag = Chain.getValue(2);
541      ResultVals.push_back(Chain.getValue(0));
542    }
543  } else {
544    // Copies from the FP stack are special, as ST0 isn't a valid register
545    // before the fp stackifier runs.
546
547    // Copy ST0 into an RFP register with FP_GET_RESULT.
548    SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
549    SDOperand GROps[] = { Chain, InFlag };
550    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
551    Chain  = RetVal.getValue(1);
552    InFlag = RetVal.getValue(2);
553
554    // If we are using ScalarSSE, store ST(0) to the stack and reload it into
555    // an XMM register.
556    if (X86ScalarSSE) {
557      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
558      // shouldn't be necessary except that RFP cannot be live across
559      // multiple blocks. When stackifier is fixed, they can be uncoupled.
560      MachineFunction &MF = DAG.getMachineFunction();
561      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
562      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
563      SDOperand Ops[] = {
564        Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
565      };
566      Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
567      RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
568      Chain = RetVal.getValue(1);
569    }
570
571    if (RVLocs[0].getValVT() == MVT::f32 && !X86ScalarSSE)
572      // FIXME: we would really like to remember that this FP_ROUND
573      // operation is okay to eliminate if we allow excess FP precision.
574      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
575    ResultVals.push_back(RetVal);
576  }
577
578  // Merge everything together with a MERGE_VALUES node.
579  ResultVals.push_back(Chain);
580  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
581                     &ResultVals[0], ResultVals.size()).Val;
582}
583
584
585//===----------------------------------------------------------------------===//
586//                C & StdCall Calling Convention implementation
587//===----------------------------------------------------------------------===//
588//  The StdCall calling convention is standard for many Windows API routines.
589//  It differs from the C calling convention only slightly: the callee cleans
590//  up the stack rather than the caller, and symbols are decorated (suffixed
591//  with the size of the arguments). It does not support vector arguments.
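//
//  For illustration: a function declared as
//    int __stdcall Sum(int a, int b);
//  is decorated as _Sum@8 on Win32 and returns with "ret 8", popping its own
//  eight bytes of arguments, whereas its C-convention counterpart is plain
//  _Sum and leaves the cleanup to the caller.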
592
593/// AddLiveIn - This helper function adds the specified physical register to the
594/// MachineFunction as a live in value.  It also creates a corresponding virtual
595/// register for it.
596static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
597                          const TargetRegisterClass *RC) {
598  assert(RC->contains(PReg) && "Not the correct regclass!");
599  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
600  MF.addLiveIn(PReg, VReg);
601  return VReg;
602}
603
604/// HowToPassCallArgument - Returns how a formal argument of the specified
605/// type should be passed. If it is passed on the stack, returns the size of
606/// the stack slot; if it is passed in integer or XMM registers, returns the
607/// number of integer or XMM registers needed.
608static void
609HowToPassCallArgument(MVT::ValueType ObjectVT,
610                      bool ArgInReg,
611                      unsigned NumIntRegs, unsigned NumXMMRegs,
612                      unsigned MaxNumIntRegs,
613                      unsigned &ObjSize, unsigned &ObjIntRegs,
614                      unsigned &ObjXMMRegs) {
615  ObjSize = 0;
616  ObjIntRegs = 0;
617  ObjXMMRegs = 0;
618
619  if (MaxNumIntRegs>3) {
620    // We don't have that many registers on ia32! :)
621    MaxNumIntRegs = 3;
622  }
623
624  switch (ObjectVT) {
625  default: assert(0 && "Unhandled argument type!");
626  case MVT::i8:
627   if (ArgInReg && (NumIntRegs < MaxNumIntRegs))
628     ObjIntRegs = 1;
629   else
630     ObjSize = 1;
631   break;
632  case MVT::i16:
633   if (ArgInReg && (NumIntRegs < MaxNumIntRegs))
634     ObjIntRegs = 1;
635   else
636     ObjSize = 2;
637   break;
638  case MVT::i32:
639   if (ArgInReg && (NumIntRegs < MaxNumIntRegs))
640     ObjIntRegs = 1;
641   else
642     ObjSize = 4;
643   break;
644  case MVT::i64:
645   if (ArgInReg && (NumIntRegs+2 <= MaxNumIntRegs)) {
646     ObjIntRegs = 2;
647   } else if (ArgInReg && (NumIntRegs+1 <= MaxNumIntRegs)) {
648     ObjIntRegs = 1;
649     ObjSize = 4;
650   } else
651     ObjSize = 8;
   break;  // Needed: falling through into the f32 case would clobber ObjSize.
652  case MVT::f32:
653    ObjSize = 4;
654    break;
655  case MVT::f64:
656    ObjSize = 8;
657    break;
658  case MVT::v16i8:
659  case MVT::v8i16:
660  case MVT::v4i32:
661  case MVT::v2i64:
662  case MVT::v4f32:
663  case MVT::v2f64:
664    if (NumXMMRegs < 4)
665      ObjXMMRegs = 1;
666    else
667      ObjSize = 16;
668    break;
669  }
670}
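// For illustration: an i32 argument carrying the "inreg" flag with
// NumIntRegs == 0 comes back from this routine as ObjIntRegs = 1 (the caller
// will assign it EAX), while the same argument without the flag comes back as
// ObjSize = 4, i.e. a 4-byte stack slot; a v4f32 argument occupies one XMM
// register until the first four are used up and then falls back to a 16-byte
// slot.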
671
672SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
673                                               bool isStdCall) {
674  unsigned NumArgs = Op.Val->getNumValues() - 1;
675  MachineFunction &MF = DAG.getMachineFunction();
676  MachineFrameInfo *MFI = MF.getFrameInfo();
677  SDOperand Root = Op.getOperand(0);
678  SmallVector<SDOperand, 8> ArgValues;
679  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
680
681  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
682  // the stack frame looks like this:
683  //
684  // [ESP] -- return address
685  // [ESP + 4] -- first argument (leftmost lexically)
686  // [ESP + 8] -- second argument, if first argument is <= 4 bytes in size
687  //    ...
688  //
689  unsigned ArgOffset   = 0; // Frame mechanisms handle retaddr slot
690  unsigned NumSRetBytes= 0; // Bytes on the stack used for struct return
691  unsigned NumXMMRegs  = 0; // XMM regs used for parameter passing.
692  unsigned NumIntRegs  = 0; // Integer regs used for parameter passing
693
694  static const unsigned XMMArgRegs[] = {
695    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
696  };
697  static const unsigned GPRArgRegs[][3] = {
698    { X86::AL,  X86::DL,  X86::CL  },
699    { X86::AX,  X86::DX,  X86::CX  },
700    { X86::EAX, X86::EDX, X86::ECX }
701  };
702  static const TargetRegisterClass* GPRClasses[3] = {
703    X86::GR8RegisterClass, X86::GR16RegisterClass, X86::GR32RegisterClass
704  };
705
706  // Handle regparm attribute
707  SmallVector<bool, 8> ArgInRegs(NumArgs, false);
708  SmallVector<bool, 8> SRetArgs(NumArgs, false);
709  if (!isVarArg) {
710    for (unsigned i = 0; i<NumArgs; ++i) {
711      unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3+i))->getValue();
712      ArgInRegs[i]   = (Flags >> 1) & 1;
713      SRetArgs[i]    = (Flags >> 2) & 1;
714    }
715  }
716
717  for (unsigned i = 0; i < NumArgs; ++i) {
718    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
719    unsigned ArgIncrement = 4;
720    unsigned ObjSize = 0;
721    unsigned ObjXMMRegs = 0;
722    unsigned ObjIntRegs = 0;
723    unsigned Reg = 0;
724    SDOperand ArgValue;
725
726    HowToPassCallArgument(ObjectVT,
727                          ArgInRegs[i],
728                          NumIntRegs, NumXMMRegs, 3,
729                          ObjSize, ObjIntRegs, ObjXMMRegs);
730
731    if (ObjSize > 4)
732      ArgIncrement = ObjSize;
733
734    if (ObjIntRegs || ObjXMMRegs) {
735      switch (ObjectVT) {
736      default: assert(0 && "Unhandled argument type!");
737      case MVT::i8:
738      case MVT::i16:
739      case MVT::i32: {
740       unsigned RegToUse = GPRArgRegs[ObjectVT-MVT::i8][NumIntRegs];
741       Reg = AddLiveIn(MF, RegToUse, GPRClasses[ObjectVT-MVT::i8]);
742       ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
743       break;
744      }
745      case MVT::v16i8:
746      case MVT::v8i16:
747      case MVT::v4i32:
748      case MVT::v2i64:
749      case MVT::v4f32:
750      case MVT::v2f64:
751       assert(!isStdCall && "Unhandled argument type!");
752       Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass);
753       ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
754       break;
755      }
756      NumIntRegs += ObjIntRegs;
757      NumXMMRegs += ObjXMMRegs;
758    }
759    if (ObjSize) {
760      // XMM arguments have to be aligned on 16-byte boundary.
761      if (ObjSize == 16)
762        ArgOffset = ((ArgOffset + 15) / 16) * 16;
763      // Create the SelectionDAG nodes corresponding to a load from this
764      // parameter.
765      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
766      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
767      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
768
769      ArgOffset += ArgIncrement;   // Move on to the next argument.
770      if (SRetArgs[i])
771        NumSRetBytes += ArgIncrement;
772    }
773
774    ArgValues.push_back(ArgValue);
775  }
776
777  ArgValues.push_back(Root);
778
779  // If the function takes variable number of arguments, make a frame index for
780  // the start of the first vararg value... for expansion of llvm.va_start.
781  if (isVarArg)
782    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
783
784  if (isStdCall && !isVarArg) {
785    BytesToPopOnReturn  = ArgOffset;    // Callee pops everything..
786    BytesCallerReserves = 0;
787  } else {
788    BytesToPopOnReturn  = NumSRetBytes; // Callee pops hidden struct pointer.
789    BytesCallerReserves = ArgOffset;
790  }
791
792  RegSaveFrameIndex = 0xAAAAAAA;  // X86-64 only.
793  ReturnAddrIndex = 0;            // No return address slot generated yet.
794
795
796  MF.getInfo<X86FunctionInfo>()->setBytesToPopOnReturn(BytesToPopOnReturn);
797
798  // Return the new list of results.
799  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
800                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
801}
802
803SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
804                                            unsigned CC) {
805  SDOperand Chain     = Op.getOperand(0);
806  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
807  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
808  SDOperand Callee    = Op.getOperand(4);
809  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;
810
811  static const unsigned XMMArgRegs[] = {
812    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
813  };
814  static const unsigned GPR32ArgRegs[] = {
815    X86::EAX, X86::EDX,  X86::ECX
816  };
817
818  // Count how many bytes are to be pushed on the stack.
819  unsigned NumBytes   = 0;
820  // Keep track of the number of integer regs passed so far.
821  unsigned NumIntRegs = 0;
822  // Keep track of the number of XMM regs passed so far.
823  unsigned NumXMMRegs = 0;
824  // How many bytes on the stack are used for struct return
825  unsigned NumSRetBytes= 0;
826
827  // Handle regparm attribute
828  SmallVector<bool, 8> ArgInRegs(NumOps, false);
829  SmallVector<bool, 8> SRetArgs(NumOps, false);
830  for (unsigned i = 0; i<NumOps; ++i) {
831    unsigned Flags =
832      cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue();
833    ArgInRegs[i] = (Flags >> 1) & 1;
834    SRetArgs[i]  = (Flags >> 2) & 1;
835  }
836
837  // Calculate stack frame size
838  for (unsigned i = 0; i != NumOps; ++i) {
839    SDOperand Arg = Op.getOperand(5+2*i);
840    unsigned ArgIncrement = 4;
841    unsigned ObjSize = 0;
842    unsigned ObjIntRegs = 0;
843    unsigned ObjXMMRegs = 0;
844
845    HowToPassCallArgument(Arg.getValueType(),
846                          ArgInRegs[i],
847                          NumIntRegs, NumXMMRegs, 3,
848                          ObjSize, ObjIntRegs, ObjXMMRegs);
849    if (ObjSize > 4)
850      ArgIncrement = ObjSize;
851
852    NumIntRegs += ObjIntRegs;
853    NumXMMRegs += ObjXMMRegs;
854    if (ObjSize) {
855      // XMM arguments have to be aligned on 16-byte boundary.
856      if (ObjSize == 16)
857        NumBytes = ((NumBytes + 15) / 16) * 16;
858      NumBytes += ArgIncrement;
859    }
860  }
861
862  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
863
864  // Arguments go on the stack in reverse order, as specified by the ABI.
865  unsigned ArgOffset = 0;
866  NumXMMRegs = 0;
867  NumIntRegs = 0;
868  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
869  SmallVector<SDOperand, 8> MemOpChains;
870  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
871  for (unsigned i = 0; i != NumOps; ++i) {
872    SDOperand Arg = Op.getOperand(5+2*i);
873    unsigned ArgIncrement = 4;
874    unsigned ObjSize = 0;
875    unsigned ObjIntRegs = 0;
876    unsigned ObjXMMRegs = 0;
877
878    HowToPassCallArgument(Arg.getValueType(),
879                          ArgInRegs[i],
880                          NumIntRegs, NumXMMRegs, 3,
881                          ObjSize, ObjIntRegs, ObjXMMRegs);
882
883    if (ObjSize > 4)
884      ArgIncrement = ObjSize;
885
886    if (Arg.getValueType() == MVT::i8 || Arg.getValueType() == MVT::i16) {
887      // Promote the integer to 32 bits.  If the input type is signed use a
888      // sign extend, otherwise use a zero extend.
889      unsigned Flags = cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue();
890
891      unsigned ExtOp = (Flags & 1) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
892      Arg = DAG.getNode(ExtOp, MVT::i32, Arg);
893    }
894
895    if (ObjIntRegs || ObjXMMRegs) {
896      switch (Arg.getValueType()) {
897      default: assert(0 && "Unhandled argument type!");
898      case MVT::i32:
899       RegsToPass.push_back(std::make_pair(GPR32ArgRegs[NumIntRegs], Arg));
900       break;
901      case MVT::v16i8:
902      case MVT::v8i16:
903      case MVT::v4i32:
904      case MVT::v2i64:
905      case MVT::v4f32:
906      case MVT::v2f64:
907       RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
908       break;
909      }
910
911      NumIntRegs += ObjIntRegs;
912      NumXMMRegs += ObjXMMRegs;
913    }
914    if (ObjSize) {
915      // XMM arguments have to be aligned on 16-byte boundary.
916      if (ObjSize == 16)
917        ArgOffset = ((ArgOffset + 15) / 16) * 16;
918
919      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
920      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
921      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
922
923      ArgOffset += ArgIncrement;   // Move on to the next argument.
924      if (SRetArgs[i])
925        NumSRetBytes += ArgIncrement;
926    }
927  }
928
929  // Sanity check: NumSRetBytes can never exceed 4 (a single hidden pointer).
930  assert((NumSRetBytes<=4) &&
931         "Too much space for struct-return pointer requested");
932
933  if (!MemOpChains.empty())
934    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
935                        &MemOpChains[0], MemOpChains.size());
936
937  // Build a sequence of copy-to-reg nodes chained together with token chain
938  // and flag operands which copy the outgoing args into registers.
939  SDOperand InFlag;
940  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
941    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
942                             InFlag);
943    InFlag = Chain.getValue(1);
944  }
945
946  // ELF / PIC requires the GOT pointer to be in the EBX register before
947  // making function calls via the PLT.
948  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
949      Subtarget->isPICStyleGOT()) {
950    Chain = DAG.getCopyToReg(Chain, X86::EBX,
951                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
952                             InFlag);
953    InFlag = Chain.getValue(1);
954  }
955
956  // If the callee is a GlobalAddress node (quite common, every direct call is)
957  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
958  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
959    // We should use an extra load for direct calls to dllimported functions in
960    // non-JIT mode.
961    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
962                                        getTargetMachine(), true))
963      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
964  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
965    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
966
967  // Returns a chain & a flag for retval copy to use.
968  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
969  SmallVector<SDOperand, 8> Ops;
970  Ops.push_back(Chain);
971  Ops.push_back(Callee);
972
973  // Add argument registers to the end of the list so that they are known live
974  // into the call.
975  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
976    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
977                                  RegsToPass[i].second.getValueType()));
978
979  // Add an implicit use GOT pointer in EBX.
980  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
981      Subtarget->isPICStyleGOT())
982    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
983
984  if (InFlag.Val)
985    Ops.push_back(InFlag);
986
987  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
988                      NodeTys, &Ops[0], Ops.size());
989  InFlag = Chain.getValue(1);
990
991  // Create the CALLSEQ_END node.
992  unsigned NumBytesForCalleeToPush = 0;
993
994  if (CC == CallingConv::X86_StdCall) {
995    if (isVarArg)
996      NumBytesForCalleeToPush = NumSRetBytes;
997    else
998      NumBytesForCalleeToPush = NumBytes;
999  } else {
1000    // If this is a call to a struct-return function, the callee
1001    // pops the hidden struct pointer, so we have to push it back.
1002    // This is common for Darwin/X86, Linux & Mingw32 targets.
1003    NumBytesForCalleeToPush = NumSRetBytes;
1004  }
1005
1006  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1007  Ops.clear();
1008  Ops.push_back(Chain);
1009  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
1010  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
1011  Ops.push_back(InFlag);
1012  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
1013  InFlag = Chain.getValue(1);
1014
1015  // Handle result values, copying them out of physregs into vregs that we
1016  // return.
1017  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
1018}
1019
1020
1021//===----------------------------------------------------------------------===//
1022//                 X86-64 C Calling Convention implementation
1023//===----------------------------------------------------------------------===//
1024
1025
1026
1027SDOperand
1028X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
1029  unsigned NumArgs = Op.Val->getNumValues() - 1;
1030  MachineFunction &MF = DAG.getMachineFunction();
1031  MachineFrameInfo *MFI = MF.getFrameInfo();
1032  SDOperand Root = Op.getOperand(0);
1033  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1034
1035  static const unsigned GPR64ArgRegs[] = {
1036    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8,  X86::R9
1037  };
1038  static const unsigned XMMArgRegs[] = {
1039    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1040    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1041  };
1042
1043  SmallVector<CCValAssign, 16> ArgLocs;
1044  CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(),
1045                 ArgLocs);
1046
1047  for (unsigned i = 0; i != NumArgs; ++i) {
1048    MVT::ValueType ArgVT = Op.getValue(i).getValueType();
1049    unsigned ArgFlags = cast<ConstantSDNode>(Op.getOperand(3+i))->getValue();
1050    if (CC_X86_64_C(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo))
1051      assert(0 && "Unhandled argument type!");
1052  }
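  // For illustration: for a prototype like f(int, double, char *), CC_X86_64_C
  // assigns the int to EDI, the double to XMM0 and the pointer to RSI, so all
  // three arguments show up in ArgLocs as register locations and none of them
  // occupies stack space.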
1053
1054  SmallVector<SDOperand, 8> ArgValues;
1055  unsigned LastVal = ~0U;
1056  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1057    CCValAssign &VA = ArgLocs[i];
1058    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
1059    // places.
1060    assert(VA.getValNo() != LastVal &&
1061           "Don't support value assigned to multiple locs yet");
1062    LastVal = VA.getValNo();
1063
1064    if (VA.isRegLoc()) {
1065      MVT::ValueType RegVT = VA.getLocVT();
1066      TargetRegisterClass *RC;
1067      if (RegVT == MVT::i32)
1068        RC = X86::GR32RegisterClass;
1069      else if (RegVT == MVT::i64)
1070        RC = X86::GR64RegisterClass;
1071      else if (RegVT == MVT::f32)
1072        RC = X86::FR32RegisterClass;
1073      else if (RegVT == MVT::f64)
1074        RC = X86::FR64RegisterClass;
1075      else {
1076        assert(MVT::isVector(RegVT));
1077        RC = X86::VR128RegisterClass;
1078      }
1079
1080      SDOperand ArgValue = DAG.getCopyFromReg(Root, VA.getLocReg(), RegVT);
1081      AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
1082
1083      // If this is an 8 or 16-bit value, it is really passed promoted to 32
1084      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
1085      // right size.
1086      if (VA.getLocInfo() == CCValAssign::SExt)
1087        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
1088                               DAG.getValueType(VA.getValVT()));
1089      else if (VA.getLocInfo() == CCValAssign::ZExt)
1090        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
1091                               DAG.getValueType(VA.getValVT()));
1092
1093      if (VA.getLocInfo() != CCValAssign::Full)
1094        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);
1095
1096      ArgValues.push_back(ArgValue);
1097    } else {
1098      assert(VA.isMemLoc());
1099
1100      // Create the nodes corresponding to a load from this parameter slot.
1101      int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
1102                                      VA.getLocMemOffset());
1103      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
1104      ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
1105    }
1106  }
1107
1108  unsigned StackSize = CCInfo.getNextStackOffset();
1109
1110  // If the function takes variable number of arguments, make a frame index for
1111  // the start of the first vararg value... for expansion of llvm.va_start.
1112  if (isVarArg) {
1113    unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
1114    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
1115
1116    // For X86-64, if there are vararg parameters that are passed via
1117    // registers, then we must store them to their spots on the stack so they
1118    // may be loaded by dereferencing the result of va_next.
1119    VarArgsGPOffset = NumIntRegs * 8;
1120    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
1121    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
1122    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
1123
1124    // Store the integer parameter registers.
1125    SmallVector<SDOperand, 8> MemOps;
1126    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
1127    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
1128                              DAG.getConstant(VarArgsGPOffset, getPointerTy()));
1129    for (; NumIntRegs != 6; ++NumIntRegs) {
1130      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
1131                                X86::GR64RegisterClass);
1132      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
1133      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1134      MemOps.push_back(Store);
1135      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
1136                        DAG.getConstant(8, getPointerTy()));
1137    }
1138
1139    // Now store the XMM (fp + vector) parameter registers.
1140    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
1141                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
1142    for (; NumXMMRegs != 8; ++NumXMMRegs) {
1143      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
1144                                X86::VR128RegisterClass);
1145      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
1146      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1147      MemOps.push_back(Store);
1148      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
1149                        DAG.getConstant(16, getPointerTy()));
1150    }
1151    if (!MemOps.empty())
1152        Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
1153                           &MemOps[0], MemOps.size());
1154  }
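  // For illustration: the register save area created above is laid out as
  //   [+  0 ..  47]  RDI, RSI, RDX, RCX, R8, R9   (6 x 8 bytes)
  //   [+ 48 .. 175]  XMM0 .. XMM7                 (8 x 16 bytes)
  // and va_arg walks it via the gp_offset / fp_offset fields of va_list,
  // which start out at the VarArgsGPOffset / VarArgsFPOffset values computed
  // above.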
1155
1156  ArgValues.push_back(Root);
1157
1158  ReturnAddrIndex = 0;     // No return address slot generated yet.
1159  BytesToPopOnReturn = 0;  // Callee pops nothing.
1160  BytesCallerReserves = StackSize;
1161
1162  // Return the new list of results.
1163  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
1164                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
1165}
1166
1167SDOperand
1168X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
1169                                        unsigned CC) {
1170  SDOperand Chain     = Op.getOperand(0);
1171  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1172  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1173  SDOperand Callee    = Op.getOperand(4);
1174  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;
1175
1176  SmallVector<CCValAssign, 16> ArgLocs;
1177  CCState CCInfo(CC, getTargetMachine(), ArgLocs);
1178
1179  for (unsigned i = 0; i != NumOps; ++i) {
1180    MVT::ValueType ArgVT = Op.getOperand(5+2*i).getValueType();
1181    unsigned ArgFlags =cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue();
1182    if (CC_X86_64_C(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo))
1183      assert(0 && "Unhandled argument type!");
1184  }
1185
1186  // Get a count of how many bytes are to be pushed on the stack.
1187  unsigned NumBytes = CCInfo.getNextStackOffset();
1188  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
1189
1190  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
1191  SmallVector<SDOperand, 8> MemOpChains;
1192
1193  SDOperand StackPtr;
1194
1195  // Walk the register/memloc assignments, inserting copies/loads.
1196  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1197    CCValAssign &VA = ArgLocs[i];
1198    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
1199
1200    // Promote the value if needed.
1201    switch (VA.getLocInfo()) {
1202    default: assert(0 && "Unknown loc info!");
1203    case CCValAssign::Full: break;
1204    case CCValAssign::SExt:
1205      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
1206      break;
1207    case CCValAssign::ZExt:
1208      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
1209      break;
1210    case CCValAssign::AExt:
1211      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
1212      break;
1213    }
1214
1215    if (VA.isRegLoc()) {
1216      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1217    } else {
1218      assert(VA.isMemLoc());
1219      if (StackPtr.Val == 0)
1220        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
1221      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
1222      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1223      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1224    }
1225  }
1226
1227  if (!MemOpChains.empty())
1228    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1229                        &MemOpChains[0], MemOpChains.size());
1230
1231  // Build a sequence of copy-to-reg nodes chained together with token chain
1232  // and flag operands which copy the outgoing args into registers.
1233  SDOperand InFlag;
1234  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1235    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1236                             InFlag);
1237    InFlag = Chain.getValue(1);
1238  }
1239
1240  if (isVarArg) {
1241    // From AMD64 ABI document:
1242    // For calls that may call functions that use varargs or stdargs
1243    // (prototype-less calls or calls to functions containing ellipsis (...) in
1244    // the declaration) %al is used as hidden argument to specify the number
1245    // of SSE registers used. The contents of %al do not need to match exactly
1246    // the number of registers, but must be an upper bound on the number of SSE
1247    // registers used and is in the range 0 - 8 inclusive.
1248
1249    // Count the number of XMM registers allocated.
1250    static const unsigned XMMArgRegs[] = {
1251      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1252      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1253    };
1254    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
1255
1256    Chain = DAG.getCopyToReg(Chain, X86::AL,
1257                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
1258    InFlag = Chain.getValue(1);
1259  }
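  // For illustration of the %al convention above: for a call such as
  // printf("%f\n", x) with x a double, one XMM register carries the value, so
  // the copy amounts to "movb $1, %al"; any upper bound up to 8 would also
  // satisfy the ABI.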
1260
1261  // If the callee is a GlobalAddress node (quite common, every direct call is)
1262  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
1263  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1264    // We should use an extra load for direct calls to dllimported functions in
1265    // non-JIT mode.
1266    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
1267                                        getTargetMachine(), true))
1268      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
1269  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1270    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
1271
1272  // Returns a chain & a flag for retval copy to use.
1273  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1274  SmallVector<SDOperand, 8> Ops;
1275  Ops.push_back(Chain);
1276  Ops.push_back(Callee);
1277
1278  // Add argument registers to the end of the list so that they are known live
1279  // into the call.
1280  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1281    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1282                                  RegsToPass[i].second.getValueType()));
1283
1284  if (InFlag.Val)
1285    Ops.push_back(InFlag);
1286
1287  // FIXME: Do not generate X86ISD::TAILCALL for now.
1288  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
1289                      NodeTys, &Ops[0], Ops.size());
1290  InFlag = Chain.getValue(1);
1291
1292  // Returns a flag for retval copy to use.
1293  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1294  Ops.clear();
1295  Ops.push_back(Chain);
1296  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
1297  Ops.push_back(DAG.getConstant(0, getPointerTy()));
1298  Ops.push_back(InFlag);
1299  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
1300  InFlag = Chain.getValue(1);
1301
1302  // Handle result values, copying them out of physregs into vregs that we
1303  // return.
1304  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
1305}
1306
1307//===----------------------------------------------------------------------===//
1308//                 Fast & FastCall Calling Convention implementation
1309//===----------------------------------------------------------------------===//
1310//
1311// The X86 'fast' calling convention passes up to two integer arguments in
1312// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
1313// and requires that the callee pop its arguments off the stack (allowing proper
1314// tail calls), and has the same return value conventions as C calling convs.
1315//
1316// This calling convention always arranges for the callee pop value to be 8n+4
1317// bytes, which is needed for tail recursion elimination and stack alignment
1318// reasons.
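// For instance, an 8n+4 byte argument area plus the 4-byte return address gives
// a total stack adjustment of 8n+8 bytes, a multiple of 8.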
1319//
1320// Note that this can be enhanced in the future to pass fp vals in registers
1321// (when we have a global fp allocator) and do other tricks.
1322//
1323//===----------------------------------------------------------------------===//
1324// The X86 'fastcall' calling convention passes up to two integer arguments in
1325// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
1326// and requires that the callee pop its arguments off the stack (allowing proper
1327// tail calls), and has the same return value conventions as C calling convs.
1328//
1329// This calling convention always arranges for the callee pop value to be 8n+4
1330// bytes, which is needed for tail recursion elimination and stack alignment
1331// reasons.
1332SDOperand
1333X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG,
1334                                        bool isFastCall) {
1335  unsigned NumArgs = Op.Val->getNumValues()-1;
1336  MachineFunction &MF = DAG.getMachineFunction();
1337  MachineFrameInfo *MFI = MF.getFrameInfo();
1338  SDOperand Root = Op.getOperand(0);
1339  SmallVector<SDOperand, 8> ArgValues;
1340
1341  // Add DAG nodes to load the arguments...  On entry to a function the stack
1342  // frame looks like this:
1343  //
1344  // [ESP] -- return address
1345  // [ESP + 4] -- first nonreg argument (leftmost lexically)
1346  // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size
1347  //    ...
1348  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
1349
1350  // Keep track of the number of integer regs passed so far.  This can be either
1351  // 0 (neither EAX/ECX nor EDX used), 1 (EAX/ECX is used) or 2 (EAX/ECX and EDX
1352  // are both used).
1353  unsigned NumIntRegs = 0;
1354  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
1355
1356  static const unsigned XMMArgRegs[] = {
1357    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
1358  };
1359
1360  static const unsigned GPRArgRegs[][2][2] = {
1361    {{ X86::AL,  X86::DL },  { X86::CL,  X86::DL }},
1362    {{ X86::AX,  X86::DX },  { X86::CX,  X86::DX }},
1363    {{ X86::EAX, X86::EDX }, { X86::ECX,  X86::EDX }}
1364  };
1365
1366  static const TargetRegisterClass* GPRClasses[3] = {
1367    X86::GR8RegisterClass, X86::GR16RegisterClass, X86::GR32RegisterClass
1368  };
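  // Example: the first i32 argument of a fastcall function would use
  // GPRArgRegs[MVT::i32-MVT::i8][1][0], i.e. ECX, while a plain fastcc function
  // would use GPRArgRegs[MVT::i32-MVT::i8][0][0], i.e. EAX.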
1369
1370  unsigned GPRInd = (isFastCall ? 1 : 0);
1371  for (unsigned i = 0; i < NumArgs; ++i) {
1372    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
1373    unsigned ArgIncrement = 4;
1374    unsigned ObjSize = 0;
1375    unsigned ObjXMMRegs = 0;
1376    unsigned ObjIntRegs = 0;
1377    unsigned Reg = 0;
1378    SDOperand ArgValue;
1379
1380    HowToPassCallArgument(ObjectVT,
1381                          true, // Use as many registers as possible
1382                          NumIntRegs, NumXMMRegs,
1383                          (isFastCall ? 2 : FASTCC_NUM_INT_ARGS_INREGS),
1384                          ObjSize, ObjIntRegs, ObjXMMRegs);
1385
1386    if (ObjSize > 4)
1387      ArgIncrement = ObjSize;
1388
1389    if (ObjIntRegs || ObjXMMRegs) {
1390      switch (ObjectVT) {
1391      default: assert(0 && "Unhandled argument type!");
1392      case MVT::i8:
1393      case MVT::i16:
1394      case MVT::i32: {
1395        unsigned RegToUse = GPRArgRegs[ObjectVT-MVT::i8][GPRInd][NumIntRegs];
1396        Reg = AddLiveIn(MF, RegToUse, GPRClasses[ObjectVT-MVT::i8]);
1397        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
1398        break;
1399      }
1400      case MVT::v16i8:
1401      case MVT::v8i16:
1402      case MVT::v4i32:
1403      case MVT::v2i64:
1404      case MVT::v4f32:
1405      case MVT::v2f64: {
1406        assert(!isFastCall && "Unhandled argument type!");
1407        Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass);
1408        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
1409        break;
1410      }
1411      }
1412      NumIntRegs += ObjIntRegs;
1413      NumXMMRegs += ObjXMMRegs;
1414    }
1415    if (ObjSize) {
1416      // XMM arguments have to be aligned on 16-byte boundary.
1417      if (ObjSize == 16)
1418        ArgOffset = ((ArgOffset + 15) / 16) * 16;
1419      // Create the SelectionDAG nodes corresponding to a load from this
1420      // parameter.
1421      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1422      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
1423      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
1424
1425      ArgOffset += ArgIncrement;   // Move on to the next argument.
1426    }
1427
1428    ArgValues.push_back(ArgValue);
1429  }
1430
1431  ArgValues.push_back(Root);
1432
1433  // Make sure the argument area takes 8n+4 bytes so that the start of the
1434  // arguments, and the stack after the return address has been pushed, stay aligned.
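  // e.g. 8 bytes of stack arguments become a 12-byte callee-pop amount, while
  // 12 bytes are already of the form 8n+4 and are left unchanged.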
1435  if ((ArgOffset & 7) == 0)
1436    ArgOffset += 4;
1437
1438  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
1439  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
1440  ReturnAddrIndex = 0;             // No return address slot generated yet.
1441  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
1442  BytesCallerReserves = 0;
1443
1444  MF.getInfo<X86FunctionInfo>()->setBytesToPopOnReturn(BytesToPopOnReturn);
1445
1446  // Finally, inform the code generator which regs we return values in.
1447  switch (getValueType(MF.getFunction()->getReturnType())) {
1448  default: assert(0 && "Unknown type!");
1449  case MVT::isVoid: break;
1450  case MVT::i1:
1451  case MVT::i8:
1452  case MVT::i16:
1453  case MVT::i32:
1454    MF.addLiveOut(X86::EAX);
1455    break;
1456  case MVT::i64:
1457    MF.addLiveOut(X86::EAX);
1458    MF.addLiveOut(X86::EDX);
1459    break;
1460  case MVT::f32:
1461  case MVT::f64:
1462    MF.addLiveOut(X86::ST0);
1463    break;
1464  case MVT::v16i8:
1465  case MVT::v8i16:
1466  case MVT::v4i32:
1467  case MVT::v2i64:
1468  case MVT::v4f32:
1469  case MVT::v2f64:
1470    assert(!isFastCall && "Unknown result type");
1471    MF.addLiveOut(X86::XMM0);
1472    break;
1473  }
1474
1475  // Return the new list of results.
1476  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
1477                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
1478}
1479
1480SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
1481                                               unsigned CC) {
1482  SDOperand Chain     = Op.getOperand(0);
1483  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1484  SDOperand Callee    = Op.getOperand(4);
1485  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;
1486
1487  // Count how many bytes are to be pushed on the stack.
1488  unsigned NumBytes = 0;
1489
1490  // Keep track of the number of integer regs passed so far.  This can be either
1491  // 0 (neither EAX/ECX nor EDX used), 1 (EAX/ECX is used) or 2 (EAX/ECX and EDX
1492  // are both used).
1493  unsigned NumIntRegs = 0;
1494  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
1495
1496  static const unsigned GPRArgRegs[][2][2] = {
1497    {{ X86::AL,  X86::DL },  { X86::CL,  X86::DL }},
1498    {{ X86::AX,  X86::DX },  { X86::CX,  X86::DX }},
1499    {{ X86::EAX, X86::EDX }, { X86::ECX,  X86::EDX }}
1500  };
1501  static const unsigned XMMArgRegs[] = {
1502    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
1503  };
1504
1505  bool isFastCall = CC == CallingConv::X86_FastCall;
1506  unsigned GPRInd = isFastCall ? 1 : 0;
1507  for (unsigned i = 0; i != NumOps; ++i) {
1508    SDOperand Arg = Op.getOperand(5+2*i);
1509
1510    switch (Arg.getValueType()) {
1511    default: assert(0 && "Unknown value type!");
1512    case MVT::i8:
1513    case MVT::i16:
1514    case MVT::i32: {
1515     unsigned MaxNumIntRegs = (isFastCall ? 2 : FASTCC_NUM_INT_ARGS_INREGS);
1516     if (NumIntRegs < MaxNumIntRegs) {
1517       ++NumIntRegs;
1518       break;
1519     }
1520     } // Fall through
1521    case MVT::f32:
1522      NumBytes += 4;
1523      break;
1524    case MVT::f64:
1525      NumBytes += 8;
1526      break;
1527    case MVT::v16i8:
1528    case MVT::v8i16:
1529    case MVT::v4i32:
1530    case MVT::v2i64:
1531    case MVT::v4f32:
1532    case MVT::v2f64:
1533      assert(!isFastCall && "Unknown value type!");
1534      if (NumXMMRegs < 4)
1535        NumXMMRegs++;
1536      else {
1537        // XMM arguments have to be aligned on 16-byte boundary.
1538        NumBytes = ((NumBytes + 15) / 16) * 16;
1539        NumBytes += 16;
1540      }
1541      break;
1542    }
1543  }
1544
1545  // Make sure the pushed argument area takes 8n+4 bytes so that the start of
1546  // the arguments, and the stack after the return address has been pushed, stay aligned.
1547  if ((NumBytes & 7) == 0)
1548    NumBytes += 4;
1549
1550  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
1551
1552  // Arguments go on the stack in reverse order, as specified by the ABI.
1553  unsigned ArgOffset = 0;
1554  NumIntRegs = 0;
1555  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
1556  SmallVector<SDOperand, 8> MemOpChains;
1557  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
1558  for (unsigned i = 0; i != NumOps; ++i) {
1559    SDOperand Arg = Op.getOperand(5+2*i);
1560
1561    switch (Arg.getValueType()) {
1562    default: assert(0 && "Unexpected ValueType for argument!");
1563    case MVT::i8:
1564    case MVT::i16:
1565    case MVT::i32: {
1566     unsigned MaxNumIntRegs = (isFastCall ? 2 : FASTCC_NUM_INT_ARGS_INREGS);
1567     if (NumIntRegs < MaxNumIntRegs) {
1568       unsigned RegToUse =
1569         GPRArgRegs[Arg.getValueType()-MVT::i8][GPRInd][NumIntRegs];
1570       RegsToPass.push_back(std::make_pair(RegToUse, Arg));
1571       ++NumIntRegs;
1572       break;
1573     }
1574    } // Fall through
1575    case MVT::f32: {
1576      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1577      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1578      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1579      ArgOffset += 4;
1580      break;
1581    }
1582    case MVT::f64: {
1583      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1584      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1585      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1586      ArgOffset += 8;
1587      break;
1588    }
1589    case MVT::v16i8:
1590    case MVT::v8i16:
1591    case MVT::v4i32:
1592    case MVT::v2i64:
1593    case MVT::v4f32:
1594    case MVT::v2f64:
1595      assert(!isFastCall && "Unexpected ValueType for argument!");
1596      if (NumXMMRegs < 4) {
1597        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
1598        NumXMMRegs++;
1599      } else {
1600        // XMM arguments have to be aligned on 16-byte boundary.
1601        ArgOffset = ((ArgOffset + 15) / 16) * 16;
1602        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1603        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1604        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1605        ArgOffset += 16;
1606      }
1607      break;
1608    }
1609  }
1610
1611  if (!MemOpChains.empty())
1612    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1613                        &MemOpChains[0], MemOpChains.size());
1614
1615  // Build a sequence of copy-to-reg nodes chained together with token chain
1616  // and flag operands which copy the outgoing args into registers.
1617  SDOperand InFlag;
1618  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1619    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1620                             InFlag);
1621    InFlag = Chain.getValue(1);
1622  }
1623
1624  // If the callee is a GlobalAddress node (quite common, every direct call is)
1625  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
1626  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1627    // We should use extra load for direct calls to dllimported functions in
1628    // non-JIT mode.
1629    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
1630                                        getTargetMachine(), true))
1631      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
1632  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1633    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
1634
1635  // ELF / PIC requires the GOT pointer to be in the EBX register before calls
1636  // made through the PLT.
1637  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
1638      Subtarget->isPICStyleGOT()) {
1639    Chain = DAG.getCopyToReg(Chain, X86::EBX,
1640                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
1641                             InFlag);
1642    InFlag = Chain.getValue(1);
1643  }
1644
1645  // Returns a chain & a flag for retval copy to use.
1646  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1647  SmallVector<SDOperand, 8> Ops;
1648  Ops.push_back(Chain);
1649  Ops.push_back(Callee);
1650
1651  // Add argument registers to the end of the list so that they are known live
1652  // into the call.
1653  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1654    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1655                                  RegsToPass[i].second.getValueType()));
1656
1657  // Add an implicit use of the GOT pointer in EBX.
1658  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
1659      Subtarget->isPICStyleGOT())
1660    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
1661
1662  if (InFlag.Val)
1663    Ops.push_back(InFlag);
1664
1665  // FIXME: Do not generate X86ISD::TAILCALL for now.
1666  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
1667                      NodeTys, &Ops[0], Ops.size());
1668  InFlag = Chain.getValue(1);
1669
1670  // Returns a flag for retval copy to use.
1671  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1672  Ops.clear();
1673  Ops.push_back(Chain);
1674  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
1675  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
1676  Ops.push_back(InFlag);
1677  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
1678  InFlag = Chain.getValue(1);
1679
1680  // Handle result values, copying them out of physregs into vregs that we
1681  // return.
1682  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
1683}
1684
1685SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
1686  if (ReturnAddrIndex == 0) {
1687    // Set up a frame object for the return address.
1688    MachineFunction &MF = DAG.getMachineFunction();
1689    if (Subtarget->is64Bit())
1690      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
1691    else
1692      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
1693  }
1694
1695  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
1696}
1697
1698
1699
1700/// translateX86CC - do a one to one translation of an ISD::CondCode to the X86
1701/// specific condition code. It returns false if it cannot do a direct
1702/// translation. X86CC is the translated CondCode.  LHS/RHS are modified as
1703/// needed.
1704static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
1705                           unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
1706                           SelectionDAG &DAG) {
1707  X86CC = X86::COND_INVALID;
1708  if (!isFP) {
1709    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1710      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
1711        // X > -1  -> compare X with 0, use !sign (NS).
1712        RHS = DAG.getConstant(0, RHS.getValueType());
1713        X86CC = X86::COND_NS;
1714        return true;
1715      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
1716        // X < 0   -> compare X with 0, use sign (S).
1717        X86CC = X86::COND_S;
1718        return true;
1719      }
1720    }
1721
1722    switch (SetCCOpcode) {
1723    default: break;
1724    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
1725    case ISD::SETGT:  X86CC = X86::COND_G;  break;
1726    case ISD::SETGE:  X86CC = X86::COND_GE; break;
1727    case ISD::SETLT:  X86CC = X86::COND_L;  break;
1728    case ISD::SETLE:  X86CC = X86::COND_LE; break;
1729    case ISD::SETNE:  X86CC = X86::COND_NE; break;
1730    case ISD::SETULT: X86CC = X86::COND_B;  break;
1731    case ISD::SETUGT: X86CC = X86::COND_A;  break;
1732    case ISD::SETULE: X86CC = X86::COND_BE; break;
1733    case ISD::SETUGE: X86CC = X86::COND_AE; break;
1734    }
1735  } else {
1736    // On a floating point condition, the flags are set as follows:
1737    // ZF  PF  CF   op
1738    //  0 | 0 | 0 | X > Y
1739    //  0 | 0 | 1 | X < Y
1740    //  1 | 0 | 0 | X == Y
1741    //  1 | 1 | 1 | unordered
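    // e.g. SETOGT maps to COND_A (CF==0 && ZF==0), which is also false for
    // unordered operands since an unordered compare sets CF.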
1742    bool Flip = false;
1743    switch (SetCCOpcode) {
1744    default: break;
1745    case ISD::SETUEQ:
1746    case ISD::SETEQ: X86CC = X86::COND_E;  break;
1747    case ISD::SETOLT: Flip = true; // Fallthrough
1748    case ISD::SETOGT:
1749    case ISD::SETGT: X86CC = X86::COND_A;  break;
1750    case ISD::SETOLE: Flip = true; // Fallthrough
1751    case ISD::SETOGE:
1752    case ISD::SETGE: X86CC = X86::COND_AE; break;
1753    case ISD::SETUGT: Flip = true; // Fallthrough
1754    case ISD::SETULT:
1755    case ISD::SETLT: X86CC = X86::COND_B;  break;
1756    case ISD::SETUGE: Flip = true; // Fallthrough
1757    case ISD::SETULE:
1758    case ISD::SETLE: X86CC = X86::COND_BE; break;
1759    case ISD::SETONE:
1760    case ISD::SETNE: X86CC = X86::COND_NE; break;
1761    case ISD::SETUO: X86CC = X86::COND_P;  break;
1762    case ISD::SETO:  X86CC = X86::COND_NP; break;
1763    }
1764    if (Flip)
1765      std::swap(LHS, RHS);
1766  }
1767
1768  return X86CC != X86::COND_INVALID;
1769}
1770
1771/// hasFPCMov - is there a floating point cmov for the specific X86 condition
1772/// code. The current x86 ISA includes the following FP cmov instructions:
1773/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
1774static bool hasFPCMov(unsigned X86CC) {
1775  switch (X86CC) {
1776  default:
1777    return false;
1778  case X86::COND_B:
1779  case X86::COND_BE:
1780  case X86::COND_E:
1781  case X86::COND_P:
1782  case X86::COND_A:
1783  case X86::COND_AE:
1784  case X86::COND_NE:
1785  case X86::COND_NP:
1786    return true;
1787  }
1788}
1789
1790/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
1791/// true if Op is undef or if its value falls within the specified range [Low, Hi).
1792static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
1793  if (Op.getOpcode() == ISD::UNDEF)
1794    return true;
1795
1796  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
1797  return (Val >= Low && Val < Hi);
1798}
1799
1800/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
1801/// true if Op is undef or if its value is equal to the specified value.
1802static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
1803  if (Op.getOpcode() == ISD::UNDEF)
1804    return true;
1805  return cast<ConstantSDNode>(Op)->getValue() == Val;
1806}
1807
1808/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
1809/// specifies a shuffle of elements that is suitable for input to PSHUFD.
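/// For example, <2,1,0,3> qualifies, while <0,1,4,5> does not because elements
/// 4 and 5 would reference the second vector.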
1810bool X86::isPSHUFDMask(SDNode *N) {
1811  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1812
1813  if (N->getNumOperands() != 4)
1814    return false;
1815
1816  // Check if the value doesn't reference the second vector.
1817  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1818    SDOperand Arg = N->getOperand(i);
1819    if (Arg.getOpcode() == ISD::UNDEF) continue;
1820    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1821    if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
1822      return false;
1823  }
1824
1825  return true;
1826}
1827
1828/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
1829/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
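/// For example, <0,1,2,3,7,6,5,4>: the low quadword is left in place and the
/// high quadword is permuted within itself.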
1830bool X86::isPSHUFHWMask(SDNode *N) {
1831  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1832
1833  if (N->getNumOperands() != 8)
1834    return false;
1835
1836  // Lower quadword copied in order.
1837  for (unsigned i = 0; i != 4; ++i) {
1838    SDOperand Arg = N->getOperand(i);
1839    if (Arg.getOpcode() == ISD::UNDEF) continue;
1840    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1841    if (cast<ConstantSDNode>(Arg)->getValue() != i)
1842      return false;
1843  }
1844
1845  // Upper quadword shuffled.
1846  for (unsigned i = 4; i != 8; ++i) {
1847    SDOperand Arg = N->getOperand(i);
1848    if (Arg.getOpcode() == ISD::UNDEF) continue;
1849    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1850    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1851    if (Val < 4 || Val > 7)
1852      return false;
1853  }
1854
1855  return true;
1856}
1857
1858/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
1859/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
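/// For example, <3,2,1,0,4,5,6,7>: the low quadword is permuted within itself
/// and the high quadword is left in place.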
1860bool X86::isPSHUFLWMask(SDNode *N) {
1861  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1862
1863  if (N->getNumOperands() != 8)
1864    return false;
1865
1866  // Upper quadword copied in order.
1867  for (unsigned i = 4; i != 8; ++i)
1868    if (!isUndefOrEqual(N->getOperand(i), i))
1869      return false;
1870
1871  // Lower quadword shuffled.
1872  for (unsigned i = 0; i != 4; ++i)
1873    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
1874      return false;
1875
1876  return true;
1877}
1878
1879/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
1880/// specifies a shuffle of elements that is suitable for input to SHUFP*.
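/// For a 4-element shuffle, e.g. <0,3,4,7>: the low half selects from the first
/// vector and the high half selects from the second.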
1881static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) {
1882  if (NumElems != 2 && NumElems != 4) return false;
1883
1884  unsigned Half = NumElems / 2;
1885  for (unsigned i = 0; i < Half; ++i)
1886    if (!isUndefOrInRange(Elems[i], 0, NumElems))
1887      return false;
1888  for (unsigned i = Half; i < NumElems; ++i)
1889    if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
1890      return false;
1891
1892  return true;
1893}
1894
1895bool X86::isSHUFPMask(SDNode *N) {
1896  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1897  return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
1898}
1899
1900/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
1901/// the reverse of what x86 shuffles want. x86 shuffles require the lower
1902/// half elements to come from vector 1 (which would equal the dest.) and
1903/// the upper half to come from vector 2.
1904static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) {
1905  if (NumOps != 2 && NumOps != 4) return false;
1906
1907  unsigned Half = NumOps / 2;
1908  for (unsigned i = 0; i < Half; ++i)
1909    if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
1910      return false;
1911  for (unsigned i = Half; i < NumOps; ++i)
1912    if (!isUndefOrInRange(Ops[i], 0, NumOps))
1913      return false;
1914  return true;
1915}
1916
1917static bool isCommutedSHUFP(SDNode *N) {
1918  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1919  return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
1920}
1921
1922/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
1923/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
1924bool X86::isMOVHLPSMask(SDNode *N) {
1925  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1926
1927  if (N->getNumOperands() != 4)
1928    return false;
1929
1930  // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
1931  return isUndefOrEqual(N->getOperand(0), 6) &&
1932         isUndefOrEqual(N->getOperand(1), 7) &&
1933         isUndefOrEqual(N->getOperand(2), 2) &&
1934         isUndefOrEqual(N->getOperand(3), 3);
1935}
1936
1937/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
1938/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
1939/// <2, 3, 2, 3>
1940bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
1941  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1942
1943  if (N->getNumOperands() != 4)
1944    return false;
1945
1946  // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3
1947  return isUndefOrEqual(N->getOperand(0), 2) &&
1948         isUndefOrEqual(N->getOperand(1), 3) &&
1949         isUndefOrEqual(N->getOperand(2), 2) &&
1950         isUndefOrEqual(N->getOperand(3), 3);
1951}
1952
1953/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
1954/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
1955bool X86::isMOVLPMask(SDNode *N) {
1956  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1957
1958  unsigned NumElems = N->getNumOperands();
1959  if (NumElems != 2 && NumElems != 4)
1960    return false;
1961
1962  for (unsigned i = 0; i < NumElems/2; ++i)
1963    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
1964      return false;
1965
1966  for (unsigned i = NumElems/2; i < NumElems; ++i)
1967    if (!isUndefOrEqual(N->getOperand(i), i))
1968      return false;
1969
1970  return true;
1971}
1972
1973/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
1974/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
1975/// and MOVLHPS.
1976bool X86::isMOVHPMask(SDNode *N) {
1977  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1978
1979  unsigned NumElems = N->getNumOperands();
1980  if (NumElems != 2 && NumElems != 4)
1981    return false;
1982
1983  for (unsigned i = 0; i < NumElems/2; ++i)
1984    if (!isUndefOrEqual(N->getOperand(i), i))
1985      return false;
1986
1987  for (unsigned i = 0; i < NumElems/2; ++i) {
1988    SDOperand Arg = N->getOperand(i + NumElems/2);
1989    if (!isUndefOrEqual(Arg, i + NumElems))
1990      return false;
1991  }
1992
1993  return true;
1994}
1995
1996/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
1997/// specifies a shuffle of elements that is suitable for input to UNPCKL.
1998bool static isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
1999                         bool V2IsSplat = false) {
2000  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
2001    return false;
2002
2003  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
2004    SDOperand BitI  = Elts[i];
2005    SDOperand BitI1 = Elts[i+1];
2006    if (!isUndefOrEqual(BitI, j))
2007      return false;
2008    if (V2IsSplat) {
2009      if (isUndefOrEqual(BitI1, NumElts))
2010        return false;
2011    } else {
2012      if (!isUndefOrEqual(BitI1, j + NumElts))
2013        return false;
2014    }
2015  }
2016
2017  return true;
2018}
2019
2020bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
2021  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2022  return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
2023}
2024
2025/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
2026/// specifies a shuffle of elements that is suitable for input to UNPCKH.
2027bool static isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
2028                         bool V2IsSplat = false) {
2029  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
2030    return false;
2031
2032  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
2033    SDOperand BitI  = Elts[i];
2034    SDOperand BitI1 = Elts[i+1];
2035    if (!isUndefOrEqual(BitI, j + NumElts/2))
2036      return false;
2037    if (V2IsSplat) {
2038      if (isUndefOrEqual(BitI1, NumElts))
2039        return false;
2040    } else {
2041      if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
2042        return false;
2043    }
2044  }
2045
2046  return true;
2047}
2048
2049bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
2050  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2051  return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
2052}
2053
2054/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
2055/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
2056/// <0, 0, 1, 1>
2057bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
2058  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2059
2060  unsigned NumElems = N->getNumOperands();
2061  if (NumElems != 4 && NumElems != 8 && NumElems != 16)
2062    return false;
2063
2064  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
2065    SDOperand BitI  = N->getOperand(i);
2066    SDOperand BitI1 = N->getOperand(i+1);
2067
2068    if (!isUndefOrEqual(BitI, j))
2069      return false;
2070    if (!isUndefOrEqual(BitI1, j))
2071      return false;
2072  }
2073
2074  return true;
2075}
2076
2077/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
2078/// specifies a shuffle of elements that is suitable for input to MOVSS,
2079/// MOVSD, and MOVD, i.e. setting the lowest element.
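/// For a 4-element shuffle this means a mask like <4,1,2,3>: element 0 comes from
/// the second vector and the remaining elements come from the first in order.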
2080static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
2081  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
2082    return false;
2083
2084  if (!isUndefOrEqual(Elts[0], NumElts))
2085    return false;
2086
2087  for (unsigned i = 1; i < NumElts; ++i) {
2088    if (!isUndefOrEqual(Elts[i], i))
2089      return false;
2090  }
2091
2092  return true;
2093}
2094
2095bool X86::isMOVLMask(SDNode *N) {
2096  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2097  return ::isMOVLMask(N->op_begin(), N->getNumOperands());
2098}
2099
2100/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
2101/// of what x86 movss wants. X86 movss requires the lowest element to be the
2102/// lowest element of vector 2 and the other elements to come from vector 1 in order.
2103static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
2104                           bool V2IsSplat = false,
2105                           bool V2IsUndef = false) {
2106  if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
2107    return false;
2108
2109  if (!isUndefOrEqual(Ops[0], 0))
2110    return false;
2111
2112  for (unsigned i = 1; i < NumOps; ++i) {
2113    SDOperand Arg = Ops[i];
2114    if (!(isUndefOrEqual(Arg, i+NumOps) ||
2115          (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
2116          (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
2117      return false;
2118  }
2119
2120  return true;
2121}
2122
2123static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
2124                           bool V2IsUndef = false) {
2125  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2126  return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
2127                        V2IsSplat, V2IsUndef);
2128}
2129
2130/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
2131/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
2132bool X86::isMOVSHDUPMask(SDNode *N) {
2133  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2134
2135  if (N->getNumOperands() != 4)
2136    return false;
2137
2138  // Expect 1, 1, 3, 3
2139  for (unsigned i = 0; i < 2; ++i) {
2140    SDOperand Arg = N->getOperand(i);
2141    if (Arg.getOpcode() == ISD::UNDEF) continue;
2142    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2143    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2144    if (Val != 1) return false;
2145  }
2146
2147  bool HasHi = false;
2148  for (unsigned i = 2; i < 4; ++i) {
2149    SDOperand Arg = N->getOperand(i);
2150    if (Arg.getOpcode() == ISD::UNDEF) continue;
2151    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2152    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2153    if (Val != 3) return false;
2154    HasHi = true;
2155  }
2156
2157  // Don't use movshdup if it can be done with a shufps.
2158  return HasHi;
2159}
2160
2161/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
2162/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
2163bool X86::isMOVSLDUPMask(SDNode *N) {
2164  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2165
2166  if (N->getNumOperands() != 4)
2167    return false;
2168
2169  // Expect 0, 0, 2, 2
2170  for (unsigned i = 0; i < 2; ++i) {
2171    SDOperand Arg = N->getOperand(i);
2172    if (Arg.getOpcode() == ISD::UNDEF) continue;
2173    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2174    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2175    if (Val != 0) return false;
2176  }
2177
2178  bool HasHi = false;
2179  for (unsigned i = 2; i < 4; ++i) {
2180    SDOperand Arg = N->getOperand(i);
2181    if (Arg.getOpcode() == ISD::UNDEF) continue;
2182    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2183    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2184    if (Val != 2) return false;
2185    HasHi = true;
2186  }
2187
2188  // Don't use movsldup if it can be done with a shufps.
2189  return HasHi;
2190}
2191
2192/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2193/// a splat of a single element.
2194static bool isSplatMask(SDNode *N) {
2195  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2196
2197  // This is a splat operation if each element of the permute is the same, and
2198  // if the value doesn't reference the second vector.
2199  unsigned NumElems = N->getNumOperands();
2200  SDOperand ElementBase;
2201  unsigned i = 0;
2202  for (; i != NumElems; ++i) {
2203    SDOperand Elt = N->getOperand(i);
2204    if (isa<ConstantSDNode>(Elt)) {
2205      ElementBase = Elt;
2206      break;
2207    }
2208  }
2209
2210  if (!ElementBase.Val)
2211    return false;
2212
2213  for (; i != NumElems; ++i) {
2214    SDOperand Arg = N->getOperand(i);
2215    if (Arg.getOpcode() == ISD::UNDEF) continue;
2216    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2217    if (Arg != ElementBase) return false;
2218  }
2219
2220  // Make sure it is a splat of the first vector operand.
2221  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
2222}
2223
2224/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2225/// a splat of a single element and it's a 2 or 4 element mask.
2226bool X86::isSplatMask(SDNode *N) {
2227  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2228
2229  // We can only splat 64-bit, and 32-bit quantities with a single instruction.
2230  // We can only splat 64-bit and 32-bit quantities with a single instruction.
2231    return false;
2232  return ::isSplatMask(N);
2233}
2234
2235/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
2236/// specifies a splat of element zero.
2237bool X86::isSplatLoMask(SDNode *N) {
2238  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2239
2240  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
2241    if (!isUndefOrEqual(N->getOperand(i), 0))
2242      return false;
2243  return true;
2244}
2245
2246/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
2247/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
2248/// instructions.
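/// For a 4-element mask <m0,m1,m2,m3> (values taken modulo 4) the immediate is
/// (m3<<6)|(m2<<4)|(m1<<2)|m0, so the identity mask <0,1,2,3> encodes as 0xE4.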
2249unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
2250  unsigned NumOperands = N->getNumOperands();
2251  unsigned Shift = (NumOperands == 4) ? 2 : 1;
2252  unsigned Mask = 0;
2253  for (unsigned i = 0; i < NumOperands; ++i) {
2254    unsigned Val = 0;
2255    SDOperand Arg = N->getOperand(NumOperands-i-1);
2256    if (Arg.getOpcode() != ISD::UNDEF)
2257      Val = cast<ConstantSDNode>(Arg)->getValue();
2258    if (Val >= NumOperands) Val -= NumOperands;
2259    Mask |= Val;
2260    if (i != NumOperands - 1)
2261      Mask <<= Shift;
2262  }
2263
2264  return Mask;
2265}
2266
2267/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
2268/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
2269/// instructions.
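/// For a mask <...,m4,m5,m6,m7> the immediate is
/// ((m7-4)<<6)|((m6-4)<<4)|((m5-4)<<2)|(m4-4); the identity <4,5,6,7> yields 0xE4.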
2270unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
2271  unsigned Mask = 0;
2272  // 8 nodes, but we only care about the last 4.
2273  for (unsigned i = 7; i >= 4; --i) {
2274    unsigned Val = 0;
2275    SDOperand Arg = N->getOperand(i);
2276    if (Arg.getOpcode() != ISD::UNDEF)
2277      Val = cast<ConstantSDNode>(Arg)->getValue();
2278    Mask |= (Val - 4);
2279    if (i != 4)
2280      Mask <<= 2;
2281  }
2282
2283  return Mask;
2284}
2285
2286/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
2287/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
2288/// instructions.
2289unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
2290  unsigned Mask = 0;
2291  // 8 nodes, but we only care about the first 4.
2292  for (int i = 3; i >= 0; --i) {
2293    unsigned Val = 0;
2294    SDOperand Arg = N->getOperand(i);
2295    if (Arg.getOpcode() != ISD::UNDEF)
2296      Val = cast<ConstantSDNode>(Arg)->getValue();
2297    Mask |= Val;
2298    if (i != 0)
2299      Mask <<= 2;
2300  }
2301
2302  return Mask;
2303}
2304
2305/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
2306/// specifies an 8 element shuffle that can be broken into a pair of
2307/// PSHUFHW and PSHUFLW.
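/// For example, <1,0,3,2,5,4,7,6> can be handled by a PSHUFLW on the low
/// quadword followed by a PSHUFHW on the high quadword.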
2308static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
2309  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2310
2311  if (N->getNumOperands() != 8)
2312    return false;
2313
2314  // Lower quadword shuffled.
2315  for (unsigned i = 0; i != 4; ++i) {
2316    SDOperand Arg = N->getOperand(i);
2317    if (Arg.getOpcode() == ISD::UNDEF) continue;
2318    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2319    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2320    if (Val >= 4)
2321      return false;
2322  }
2323
2324  // Upper quadword shuffled.
2325  for (unsigned i = 4; i != 8; ++i) {
2326    SDOperand Arg = N->getOperand(i);
2327    if (Arg.getOpcode() == ISD::UNDEF) continue;
2328    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2329    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2330    if (Val < 4 || Val > 7)
2331      return false;
2332  }
2333
2334  return true;
2335}
2336
2337/// CommuteVectorShuffle - Swap vector_shuffle operands as well as the
2338/// values in their permute mask.
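/// For example, with four elements the mask <0,1,4,5> becomes <4,5,0,1> once the
/// operands have been swapped.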
2339static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
2340                                      SDOperand &V2, SDOperand &Mask,
2341                                      SelectionDAG &DAG) {
2342  MVT::ValueType VT = Op.getValueType();
2343  MVT::ValueType MaskVT = Mask.getValueType();
2344  MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
2345  unsigned NumElems = Mask.getNumOperands();
2346  SmallVector<SDOperand, 8> MaskVec;
2347
2348  for (unsigned i = 0; i != NumElems; ++i) {
2349    SDOperand Arg = Mask.getOperand(i);
2350    if (Arg.getOpcode() == ISD::UNDEF) {
2351      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
2352      continue;
2353    }
2354    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2355    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2356    if (Val < NumElems)
2357      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
2358    else
2359      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
2360  }
2361
2362  std::swap(V1, V2);
2363  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2364  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2365}
2366
2367/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
2368/// match movhlps. The lower half elements should come from upper half of
2369/// V1 (and in order), and the upper half elements should come from the upper
2370/// half of V2 (and in order).
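/// Concretely, for a 4-element shuffle the mask must be <2,3,6,7> (allowing
/// undefs in any position).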
2371static bool ShouldXformToMOVHLPS(SDNode *Mask) {
2372  unsigned NumElems = Mask->getNumOperands();
2373  if (NumElems != 4)
2374    return false;
2375  for (unsigned i = 0, e = 2; i != e; ++i)
2376    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
2377      return false;
2378  for (unsigned i = 2; i != 4; ++i)
2379    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
2380      return false;
2381  return true;
2382}
2383
2384/// isScalarLoadToVector - Returns true if the node is a scalar load that
2385/// is promoted to a vector.
2386static inline bool isScalarLoadToVector(SDNode *N) {
2387  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
2388    N = N->getOperand(0).Val;
2389    return ISD::isNON_EXTLoad(N);
2390  }
2391  return false;
2392}
2393
2394/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
2395/// match movlp{s|d}. The lower half elements should come from lower half of
2396/// V1 (and in order), and the upper half elements should come from the upper
2397/// half of V2 (and in order). And since V1 will become the source of the
2398/// MOVLP, it must be either a vector load or a scalar load to vector.
2399static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
2400  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
2401    return false;
2402  // If V2 is a vector load, don't do this transformation. We will try to use a
2403  // load-folding shufps op instead.
2404  if (ISD::isNON_EXTLoad(V2))
2405    return false;
2406
2407  unsigned NumElems = Mask->getNumOperands();
2408  if (NumElems != 2 && NumElems != 4)
2409    return false;
2410  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
2411    if (!isUndefOrEqual(Mask->getOperand(i), i))
2412      return false;
2413  for (unsigned i = NumElems/2; i != NumElems; ++i)
2414    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
2415      return false;
2416  return true;
2417}
2418
2419/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
2420/// all the same.
2421static bool isSplatVector(SDNode *N) {
2422  if (N->getOpcode() != ISD::BUILD_VECTOR)
2423    return false;
2424
2425  SDOperand SplatValue = N->getOperand(0);
2426  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
2427    if (N->getOperand(i) != SplatValue)
2428      return false;
2429  return true;
2430}
2431
2432/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
2433/// to an undef.
2434static bool isUndefShuffle(SDNode *N) {
2435  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
2436    return false;
2437
2438  SDOperand V1 = N->getOperand(0);
2439  SDOperand V2 = N->getOperand(1);
2440  SDOperand Mask = N->getOperand(2);
2441  unsigned NumElems = Mask.getNumOperands();
2442  for (unsigned i = 0; i != NumElems; ++i) {
2443    SDOperand Arg = Mask.getOperand(i);
2444    if (Arg.getOpcode() != ISD::UNDEF) {
2445      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2446      if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
2447        return false;
2448      else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
2449        return false;
2450    }
2451  }
2452  return true;
2453}
2454
2455/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
2456/// that point to V2 point to its first element.
2457static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
2458  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
2459
2460  bool Changed = false;
2461  SmallVector<SDOperand, 8> MaskVec;
2462  unsigned NumElems = Mask.getNumOperands();
2463  for (unsigned i = 0; i != NumElems; ++i) {
2464    SDOperand Arg = Mask.getOperand(i);
2465    if (Arg.getOpcode() != ISD::UNDEF) {
2466      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2467      if (Val > NumElems) {
2468        Arg = DAG.getConstant(NumElems, Arg.getValueType());
2469        Changed = true;
2470      }
2471    }
2472    MaskVec.push_back(Arg);
2473  }
2474
2475  if (Changed)
2476    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
2477                       &MaskVec[0], MaskVec.size());
2478  return Mask;
2479}
2480
2481/// getMOVLMask - Returns a vector_shuffle mask for an movs{s|d}, movd
2482/// operation of specified width.
2483static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
2484  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2485  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2486
2487  SmallVector<SDOperand, 8> MaskVec;
2488  MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
2489  for (unsigned i = 1; i != NumElems; ++i)
2490    MaskVec.push_back(DAG.getConstant(i, BaseVT));
2491  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2492}
2493
2494/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
2495/// of specified width.
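/// e.g. for a width of 4 this builds the mask <0,4,1,5>.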
2496static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
2497  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2498  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2499  SmallVector<SDOperand, 8> MaskVec;
2500  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
2501    MaskVec.push_back(DAG.getConstant(i,            BaseVT));
2502    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
2503  }
2504  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2505}
2506
2507/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
2508/// of specified width.
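/// e.g. for a width of 4 this builds the mask <2,6,3,7>.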
2509static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
2510  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2511  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2512  unsigned Half = NumElems/2;
2513  SmallVector<SDOperand, 8> MaskVec;
2514  for (unsigned i = 0; i != Half; ++i) {
2515    MaskVec.push_back(DAG.getConstant(i + Half,            BaseVT));
2516    MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
2517  }
2518  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2519}
2520
2521/// getZeroVector - Returns a vector of specified type with all zero elements.
2522///
2523static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
2524  assert(MVT::isVector(VT) && "Expected a vector type");
2525  unsigned NumElems = getVectorNumElements(VT);
2526  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
2527  bool isFP = MVT::isFloatingPoint(EVT);
2528  SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
2529  SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
2530  return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
2531}
2532
2533/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
2534///
2535static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
2536  SDOperand V1 = Op.getOperand(0);
2537  SDOperand Mask = Op.getOperand(2);
2538  MVT::ValueType VT = Op.getValueType();
2539  unsigned NumElems = Mask.getNumOperands();
2540  Mask = getUnpacklMask(NumElems, DAG);
2541  while (NumElems != 4) {
2542    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
2543    NumElems >>= 1;
2544  }
2545  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
2546
2547  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2548  Mask = getZeroVector(MaskVT, DAG);
2549  SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
2550                                  DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
2551  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
2552}
2553
2554/// isZeroNode - Returns true if Elt is a constant zero or a floating point
2555/// constant +0.0.
2556static inline bool isZeroNode(SDOperand Elt) {
2557  return ((isa<ConstantSDNode>(Elt) &&
2558           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
2559          (isa<ConstantFPSDNode>(Elt) &&
2560           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
2561}
2562
2563/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
2564/// vector and zero or undef vector.
2565static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
2566                                             unsigned NumElems, unsigned Idx,
2567                                             bool isZero, SelectionDAG &DAG) {
2568  SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
2569  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2570  MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
2571  SDOperand Zero = DAG.getConstant(0, EVT);
2572  SmallVector<SDOperand, 8> MaskVec(NumElems, Zero);
2573  MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
2574  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2575                               &MaskVec[0], MaskVec.size());
2576  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2577}
2578
2579/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
2580///
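/// The vector is built as a v8i16: each pair of input bytes is zero-extended,
/// the odd byte is shifted left by 8 and OR'd with the even byte, the combined
/// 16-bit value is inserted as one element, and the result is finally bitcast
/// back to v16i8.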
2581static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
2582                                       unsigned NumNonZero, unsigned NumZero,
2583                                       SelectionDAG &DAG, TargetLowering &TLI) {
2584  if (NumNonZero > 8)
2585    return SDOperand();
2586
2587  SDOperand V(0, 0);
2588  bool First = true;
2589  for (unsigned i = 0; i < 16; ++i) {
2590    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
2591    if (ThisIsNonZero && First) {
2592      if (NumZero)
2593        V = getZeroVector(MVT::v8i16, DAG);
2594      else
2595        V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
2596      First = false;
2597    }
2598
2599    if ((i & 1) != 0) {
2600      SDOperand ThisElt(0, 0), LastElt(0, 0);
2601      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
2602      if (LastIsNonZero) {
2603        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
2604      }
2605      if (ThisIsNonZero) {
2606        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
2607        ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
2608                              ThisElt, DAG.getConstant(8, MVT::i8));
2609        if (LastIsNonZero)
2610          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
2611      } else
2612        ThisElt = LastElt;
2613
2614      if (ThisElt.Val)
2615        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
2616                        DAG.getConstant(i/2, TLI.getPointerTy()));
2617    }
2618  }
2619
2620  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
2621}
2622
2623/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
2624///
2625static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
2626                                       unsigned NumNonZero, unsigned NumZero,
2627                                       SelectionDAG &DAG, TargetLowering &TLI) {
2628  if (NumNonZero > 4)
2629    return SDOperand();
2630
2631  SDOperand V(0, 0);
2632  bool First = true;
2633  for (unsigned i = 0; i < 8; ++i) {
2634    bool isNonZero = (NonZeros & (1 << i)) != 0;
2635    if (isNonZero) {
2636      if (First) {
2637        if (NumZero)
2638          V = getZeroVector(MVT::v8i16, DAG);
2639        else
2640          V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
2641        First = false;
2642      }
2643      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
2644                      DAG.getConstant(i, TLI.getPointerTy()));
2645    }
2646  }
2647
2648  return V;
2649}
2650
2651SDOperand
2652X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
2653  // All zeros are handled with pxor.
2654  if (ISD::isBuildVectorAllZeros(Op.Val))
2655    return Op;
2656
2657  // All ones are handled with pcmpeqd.
2658  if (ISD::isBuildVectorAllOnes(Op.Val))
2659    return Op;
2660
2661  MVT::ValueType VT = Op.getValueType();
2662  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
2663  unsigned EVTBits = MVT::getSizeInBits(EVT);
2664
2665  unsigned NumElems = Op.getNumOperands();
2666  unsigned NumZero  = 0;
2667  unsigned NumNonZero = 0;
2668  unsigned NonZeros = 0;
2669  std::set<SDOperand> Values;
2670  for (unsigned i = 0; i < NumElems; ++i) {
2671    SDOperand Elt = Op.getOperand(i);
2672    if (Elt.getOpcode() != ISD::UNDEF) {
2673      Values.insert(Elt);
2674      if (isZeroNode(Elt))
2675        NumZero++;
2676      else {
2677        NonZeros |= (1 << i);
2678        NumNonZero++;
2679      }
2680    }
2681  }
2682
2683  if (NumNonZero == 0)
2684    // Must be a mix of zero and undef. Return a zero vector.
2685    return getZeroVector(VT, DAG);
2686
2687  // Splat is obviously ok. Let legalizer expand it to a shuffle.
2688  if (Values.size() == 1)
2689    return SDOperand();
2690
2691  // Special case for single non-zero element.
2692  if (NumNonZero == 1) {
2693    unsigned Idx = CountTrailingZeros_32(NonZeros);
2694    SDOperand Item = Op.getOperand(Idx);
2695    Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
2696    if (Idx == 0)
2697      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
2698      return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
2699                                         NumZero > 0, DAG);
2700
2701    if (EVTBits == 32) {
2702      // Turn it into a shuffle of zero and zero-extended scalar to vector.
2703      Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
2704                                         DAG);
2705      MVT::ValueType MaskVT  = MVT::getIntVectorWithNumElements(NumElems);
2706      MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
2707      SmallVector<SDOperand, 8> MaskVec;
2708      for (unsigned i = 0; i < NumElems; i++)
2709        MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
2710      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2711                                   &MaskVec[0], MaskVec.size());
2712      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
2713                         DAG.getNode(ISD::UNDEF, VT), Mask);
2714    }
2715  }
2716
2717  // Let the legalizer expand 2-wide build_vectors.
2718  if (EVTBits == 64)
2719    return SDOperand();
2720
2721  // If element VT is < 32 bits, convert it to inserts into a zero vector.
2722  if (EVTBits == 8) {
2723    SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
2724                                        *this);
2725    if (V.Val) return V;
2726  }
2727
2728  if (EVTBits == 16) {
2729    SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
2730                                        *this);
2731    if (V.Val) return V;
2732  }
2733
2734  // If element VT is == 32 bits, turn it into a number of shuffles.
2735  SmallVector<SDOperand, 8> V;
2736  V.resize(NumElems);
2737  if (NumElems == 4 && NumZero > 0) {
2738    for (unsigned i = 0; i < 4; ++i) {
2739      bool isZero = !(NonZeros & (1 << i));
2740      if (isZero)
2741        V[i] = getZeroVector(VT, DAG);
2742      else
2743        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2744    }
2745
2746    for (unsigned i = 0; i < 2; ++i) {
2747      switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
2748        default: break;
2749        case 0:
2750          V[i] = V[i*2];  // Must be a zero vector.
2751          break;
2752        case 1:
2753          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
2754                             getMOVLMask(NumElems, DAG));
2755          break;
2756        case 2:
2757          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2758                             getMOVLMask(NumElems, DAG));
2759          break;
2760        case 3:
2761          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2762                             getUnpacklMask(NumElems, DAG));
2763          break;
2764      }
2765    }
2766
2767    // Take advantage of the fact that a GR32 to VR128 scalar_to_vector
2768    // (i.e. movd) clears the upper bits.
2769    // FIXME: we can do the same for the v4f32 case when we know both parts of
2770    // the lower half come from scalar_to_vector (loadf32). We should do
2771    // that in a post-legalizer dag combiner with target-specific hooks.
2772    if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
2773      return V[0];
2774    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2775    MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
2776    SmallVector<SDOperand, 8> MaskVec;
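    // Merge the two pair vectors with one final shuffle. If a pair had exactly
    // one non-zero element and it came from the odd lane (NonZeros pattern 2),
    // the MOVL step above left it in lane 0, so Reverse swaps that pair's mask
    // entries to put it back in its original position.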
2777    bool Reverse = (NonZeros & 0x3) == 2;
2778    for (unsigned i = 0; i < 2; ++i)
2779      if (Reverse)
2780        MaskVec.push_back(DAG.getConstant(1-i, EVT));
2781      else
2782        MaskVec.push_back(DAG.getConstant(i, EVT));
2783    Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
2784    for (unsigned i = 0; i < 2; ++i)
2785      if (Reverse)
2786        MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
2787      else
2788        MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
2789    SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2790                                     &MaskVec[0], MaskVec.size());
2791    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
2792  }
2793
2794  if (Values.size() > 2) {
2795    // Expand into a number of unpckl*.
2796    // e.g. for v4f32
2797    //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
2798    //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
2799    //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
2800    SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
2801    for (unsigned i = 0; i < NumElems; ++i)
2802      V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2803    NumElems >>= 1;
2804    while (NumElems != 0) {
2805      for (unsigned i = 0; i < NumElems; ++i)
2806        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
2807                           UnpckMask);
2808      NumElems >>= 1;
2809    }
2810    return V[0];
2811  }
2812
2813  return SDOperand();
2814}
2815
2816SDOperand
2817X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
2818  SDOperand V1 = Op.getOperand(0);
2819  SDOperand V2 = Op.getOperand(1);
2820  SDOperand PermMask = Op.getOperand(2);
2821  MVT::ValueType VT = Op.getValueType();
2822  unsigned NumElems = PermMask.getNumOperands();
2823  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
2824  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
2825  bool V1IsSplat = false;
2826  bool V2IsSplat = false;
2827
2828  if (isUndefShuffle(Op.Val))
2829    return DAG.getNode(ISD::UNDEF, VT);
2830
2831  if (isSplatMask(PermMask.Val)) {
2832    if (NumElems <= 4) return Op;
2833    // Promote it to a v4i32 splat.
2834    return PromoteSplat(Op, DAG);
2835  }
2836
2837  if (X86::isMOVLMask(PermMask.Val))
2838    return (V1IsUndef) ? V2 : Op;
2839
2840  if (X86::isMOVSHDUPMask(PermMask.Val) ||
2841      X86::isMOVSLDUPMask(PermMask.Val) ||
2842      X86::isMOVHLPSMask(PermMask.Val) ||
2843      X86::isMOVHPMask(PermMask.Val) ||
2844      X86::isMOVLPMask(PermMask.Val))
2845    return Op;
2846
2847  if (ShouldXformToMOVHLPS(PermMask.Val) ||
2848      ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
2849    return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2850
2851  bool Commuted = false;
2852  V1IsSplat = isSplatVector(V1.Val);
2853  V2IsSplat = isSplatVector(V2.Val);
2854  if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
2855    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2856    std::swap(V1IsSplat, V2IsSplat);
2857    std::swap(V1IsUndef, V2IsUndef);
2858    Commuted = true;
2859  }
2860
2861  if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
2862    if (V2IsUndef) return V1;
2863    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2864    if (V2IsSplat) {
2865      // V2 is a splat, so the mask may be malformed. That is, it may point
2866      // to any V2 element. The instruction selector won't like this. Get
2867      // a corrected mask and commute to form a proper MOVS{S|D}.
2868      SDOperand NewMask = getMOVLMask(NumElems, DAG);
2869      if (NewMask.Val != PermMask.Val)
2870        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2871    }
2872    return Op;
2873  }
2874
2875  if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
2876      X86::isUNPCKLMask(PermMask.Val) ||
2877      X86::isUNPCKHMask(PermMask.Val))
2878    return Op;
2879
2880  if (V2IsSplat) {
2881    // Normalize the mask so all entries that point to V2 point to its first
2882    // element, then try to match unpck{h|l} again. If they match, return a
2883    // new vector_shuffle with the corrected mask.
2884    SDOperand NewMask = NormalizeMask(PermMask, DAG);
2885    if (NewMask.Val != PermMask.Val) {
2886      if (X86::isUNPCKLMask(PermMask.Val, true)) {
2887        SDOperand NewMask = getUnpacklMask(NumElems, DAG);
2888        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2889      } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
2890        SDOperand NewMask = getUnpackhMask(NumElems, DAG);
2891        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2892      }
2893    }
2894  }
2895
2896  // Normalize the node to match x86 shuffle ops if needed
2897  if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
2898      Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2899
2900  if (Commuted) {
2901    // Commute it back and try unpck* again.
2902    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2903    if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
2904        X86::isUNPCKLMask(PermMask.Val) ||
2905        X86::isUNPCKHMask(PermMask.Val))
2906      return Op;
2907  }
2908
2909  // If VT is integer, try PSHUF* first, then SHUFP*.
2910  if (MVT::isInteger(VT)) {
2911    if (X86::isPSHUFDMask(PermMask.Val) ||
2912        X86::isPSHUFHWMask(PermMask.Val) ||
2913        X86::isPSHUFLWMask(PermMask.Val)) {
2914      if (V2.getOpcode() != ISD::UNDEF)
2915        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
2916                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
2917      return Op;
2918    }
2919
2920    if (X86::isSHUFPMask(PermMask.Val))
2921      return Op;
2922
2923    // Handle v8i16 shuffle high / low shuffle node pair.
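    // The first shuffle applies the low half of PermMask and keeps the high
    // half as the identity <4,5,6,7>; the second applies the high half of
    // PermMask on top of that result with an identity low half <0,1,2,3>.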
2924    if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
2925      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2926      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2927      SmallVector<SDOperand, 8> MaskVec;
2928      for (unsigned i = 0; i != 4; ++i)
2929        MaskVec.push_back(PermMask.getOperand(i));
2930      for (unsigned i = 4; i != 8; ++i)
2931        MaskVec.push_back(DAG.getConstant(i, BaseVT));
2932      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2933                                   &MaskVec[0], MaskVec.size());
2934      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2935      MaskVec.clear();
2936      for (unsigned i = 0; i != 4; ++i)
2937        MaskVec.push_back(DAG.getConstant(i, BaseVT));
2938      for (unsigned i = 4; i != 8; ++i)
2939        MaskVec.push_back(PermMask.getOperand(i));
2940      Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
2941      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2942    }
2943  } else {
2944    // Floating point cases in the other order.
2945    if (X86::isSHUFPMask(PermMask.Val))
2946      return Op;
2947    if (X86::isPSHUFDMask(PermMask.Val) ||
2948        X86::isPSHUFHWMask(PermMask.Val) ||
2949        X86::isPSHUFLWMask(PermMask.Val)) {
2950      if (V2.getOpcode() != ISD::UNDEF)
2951        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
2952                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
2953      return Op;
2954    }
2955  }
2956
2957  if (NumElems == 4) {
2958    MVT::ValueType MaskVT = PermMask.getValueType();
2959    MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
2960    SmallVector<std::pair<int, int>, 8> Locs;
2961    Locs.reserve(NumElems);
2962    SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2963    SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2964    unsigned NumHi = 0;
2965    unsigned NumLo = 0;
2966    // If no more than two elements come from either vector, this can be
2967    // implemented with two shuffles. The first shuffle gathers the elements;
2968    // the second shuffle, which takes the first shuffle as both of its
2969    // vector operands, puts the elements into the right order.
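    // For example, PermMask <0,4,1,5> gathers with Mask1 = <0,1,4,5> to produce
    // <V1[0],V1[1],V2[0],V2[1]>, then reorders that result with
    // Mask2 = <0,2,5,7> to yield the requested <V1[0],V2[0],V1[1],V2[1]>.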
2970    for (unsigned i = 0; i != NumElems; ++i) {
2971      SDOperand Elt = PermMask.getOperand(i);
2972      if (Elt.getOpcode() == ISD::UNDEF) {
2973        Locs[i] = std::make_pair(-1, -1);
2974      } else {
2975        unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
2976        if (Val < NumElems) {
2977          Locs[i] = std::make_pair(0, NumLo);
2978          Mask1[NumLo] = Elt;
2979          NumLo++;
2980        } else {
2981          Locs[i] = std::make_pair(1, NumHi);
2982          if (2+NumHi < NumElems)
2983            Mask1[2+NumHi] = Elt;
2984          NumHi++;
2985        }
2986      }
2987    }
2988    if (NumLo <= 2 && NumHi <= 2) {
2989      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2990                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2991                                   &Mask1[0], Mask1.size()));
2992      for (unsigned i = 0; i != NumElems; ++i) {
2993        if (Locs[i].first == -1)
2994          continue;
2995        else {
2996          unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
2997          Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
2998          Mask2[i] = DAG.getConstant(Idx, MaskEVT);
2999        }
3000      }
3001
3002      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
3003                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3004                                     &Mask2[0], Mask2.size()));
3005    }
3006
3007    // Break it into (shuffle shuffle_hi, shuffle_lo).
3008    Locs.clear();
3009    SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3010    SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3011    SmallVector<SDOperand,8> *MaskPtr = &LoMask;
3012    unsigned MaskIdx = 0;
3013    unsigned LoIdx = 0;
3014    unsigned HiIdx = NumElems/2;
3015    for (unsigned i = 0; i != NumElems; ++i) {
3016      if (i == NumElems/2) {
3017        MaskPtr = &HiMask;
3018        MaskIdx = 1;
3019        LoIdx = 0;
3020        HiIdx = NumElems/2;
3021      }
3022      SDOperand Elt = PermMask.getOperand(i);
3023      if (Elt.getOpcode() == ISD::UNDEF) {
3024        Locs[i] = std::make_pair(-1, -1);
3025      } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
3026        Locs[i] = std::make_pair(MaskIdx, LoIdx);
3027        (*MaskPtr)[LoIdx] = Elt;
3028        LoIdx++;
3029      } else {
3030        Locs[i] = std::make_pair(MaskIdx, HiIdx);
3031        (*MaskPtr)[HiIdx] = Elt;
3032        HiIdx++;
3033      }
3034    }
3035
3036    SDOperand LoShuffle =
3037      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3038                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3039                              &LoMask[0], LoMask.size()));
3040    SDOperand HiShuffle =
3041      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3042                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3043                              &HiMask[0], HiMask.size()));
3044    SmallVector<SDOperand, 8> MaskOps;
3045    for (unsigned i = 0; i != NumElems; ++i) {
3046      if (Locs[i].first == -1) {
3047        MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
3048      } else {
3049        unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
3050        MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
3051      }
3052    }
3053    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
3054                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3055                                   &MaskOps[0], MaskOps.size()));
3056  }
3057
3058  return SDOperand();
3059}
3060
3061SDOperand
3062X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3063  if (!isa<ConstantSDNode>(Op.getOperand(1)))
3064    return SDOperand();
3065
3066  MVT::ValueType VT = Op.getValueType();
3067  // TODO: handle v16i8.
3068  if (MVT::getSizeInBits(VT) == 16) {
3069    // Transform it so it matches pextrw, which produces a 32-bit result.
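    // (VT+1 relies on the MVT enumeration: the next value after MVT::i16 is
    // MVT::i32, the type pextrw actually produces.)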
3070    MVT::ValueType EVT = (MVT::ValueType)(VT+1);
3071    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
3072                                    Op.getOperand(0), Op.getOperand(1));
3073    SDOperand Assert  = DAG.getNode(ISD::AssertZext, EVT, Extract,
3074                                    DAG.getValueType(VT));
3075    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
3076  } else if (MVT::getSizeInBits(VT) == 32) {
3077    SDOperand Vec = Op.getOperand(0);
3078    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3079    if (Idx == 0)
3080      return Op;
3081    // SHUFPS the element to the lowest double word, then movss.
3082    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3083    SmallVector<SDOperand, 8> IdxVec;
3084    IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
3085    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3086    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3087    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3088    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3089                                 &IdxVec[0], IdxVec.size());
3090    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3091                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3092    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3093                       DAG.getConstant(0, getPointerTy()));
3094  } else if (MVT::getSizeInBits(VT) == 64) {
3095    SDOperand Vec = Op.getOperand(0);
3096    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3097    if (Idx == 0)
3098      return Op;
3099
3100    // UNPCKHPD the element to the lowest double word, then movsd.
3101    // Note that if the lower 64 bits of the result of the UNPCKHPD are then
3102    // stored to an f64mem, the whole operation is folded into a single MOVHPDmr.
3103    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3104    SmallVector<SDOperand, 8> IdxVec;
3105    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
3106    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3107    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3108                                 &IdxVec[0], IdxVec.size());
3109    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3110                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3111    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3112                       DAG.getConstant(0, getPointerTy()));
3113  }
3114
3115  return SDOperand();
3116}
3117
3118SDOperand
3119X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3120  // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
3121  // as its second argument.
3122  MVT::ValueType VT = Op.getValueType();
3123  MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
3124  SDOperand N0 = Op.getOperand(0);
3125  SDOperand N1 = Op.getOperand(1);
3126  SDOperand N2 = Op.getOperand(2);
3127  if (MVT::getSizeInBits(BaseVT) == 16) {
3128    if (N1.getValueType() != MVT::i32)
3129      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
3130    if (N2.getValueType() != MVT::i32)
3131      N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
3132    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
3133  } else if (MVT::getSizeInBits(BaseVT) == 32) {
3134    unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
3135    if (Idx == 0) {
3136      // Use a movss.
3137      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
3138      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3139      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
3140      SmallVector<SDOperand, 8> MaskVec;
3141      MaskVec.push_back(DAG.getConstant(4, BaseVT));
3142      for (unsigned i = 1; i <= 3; ++i)
3143        MaskVec.push_back(DAG.getConstant(i, BaseVT));
3144      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
3145                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3146                                     &MaskVec[0], MaskVec.size()));
3147    } else {
3148      // Use two pinsrw instructions to insert a 32-bit value.
3149      Idx <<= 1;
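      // The vector is reinterpreted as v8i16 and the 32-bit value is inserted
      // as two 16-bit halves at word positions Idx and Idx+1 (the low half
      // first, then the value shifted right by 16).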
3150      if (MVT::isFloatingPoint(N1.getValueType())) {
3151        if (ISD::isNON_EXTLoad(N1.Val)) {
3152          // Just load directly from f32mem to GR32.
3153          LoadSDNode *LD = cast<LoadSDNode>(N1);
3154          N1 = DAG.getLoad(MVT::i32, LD->getChain(), LD->getBasePtr(),
3155                           LD->getSrcValue(), LD->getSrcValueOffset());
3156        } else {
3157          N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
3158          N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
3159          N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
3160                           DAG.getConstant(0, getPointerTy()));
3161        }
3162      }
3163      N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
3164      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3165                       DAG.getConstant(Idx, getPointerTy()));
3166      N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
3167      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3168                       DAG.getConstant(Idx+1, getPointerTy()));
3169      return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
3170    }
3171  }
3172
3173  return SDOperand();
3174}
3175
3176SDOperand
3177X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
3178  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
3179  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
3180}
3181
3182// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3183// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
3184// one of the above-mentioned nodes. It has to be wrapped because otherwise
3185// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3186// be used to form addressing modes. These wrapped nodes will be selected
3187// into MOV32ri.
3188SDOperand
3189X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
3190  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3191  SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
3192                                               getPointerTy(),
3193                                               CP->getAlignment());
3194  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3195  // With PIC, the address is actually $g + Offset.
3196  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3197      !Subtarget->isPICStyleRIPRel()) {
3198    Result = DAG.getNode(ISD::ADD, getPointerTy(),
3199                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3200                         Result);
3201  }
3202
3203  return Result;
3204}
3205
3206SDOperand
3207X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
3208  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3209  SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
3210  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3211  // With PIC, the address is actually $g + Offset.
3212  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3213      !Subtarget->isPICStyleRIPRel()) {
3214    Result = DAG.getNode(ISD::ADD, getPointerTy(),
3215                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3216                         Result);
3217  }
3218
3219  // For Darwin & Mingw32, external and weak symbols are indirect, so we want to
3220  // load the value at address GV, not the value of GV itself. This means that
3221  // the GlobalAddress must be in the base or index register of the address, not
3222  // the GV offset field. The platform check is inside the GVRequiresExtraLoad()
3223  // call. The same applies to external symbols during PIC codegen.
3224  if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
3225    Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
3226
3227  return Result;
3228}
3229
3230SDOperand
3231X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
3232  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
3233  SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
3234  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3235  // With PIC, the address is actually $g + Offset.
3236  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3237      !Subtarget->isPICStyleRIPRel()) {
3238    Result = DAG.getNode(ISD::ADD, getPointerTy(),
3239                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3240                         Result);
3241  }
3242
3243  return Result;
3244}
3245
3246SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
3247  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3248  SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
3249  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3250  // With PIC, the address is actually $g + Offset.
3251  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3252      !Subtarget->isPICStyleRIPRel()) {
3253    Result = DAG.getNode(ISD::ADD, getPointerTy(),
3254                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3255                         Result);
3256  }
3257
3258  return Result;
3259}
3260
3261SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
3262    assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
3263           "Not an i64 shift!");
3264    bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
3265    SDOperand ShOpLo = Op.getOperand(0);
3266    SDOperand ShOpHi = Op.getOperand(1);
3267    SDOperand ShAmt  = Op.getOperand(2);
3268    SDOperand Tmp1 = isSRA ?
3269      DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
3270      DAG.getConstant(0, MVT::i32);
3271
3272    SDOperand Tmp2, Tmp3;
3273    if (Op.getOpcode() == ISD::SHL_PARTS) {
3274      Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
3275      Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
3276    } else {
3277      Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
3278      Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
3279    }
3280
3281    const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3282    SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
3283                                    DAG.getConstant(32, MVT::i8));
3284    SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)};
3285    SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1);
3286
3287    SDOperand Hi, Lo;
3288    SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
3289
3290    VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
3291    SmallVector<SDOperand, 4> Ops;
3292    if (Op.getOpcode() == ISD::SHL_PARTS) {
3293      Ops.push_back(Tmp2);
3294      Ops.push_back(Tmp3);
3295      Ops.push_back(CC);
3296      Ops.push_back(InFlag);
3297      Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3298      InFlag = Hi.getValue(1);
3299
3300      Ops.clear();
3301      Ops.push_back(Tmp3);
3302      Ops.push_back(Tmp1);
3303      Ops.push_back(CC);
3304      Ops.push_back(InFlag);
3305      Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3306    } else {
3307      Ops.push_back(Tmp2);
3308      Ops.push_back(Tmp3);
3309      Ops.push_back(CC);
3310      Ops.push_back(InFlag);
3311      Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3312      InFlag = Lo.getValue(1);
3313
3314      Ops.clear();
3315      Ops.push_back(Tmp3);
3316      Ops.push_back(Tmp1);
3317      Ops.push_back(CC);
3318      Ops.push_back(InFlag);
3319      Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3320    }
3321
3322    VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
3323    Ops.clear();
3324    Ops.push_back(Lo);
3325    Ops.push_back(Hi);
3326    return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
3327}
3328
3329SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
3330  assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
3331         Op.getOperand(0).getValueType() >= MVT::i16 &&
3332         "Unknown SINT_TO_FP to lower!");
3333
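  // Spill the integer to a stack slot and convert it with FILD. With SSE
  // enabled, the x87 result is immediately stored back to memory and reloaded
  // into an SSE register (see the FST sequence below).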
3334  SDOperand Result;
3335  MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
3336  unsigned Size = MVT::getSizeInBits(SrcVT)/8;
3337  MachineFunction &MF = DAG.getMachineFunction();
3338  int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
3339  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3340  SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
3341                                 StackSlot, NULL, 0);
3342
3343  // Build the FILD
3344  SDVTList Tys;
3345  if (X86ScalarSSE)
3346    Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
3347  else
3348    Tys = DAG.getVTList(MVT::f64, MVT::Other);
3349  SmallVector<SDOperand, 8> Ops;
3350  Ops.push_back(Chain);
3351  Ops.push_back(StackSlot);
3352  Ops.push_back(DAG.getValueType(SrcVT));
3353  Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
3354                       Tys, &Ops[0], Ops.size());
3355
3356  if (X86ScalarSSE) {
3357    Chain = Result.getValue(1);
3358    SDOperand InFlag = Result.getValue(2);
3359
3360    // FIXME: Currently the FST is flagged to the FILD_FLAG. This
3361    // shouldn't be necessary except that RFP cannot be live across
3362    // multiple blocks. When the stackifier is fixed, they can be uncoupled.
3363    MachineFunction &MF = DAG.getMachineFunction();
3364    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
3365    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3366    Tys = DAG.getVTList(MVT::Other);
3367    SmallVector<SDOperand, 8> Ops;
3368    Ops.push_back(Chain);
3369    Ops.push_back(Result);
3370    Ops.push_back(StackSlot);
3371    Ops.push_back(DAG.getValueType(Op.getValueType()));
3372    Ops.push_back(InFlag);
3373    Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
3374    Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0);
3375  }
3376
3377  return Result;
3378}
3379
3380SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
3381  assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
3382         "Unknown FP_TO_SINT to lower!");
3383  // We lower FP->sint into an FP_TO_INT*_IN_MEM node, followed by a load,
3384  // all through a temporary stack slot.
3385  MachineFunction &MF = DAG.getMachineFunction();
3386  unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
3387  int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
3388  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3389
3390  unsigned Opc;
3391  switch (Op.getValueType()) {
3392    default: assert(0 && "Invalid FP_TO_SINT to lower!");
3393    case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
3394    case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
3395    case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
3396  }
3397
3398  SDOperand Chain = DAG.getEntryNode();
3399  SDOperand Value = Op.getOperand(0);
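  // With SSE, the value lives in an XMM register, so spill it and reload it
  // through FLD to get it onto the x87 stack where FISTP can operate on it.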
3400  if (X86ScalarSSE) {
3401    assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
3402    Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
3403    SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other);
3404    SDOperand Ops[] = {
3405      Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
3406    };
3407    Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
3408    Chain = Value.getValue(1);
3409    SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
3410    StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3411  }
3412
3413  // Build the FP_TO_INT*_IN_MEM
3414  SDOperand Ops[] = { Chain, Value, StackSlot };
3415  SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);
3416
3417  // Load the result.
3418  return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
3419}
3420
3421SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
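  // fabs is lowered as a bitwise AND with a constant-pool mask that has every
  // bit set except the sign bit, which clears just the sign.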
3422  MVT::ValueType VT = Op.getValueType();
3423  const Type *OpNTy =  MVT::getTypeForValueType(VT);
3424  std::vector<Constant*> CV;
3425  if (VT == MVT::f64) {
3426    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
3427    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3428  } else {
3429    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
3430    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3431    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3432    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3433  }
3434  Constant *CS = ConstantStruct::get(CV);
3435  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3436  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3437  SmallVector<SDOperand, 3> Ops;
3438  Ops.push_back(DAG.getEntryNode());
3439  Ops.push_back(CPIdx);
3440  Ops.push_back(DAG.getSrcValue(NULL));
3441  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
3442  return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
3443}
3444
3445SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
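  // fneg is lowered as a bitwise XOR with a constant-pool mask that has only
  // the sign bit set, which flips just the sign.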
3446  MVT::ValueType VT = Op.getValueType();
3447  const Type *OpNTy =  MVT::getTypeForValueType(VT);
3448  std::vector<Constant*> CV;
3449  if (VT == MVT::f64) {
3450    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
3451    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3452  } else {
3453    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
3454    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3455    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3456    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3457  }
3458  Constant *CS = ConstantStruct::get(CV);
3459  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3460  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3461  SmallVector<SDOperand, 3> Ops;
3462  Ops.push_back(DAG.getEntryNode());
3463  Ops.push_back(CPIdx);
3464  Ops.push_back(DAG.getSrcValue(NULL));
3465  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
3466  return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
3467}
3468
3469SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
3470  SDOperand Op0 = Op.getOperand(0);
3471  SDOperand Op1 = Op.getOperand(1);
3472  MVT::ValueType VT = Op.getValueType();
3473  MVT::ValueType SrcVT = Op1.getValueType();
3474  const Type *SrcTy =  MVT::getTypeForValueType(SrcVT);
3475
3476  // If second operand is smaller, extend it first.
3477  if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
3478    Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
3479    SrcVT = VT;
3480  }
3481
3482  // First get the sign bit of second operand.
3483  std::vector<Constant*> CV;
3484  if (SrcVT == MVT::f64) {
3485    CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(1ULL << 63)));
3486    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3487  } else {
3488    CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(1U << 31)));
3489    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3490    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3491    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3492  }
3493  Constant *CS = ConstantStruct::get(CV);
3494  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3495  SDVTList Tys = DAG.getVTList(SrcVT, MVT::Other);
3496  SmallVector<SDOperand, 3> Ops;
3497  Ops.push_back(DAG.getEntryNode());
3498  Ops.push_back(CPIdx);
3499  Ops.push_back(DAG.getSrcValue(NULL));
3500  SDOperand Mask1 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
3501  SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
3502
3503  // Shift sign bit right or left if the two operands have different types.
3504  if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
3505    // Op0 is MVT::f32, Op1 is MVT::f64.
3506    SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
3507    SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
3508                          DAG.getConstant(32, MVT::i32));
3509    SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
3510    SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
3511                          DAG.getConstant(0, getPointerTy()));
3512  }
3513
3514  // Clear first operand sign bit.
3515  CV.clear();
3516  if (VT == MVT::f64) {
3517    CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(~(1ULL << 63))));
3518    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3519  } else {
3520    CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(~(1U << 31))));
3521    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3522    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3523    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3524  }
3525  CS = ConstantStruct::get(CV);
3526  CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3527  Tys = DAG.getVTList(VT, MVT::Other);
3528  Ops.clear();
3529  Ops.push_back(DAG.getEntryNode());
3530  Ops.push_back(CPIdx);
3531  Ops.push_back(DAG.getSrcValue(NULL));
3532  SDOperand Mask2 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
3533  SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
3534
3535  // Or the value with the sign bit.
3536  return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
3537}
3538
3539SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
3540                                        SDOperand Chain) {
3541  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
3542  SDOperand Cond;
3543  SDOperand Op0 = Op.getOperand(0);
3544  SDOperand Op1 = Op.getOperand(1);
3545  SDOperand CC = Op.getOperand(2);
3546  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
3547  const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3548  const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
3549  bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
3550  unsigned X86CC;
3551
3552  if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
3553                     Op0, Op1, DAG)) {
3554    SDOperand Ops1[] = { Chain, Op0, Op1 };
3555    Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1);
3556    SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
3557    return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
3558  }
3559
3560  assert(isFP && "Illegal integer SetCC!");
3561
3562  SDOperand COps[] = { Chain, Op0, Op1 };
3563  Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1);
3564
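  // A single condition code cannot express these two predicates: an FP compare
  // sets ZF, PF and CF, and an unordered result (NaN) sets ZF as well, so
  // SETOEQ must check !PF && ZF and SETUNE must check PF || !ZF.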
3565  switch (SetCCOpcode) {
3566  default: assert(false && "Illegal floating point SetCC!");
3567  case ISD::SETOEQ: {  // !PF & ZF
3568    SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond };
3569    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
3570    SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8),
3571                         Tmp1.getValue(1) };
3572    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
3573    return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
3574  }
3575  case ISD::SETUNE: {  // PF | !ZF
3576    SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond };
3577    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
3578    SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8),
3579                         Tmp1.getValue(1) };
3580    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
3581    return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
3582  }
3583  }
3584}
3585
3586SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
3587  bool addTest = true;
3588  SDOperand Chain = DAG.getEntryNode();
3589  SDOperand Cond  = Op.getOperand(0);
3590  SDOperand CC;
3591  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3592
3593  if (Cond.getOpcode() == ISD::SETCC)
3594    Cond = LowerSETCC(Cond, DAG, Chain);
3595
3596  if (Cond.getOpcode() == X86ISD::SETCC) {
3597    CC = Cond.getOperand(0);
3598
3599    // If the condition flag is set by an X86ISD::CMP, then make a copy of it
3600    // (since the flag operand cannot be shared). Use it as the condition-setting
3601    // operand in place of the X86ISD::SETCC.
3602    // If the X86ISD::SETCC has more than one use, then perhaps it's better
3603    // to use a test instead of duplicating the X86ISD::CMP (for register
3604    // pressure reasons)?
3605    SDOperand Cmp = Cond.getOperand(1);
3606    unsigned Opc = Cmp.getOpcode();
3607    bool IllegalFPCMov = !X86ScalarSSE &&
3608      MVT::isFloatingPoint(Op.getValueType()) &&
3609      !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
3610    if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) &&
3611        !IllegalFPCMov) {
3612      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
3613      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
3614      addTest = false;
3615    }
3616  }
3617
3618  if (addTest) {
3619    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
3620    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
3621    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
3622  }
3623
3624  VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag);
3625  SmallVector<SDOperand, 4> Ops;
3626  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if the
3627  // condition is true.
3628  Ops.push_back(Op.getOperand(2));
3629  Ops.push_back(Op.getOperand(1));
3630  Ops.push_back(CC);
3631  Ops.push_back(Cond.getValue(1));
3632  return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3633}
3634
3635SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
3636  bool addTest = true;
3637  SDOperand Chain = Op.getOperand(0);
3638  SDOperand Cond  = Op.getOperand(1);
3639  SDOperand Dest  = Op.getOperand(2);
3640  SDOperand CC;
3641  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3642
3643  if (Cond.getOpcode() == ISD::SETCC)
3644    Cond = LowerSETCC(Cond, DAG, Chain);
3645
3646  if (Cond.getOpcode() == X86ISD::SETCC) {
3647    CC = Cond.getOperand(0);
3648
3649    // If the condition flag is set by an X86ISD::CMP, then make a copy of it
3650    // (since the flag operand cannot be shared). Use it as the condition-setting
3651    // operand in place of the X86ISD::SETCC.
3652    // If the X86ISD::SETCC has more than one use, then perhaps it's better
3653    // to use a test instead of duplicating the X86ISD::CMP (for register
3654    // pressure reasons)?
3655    SDOperand Cmp = Cond.getOperand(1);
3656    unsigned Opc = Cmp.getOpcode();
3657    if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) {
3658      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
3659      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
3660      addTest = false;
3661    }
3662  }
3663
3664  if (addTest) {
3665    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
3666    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
3667    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
3668  }
3669  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
3670                     Cond, Op.getOperand(2), CC, Cond.getValue(1));
3671}
3672
3673SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
3674  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3675
3676  if (Subtarget->is64Bit())
3677    return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
3678  else
3679    switch (CallingConv) {
3680    default:
3681      assert(0 && "Unsupported calling convention");
3682    case CallingConv::Fast:
3683      if (EnableFastCC)
3684        return LowerFastCCCallTo(Op, DAG, CallingConv);
3685      // Falls through
3686    case CallingConv::C:
3687    case CallingConv::X86_StdCall:
3688      return LowerCCCCallTo(Op, DAG, CallingConv);
3689    case CallingConv::X86_FastCall:
3690      return LowerFastCCCallTo(Op, DAG, CallingConv);
3691    }
3692}
3693
3694SDOperand
3695X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
3696  MachineFunction &MF = DAG.getMachineFunction();
3697  const Function* Fn = MF.getFunction();
3698  if (Fn->hasExternalLinkage() &&
3699      Subtarget->isTargetCygMing() &&
3700      Fn->getName() == "main")
3701    MF.getInfo<X86FunctionInfo>()->setForceFramePointer(true);
3702
3703  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3704  if (Subtarget->is64Bit())
3705    return LowerX86_64CCCArguments(Op, DAG);
3706  else
3707    switch(CC) {
3708    default:
3709      assert(0 && "Unsupported calling convention");
3710    case CallingConv::Fast:
3711      if (EnableFastCC) {
3712        return LowerFastCCArguments(Op, DAG);
3713      }
3714      // Falls through
3715    case CallingConv::C:
3716      return LowerCCCArguments(Op, DAG);
3717    case CallingConv::X86_StdCall:
3718      MF.getInfo<X86FunctionInfo>()->setDecorationStyle(StdCall);
3719      return LowerCCCArguments(Op, DAG, true);
3720    case CallingConv::X86_FastCall:
3721      MF.getInfo<X86FunctionInfo>()->setDecorationStyle(FastCall);
3722      return LowerFastCCArguments(Op, DAG, true);
3723    }
3724}
3725
3726SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
3727  SDOperand InFlag(0, 0);
3728  SDOperand Chain = Op.getOperand(0);
3729  unsigned Align =
3730    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
3731  if (Align == 0) Align = 1;
3732
3733  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
3734  // If not DWORD aligned, call memset if the size is less than the threshold.
3735  // The library call knows how to align to the right boundary first.
3736  if ((Align & 3) != 0 ||
3737      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
3738    MVT::ValueType IntPtr = getPointerTy();
3739    const Type *IntPtrTy = getTargetData()->getIntPtrType();
3740    TargetLowering::ArgListTy Args;
3741    TargetLowering::ArgListEntry Entry;
3742    Entry.Node = Op.getOperand(1);
3743    Entry.Ty = IntPtrTy;
3744    Entry.isSigned = false;
3745    Entry.isInReg = false;
3746    Entry.isSRet = false;
3747    Args.push_back(Entry);
3748    // Extend the unsigned i8 argument to be an int value for the call.
3749    Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
3750    Entry.Ty = IntPtrTy;
3751    Entry.isSigned = false;
3752    Entry.isInReg = false;
3753    Entry.isSRet = false;
3754    Args.push_back(Entry);
3755    Entry.Node = Op.getOperand(3);
3756    Args.push_back(Entry);
3757    std::pair<SDOperand,SDOperand> CallResult =
3758      LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
3759                  DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
3760    return CallResult.second;
3761  }
3762
3763  MVT::ValueType AVT;
3764  SDOperand Count;
3765  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3766  unsigned BytesLeft = 0;
3767  bool TwoRepStos = false;
3768  if (ValC) {
3769    unsigned ValReg;
3770    uint64_t Val = ValC->getValue() & 255;
3771
3772    // If the value is a constant, then we can potentially use larger sets.
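    // e.g. a byte value of 0xAB is widened to 0xABAB for WORD stores,
    // 0xABABABAB for DWORD stores, and 0xABABABABABABABAB for QWORD stores,
    // so each rep;stos iteration writes several bytes at once.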
3773    switch (Align & 3) {
3774      case 2:   // WORD aligned
3775        AVT = MVT::i16;
3776        ValReg = X86::AX;
3777        Val = (Val << 8) | Val;
3778        break;
3779      case 0:  // DWORD aligned
3780        AVT = MVT::i32;
3781        ValReg = X86::EAX;
3782        Val = (Val << 8)  | Val;
3783        Val = (Val << 16) | Val;
3784        if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) {  // QWORD aligned
3785          AVT = MVT::i64;
3786          ValReg = X86::RAX;
3787          Val = (Val << 32) | Val;
3788        }
3789        break;
3790      default:  // Byte aligned
3791        AVT = MVT::i8;
3792        ValReg = X86::AL;
3793        Count = Op.getOperand(3);
3794        break;
3795    }
3796
3797    if (AVT > MVT::i8) {
3798      if (I) {
3799        unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
3800        Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
3801        BytesLeft = I->getValue() % UBytes;
3802      } else {
3803        assert(AVT >= MVT::i32 &&
3804               "Do not use rep;stos if not at least DWORD aligned");
3805        Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
3806                            Op.getOperand(3), DAG.getConstant(2, MVT::i8));
3807        TwoRepStos = true;
3808      }
3809    }
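    // For units larger than a byte, either BytesLeft now holds the 1-7 trailing
    // bytes of a constant count (stored individually below), or TwoRepStos is
    // set so a second byte-sized rep;stos handles the remainder (Count & 3, or
    // & 7 for QWORD stores).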
3810
3811    Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
3812                              InFlag);
3813    InFlag = Chain.getValue(1);
3814  } else {
3815    AVT = MVT::i8;
3816    Count  = Op.getOperand(3);
3817    Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
3818    InFlag = Chain.getValue(1);
3819  }
3820
3821  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
3822                            Count, InFlag);
3823  InFlag = Chain.getValue(1);
3824  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
3825                            Op.getOperand(1), InFlag);
3826  InFlag = Chain.getValue(1);
3827
3828  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
3829  SmallVector<SDOperand, 8> Ops;
3830  Ops.push_back(Chain);
3831  Ops.push_back(DAG.getValueType(AVT));
3832  Ops.push_back(InFlag);
3833  Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
3834
3835  if (TwoRepStos) {
3836    InFlag = Chain.getValue(1);
3837    Count = Op.getOperand(3);
3838    MVT::ValueType CVT = Count.getValueType();
3839    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
3840                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
3841    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
3842                              Left, InFlag);
3843    InFlag = Chain.getValue(1);
3844    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
3845    Ops.clear();
3846    Ops.push_back(Chain);
3847    Ops.push_back(DAG.getValueType(MVT::i8));
3848    Ops.push_back(InFlag);
3849    Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
3850  } else if (BytesLeft) {
3851    // Issue stores for the last 1 - 7 bytes.
3852    SDOperand Value;
3853    unsigned Val = ValC->getValue() & 255;
3854    unsigned Offset = I->getValue() - BytesLeft;
3855    SDOperand DstAddr = Op.getOperand(1);
3856    MVT::ValueType AddrVT = DstAddr.getValueType();
3857    if (BytesLeft >= 4) {
3858      Val = (Val << 8)  | Val;
3859      Val = (Val << 16) | Val;
3860      Value = DAG.getConstant(Val, MVT::i32);
3861      Chain = DAG.getStore(Chain, Value,
3862                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
3863                                       DAG.getConstant(Offset, AddrVT)),
3864                           NULL, 0);
3865      BytesLeft -= 4;
3866      Offset += 4;
3867    }
3868    if (BytesLeft >= 2) {
3869      Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
3870      Chain = DAG.getStore(Chain, Value,
3871                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
3872                                       DAG.getConstant(Offset, AddrVT)),
3873                           NULL, 0);
3874      BytesLeft -= 2;
3875      Offset += 2;
3876    }
3877    if (BytesLeft == 1) {
3878      Value = DAG.getConstant(Val, MVT::i8);
3879      Chain = DAG.getStore(Chain, Value,
3880                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
3881                                       DAG.getConstant(Offset, AddrVT)),
3882                           NULL, 0);
3883    }
3884  }
3885
3886  return Chain;
3887}
3888
3889SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
3890  SDOperand Chain = Op.getOperand(0);
3891  unsigned Align =
3892    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
3893  if (Align == 0) Align = 1;
3894
3895  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
3896  // If not DWORD aligned, call memcpy if the size is less than the threshold.
3897  // The library call knows how to align to the right boundary first.
3898  if ((Align & 3) != 0 ||
3899      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
3900    MVT::ValueType IntPtr = getPointerTy();
3901    TargetLowering::ArgListTy Args;
3902    TargetLowering::ArgListEntry Entry;
3903    Entry.Ty = getTargetData()->getIntPtrType();
3904    Entry.isSigned = false;
3905    Entry.isInReg = false;
3906    Entry.isSRet = false;
3907    Entry.Node = Op.getOperand(1); Args.push_back(Entry);
3908    Entry.Node = Op.getOperand(2); Args.push_back(Entry);
3909    Entry.Node = Op.getOperand(3); Args.push_back(Entry);
3910    std::pair<SDOperand,SDOperand> CallResult =
3911      LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
3912                  DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
3913    return CallResult.second;
3914  }
3915
3916  MVT::ValueType AVT;
3917  SDOperand Count;
3918  unsigned BytesLeft = 0;
3919  bool TwoRepMovs = false;
3920  switch (Align & 3) {
3921    case 2:   // WORD aligned
3922      AVT = MVT::i16;
3923      break;
3924    case 0:  // DWORD aligned
3925      AVT = MVT::i32;
3926      if (Subtarget->is64Bit() && ((Align & 0xF) == 0))  // QWORD aligned
3927        AVT = MVT::i64;
3928      break;
3929    default:  // Byte aligned
3930      AVT = MVT::i8;
3931      Count = Op.getOperand(3);
3932      break;
3933  }
3934
3935  if (AVT > MVT::i8) {
3936    if (I) {
3937      unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
3938      Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
3939      BytesLeft = I->getValue() % UBytes;
3940    } else {
3941      assert(AVT >= MVT::i32 &&
3942             "Do not use rep;movs if not at least DWORD aligned");
3943      Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
3944                          Op.getOperand(3), DAG.getConstant(2, MVT::i8));
3945      TwoRepMovs = true;
3946    }
3947  }
3948
3949  SDOperand InFlag(0, 0);
3950  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
3951                            Count, InFlag);
3952  InFlag = Chain.getValue(1);
3953  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
3954                            Op.getOperand(1), InFlag);
3955  InFlag = Chain.getValue(1);
3956  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
3957                            Op.getOperand(2), InFlag);
3958  InFlag = Chain.getValue(1);
3959
3960  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
3961  SmallVector<SDOperand, 8> Ops;
3962  Ops.push_back(Chain);
3963  Ops.push_back(DAG.getValueType(AVT));
3964  Ops.push_back(InFlag);
3965  Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
3966
3967  if (TwoRepMovs) {
3968    InFlag = Chain.getValue(1);
3969    Count = Op.getOperand(3);
3970    MVT::ValueType CVT = Count.getValueType();
3971    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
3972                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
3973    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
3974                              Left, InFlag);
3975    InFlag = Chain.getValue(1);
3976    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
3977    Ops.clear();
3978    Ops.push_back(Chain);
3979    Ops.push_back(DAG.getValueType(MVT::i8));
3980    Ops.push_back(InFlag);
3981    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
3982  } else if (BytesLeft) {
3983    // Issue loads and stores for the last 1 - 7 bytes.
3984    unsigned Offset = I->getValue() - BytesLeft;
3985    SDOperand DstAddr = Op.getOperand(1);
3986    MVT::ValueType DstVT = DstAddr.getValueType();
3987    SDOperand SrcAddr = Op.getOperand(2);
3988    MVT::ValueType SrcVT = SrcAddr.getValueType();
3989    SDOperand Value;
3990    if (BytesLeft >= 4) {
3991      Value = DAG.getLoad(MVT::i32, Chain,
3992                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
3993                                      DAG.getConstant(Offset, SrcVT)),
3994                          NULL, 0);
3995      Chain = Value.getValue(1);
3996      Chain = DAG.getStore(Chain, Value,
3997                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
3998                                       DAG.getConstant(Offset, DstVT)),
3999                           NULL, 0);
4000      BytesLeft -= 4;
4001      Offset += 4;
4002    }
4003    if (BytesLeft >= 2) {
4004      Value = DAG.getLoad(MVT::i16, Chain,
4005                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
4006                                      DAG.getConstant(Offset, SrcVT)),
4007                          NULL, 0);
4008      Chain = Value.getValue(1);
4009      Chain = DAG.getStore(Chain, Value,
4010                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
4011                                       DAG.getConstant(Offset, DstVT)),
4012                           NULL, 0);
4013      BytesLeft -= 2;
4014      Offset += 2;
4015    }
4016
4017    if (BytesLeft == 1) {
4018      Value = DAG.getLoad(MVT::i8, Chain,
4019                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
4020                                      DAG.getConstant(Offset, SrcVT)),
4021                          NULL, 0);
4022      Chain = Value.getValue(1);
4023      Chain = DAG.getStore(Chain, Value,
4024                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
4025                                       DAG.getConstant(Offset, DstVT)),
4026                           NULL, 0);
4027    }
4028  }
4029
4030  return Chain;
4031}
4032
4033SDOperand
4034X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
4035  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
4036  SDOperand TheOp = Op.getOperand(0);
4037  SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
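  // RDTSC returns the 64-bit counter split across EDX:EAX. On x86-64 the two
  // halves are recombined into a single i64 as (high << 32) | low; on x86-32
  // both 32-bit halves are returned separately.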
4038  if (Subtarget->is64Bit()) {
4039    SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
4040    SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
4041                                         MVT::i64, Copy1.getValue(2));
4042    SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
4043                                DAG.getConstant(32, MVT::i8));
4044    SDOperand Ops[] = {
4045      DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1)
4046    };
4047
4048    Tys = DAG.getVTList(MVT::i64, MVT::Other);
4049    return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
4050  }
4051
4052  SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
4053  SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX,
4054                                       MVT::i32, Copy1.getValue(2));
4055  SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) };
4056  Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
4057  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3);
4058}
4059
4060SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
4061  SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
4062
4063  if (!Subtarget->is64Bit()) {
4064    // vastart just stores the address of the VarArgsFrameIndex slot into the
4065    // memory location argument.
4066    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
4067    return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV->getValue(),
4068                        SV->getOffset());
4069  }
4070
4071  // __va_list_tag:
4072  //   gp_offset         (0 .. 6 * 8)
4073  //   fp_offset         (48 .. 48 + 8 * 16)
4074  //   overflow_arg_area (points to parameters passed in memory).
4075  //   reg_save_area
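  // Field offsets within the 24-byte __va_list_tag (SysV x86-64 ABI):
  // gp_offset at 0, fp_offset at 4, overflow_arg_area at 8 and
  // reg_save_area at 16; the constants added to FIN below step through
  // these fields in order.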
4076  SmallVector<SDOperand, 8> MemOps;
4077  SDOperand FIN = Op.getOperand(1);
4078  // Store gp_offset
4079  SDOperand Store = DAG.getStore(Op.getOperand(0),
4080                                 DAG.getConstant(VarArgsGPOffset, MVT::i32),
4081                                 FIN, SV->getValue(), SV->getOffset());
4082  MemOps.push_back(Store);
4083
4084  // Store fp_offset
4085  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
4086                    DAG.getConstant(4, getPointerTy()));
4087  Store = DAG.getStore(Op.getOperand(0),
4088                       DAG.getConstant(VarArgsFPOffset, MVT::i32),
4089                       FIN, SV->getValue(), SV->getOffset());
4090  MemOps.push_back(Store);
4091
4092  // Store ptr to overflow_arg_area
4093  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
4094                    DAG.getConstant(4, getPointerTy()));
4095  SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
4096  Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
4097                       SV->getOffset());
4098  MemOps.push_back(Store);
4099
4100  // Store ptr to reg_save_area.
4101  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
4102                    DAG.getConstant(8, getPointerTy()));
4103  SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
4104  Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
4105                       SV->getOffset());
4106  MemOps.push_back(Store);
4107  return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
4108}
4109
4110SDOperand
4111X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
4112  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
4113  switch (IntNo) {
4114  default: return SDOperand();    // Don't custom lower most intrinsics.
4115    // Comparison intrinsics.
4116  case Intrinsic::x86_sse_comieq_ss:
4117  case Intrinsic::x86_sse_comilt_ss:
4118  case Intrinsic::x86_sse_comile_ss:
4119  case Intrinsic::x86_sse_comigt_ss:
4120  case Intrinsic::x86_sse_comige_ss:
4121  case Intrinsic::x86_sse_comineq_ss:
4122  case Intrinsic::x86_sse_ucomieq_ss:
4123  case Intrinsic::x86_sse_ucomilt_ss:
4124  case Intrinsic::x86_sse_ucomile_ss:
4125  case Intrinsic::x86_sse_ucomigt_ss:
4126  case Intrinsic::x86_sse_ucomige_ss:
4127  case Intrinsic::x86_sse_ucomineq_ss:
4128  case Intrinsic::x86_sse2_comieq_sd:
4129  case Intrinsic::x86_sse2_comilt_sd:
4130  case Intrinsic::x86_sse2_comile_sd:
4131  case Intrinsic::x86_sse2_comigt_sd:
4132  case Intrinsic::x86_sse2_comige_sd:
4133  case Intrinsic::x86_sse2_comineq_sd:
4134  case Intrinsic::x86_sse2_ucomieq_sd:
4135  case Intrinsic::x86_sse2_ucomilt_sd:
4136  case Intrinsic::x86_sse2_ucomile_sd:
4137  case Intrinsic::x86_sse2_ucomigt_sd:
4138  case Intrinsic::x86_sse2_ucomige_sd:
4139  case Intrinsic::x86_sse2_ucomineq_sd: {
4140    unsigned Opc = 0;
4141    ISD::CondCode CC = ISD::SETCC_INVALID;
4142    switch (IntNo) {
4143    default: break;
4144    case Intrinsic::x86_sse_comieq_ss:
4145    case Intrinsic::x86_sse2_comieq_sd:
4146      Opc = X86ISD::COMI;
4147      CC = ISD::SETEQ;
4148      break;
4149    case Intrinsic::x86_sse_comilt_ss:
4150    case Intrinsic::x86_sse2_comilt_sd:
4151      Opc = X86ISD::COMI;
4152      CC = ISD::SETLT;
4153      break;
4154    case Intrinsic::x86_sse_comile_ss:
4155    case Intrinsic::x86_sse2_comile_sd:
4156      Opc = X86ISD::COMI;
4157      CC = ISD::SETLE;
4158      break;
4159    case Intrinsic::x86_sse_comigt_ss:
4160    case Intrinsic::x86_sse2_comigt_sd:
4161      Opc = X86ISD::COMI;
4162      CC = ISD::SETGT;
4163      break;
4164    case Intrinsic::x86_sse_comige_ss:
4165    case Intrinsic::x86_sse2_comige_sd:
4166      Opc = X86ISD::COMI;
4167      CC = ISD::SETGE;
4168      break;
4169    case Intrinsic::x86_sse_comineq_ss:
4170    case Intrinsic::x86_sse2_comineq_sd:
4171      Opc = X86ISD::COMI;
4172      CC = ISD::SETNE;
4173      break;
4174    case Intrinsic::x86_sse_ucomieq_ss:
4175    case Intrinsic::x86_sse2_ucomieq_sd:
4176      Opc = X86ISD::UCOMI;
4177      CC = ISD::SETEQ;
4178      break;
4179    case Intrinsic::x86_sse_ucomilt_ss:
4180    case Intrinsic::x86_sse2_ucomilt_sd:
4181      Opc = X86ISD::UCOMI;
4182      CC = ISD::SETLT;
4183      break;
4184    case Intrinsic::x86_sse_ucomile_ss:
4185    case Intrinsic::x86_sse2_ucomile_sd:
4186      Opc = X86ISD::UCOMI;
4187      CC = ISD::SETLE;
4188      break;
4189    case Intrinsic::x86_sse_ucomigt_ss:
4190    case Intrinsic::x86_sse2_ucomigt_sd:
4191      Opc = X86ISD::UCOMI;
4192      CC = ISD::SETGT;
4193      break;
4194    case Intrinsic::x86_sse_ucomige_ss:
4195    case Intrinsic::x86_sse2_ucomige_sd:
4196      Opc = X86ISD::UCOMI;
4197      CC = ISD::SETGE;
4198      break;
4199    case Intrinsic::x86_sse_ucomineq_ss:
4200    case Intrinsic::x86_sse2_ucomineq_sd:
4201      Opc = X86ISD::UCOMI;
4202      CC = ISD::SETNE;
4203      break;
4204    }
4205
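    // Lower to an explicit (U)COMI compare of the two scalar operands
    // followed by an X86ISD::SETCC on the resulting flags.  translateX86CC
    // maps the ISD condition code onto an X86 condition code (and may
    // commute LHS/RHS); the i8 SETCC result is then any-extended to the
    // i32 the intrinsic returns.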
4206    unsigned X86CC;
4207    SDOperand LHS = Op.getOperand(1);
4208    SDOperand RHS = Op.getOperand(2);
4209    translateX86CC(CC, true, X86CC, LHS, RHS, DAG);
4210
4211    const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
4212    SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS };
4213    SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3);
4214    VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
4215    SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
4216    SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
4217    return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
4218  }
4219  }
4220}
4221
4222SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
4223  // Depths > 0 not supported yet!
4224  if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
4225    return SDOperand();
4226
4227  // Just load the return address
4228  SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
4229  return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
4230}
4231
4232SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
4233  // Depths > 0 not supported yet!
4234  if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
4235    return SDOperand();
4236
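  // The saved frame pointer sits immediately below the return address in
  // the frame, so the frame address is the return-address slot minus one
  // pointer (hard-coded here as 4 bytes, i.e. the 32-bit frame layout).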
4237  SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
4238  return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
4239                     DAG.getConstant(4, getPointerTy()));
4240}
4241
4242/// LowerOperation - Provide custom lowering hooks for some operations.
4243///
4244SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
4245  switch (Op.getOpcode()) {
4246  default: assert(0 && "Should not custom lower this!");
4247  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
4248  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
4249  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
4250  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
4251  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
4252  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
4253  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
4254  case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
4255  case ISD::SHL_PARTS:
4256  case ISD::SRA_PARTS:
4257  case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
4258  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
4259  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
4260  case ISD::FABS:               return LowerFABS(Op, DAG);
4261  case ISD::FNEG:               return LowerFNEG(Op, DAG);
4262  case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
4263  case ISD::SETCC:              return LowerSETCC(Op, DAG, DAG.getEntryNode());
4264  case ISD::SELECT:             return LowerSELECT(Op, DAG);
4265  case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
4266  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
4267  case ISD::CALL:               return LowerCALL(Op, DAG);
4268  case ISD::RET:                return LowerRET(Op, DAG);
4269  case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
4270  case ISD::MEMSET:             return LowerMEMSET(Op, DAG);
4271  case ISD::MEMCPY:             return LowerMEMCPY(Op, DAG);
4272  case ISD::READCYCLECOUNTER:   return LowerREADCYCLCECOUNTER(Op, DAG);
4273  case ISD::VASTART:            return LowerVASTART(Op, DAG);
4274  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4275  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
4276  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
4277  }
4278  return SDOperand();
4279}
4280
4281const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
4282  switch (Opcode) {
4283  default: return NULL;
4284  case X86ISD::SHLD:               return "X86ISD::SHLD";
4285  case X86ISD::SHRD:               return "X86ISD::SHRD";
4286  case X86ISD::FAND:               return "X86ISD::FAND";
4287  case X86ISD::FOR:                return "X86ISD::FOR";
4288  case X86ISD::FXOR:               return "X86ISD::FXOR";
4289  case X86ISD::FSRL:               return "X86ISD::FSRL";
4290  case X86ISD::FILD:               return "X86ISD::FILD";
4291  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
4292  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
4293  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
4294  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
4295  case X86ISD::FLD:                return "X86ISD::FLD";
4296  case X86ISD::FST:                return "X86ISD::FST";
4297  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
4298  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
4299  case X86ISD::CALL:               return "X86ISD::CALL";
4300  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
4301  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
4302  case X86ISD::CMP:                return "X86ISD::CMP";
4303  case X86ISD::COMI:               return "X86ISD::COMI";
4304  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
4305  case X86ISD::SETCC:              return "X86ISD::SETCC";
4306  case X86ISD::CMOV:               return "X86ISD::CMOV";
4307  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
4308  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
4309  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
4310  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
4311  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
4312  case X86ISD::LOAD_UA:            return "X86ISD::LOAD_UA";
4313  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
4314  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
4315  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
4316  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
4317  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
4318  case X86ISD::FMAX:               return "X86ISD::FMAX";
4319  case X86ISD::FMIN:               return "X86ISD::FMIN";
4320  }
4321}
4322
4323/// isLegalAddressImmediate - Return true if the integer value or
4324/// GlobalValue can be used as the offset of the target addressing mode.
4325bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const {
4326  // X86 allows a sign-extended 32-bit immediate field.
4327  return (V > -(1LL << 32) && V < (1LL << 32)-1);
4328}
4329
4330bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
4331  // In 64-bit mode, GV is 64-bit so it won't fit in the 32-bit displacement
4332  // field unless we are in small code model.
4333  if (Subtarget->is64Bit() &&
4334      getTargetMachine().getCodeModel() != CodeModel::Small)
4335    return false;
4336
4337  return (!Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false));
4338}
4339
4340/// isShuffleMaskLegal - Targets can use this to indicate that they only
4341/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
4342/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
4343/// are assumed to be legal.
4344bool
4345X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
4346  // Only do shuffles on 128-bit vector types for now.
4347  if (MVT::getSizeInBits(VT) == 64) return false;
4348  return (Mask.Val->getNumOperands() <= 4 ||
4349          isSplatMask(Mask.Val)  ||
4350          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
4351          X86::isUNPCKLMask(Mask.Val) ||
4352          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
4353          X86::isUNPCKHMask(Mask.Val));
4354}
4355
4356bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
4357                                               MVT::ValueType EVT,
4358                                               SelectionDAG &DAG) const {
4359  unsigned NumElts = BVOps.size();
4360  // Only do shuffles on 128-bit vector types for now.
4361  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
4362  if (NumElts == 2) return true;
4363  if (NumElts == 4) {
4364    return (isMOVLMask(&BVOps[0], 4)  ||
4365            isCommutedMOVL(&BVOps[0], 4, true) ||
4366            isSHUFPMask(&BVOps[0], 4) ||
4367            isCommutedSHUFP(&BVOps[0], 4));
4368  }
4369  return false;
4370}
4371
4372//===----------------------------------------------------------------------===//
4373//                           X86 Scheduler Hooks
4374//===----------------------------------------------------------------------===//
4375
4376MachineBasicBlock *
4377X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
4378                                           MachineBasicBlock *BB) {
4379  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
4380  switch (MI->getOpcode()) {
4381  default: assert(false && "Unexpected instr type to insert");
4382  case X86::CMOV_FR32:
4383  case X86::CMOV_FR64:
4384  case X86::CMOV_V4F32:
4385  case X86::CMOV_V2F64:
4386  case X86::CMOV_V2I64: {
4387    // To "insert" a SELECT_CC instruction, we actually have to insert the
4388    // diamond control-flow pattern.  The incoming instruction knows the
4389    // destination vreg to set, the condition code register to branch on, the
4390    // true/false values to select between, and a branch opcode to use.
4391    const BasicBlock *LLVM_BB = BB->getBasicBlock();
4392    ilist<MachineBasicBlock>::iterator It = BB;
4393    ++It;
4394
4395    //  thisMBB:
4396    //  ...
4397    //   TrueVal = ...
4398    //   cmpTY ccX, r1, r2
4399    //   bCC copy1MBB
4400    //   fallthrough --> copy0MBB
4401    MachineBasicBlock *thisMBB = BB;
4402    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
4403    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
4404    unsigned Opc =
4405      X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
4406    BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
4407    MachineFunction *F = BB->getParent();
4408    F->getBasicBlockList().insert(It, copy0MBB);
4409    F->getBasicBlockList().insert(It, sinkMBB);
4410    // Update machine-CFG edges by first adding all successors of the current
4411    // block to the new block which will contain the Phi node for the select.
4412    for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
4413        e = BB->succ_end(); i != e; ++i)
4414      sinkMBB->addSuccessor(*i);
4415    // Next, remove all successors of the current block, and add the true
4416    // and fallthrough blocks as its successors.
4417    while (!BB->succ_empty())
4418      BB->removeSuccessor(BB->succ_begin());
4419    BB->addSuccessor(copy0MBB);
4420    BB->addSuccessor(sinkMBB);
4421
4422    //  copy0MBB:
4423    //   %FalseValue = ...
4424    //   # fallthrough to sinkMBB
4425    BB = copy0MBB;
4426
4427    // Update machine-CFG edges
4428    BB->addSuccessor(sinkMBB);
4429
4430    //  sinkMBB:
4431    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
4432    //  ...
4433    BB = sinkMBB;
4434    BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
4435      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
4436      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
4437
4438    delete MI;   // The pseudo instruction is gone now.
4439    return BB;
4440  }
4441
4442  case X86::FP_TO_INT16_IN_MEM:
4443  case X86::FP_TO_INT32_IN_MEM:
4444  case X86::FP_TO_INT64_IN_MEM: {
4445    // Change the floating point control register to use "round towards zero"
4446    // mode when truncating to an integer value.
4447    MachineFunction *F = BB->getParent();
4448    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
4449    addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
4450
4451    // Load the old value of the high byte of the control word...
4452    unsigned OldCW =
4453      F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
4454    addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
4455
4456    // Set the rounding control bits (11:10) to round toward zero (0xC7F).
4457    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
4458      .addImm(0xC7F);
4459
4460    // Reload the modified control word now...
4461    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
4462
4463    // Restore the memory image of control word to original value
4464    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
4465      .addReg(OldCW);
4466
4467    // Get the X86 opcode to use.
4468    unsigned Opc;
4469    switch (MI->getOpcode()) {
4470    default: assert(0 && "illegal opcode!");
4471    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
4472    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
4473    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
4474    }
4475
4476    X86AddressMode AM;
4477    MachineOperand &Op = MI->getOperand(0);
4478    if (Op.isRegister()) {
4479      AM.BaseType = X86AddressMode::RegBase;
4480      AM.Base.Reg = Op.getReg();
4481    } else {
4482      AM.BaseType = X86AddressMode::FrameIndexBase;
4483      AM.Base.FrameIndex = Op.getFrameIndex();
4484    }
4485    Op = MI->getOperand(1);
4486    if (Op.isImmediate())
4487      AM.Scale = Op.getImm();
4488    Op = MI->getOperand(2);
4489    if (Op.isImmediate())
4490      AM.IndexReg = Op.getImm();
4491    Op = MI->getOperand(3);
4492    if (Op.isGlobalAddress()) {
4493      AM.GV = Op.getGlobal();
4494    } else {
4495      AM.Disp = Op.getImm();
4496    }
4497    addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
4498                      .addReg(MI->getOperand(4).getReg());
4499
4500    // Reload the original control word now.
4501    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
4502
4503    delete MI;   // The pseudo instruction is gone now.
4504    return BB;
4505  }
4506  }
4507}
4508
4509//===----------------------------------------------------------------------===//
4510//                           X86 Optimization Hooks
4511//===----------------------------------------------------------------------===//
4512
4513void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
4514                                                       uint64_t Mask,
4515                                                       uint64_t &KnownZero,
4516                                                       uint64_t &KnownOne,
4517                                                       unsigned Depth) const {
4518  unsigned Opc = Op.getOpcode();
4519  assert((Opc >= ISD::BUILTIN_OP_END ||
4520          Opc == ISD::INTRINSIC_WO_CHAIN ||
4521          Opc == ISD::INTRINSIC_W_CHAIN ||
4522          Opc == ISD::INTRINSIC_VOID) &&
4523         "Should use MaskedValueIsZero if you don't know whether Op"
4524         " is a target node!");
4525
4526  KnownZero = KnownOne = 0;   // Don't know anything.
4527  switch (Opc) {
4528  default: break;
4529  case X86ISD::SETCC:
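    // X86ISD::SETCC produces 0 or 1, so every bit above the low bit is
    // known to be zero.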
4530    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
4531    break;
4532  }
4533}
4534
4535/// getShuffleScalarElt - Returns the scalar element that will make up the ith
4536/// element of the result of the vector shuffle.
4537static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
4538  MVT::ValueType VT = N->getValueType(0);
4539  SDOperand PermMask = N->getOperand(2);
4540  unsigned NumElems = PermMask.getNumOperands();
4541  SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
4542  i %= NumElems;
4543  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
4544    return (i == 0)
4545      ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
4546  } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
4547    SDOperand Idx = PermMask.getOperand(i);
4548    if (Idx.getOpcode() == ISD::UNDEF)
4549      return DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
4550    return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
4551  }
4552  return SDOperand();
4553}
4554
4555/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
4556/// node is a GlobalAddress + an offset.
4557static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
4558  unsigned Opc = N->getOpcode();
4559  if (Opc == X86ISD::Wrapper) {
4560    if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
4561      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
4562      return true;
4563    }
4564  } else if (Opc == ISD::ADD) {
4565    SDOperand N1 = N->getOperand(0);
4566    SDOperand N2 = N->getOperand(1);
4567    if (isGAPlusOffset(N1.Val, GA, Offset)) {
4568      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
4569      if (V) {
4570        Offset += V->getSignExtended();
4571        return true;
4572      }
4573    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
4574      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
4575      if (V) {
4576        Offset += V->getSignExtended();
4577        return true;
4578      }
4579    }
4580  }
4581  return false;
4582}
4583
4584/// isConsecutiveLoad - Returns true if N is loading from an address of Base
4585/// + Dist * Size.
4586static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
4587                              MachineFrameInfo *MFI) {
4588  if (N->getOperand(0).Val != Base->getOperand(0).Val)
4589    return false;
4590
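  // Two address forms are recognized: frame indices (compare the frame
  // objects' offsets and sizes) and GlobalAddress plus constant offset
  // (compare the offsets when both loads reference the same global).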
4591  SDOperand Loc = N->getOperand(1);
4592  SDOperand BaseLoc = Base->getOperand(1);
4593  if (Loc.getOpcode() == ISD::FrameIndex) {
4594    if (BaseLoc.getOpcode() != ISD::FrameIndex)
4595      return false;
4596    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
4597    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
4598    int FS  = MFI->getObjectSize(FI);
4599    int BFS = MFI->getObjectSize(BFI);
4600    if (FS != BFS || FS != Size) return false;
4601    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
4602  } else {
4603    GlobalValue *GV1 = NULL;
4604    GlobalValue *GV2 = NULL;
4605    int64_t Offset1 = 0;
4606    int64_t Offset2 = 0;
4607    bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
4608    bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
4609    if (isGA1 && isGA2 && GV1 == GV2)
4610      return Offset1 == (Offset2 + Dist*Size);
4611  }
4612
4613  return false;
4614}
4615
4616static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
4617                              const X86Subtarget *Subtarget) {
4618  GlobalValue *GV;
4619  int64_t Offset;
4620  if (isGAPlusOffset(Base, GV, Offset))
4621    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
4622  else {
4623    assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
4624    int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
4625    if (BFI < 0)
4626      // Fixed objects do not specify alignment, however the offsets are known.
4627      return ((Subtarget->getStackAlignment() % 16) == 0 &&
4628              (MFI->getObjectOffset(BFI) % 16) == 0);
4629    else
4630      return MFI->getObjectAlignment(BFI) >= 16;
4631  }
4632  return false;
4633}
4634
4635
4636/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
4637/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
4638/// if the load addresses are consecutive, non-overlapping, and in the right
4639/// order.
4640static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
4641                                       const X86Subtarget *Subtarget) {
4642  MachineFunction &MF = DAG.getMachineFunction();
4643  MachineFrameInfo *MFI = MF.getFrameInfo();
4644  MVT::ValueType VT = N->getValueType(0);
4645  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
4646  SDOperand PermMask = N->getOperand(2);
4647  int NumElems = (int)PermMask.getNumOperands();
4648  SDNode *Base = NULL;
4649  for (int i = 0; i < NumElems; ++i) {
4650    SDOperand Idx = PermMask.getOperand(i);
4651    if (Idx.getOpcode() == ISD::UNDEF) {
4652      if (!Base) return SDOperand();
4653    } else {
4654      SDOperand Arg =
4655        getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
4656      if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
4657        return SDOperand();
4658      if (!Base)
4659        Base = Arg.Val;
4660      else if (!isConsecutiveLoad(Arg.Val, Base,
4661                                  i, MVT::getSizeInBits(EVT)/8, MFI))
4662        return SDOperand();
4663    }
4664  }
4665
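  // Every element is a consecutive load from Base.  If the base address is
  // known to be 16-byte aligned, the shuffle can be replaced by a single
  // aligned 128-bit load; otherwise fall back to an unaligned load.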
4666  bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
4667  if (isAlign16) {
4668    LoadSDNode *LD = cast<LoadSDNode>(Base);
4669    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
4670                       LD->getSrcValueOffset());
4671  } else {
4672    // Not known to be 16-byte aligned; use an unaligned load (movups).
4673    SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other);
4674    SmallVector<SDOperand, 3> Ops;
4675    Ops.push_back(Base->getOperand(0));
4676    Ops.push_back(Base->getOperand(1));
4677    Ops.push_back(Base->getOperand(2));
4678    return DAG.getNode(ISD::BIT_CONVERT, VT,
4679                       DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size()));
4680  }
4681}
4682
4683/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
4684static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
4685                                      const X86Subtarget *Subtarget) {
4686  SDOperand Cond = N->getOperand(0);
4687
4688  // If we have SSE[12] support, try to form min/max nodes.
4689  if (Subtarget->hasSSE2() &&
4690      (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
4691    if (Cond.getOpcode() == ISD::SETCC) {
4692      // Get the LHS/RHS of the select.
4693      SDOperand LHS = N->getOperand(1);
4694      SDOperand RHS = N->getOperand(2);
4695      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
4696
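      // minss/maxss return the second operand for NaNs and do not
      // distinguish -0.0 from +0.0, so the "or equal" and unordered forms
      // below are only converted when UnsafeFPMath is enabled.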
4697      unsigned Opcode = 0;
4698      if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
4699        switch (CC) {
4700        default: break;
4701        case ISD::SETOLE: // (X <= Y) ? X : Y -> min
4702        case ISD::SETULE:
4703        case ISD::SETLE:
4704          if (!UnsafeFPMath) break;
4705          // FALL THROUGH.
4706        case ISD::SETOLT:  // (X olt/lt Y) ? X : Y -> min
4707        case ISD::SETLT:
4708          Opcode = X86ISD::FMIN;
4709          break;
4710
4711        case ISD::SETOGT: // (X > Y) ? X : Y -> max
4712        case ISD::SETUGT:
4713        case ISD::SETGT:
4714          if (!UnsafeFPMath) break;
4715          // FALL THROUGH.
4716        case ISD::SETUGE:  // (X uge/ge Y) ? X : Y -> max
4717        case ISD::SETGE:
4718          Opcode = X86ISD::FMAX;
4719          break;
4720        }
4721      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
4722        switch (CC) {
4723        default: break;
4724        case ISD::SETOGT: // (X > Y) ? Y : X -> min
4725        case ISD::SETUGT:
4726        case ISD::SETGT:
4727          if (!UnsafeFPMath) break;
4728          // FALL THROUGH.
4729        case ISD::SETUGE:  // (X uge/ge Y) ? Y : X -> min
4730        case ISD::SETGE:
4731          Opcode = X86ISD::FMIN;
4732          break;
4733
4734        case ISD::SETOLE:   // (X <= Y) ? Y : X -> max
4735        case ISD::SETULE:
4736        case ISD::SETLE:
4737          if (!UnsafeFPMath) break;
4738          // FALL THROUGH.
4739        case ISD::SETOLT:   // (X olt/lt Y) ? Y : X -> max
4740        case ISD::SETLT:
4741          Opcode = X86ISD::FMAX;
4742          break;
4743        }
4744      }
4745
4746      if (Opcode)
4747        return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
4748    }
4749
4750  }
4751
4752  return SDOperand();
4753}
4754
4755
4756SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
4757                                               DAGCombinerInfo &DCI) const {
4758  SelectionDAG &DAG = DCI.DAG;
4759  switch (N->getOpcode()) {
4760  default: break;
4761  case ISD::VECTOR_SHUFFLE:
4762    return PerformShuffleCombine(N, DAG, Subtarget);
4763  case ISD::SELECT:
4764    return PerformSELECTCombine(N, DAG, Subtarget);
4765  }
4766
4767  return SDOperand();
4768}
4769
4770//===----------------------------------------------------------------------===//
4771//                           X86 Inline Assembly Support
4772//===----------------------------------------------------------------------===//
4773
4774/// getConstraintType - Given a constraint letter, return the type of
4775/// constraint it is for this target.
4776X86TargetLowering::ConstraintType
4777X86TargetLowering::getConstraintType(char ConstraintLetter) const {
4778  switch (ConstraintLetter) {
4779  case 'A':
4780  case 'r':
4781  case 'R':
4782  case 'l':
4783  case 'q':
4784  case 'Q':
4785  case 'x':
4786  case 'Y':
4787    return C_RegisterClass;
4788  default: return TargetLowering::getConstraintType(ConstraintLetter);
4789  }
4790}
4791
4792/// isOperandValidForConstraint - Return the specified operand (possibly
4793/// modified) if the specified SDOperand is valid for the specified target
4794/// constraint letter, otherwise return null.
4795SDOperand X86TargetLowering::
4796isOperandValidForConstraint(SDOperand Op, char Constraint, SelectionDAG &DAG) {
4797  switch (Constraint) {
4798  default: break;
4799  case 'i':
4800    // Literal immediates are always ok.
4801    if (isa<ConstantSDNode>(Op)) return Op;
4802
4803    // If we are in non-pic codegen mode, we allow the address of a global to
4804    // be used with 'i'.
4805    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
4806      if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
4807        return SDOperand(0, 0);
4808
4809      if (GA->getOpcode() != ISD::TargetGlobalAddress)
4810        Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
4811                                        GA->getOffset());
4812      return Op;
4813    }
4814
4815    // Otherwise, not valid for this mode.
4816    return SDOperand(0, 0);
4817  }
4818  return TargetLowering::isOperandValidForConstraint(Op, Constraint, DAG);
4819}
4820
4821
4822std::vector<unsigned> X86TargetLowering::
4823getRegClassForInlineAsmConstraint(const std::string &Constraint,
4824                                  MVT::ValueType VT) const {
4825  if (Constraint.size() == 1) {
4826    // FIXME: not handling fp-stack yet!
4827    // FIXME: not handling MMX registers yet ('y' constraint).
4828    switch (Constraint[0]) {      // GCC X86 Constraint Letters
4829    default: break;  // Unknown constraint letter
4830    case 'A':   // EAX/EDX
4831      if (VT == MVT::i32 || VT == MVT::i64)
4832        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
4833      break;
4834    case 'r':   // GENERAL_REGS
4835    case 'R':   // LEGACY_REGS
4836      if (VT == MVT::i64 && Subtarget->is64Bit())
4837        return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX,
4838                                     X86::RSI, X86::RDI, X86::RBP, X86::RSP,
4839                                     X86::R8,  X86::R9,  X86::R10, X86::R11,
4840                                     X86::R12, X86::R13, X86::R14, X86::R15, 0);
4841      if (VT == MVT::i32)
4842        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
4843                                     X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
4844      else if (VT == MVT::i16)
4845        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
4846                                     X86::SI, X86::DI, X86::BP, X86::SP, 0);
4847      else if (VT == MVT::i8)
4848        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4849      break;
4850    case 'l':   // INDEX_REGS
4851      if (VT == MVT::i32)
4852        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
4853                                     X86::ESI, X86::EDI, X86::EBP, 0);
4854      else if (VT == MVT::i16)
4855        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
4856                                     X86::SI, X86::DI, X86::BP, 0);
4857      else if (VT == MVT::i8)
4858        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4859      break;
4860    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
4861    case 'Q':   // Q_REGS
4862      if (VT == MVT::i32)
4863        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
4864      else if (VT == MVT::i16)
4865        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
4866      else if (VT == MVT::i8)
4867        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4868      break;
4869    case 'x':   // SSE_REGS if SSE1 allowed
4870      if (Subtarget->hasSSE1())
4871        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4872                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
4873                                     0);
4874      return std::vector<unsigned>();
4875    case 'Y':   // SSE_REGS if SSE2 allowed
4876      if (Subtarget->hasSSE2())
4877        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4878                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
4879                                     0);
4880      return std::vector<unsigned>();
4881    }
4882  }
4883
4884  return std::vector<unsigned>();
4885}
4886
4887std::pair<unsigned, const TargetRegisterClass*>
4888X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
4889                                                MVT::ValueType VT) const {
4890  // Use the default implementation in TargetLowering to convert the register
4891  // constraint into a member of a register class.
4892  std::pair<unsigned, const TargetRegisterClass*> Res;
4893  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
4894
4895  // Not found as a standard register?
4896  if (Res.second == 0) {
4897    // GCC calls "st(0)" just plain "st".
4898    if (StringsEqualNoCase("{st}", Constraint)) {
4899      Res.first = X86::ST0;
4900      Res.second = X86::RSTRegisterClass;
4901    }
4902
4903    return Res;
4904  }
4905
4906  // Otherwise, check to see if this is a register class of the wrong value
4907  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it to
4908  // turn into {ax},{dx}.
4909  if (Res.second->hasType(VT))
4910    return Res;   // Correct type already, nothing to do.
4911
4912  // All of the single-register GCC register classes map their values onto
4913  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
4914  // really want an 8-bit or 32-bit register, map to the appropriate register
4915  // class and return the appropriate register.
4916  if (Res.second != X86::GR16RegisterClass)
4917    return Res;
4918
4919  if (VT == MVT::i8) {
4920    unsigned DestReg = 0;
4921    switch (Res.first) {
4922    default: break;
4923    case X86::AX: DestReg = X86::AL; break;
4924    case X86::DX: DestReg = X86::DL; break;
4925    case X86::CX: DestReg = X86::CL; break;
4926    case X86::BX: DestReg = X86::BL; break;
4927    }
4928    if (DestReg) {
4929      Res.first = DestReg;
4930      Res.second = X86::GR8RegisterClass;
4931    }
4932  } else if (VT == MVT::i32) {
4933    unsigned DestReg = 0;
4934    switch (Res.first) {
4935    default: break;
4936    case X86::AX: DestReg = X86::EAX; break;
4937    case X86::DX: DestReg = X86::EDX; break;
4938    case X86::CX: DestReg = X86::ECX; break;
4939    case X86::BX: DestReg = X86::EBX; break;
4940    case X86::SI: DestReg = X86::ESI; break;
4941    case X86::DI: DestReg = X86::EDI; break;
4942    case X86::BP: DestReg = X86::EBP; break;
4943    case X86::SP: DestReg = X86::ESP; break;
4944    }
4945    if (DestReg) {
4946      Res.first = DestReg;
4947      Res.second = X86::GR32RegisterClass;
4948    }
4949  } else if (VT == MVT::i64) {
4950    unsigned DestReg = 0;
4951    switch (Res.first) {
4952    default: break;
4953    case X86::AX: DestReg = X86::RAX; break;
4954    case X86::DX: DestReg = X86::RDX; break;
4955    case X86::CX: DestReg = X86::RCX; break;
4956    case X86::BX: DestReg = X86::RBX; break;
4957    case X86::SI: DestReg = X86::RSI; break;
4958    case X86::DI: DestReg = X86::RDI; break;
4959    case X86::BP: DestReg = X86::RBP; break;
4960    case X86::SP: DestReg = X86::RSP; break;
4961    }
4962    if (DestReg) {
4963      Res.first = DestReg;
4964      Res.second = X86::GR64RegisterClass;
4965    }
4966  }
4967
4968  return Res;
4969}
4970