//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;

// FIXME: temporary.
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));
X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
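  // (A scale of 3, 5, or 9 is only encodable as base + index*{2,4,8} with the
  // same register used as both base and index, so it ties up both address
  // operands.)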
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

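  // There is no way to directly sign-extend a 1-bit load, so expand it.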
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSE)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  if (!Subtarget->is64Bit()) {
    // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
    setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
  }

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSE && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSE) {
    setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
    setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);
  }

  setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);

  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP          , MVT::i64  , Expand);
    setOperationAction(ISD::CTTZ           , MVT::i64  , Expand);
    setOperationAction(ISD::CTLZ           , MVT::i64  , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT        , MVT::i64  , Custom);
    setOperationAction(ISD::SETCC         , MVT::i64  , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET             , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool  , MVT::i64  , Custom);
    setOperationAction(ISD::JumpTable     , MVT::i64  , Custom);
    setOperationAction(ISD::GlobalAddress , MVT::i64  , Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64  , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing())
    setOperationAction(ISD::LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    setOperationAction(ISD::FADD,               MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB,               MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL,               MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV,               MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}


//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//


/// GetRetValueLocs - If we are returning a set of values with the specified
/// value types, determine the set of registers each one will land in.  This
/// sets one element of the ResultRegs array for each element in the VTs array.
static void GetRetValueLocs(const MVT::ValueType *VTs, unsigned NumVTs,
                            unsigned *ResultRegs,
                            const X86Subtarget *Subtarget,
                            unsigned CC) {
  if (NumVTs == 0) return;

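  // A pair of integer results is returned in EAX/EDX (or RAX/RDX for i64
  // values).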
  if (NumVTs == 2) {
    ResultRegs[0] = VTs[0] == MVT::i64 ? X86::RAX : X86::EAX;
    ResultRegs[1] = VTs[1] == MVT::i64 ? X86::RDX : X86::EDX;
    return;
  }

  // Otherwise, NumVTs is 1.
  MVT::ValueType ArgVT = VTs[0];

  unsigned Reg;
  switch (ArgVT) {
  case MVT::i8:  Reg = X86::AL; break;
  case MVT::i16: Reg = X86::AX; break;
  case MVT::i32: Reg = X86::EAX; break;
  case MVT::i64: Reg = X86::RAX; break;
  case MVT::f32:
  case MVT::f64:
    if (Subtarget->is64Bit())
      Reg = X86::XMM0;         // FP values in X86-64 go in XMM0.
    else if (CC == CallingConv::Fast && Subtarget->hasSSE2())
      Reg = X86::XMM0;         // FP values in X86-32 with fastcc go in XMM0.
    else
      Reg = X86::ST0;          // FP values in X86-32 go in ST0.
    break;
  default:
    assert(MVT::isVector(ArgVT) && "Unknown return value type!");
    Reg = X86::XMM0; // Int/FP vector result -> XMM0.
    break;
  }
  ResultRegs[0] = Reg;
}

/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  // Support returning up to two registers.
  MVT::ValueType VTs[2];
  unsigned DestRegs[2];
  unsigned NumRegs = Op.getNumOperands() / 2;
  assert(NumRegs <= 2 && "Can only return up to two regs!");

  for (unsigned i = 0; i != NumRegs; ++i)
    VTs[i] = Op.getOperand(i*2+1).getValueType();

  // Determine which register each value should be copied into.
  GetRetValueLocs(VTs, NumRegs, DestRegs, Subtarget,
                  DAG.getMachineFunction().getFunction()->getCallingConv());

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != NumRegs; ++i)
      DAG.getMachineFunction().addLiveOut(DestRegs[i]);
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  if (NumRegs != 1 || DestRegs[0] != X86::ST0) {
    for (unsigned i = 0; i != NumRegs; ++i) {
      Chain = DAG.getCopyToReg(Chain, DestRegs[i], Op.getOperand(i*2+1), Flag);
      Flag = Chain.getValue(1);
    }
  } else {
    // We need to handle a destination of ST0 specially, because it isn't really
    // a register.
    SDOperand Value = Op.getOperand(1);

    // If this is an FP return with ScalarSSE, we need to move the value from
    // an XMM register onto the fp-stack.
    if (X86ScalarSSE) {
      SDOperand MemLoc;

      // If this is a load into a scalarsse value, don't store the loaded value
      // back to the stack, only to reload it: just replace the scalar-sse load.
      if (ISD::isNON_EXTLoad(Value.Val) &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        Chain  = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into top of stack.
        unsigned Size = MVT::getSizeInBits(VTs[0])/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
      }
      SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other);
      SDOperand Ops[] = { Chain, MemLoc, DAG.getValueType(VTs[0]) };
      Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
      Chain = Value.getValue(1);
    }

    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDOperand Ops[] = { Chain, Value };
    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
    Flag = Chain.getValue(1);
  }

  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
  if (Flag.Val)
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
  else
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
}


/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers.  This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered.  This returns an SDNode with the same number of values as
/// the
/// ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {
  SmallVector<SDOperand, 8> ResultVals;

  // We support returning up to two registers.
  MVT::ValueType VTs[2];
  unsigned DestRegs[2];
  unsigned NumRegs = TheCall->getNumValues() - 1;
  assert(NumRegs <= 2 && "Can only return up to two regs!");

  for (unsigned i = 0; i != NumRegs; ++i)
    VTs[i] = TheCall->getValueType(i);

  // Determine which register each value should be copied into.
  GetRetValueLocs(VTs, NumRegs, DestRegs, Subtarget, CallingConv);

  // Copy all of the result registers out of their specified physreg.
  if (NumRegs != 1 || DestRegs[0] != X86::ST0) {
    for (unsigned i = 0; i != NumRegs; ++i) {
      Chain = DAG.getCopyFromReg(Chain, DestRegs[i], VTs[i],
                                 InFlag).getValue(1);
      InFlag = Chain.getValue(2);
      ResultVals.push_back(Chain.getValue(0));
    }
  } else {
    // Copies from the FP stack are special, as ST0 isn't a valid register
    // before the fp stackifier runs.

    // Copy ST0 into an RFP register with FP_GET_RESULT.
    SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
    SDOperand GROps[] = { Chain, InFlag };
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

    // If we are using ScalarSSE, store ST(0) to the stack and reload it into
    // an XMM register.
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      SDOperand Ops[] = {
        Chain, RetVal, StackSlot, DAG.getValueType(VTs[0]), InFlag
      };
      Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
      RetVal = DAG.getLoad(VTs[0], Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }

    if (VTs[0] == MVT::f32 && !X86ScalarSSE)
      // FIXME: we would really like to remember that this FP_ROUND
      // operation is okay to eliminate if we allow excess FP precision.
      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
    ResultVals.push_back(RetVal);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).Val;
}


//===----------------------------------------------------------------------===//
//                C & StdCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention seems to be standard for many Windows API
//  routines. It differs from the C calling convention only a little: the
//  callee cleans up the stack, not the caller. Symbols should also be
//  decorated in some fancy way :) It doesn't support any vector arguments.

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value.  It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

/// HowToPassCallArgument - Returns how a formal argument of the specified
/// type should be passed. If it is passed on the stack, this returns the
/// size of the stack slot; if it is passed in integer or XMM registers, it
/// returns the number of integer or XMM registers needed.
static void
HowToPassCallArgument(MVT::ValueType ObjectVT,
                      bool ArgInReg,
                      unsigned NumIntRegs, unsigned NumXMMRegs,
                      unsigned MaxNumIntRegs,
                      unsigned &ObjSize, unsigned &ObjIntRegs,
                      unsigned &ObjXMMRegs) {
  ObjSize = 0;
  ObjIntRegs = 0;
  ObjXMMRegs = 0;

  if (MaxNumIntRegs > 3) {
    // We don't have that many registers on ia32! :)
    MaxNumIntRegs = 3;
  }

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:
   if (ArgInReg && (NumIntRegs < MaxNumIntRegs))
     ObjIntRegs = 1;
   else
     ObjSize = 1;
   break;
  case MVT::i16:
   if (ArgInReg && (NumIntRegs < MaxNumIntRegs))
     ObjIntRegs = 1;
   else
     ObjSize = 2;
   break;
  case MVT::i32:
   if (ArgInReg && (NumIntRegs < MaxNumIntRegs))
     ObjIntRegs = 1;
   else
     ObjSize = 4;
   break;
  case MVT::i64:
   if (ArgInReg && (NumIntRegs+2 <= MaxNumIntRegs)) {
     ObjIntRegs = 2;
   } else if (ArgInReg && (NumIntRegs+1 <= MaxNumIntRegs)) {
     ObjIntRegs = 1;
     ObjSize = 4;
   } else
     ObjSize = 8;
   break;
  case MVT::f32:
    ObjSize = 4;
    break;
  case MVT::f64:
    ObjSize = 8;
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 4)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
                                               bool isStdCall) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  SmallVector<SDOperand, 8> ArgValues;
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is <= 4 bytes in size
  //    ...
  //
  unsigned ArgOffset   = 0; // Frame mechanisms handle retaddr slot
  unsigned NumSRetBytes = 0; // Bytes on stack used for struct return
  unsigned NumXMMRegs  = 0; // XMM regs used for parameter passing.
  unsigned NumIntRegs  = 0; // Integer regs used for parameter passing

  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };
  static const unsigned GPRArgRegs[][3] = {
    { X86::AL,  X86::DL,  X86::CL  },
    { X86::AX,  X86::DX,  X86::CX  },
    { X86::EAX, X86::EDX, X86::ECX }
  };
  static const TargetRegisterClass* GPRClasses[3] = {
    X86::GR8RegisterClass, X86::GR16RegisterClass, X86::GR32RegisterClass
  };

  // Handle regparm attribute
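  // Each argument carries a flags word: bit 0 marks a sign-extended argument,
  // bit 1 an 'inreg' argument, and bit 2 a struct-return pointer.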
  SmallVector<bool, 8> ArgInRegs(NumArgs, false);
  SmallVector<bool, 8> SRetArgs(NumArgs, false);
  if (!isVarArg) {
    for (unsigned i = 0; i<NumArgs; ++i) {
      unsigned Flags = cast<ConstantSDNode>(Op.getOperand(3+i))->getValue();
      ArgInRegs[i]   = (Flags >> 1) & 1;
      SRetArgs[i]    = (Flags >> 2) & 1;
    }
  }

  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjXMMRegs = 0;
    unsigned ObjIntRegs = 0;
    unsigned Reg = 0;
    SDOperand ArgValue;

    HowToPassCallArgument(ObjectVT,
                          ArgInRegs[i],
                          NumIntRegs, NumXMMRegs, 3,
                          ObjSize, ObjIntRegs, ObjXMMRegs);

    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    if (ObjIntRegs || ObjXMMRegs) {
      switch (ObjectVT) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i8:
      case MVT::i16:
      case MVT::i32: {
       unsigned RegToUse = GPRArgRegs[ObjectVT-MVT::i8][NumIntRegs];
       Reg = AddLiveIn(MF, RegToUse, GPRClasses[ObjectVT-MVT::i8]);
       ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
       break;
      }
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
       assert(!isStdCall && "Unhandled argument type!");
       Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass);
       ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
       break;
      }
      NumIntRegs += ObjIntRegs;
      NumXMMRegs += ObjXMMRegs;
    }
    if (ObjSize) {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);

      ArgOffset += ArgIncrement;   // Move on to the next argument.
      if (SRetArgs[i])
        NumSRetBytes += ArgIncrement;
    }

    ArgValues.push_back(ArgValue);
  }

  ArgValues.push_back(Root);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);

  if (isStdCall && !isVarArg) {
    BytesToPopOnReturn  = ArgOffset;    // Callee pops everything..
    BytesCallerReserves = 0;
  } else {
    BytesToPopOnReturn  = NumSRetBytes; // Callee pops hidden struct pointer.
    BytesCallerReserves = ArgOffset;
  }

  RegSaveFrameIndex = 0xAAAAAAA;  // X86-64 only.
  ReturnAddrIndex = 0;            // No return address slot generated yet.


  MF.getInfo<X86FunctionInfo>()->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                            unsigned CC) {
  SDOperand Chain     = Op.getOperand(0);
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };
  static const unsigned GPR32ArgRegs[] = {
    X86::EAX, X86::EDX,  X86::ECX
  };

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes   = 0;
  // Keep track of the number of integer regs passed so far.
  unsigned NumIntRegs = 0;
  // Keep track of the number of XMM regs passed so far.
  unsigned NumXMMRegs = 0;
  // How many bytes on stack are used for struct return.
  unsigned NumSRetBytes = 0;

  // Handle regparm attribute
  SmallVector<bool, 8> ArgInRegs(NumOps, false);
  SmallVector<bool, 8> SRetArgs(NumOps, false);
  for (unsigned i = 0; i<NumOps; ++i) {
    unsigned Flags =
      dyn_cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue();
    ArgInRegs[i] = (Flags >> 1) & 1;
    SRetArgs[i]  = (Flags >> 2) & 1;
  }

  // Calculate stack frame size
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjIntRegs = 0;
    unsigned ObjXMMRegs = 0;

    HowToPassCallArgument(Arg.getValueType(),
                          ArgInRegs[i],
                          NumIntRegs, NumXMMRegs, 3,
                          ObjSize, ObjIntRegs, ObjXMMRegs);
    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    NumIntRegs += ObjIntRegs;
    NumXMMRegs += ObjXMMRegs;
    if (ObjSize) {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        NumBytes = ((NumBytes + 15) / 16) * 16;
      NumBytes += ArgIncrement;
    }
  }

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumXMMRegs = 0;
  NumIntRegs = 0;
  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjIntRegs = 0;
    unsigned ObjXMMRegs = 0;

    HowToPassCallArgument(Arg.getValueType(),
                          ArgInRegs[i],
                          NumIntRegs, NumXMMRegs, 3,
                          ObjSize, ObjIntRegs, ObjXMMRegs);

    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    if (Arg.getValueType() == MVT::i8 || Arg.getValueType() == MVT::i16) {
      // Promote the integer to 32 bits.  If the input type is signed use a
      // sign extend, otherwise use a zero extend.
      unsigned Flags = cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue();

      unsigned ExtOp = (Flags & 1) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, MVT::i32, Arg);
    }

    if (ObjIntRegs || ObjXMMRegs) {
      switch (Arg.getValueType()) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i32:
       RegsToPass.push_back(std::make_pair(GPR32ArgRegs[NumIntRegs], Arg));
       break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
       RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
       break;
      }

      NumIntRegs += ObjIntRegs;
      NumXMMRegs += ObjXMMRegs;
    }
    if (ObjSize) {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;

      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));

      ArgOffset += ArgIncrement;   // Move on to the next argument.
      if (SRetArgs[i])
        NumSRetBytes += ArgIncrement;
    }
  }

  // Sanity check: we haven't seen NumSRetBytes > 4
  assert((NumSRetBytes<=4) &&
         "Too much space for struct-return pointer requested");

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // ELF / PIC requires GOT in the EBX register before function calls via PLT
  // GOT pointer.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  if (CC == CallingConv::X86_StdCall) {
    if (isVarArg)
      NumBytesForCalleeToPush = NumSRetBytes;
    else
      NumBytesForCalleeToPush = NumBytes;
  } else {
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = NumSRetBytes;
  }

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                 X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//


/// X86_64_CCC_AssignArgument - Implement the X86-64 C Calling Convention.  This
/// returns true if the value was not handled by this calling convention.
static bool X86_64_CCC_AssignArgument(unsigned ValNo,
                                      MVT::ValueType ArgVT, unsigned ArgFlags,
                                      CCState &State) {
  MVT::ValueType LocVT = ArgVT;
  CCValAssign::LocInfo LocInfo = CCValAssign::Full;

  // Promote the integer to 32 bits.  If the input type is signed use a
  // sign extend, otherwise use a zero extend.
  if (ArgVT == MVT::i8 || ArgVT == MVT::i16) {
    LocVT = MVT::i32;
    LocInfo = (ArgFlags & 1) ? CCValAssign::SExt : CCValAssign::ZExt;
  }

  // If this is a 32-bit value, assign to a 32-bit register if any are
  // available.
  if (LocVT == MVT::i32) {
    static const unsigned GPR32ArgRegs[] = {
      X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
    };
    if (unsigned Reg = State.AllocateReg(GPR32ArgRegs, 6)) {
      State.addLoc(CCValAssign::getReg(ValNo, ArgVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // If this is a 64-bit value, assign to a 64-bit register if any are
  // available.
  if (LocVT == MVT::i64) {
    static const unsigned GPR64ArgRegs[] = {
      X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
    };
    if (unsigned Reg = State.AllocateReg(GPR64ArgRegs, 6)) {
      State.addLoc(CCValAssign::getReg(ValNo, ArgVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // If this is a FP or vector type, assign to an XMM reg if any are
  // available.
  if (MVT::isVector(LocVT) || MVT::isFloatingPoint(LocVT)) {
    static const unsigned XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    if (unsigned Reg = State.AllocateReg(XMMArgRegs, 8)) {
      State.addLoc(CCValAssign::getReg(ValNo, ArgVT, Reg, LocVT, LocInfo));
      return false;
    }
  }

  // Integer/FP values get stored in stack slots that are 8 bytes in size and
  // 8-byte aligned if there are no more registers to hold them.
  if (LocVT == MVT::i32 || LocVT == MVT::i64 ||
      LocVT == MVT::f32 || LocVT == MVT::f64) {
    unsigned Offset = State.AllocateStack(8, 8);
    State.addLoc(CCValAssign::getMem(ValNo, ArgVT, Offset, LocVT, LocInfo));
    return false;
  }

  // Vectors get 16-byte stack slots that are 16-byte aligned.
  if (MVT::isVector(LocVT)) {
    unsigned Offset = State.AllocateStack(16, 16);
    State.addLoc(CCValAssign::getMem(ValNo, ArgVT, Offset, LocVT, LocInfo));
    return false;
  }
  return true;
}


SDOperand
X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8,  X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(),
                 ArgLocs);

  for (unsigned i = 0; i != NumArgs; ++i) {
    MVT::ValueType ArgVT = Op.getValue(i).getValueType();
    unsigned ArgFlags = cast<ConstantSDNode>(Op.getOperand(3+i))->getValue();
    if (X86_64_CCC_AssignArgument(i, ArgVT, ArgFlags, CCInfo))
      assert(0 && "Unhandled argument type!");
  }

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      SDOperand ArgValue = DAG.getCopyFromReg(Root, VA.getLocReg(), RegVT);
      AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());

      // Create the nodes corresponding to a load from this parameter slot.
      int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                      VA.getLocMemOffset());
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so they
    // may be loaded by dereferencing the result of va_next.
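    // The register save area holds the six GP argument registers (6 x 8
    // bytes) followed by the eight XMM argument registers (8 x 16 bytes).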
1227    VarArgsGPOffset = NumIntRegs * 8;
1228    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
1229    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
1230    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
1231
1232    // Store the integer parameter registers.
1233    SmallVector<SDOperand, 8> MemOps;
1234    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
1235    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
1236                              DAG.getConstant(VarArgsGPOffset, getPointerTy()));
1237    for (; NumIntRegs != 6; ++NumIntRegs) {
1238      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
1239                                X86::GR64RegisterClass);
1240      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
1241      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1242      MemOps.push_back(Store);
1243      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
1244                        DAG.getConstant(8, getPointerTy()));
1245    }
1246
1247    // Now store the XMM (fp + vector) parameter registers.
1248    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
1249                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
1250    for (; NumXMMRegs != 8; ++NumXMMRegs) {
1251      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
1252                                X86::VR128RegisterClass);
1253      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
1254      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
1255      MemOps.push_back(Store);
1256      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
1257                        DAG.getConstant(16, getPointerTy()));
1258    }
1259    if (!MemOps.empty())
1260        Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
1261                           &MemOps[0], MemOps.size());
1262  }
1263
1264  ArgValues.push_back(Root);
1265
1266  ReturnAddrIndex = 0;     // No return address slot generated yet.
1267  BytesToPopOnReturn = 0;  // Callee pops nothing.
1268  BytesCallerReserves = StackSize;
1269
1270  // Return the new list of results.
1271  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
1272                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
1273}
1274
1275SDOperand
1276X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
1277                                        unsigned CC) {
1278  SDOperand Chain     = Op.getOperand(0);
1279  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1280  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1281  SDOperand Callee    = Op.getOperand(4);
1282  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;
1283
1284  SmallVector<CCValAssign, 16> ArgLocs;
1285  CCState CCInfo(CC, getTargetMachine(), ArgLocs);
1286
1287  for (unsigned i = 0; i != NumOps; ++i) {
1288    MVT::ValueType ArgVT = Op.getOperand(5+2*i).getValueType();
1289    unsigned ArgFlags =cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue();
1290    if (X86_64_CCC_AssignArgument(i, ArgVT, ArgFlags, CCInfo))
1291      assert(0 && "Unhandled argument type!");
1292  }
1293
1294  // Get a count of how many bytes are to be pushed on the stack.
1295  unsigned NumBytes = CCInfo.getNextStackOffset();
1296  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
1297
1298  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
1299  SmallVector<SDOperand, 8> MemOpChains;
1300
1301  SDOperand StackPtr;
1302
1303  // Walk the register/memloc assignments, inserting copies/loads.
1304  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1305    CCValAssign &VA = ArgLocs[i];
1306    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
1307
1308    // Promote the value if needed.
1309    switch (VA.getLocInfo()) {
1310    default: assert(0 && "Unknown loc info!");
1311    case CCValAssign::Full: break;
1312    case CCValAssign::SExt:
1313      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
1314      break;
1315    case CCValAssign::ZExt:
1316      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
1317      break;
1318    case CCValAssign::AExt:
1319      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
1320      break;
1321    }
1322
1323    if (VA.isRegLoc()) {
1324      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1325    } else {
1326      assert(VA.isMemLoc());
1327      if (StackPtr.Val == 0)
1328        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
1329      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
1330      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1331      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1332    }
1333  }
1334
1335  if (!MemOpChains.empty())
1336    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1337                        &MemOpChains[0], MemOpChains.size());
1338
1339  // Build a sequence of copy-to-reg nodes chained together with token chain
1340  // and flag operands which copy the outgoing args into registers.
1341  SDOperand InFlag;
1342  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1343    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1344                             InFlag);
1345    InFlag = Chain.getValue(1);
1346  }
1347
1348  if (isVarArg) {
1349    // From AMD64 ABI document:
1350    // For calls that may call functions that use varargs or stdargs
1351    // (prototype-less calls or calls to functions containing ellipsis (...) in
1352    // the declaration) %al is used as a hidden argument to specify the number
1353    // of SSE registers used. The contents of %al do not need to match exactly
1354    // the number of registers, but must be an upper bound on the number of SSE
1355    // registers used and is in the range 0 - 8 inclusive.
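    //
    // For example, a variadic call that passes a single double in %xmm0 could
    // set %al to 1; setting it to 8 would also be legal, since %al only needs
    // to be an upper bound.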
1356
1357    // Count the number of XMM registers allocated.
1358    static const unsigned XMMArgRegs[] = {
1359      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1360      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1361    };
1362    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
1363
1364    Chain = DAG.getCopyToReg(Chain, X86::AL,
1365                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
1366    InFlag = Chain.getValue(1);
1367  }
1368
1369  // If the callee is a GlobalAddress node (quite common, every direct call is)
1370  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
1371  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1372    // We should use an extra load for direct calls to dllimported functions
1373    // in non-JIT mode.
1374    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
1375                                        getTargetMachine(), true))
1376      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
1377  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1378    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
1379
1380  // Returns a chain & a flag for retval copy to use.
1381  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1382  SmallVector<SDOperand, 8> Ops;
1383  Ops.push_back(Chain);
1384  Ops.push_back(Callee);
1385
1386  // Add argument registers to the end of the list so that they are known live
1387  // into the call.
1388  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1389    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1390                                  RegsToPass[i].second.getValueType()));
1391
1392  if (InFlag.Val)
1393    Ops.push_back(InFlag);
1394
1395  // FIXME: Do not generate X86ISD::TAILCALL for now.
1396  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
1397                      NodeTys, &Ops[0], Ops.size());
1398  InFlag = Chain.getValue(1);
1399
1400  // Returns a flag for retval copy to use.
1401  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1402  Ops.clear();
1403  Ops.push_back(Chain);
1404  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
1405  Ops.push_back(DAG.getConstant(0, getPointerTy()));
1406  Ops.push_back(InFlag);
1407  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
1408  InFlag = Chain.getValue(1);
1409
1410  // Handle result values, copying them out of physregs into vregs that we
1411  // return.
1412  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
1413}
1414
1415//===----------------------------------------------------------------------===//
1416//                 Fast & FastCall Calling Convention implementation
1417//===----------------------------------------------------------------------===//
1418//
1419// The X86 'fast' calling convention passes up to two integer arguments in
1420// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
1421// and requires that the callee pop its arguments off the stack (allowing proper
1422// tail calls), and has the same return value conventions as C calling convs.
1423//
1424// This calling convention always arranges for the callee pop value to be 8n+4
1425// bytes, which is needed for tail recursion elimination and stack alignment
1426// reasons.
1427//
1428// Note that this can be enhanced in the future to pass fp vals in registers
1429// (when we have a global fp allocator) and do other tricks.
1430//
1431//===----------------------------------------------------------------------===//
1432// The X86 'fastcall' calling convention passes up to two integer arguments in
1433// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
1434// and requires that the callee pop its arguments off the stack (allowing proper
1435// tail calls), and has the same return value conventions as C calling convs.
1436//
1437// This calling convention always arranges for the callee pop value to be 8n+4
1438// bytes, which is needed for tail recursion elimination and stack alignment
1439// reasons.
1440SDOperand
1441X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG,
1442                                        bool isFastCall) {
1443  unsigned NumArgs = Op.Val->getNumValues()-1;
1444  MachineFunction &MF = DAG.getMachineFunction();
1445  MachineFrameInfo *MFI = MF.getFrameInfo();
1446  SDOperand Root = Op.getOperand(0);
1447  SmallVector<SDOperand, 8> ArgValues;
1448
1449  // Add DAG nodes to load the arguments...  On entry to a function the stack
1450  // frame looks like this:
1451  //
1452  // [ESP] -- return address
1453  // [ESP + 4] -- first nonreg argument (leftmost lexically)
1454  // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size
1455  //    ...
1456  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
1457
1458  // Keep track of the number of integer regs passed so far.  This can be either
1459  // 0 (neither EAX/ECX nor EDX used), 1 (EAX/ECX is used) or 2 (EAX/ECX and EDX
1460  // are both used).
1461  unsigned NumIntRegs = 0;
1462  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
1463
1464  static const unsigned XMMArgRegs[] = {
1465    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
1466  };
1467
1468  static const unsigned GPRArgRegs[][2][2] = {
1469    {{ X86::AL,  X86::DL },  { X86::CL,  X86::DL }},
1470    {{ X86::AX,  X86::DX },  { X86::CX,  X86::DX }},
1471    {{ X86::EAX, X86::EDX }, { X86::ECX,  X86::EDX }}
1472  };
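  // GPRArgRegs is indexed as [argument size][convention][register number]: the
  // first index picks the 8/16/32-bit registers, the second picks the fastcc
  // (EAX/EDX) or fastcall (ECX/EDX) register pair, and the third picks the
  // first or second integer register argument.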
1473
1474  static const TargetRegisterClass* GPRClasses[3] = {
1475    X86::GR8RegisterClass, X86::GR16RegisterClass, X86::GR32RegisterClass
1476  };
1477
1478  unsigned GPRInd = (isFastCall ? 1 : 0);
1479  for (unsigned i = 0; i < NumArgs; ++i) {
1480    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
1481    unsigned ArgIncrement = 4;
1482    unsigned ObjSize = 0;
1483    unsigned ObjXMMRegs = 0;
1484    unsigned ObjIntRegs = 0;
1485    unsigned Reg = 0;
1486    SDOperand ArgValue;
1487
1488    HowToPassCallArgument(ObjectVT,
1489                          true, // Use as many registers as possible
1490                          NumIntRegs, NumXMMRegs,
1491                          (isFastCall ? 2 : FASTCC_NUM_INT_ARGS_INREGS),
1492                          ObjSize, ObjIntRegs, ObjXMMRegs);
1493
1494    if (ObjSize > 4)
1495      ArgIncrement = ObjSize;
1496
1497    if (ObjIntRegs || ObjXMMRegs) {
1498      switch (ObjectVT) {
1499      default: assert(0 && "Unhandled argument type!");
1500      case MVT::i8:
1501      case MVT::i16:
1502      case MVT::i32: {
1503        unsigned RegToUse = GPRArgRegs[ObjectVT-MVT::i8][GPRInd][NumIntRegs];
1504        Reg = AddLiveIn(MF, RegToUse, GPRClasses[ObjectVT-MVT::i8]);
1505        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
1506        break;
1507      }
1508      case MVT::v16i8:
1509      case MVT::v8i16:
1510      case MVT::v4i32:
1511      case MVT::v2i64:
1512      case MVT::v4f32:
1513      case MVT::v2f64: {
1514        assert(!isFastCall && "Unhandled argument type!");
1515        Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass);
1516        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
1517        break;
1518      }
1519      }
1520      NumIntRegs += ObjIntRegs;
1521      NumXMMRegs += ObjXMMRegs;
1522    }
1523    if (ObjSize) {
1524      // XMM arguments have to be aligned on 16-byte boundary.
1525      if (ObjSize == 16)
1526        ArgOffset = ((ArgOffset + 15) / 16) * 16;
1527      // Create the SelectionDAG nodes corresponding to a load from this
1528      // parameter.
1529      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1530      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
1531      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
1532
1533      ArgOffset += ArgIncrement;   // Move on to the next argument.
1534    }
1535
1536    ArgValues.push_back(ArgValue);
1537  }
1538
1539  ArgValues.push_back(Root);
1540
1541  // Make sure the argument area takes 8n+4 bytes so that the start of the
1542  // arguments, and the arguments after the retaddr has been pushed, are aligned.
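  // For example, 16 bytes of stack arguments are bumped to a 20-byte callee
  // pop, so that together with the 4-byte return address a multiple of 8 bytes
  // is removed from the stack.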
1543  if ((ArgOffset & 7) == 0)
1544    ArgOffset += 4;
1545
1546  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
1547  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
1548  ReturnAddrIndex = 0;             // No return address slot generated yet.
1549  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
1550  BytesCallerReserves = 0;
1551
1552  MF.getInfo<X86FunctionInfo>()->setBytesToPopOnReturn(BytesToPopOnReturn);
1553
1554  // Finally, inform the code generator which regs we return values in.
1555  switch (getValueType(MF.getFunction()->getReturnType())) {
1556  default: assert(0 && "Unknown type!");
1557  case MVT::isVoid: break;
1558  case MVT::i1:
1559  case MVT::i8:
1560  case MVT::i16:
1561  case MVT::i32:
1562    MF.addLiveOut(X86::EAX);
1563    break;
1564  case MVT::i64:
1565    MF.addLiveOut(X86::EAX);
1566    MF.addLiveOut(X86::EDX);
1567    break;
1568  case MVT::f32:
1569  case MVT::f64:
1570    MF.addLiveOut(X86::ST0);
1571    break;
1572  case MVT::v16i8:
1573  case MVT::v8i16:
1574  case MVT::v4i32:
1575  case MVT::v2i64:
1576  case MVT::v4f32:
1577  case MVT::v2f64:
1578    assert(!isFastCall && "Unknown result type");
1579    MF.addLiveOut(X86::XMM0);
1580    break;
1581  }
1582
1583  // Return the new list of results.
1584  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
1585                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
1586}
1587
1588SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
1589                                               unsigned CC) {
1590  SDOperand Chain     = Op.getOperand(0);
1591  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1592  SDOperand Callee    = Op.getOperand(4);
1593  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;
1594
1595  // Count how many bytes are to be pushed on the stack.
1596  unsigned NumBytes = 0;
1597
1598  // Keep track of the number of integer regs passed so far.  This can be either
1599  // 0 (neither EAX/ECX nor EDX used), 1 (EAX/ECX is used) or 2 (EAX/ECX and EDX
1600  // are both used).
1601  unsigned NumIntRegs = 0;
1602  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
1603
1604  static const unsigned GPRArgRegs[][2][2] = {
1605    {{ X86::AL,  X86::DL },  { X86::CL,  X86::DL }},
1606    {{ X86::AX,  X86::DX },  { X86::CX,  X86::DX }},
1607    {{ X86::EAX, X86::EDX }, { X86::ECX,  X86::EDX }}
1608  };
1609  static const unsigned XMMArgRegs[] = {
1610    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
1611  };
1612
1613  bool isFastCall = CC == CallingConv::X86_FastCall;
1614  unsigned GPRInd = isFastCall ? 1 : 0;
1615  for (unsigned i = 0; i != NumOps; ++i) {
1616    SDOperand Arg = Op.getOperand(5+2*i);
1617
1618    switch (Arg.getValueType()) {
1619    default: assert(0 && "Unknown value type!");
1620    case MVT::i8:
1621    case MVT::i16:
1622    case MVT::i32: {
1623     unsigned MaxNumIntRegs = (isFastCall ? 2 : FASTCC_NUM_INT_ARGS_INREGS);
1624     if (NumIntRegs < MaxNumIntRegs) {
1625       ++NumIntRegs;
1626       break;
1627     }
1628     } // Fall through
1629    case MVT::f32:
1630      NumBytes += 4;
1631      break;
1632    case MVT::f64:
1633      NumBytes += 8;
1634      break;
1635    case MVT::v16i8:
1636    case MVT::v8i16:
1637    case MVT::v4i32:
1638    case MVT::v2i64:
1639    case MVT::v4f32:
1640    case MVT::v2f64:
1641      assert(!isFastCall && "Unknown value type!");
1642      if (NumXMMRegs < 4)
1643        NumXMMRegs++;
1644      else {
1645        // XMM arguments have to be aligned on 16-byte boundary.
1646        NumBytes = ((NumBytes + 15) / 16) * 16;
1647        NumBytes += 16;
1648      }
1649      break;
1650    }
1651  }
1652
1653  // Make sure the argument area takes 8n+4 bytes so that the start of the
1654  // arguments, and the arguments after the retaddr has been pushed, are aligned.
1655  if ((NumBytes & 7) == 0)
1656    NumBytes += 4;
1657
1658  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
1659
1660  // Arguments go on the stack in reverse order, as specified by the ABI.
1661  unsigned ArgOffset = 0;
1662  NumIntRegs = 0;
1663  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
1664  SmallVector<SDOperand, 8> MemOpChains;
1665  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
1666  for (unsigned i = 0; i != NumOps; ++i) {
1667    SDOperand Arg = Op.getOperand(5+2*i);
1668
1669    switch (Arg.getValueType()) {
1670    default: assert(0 && "Unexpected ValueType for argument!");
1671    case MVT::i8:
1672    case MVT::i16:
1673    case MVT::i32: {
1674     unsigned MaxNumIntRegs = (isFastCall ? 2 : FASTCC_NUM_INT_ARGS_INREGS);
1675     if (NumIntRegs < MaxNumIntRegs) {
1676       unsigned RegToUse =
1677         GPRArgRegs[Arg.getValueType()-MVT::i8][GPRInd][NumIntRegs];
1678       RegsToPass.push_back(std::make_pair(RegToUse, Arg));
1679       ++NumIntRegs;
1680       break;
1681     }
1682    } // Fall through
1683    case MVT::f32: {
1684      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1685      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1686      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1687      ArgOffset += 4;
1688      break;
1689    }
1690    case MVT::f64: {
1691      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1692      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1693      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1694      ArgOffset += 8;
1695      break;
1696    }
1697    case MVT::v16i8:
1698    case MVT::v8i16:
1699    case MVT::v4i32:
1700    case MVT::v2i64:
1701    case MVT::v4f32:
1702    case MVT::v2f64:
1703      assert(!isFastCall && "Unexpected ValueType for argument!");
1704      if (NumXMMRegs < 4) {
1705        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
1706        NumXMMRegs++;
1707      } else {
1708        // XMM arguments have to be aligned on 16-byte boundary.
1709        ArgOffset = ((ArgOffset + 15) / 16) * 16;
1710        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1711        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1712        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
1713        ArgOffset += 16;
1714      }
1715      break;
1716    }
1717  }
1718
1719  if (!MemOpChains.empty())
1720    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1721                        &MemOpChains[0], MemOpChains.size());
1722
1723  // Build a sequence of copy-to-reg nodes chained together with token chain
1724  // and flag operands which copy the outgoing args into registers.
1725  SDOperand InFlag;
1726  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1727    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1728                             InFlag);
1729    InFlag = Chain.getValue(1);
1730  }
1731
1732  // If the callee is a GlobalAddress node (quite common, every direct call is)
1733  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
1734  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1735    // We should use an extra load for direct calls to dllimported functions
1736    // in non-JIT mode.
1737    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
1738                                        getTargetMachine(), true))
1739      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
1740  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1741    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
1742
1743  // ELF / PIC requires the GOT pointer to be in the EBX register before
1744  // function calls made via the PLT.
1745  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
1746      Subtarget->isPICStyleGOT()) {
1747    Chain = DAG.getCopyToReg(Chain, X86::EBX,
1748                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
1749                             InFlag);
1750    InFlag = Chain.getValue(1);
1751  }
1752
1753  // Returns a chain & a flag for retval copy to use.
1754  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1755  SmallVector<SDOperand, 8> Ops;
1756  Ops.push_back(Chain);
1757  Ops.push_back(Callee);
1758
1759  // Add argument registers to the end of the list so that they are known live
1760  // into the call.
1761  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1762    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1763                                  RegsToPass[i].second.getValueType()));
1764
1765  // Add an implicit use of the GOT pointer in EBX.
1766  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
1767      Subtarget->isPICStyleGOT())
1768    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));
1769
1770  if (InFlag.Val)
1771    Ops.push_back(InFlag);
1772
1773  // FIXME: Do not generate X86ISD::TAILCALL for now.
1774  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
1775                      NodeTys, &Ops[0], Ops.size());
1776  InFlag = Chain.getValue(1);
1777
1778  // Returns a flag for retval copy to use.
1779  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1780  Ops.clear();
1781  Ops.push_back(Chain);
1782  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
1783  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
1784  Ops.push_back(InFlag);
1785  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
1786  InFlag = Chain.getValue(1);
1787
1788  // Handle result values, copying them out of physregs into vregs that we
1789  // return.
1790  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
1791}
1792
1793SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
1794  if (ReturnAddrIndex == 0) {
1795    // Set up a frame object for the return address.
1796    MachineFunction &MF = DAG.getMachineFunction();
1797    if (Subtarget->is64Bit())
1798      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
1799    else
1800      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
1801  }
1802
1803  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
1804}
1805
1806
1807
1808/// translateX86CC - do a one-to-one translation of an ISD::CondCode to the X86
1809/// specific condition code. It returns false if it cannot do a direct
1810/// translation. X86CC is the translated CondCode.  LHS/RHS are modified as
1811/// needed.
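/// For ordered FP comparisons such as SETOLT, COND_B cannot be used directly
/// because an unordered result also sets CF; instead the operands are swapped
/// and the compare is lowered with COND_A, which is false whenever the result
/// is unordered.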
1812static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
1813                           unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
1814                           SelectionDAG &DAG) {
1815  X86CC = X86::COND_INVALID;
1816  if (!isFP) {
1817    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1818      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
1819        // X > -1  -> compare X against 0 and jump if the sign flag is clear.
1820        RHS = DAG.getConstant(0, RHS.getValueType());
1821        X86CC = X86::COND_NS;
1822        return true;
1823      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
1824        // X < 0   -> compare X against 0 and jump if the sign flag is set.
1825        X86CC = X86::COND_S;
1826        return true;
1827      }
1828    }
1829
1830    switch (SetCCOpcode) {
1831    default: break;
1832    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
1833    case ISD::SETGT:  X86CC = X86::COND_G;  break;
1834    case ISD::SETGE:  X86CC = X86::COND_GE; break;
1835    case ISD::SETLT:  X86CC = X86::COND_L;  break;
1836    case ISD::SETLE:  X86CC = X86::COND_LE; break;
1837    case ISD::SETNE:  X86CC = X86::COND_NE; break;
1838    case ISD::SETULT: X86CC = X86::COND_B;  break;
1839    case ISD::SETUGT: X86CC = X86::COND_A;  break;
1840    case ISD::SETULE: X86CC = X86::COND_BE; break;
1841    case ISD::SETUGE: X86CC = X86::COND_AE; break;
1842    }
1843  } else {
1844    // On a floating point condition, the flags are set as follows:
1845    // ZF  PF  CF   op
1846    //  0 | 0 | 0 | X > Y
1847    //  0 | 0 | 1 | X < Y
1848    //  1 | 0 | 0 | X == Y
1849    //  1 | 1 | 1 | unordered
1850    bool Flip = false;
1851    switch (SetCCOpcode) {
1852    default: break;
1853    case ISD::SETUEQ:
1854    case ISD::SETEQ: X86CC = X86::COND_E;  break;
1855    case ISD::SETOLT: Flip = true; // Fallthrough
1856    case ISD::SETOGT:
1857    case ISD::SETGT: X86CC = X86::COND_A;  break;
1858    case ISD::SETOLE: Flip = true; // Fallthrough
1859    case ISD::SETOGE:
1860    case ISD::SETGE: X86CC = X86::COND_AE; break;
1861    case ISD::SETUGT: Flip = true; // Fallthrough
1862    case ISD::SETULT:
1863    case ISD::SETLT: X86CC = X86::COND_B;  break;
1864    case ISD::SETUGE: Flip = true; // Fallthrough
1865    case ISD::SETULE:
1866    case ISD::SETLE: X86CC = X86::COND_BE; break;
1867    case ISD::SETONE:
1868    case ISD::SETNE: X86CC = X86::COND_NE; break;
1869    case ISD::SETUO: X86CC = X86::COND_P;  break;
1870    case ISD::SETO:  X86CC = X86::COND_NP; break;
1871    }
1872    if (Flip)
1873      std::swap(LHS, RHS);
1874  }
1875
1876  return X86CC != X86::COND_INVALID;
1877}
1878
1879/// hasFPCMov - is there a floating point cmov for the specific X86 condition
1880/// code. Current x86 isa includes the following FP cmov instructions:
1881/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
1882static bool hasFPCMov(unsigned X86CC) {
1883  switch (X86CC) {
1884  default:
1885    return false;
1886  case X86::COND_B:
1887  case X86::COND_BE:
1888  case X86::COND_E:
1889  case X86::COND_P:
1890  case X86::COND_A:
1891  case X86::COND_AE:
1892  case X86::COND_NE:
1893  case X86::COND_NP:
1894    return true;
1895  }
1896}
1897
1898/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
1899/// true if Op is undef or if its value falls within the specified range [Low, Hi).
1900static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
1901  if (Op.getOpcode() == ISD::UNDEF)
1902    return true;
1903
1904  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
1905  return (Val >= Low && Val < Hi);
1906}
1907
1908/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
1909/// true if Op is undef or if its value is equal to the specified value.
1910static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
1911  if (Op.getOpcode() == ISD::UNDEF)
1912    return true;
1913  return cast<ConstantSDNode>(Op)->getValue() == Val;
1914}
1915
1916/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
1917/// specifies a shuffle of elements that is suitable for input to PSHUFD.
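/// For example, <2, 1, 0, 3> is a valid PSHUFD mask, while <0, 4, 2, 6> is
/// not, because elements 4 and 6 refer to the second vector.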
1918bool X86::isPSHUFDMask(SDNode *N) {
1919  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1920
1921  if (N->getNumOperands() != 4)
1922    return false;
1923
1924  // Check that the mask doesn't reference the second vector.
1925  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1926    SDOperand Arg = N->getOperand(i);
1927    if (Arg.getOpcode() == ISD::UNDEF) continue;
1928    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1929    if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
1930      return false;
1931  }
1932
1933  return true;
1934}
1935
1936/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
1937/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
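/// For example, <0, 1, 2, 3, 7, 6, 5, 4>: the low quadword is copied in order
/// and the high four elements may be any permutation of the values 4-7.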
1938bool X86::isPSHUFHWMask(SDNode *N) {
1939  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1940
1941  if (N->getNumOperands() != 8)
1942    return false;
1943
1944  // Lower quadword copied in order.
1945  for (unsigned i = 0; i != 4; ++i) {
1946    SDOperand Arg = N->getOperand(i);
1947    if (Arg.getOpcode() == ISD::UNDEF) continue;
1948    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1949    if (cast<ConstantSDNode>(Arg)->getValue() != i)
1950      return false;
1951  }
1952
1953  // Upper quadword shuffled.
1954  for (unsigned i = 4; i != 8; ++i) {
1955    SDOperand Arg = N->getOperand(i);
1956    if (Arg.getOpcode() == ISD::UNDEF) continue;
1957    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1958    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1959    if (Val < 4 || Val > 7)
1960      return false;
1961  }
1962
1963  return true;
1964}
1965
1966/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
1967/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
1968bool X86::isPSHUFLWMask(SDNode *N) {
1969  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1970
1971  if (N->getNumOperands() != 8)
1972    return false;
1973
1974  // Upper quadword copied in order.
1975  for (unsigned i = 4; i != 8; ++i)
1976    if (!isUndefOrEqual(N->getOperand(i), i))
1977      return false;
1978
1979  // Lower quadword shuffled.
1980  for (unsigned i = 0; i != 4; ++i)
1981    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
1982      return false;
1983
1984  return true;
1985}
1986
1987/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
1988/// specifies a shuffle of elements that is suitable for input to SHUFP*.
1989static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) {
1990  if (NumElems != 2 && NumElems != 4) return false;
1991
1992  unsigned Half = NumElems / 2;
1993  for (unsigned i = 0; i < Half; ++i)
1994    if (!isUndefOrInRange(Elems[i], 0, NumElems))
1995      return false;
1996  for (unsigned i = Half; i < NumElems; ++i)
1997    if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
1998      return false;
1999
2000  return true;
2001}
2002
2003bool X86::isSHUFPMask(SDNode *N) {
2004  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2005  return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
2006}
2007
2008/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
2009/// the reverse of what x86 shuffles want. X86 shuffles require the lower
2010/// half elements to come from vector 1 (which would equal the dest.) and
2011/// the upper half to come from vector 2.
2012static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) {
2013  if (NumOps != 2 && NumOps != 4) return false;
2014
2015  unsigned Half = NumOps / 2;
2016  for (unsigned i = 0; i < Half; ++i)
2017    if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
2018      return false;
2019  for (unsigned i = Half; i < NumOps; ++i)
2020    if (!isUndefOrInRange(Ops[i], 0, NumOps))
2021      return false;
2022  return true;
2023}
2024
2025static bool isCommutedSHUFP(SDNode *N) {
2026  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2027  return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
2028}
2029
2030/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
2031/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
2032bool X86::isMOVHLPSMask(SDNode *N) {
2033  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2034
2035  if (N->getNumOperands() != 4)
2036    return false;
2037
2038  // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
2039  return isUndefOrEqual(N->getOperand(0), 6) &&
2040         isUndefOrEqual(N->getOperand(1), 7) &&
2041         isUndefOrEqual(N->getOperand(2), 2) &&
2042         isUndefOrEqual(N->getOperand(3), 3);
2043}
2044
2045/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
2046/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
2047/// <2, 3, 2, 3>
2048bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
2049  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2050
2051  if (N->getNumOperands() != 4)
2052    return false;
2053
2054  // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3
2055  return isUndefOrEqual(N->getOperand(0), 2) &&
2056         isUndefOrEqual(N->getOperand(1), 3) &&
2057         isUndefOrEqual(N->getOperand(2), 2) &&
2058         isUndefOrEqual(N->getOperand(3), 3);
2059}
2060
2061/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
2062/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
2063bool X86::isMOVLPMask(SDNode *N) {
2064  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2065
2066  unsigned NumElems = N->getNumOperands();
2067  if (NumElems != 2 && NumElems != 4)
2068    return false;
2069
2070  for (unsigned i = 0; i < NumElems/2; ++i)
2071    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
2072      return false;
2073
2074  for (unsigned i = NumElems/2; i < NumElems; ++i)
2075    if (!isUndefOrEqual(N->getOperand(i), i))
2076      return false;
2077
2078  return true;
2079}
2080
2081/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
2082/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
2083/// and MOVLHPS.
2084bool X86::isMOVHPMask(SDNode *N) {
2085  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2086
2087  unsigned NumElems = N->getNumOperands();
2088  if (NumElems != 2 && NumElems != 4)
2089    return false;
2090
2091  for (unsigned i = 0; i < NumElems/2; ++i)
2092    if (!isUndefOrEqual(N->getOperand(i), i))
2093      return false;
2094
2095  for (unsigned i = 0; i < NumElems/2; ++i) {
2096    SDOperand Arg = N->getOperand(i + NumElems/2);
2097    if (!isUndefOrEqual(Arg, i + NumElems))
2098      return false;
2099  }
2100
2101  return true;
2102}
2103
2104/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
2105/// specifies a shuffle of elements that is suitable for input to UNPCKL.
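/// For 4 elements the canonical mask is <0, 4, 1, 5>, i.e. interleave the low
/// halves of the two vectors.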
2106bool static isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
2107                         bool V2IsSplat = false) {
2108  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
2109    return false;
2110
2111  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
2112    SDOperand BitI  = Elts[i];
2113    SDOperand BitI1 = Elts[i+1];
2114    if (!isUndefOrEqual(BitI, j))
2115      return false;
2116    if (V2IsSplat) {
2117      if (isUndefOrEqual(BitI1, NumElts))
2118        return false;
2119    } else {
2120      if (!isUndefOrEqual(BitI1, j + NumElts))
2121        return false;
2122    }
2123  }
2124
2125  return true;
2126}
2127
2128bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
2129  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2130  return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
2131}
2132
2133/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
2134/// specifies a shuffle of elements that is suitable for input to UNPCKH.
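/// For 4 elements the canonical mask is <2, 6, 3, 7>, i.e. interleave the
/// high halves of the two vectors.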
2135bool static isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
2136                         bool V2IsSplat = false) {
2137  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
2138    return false;
2139
2140  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
2141    SDOperand BitI  = Elts[i];
2142    SDOperand BitI1 = Elts[i+1];
2143    if (!isUndefOrEqual(BitI, j + NumElts/2))
2144      return false;
2145    if (V2IsSplat) {
2146      if (isUndefOrEqual(BitI1, NumElts))
2147        return false;
2148    } else {
2149      if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
2150        return false;
2151    }
2152  }
2153
2154  return true;
2155}
2156
2157bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
2158  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2159  return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
2160}
2161
2162/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
2163/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
2164/// <0, 0, 1, 1>
2165bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
2166  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2167
2168  unsigned NumElems = N->getNumOperands();
2169  if (NumElems != 4 && NumElems != 8 && NumElems != 16)
2170    return false;
2171
2172  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
2173    SDOperand BitI  = N->getOperand(i);
2174    SDOperand BitI1 = N->getOperand(i+1);
2175
2176    if (!isUndefOrEqual(BitI, j))
2177      return false;
2178    if (!isUndefOrEqual(BitI1, j))
2179      return false;
2180  }
2181
2182  return true;
2183}
2184
2185/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
2186/// specifies a shuffle of elements that is suitable for input to MOVSS,
2187/// MOVSD, and MOVD, i.e. setting the lowest element.
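/// For example, the 4-element mask <4, 1, 2, 3> takes element 0 from the
/// second vector and the remaining elements from the first vector in order.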
2188static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
2189  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
2190    return false;
2191
2192  if (!isUndefOrEqual(Elts[0], NumElts))
2193    return false;
2194
2195  for (unsigned i = 1; i < NumElts; ++i) {
2196    if (!isUndefOrEqual(Elts[i], i))
2197      return false;
2198  }
2199
2200  return true;
2201}
2202
2203bool X86::isMOVLMask(SDNode *N) {
2204  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2205  return ::isMOVLMask(N->op_begin(), N->getNumOperands());
2206}
2207
2208/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
2209/// of what x86 movss wants: the lowest element must be the lowest element of
2210/// vector 2 and the other elements must come from vector 1 in order.
2211static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
2212                           bool V2IsSplat = false,
2213                           bool V2IsUndef = false) {
2214  if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
2215    return false;
2216
2217  if (!isUndefOrEqual(Ops[0], 0))
2218    return false;
2219
2220  for (unsigned i = 1; i < NumOps; ++i) {
2221    SDOperand Arg = Ops[i];
2222    if (!(isUndefOrEqual(Arg, i+NumOps) ||
2223          (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
2224          (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
2225      return false;
2226  }
2227
2228  return true;
2229}
2230
2231static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
2232                           bool V2IsUndef = false) {
2233  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2234  return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
2235                        V2IsSplat, V2IsUndef);
2236}
2237
2238/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
2239/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
2240bool X86::isMOVSHDUPMask(SDNode *N) {
2241  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2242
2243  if (N->getNumOperands() != 4)
2244    return false;
2245
2246  // Expect 1, 1, 3, 3
2247  for (unsigned i = 0; i < 2; ++i) {
2248    SDOperand Arg = N->getOperand(i);
2249    if (Arg.getOpcode() == ISD::UNDEF) continue;
2250    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2251    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2252    if (Val != 1) return false;
2253  }
2254
2255  bool HasHi = false;
2256  for (unsigned i = 2; i < 4; ++i) {
2257    SDOperand Arg = N->getOperand(i);
2258    if (Arg.getOpcode() == ISD::UNDEF) continue;
2259    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2260    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2261    if (Val != 3) return false;
2262    HasHi = true;
2263  }
2264
2265  // Don't use movshdup if it can be done with a shufps.
2266  return HasHi;
2267}
2268
2269/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
2270/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
2271bool X86::isMOVSLDUPMask(SDNode *N) {
2272  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2273
2274  if (N->getNumOperands() != 4)
2275    return false;
2276
2277  // Expect 0, 0, 2, 2
2278  for (unsigned i = 0; i < 2; ++i) {
2279    SDOperand Arg = N->getOperand(i);
2280    if (Arg.getOpcode() == ISD::UNDEF) continue;
2281    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2282    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2283    if (Val != 0) return false;
2284  }
2285
2286  bool HasHi = false;
2287  for (unsigned i = 2; i < 4; ++i) {
2288    SDOperand Arg = N->getOperand(i);
2289    if (Arg.getOpcode() == ISD::UNDEF) continue;
2290    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2291    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2292    if (Val != 2) return false;
2293    HasHi = true;
2294  }
2295
2296  // Don't use movsldup if it can be done with a shufps.
2297  return HasHi;
2298}
2299
2300/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2301/// a splat of a single element.
2302static bool isSplatMask(SDNode *N) {
2303  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2304
2305  // This is a splat operation if each element of the permute is the same, and
2306  // if the value doesn't reference the second vector.
2307  unsigned NumElems = N->getNumOperands();
2308  SDOperand ElementBase;
2309  unsigned i = 0;
2310  for (; i != NumElems; ++i) {
2311    SDOperand Elt = N->getOperand(i);
2312    if (isa<ConstantSDNode>(Elt)) {
2313      ElementBase = Elt;
2314      break;
2315    }
2316  }
2317
2318  if (!ElementBase.Val)
2319    return false;
2320
2321  for (; i != NumElems; ++i) {
2322    SDOperand Arg = N->getOperand(i);
2323    if (Arg.getOpcode() == ISD::UNDEF) continue;
2324    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2325    if (Arg != ElementBase) return false;
2326  }
2327
2328  // Make sure it is a splat of the first vector operand.
2329  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
2330}
2331
2332/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2333/// a splat of a single element and it's a 2 or 4 element mask.
2334bool X86::isSplatMask(SDNode *N) {
2335  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2336
2337  // We can only splat 64-bit and 32-bit quantities with a single instruction.
2338  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
2339    return false;
2340  return ::isSplatMask(N);
2341}
2342
2343/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
2344/// specifies a splat of the zeroth (lowest) element.
2345bool X86::isSplatLoMask(SDNode *N) {
2346  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2347
2348  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
2349    if (!isUndefOrEqual(N->getOperand(i), 0))
2350      return false;
2351  return true;
2352}
2353
2354/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
2355/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
2356/// instructions.
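/// For a 4-element mask each element is encoded in two bits, with mask
/// element 0 in the low bits; e.g. the mask <3, 1, 2, 0> yields
/// (0 << 6) | (2 << 4) | (1 << 2) | 3 == 0x27.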
2357unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
2358  unsigned NumOperands = N->getNumOperands();
2359  unsigned Shift = (NumOperands == 4) ? 2 : 1;
2360  unsigned Mask = 0;
2361  for (unsigned i = 0; i < NumOperands; ++i) {
2362    unsigned Val = 0;
2363    SDOperand Arg = N->getOperand(NumOperands-i-1);
2364    if (Arg.getOpcode() != ISD::UNDEF)
2365      Val = cast<ConstantSDNode>(Arg)->getValue();
2366    if (Val >= NumOperands) Val -= NumOperands;
2367    Mask |= Val;
2368    if (i != NumOperands - 1)
2369      Mask <<= Shift;
2370  }
2371
2372  return Mask;
2373}
2374
2375/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
2376/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
2377/// instructions.
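/// Only the upper four mask elements are encoded (relative to the upper
/// quadword); e.g. the mask <0, 1, 2, 3, 5, 4, 7, 6> yields
/// (2 << 6) | (3 << 4) | (0 << 2) | 1 == 0xB1.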
2378unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
2379  unsigned Mask = 0;
2380  // 8 nodes, but we only care about the last 4.
2381  for (unsigned i = 7; i >= 4; --i) {
2382    unsigned Val = 0;
2383    SDOperand Arg = N->getOperand(i);
2384    if (Arg.getOpcode() != ISD::UNDEF)
2385      Val = cast<ConstantSDNode>(Arg)->getValue();
2386    Mask |= (Val - 4);
2387    if (i != 4)
2388      Mask <<= 2;
2389  }
2390
2391  return Mask;
2392}
2393
2394/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
2395/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
2396/// instructions.
2397unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
2398  unsigned Mask = 0;
2399  // 8 nodes, but we only care about the first 4.
2400  for (int i = 3; i >= 0; --i) {
2401    unsigned Val = 0;
2402    SDOperand Arg = N->getOperand(i);
2403    if (Arg.getOpcode() != ISD::UNDEF)
2404      Val = cast<ConstantSDNode>(Arg)->getValue();
2405    Mask |= Val;
2406    if (i != 0)
2407      Mask <<= 2;
2408  }
2409
2410  return Mask;
2411}
2412
2413/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
2414/// specifies an 8 element shuffle that can be broken into a pair of
2415/// PSHUFHW and PSHUFLW.
2416static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
2417  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2418
2419  if (N->getNumOperands() != 8)
2420    return false;
2421
2422  // Lower quadword shuffled.
2423  for (unsigned i = 0; i != 4; ++i) {
2424    SDOperand Arg = N->getOperand(i);
2425    if (Arg.getOpcode() == ISD::UNDEF) continue;
2426    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2427    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2428    if (Val >= 4)
2429      return false;
2430  }
2431
2432  // Upper quadword shuffled.
2433  for (unsigned i = 4; i != 8; ++i) {
2434    SDOperand Arg = N->getOperand(i);
2435    if (Arg.getOpcode() == ISD::UNDEF) continue;
2436    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2437    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2438    if (Val < 4 || Val > 7)
2439      return false;
2440  }
2441
2442  return true;
2443}
2444
2445/// CommuteVectorShuffle - Swap vector_shuffle operands as well as
2446/// the values in their permute mask.
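/// For example, with 4 elements, shuffle(V1, V2, <0, 5, 2, 7>) becomes
/// shuffle(V2, V1, <4, 1, 6, 3>).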
2447static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
2448                                      SDOperand &V2, SDOperand &Mask,
2449                                      SelectionDAG &DAG) {
2450  MVT::ValueType VT = Op.getValueType();
2451  MVT::ValueType MaskVT = Mask.getValueType();
2452  MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
2453  unsigned NumElems = Mask.getNumOperands();
2454  SmallVector<SDOperand, 8> MaskVec;
2455
2456  for (unsigned i = 0; i != NumElems; ++i) {
2457    SDOperand Arg = Mask.getOperand(i);
2458    if (Arg.getOpcode() == ISD::UNDEF) {
2459      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
2460      continue;
2461    }
2462    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2463    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2464    if (Val < NumElems)
2465      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
2466    else
2467      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
2468  }
2469
2470  std::swap(V1, V2);
2471  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2472  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2473}
2474
2475/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
2476/// match movhlps. The lower half elements should come from the upper half of
2477/// V1 (and in order), and the upper half elements should come from the upper
2478/// half of V2 (and in order).
2479static bool ShouldXformToMOVHLPS(SDNode *Mask) {
2480  unsigned NumElems = Mask->getNumOperands();
2481  if (NumElems != 4)
2482    return false;
2483  for (unsigned i = 0, e = 2; i != e; ++i)
2484    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
2485      return false;
2486  for (unsigned i = 2; i != 4; ++i)
2487    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
2488      return false;
2489  return true;
2490}
2491
2492/// isScalarLoadToVector - Returns true if the node is a scalar load that
2493/// is promoted to a vector.
2494static inline bool isScalarLoadToVector(SDNode *N) {
2495  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
2496    N = N->getOperand(0).Val;
2497    return ISD::isNON_EXTLoad(N);
2498  }
2499  return false;
2500}
2501
2502/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
2503/// match movlp{s|d}. The lower half elements should come from the lower half of
2504/// V1 (and in order), and the upper half elements should come from the upper
2505/// half of V2 (and in order). And since V1 will become the source of the
2506/// MOVLP, it must be either a vector load or a scalar load to vector.
2507static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
2508  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
2509    return false;
2510  // If V2 is a vector load, don't do this transformation. We will try to use
2511  // a load-folding shufps op instead.
2512  if (ISD::isNON_EXTLoad(V2))
2513    return false;
2514
2515  unsigned NumElems = Mask->getNumOperands();
2516  if (NumElems != 2 && NumElems != 4)
2517    return false;
2518  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
2519    if (!isUndefOrEqual(Mask->getOperand(i), i))
2520      return false;
2521  for (unsigned i = NumElems/2; i != NumElems; ++i)
2522    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
2523      return false;
2524  return true;
2525}
2526
2527/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
2528/// all the same.
2529static bool isSplatVector(SDNode *N) {
2530  if (N->getOpcode() != ISD::BUILD_VECTOR)
2531    return false;
2532
2533  SDOperand SplatValue = N->getOperand(0);
2534  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
2535    if (N->getOperand(i) != SplatValue)
2536      return false;
2537  return true;
2538}
2539
2540/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
2541/// to an undef.
2542static bool isUndefShuffle(SDNode *N) {
2543  if (N->getOpcode() != ISD::BUILD_VECTOR)
2544    return false;
2545
2546  SDOperand V1 = N->getOperand(0);
2547  SDOperand V2 = N->getOperand(1);
2548  SDOperand Mask = N->getOperand(2);
2549  unsigned NumElems = Mask.getNumOperands();
2550  for (unsigned i = 0; i != NumElems; ++i) {
2551    SDOperand Arg = Mask.getOperand(i);
2552    if (Arg.getOpcode() != ISD::UNDEF) {
2553      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2554      if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
2555        return false;
2556      else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
2557        return false;
2558    }
2559  }
2560  return true;
2561}
2562
2563/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
2564/// that point to V2 point to its first element.
2565static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
2566  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
2567
2568  bool Changed = false;
2569  SmallVector<SDOperand, 8> MaskVec;
2570  unsigned NumElems = Mask.getNumOperands();
2571  for (unsigned i = 0; i != NumElems; ++i) {
2572    SDOperand Arg = Mask.getOperand(i);
2573    if (Arg.getOpcode() != ISD::UNDEF) {
2574      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2575      if (Val > NumElems) {
2576        Arg = DAG.getConstant(NumElems, Arg.getValueType());
2577        Changed = true;
2578      }
2579    }
2580    MaskVec.push_back(Arg);
2581  }
2582
2583  if (Changed)
2584    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
2585                       &MaskVec[0], MaskVec.size());
2586  return Mask;
2587}
2588
2589/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d} / movd
2590/// operation of specified width.
2591static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
2592  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2593  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2594
2595  SmallVector<SDOperand, 8> MaskVec;
2596  MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
2597  for (unsigned i = 1; i != NumElems; ++i)
2598    MaskVec.push_back(DAG.getConstant(i, BaseVT));
2599  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2600}
2601
2602/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
2603/// of specified width.
2604static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
2605  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2606  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2607  SmallVector<SDOperand, 8> MaskVec;
2608  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
2609    MaskVec.push_back(DAG.getConstant(i,            BaseVT));
2610    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
2611  }
2612  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2613}
2614
2615/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
2616/// of specified width.
2617static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
2618  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2619  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2620  unsigned Half = NumElems/2;
2621  SmallVector<SDOperand, 8> MaskVec;
2622  for (unsigned i = 0; i != Half; ++i) {
2623    MaskVec.push_back(DAG.getConstant(i + Half,            BaseVT));
2624    MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
2625  }
2626  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2627}
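// For example, with NumElems == 4, getUnpacklMask returns <0, 4, 1, 5> and
// getUnpackhMask returns <2, 6, 3, 7>.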
2628
2629/// getZeroVector - Returns a vector of specified type with all zero elements.
2630///
2631static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
2632  assert(MVT::isVector(VT) && "Expected a vector type");
2633  unsigned NumElems = getVectorNumElements(VT);
2634  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
2635  bool isFP = MVT::isFloatingPoint(EVT);
2636  SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
2637  SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
2638  return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
2639}
2640
2641/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
2642///
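/// Roughly: unpacking the vector with itself doubles the width of each
/// repeated element, so after enough rounds the splatted value fills a full
/// 32-bit lane; the final v4i32 shuffle with an all-zero mask then broadcasts
/// lane 0 across the register.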
2643static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
2644  SDOperand V1 = Op.getOperand(0);
2645  SDOperand Mask = Op.getOperand(2);
2646  MVT::ValueType VT = Op.getValueType();
2647  unsigned NumElems = Mask.getNumOperands();
2648  Mask = getUnpacklMask(NumElems, DAG);
2649  while (NumElems != 4) {
2650    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
2651    NumElems >>= 1;
2652  }
2653  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
2654
2655  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2656  Mask = getZeroVector(MaskVT, DAG);
2657  SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
2658                                  DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
2659  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
2660}
2661
2662/// isZeroNode - Returns true if Elt is a constant zero or a floating point
2663/// constant +0.0.
2664static inline bool isZeroNode(SDOperand Elt) {
2665  return ((isa<ConstantSDNode>(Elt) &&
2666           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
2667          (isa<ConstantFPSDNode>(Elt) &&
2668           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
2669}
2670
2671/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
2672/// vector and zero or undef vector.
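/// For example, with NumElems == 4 and Idx == 2 the mask is <0, 0, 4, 0>:
/// element 2 is taken from the first element of V2 and every other element
/// comes from the zero (or undef) vector V1.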
2673static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
2674                                             unsigned NumElems, unsigned Idx,
2675                                             bool isZero, SelectionDAG &DAG) {
2676  SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
2677  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2678  MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
2679  SDOperand Zero = DAG.getConstant(0, EVT);
2680  SmallVector<SDOperand, 8> MaskVec(NumElems, Zero);
2681  MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
2682  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2683                               &MaskVec[0], MaskVec.size());
2684  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2685}
2686
2687/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
2688///
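/// Bytes are inserted pairwise: each even/odd byte pair is zero-extended to
/// i16, the odd byte is shifted left by 8 and ORed with the even byte, and the
/// combined value is inserted into a v8i16 that is finally bitcast to v16i8.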
2689static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
2690                                       unsigned NumNonZero, unsigned NumZero,
2691                                       SelectionDAG &DAG, TargetLowering &TLI) {
2692  if (NumNonZero > 8)
2693    return SDOperand();
2694
2695  SDOperand V(0, 0);
2696  bool First = true;
2697  for (unsigned i = 0; i < 16; ++i) {
2698    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
2699    if (ThisIsNonZero && First) {
2700      if (NumZero)
2701        V = getZeroVector(MVT::v8i16, DAG);
2702      else
2703        V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
2704      First = false;
2705    }
2706
2707    if ((i & 1) != 0) {
2708      SDOperand ThisElt(0, 0), LastElt(0, 0);
2709      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
2710      if (LastIsNonZero) {
2711        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
2712      }
2713      if (ThisIsNonZero) {
2714        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
2715        ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
2716                              ThisElt, DAG.getConstant(8, MVT::i8));
2717        if (LastIsNonZero)
2718          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
2719      } else
2720        ThisElt = LastElt;
2721
2722      if (ThisElt.Val)
2723        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
2724                        DAG.getConstant(i/2, TLI.getPointerTy()));
2725    }
2726  }
2727
2728  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
2729}
2730
2731/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
2732///
2733static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
2734                                       unsigned NumNonZero, unsigned NumZero,
2735                                       SelectionDAG &DAG, TargetLowering &TLI) {
2736  if (NumNonZero > 4)
2737    return SDOperand();
2738
2739  SDOperand V(0, 0);
2740  bool First = true;
2741  for (unsigned i = 0; i < 8; ++i) {
2742    bool isNonZero = (NonZeros & (1 << i)) != 0;
2743    if (isNonZero) {
2744      if (First) {
2745        if (NumZero)
2746          V = getZeroVector(MVT::v8i16, DAG);
2747        else
2748          V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
2749        First = false;
2750      }
2751      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
2752                      DAG.getConstant(i, TLI.getPointerTy()));
2753    }
2754  }
2755
2756  return V;
2757}
2758
2759SDOperand
2760X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
2761  // All zeros are handled with pxor.
2762  if (ISD::isBuildVectorAllZeros(Op.Val))
2763    return Op;
2764
2765  // All ones are handled with pcmpeqd.
2766  if (ISD::isBuildVectorAllOnes(Op.Val))
2767    return Op;
2768
2769  MVT::ValueType VT = Op.getValueType();
2770  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
2771  unsigned EVTBits = MVT::getSizeInBits(EVT);
2772
2773  unsigned NumElems = Op.getNumOperands();
2774  unsigned NumZero  = 0;
2775  unsigned NumNonZero = 0;
2776  unsigned NonZeros = 0;
2777  std::set<SDOperand> Values;
2778  for (unsigned i = 0; i < NumElems; ++i) {
2779    SDOperand Elt = Op.getOperand(i);
2780    if (Elt.getOpcode() != ISD::UNDEF) {
2781      Values.insert(Elt);
2782      if (isZeroNode(Elt))
2783        NumZero++;
2784      else {
2785        NonZeros |= (1 << i);
2786        NumNonZero++;
2787      }
2788    }
2789  }
2790
2791  if (NumNonZero == 0)
2792    // Must be a mix of zero and undef. Return a zero vector.
2793    return getZeroVector(VT, DAG);
2794
2795  // Splat is obviously ok. Let legalizer expand it to a shuffle.
2796  if (Values.size() == 1)
2797    return SDOperand();
2798
2799  // Special case for single non-zero element.
2800  if (NumNonZero == 1) {
2801    unsigned Idx = CountTrailingZeros_32(NonZeros);
2802    SDOperand Item = Op.getOperand(Idx);
2803    Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
2804    if (Idx == 0)
2805      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
2806      return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
2807                                         NumZero > 0, DAG);
2808
2809    if (EVTBits == 32) {
2810      // Turn it into a shuffle of zero and zero-extended scalar to vector.
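      // For example (illustrative), with NumElems == 4 and Idx == 2: the scalar
      // is first placed in lane 0 of a zero/undef vector; the mask built below
      // is then <1, 1, 0, 1>, so lane 2 takes the scalar (element 0) and the
      // other lanes take element 1, which is zero (or undef).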
2811      Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
2812                                         DAG);
2813      MVT::ValueType MaskVT  = MVT::getIntVectorWithNumElements(NumElems);
2814      MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
2815      SmallVector<SDOperand, 8> MaskVec;
2816      for (unsigned i = 0; i < NumElems; i++)
2817        MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
2818      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2819                                   &MaskVec[0], MaskVec.size());
2820      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
2821                         DAG.getNode(ISD::UNDEF, VT), Mask);
2822    }
2823  }
2824
2825  // Let the legalizer expand 2-wide build_vectors.
2826  if (EVTBits == 64)
2827    return SDOperand();
2828
2829  // If element VT is < 32 bits, convert it to inserts into a zero vector.
2830  if (EVTBits == 8) {
2831    SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
2832                                        *this);
2833    if (V.Val) return V;
2834  }
2835
2836  if (EVTBits == 16) {
2837    SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
2838                                        *this);
2839    if (V.Val) return V;
2840  }
2841
2842  // If element VT is == 32 bits, turn it into a number of shuffles.
2843  SmallVector<SDOperand, 8> V;
2844  V.resize(NumElems);
2845  if (NumElems == 4 && NumZero > 0) {
2846    for (unsigned i = 0; i < 4; ++i) {
2847      bool isZero = !(NonZeros & (1 << i));
2848      if (isZero)
2849        V[i] = getZeroVector(VT, DAG);
2850      else
2851        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2852    }
2853
2854    for (unsigned i = 0; i < 2; ++i) {
2855      switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
2856        default: break;
2857        case 0:
2858          V[i] = V[i*2];  // Must be a zero vector.
2859          break;
2860        case 1:
2861          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
2862                             getMOVLMask(NumElems, DAG));
2863          break;
2864        case 2:
2865          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2866                             getMOVLMask(NumElems, DAG));
2867          break;
2868        case 3:
2869          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2870                             getUnpacklMask(NumElems, DAG));
2871          break;
2872      }
2873    }
2874
2875    // Take advantage of the fact that GR32 to VR128 scalar_to_vector (i.e. movd)
2876    // clears the upper bits.
2877    // FIXME: we can do the same for the v4f32 case when we know both parts of
2878    // the lower half come from scalar_to_vector (loadf32). We should do
2879    // that in the post-legalizer dag combiner with target-specific hooks.
2880    if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
2881      return V[0];
2882    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2883    MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
2884    SmallVector<SDOperand, 8> MaskVec;
2885    bool Reverse = (NonZeros & 0x3) == 2;
2886    for (unsigned i = 0; i < 2; ++i)
2887      if (Reverse)
2888        MaskVec.push_back(DAG.getConstant(1-i, EVT));
2889      else
2890        MaskVec.push_back(DAG.getConstant(i, EVT));
2891    Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
2892    for (unsigned i = 0; i < 2; ++i)
2893      if (Reverse)
2894        MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
2895      else
2896        MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
2897    SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2898                                     &MaskVec[0], MaskVec.size());
2899    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
2900  }
2901
2902  if (Values.size() > 2) {
2903    // Expand into a number of unpckl*.
2904    // e.g. for v4f32
2905    //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
2906    //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
2907    //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
2908    SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
2909    for (unsigned i = 0; i < NumElems; ++i)
2910      V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2911    NumElems >>= 1;
2912    while (NumElems != 0) {
2913      for (unsigned i = 0; i < NumElems; ++i)
2914        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
2915                           UnpckMask);
2916      NumElems >>= 1;
2917    }
2918    return V[0];
2919  }
2920
2921  return SDOperand();
2922}
2923
2924SDOperand
2925X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
2926  SDOperand V1 = Op.getOperand(0);
2927  SDOperand V2 = Op.getOperand(1);
2928  SDOperand PermMask = Op.getOperand(2);
2929  MVT::ValueType VT = Op.getValueType();
2930  unsigned NumElems = PermMask.getNumOperands();
2931  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
2932  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
2933  bool V1IsSplat = false;
2934  bool V2IsSplat = false;
2935
2936  if (isUndefShuffle(Op.Val))
2937    return DAG.getNode(ISD::UNDEF, VT);
2938
2939  if (isSplatMask(PermMask.Val)) {
2940    if (NumElems <= 4) return Op;
2941    // Promote it to a v4i32 splat.
2942    return PromoteSplat(Op, DAG);
2943  }
2944
2945  if (X86::isMOVLMask(PermMask.Val))
2946    return (V1IsUndef) ? V2 : Op;
2947
2948  if (X86::isMOVSHDUPMask(PermMask.Val) ||
2949      X86::isMOVSLDUPMask(PermMask.Val) ||
2950      X86::isMOVHLPSMask(PermMask.Val) ||
2951      X86::isMOVHPMask(PermMask.Val) ||
2952      X86::isMOVLPMask(PermMask.Val))
2953    return Op;
2954
2955  if (ShouldXformToMOVHLPS(PermMask.Val) ||
2956      ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
2957    return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2958
2959  bool Commuted = false;
2960  V1IsSplat = isSplatVector(V1.Val);
2961  V2IsSplat = isSplatVector(V2.Val);
2962  if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
2963    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2964    std::swap(V1IsSplat, V2IsSplat);
2965    std::swap(V1IsUndef, V2IsUndef);
2966    Commuted = true;
2967  }
2968
2969  if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
2970    if (V2IsUndef) return V1;
2971    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2972    if (V2IsSplat) {
2973      // V2 is a splat, so the mask may be malformed. That is, it may point
2974      // to any V2 element. The instruction selector won't like this. Get
2975      // a corrected mask and commute to form a proper MOVS{S|D}.
2976      SDOperand NewMask = getMOVLMask(NumElems, DAG);
2977      if (NewMask.Val != PermMask.Val)
2978        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2979    }
2980    return Op;
2981  }
2982
2983  if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
2984      X86::isUNPCKLMask(PermMask.Val) ||
2985      X86::isUNPCKHMask(PermMask.Val))
2986    return Op;
2987
2988  if (V2IsSplat) {
2989    // Normalize the mask so all entries that point to V2 point to its first
2990    // element, then try to match unpck{h|l} again. If it matches, return a
2991    // new vector_shuffle with the corrected mask.
2992    SDOperand NewMask = NormalizeMask(PermMask, DAG);
2993    if (NewMask.Val != PermMask.Val) {
2994      if (X86::isUNPCKLMask(PermMask.Val, true)) {
2995        SDOperand NewMask = getUnpacklMask(NumElems, DAG);
2996        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2997      } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
2998        SDOperand NewMask = getUnpackhMask(NumElems, DAG);
2999        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3000      }
3001    }
3002  }
3003
3004  // Normalize the node to match x86 shuffle ops if needed.
3005  if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
3006    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3007
3008  if (Commuted) {
3009    // Commute it back and try unpck* again.
3010    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3011    if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
3012        X86::isUNPCKLMask(PermMask.Val) ||
3013        X86::isUNPCKHMask(PermMask.Val))
3014      return Op;
3015  }
3016
3017  // If VT is integer, try PSHUF* first, then SHUFP*.
3018  if (MVT::isInteger(VT)) {
3019    if (X86::isPSHUFDMask(PermMask.Val) ||
3020        X86::isPSHUFHWMask(PermMask.Val) ||
3021        X86::isPSHUFLWMask(PermMask.Val)) {
3022      if (V2.getOpcode() != ISD::UNDEF)
3023        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3024                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
3025      return Op;
3026    }
3027
3028    if (X86::isSHUFPMask(PermMask.Val))
3029      return Op;
3030
3031    // Handle a v8i16 shuffle with a shuffle-low / shuffle-high node pair.
3032    if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
3033      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3034      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
3035      SmallVector<SDOperand, 8> MaskVec;
3036      for (unsigned i = 0; i != 4; ++i)
3037        MaskVec.push_back(PermMask.getOperand(i));
3038      for (unsigned i = 4; i != 8; ++i)
3039        MaskVec.push_back(DAG.getConstant(i, BaseVT));
3040      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3041                                   &MaskVec[0], MaskVec.size());
3042      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
3043      MaskVec.clear();
3044      for (unsigned i = 0; i != 4; ++i)
3045        MaskVec.push_back(DAG.getConstant(i, BaseVT));
3046      for (unsigned i = 4; i != 8; ++i)
3047        MaskVec.push_back(PermMask.getOperand(i));
3048      Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
3049      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
3050    }
3051  } else {
3052    // Floating point cases in the other order.
3053    if (X86::isSHUFPMask(PermMask.Val))
3054      return Op;
3055    if (X86::isPSHUFDMask(PermMask.Val) ||
3056        X86::isPSHUFHWMask(PermMask.Val) ||
3057        X86::isPSHUFLWMask(PermMask.Val)) {
3058      if (V2.getOpcode() != ISD::UNDEF)
3059        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3060                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
3061      return Op;
3062    }
3063  }
3064
3065  if (NumElems == 4) {
3066    MVT::ValueType MaskVT = PermMask.getValueType();
3067    MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
3068    SmallVector<std::pair<int, int>, 8> Locs;
3069    Locs.resize(NumElems);
3070    SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3071    SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3072    unsigned NumHi = 0;
3073    unsigned NumLo = 0;
3074    // If no more than two elements come from either vector, this can be
3075    // implemented with two shuffles. The first shuffle gathers the elements.
3076    // The second shuffle, which takes the first shuffle as both of its
3077    // vector operands, puts the elements into the right order.
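    // A worked example (illustrative), for PermMask <4, 1, 5, 0>:
    //   gather:  Mask1 = <1, 0, 4, 5>  ==>  T = <V1[1], V1[0], V2[0], V2[1]>
    //   reorder: Mask2 = <2, 0, 7, 5>  ==>  <T[2], T[0], T[3], T[1]>
    //                                   ==  <V2[0], V1[1], V2[1], V1[0]>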
3078    for (unsigned i = 0; i != NumElems; ++i) {
3079      SDOperand Elt = PermMask.getOperand(i);
3080      if (Elt.getOpcode() == ISD::UNDEF) {
3081        Locs[i] = std::make_pair(-1, -1);
3082      } else {
3083        unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
3084        if (Val < NumElems) {
3085          Locs[i] = std::make_pair(0, NumLo);
3086          Mask1[NumLo] = Elt;
3087          NumLo++;
3088        } else {
3089          Locs[i] = std::make_pair(1, NumHi);
3090          if (2+NumHi < NumElems)
3091            Mask1[2+NumHi] = Elt;
3092          NumHi++;
3093        }
3094      }
3095    }
3096    if (NumLo <= 2 && NumHi <= 2) {
3097      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3098                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3099                                   &Mask1[0], Mask1.size()));
3100      for (unsigned i = 0; i != NumElems; ++i) {
3101        if (Locs[i].first == -1)
3102          continue;
3103        else {
3104          unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
3105          Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
3106          Mask2[i] = DAG.getConstant(Idx, MaskEVT);
3107        }
3108      }
3109
3110      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
3111                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3112                                     &Mask2[0], Mask2.size()));
3113    }
3114
3115    // Break it into (shuffle shuffle_hi, shuffle_lo).
3116    Locs.clear();
    Locs.resize(NumElems);
3117    SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3118    SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3119    SmallVector<SDOperand,8> *MaskPtr = &LoMask;
3120    unsigned MaskIdx = 0;
3121    unsigned LoIdx = 0;
3122    unsigned HiIdx = NumElems/2;
3123    for (unsigned i = 0; i != NumElems; ++i) {
3124      if (i == NumElems/2) {
3125        MaskPtr = &HiMask;
3126        MaskIdx = 1;
3127        LoIdx = 0;
3128        HiIdx = NumElems/2;
3129      }
3130      SDOperand Elt = PermMask.getOperand(i);
3131      if (Elt.getOpcode() == ISD::UNDEF) {
3132        Locs[i] = std::make_pair(-1, -1);
3133      } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
3134        Locs[i] = std::make_pair(MaskIdx, LoIdx);
3135        (*MaskPtr)[LoIdx] = Elt;
3136        LoIdx++;
3137      } else {
3138        Locs[i] = std::make_pair(MaskIdx, HiIdx);
3139        (*MaskPtr)[HiIdx] = Elt;
3140        HiIdx++;
3141      }
3142    }
3143
3144    SDOperand LoShuffle =
3145      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3146                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3147                              &LoMask[0], LoMask.size()));
3148    SDOperand HiShuffle =
3149      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3150                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3151                              &HiMask[0], HiMask.size()));
3152    SmallVector<SDOperand, 8> MaskOps;
3153    for (unsigned i = 0; i != NumElems; ++i) {
3154      if (Locs[i].first == -1) {
3155        MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
3156      } else {
3157        unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
3158        MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
3159      }
3160    }
3161    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
3162                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3163                                   &MaskOps[0], MaskOps.size()));
3164  }
3165
3166  return SDOperand();
3167}
3168
3169SDOperand
3170X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3171  if (!isa<ConstantSDNode>(Op.getOperand(1)))
3172    return SDOperand();
3173
3174  MVT::ValueType VT = Op.getValueType();
3175  // TODO: handle v16i8.
3176  if (MVT::getSizeInBits(VT) == 16) {
3177    // Transform it so it matches pextrw, which produces a 32-bit result.
3178    MVT::ValueType EVT = (MVT::ValueType)(VT+1);
3179    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
3180                                    Op.getOperand(0), Op.getOperand(1));
3181    SDOperand Assert  = DAG.getNode(ISD::AssertZext, EVT, Extract,
3182                                    DAG.getValueType(VT));
3183    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
3184  } else if (MVT::getSizeInBits(VT) == 32) {
3185    SDOperand Vec = Op.getOperand(0);
3186    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3187    if (Idx == 0)
3188      return Op;
3189    // SHUFPS the element to the lowest double word, then movss.
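    // For example (illustrative), extracting element 2 of a v4f32 builds the
    // mask <2, u, u, u>, shuffles element 2 down to lane 0, and then extracts
    // lane 0 from the shuffled vector.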
3190    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3191    SmallVector<SDOperand, 8> IdxVec;
3192    IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
3193    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3194    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3195    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3196    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3197                                 &IdxVec[0], IdxVec.size());
3198    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3199                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3200    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3201                       DAG.getConstant(0, getPointerTy()));
3202  } else if (MVT::getSizeInBits(VT) == 64) {
3203    SDOperand Vec = Op.getOperand(0);
3204    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3205    if (Idx == 0)
3206      return Op;
3207
3208    // UNPCKHPD the element to the lowest double word, then movsd.
3209    // Note that if the lower 64 bits of the result of the UNPCKHPD are then
3210    // stored to a f64mem, the whole operation is folded into a single MOVHPDmr.
3211    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3212    SmallVector<SDOperand, 8> IdxVec;
3213    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
3214    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3215    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3216                                 &IdxVec[0], IdxVec.size());
3217    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3218                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3219    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3220                       DAG.getConstant(0, getPointerTy()));
3221  }
3222
3223  return SDOperand();
3224}
3225
3226SDOperand
3227X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3228  // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
3229  // as its second argument.
3230  MVT::ValueType VT = Op.getValueType();
3231  MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
3232  SDOperand N0 = Op.getOperand(0);
3233  SDOperand N1 = Op.getOperand(1);
3234  SDOperand N2 = Op.getOperand(2);
3235  if (MVT::getSizeInBits(BaseVT) == 16) {
3236    if (N1.getValueType() != MVT::i32)
3237      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
3238    if (N2.getValueType() != MVT::i32)
3239      N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
3240    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
3241  } else if (MVT::getSizeInBits(BaseVT) == 32) {
3242    unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
3243    if (Idx == 0) {
3244      // Use a movss.
3245      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
3246      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3247      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
3248      SmallVector<SDOperand, 8> MaskVec;
3249      MaskVec.push_back(DAG.getConstant(4, BaseVT));
3250      for (unsigned i = 1; i <= 3; ++i)
3251        MaskVec.push_back(DAG.getConstant(i, BaseVT));
3252      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
3253                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3254                                     &MaskVec[0], MaskVec.size()));
3255    } else {
3256      // Use two pinsrw instructions to insert a 32 bit value.
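      // Roughly: a 32-bit value at dword index Idx becomes two 16-bit inserts,
      // the low half at word index 2*Idx and (value >> 16) at word index
      // 2*Idx+1, on a v8i16 view of the vector.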
3257      Idx <<= 1;
3258      if (MVT::isFloatingPoint(N1.getValueType())) {
3259        if (ISD::isNON_EXTLoad(N1.Val)) {
3260          // Just load directly from f32mem to GR32.
3261          LoadSDNode *LD = cast<LoadSDNode>(N1);
3262          N1 = DAG.getLoad(MVT::i32, LD->getChain(), LD->getBasePtr(),
3263                           LD->getSrcValue(), LD->getSrcValueOffset());
3264        } else {
3265          N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
3266          N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
3267          N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
3268                           DAG.getConstant(0, getPointerTy()));
3269        }
3270      }
3271      N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
3272      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3273                       DAG.getConstant(Idx, getPointerTy()));
3274      N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
3275      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3276                       DAG.getConstant(Idx+1, getPointerTy()));
3277      return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
3278    }
3279  }
3280
3281  return SDOperand();
3282}
3283
3284SDOperand
3285X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
3286  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
3287  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
3288}
3289
3290// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3291// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
3292// one of the above-mentioned nodes. It has to be wrapped because otherwise
3293// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3294// be used to form an addressing mode. These wrapped nodes will be selected
3295// into MOV32ri.
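// For example (illustrative), in PIC mode (non-RIP-relative) a constant pool
// reference is lowered to:
//   (add (X86ISD::GlobalBaseReg), (X86ISD::Wrapper TargetConstantPool))
// and in static mode just to (X86ISD::Wrapper TargetConstantPool).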
3296SDOperand
3297X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
3298  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3299  SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
3300                                               getPointerTy(),
3301                                               CP->getAlignment());
3302  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3303  // With PIC, the address is actually $g + Offset.
3304  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3305      !Subtarget->isPICStyleRIPRel()) {
3306    Result = DAG.getNode(ISD::ADD, getPointerTy(),
3307                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3308                         Result);
3309  }
3310
3311  return Result;
3312}
3313
3314SDOperand
3315X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
3316  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3317  SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
3318  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3319  // With PIC, the address is actually $g + Offset.
3320  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3321      !Subtarget->isPICStyleRIPRel()) {
3322    Result = DAG.getNode(ISD::ADD, getPointerTy(),
3323                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3324                         Result);
3325  }
3326
3327  // For Darwin & Mingw32, external and weak symbols are indirect, so we want to
3328  // load the value at address GV, not the value of GV itself. This means that
3329  // the GlobalAddress must be in the base or index register of the address, not
3330  // the GV offset field. The platform check is inside the GVRequiresExtraLoad()
3331  // call. The same applies to external symbols during PIC codegen.
3332  if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
3333    Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
3334
3335  return Result;
3336}
3337
3338SDOperand
3339X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
3340  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
3341  SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
3342  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3343  // With PIC, the address is actually $g + Offset.
3344  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3345      !Subtarget->isPICStyleRIPRel()) {
3346    Result = DAG.getNode(ISD::ADD, getPointerTy(),
3347                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3348                         Result);
3349  }
3350
3351  return Result;
3352}
3353
3354SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
3355  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3356  SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
3357  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3358  // With PIC, the address is actually $g + Offset.
3359  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
3360      !Subtarget->isPICStyleRIPRel()) {
3361    Result = DAG.getNode(ISD::ADD, getPointerTy(),
3362                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3363                         Result);
3364  }
3365
3366  return Result;
3367}
3368
3369SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
3370    assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
3371           "Not an i64 shift!");
3372    bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
3373    SDOperand ShOpLo = Op.getOperand(0);
3374    SDOperand ShOpHi = Op.getOperand(1);
3375    SDOperand ShAmt  = Op.getOperand(2);
3376    SDOperand Tmp1 = isSRA ?
3377      DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
3378      DAG.getConstant(0, MVT::i32);
3379
3380    SDOperand Tmp2, Tmp3;
3381    if (Op.getOpcode() == ISD::SHL_PARTS) {
3382      Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
3383      Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
3384    } else {
3385      Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
3386      Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
3387    }
3388
3389    const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3390    SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
3391                                    DAG.getConstant(32, MVT::i8));
3392    SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)};
3393    SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1);
3394
3395    SDOperand Hi, Lo;
3396    SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
3397
3398    VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
3399    SmallVector<SDOperand, 4> Ops;
3400    if (Op.getOpcode() == ISD::SHL_PARTS) {
3401      Ops.push_back(Tmp2);
3402      Ops.push_back(Tmp3);
3403      Ops.push_back(CC);
3404      Ops.push_back(InFlag);
3405      Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3406      InFlag = Hi.getValue(1);
3407
3408      Ops.clear();
3409      Ops.push_back(Tmp3);
3410      Ops.push_back(Tmp1);
3411      Ops.push_back(CC);
3412      Ops.push_back(InFlag);
3413      Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3414    } else {
3415      Ops.push_back(Tmp2);
3416      Ops.push_back(Tmp3);
3417      Ops.push_back(CC);
3418      Ops.push_back(InFlag);
3419      Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3420      InFlag = Lo.getValue(1);
3421
3422      Ops.clear();
3423      Ops.push_back(Tmp3);
3424      Ops.push_back(Tmp1);
3425      Ops.push_back(CC);
3426      Ops.push_back(InFlag);
3427      Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3428    }
3429
3430    VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
3431    Ops.clear();
3432    Ops.push_back(Lo);
3433    Ops.push_back(Hi);
3434    return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
3435}
3436
3437SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
3438  assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
3439         Op.getOperand(0).getValueType() >= MVT::i16 &&
3440         "Unknown SINT_TO_FP to lower!");
3441
3442  SDOperand Result;
3443  MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
3444  unsigned Size = MVT::getSizeInBits(SrcVT)/8;
3445  MachineFunction &MF = DAG.getMachineFunction();
3446  int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
3447  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3448  SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
3449                                 StackSlot, NULL, 0);
3450
3451  // Build the FILD
3452  SDVTList Tys;
3453  if (X86ScalarSSE)
3454    Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
3455  else
3456    Tys = DAG.getVTList(MVT::f64, MVT::Other);
3457  SmallVector<SDOperand, 8> Ops;
3458  Ops.push_back(Chain);
3459  Ops.push_back(StackSlot);
3460  Ops.push_back(DAG.getValueType(SrcVT));
3461  Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
3462                       Tys, &Ops[0], Ops.size());
3463
3464  if (X86ScalarSSE) {
3465    Chain = Result.getValue(1);
3466    SDOperand InFlag = Result.getValue(2);
3467
3468    // FIXME: Currently the FST is flagged to the FILD_FLAG. This
3469    // shouldn't be necessary except that RFP cannot be live across
3470    // multiple blocks. When stackifier is fixed, they can be uncoupled.
3471    MachineFunction &MF = DAG.getMachineFunction();
3472    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
3473    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3474    Tys = DAG.getVTList(MVT::Other);
3475    SmallVector<SDOperand, 8> Ops;
3476    Ops.push_back(Chain);
3477    Ops.push_back(Result);
3478    Ops.push_back(StackSlot);
3479    Ops.push_back(DAG.getValueType(Op.getValueType()));
3480    Ops.push_back(InFlag);
3481    Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
3482    Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0);
3483  }
3484
3485  return Result;
3486}
3487
3488SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
3489  assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
3490         "Unknown FP_TO_SINT to lower!");
3491  // We lower FP->sint into an FP_TO_INT*_IN_MEM node, which stores the result
3492  // to a temporary stack slot, followed by a load of that slot.
3493  MachineFunction &MF = DAG.getMachineFunction();
3494  unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
3495  int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
3496  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3497
3498  unsigned Opc;
3499  switch (Op.getValueType()) {
3500    default: assert(0 && "Invalid FP_TO_SINT to lower!");
3501    case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
3502    case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
3503    case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
3504  }
3505
3506  SDOperand Chain = DAG.getEntryNode();
3507  SDOperand Value = Op.getOperand(0);
3508  if (X86ScalarSSE) {
3509    assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
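    // The SSE value can't be consumed by fistp directly; store it to memory,
    // reload it onto the x87 stack with FLD, and let FP_TO_INT64_IN_MEM write
    // the i64 result to a fresh stack slot, which is then loaded below.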
3510    Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
3511    SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other);
3512    SDOperand Ops[] = {
3513      Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
3514    };
3515    Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
3516    Chain = Value.getValue(1);
3517    SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
3518    StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3519  }
3520
3521  // Build the FP_TO_INT*_IN_MEM
3522  SDOperand Ops[] = { Chain, Value, StackSlot };
3523  SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);
3524
3525  // Load the result.
3526  return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
3527}
3528
3529SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
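  // Roughly: fabs is implemented as a bitwise AND with a constant-pool mask
  // that has every bit set except the sign bit (e.g. 0x7FFFFFFFFFFFFFFF for
  // f64), loaded as a packed value and applied with X86ISD::FAND.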
3530  MVT::ValueType VT = Op.getValueType();
3531  const Type *OpNTy =  MVT::getTypeForValueType(VT);
3532  std::vector<Constant*> CV;
3533  if (VT == MVT::f64) {
3534    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
3535    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3536  } else {
3537    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
3538    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3539    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3540    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3541  }
3542  Constant *CS = ConstantStruct::get(CV);
3543  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3544  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3545  SmallVector<SDOperand, 3> Ops;
3546  Ops.push_back(DAG.getEntryNode());
3547  Ops.push_back(CPIdx);
3548  Ops.push_back(DAG.getSrcValue(NULL));
3549  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
3550  return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
3551}
3552
3553SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
3554  MVT::ValueType VT = Op.getValueType();
3555  const Type *OpNTy =  MVT::getTypeForValueType(VT);
3556  std::vector<Constant*> CV;
3557  if (VT == MVT::f64) {
3558    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
3559    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3560  } else {
3561    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
3562    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3563    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3564    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3565  }
3566  Constant *CS = ConstantStruct::get(CV);
3567  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3568  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3569  SmallVector<SDOperand, 3> Ops;
3570  Ops.push_back(DAG.getEntryNode());
3571  Ops.push_back(CPIdx);
3572  Ops.push_back(DAG.getSrcValue(NULL));
3573  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
3574  return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
3575}
3576
3577SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
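  // Roughly: copysign(x, y) is computed as (x & ~sign_mask) | (y & sign_mask),
  // using packed constant-pool masks, with an extra extend/shift when the two
  // operands have different widths.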
3578  SDOperand Op0 = Op.getOperand(0);
3579  SDOperand Op1 = Op.getOperand(1);
3580  MVT::ValueType VT = Op.getValueType();
3581  MVT::ValueType SrcVT = Op1.getValueType();
3582  const Type *SrcTy =  MVT::getTypeForValueType(SrcVT);
3583
3584  // If second operand is smaller, extend it first.
3585  if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
3586    Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
3587    SrcVT = VT;
3588  }
3589
3590  // First get the sign bit of second operand.
3591  std::vector<Constant*> CV;
3592  if (SrcVT == MVT::f64) {
3593    CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(1ULL << 63)));
3594    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3595  } else {
3596    CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(1U << 31)));
3597    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3598    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3599    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3600  }
3601  Constant *CS = ConstantStruct::get(CV);
3602  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3603  SDVTList Tys = DAG.getVTList(SrcVT, MVT::Other);
3604  SmallVector<SDOperand, 3> Ops;
3605  Ops.push_back(DAG.getEntryNode());
3606  Ops.push_back(CPIdx);
3607  Ops.push_back(DAG.getSrcValue(NULL));
3608  SDOperand Mask1 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
3609  SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
3610
3611  // Shift sign bit right or left if the two operands have different types.
3612  if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
3613    // Op0 is MVT::f32, Op1 is MVT::f64.
3614    SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
3615    SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
3616                          DAG.getConstant(32, MVT::i32));
3617    SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
3618    SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
3619                          DAG.getConstant(0, getPointerTy()));
3620  }
3621
3622  // Clear first operand sign bit.
3623  CV.clear();
3624  if (VT == MVT::f64) {
3625    CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(~(1ULL << 63))));
3626    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3627  } else {
3628    CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(~(1U << 31))));
3629    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3630    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3631    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3632  }
3633  CS = ConstantStruct::get(CV);
3634  CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3635  Tys = DAG.getVTList(VT, MVT::Other);
3636  Ops.clear();
3637  Ops.push_back(DAG.getEntryNode());
3638  Ops.push_back(CPIdx);
3639  Ops.push_back(DAG.getSrcValue(NULL));
3640  SDOperand Mask2 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
3641  SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
3642
3643  // Or the value with the sign bit.
3644  return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
3645}
3646
3647SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
3648                                        SDOperand Chain) {
3649  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
3650  SDOperand Cond;
3651  SDOperand Op0 = Op.getOperand(0);
3652  SDOperand Op1 = Op.getOperand(1);
3653  SDOperand CC = Op.getOperand(2);
3654  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
3655  const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3656  const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
3657  bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
3658  unsigned X86CC;
3659
3660  if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
3661                     Op0, Op1, DAG)) {
3662    SDOperand Ops1[] = { Chain, Op0, Op1 };
3663    Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1);
3664    SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
3665    return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
3666  }
3667
3668  assert(isFP && "Illegal integer SetCC!");
3669
3670  SDOperand COps[] = { Chain, Op0, Op1 };
3671  Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1);
3672
3673  switch (SetCCOpcode) {
3674  default: assert(false && "Illegal floating point SetCC!");
3675  case ISD::SETOEQ: {  // !PF & ZF
3676    SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond };
3677    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
3678    SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8),
3679                         Tmp1.getValue(1) };
3680    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
3681    return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
3682  }
3683  case ISD::SETUNE: {  // PF | !ZF
3684    SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond };
3685    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
3686    SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8),
3687                         Tmp1.getValue(1) };
3688    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
3689    return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
3690  }
3691  }
3692}
3693
3694SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
3695  bool addTest = true;
3696  SDOperand Chain = DAG.getEntryNode();
3697  SDOperand Cond  = Op.getOperand(0);
3698  SDOperand CC;
3699  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3700
3701  if (Cond.getOpcode() == ISD::SETCC)
3702    Cond = LowerSETCC(Cond, DAG, Chain);
3703
3704  if (Cond.getOpcode() == X86ISD::SETCC) {
3705    CC = Cond.getOperand(0);
3706
3707    // If the condition flag is set by an X86ISD::CMP, then make a copy of it
3708    // (since the flag operand cannot be shared). Use it as the condition-setting
3709    // operand in place of the X86ISD::SETCC.
3710    // If the X86ISD::SETCC has more than one use, then perhaps it's better
3711    // to use a test instead of duplicating the X86ISD::CMP (for register
3712    // pressure reasons)?
3713    SDOperand Cmp = Cond.getOperand(1);
3714    unsigned Opc = Cmp.getOpcode();
3715    bool IllegalFPCMov = !X86ScalarSSE &&
3716      MVT::isFloatingPoint(Op.getValueType()) &&
3717      !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
3718    if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) &&
3719        !IllegalFPCMov) {
3720      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
3721      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
3722      addTest = false;
3723    }
3724  }
3725
3726  if (addTest) {
3727    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
3728    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
3729    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
3730  }
3731
3732  VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag);
3733  SmallVector<SDOperand, 4> Ops;
3734  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
3735  // the condition is true.
3736  Ops.push_back(Op.getOperand(2));
3737  Ops.push_back(Op.getOperand(1));
3738  Ops.push_back(CC);
3739  Ops.push_back(Cond.getValue(1));
3740  return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3741}
3742
3743SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
3744  bool addTest = true;
3745  SDOperand Chain = Op.getOperand(0);
3746  SDOperand Cond  = Op.getOperand(1);
3747  SDOperand Dest  = Op.getOperand(2);
3748  SDOperand CC;
3749  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3750
3751  if (Cond.getOpcode() == ISD::SETCC)
3752    Cond = LowerSETCC(Cond, DAG, Chain);
3753
3754  if (Cond.getOpcode() == X86ISD::SETCC) {
3755    CC = Cond.getOperand(0);
3756
3757    // If condition flag is set by a X86ISD::CMP, then make a copy of it
3758    // (since flag operand cannot be shared). Use it as the condition setting
3759    // operand in place of the X86ISD::SETCC.
3760    // If the X86ISD::SETCC has more than one use, then perhaps it's better
3761    // to use a test instead of duplicating the X86ISD::CMP (for register
3762    // pressure reason)?
3763    SDOperand Cmp = Cond.getOperand(1);
3764    unsigned Opc = Cmp.getOpcode();
3765    if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) {
3766      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
3767      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
3768      addTest = false;
3769    }
3770  }
3771
3772  if (addTest) {
3773    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
3774    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
3775    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
3776  }
3777  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
3778                     Cond, Dest, CC, Cond.getValue(1));
3779}
3780
3781SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
3782  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3783
3784  if (Subtarget->is64Bit())
3785    return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
3786  else
3787    switch (CallingConv) {
3788    default:
3789      assert(0 && "Unsupported calling convention");
3790    case CallingConv::Fast:
3791      if (EnableFastCC)
3792        return LowerFastCCCallTo(Op, DAG, CallingConv);
3793      // Falls through
3794    case CallingConv::C:
3795    case CallingConv::X86_StdCall:
3796      return LowerCCCCallTo(Op, DAG, CallingConv);
3797    case CallingConv::X86_FastCall:
3798      return LowerFastCCCallTo(Op, DAG, CallingConv);
3799    }
3800}
3801
3802SDOperand
3803X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
3804  MachineFunction &MF = DAG.getMachineFunction();
3805  const Function* Fn = MF.getFunction();
3806  if (Fn->hasExternalLinkage() &&
3807      Subtarget->isTargetCygMing() &&
3808      Fn->getName() == "main")
3809    MF.getInfo<X86FunctionInfo>()->setForceFramePointer(true);
3810
3811  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3812  if (Subtarget->is64Bit())
3813    return LowerX86_64CCCArguments(Op, DAG);
3814  else
3815    switch(CC) {
3816    default:
3817      assert(0 && "Unsupported calling convention");
3818    case CallingConv::Fast:
3819      if (EnableFastCC) {
3820        return LowerFastCCArguments(Op, DAG);
3821      }
3822      // Falls through
3823    case CallingConv::C:
3824      return LowerCCCArguments(Op, DAG);
3825    case CallingConv::X86_StdCall:
3826      MF.getInfo<X86FunctionInfo>()->setDecorationStyle(StdCall);
3827      return LowerCCCArguments(Op, DAG, true);
3828    case CallingConv::X86_FastCall:
3829      MF.getInfo<X86FunctionInfo>()->setDecorationStyle(FastCall);
3830      return LowerFastCCArguments(Op, DAG, true);
3831    }
3832}
3833
3834SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
3835  SDOperand InFlag(0, 0);
3836  SDOperand Chain = Op.getOperand(0);
3837  unsigned Align =
3838    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
3839  if (Align == 0) Align = 1;
3840
3841  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
3842  // If not DWORD aligned, call memset if size is less than the threshold.
3843  // It knows how to align to the right boundary first.
3844  if ((Align & 3) != 0 ||
3845      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
3846    MVT::ValueType IntPtr = getPointerTy();
3847    const Type *IntPtrTy = getTargetData()->getIntPtrType();
3848    TargetLowering::ArgListTy Args;
3849    TargetLowering::ArgListEntry Entry;
3850    Entry.Node = Op.getOperand(1);
3851    Entry.Ty = IntPtrTy;
3852    Entry.isSigned = false;
3853    Entry.isInReg = false;
3854    Entry.isSRet = false;
3855    Args.push_back(Entry);
3856    // Extend the unsigned i8 argument to be an int value for the call.
3857    Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
3858    Entry.Ty = IntPtrTy;
3859    Entry.isSigned = false;
3860    Entry.isInReg = false;
3861    Entry.isSRet = false;
3862    Args.push_back(Entry);
3863    Entry.Node = Op.getOperand(3);
3864    Args.push_back(Entry);
3865    std::pair<SDOperand,SDOperand> CallResult =
3866      LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
3867                  DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
3868    return CallResult.second;
3869  }
3870
3871  MVT::ValueType AVT;
3872  SDOperand Count;
3873  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3874  unsigned BytesLeft = 0;
3875  bool TwoRepStos = false;
3876  if (ValC) {
3877    unsigned ValReg;
3878    uint64_t Val = ValC->getValue() & 255;
3879
3880    // If the value is a constant, then we can potentially use larger sets.
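    // For example (illustrative), a DWORD-aligned memset of 10 bytes with the
    // value 0xAB replicates it to EAX = 0xABABABAB, runs rep;stosd with
    // ECX = 2, and then stores the remaining 2 bytes separately below.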
3881    switch (Align & 3) {
3882      case 2:   // WORD aligned
3883        AVT = MVT::i16;
3884        ValReg = X86::AX;
3885        Val = (Val << 8) | Val;
3886        break;
3887      case 0:  // DWORD aligned
3888        AVT = MVT::i32;
3889        ValReg = X86::EAX;
3890        Val = (Val << 8)  | Val;
3891        Val = (Val << 16) | Val;
3892        if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) {  // QWORD aligned
3893          AVT = MVT::i64;
3894          ValReg = X86::RAX;
3895          Val = (Val << 32) | Val;
3896        }
3897        break;
3898      default:  // Byte aligned
3899        AVT = MVT::i8;
3900        ValReg = X86::AL;
3901        Count = Op.getOperand(3);
3902        break;
3903    }
3904
3905    if (AVT > MVT::i8) {
3906      if (I) {
3907        unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
3908        Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
3909        BytesLeft = I->getValue() % UBytes;
3910      } else {
3911        assert(AVT >= MVT::i32 &&
3912               "Do not use rep;stos if not at least DWORD aligned");
3913        Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
3914                            Op.getOperand(3), DAG.getConstant(2, MVT::i8));
3915        TwoRepStos = true;
3916      }
3917    }
3918
3919    Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
3920                              InFlag);
3921    InFlag = Chain.getValue(1);
3922  } else {
3923    AVT = MVT::i8;
3924    Count  = Op.getOperand(3);
3925    Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
3926    InFlag = Chain.getValue(1);
3927  }
3928
3929  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
3930                            Count, InFlag);
3931  InFlag = Chain.getValue(1);
3932  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
3933                            Op.getOperand(1), InFlag);
3934  InFlag = Chain.getValue(1);
3935
3936  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
3937  SmallVector<SDOperand, 8> Ops;
3938  Ops.push_back(Chain);
3939  Ops.push_back(DAG.getValueType(AVT));
3940  Ops.push_back(InFlag);
3941  Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
3942
3943  if (TwoRepStos) {
3944    InFlag = Chain.getValue(1);
3945    Count = Op.getOperand(3);
3946    MVT::ValueType CVT = Count.getValueType();
3947    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
3948                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
3949    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
3950                              Left, InFlag);
3951    InFlag = Chain.getValue(1);
3952    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
3953    Ops.clear();
3954    Ops.push_back(Chain);
3955    Ops.push_back(DAG.getValueType(MVT::i8));
3956    Ops.push_back(InFlag);
3957    Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
3958  } else if (BytesLeft) {
3959    // Issue stores for the last 1 - 7 bytes.
3960    SDOperand Value;
3961    unsigned Val = ValC->getValue() & 255;
3962    unsigned Offset = I->getValue() - BytesLeft;
3963    SDOperand DstAddr = Op.getOperand(1);
3964    MVT::ValueType AddrVT = DstAddr.getValueType();
3965    if (BytesLeft >= 4) {
3966      Val = (Val << 8)  | Val;
3967      Val = (Val << 16) | Val;
3968      Value = DAG.getConstant(Val, MVT::i32);
3969      Chain = DAG.getStore(Chain, Value,
3970                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
3971                                       DAG.getConstant(Offset, AddrVT)),
3972                           NULL, 0);
3973      BytesLeft -= 4;
3974      Offset += 4;
3975    }
3976    if (BytesLeft >= 2) {
3977      Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
3978      Chain = DAG.getStore(Chain, Value,
3979                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
3980                                       DAG.getConstant(Offset, AddrVT)),
3981                           NULL, 0);
3982      BytesLeft -= 2;
3983      Offset += 2;
3984    }
3985    if (BytesLeft == 1) {
3986      Value = DAG.getConstant(Val, MVT::i8);
3987      Chain = DAG.getStore(Chain, Value,
3988                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
3989                                       DAG.getConstant(Offset, AddrVT)),
3990                           NULL, 0);
3991    }
3992  }
3993
3994  return Chain;
3995}
3996
3997SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
3998  SDOperand Chain = Op.getOperand(0);
3999  unsigned Align =
4000    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
4001  if (Align == 0) Align = 1;
4002
4003  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
4004  // If not DWORD aligned, call memcpy if size is less than the threshold.
4005  // It knows how to align to the right boundary first.
4006  if ((Align & 3) != 0 ||
4007      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
4008    MVT::ValueType IntPtr = getPointerTy();
4009    TargetLowering::ArgListTy Args;
4010    TargetLowering::ArgListEntry Entry;
4011    Entry.Ty = getTargetData()->getIntPtrType();
4012    Entry.isSigned = false;
4013    Entry.isInReg = false;
4014    Entry.isSRet = false;
4015    Entry.Node = Op.getOperand(1); Args.push_back(Entry);
4016    Entry.Node = Op.getOperand(2); Args.push_back(Entry);
4017    Entry.Node = Op.getOperand(3); Args.push_back(Entry);
4018    std::pair<SDOperand,SDOperand> CallResult =
4019      LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
4020                  DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
4021    return CallResult.second;
4022  }
4023
4024  MVT::ValueType AVT;
4025  SDOperand Count;
4026  unsigned BytesLeft = 0;
4027  bool TwoRepMovs = false;
4028  switch (Align & 3) {
4029    case 2:   // WORD aligned
4030      AVT = MVT::i16;
4031      break;
4032    case 0:  // DWORD aligned
4033      AVT = MVT::i32;
4034      if (Subtarget->is64Bit() && ((Align & 0xF) == 0))  // QWORD aligned
4035        AVT = MVT::i64;
4036      break;
4037    default:  // Byte aligned
4038      AVT = MVT::i8;
4039      Count = Op.getOperand(3);
4040      break;
4041  }
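  // For example (illustrative), a DWORD-aligned copy of 10 bytes uses
  // rep;movsd with ECX = 2 (ESI/EDI pointing at source/destination) and then
  // copies the remaining 2 bytes with an explicit i16 load/store pair below.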
4042
4043  if (AVT > MVT::i8) {
4044    if (I) {
4045      unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
4046      Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
4047      BytesLeft = I->getValue() % UBytes;
4048    } else {
4049      assert(AVT >= MVT::i32 &&
4050             "Do not use rep;movs if not at least DWORD aligned");
4051      Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
4052                          Op.getOperand(3), DAG.getConstant(2, MVT::i8));
4053      TwoRepMovs = true;
4054    }
4055  }
4056
4057  SDOperand InFlag(0, 0);
4058  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
4059                            Count, InFlag);
4060  InFlag = Chain.getValue(1);
4061  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
4062                            Op.getOperand(1), InFlag);
4063  InFlag = Chain.getValue(1);
4064  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
4065                            Op.getOperand(2), InFlag);
4066  InFlag = Chain.getValue(1);
4067
4068  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
4069  SmallVector<SDOperand, 8> Ops;
4070  Ops.push_back(Chain);
4071  Ops.push_back(DAG.getValueType(AVT));
4072  Ops.push_back(InFlag);
4073  Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
4074
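  // When the size is only known at run time, a second rep;movsb copies the
  // remaining size & 3 bytes (size & 7 if the first pass moved QWORDs).
  // Otherwise the known remainder is copied with explicit loads and stores.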
4075  if (TwoRepMovs) {
4076    InFlag = Chain.getValue(1);
4077    Count = Op.getOperand(3);
4078    MVT::ValueType CVT = Count.getValueType();
4079    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
4080                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
4081    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
4082                              Left, InFlag);
4083    InFlag = Chain.getValue(1);
4084    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
4085    Ops.clear();
4086    Ops.push_back(Chain);
4087    Ops.push_back(DAG.getValueType(MVT::i8));
4088    Ops.push_back(InFlag);
4089    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
4090  } else if (BytesLeft) {
4091    // Issue loads and stores for the last 1 - 7 bytes.
4092    unsigned Offset = I->getValue() - BytesLeft;
4093    SDOperand DstAddr = Op.getOperand(1);
4094    MVT::ValueType DstVT = DstAddr.getValueType();
4095    SDOperand SrcAddr = Op.getOperand(2);
4096    MVT::ValueType SrcVT = SrcAddr.getValueType();
4097    SDOperand Value;
4098    if (BytesLeft >= 4) {
4099      Value = DAG.getLoad(MVT::i32, Chain,
4100                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
4101                                      DAG.getConstant(Offset, SrcVT)),
4102                          NULL, 0);
4103      Chain = Value.getValue(1);
4104      Chain = DAG.getStore(Chain, Value,
4105                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
4106                                       DAG.getConstant(Offset, DstVT)),
4107                           NULL, 0);
4108      BytesLeft -= 4;
4109      Offset += 4;
4110    }
4111    if (BytesLeft >= 2) {
4112      Value = DAG.getLoad(MVT::i16, Chain,
4113                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
4114                                      DAG.getConstant(Offset, SrcVT)),
4115                          NULL, 0);
4116      Chain = Value.getValue(1);
4117      Chain = DAG.getStore(Chain, Value,
4118                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
4119                                       DAG.getConstant(Offset, DstVT)),
4120                           NULL, 0);
4121      BytesLeft -= 2;
4122      Offset += 2;
4123    }
4124
4125    if (BytesLeft == 1) {
4126      Value = DAG.getLoad(MVT::i8, Chain,
4127                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
4128                                      DAG.getConstant(Offset, SrcVT)),
4129                          NULL, 0);
4130      Chain = Value.getValue(1);
4131      Chain = DAG.getStore(Chain, Value,
4132                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
4133                                       DAG.getConstant(Offset, DstVT)),
4134                           NULL, 0);
4135    }
4136  }
4137
4138  return Chain;
4139}
4140
4141SDOperand
4142X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
4143  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
4144  SDOperand TheOp = Op.getOperand(0);
4145  SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
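  // RDTSC leaves the 64-bit timestamp in EDX:EAX.  In 64-bit mode the two
  // halves are recombined into a single i64 (RAX | RDX << 32); in 32-bit mode
  // the two i32 halves are returned as separate values.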
4146  if (Subtarget->is64Bit()) {
4147    SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
4148    SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
4149                                         MVT::i64, Copy1.getValue(2));
4150    SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
4151                                DAG.getConstant(32, MVT::i8));
4152    SDOperand Ops[] = {
4153      DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1)
4154    };
4155
4156    Tys = DAG.getVTList(MVT::i64, MVT::Other);
4157    return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
4158  }
4159
4160  SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
4161  SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX,
4162                                       MVT::i32, Copy1.getValue(2));
4163  SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) };
4164  Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
4165  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3);
4166}
4167
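/// LowerVASTART - Lower ISD::VASTART.  On x86-32 this simply stores the
/// address of the first vararg stack slot; on x86-64 it initializes the four
/// fields of the __va_list_tag structure described below.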
4168SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
4169  SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
4170
4171  if (!Subtarget->is64Bit()) {
4172    // vastart just stores the address of the VarArgsFrameIndex slot into the
4173    // memory location argument.
4174    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
4175    return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(),
4176                        SV->getOffset());
4177  }
4178
4179  // The x86-64 __va_list_tag layout:
4180  //   gp_offset         (0 .. 6 * 8: byte offset of the next GPR in reg_save_area)
4181  //   fp_offset         (48 .. 48 + 8 * 16: byte offset of the next XMM register)
4182  //   overflow_arg_area (pointer to the parameters passed in memory)
4183  //   reg_save_area     (pointer to the register save area)
4184  SmallVector<SDOperand, 8> MemOps;
4185  SDOperand FIN = Op.getOperand(1);
4186  // Store gp_offset
4187  SDOperand Store = DAG.getStore(Op.getOperand(0),
4188                                 DAG.getConstant(VarArgsGPOffset, MVT::i32),
4189                                 FIN, SV->getValue(), SV->getOffset());
4190  MemOps.push_back(Store);
4191
4192  // Store fp_offset
4193  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
4194                    DAG.getConstant(4, getPointerTy()));
4195  Store = DAG.getStore(Op.getOperand(0),
4196                       DAG.getConstant(VarArgsFPOffset, MVT::i32),
4197                       FIN, SV->getValue(), SV->getOffset());
4198  MemOps.push_back(Store);
4199
4200  // Store ptr to overflow_arg_area
4201  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
4202                    DAG.getConstant(4, getPointerTy()));
4203  SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
4204  Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
4205                       SV->getOffset());
4206  MemOps.push_back(Store);
4207
4208  // Store ptr to reg_save_area.
4209  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
4210                    DAG.getConstant(8, getPointerTy()));
4211  SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
4212  Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
4213                       SV->getOffset());
4214  MemOps.push_back(Store);
4215  return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
4216}
4217
4218SDOperand
4219X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
4220  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
4221  switch (IntNo) {
4222  default: return SDOperand();    // Don't custom lower most intrinsics.
4223    // Comparison intrinsics.
4224  case Intrinsic::x86_sse_comieq_ss:
4225  case Intrinsic::x86_sse_comilt_ss:
4226  case Intrinsic::x86_sse_comile_ss:
4227  case Intrinsic::x86_sse_comigt_ss:
4228  case Intrinsic::x86_sse_comige_ss:
4229  case Intrinsic::x86_sse_comineq_ss:
4230  case Intrinsic::x86_sse_ucomieq_ss:
4231  case Intrinsic::x86_sse_ucomilt_ss:
4232  case Intrinsic::x86_sse_ucomile_ss:
4233  case Intrinsic::x86_sse_ucomigt_ss:
4234  case Intrinsic::x86_sse_ucomige_ss:
4235  case Intrinsic::x86_sse_ucomineq_ss:
4236  case Intrinsic::x86_sse2_comieq_sd:
4237  case Intrinsic::x86_sse2_comilt_sd:
4238  case Intrinsic::x86_sse2_comile_sd:
4239  case Intrinsic::x86_sse2_comigt_sd:
4240  case Intrinsic::x86_sse2_comige_sd:
4241  case Intrinsic::x86_sse2_comineq_sd:
4242  case Intrinsic::x86_sse2_ucomieq_sd:
4243  case Intrinsic::x86_sse2_ucomilt_sd:
4244  case Intrinsic::x86_sse2_ucomile_sd:
4245  case Intrinsic::x86_sse2_ucomigt_sd:
4246  case Intrinsic::x86_sse2_ucomige_sd:
4247  case Intrinsic::x86_sse2_ucomineq_sd: {
4248    unsigned Opc = 0;
4249    ISD::CondCode CC = ISD::SETCC_INVALID;
4250    switch (IntNo) {
4251    default: break;
4252    case Intrinsic::x86_sse_comieq_ss:
4253    case Intrinsic::x86_sse2_comieq_sd:
4254      Opc = X86ISD::COMI;
4255      CC = ISD::SETEQ;
4256      break;
4257    case Intrinsic::x86_sse_comilt_ss:
4258    case Intrinsic::x86_sse2_comilt_sd:
4259      Opc = X86ISD::COMI;
4260      CC = ISD::SETLT;
4261      break;
4262    case Intrinsic::x86_sse_comile_ss:
4263    case Intrinsic::x86_sse2_comile_sd:
4264      Opc = X86ISD::COMI;
4265      CC = ISD::SETLE;
4266      break;
4267    case Intrinsic::x86_sse_comigt_ss:
4268    case Intrinsic::x86_sse2_comigt_sd:
4269      Opc = X86ISD::COMI;
4270      CC = ISD::SETGT;
4271      break;
4272    case Intrinsic::x86_sse_comige_ss:
4273    case Intrinsic::x86_sse2_comige_sd:
4274      Opc = X86ISD::COMI;
4275      CC = ISD::SETGE;
4276      break;
4277    case Intrinsic::x86_sse_comineq_ss:
4278    case Intrinsic::x86_sse2_comineq_sd:
4279      Opc = X86ISD::COMI;
4280      CC = ISD::SETNE;
4281      break;
4282    case Intrinsic::x86_sse_ucomieq_ss:
4283    case Intrinsic::x86_sse2_ucomieq_sd:
4284      Opc = X86ISD::UCOMI;
4285      CC = ISD::SETEQ;
4286      break;
4287    case Intrinsic::x86_sse_ucomilt_ss:
4288    case Intrinsic::x86_sse2_ucomilt_sd:
4289      Opc = X86ISD::UCOMI;
4290      CC = ISD::SETLT;
4291      break;
4292    case Intrinsic::x86_sse_ucomile_ss:
4293    case Intrinsic::x86_sse2_ucomile_sd:
4294      Opc = X86ISD::UCOMI;
4295      CC = ISD::SETLE;
4296      break;
4297    case Intrinsic::x86_sse_ucomigt_ss:
4298    case Intrinsic::x86_sse2_ucomigt_sd:
4299      Opc = X86ISD::UCOMI;
4300      CC = ISD::SETGT;
4301      break;
4302    case Intrinsic::x86_sse_ucomige_ss:
4303    case Intrinsic::x86_sse2_ucomige_sd:
4304      Opc = X86ISD::UCOMI;
4305      CC = ISD::SETGE;
4306      break;
4307    case Intrinsic::x86_sse_ucomineq_ss:
4308    case Intrinsic::x86_sse2_ucomineq_sd:
4309      Opc = X86ISD::UCOMI;
4310      CC = ISD::SETNE;
4311      break;
4312    }
4313
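    // Emit the (U)COMI compare, capture its flag result as an i8 X86ISD::SETCC
    // with the translated condition code, and any-extend it to the i32 value
    // the intrinsic returns.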
4314    unsigned X86CC;
4315    SDOperand LHS = Op.getOperand(1);
4316    SDOperand RHS = Op.getOperand(2);
4317    translateX86CC(CC, true, X86CC, LHS, RHS, DAG);
4318
4319    const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
4320    SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS };
4321    SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3);
4322    VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
4323    SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
4324    SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
4325    return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
4326  }
4327  }
4328}
4329
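/// LowerRETURNADDR - Lower ISD::RETURNADDR by loading the saved return address
/// from its slot in the current frame.  Only depth 0 is handled.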
4330SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
4331  // Depths > 0 not supported yet!
4332  if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
4333    return SDOperand();
4334
4335  // Just load the return address
4336  SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
4337  return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
4338}
4339
4340SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
4341  // Depths > 0 not supported yet!
4342  if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
4343    return SDOperand();
4344
4345  SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
4346  return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
4347                     DAG.getConstant(4, getPointerTy()));
4348}
4349
4350/// LowerOperation - Provide custom lowering hooks for some operations.
4351///
4352SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
4353  switch (Op.getOpcode()) {
4354  default: assert(0 && "Should not custom lower this!");
4355  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
4356  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
4357  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
4358  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
4359  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
4360  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
4361  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
4362  case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
4363  case ISD::SHL_PARTS:
4364  case ISD::SRA_PARTS:
4365  case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
4366  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
4367  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
4368  case ISD::FABS:               return LowerFABS(Op, DAG);
4369  case ISD::FNEG:               return LowerFNEG(Op, DAG);
4370  case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
4371  case ISD::SETCC:              return LowerSETCC(Op, DAG, DAG.getEntryNode());
4372  case ISD::SELECT:             return LowerSELECT(Op, DAG);
4373  case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
4374  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
4375  case ISD::CALL:               return LowerCALL(Op, DAG);
4376  case ISD::RET:                return LowerRET(Op, DAG);
4377  case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
4378  case ISD::MEMSET:             return LowerMEMSET(Op, DAG);
4379  case ISD::MEMCPY:             return LowerMEMCPY(Op, DAG);
4380  case ISD::READCYCLECOUNTER:   return LowerREADCYCLCECOUNTER(Op, DAG);
4381  case ISD::VASTART:            return LowerVASTART(Op, DAG);
4382  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4383  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
4384  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
4385  }
4386  return SDOperand();
4387}
4388
4389const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
4390  switch (Opcode) {
4391  default: return NULL;
4392  case X86ISD::SHLD:               return "X86ISD::SHLD";
4393  case X86ISD::SHRD:               return "X86ISD::SHRD";
4394  case X86ISD::FAND:               return "X86ISD::FAND";
4395  case X86ISD::FOR:                return "X86ISD::FOR";
4396  case X86ISD::FXOR:               return "X86ISD::FXOR";
4397  case X86ISD::FSRL:               return "X86ISD::FSRL";
4398  case X86ISD::FILD:               return "X86ISD::FILD";
4399  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
4400  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
4401  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
4402  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
4403  case X86ISD::FLD:                return "X86ISD::FLD";
4404  case X86ISD::FST:                return "X86ISD::FST";
4405  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
4406  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
4407  case X86ISD::CALL:               return "X86ISD::CALL";
4408  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
4409  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
4410  case X86ISD::CMP:                return "X86ISD::CMP";
4411  case X86ISD::COMI:               return "X86ISD::COMI";
4412  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
4413  case X86ISD::SETCC:              return "X86ISD::SETCC";
4414  case X86ISD::CMOV:               return "X86ISD::CMOV";
4415  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
4416  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
4417  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
4418  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
4419  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
4420  case X86ISD::LOAD_UA:            return "X86ISD::LOAD_UA";
4421  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
4422  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
4423  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
4424  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
4425  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
4426  case X86ISD::FMAX:               return "X86ISD::FMAX";
4427  case X86ISD::FMIN:               return "X86ISD::FMIN";
4428  }
4429}
4430
4431/// isLegalAddressImmediate - Return true if the integer value or
4432/// GlobalValue can be used as the offset of the target addressing mode.
4433bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const {
4434  // X86 allows a sign-extended 32-bit immediate field.
4435  return (V > -(1LL << 32) && V < (1LL << 32)-1);
4436}
4437
4438bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
4439  // In 64-bit mode, GV is 64-bit so it won't fit in the 32-bit displacement
4440  // field unless we are in small code model.
4441  if (Subtarget->is64Bit() &&
4442      getTargetMachine().getCodeModel() != CodeModel::Small)
4443    return false;
4444
4445  return (!Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false));
4446}
4447
4448/// isShuffleMaskLegal - Targets can use this to indicate that they only
4449/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
4450/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
4451/// are assumed to be legal.
4452bool
4453X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
4454  // Only do shuffles on 128-bit vector types for now.
4455  if (MVT::getSizeInBits(VT) == 64) return false;
4456  return (Mask.Val->getNumOperands() <= 4 ||
4457          isSplatMask(Mask.Val)  ||
4458          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
4459          X86::isUNPCKLMask(Mask.Val) ||
4460          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
4461          X86::isUNPCKHMask(Mask.Val));
4462}
4463
4464bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
4465                                               MVT::ValueType EVT,
4466                                               SelectionDAG &DAG) const {
4467  unsigned NumElts = BVOps.size();
4468  // Only do shuffles on 128-bit vector types for now.
4469  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
4470  if (NumElts == 2) return true;
4471  if (NumElts == 4) {
4472    return (isMOVLMask(&BVOps[0], 4)  ||
4473            isCommutedMOVL(&BVOps[0], 4, true) ||
4474            isSHUFPMask(&BVOps[0], 4) ||
4475            isCommutedSHUFP(&BVOps[0], 4));
4476  }
4477  return false;
4478}
4479
4480//===----------------------------------------------------------------------===//
4481//                           X86 Scheduler Hooks
4482//===----------------------------------------------------------------------===//
4483
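/// InsertAtEndOfBasicBlock - Expand pseudo instructions that need custom
/// control flow or machine-level scaffolding: the CMOV_* pseudos become a
/// branch-and-phi diamond, and the FP_TO_INT*_IN_MEM pseudos become an FP
/// store performed with the FPU control word temporarily set to truncate.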
4484MachineBasicBlock *
4485X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
4486                                           MachineBasicBlock *BB) {
4487  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
4488  switch (MI->getOpcode()) {
4489  default: assert(false && "Unexpected instr type to insert");
4490  case X86::CMOV_FR32:
4491  case X86::CMOV_FR64:
4492  case X86::CMOV_V4F32:
4493  case X86::CMOV_V2F64:
4494  case X86::CMOV_V2I64: {
4495    // To "insert" a SELECT_CC instruction, we actually have to insert the
4496    // diamond control-flow pattern.  The incoming instruction knows the
4497    // destination vreg to set, the condition code register to branch on, the
4498    // true/false values to select between, and a branch opcode to use.
4499    const BasicBlock *LLVM_BB = BB->getBasicBlock();
4500    ilist<MachineBasicBlock>::iterator It = BB;
4501    ++It;
4502
4503    //  thisMBB:
4504    //  ...
4505    //   TrueVal = ...
4506    //   cmpTY ccX, r1, r2
4507    //   bCC sinkMBB
4508    //   fallthrough --> copy0MBB
4509    MachineBasicBlock *thisMBB = BB;
4510    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
4511    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
4512    unsigned Opc =
4513      X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
4514    BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
4515    MachineFunction *F = BB->getParent();
4516    F->getBasicBlockList().insert(It, copy0MBB);
4517    F->getBasicBlockList().insert(It, sinkMBB);
4518    // Update machine-CFG edges by first adding all successors of the current
4519    // block to the new block which will contain the Phi node for the select.
4520    for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
4521        e = BB->succ_end(); i != e; ++i)
4522      sinkMBB->addSuccessor(*i);
4523    // Next, remove all successors of the current block, and add the true
4524    // and fallthrough blocks as its successors.
4525    while(!BB->succ_empty())
4526      BB->removeSuccessor(BB->succ_begin());
4527    BB->addSuccessor(copy0MBB);
4528    BB->addSuccessor(sinkMBB);
4529
4530    //  copy0MBB:
4531    //   %FalseValue = ...
4532    //   # fallthrough to sinkMBB
4533    BB = copy0MBB;
4534
4535    // Update machine-CFG edges
4536    BB->addSuccessor(sinkMBB);
4537
4538    //  sinkMBB:
4539    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
4540    //  ...
4541    BB = sinkMBB;
4542    BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
4543      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
4544      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
4545
4546    delete MI;   // The pseudo instruction is gone now.
4547    return BB;
4548  }
4549
4550  case X86::FP_TO_INT16_IN_MEM:
4551  case X86::FP_TO_INT32_IN_MEM:
4552  case X86::FP_TO_INT64_IN_MEM: {
4553    // Change the floating point control register to use "round towards zero"
4554    // mode when truncating to an integer value.
4555    MachineFunction *F = BB->getParent();
4556    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
4557    addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
4558
4559    // Save the old (16-bit) value of the control word...
4560    unsigned OldCW =
4561      F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
4562    addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
4563
4564    // Store a control word that selects round-toward-zero (0xC7F)...
4565    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
4566      .addImm(0xC7F);
4567
4568    // Reload the modified control word now...
4569    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
4570
4571    // Restore the memory image of control word to original value
4572    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
4573      .addReg(OldCW);
4574
4575    // Get the X86 opcode to use.
4576    unsigned Opc;
4577    switch (MI->getOpcode()) {
4578    default: assert(0 && "illegal opcode!");
4579    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
4580    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
4581    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
4582    }
4583
4584    X86AddressMode AM;
4585    MachineOperand &Op = MI->getOperand(0);
4586    if (Op.isRegister()) {
4587      AM.BaseType = X86AddressMode::RegBase;
4588      AM.Base.Reg = Op.getReg();
4589    } else {
4590      AM.BaseType = X86AddressMode::FrameIndexBase;
4591      AM.Base.FrameIndex = Op.getFrameIndex();
4592    }
4593    Op = MI->getOperand(1);
4594    if (Op.isImmediate())
4595      AM.Scale = Op.getImm();
4596    Op = MI->getOperand(2);
4597    if (Op.isImmediate())
4598      AM.IndexReg = Op.getImm();
4599    Op = MI->getOperand(3);
4600    if (Op.isGlobalAddress()) {
4601      AM.GV = Op.getGlobal();
4602    } else {
4603      AM.Disp = Op.getImm();
4604    }
4605    addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
4606                      .addReg(MI->getOperand(4).getReg());
4607
4608    // Reload the original control word now.
4609    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
4610
4611    delete MI;   // The pseudo instruction is gone now.
4612    return BB;
4613  }
4614  }
4615}
4616
4617//===----------------------------------------------------------------------===//
4618//                           X86 Optimization Hooks
4619//===----------------------------------------------------------------------===//
4620
4621void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
4622                                                       uint64_t Mask,
4623                                                       uint64_t &KnownZero,
4624                                                       uint64_t &KnownOne,
4625                                                       unsigned Depth) const {
4626  unsigned Opc = Op.getOpcode();
4627  assert((Opc >= ISD::BUILTIN_OP_END ||
4628          Opc == ISD::INTRINSIC_WO_CHAIN ||
4629          Opc == ISD::INTRINSIC_W_CHAIN ||
4630          Opc == ISD::INTRINSIC_VOID) &&
4631         "Should use MaskedValueIsZero if you don't know whether Op"
4632         " is a target node!");
4633
4634  KnownZero = KnownOne = 0;   // Don't know anything.
4635  switch (Opc) {
4636  default: break;
4637  case X86ISD::SETCC:
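    // X86ISD::SETCC produces 0 or 1, so every bit above bit 0 is known zero.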
4638    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
4639    break;
4640  }
4641}
4642
4643/// getShuffleScalarElt - Returns the scalar element that will make up the ith
4644/// element of the result of the vector shuffle.
4645static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
4646  MVT::ValueType VT = N->getValueType(0);
4647  SDOperand PermMask = N->getOperand(2);
4648  unsigned NumElems = PermMask.getNumOperands();
4649  SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
4650  i %= NumElems;
4651  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
4652    return (i == 0)
4653      ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
4654  } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
4655    SDOperand Idx = PermMask.getOperand(i);
4656    if (Idx.getOpcode() == ISD::UNDEF)
4657      return DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
4658    return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
4659  }
4660  return SDOperand();
4661}
4662
4663/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
4664/// node is a GlobalAddress + an offset.
4665static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
4666  unsigned Opc = N->getOpcode();
4667  if (Opc == X86ISD::Wrapper) {
4668    if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
4669      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
4670      return true;
4671    }
4672  } else if (Opc == ISD::ADD) {
4673    SDOperand N1 = N->getOperand(0);
4674    SDOperand N2 = N->getOperand(1);
4675    if (isGAPlusOffset(N1.Val, GA, Offset)) {
4676      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
4677      if (V) {
4678        Offset += V->getSignExtended();
4679        return true;
4680      }
4681    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
4682      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
4683      if (V) {
4684        Offset += V->getSignExtended();
4685        return true;
4686      }
4687    }
4688  }
4689  return false;
4690}
4691
4692/// isConsecutiveLoad - Returns true if N is loading from an address of Base
4693/// + Dist * Size.
4694static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
4695                              MachineFrameInfo *MFI) {
4696  if (N->getOperand(0).Val != Base->getOperand(0).Val)
4697    return false;
4698
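  // Given a common chain, the addresses must either be frame indices whose
  // objects are Size bytes each and lie Dist*Size bytes apart, or the same
  // global plus offsets that differ by exactly Dist*Size.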
4699  SDOperand Loc = N->getOperand(1);
4700  SDOperand BaseLoc = Base->getOperand(1);
4701  if (Loc.getOpcode() == ISD::FrameIndex) {
4702    if (BaseLoc.getOpcode() != ISD::FrameIndex)
4703      return false;
4704    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
4705    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
4706    int FS  = MFI->getObjectSize(FI);
4707    int BFS = MFI->getObjectSize(BFI);
4708    if (FS != BFS || FS != Size) return false;
4709    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
4710  } else {
4711    GlobalValue *GV1 = NULL;
4712    GlobalValue *GV2 = NULL;
4713    int64_t Offset1 = 0;
4714    int64_t Offset2 = 0;
4715    bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
4716    bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
4717    if (isGA1 && isGA2 && GV1 == GV2)
4718      return Offset1 == (Offset2 + Dist*Size);
4719  }
4720
4721  return false;
4722}
4723
4724static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
4725                              const X86Subtarget *Subtarget) {
4726  GlobalValue *GV;
4727  int64_t Offset;
4728  if (isGAPlusOffset(Base, GV, Offset))
4729    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
4730  else {
4731    assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
4732    int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
4733    if (BFI < 0)
4734      // Fixed objects do not specify alignment, however the offsets are known.
4735      return ((Subtarget->getStackAlignment() % 16) == 0 &&
4736              (MFI->getObjectOffset(BFI) % 16) == 0);
4737    else
4738      return MFI->getObjectAlignment(BFI) >= 16;
4739  }
4740  return false;
4741}
4742
4743
4744/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
4745/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
4746/// if the load addresses are consecutive, non-overlapping, and in the right
4747/// order.
4748static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
4749                                       const X86Subtarget *Subtarget) {
4750  MachineFunction &MF = DAG.getMachineFunction();
4751  MachineFrameInfo *MFI = MF.getFrameInfo();
4752  MVT::ValueType VT = N->getValueType(0);
4753  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
4754  SDOperand PermMask = N->getOperand(2);
4755  int NumElems = (int)PermMask.getNumOperands();
4756  SDNode *Base = NULL;
4757  for (int i = 0; i < NumElems; ++i) {
4758    SDOperand Idx = PermMask.getOperand(i);
4759    if (Idx.getOpcode() == ISD::UNDEF) {
4760      if (!Base) return SDOperand();
4761    } else {
4762      SDOperand Arg =
4763        getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
4764      if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
4765        return SDOperand();
4766      if (!Base)
4767        Base = Arg.Val;
4768      else if (!isConsecutiveLoad(Arg.Val, Base,
4769                                  i, MVT::getSizeInBits(EVT)/8,MFI))
4770        return SDOperand();
4771    }
4772  }
4773
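  // If the combined 16-byte source is known to be 16-byte aligned, fold the
  // element loads into a single aligned vector load; otherwise use
  // X86ISD::LOAD_UA, which selects to movups.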
4774  bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
4775  if (isAlign16) {
4776    LoadSDNode *LD = cast<LoadSDNode>(Base);
4777    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
4778                       LD->getSrcValueOffset());
4779  } else {
4780    // Just use movups, it's shorter.
4781    SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other);
4782    SmallVector<SDOperand, 3> Ops;
4783    Ops.push_back(Base->getOperand(0));
4784    Ops.push_back(Base->getOperand(1));
4785    Ops.push_back(Base->getOperand(2));
4786    return DAG.getNode(ISD::BIT_CONVERT, VT,
4787                       DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size()));
4788  }
4789}
4790
4791/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
4792static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
4793                                      const X86Subtarget *Subtarget) {
4794  SDOperand Cond = N->getOperand(0);
4795
4796  // If we have SSE2 support (which implies SSE1), try to form FP min/max nodes.
4797  if (Subtarget->hasSSE2() &&
4798      (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
4799    if (Cond.getOpcode() == ISD::SETCC) {
4800      // Get the LHS/RHS of the select.
4801      SDOperand LHS = N->getOperand(1);
4802      SDOperand RHS = N->getOperand(2);
4803      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
4804
4805      unsigned Opcode = 0;
4806      if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
4807        switch (CC) {
4808        default: break;
4809        case ISD::SETOLE: // (X <= Y) ? X : Y -> min
4810        case ISD::SETULE:
4811        case ISD::SETLE:
4812          if (!UnsafeFPMath) break;
4813          // FALL THROUGH.
4814        case ISD::SETOLT:  // (X olt/lt Y) ? X : Y -> min
4815        case ISD::SETLT:
4816          Opcode = X86ISD::FMIN;
4817          break;
4818
4819        case ISD::SETOGT: // (X > Y) ? X : Y -> max
4820        case ISD::SETUGT:
4821        case ISD::SETGT:
4822          if (!UnsafeFPMath) break;
4823          // FALL THROUGH.
4824        case ISD::SETUGE:  // (X uge/ge Y) ? X : Y -> max
4825        case ISD::SETGE:
4826          Opcode = X86ISD::FMAX;
4827          break;
4828        }
4829      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
4830        switch (CC) {
4831        default: break;
4832        case ISD::SETOGT: // (X > Y) ? Y : X -> min
4833        case ISD::SETUGT:
4834        case ISD::SETGT:
4835          if (!UnsafeFPMath) break;
4836          // FALL THROUGH.
4837        case ISD::SETUGE:  // (X uge/ge Y) ? Y : X -> min
4838        case ISD::SETGE:
4839          Opcode = X86ISD::FMIN;
4840          break;
4841
4842        case ISD::SETOLE:   // (X <= Y) ? Y : X -> max
4843        case ISD::SETULE:
4844        case ISD::SETLE:
4845          if (!UnsafeFPMath) break;
4846          // FALL THROUGH.
4847        case ISD::SETOLT:   // (X olt/lt Y) ? Y : X -> max
4848        case ISD::SETLT:
4849          Opcode = X86ISD::FMAX;
4850          break;
4851        }
4852      }
4853
4854      if (Opcode)
4855        return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
4856    }
4857
4858  }
4859
4860  return SDOperand();
4861}
4862
4863
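/// PerformDAGCombine - Dispatch the target-specific DAG combines: merge
/// VECTOR_SHUFFLEs of consecutive loads into one wide load, and turn
/// floating-point select/setcc pairs into FMIN/FMAX nodes.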
4864SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
4865                                               DAGCombinerInfo &DCI) const {
4866  SelectionDAG &DAG = DCI.DAG;
4867  switch (N->getOpcode()) {
4868  default: break;
4869  case ISD::VECTOR_SHUFFLE:
4870    return PerformShuffleCombine(N, DAG, Subtarget);
4871  case ISD::SELECT:
4872    return PerformSELECTCombine(N, DAG, Subtarget);
4873  }
4874
4875  return SDOperand();
4876}
4877
4878//===----------------------------------------------------------------------===//
4879//                           X86 Inline Assembly Support
4880//===----------------------------------------------------------------------===//
4881
4882/// getConstraintType - Given a constraint letter, return the type of
4883/// constraint it is for this target.
4884X86TargetLowering::ConstraintType
4885X86TargetLowering::getConstraintType(char ConstraintLetter) const {
4886  switch (ConstraintLetter) {
4887  case 'A':
4888  case 'r':
4889  case 'R':
4890  case 'l':
4891  case 'q':
4892  case 'Q':
4893  case 'x':
4894  case 'Y':
4895    return C_RegisterClass;
4896  default: return TargetLowering::getConstraintType(ConstraintLetter);
4897  }
4898}
4899
4900/// isOperandValidForConstraint - Return the specified operand (possibly
4901/// modified) if the specified SDOperand is valid for the specified target
4902/// constraint letter, otherwise return null.
4903SDOperand X86TargetLowering::
4904isOperandValidForConstraint(SDOperand Op, char Constraint, SelectionDAG &DAG) {
4905  switch (Constraint) {
4906  default: break;
4907  case 'i':
4908    // Literal immediates are always ok.
4909    if (isa<ConstantSDNode>(Op)) return Op;
4910
4911    // If we are in non-pic codegen mode, we allow the address of a global to
4912    // be used with 'i'.
4913    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
4914      if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
4915        return SDOperand(0, 0);
4916
4917      if (GA->getOpcode() != ISD::TargetGlobalAddress)
4918        Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
4919                                        GA->getOffset());
4920      return Op;
4921    }
4922
4923    // Otherwise, not valid for this mode.
4924    return SDOperand(0, 0);
4925  }
4926  return TargetLowering::isOperandValidForConstraint(Op, Constraint, DAG);
4927}
4928
4929
4930std::vector<unsigned> X86TargetLowering::
4931getRegClassForInlineAsmConstraint(const std::string &Constraint,
4932                                  MVT::ValueType VT) const {
4933  if (Constraint.size() == 1) {
4934    // FIXME: not handling fp-stack yet!
4935    // FIXME: not handling MMX registers yet ('y' constraint).
4936    switch (Constraint[0]) {      // GCC X86 Constraint Letters
4937    default: break;  // Unknown constraint letter
4938    case 'A':   // EAX/EDX
4939      if (VT == MVT::i32 || VT == MVT::i64)
4940        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
4941      break;
4942    case 'r':   // GENERAL_REGS
4943    case 'R':   // LEGACY_REGS
4944      if (VT == MVT::i64 && Subtarget->is64Bit())
4945        return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX,
4946                                     X86::RSI, X86::RDI, X86::RBP, X86::RSP,
4947                                     X86::R8,  X86::R9,  X86::R10, X86::R11,
4948                                     X86::R12, X86::R13, X86::R14, X86::R15, 0);
4949      if (VT == MVT::i32)
4950        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
4951                                     X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
4952      else if (VT == MVT::i16)
4953        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
4954                                     X86::SI, X86::DI, X86::BP, X86::SP, 0);
4955      else if (VT == MVT::i8)
4956        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4957      break;
4958    case 'l':   // INDEX_REGS
4959      if (VT == MVT::i32)
4960        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
4961                                     X86::ESI, X86::EDI, X86::EBP, 0);
4962      else if (VT == MVT::i16)
4963        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
4964                                     X86::SI, X86::DI, X86::BP, 0);
4965      else if (VT == MVT::i8)
4966        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4967      break;
4968    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
4969    case 'Q':   // Q_REGS
4970      if (VT == MVT::i32)
4971        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
4972      else if (VT == MVT::i16)
4973        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
4974      else if (VT == MVT::i8)
4975        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4976      break;
4977    case 'x':   // SSE_REGS if SSE1 allowed
4978      if (Subtarget->hasSSE1())
4979        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4980                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
4981                                     0);
4982      return std::vector<unsigned>();
4983    case 'Y':   // SSE_REGS if SSE2 allowed
4984      if (Subtarget->hasSSE2())
4985        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4986                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
4987                                     0);
4988      return std::vector<unsigned>();
4989    }
4990  }
4991
4992  return std::vector<unsigned>();
4993}
4994
4995std::pair<unsigned, const TargetRegisterClass*>
4996X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
4997                                                MVT::ValueType VT) const {
4998  // Use the default implementation in TargetLowering to convert the register
4999  // constraint into a member of a register class.
5000  std::pair<unsigned, const TargetRegisterClass*> Res;
5001  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
5002
5003  // Not found as a standard register?
5004  if (Res.second == 0) {
5005    // GCC calls "st(0)" just plain "st".
5006    if (StringsEqualNoCase("{st}", Constraint)) {
5007      Res.first = X86::ST0;
5008      Res.second = X86::RSTRegisterClass;
5009    }
5010
5011    return Res;
5012  }
5013
5014  // Otherwise, check to see if this is a register class of the wrong value
5015  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it to
5016  // turn into {ax},{dx}.
5017  if (Res.second->hasType(VT))
5018    return Res;   // Correct type already, nothing to do.
5019
5020  // All of the single-register GCC register classes map their values onto
5021  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
5022  // really want an 8-bit or 32-bit register, map to the appropriate register
5023  // class and return the appropriate register.
5024  if (Res.second != X86::GR16RegisterClass)
5025    return Res;
5026
5027  if (VT == MVT::i8) {
5028    unsigned DestReg = 0;
5029    switch (Res.first) {
5030    default: break;
5031    case X86::AX: DestReg = X86::AL; break;
5032    case X86::DX: DestReg = X86::DL; break;
5033    case X86::CX: DestReg = X86::CL; break;
5034    case X86::BX: DestReg = X86::BL; break;
5035    }
5036    if (DestReg) {
5037      Res.first = DestReg;
5038      Res.second = X86::GR8RegisterClass;
5039    }
5040  } else if (VT == MVT::i32) {
5041    unsigned DestReg = 0;
5042    switch (Res.first) {
5043    default: break;
5044    case X86::AX: DestReg = X86::EAX; break;
5045    case X86::DX: DestReg = X86::EDX; break;
5046    case X86::CX: DestReg = X86::ECX; break;
5047    case X86::BX: DestReg = X86::EBX; break;
5048    case X86::SI: DestReg = X86::ESI; break;
5049    case X86::DI: DestReg = X86::EDI; break;
5050    case X86::BP: DestReg = X86::EBP; break;
5051    case X86::SP: DestReg = X86::ESP; break;
5052    }
5053    if (DestReg) {
5054      Res.first = DestReg;
5055      Res.second = X86::GR32RegisterClass;
5056    }
5057  } else if (VT == MVT::i64) {
5058    unsigned DestReg = 0;
5059    switch (Res.first) {
5060    default: break;
5061    case X86::AX: DestReg = X86::RAX; break;
5062    case X86::DX: DestReg = X86::RDX; break;
5063    case X86::CX: DestReg = X86::RCX; break;
5064    case X86::BX: DestReg = X86::RBX; break;
5065    case X86::SI: DestReg = X86::RSI; break;
5066    case X86::DI: DestReg = X86::RDI; break;
5067    case X86::BP: DestReg = X86::RBP; break;
5068    case X86::SP: DestReg = X86::RSP; break;
5069    }
5070    if (DestReg) {
5071      Res.first = DestReg;
5072      Res.second = X86::GR64RegisterClass;
5073    }
5074  }
5075
5076  return Res;
5077}
5078