X86ISelLowering.cpp revision bfd68a785810fcc4dfc82f583c4f2bea192d59f4
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: temporary.
#include "llvm/Support/CommandLine.h"
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  // Set up the TargetLowering object.

  // X86 is weird; it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (!Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);
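  // Scales 3, 5 and 9 are only reachable by reusing the base register as the
  // index (e.g. 9*R is matched as lea (R,R,8)), which is why they are
  // registered last as the more expensive forms.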

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSE)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  if (!Subtarget->is64Bit()) {
    // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
    setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
  }

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSE && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
  }

  setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
  setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);

  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::SEXTLOAD         , MVT::i1   , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);

  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP          , MVT::i64  , Expand);
    setOperationAction(ISD::CTTZ           , MVT::i64  , Expand);
    setOperationAction(ISD::CTLZ           , MVT::i64  , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT        , MVT::i64  , Custom);
    setOperationAction(ISD::SETCC         , MVT::i64  , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET             , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool  , MVT::i64  , Custom);
    setOperationAction(ISD::JumpTable     , MVT::i64  , Custom);
    setOperationAction(ISD::GlobalAddress , MVT::i64  , Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64  , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::AND,                MVT::v4f32, Legal);
    setOperationAction(ISD::OR,                 MVT::v4f32, Legal);
    setOperationAction(ISD::XOR,                MVT::v4f32, Legal);
    setOperationAction(ISD::ADD,                MVT::v4f32, Legal);
    setOperationAction(ISD::SUB,                MVT::v4f32, Legal);
    setOperationAction(ISD::MUL,                MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,                MVT::v2f64, Legal);
    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v2f64, Legal);
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    setOperationAction(ISD::MUL,                MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are optimizing for size.
  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

//===----------------------------------------------------------------------===//
//                    C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// AddLiveIn - This helper function adds the specified physical register to
/// the MachineFunction as a live-in value.  It also creates a corresponding
/// virtual register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

/// HowToPassCCCArgument - Returns how a formal argument of the specified type
/// should be passed. If it is passed on the stack, returns the size of the
/// stack slot; if it is passed in an XMM register, returns the number of XMM
/// registers needed.
static void
HowToPassCCCArgument(MVT::ValueType ObjectVT, unsigned NumXMMRegs,
                     unsigned &ObjSize, unsigned &ObjXMMRegs) {
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:  ObjSize = 1; break;
  case MVT::i16: ObjSize = 2; break;
  case MVT::i32: ObjSize = 4; break;
  case MVT::i64: ObjSize = 8; break;
  case MVT::f32: ObjSize = 4; break;
  case MVT::f64: ObjSize = 8; break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 4)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}
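
// Example: under this convention an i64 or f64 argument always takes an
// 8-byte stack slot, while a vector argument is passed in an XMM register
// until all four XMM argument registers are used, after which it takes a
// 16-byte stack slot.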

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is <= 4 bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };
  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjXMMRegs = 0;
    HowToPassCCCArgument(ObjectVT, NumXMMRegs, ObjSize, ObjXMMRegs);
    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    SDOperand ArgValue;
    if (ObjXMMRegs) {
      // Passed in an XMM register.
      unsigned Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                               X86::VR128RegisterClass);
      ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
      ArgValues.push_back(ArgValue);
      NumXMMRegs += ObjXMMRegs;
    } else {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the frame index object for this incoming parameter...
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                             DAG.getSrcValue(NULL));
      ArgValues.push_back(ArgValue);
      ArgOffset += ArgIncrement;   // Move on to the next argument...
    }
  }

  ArgValues.push_back(Root);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
  RegSaveFrameIndex = 0xAAAAAAA;  // X86-64 only.
  ReturnAddrIndex = 0;            // No return address slot generated yet.
  BytesToPopOnReturn = 0;         // Callee pops nothing.
  BytesCallerReserves = ArgOffset;

  // If this is a struct return on Darwin/X86, the callee pops the hidden
  // struct pointer.
  if (MF.getFunction()->getCallingConv() == CallingConv::CSRet &&
      Subtarget->isTargetDarwin())
    BytesToPopOnReturn = 4;

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0], ArgValues.size());
}


SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain     = Op.getOperand(0);
  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  MVT::ValueType RetVT= Op.Val->getValueType(0);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  // Keep track of the number of XMM regs passed so far.
  unsigned NumXMMRegs = 0;
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
    case MVT::f64:
      NumBytes += 8;
      break;
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4)
        ++NumXMMRegs;
      else {
        // XMM arguments have to be aligned on 16-byte boundary.
        NumBytes = ((NumBytes + 15) / 16) * 16;
        NumBytes += 16;
      }
      break;
    }
  }
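
  // At this point NumBytes is the stack space the outgoing arguments need;
  // e.g. for f(int, double, <4 x float>) it is 4 + 8 = 12 bytes, since the
  // vector travels in XMM0 rather than on the stack.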

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumXMMRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16: {
      // Promote the integer to 32 bits.  If the input type is signed use a
      // sign extend, otherwise use a zero extend.
      unsigned ExtOp =
        cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue() ?
        ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, MVT::i32, Arg);
    }
    // Fallthrough

    case MVT::i32:
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
      ArgOffset += 4;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
      ArgOffset += 8;
      break;
    }
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4) {
        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
        NumXMMRegs++;
      } else {
        // XMM arguments have to be aligned on 16-byte boundary.
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
        ArgOffset += 16;
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  // If this is a call to a struct-return function on Darwin/X86, the callee
  // pops the hidden struct pointer, so we have to push it back.
  if (CallingConv == CallingConv::CSRet && Subtarget->isTargetDarwin())
    NumBytesForCalleeToPush = 4;

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(RetVT);
    break;
  case MVT::f32:
  case MVT::f64: {
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(InFlag);
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys,
                                   &Ops[0], Ops.size());
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      Tys.clear();
      Tys.push_back(MVT::Other);
      Ops.clear();
      Ops.push_back(Chain);
      Ops.push_back(RetVal);
      Ops.push_back(StackSlot);
      Ops.push_back(DAG.getValueType(RetVT));
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
      RetVal = DAG.getLoad(RetVT, Chain, StackSlot,
                           DAG.getSrcValue(NULL));
      Chain = RetVal.getValue(1);
    }

    if (RetVT == MVT::f32 && !X86ScalarSSE)
      // FIXME: we would really like to remember that this FP_ROUND
      // operation is okay to eliminate if we allow excess FP precision.
      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
    ResultVals.push_back(RetVal);
    NodeTys.push_back(RetVT);
    break;
  }
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                 X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// HowToPassX86_64CCCArgument - Returns how a formal argument of the specified
/// type should be passed. If it is passed on the stack, returns the size of
/// the stack slot; if it is passed in an integer or XMM register, returns the
/// number of integer or XMM registers needed.
static void
HowToPassX86_64CCCArgument(MVT::ValueType ObjectVT,
                           unsigned NumIntRegs, unsigned NumXMMRegs,
                           unsigned &ObjSize, unsigned &ObjIntRegs,
                           unsigned &ObjXMMRegs) {
  ObjSize = 0;
  ObjIntRegs = 0;
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    if (NumIntRegs < 6)
      ObjIntRegs = 1;
    else {
      switch (ObjectVT) {
      default: break;
      case MVT::i8:  ObjSize = 1; break;
      case MVT::i16: ObjSize = 2; break;
      case MVT::i32: ObjSize = 4; break;
      case MVT::i64: ObjSize = 8; break;
      }
    }
    break;
  case MVT::f32:
  case MVT::f64:
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 8)
      ObjXMMRegs = 1;
    else {
      switch (ObjectVT) {
      default: break;
      case MVT::f32:  ObjSize = 4; break;
      case MVT::f64:  ObjSize = 8; break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64: ObjSize = 16; break;
      }
    }
    break;
  }
}
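
// In short: the first six integer arguments land in GPRs and the first eight
// FP/vector arguments land in XMM registers; anything beyond that spills to
// an 8- or 16-byte stack slot, as the AMD64 calling convention specifies.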

SDOperand
X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [RSP] -- return address
  // [RSP + 8] -- first nonreg argument (leftmost lexically)
  // [RSP +16] -- second nonreg argument, if 1st argument is <= 8 bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  unsigned NumIntRegs = 0;  // Int regs used for parameter passing.
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  static const unsigned GPR8ArgRegs[] = {
    X86::DIL, X86::SIL, X86::DL,  X86::CL,  X86::R8B, X86::R9B
  };
  static const unsigned GPR16ArgRegs[] = {
    X86::DI,  X86::SI,  X86::DX,  X86::CX,  X86::R8W, X86::R9W
  };
  static const unsigned GPR32ArgRegs[] = {
    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
  };
  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8,  X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 8;
    unsigned ObjSize = 0;
    unsigned ObjIntRegs = 0;
    unsigned ObjXMMRegs = 0;

    // FIXME: __int128 and long double support?
    HowToPassX86_64CCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
                               ObjSize, ObjIntRegs, ObjXMMRegs);
    if (ObjSize > 8)
      ArgIncrement = ObjSize;

    unsigned Reg = 0;
    SDOperand ArgValue;
    if (ObjIntRegs || ObjXMMRegs) {
      switch (ObjectVT) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::i64: {
        TargetRegisterClass *RC = NULL;
        switch (ObjectVT) {
        default: break;
        case MVT::i8:
          RC = X86::GR8RegisterClass;
          Reg = GPR8ArgRegs[NumIntRegs];
          break;
        case MVT::i16:
          RC = X86::GR16RegisterClass;
          Reg = GPR16ArgRegs[NumIntRegs];
          break;
        case MVT::i32:
          RC = X86::GR32RegisterClass;
          Reg = GPR32ArgRegs[NumIntRegs];
          break;
        case MVT::i64:
          RC = X86::GR64RegisterClass;
          Reg = GPR64ArgRegs[NumIntRegs];
          break;
        }
        Reg = AddLiveIn(MF, Reg, RC);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      case MVT::f32:
      case MVT::f64:
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64: {
        TargetRegisterClass *RC = (ObjectVT == MVT::f32) ?
          X86::FR32RegisterClass : ((ObjectVT == MVT::f64) ?
                              X86::FR64RegisterClass : X86::VR128RegisterClass);
        Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], RC);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      }
      NumIntRegs += ObjIntRegs;
      NumXMMRegs += ObjXMMRegs;
    } else if (ObjSize) {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                             DAG.getSrcValue(NULL));
      ArgOffset += ArgIncrement;   // Move on to the next argument.
    }

    ArgValues.push_back(ArgValue);
  }

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so they
    // may be loaded by dereferencing the result of va_next.
    VarArgsGPOffset = NumIntRegs * 8;
    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
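    // The register save area is 6 x 8 bytes of GPRs followed by 8 x 16 bytes
    // of XMM registers, 176 bytes in all, matching the AMD64 ABI layout;
    // VarArgsGPOffset / VarArgsFPOffset index the first unused slot in each
    // half.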

    // Store the integer parameter registers.
    std::vector<SDOperand> MemOps;
    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                              DAG.getConstant(VarArgsGPOffset, getPointerTy()));
    for (; NumIntRegs != 6; ++NumIntRegs) {
      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                X86::GR64RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
      SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
                                    Val, FIN, DAG.getSrcValue(NULL));
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(8, getPointerTy()));
    }

    // Now store the XMM (fp + vector) parameter registers.
    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
    for (; NumXMMRegs != 8; ++NumXMMRegs) {
      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                X86::VR128RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
      SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
                                    Val, FIN, DAG.getSrcValue(NULL));
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(16, getPointerTy()));
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }
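
  // The frame indices and offsets recorded above are what the custom VASTART
  // lowering (not shown in this excerpt) presumably uses to seed the
  // va_list's gp_offset, fp_offset, overflow area, and register save area
  // fields.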

  ArgValues.push_back(Root);

  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = ArgOffset;

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0], ArgValues.size());
}

SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain     = Op.getOperand(0);
  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  MVT::ValueType RetVT= Op.Val->getValueType(0);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;
  unsigned NumIntRegs = 0;  // Int regs used for parameter passing.
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  static const unsigned GPR8ArgRegs[] = {
    X86::DIL, X86::SIL, X86::DL,  X86::CL,  X86::R8B, X86::R9B
  };
  static const unsigned GPR16ArgRegs[] = {
    X86::DI,  X86::SI,  X86::DX,  X86::CX,  X86::R8W, X86::R9W
  };
  static const unsigned GPR32ArgRegs[] = {
    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
  };
  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8,  X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    MVT::ValueType ArgVT = Arg.getValueType();

    switch (ArgVT) {
    default: assert(0 && "Unknown value type!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      if (NumIntRegs < 6)
        ++NumIntRegs;
      else
        NumBytes += 8;
      break;
    case MVT::f32:
    case MVT::f64:
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 8)
        NumXMMRegs++;
      else if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
        NumBytes += 8;
      else {
        // XMM arguments have to be aligned on 16-byte boundary.
        NumBytes = ((NumBytes + 15) / 16) * 16;
        NumBytes += 16;
      }
      break;
    }
  }

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumIntRegs = 0;
  NumXMMRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    MVT::ValueType ArgVT = Arg.getValueType();

    switch (ArgVT) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      if (NumIntRegs < 6) {
        unsigned Reg = 0;
        switch (ArgVT) {
        default: break;
        case MVT::i8:  Reg = GPR8ArgRegs[NumIntRegs];  break;
        case MVT::i16: Reg = GPR16ArgRegs[NumIntRegs]; break;
        case MVT::i32: Reg = GPR32ArgRegs[NumIntRegs]; break;
        case MVT::i64: Reg = GPR64ArgRegs[NumIntRegs]; break;
        }
        RegsToPass.push_back(std::make_pair(Reg, Arg));
        ++NumIntRegs;
      } else {
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
        ArgOffset += 8;
      }
      break;
    case MVT::f32:
    case MVT::f64:
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 8) {
        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
        NumXMMRegs++;
      } else {
        if (ArgVT != MVT::f32 && ArgVT != MVT::f64) {
          // XMM arguments have to be aligned on 16-byte boundary.
          ArgOffset = ((ArgOffset + 15) / 16) * 16;
        }
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
        if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
          ArgOffset += 8;
        else
          ArgOffset += 16;
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isVarArg) {
    // From the AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as a hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of
    // SSE registers used and is in the range 0 - 8 inclusive.
    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    if (Op.Val->getValueType(1) == MVT::i64) {
      // FIXME: __int128 support?
      Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::RDX, MVT::i64,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i64);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    // FIXME: long double support?
    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(RetVT);
    break;
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}

//===----------------------------------------------------------------------===//
//                    Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee-pop value to be
// 8n+4 bytes, which is needed for tail recursion elimination and to keep the
// stack aligned.
//
// Note that this can be enhanced in the future to pass FP values in registers
// (when we have a global FP allocator) and do other tricks.
//
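
// FASTCC_NUM_INT_ARGS_INREGS - The maximum number of integer arguments fastcc
// passes in registers.  The macro is used below but never defined in this
// listing, so the definition here is an assumed one; the value 2 matches the
// EAX/EDX scheme described above.
#define FASTCC_NUM_INT_ARGS_INREGS 2

// Illustrative example: under this convention, fastcc i32 f(i32 %a, i32 %b,
// i32 %c) receives %a in EAX, %b in EDX, and %c on the stack, and f pops %c's
// stack space when it returns.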

/// HowToPassFastCCArgument - Returns how a formal argument of the specified
/// type should be passed. If it is passed on the stack, returns the size of
/// the stack slot; if it is passed in an integer or XMM register, returns the
/// number of integer or XMM registers needed.
static void
HowToPassFastCCArgument(MVT::ValueType ObjectVT,
                        unsigned NumIntRegs, unsigned NumXMMRegs,
                        unsigned &ObjSize, unsigned &ObjIntRegs,
                        unsigned &ObjXMMRegs) {
  ObjSize = 0;
  ObjIntRegs = 0;
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 1;
    break;
  case MVT::i16:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 2;
    break;
  case MVT::i32:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 4;
    break;
  case MVT::i64:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
1348    if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
1349      ObjIntRegs = 2;
1350    } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
1351      ObjIntRegs = 1;
1352      ObjSize = 4;
1353    } else
1354#endif
1355      ObjSize = 8;
    break;
1356  case MVT::f32:
1357    ObjSize = 4;
1358    break;
1359  case MVT::f64:
1360    ObjSize = 8;
1361    break;
1362  case MVT::v16i8:
1363  case MVT::v8i16:
1364  case MVT::v4i32:
1365  case MVT::v2i64:
1366  case MVT::v4f32:
1367  case MVT::v2f64:
1368    if (NumXMMRegs < 4)
1369      ObjXMMRegs = 1;
1370    else
1371      ObjSize = 16;
1372    break;
1373  }
1374}
1375
1376SDOperand
1377X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
1378  unsigned NumArgs = Op.Val->getNumValues()-1;
1379  MachineFunction &MF = DAG.getMachineFunction();
1380  MachineFrameInfo *MFI = MF.getFrameInfo();
1381  SDOperand Root = Op.getOperand(0);
1382  std::vector<SDOperand> ArgValues;
1383
1384  // Add DAG nodes to load the arguments...  On entry to a function the stack
1385  // frame looks like this:
1386  //
1387  // [ESP] -- return address
1388  // [ESP + 4] -- first nonreg argument (leftmost lexically)
1389  // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size
1390  //    ...
1391  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
1392
1393  // Keep track of the number of integer regs passed so far.  This can be either
1394  // 0 (neither EAX nor EDX is used), 1 (EAX is used), or 2 (EAX and EDX are both
1395  // used).
1396  unsigned NumIntRegs = 0;
1397  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
1398
1399  static const unsigned XMMArgRegs[] = {
1400    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
1401  };
1402
1403  for (unsigned i = 0; i < NumArgs; ++i) {
1404    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
1405    unsigned ArgIncrement = 4;
1406    unsigned ObjSize = 0;
1407    unsigned ObjIntRegs = 0;
1408    unsigned ObjXMMRegs = 0;
1409
1410    HowToPassFastCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
1411                            ObjSize, ObjIntRegs, ObjXMMRegs);
1412    if (ObjSize > 4)
1413      ArgIncrement = ObjSize;
1414
1415    unsigned Reg = 0;
1416    SDOperand ArgValue;
1417    if (ObjIntRegs || ObjXMMRegs) {
1418      switch (ObjectVT) {
1419      default: assert(0 && "Unhandled argument type!");
1420      case MVT::i8:
1421        Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
1422                        X86::GR8RegisterClass);
1423        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i8);
1424        break;
1425      case MVT::i16:
1426        Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
1427                        X86::GR16RegisterClass);
1428        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i16);
1429        break;
1430      case MVT::i32:
1431        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
1432                        X86::GR32RegisterClass);
1433        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
1434        break;
1435      case MVT::i64:
1436        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
1437                        X86::GR32RegisterClass);
1438        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
1439        if (ObjIntRegs == 2) {
1440          Reg = AddLiveIn(MF, X86::EDX, X86::GR32RegisterClass);
1441          SDOperand ArgValue2 = DAG.getCopyFromReg(Root, Reg, MVT::i32);
1442          ArgValue= DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
1443        }
1444        break;
1445      case MVT::v16i8:
1446      case MVT::v8i16:
1447      case MVT::v4i32:
1448      case MVT::v2i64:
1449      case MVT::v4f32:
1450      case MVT::v2f64:
1451        Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass);
1452        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
1453        break;
1454      }
1455      NumIntRegs += ObjIntRegs;
1456      NumXMMRegs += ObjXMMRegs;
1457    }
1458
1459    if (ObjSize) {
1460      // XMM arguments have to be aligned on 16-byte boundary.
1461      if (ObjSize == 16)
1462        ArgOffset = ((ArgOffset + 15) / 16) * 16;
1463      // Create the SelectionDAG nodes corresponding to a load from this
1464      // parameter.
1465      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1466      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
1467      if (ObjectVT == MVT::i64 && ObjIntRegs) {
1468        SDOperand ArgValue2 = DAG.getLoad(MVT::i32, Root, FIN,
1469                                          DAG.getSrcValue(NULL));
1470        ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
1471      } else
1472        ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
1473                               DAG.getSrcValue(NULL));
1474      ArgOffset += ArgIncrement;   // Move on to the next argument.
1475    }
1476
1477    ArgValues.push_back(ArgValue);
1478  }
1479
1480  ArgValues.push_back(Root);
1481
1482  // Make sure the argument area occupies 8n+4 bytes: together with the 4-byte
1483  // return address this keeps the stack 8-byte aligned on entry to the callee.
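  // For example, an ArgOffset of 0 becomes 4 and 8 becomes 12, while 4 and 12
  // already have the 8n+4 form and are left alone.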
1484  if ((ArgOffset & 7) == 0)
1485    ArgOffset += 4;
1486
1487  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
1488  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
1489  ReturnAddrIndex = 0;             // No return address slot generated yet.
1490  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
1491  BytesCallerReserves = 0;
1492
1493  // Finally, inform the code generator which regs we return values in.
1494  switch (getValueType(MF.getFunction()->getReturnType())) {
1495  default: assert(0 && "Unknown type!");
1496  case MVT::isVoid: break;
1497  case MVT::i8:
1498  case MVT::i16:
1499  case MVT::i32:
1500    MF.addLiveOut(X86::EAX);
1501    break;
1502  case MVT::i64:
1503    MF.addLiveOut(X86::EAX);
1504    MF.addLiveOut(X86::EDX);
1505    break;
1506  case MVT::f32:
1507  case MVT::f64:
1508    MF.addLiveOut(X86::ST0);
1509    break;
1510  case MVT::v16i8:
1511  case MVT::v8i16:
1512  case MVT::v4i32:
1513  case MVT::v2i64:
1514  case MVT::v4f32:
1515  case MVT::v2f64:
1516    MF.addLiveOut(X86::XMM0);
1517    break;
1518  }
1519
1520  // Return the new list of results.
1521  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
1522                                     Op.Val->value_end());
1523  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size());
1524}
1525
1526SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG){
1527  SDOperand Chain     = Op.getOperand(0);
1528  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
1529  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1530  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1531  SDOperand Callee    = Op.getOperand(4);
1532  MVT::ValueType RetVT= Op.Val->getValueType(0);
1533  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;
1534
1535  // Count how many bytes are to be pushed on the stack.
1536  unsigned NumBytes = 0;
1537
1538  // Keep track of the number of integer regs passed so far.  This can be either
1539  // 0 (neither EAX nor EDX is used), 1 (EAX is used), or 2 (EAX and EDX are both
1540  // used).
1541  unsigned NumIntRegs = 0;
1542  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
1543
1544  static const unsigned GPRArgRegs[][2] = {
1545    { X86::AL,  X86::DL },
1546    { X86::AX,  X86::DX },
1547    { X86::EAX, X86::EDX }
1548  };
1549  static const unsigned XMMArgRegs[] = {
1550    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
1551  };
1552
1553  for (unsigned i = 0; i != NumOps; ++i) {
1554    SDOperand Arg = Op.getOperand(5+2*i);
1555
1556    switch (Arg.getValueType()) {
1557    default: assert(0 && "Unknown value type!");
1558    case MVT::i8:
1559    case MVT::i16:
1560    case MVT::i32:
1561#if FASTCC_NUM_INT_ARGS_INREGS > 0
1562      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
1563        ++NumIntRegs;
1564        break;
1565      }
1566#endif
1567      // Fall through
1568    case MVT::f32:
1569      NumBytes += 4;
1570      break;
1571    case MVT::f64:
1572      NumBytes += 8;
1573      break;
1574    case MVT::v16i8:
1575    case MVT::v8i16:
1576    case MVT::v4i32:
1577    case MVT::v2i64:
1578    case MVT::v4f32:
1579    case MVT::v2f64:
1580      if (NumXMMRegs < 4)
1581        NumXMMRegs++;
1582      else {
1583        // XMM arguments have to be aligned on 16-byte boundary.
1584        NumBytes = ((NumBytes + 15) / 16) * 16;
1585        NumBytes += 16;
1586      }
1587      break;
1588    }
1589  }
1590
1591  // Make sure the outgoing argument area occupies 8n+4 bytes: together with
1592  // the 4-byte return address this keeps the stack 8-byte aligned for the call.
1593  if ((NumBytes & 7) == 0)
1594    NumBytes += 4;
1595
1596  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
1597
1598  // Arguments go on the stack in reverse order, as specified by the ABI.
1599  unsigned ArgOffset = 0;
1600  NumIntRegs = 0;
1601  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1602  std::vector<SDOperand> MemOpChains;
1603  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
1604  for (unsigned i = 0; i != NumOps; ++i) {
1605    SDOperand Arg = Op.getOperand(5+2*i);
1606
1607    switch (Arg.getValueType()) {
1608    default: assert(0 && "Unexpected ValueType for argument!");
1609    case MVT::i8:
1610    case MVT::i16:
1611    case MVT::i32:
1612#if FASTCC_NUM_INT_ARGS_INREGS > 0
1613      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
1614        RegsToPass.push_back(
1615              std::make_pair(GPRArgRegs[Arg.getValueType()-MVT::i8][NumIntRegs],
1616                             Arg));
1617        ++NumIntRegs;
1618        break;
1619      }
1620#endif
1621      // Fall through
1622    case MVT::f32: {
1623      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1624      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1625      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1626                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
1627      ArgOffset += 4;
1628      break;
1629    }
1630    case MVT::f64: {
1631      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1632      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1633      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1634                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
1635      ArgOffset += 8;
1636      break;
1637    }
1638    case MVT::v16i8:
1639    case MVT::v8i16:
1640    case MVT::v4i32:
1641    case MVT::v2i64:
1642    case MVT::v4f32:
1643    case MVT::v2f64:
1644      if (NumXMMRegs < 4) {
1645        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
1646        NumXMMRegs++;
1647      } else {
1648        // XMM arguments have to be aligned on 16-byte boundary.
1649        ArgOffset = ((ArgOffset + 15) / 16) * 16;
1650        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1651        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1652        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1653                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
1654        ArgOffset += 16;
1655      }
1656    }
1657  }
1658
1659  if (!MemOpChains.empty())
1660    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1661                        &MemOpChains[0], MemOpChains.size());
1662
1663  // Build a sequence of copy-to-reg nodes chained together with token chain
1664  // and flag operands which copy the outgoing args into registers.
1665  SDOperand InFlag;
1666  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1667    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1668                             InFlag);
1669    InFlag = Chain.getValue(1);
1670  }
1671
1672  // If the callee is a GlobalAddress node (quite common, every direct call is)
1673  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
1674  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1675    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
1676  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1677    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
1678
1679  std::vector<MVT::ValueType> NodeTys;
1680  NodeTys.push_back(MVT::Other);   // Returns a chain
1681  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
1682  std::vector<SDOperand> Ops;
1683  Ops.push_back(Chain);
1684  Ops.push_back(Callee);
1685
1686  // Add argument registers to the end of the list so that they are known live
1687  // into the call.
1688  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1689    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1690                                  RegsToPass[i].second.getValueType()));
1691
1692  if (InFlag.Val)
1693    Ops.push_back(InFlag);
1694
1695  // FIXME: Do not generate X86ISD::TAILCALL for now.
1696  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
1697                      NodeTys, &Ops[0], Ops.size());
1698  InFlag = Chain.getValue(1);
1699
1700  NodeTys.clear();
1701  NodeTys.push_back(MVT::Other);   // Returns a chain
1702  if (RetVT != MVT::Other)
1703    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
1704  Ops.clear();
1705  Ops.push_back(Chain);
1706  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
1707  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
1708  Ops.push_back(InFlag);
1709  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
1710  if (RetVT != MVT::Other)
1711    InFlag = Chain.getValue(1);
1712
1713  std::vector<SDOperand> ResultVals;
1714  NodeTys.clear();
1715  switch (RetVT) {
1716  default: assert(0 && "Unknown value type to return!");
1717  case MVT::Other: break;
1718  case MVT::i8:
1719    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
1720    ResultVals.push_back(Chain.getValue(0));
1721    NodeTys.push_back(MVT::i8);
1722    break;
1723  case MVT::i16:
1724    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
1725    ResultVals.push_back(Chain.getValue(0));
1726    NodeTys.push_back(MVT::i16);
1727    break;
1728  case MVT::i32:
1729    if (Op.Val->getValueType(1) == MVT::i32) {
1730      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
1731      ResultVals.push_back(Chain.getValue(0));
1732      Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
1733                                 Chain.getValue(2)).getValue(1);
1734      ResultVals.push_back(Chain.getValue(0));
1735      NodeTys.push_back(MVT::i32);
1736    } else {
1737      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
1738      ResultVals.push_back(Chain.getValue(0));
1739    }
1740    NodeTys.push_back(MVT::i32);
1741    break;
1742  case MVT::v16i8:
1743  case MVT::v8i16:
1744  case MVT::v4i32:
1745  case MVT::v2i64:
1746  case MVT::v4f32:
1747  case MVT::v2f64:
1748    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
1749    ResultVals.push_back(Chain.getValue(0));
1750    NodeTys.push_back(RetVT);
1751    break;
1752  case MVT::f32:
1753  case MVT::f64: {
1754    std::vector<MVT::ValueType> Tys;
1755    Tys.push_back(MVT::f64);
1756    Tys.push_back(MVT::Other);
1757    Tys.push_back(MVT::Flag);
1758    std::vector<SDOperand> Ops;
1759    Ops.push_back(Chain);
1760    Ops.push_back(InFlag);
1761    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys,
1762                                   &Ops[0], Ops.size());
1763    Chain  = RetVal.getValue(1);
1764    InFlag = RetVal.getValue(2);
1765    if (X86ScalarSSE) {
1766      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
1767      // shouldn't be necessary except that RFP cannot be live across
1768      // multiple blocks. When stackifier is fixed, they can be uncoupled.
1769      MachineFunction &MF = DAG.getMachineFunction();
1770      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
1771      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
1772      Tys.clear();
1773      Tys.push_back(MVT::Other);
1774      Ops.clear();
1775      Ops.push_back(Chain);
1776      Ops.push_back(RetVal);
1777      Ops.push_back(StackSlot);
1778      Ops.push_back(DAG.getValueType(RetVT));
1779      Ops.push_back(InFlag);
1780      Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
1781      RetVal = DAG.getLoad(RetVT, Chain, StackSlot,
1782                           DAG.getSrcValue(NULL));
1783      Chain = RetVal.getValue(1);
1784    }
1785
1786    if (RetVT == MVT::f32 && !X86ScalarSSE)
1787      // FIXME: we would really like to remember that this FP_ROUND
1788      // operation is okay to eliminate if we allow excess FP precision.
1789      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
1790    ResultVals.push_back(RetVal);
1791    NodeTys.push_back(RetVT);
1792    break;
1793  }
1794  }
1795
1796
1797  // If the function returns void, just return the chain.
1798  if (ResultVals.empty())
1799    return Chain;
1800
1801  // Otherwise, merge everything together with a MERGE_VALUES node.
1802  NodeTys.push_back(MVT::Other);
1803  ResultVals.push_back(Chain);
1804  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1805                              &ResultVals[0], ResultVals.size());
1806  return Res.getValue(Op.ResNo);
1807}
1808
1809SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
1810  if (ReturnAddrIndex == 0) {
1811    // Set up a frame object for the return address.
1812    MachineFunction &MF = DAG.getMachineFunction();
1813    if (Subtarget->is64Bit())
1814      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
1815    else
1816      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
1817  }
1818
1819  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
1820}
1821
1822
1823
1824std::pair<SDOperand, SDOperand> X86TargetLowering::
1825LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
1826                        SelectionDAG &DAG) {
1827  SDOperand Result;
1828  if (Depth)        // Depths > 0 not supported yet!
1829    Result = DAG.getConstant(0, getPointerTy());
1830  else {
1831    SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
1832    if (!isFrameAddress)
1833      // Just load the return address
1834      Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI,
1835                           DAG.getSrcValue(NULL));
1836    else
1837      Result = DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
1838                           DAG.getConstant(4, getPointerTy()));
1839  }
1840  return std::make_pair(Result, Chain);
1841}
1842
1843/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode
1844/// which corresponds to the condition code.
1845static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) {
1846  switch (X86CC) {
1847  default: assert(0 && "Unknown X86 conditional code!");
1848  case X86ISD::COND_A:  return X86::JA;
1849  case X86ISD::COND_AE: return X86::JAE;
1850  case X86ISD::COND_B:  return X86::JB;
1851  case X86ISD::COND_BE: return X86::JBE;
1852  case X86ISD::COND_E:  return X86::JE;
1853  case X86ISD::COND_G:  return X86::JG;
1854  case X86ISD::COND_GE: return X86::JGE;
1855  case X86ISD::COND_L:  return X86::JL;
1856  case X86ISD::COND_LE: return X86::JLE;
1857  case X86ISD::COND_NE: return X86::JNE;
1858  case X86ISD::COND_NO: return X86::JNO;
1859  case X86ISD::COND_NP: return X86::JNP;
1860  case X86ISD::COND_NS: return X86::JNS;
1861  case X86ISD::COND_O:  return X86::JO;
1862  case X86ISD::COND_P:  return X86::JP;
1863  case X86ISD::COND_S:  return X86::JS;
1864  }
1865}
1866
1867/// translateX86CC - do a one-to-one translation of an ISD::CondCode to the X86
1868/// specific condition code. It returns false if it cannot do a direct
1869/// translation. X86CC is the translated CondCode.  LHS/RHS are modified as
1870/// needed.
1871static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
1872                           unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
1873                           SelectionDAG &DAG) {
1874  X86CC = X86ISD::COND_INVALID;
1875  if (!isFP) {
1876    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1877      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
1878        // X > -1  -> compare X against 0, branch on !sign.
1879        RHS = DAG.getConstant(0, RHS.getValueType());
1880        X86CC = X86ISD::COND_NS;
1881        return true;
1882      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
1883        // X < 0  -> compare X against 0, branch on sign.
1884        X86CC = X86ISD::COND_S;
1885        return true;
1886      }
1887    }
1888
1889    switch (SetCCOpcode) {
1890    default: break;
1891    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
1892    case ISD::SETGT:  X86CC = X86ISD::COND_G;  break;
1893    case ISD::SETGE:  X86CC = X86ISD::COND_GE; break;
1894    case ISD::SETLT:  X86CC = X86ISD::COND_L;  break;
1895    case ISD::SETLE:  X86CC = X86ISD::COND_LE; break;
1896    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
1897    case ISD::SETULT: X86CC = X86ISD::COND_B;  break;
1898    case ISD::SETUGT: X86CC = X86ISD::COND_A;  break;
1899    case ISD::SETULE: X86CC = X86ISD::COND_BE; break;
1900    case ISD::SETUGE: X86CC = X86ISD::COND_AE; break;
1901    }
1902  } else {
1903    // On a floating point condition, the flags are set as follows:
1904    // ZF  PF  CF   op
1905    //  0 | 0 | 0 | X > Y
1906    //  0 | 0 | 1 | X < Y
1907    //  1 | 0 | 0 | X == Y
1908    //  1 | 1 | 1 | unordered
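    //
    // The Flip cases below follow from this table: e.g. an ordered X < Y
    // cannot be tested directly, because COND_B (CF == 1) is also true when
    // the operands are unordered; swapping the operands turns it into an
    // ordered Y > X, which COND_A (CF == 0 and ZF == 0) tests exactly.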
1909    bool Flip = false;
1910    switch (SetCCOpcode) {
1911    default: break;
1912    case ISD::SETUEQ:
1913    case ISD::SETEQ: X86CC = X86ISD::COND_E;  break;
1914    case ISD::SETOLT: Flip = true; // Fallthrough
1915    case ISD::SETOGT:
1916    case ISD::SETGT: X86CC = X86ISD::COND_A;  break;
1917    case ISD::SETOLE: Flip = true; // Fallthrough
1918    case ISD::SETOGE:
1919    case ISD::SETGE: X86CC = X86ISD::COND_AE; break;
1920    case ISD::SETUGT: Flip = true; // Fallthrough
1921    case ISD::SETULT:
1922    case ISD::SETLT: X86CC = X86ISD::COND_B;  break;
1923    case ISD::SETUGE: Flip = true; // Fallthrough
1924    case ISD::SETULE:
1925    case ISD::SETLE: X86CC = X86ISD::COND_BE; break;
1926    case ISD::SETONE:
1927    case ISD::SETNE: X86CC = X86ISD::COND_NE; break;
1928    case ISD::SETUO: X86CC = X86ISD::COND_P;  break;
1929    case ISD::SETO:  X86CC = X86ISD::COND_NP; break;
1930    }
1931    if (Flip)
1932      std::swap(LHS, RHS);
1933  }
1934
1935  return X86CC != X86ISD::COND_INVALID;
1936}
1937
1938/// hasFPCMov - is there a floating point cmov for the specific X86 condition
1939/// code. The current x86 ISA includes the following FP cmov instructions:
1940/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
1941static bool hasFPCMov(unsigned X86CC) {
1942  switch (X86CC) {
1943  default:
1944    return false;
1945  case X86ISD::COND_B:
1946  case X86ISD::COND_BE:
1947  case X86ISD::COND_E:
1948  case X86ISD::COND_P:
1949  case X86ISD::COND_A:
1950  case X86ISD::COND_AE:
1951  case X86ISD::COND_NE:
1952  case X86ISD::COND_NP:
1953    return true;
1954  }
1955}
1956
1957/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra
1958/// load. For Darwin, external and weak symbols are indirect, loading the value
1959/// at address GV rather than the value of GV itself. This means that the
1960/// GlobalAddress must be in the base or index register of the address, not the
1961/// GV offset field.
1962static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
1963  return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
1964          (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
1965}
1966
1967/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
1968/// true if Op is undef or if its value falls within the half-open range [Low, Hi).
1969static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
1970  if (Op.getOpcode() == ISD::UNDEF)
1971    return true;
1972
1973  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
1974  return (Val >= Low && Val < Hi);
1975}
1976
1977/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
1978/// true if Op is undef or if its value equals the specified value.
1979static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
1980  if (Op.getOpcode() == ISD::UNDEF)
1981    return true;
1982  return cast<ConstantSDNode>(Op)->getValue() == Val;
1983}
1984
1985/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
1986/// specifies a shuffle of elements that is suitable for input to PSHUFD.
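/// For example, <2,3,0,1> qualifies (every index is < 4), while <0,1,4,5>
/// does not, since indices 4 and 5 reference the second vector.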
1987bool X86::isPSHUFDMask(SDNode *N) {
1988  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1989
1990  if (N->getNumOperands() != 4)
1991    return false;
1992
1993  // Check that the mask doesn't reference the second vector.
1994  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1995    SDOperand Arg = N->getOperand(i);
1996    if (Arg.getOpcode() == ISD::UNDEF) continue;
1997    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1998    if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
1999      return false;
2000  }
2001
2002  return true;
2003}
2004
2005/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
2006/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
2007bool X86::isPSHUFHWMask(SDNode *N) {
2008  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2009
2010  if (N->getNumOperands() != 8)
2011    return false;
2012
2013  // Lower quadword copied in order.
2014  for (unsigned i = 0; i != 4; ++i) {
2015    SDOperand Arg = N->getOperand(i);
2016    if (Arg.getOpcode() == ISD::UNDEF) continue;
2017    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2018    if (cast<ConstantSDNode>(Arg)->getValue() != i)
2019      return false;
2020  }
2021
2022  // Upper quadword shuffled.
2023  for (unsigned i = 4; i != 8; ++i) {
2024    SDOperand Arg = N->getOperand(i);
2025    if (Arg.getOpcode() == ISD::UNDEF) continue;
2026    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2027    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2028    if (Val < 4 || Val > 7)
2029      return false;
2030  }
2031
2032  return true;
2033}
2034
2035/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
2036/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
2037bool X86::isPSHUFLWMask(SDNode *N) {
2038  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2039
2040  if (N->getNumOperands() != 8)
2041    return false;
2042
2043  // Upper quadword copied in order.
2044  for (unsigned i = 4; i != 8; ++i)
2045    if (!isUndefOrEqual(N->getOperand(i), i))
2046      return false;
2047
2048  // Lower quadword shuffled.
2049  for (unsigned i = 0; i != 4; ++i)
2050    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
2051      return false;
2052
2053  return true;
2054}
2055
2056/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
2057/// specifies a shuffle of elements that is suitable for input to SHUFP*.
2058static bool isSHUFPMask(std::vector<SDOperand> &N) {
2059  unsigned NumElems = N.size();
2060  if (NumElems != 2 && NumElems != 4) return false;
2061
2062  unsigned Half = NumElems / 2;
2063  for (unsigned i = 0; i < Half; ++i)
2064    if (!isUndefOrInRange(N[i], 0, NumElems))
2065      return false;
2066  for (unsigned i = Half; i < NumElems; ++i)
2067    if (!isUndefOrInRange(N[i], NumElems, NumElems*2))
2068      return false;
2069
2070  return true;
2071}
2072
2073bool X86::isSHUFPMask(SDNode *N) {
2074  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2075  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
2076  return ::isSHUFPMask(Ops);
2077}
2078
2079/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
2080/// the reverse of what x86 shuffles want. x86 shuffles require the lower
2081/// half elements to come from vector 1 (which would equal the destination) and
2082/// the upper half to come from vector 2.
2083static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) {
2084  unsigned NumElems = Ops.size();
2085  if (NumElems != 2 && NumElems != 4) return false;
2086
2087  unsigned Half = NumElems / 2;
2088  for (unsigned i = 0; i < Half; ++i)
2089    if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2))
2090      return false;
2091  for (unsigned i = Half; i < NumElems; ++i)
2092    if (!isUndefOrInRange(Ops[i], 0, NumElems))
2093      return false;
2094  return true;
2095}
2096
2097static bool isCommutedSHUFP(SDNode *N) {
2098  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2099  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
2100  return isCommutedSHUFP(Ops);
2101}
2102
2103/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
2104/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
2105bool X86::isMOVHLPSMask(SDNode *N) {
2106  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2107
2108  if (N->getNumOperands() != 4)
2109    return false;
2110
2111  // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
2112  return isUndefOrEqual(N->getOperand(0), 6) &&
2113         isUndefOrEqual(N->getOperand(1), 7) &&
2114         isUndefOrEqual(N->getOperand(2), 2) &&
2115         isUndefOrEqual(N->getOperand(3), 3);
2116}
2117
2118/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
2119/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
2120bool X86::isMOVLPMask(SDNode *N) {
2121  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2122
2123  unsigned NumElems = N->getNumOperands();
2124  if (NumElems != 2 && NumElems != 4)
2125    return false;
2126
2127  for (unsigned i = 0; i < NumElems/2; ++i)
2128    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
2129      return false;
2130
2131  for (unsigned i = NumElems/2; i < NumElems; ++i)
2132    if (!isUndefOrEqual(N->getOperand(i), i))
2133      return false;
2134
2135  return true;
2136}
2137
2138/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
2139/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
2140/// and MOVLHPS.
2141bool X86::isMOVHPMask(SDNode *N) {
2142  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2143
2144  unsigned NumElems = N->getNumOperands();
2145  if (NumElems != 2 && NumElems != 4)
2146    return false;
2147
2148  for (unsigned i = 0; i < NumElems/2; ++i)
2149    if (!isUndefOrEqual(N->getOperand(i), i))
2150      return false;
2151
2152  for (unsigned i = 0; i < NumElems/2; ++i) {
2153    SDOperand Arg = N->getOperand(i + NumElems/2);
2154    if (!isUndefOrEqual(Arg, i + NumElems))
2155      return false;
2156  }
2157
2158  return true;
2159}
2160
2161/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
2162/// specifies a shuffle of elements that is suitable for input to UNPCKL.
2163static bool isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
2164  unsigned NumElems = N.size();
2165  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
2166    return false;
2167
2168  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
2169    SDOperand BitI  = N[i];
2170    SDOperand BitI1 = N[i+1];
2171    if (!isUndefOrEqual(BitI, j))
2172      return false;
2173    if (V2IsSplat) {
2174      if (!isUndefOrEqual(BitI1, NumElems))
2175        return false;
2176    } else {
2177      if (!isUndefOrEqual(BitI1, j + NumElems))
2178        return false;
2179    }
2180  }
2181
2182  return true;
2183}
2184
2185bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
2186  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2187  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
2188  return ::isUNPCKLMask(Ops, V2IsSplat);
2189}
2190
2191/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
2192/// specifies a shuffle of elements that is suitable for input to UNPCKH.
2193static bool isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
2194  unsigned NumElems = N.size();
2195  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
2196    return false;
2197
2198  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
2199    SDOperand BitI  = N[i];
2200    SDOperand BitI1 = N[i+1];
2201    if (!isUndefOrEqual(BitI, j + NumElems/2))
2202      return false;
2203    if (V2IsSplat) {
2204      if (!isUndefOrEqual(BitI1, NumElems))
2205        return false;
2206    } else {
2207      if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems))
2208        return false;
2209    }
2210  }
2211
2212  return true;
2213}
2214
2215bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
2216  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2217  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
2218  return ::isUNPCKHMask(Ops, V2IsSplat);
2219}
2220
2221/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
2222/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
2223/// <0, 0, 1, 1>
2224bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
2225  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2226
2227  unsigned NumElems = N->getNumOperands();
2228  if (NumElems != 4 && NumElems != 8 && NumElems != 16)
2229    return false;
2230
2231  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
2232    SDOperand BitI  = N->getOperand(i);
2233    SDOperand BitI1 = N->getOperand(i+1);
2234
2235    if (!isUndefOrEqual(BitI, j))
2236      return false;
2237    if (!isUndefOrEqual(BitI1, j))
2238      return false;
2239  }
2240
2241  return true;
2242}
2243
2244/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
2245/// specifies a shuffle of elements that is suitable for input to MOVSS,
2246/// MOVSD, and MOVD, i.e. setting the lowest element.
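/// For a 4-element mask this means <4,1,2,3>: element 0 is taken from the
/// first element of V2 and the remaining elements pass V1 through in order.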
2247static bool isMOVLMask(std::vector<SDOperand> &N) {
2248  unsigned NumElems = N.size();
2249  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
2250    return false;
2251
2252  if (!isUndefOrEqual(N[0], NumElems))
2253    return false;
2254
2255  for (unsigned i = 1; i < NumElems; ++i) {
2256    SDOperand Arg = N[i];
2257    if (!isUndefOrEqual(Arg, i))
2258      return false;
2259  }
2260
2261  return true;
2262}
2263
2264bool X86::isMOVLMask(SDNode *N) {
2265  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2266  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
2267  return ::isMOVLMask(Ops);
2268}
2269
2270/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
2271/// of what x86 movss wants. x86 movs requires the lowest element to be the
2272/// lowest element of vector 2, with the rest coming from vector 1 in order.
2273static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false,
2274                           bool V2IsUndef = false) {
2275  unsigned NumElems = Ops.size();
2276  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
2277    return false;
2278
2279  if (!isUndefOrEqual(Ops[0], 0))
2280    return false;
2281
2282  for (unsigned i = 1; i < NumElems; ++i) {
2283    SDOperand Arg = Ops[i];
2284    if (!(isUndefOrEqual(Arg, i+NumElems) ||
2285          (V2IsUndef && isUndefOrInRange(Arg, NumElems, NumElems*2)) ||
2286          (V2IsSplat && isUndefOrEqual(Arg, NumElems))))
2287      return false;
2288  }
2289
2290  return true;
2291}
2292
2293static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
2294                           bool V2IsUndef = false) {
2295  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2296  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
2297  return isCommutedMOVL(Ops, V2IsSplat, V2IsUndef);
2298}
2299
2300/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
2301/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
2302bool X86::isMOVSHDUPMask(SDNode *N) {
2303  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2304
2305  if (N->getNumOperands() != 4)
2306    return false;
2307
2308  // Expect 1, 1, 3, 3
2309  for (unsigned i = 0; i < 2; ++i) {
2310    SDOperand Arg = N->getOperand(i);
2311    if (Arg.getOpcode() == ISD::UNDEF) continue;
2312    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2313    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2314    if (Val != 1) return false;
2315  }
2316
2317  bool HasHi = false;
2318  for (unsigned i = 2; i < 4; ++i) {
2319    SDOperand Arg = N->getOperand(i);
2320    if (Arg.getOpcode() == ISD::UNDEF) continue;
2321    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2322    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2323    if (Val != 3) return false;
2324    HasHi = true;
2325  }
2326
2327  // Don't use movshdup if it can be done with a shufps.
2328  return HasHi;
2329}
2330
2331/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
2332/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
2333bool X86::isMOVSLDUPMask(SDNode *N) {
2334  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2335
2336  if (N->getNumOperands() != 4)
2337    return false;
2338
2339  // Expect 0, 0, 2, 2
2340  for (unsigned i = 0; i < 2; ++i) {
2341    SDOperand Arg = N->getOperand(i);
2342    if (Arg.getOpcode() == ISD::UNDEF) continue;
2343    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2344    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2345    if (Val != 0) return false;
2346  }
2347
2348  bool HasHi = false;
2349  for (unsigned i = 2; i < 4; ++i) {
2350    SDOperand Arg = N->getOperand(i);
2351    if (Arg.getOpcode() == ISD::UNDEF) continue;
2352    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2353    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2354    if (Val != 2) return false;
2355    HasHi = true;
2356  }
2357
2358  // Don't use movsldup if it can be done with a shufps.
2359  return HasHi;
2360}
2361
2362/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2363/// a splat of a single element.
2364static bool isSplatMask(SDNode *N) {
2365  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2366
2367  // This is a splat operation if each element of the permute is the same, and
2368  // if the value doesn't reference the second vector.
2369  unsigned NumElems = N->getNumOperands();
2370  SDOperand ElementBase;
2371  unsigned i = 0;
2372  for (; i != NumElems; ++i) {
2373    SDOperand Elt = N->getOperand(i);
2374    if (isa<ConstantSDNode>(Elt)) {
2375      ElementBase = Elt;
2376      break;
2377    }
2378  }
2379
2380  if (!ElementBase.Val)
2381    return false;
2382
2383  for (; i != NumElems; ++i) {
2384    SDOperand Arg = N->getOperand(i);
2385    if (Arg.getOpcode() == ISD::UNDEF) continue;
2386    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2387    if (Arg != ElementBase) return false;
2388  }
2389
2390  // Make sure it is a splat of the first vector operand.
2391  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
2392}
2393
2394/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2395/// a splat of a single element and it's a 2 or 4 element mask.
2396bool X86::isSplatMask(SDNode *N) {
2397  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2398
2399  // We can only splat 64-bit and 32-bit quantities with a single instruction.
2400  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
2401    return false;
2402  return ::isSplatMask(N);
2403}
2404
2405/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
2406/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
2407/// instructions.
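/// The two-bit fields are packed from the highest element down, with indices
/// into the second vector reduced by NumOperands first. For example, the
/// 4-element mask <1,0,3,2> produces 10 11 00 01 = 0xB1: element 3's index in
/// bits 7:6 and element 0's in bits 1:0.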
2408unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
2409  unsigned NumOperands = N->getNumOperands();
2410  unsigned Shift = (NumOperands == 4) ? 2 : 1;
2411  unsigned Mask = 0;
2412  for (unsigned i = 0; i < NumOperands; ++i) {
2413    unsigned Val = 0;
2414    SDOperand Arg = N->getOperand(NumOperands-i-1);
2415    if (Arg.getOpcode() != ISD::UNDEF)
2416      Val = cast<ConstantSDNode>(Arg)->getValue();
2417    if (Val >= NumOperands) Val -= NumOperands;
2418    Mask |= Val;
2419    if (i != NumOperands - 1)
2420      Mask <<= Shift;
2421  }
2422
2423  return Mask;
2424}
2425
2426/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
2427/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
2428/// instructions.
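/// Each of the four high elements has 4 subtracted from its index before
/// packing. For example, <0,1,2,3,7,6,5,4> produces 00 01 10 11 = 0x1B, with
/// element 4's index in bits 1:0 and element 7's in bits 7:6.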
2429unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
2430  unsigned Mask = 0;
2431  // 8 nodes, but we only care about the last 4.
2432  for (unsigned i = 7; i >= 4; --i) {
2433    unsigned Val = 0;
2434    SDOperand Arg = N->getOperand(i);
2435    if (Arg.getOpcode() != ISD::UNDEF)
2436      Val = cast<ConstantSDNode>(Arg)->getValue() - 4;
2437    Mask |= Val;  // Undef elements contribute 0 instead of underflowing.
2438    if (i != 4)
2439      Mask <<= 2;
2440  }
2441
2442  return Mask;
2443}
2444
2445/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
2446/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
2447/// instructions.
2448unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
2449  unsigned Mask = 0;
2450  // 8 nodes, but we only care about the first 4.
2451  for (int i = 3; i >= 0; --i) {
2452    unsigned Val = 0;
2453    SDOperand Arg = N->getOperand(i);
2454    if (Arg.getOpcode() != ISD::UNDEF)
2455      Val = cast<ConstantSDNode>(Arg)->getValue();
2456    Mask |= Val;
2457    if (i != 0)
2458      Mask <<= 2;
2459  }
2460
2461  return Mask;
2462}
2463
2464/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
2465/// specifies an 8 element shuffle that can be broken into a pair of
2466/// PSHUFHW and PSHUFLW.
2467static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
2468  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2469
2470  if (N->getNumOperands() != 8)
2471    return false;
2472
2473  // Lower quadword shuffled.
2474  for (unsigned i = 0; i != 4; ++i) {
2475    SDOperand Arg = N->getOperand(i);
2476    if (Arg.getOpcode() == ISD::UNDEF) continue;
2477    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2478    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2479    if (Val >= 4)
2480      return false;
2481  }
2482
2483  // Upper quadword shuffled.
2484  for (unsigned i = 4; i != 8; ++i) {
2485    SDOperand Arg = N->getOperand(i);
2486    if (Arg.getOpcode() == ISD::UNDEF) continue;
2487    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2488    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2489    if (Val < 4 || Val > 7)
2490      return false;
2491  }
2492
2493  return true;
2494}
2495
2496/// CommuteVectorShuffle - Swap vector_shuffle operands as well as the
2497/// values in the permute mask.
2498static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) {
2499  SDOperand V1 = Op.getOperand(0);
2500  SDOperand V2 = Op.getOperand(1);
2501  SDOperand Mask = Op.getOperand(2);
2502  MVT::ValueType VT = Op.getValueType();
2503  MVT::ValueType MaskVT = Mask.getValueType();
2504  MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
2505  unsigned NumElems = Mask.getNumOperands();
2506  std::vector<SDOperand> MaskVec;
2507
2508  for (unsigned i = 0; i != NumElems; ++i) {
2509    SDOperand Arg = Mask.getOperand(i);
2510    if (Arg.getOpcode() == ISD::UNDEF) {
2511      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
2512      continue;
2513    }
2514    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2515    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2516    if (Val < NumElems)
2517      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
2518    else
2519      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
2520  }
2521
2522  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2523  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
2524}
2525
2526/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
2527/// match movhlps. The lower half elements should come from upper half of
2528/// V1 (and in order), and the upper half elements should come from the upper
2529/// half of V2 (and in order).
2530static bool ShouldXformToMOVHLPS(SDNode *Mask) {
2531  unsigned NumElems = Mask->getNumOperands();
2532  if (NumElems != 4)
2533    return false;
2534  for (unsigned i = 0, e = 2; i != e; ++i)
2535    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
2536      return false;
2537  for (unsigned i = 2; i != 4; ++i)
2538    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
2539      return false;
2540  return true;
2541}
2542
2543/// isScalarLoadToVector - Returns true if the node is a scalar load that
2544/// is promoted to a vector.
2545static inline bool isScalarLoadToVector(SDNode *N) {
2546  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
2547    N = N->getOperand(0).Val;
2548    return (N->getOpcode() == ISD::LOAD);
2549  }
2550  return false;
2551}
2552
2553/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
2554/// match movlp{s|d}. The lower half elements should come from lower half of
2555/// V1 (and in order), and the upper half elements should come from the upper
2556/// half of V2 (and in order). And since V1 will become the source of the
2557/// MOVLP, it must be either a vector load or a scalar load to vector.
2558static bool ShouldXformToMOVLP(SDNode *V1, SDNode *Mask) {
2559  if (V1->getOpcode() != ISD::LOAD && !isScalarLoadToVector(V1))
2560    return false;
2561
2562  unsigned NumElems = Mask->getNumOperands();
2563  if (NumElems != 2 && NumElems != 4)
2564    return false;
2565  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
2566    if (!isUndefOrEqual(Mask->getOperand(i), i))
2567      return false;
2568  for (unsigned i = NumElems/2; i != NumElems; ++i)
2569    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
2570      return false;
2571  return true;
2572}
2573
2574/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
2575/// all the same.
2576static bool isSplatVector(SDNode *N) {
2577  if (N->getOpcode() != ISD::BUILD_VECTOR)
2578    return false;
2579
2580  SDOperand SplatValue = N->getOperand(0);
2581  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
2582    if (N->getOperand(i) != SplatValue)
2583      return false;
2584  return true;
2585}
2586
2587/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
2588/// to an undef.
2589static bool isUndefShuffle(SDNode *N) {
2590  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
2591    return false;
2592
2593  SDOperand V1 = N->getOperand(0);
2594  SDOperand V2 = N->getOperand(1);
2595  SDOperand Mask = N->getOperand(2);
2596  unsigned NumElems = Mask.getNumOperands();
2597  for (unsigned i = 0; i != NumElems; ++i) {
2598    SDOperand Arg = Mask.getOperand(i);
2599    if (Arg.getOpcode() != ISD::UNDEF) {
2600      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2601      if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
2602        return false;
2603      else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
2604        return false;
2605    }
2606  }
2607  return true;
2608}
2609
2610/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
2611/// that point to V2 point to its first element.
2612static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
2613  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
2614
2615  bool Changed = false;
2616  std::vector<SDOperand> MaskVec;
2617  unsigned NumElems = Mask.getNumOperands();
2618  for (unsigned i = 0; i != NumElems; ++i) {
2619    SDOperand Arg = Mask.getOperand(i);
2620    if (Arg.getOpcode() != ISD::UNDEF) {
2621      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2622      if (Val > NumElems) {
2623        Arg = DAG.getConstant(NumElems, Arg.getValueType());
2624        Changed = true;
2625      }
2626    }
2627    MaskVec.push_back(Arg);
2628  }
2629
2630  if (Changed)
2631    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
2632                       &MaskVec[0], MaskVec.size());
2633  return Mask;
2634}
2635
2636/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d} or movd
2637/// operation of specified width.
2638static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
2639  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2640  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2641
2642  std::vector<SDOperand> MaskVec;
2643  MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
2644  for (unsigned i = 1; i != NumElems; ++i)
2645    MaskVec.push_back(DAG.getConstant(i, BaseVT));
2646  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2647}
2648
2649/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
2650/// of specified width.
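/// For NumElems == 4 the mask is <0, 4, 1, 5>, interleaving the low halves of
/// the two source vectors.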
2651static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
2652  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2653  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2654  std::vector<SDOperand> MaskVec;
2655  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
2656    MaskVec.push_back(DAG.getConstant(i,            BaseVT));
2657    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
2658  }
2659  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2660}
2661
2662/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
2663/// of specified width.
2664static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
2665  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2666  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2667  unsigned Half = NumElems/2;
2668  std::vector<SDOperand> MaskVec;
2669  for (unsigned i = 0; i != Half; ++i) {
2670    MaskVec.push_back(DAG.getConstant(i + Half,            BaseVT));
2671    MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
2672  }
2673  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2674}
2675
2676/// getZeroVector - Returns a vector of specified type with all zero elements.
2677///
2678static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
2679  assert(MVT::isVector(VT) && "Expected a vector type");
2680  unsigned NumElems = getVectorNumElements(VT);
2681  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
2682  bool isFP = MVT::isFloatingPoint(EVT);
2683  SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
2684  std::vector<SDOperand> ZeroVec(NumElems, Zero);
2685  return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
2686}
2687
2688/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
2689///
2690static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
2691  SDOperand V1 = Op.getOperand(0);
2692  SDOperand Mask = Op.getOperand(2);
2693  MVT::ValueType VT = Op.getValueType();
2694  unsigned NumElems = Mask.getNumOperands();
2695  Mask = getUnpacklMask(NumElems, DAG);
2696  while (NumElems != 4) {
2697    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
2698    NumElems >>= 1;
2699  }
2700  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
2701
2702  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2703  Mask = getZeroVector(MaskVT, DAG);
2704  SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
2705                                  DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
2706  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
2707}
2708
2709/// isZeroNode - Returns true if Elt is a constant zero or a floating point
2710/// constant +0.0.
2711static inline bool isZeroNode(SDOperand Elt) {
2712  return ((isa<ConstantSDNode>(Elt) &&
2713           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
2714          (isa<ConstantFPSDNode>(Elt) &&
2715           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
2716}
2717
2718/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
2719/// vector and zero or undef vector.
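/// The mask selects lane 0 of the zero (or undef) vector everywhere except at
/// Idx, which takes the first element of V2; e.g. for NumElems == 4 and
/// Idx == 0 the mask is <4, 0, 0, 0>.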
2720static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
2721                                             unsigned NumElems, unsigned Idx,
2722                                             bool isZero, SelectionDAG &DAG) {
2723  SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
2724  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2725  MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
2726  SDOperand Zero = DAG.getConstant(0, EVT);
2727  std::vector<SDOperand> MaskVec(NumElems, Zero);
2728  MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
2729  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2730                               &MaskVec[0], MaskVec.size());
2731  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2732}
2733
2734/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
2735///
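/// The approach, in sketch form: adjacent nonzero bytes are paired into an
/// i16 as ((zext hi) << 8) | zext lo (a zero byte simply drops out of the
/// pair), the i16 is inserted into a v8i16 at position i/2 with
/// INSERT_VECTOR_ELT, and the result is bitcast back to v16i8.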
2736static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
2737                                       unsigned NumNonZero, unsigned NumZero,
2738                                       SelectionDAG &DAG, TargetLowering &TLI) {
2739  if (NumNonZero > 8)
2740    return SDOperand();
2741
2742  SDOperand V(0, 0);
2743  bool First = true;
2744  for (unsigned i = 0; i < 16; ++i) {
2745    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
2746    if (ThisIsNonZero && First) {
2747      if (NumZero)
2748        V = getZeroVector(MVT::v8i16, DAG);
2749      else
2750        V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
2751      First = false;
2752    }
2753
2754    if ((i & 1) != 0) {
2755      SDOperand ThisElt(0, 0), LastElt(0, 0);
2756      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
2757      if (LastIsNonZero) {
2758        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
2759      }
2760      if (ThisIsNonZero) {
2761        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
2762        ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
2763                              ThisElt, DAG.getConstant(8, MVT::i8));
2764        if (LastIsNonZero)
2765          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
2766      } else
2767        ThisElt = LastElt;
2768
2769      if (ThisElt.Val)
2770        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
2771                        DAG.getConstant(i/2, TLI.getPointerTy()));
2772    }
2773  }
2774
2775  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
2776}
2777
2778/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
2779///
2780static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
2781                                       unsigned NumNonZero, unsigned NumZero,
2782                                       SelectionDAG &DAG, TargetLowering &TLI) {
2783  if (NumNonZero > 4)
2784    return SDOperand();
2785
2786  SDOperand V(0, 0);
2787  bool First = true;
2788  for (unsigned i = 0; i < 8; ++i) {
2789    bool isNonZero = (NonZeros & (1 << i)) != 0;
2790    if (isNonZero) {
2791      if (First) {
2792        if (NumZero)
2793          V = getZeroVector(MVT::v8i16, DAG);
2794        else
2795          V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
2796        First = false;
2797      }
2798      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
2799                      DAG.getConstant(i, TLI.getPointerTy()));
2800    }
2801  }
2802
2803  return V;
2804}
2805
2806SDOperand
2807X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
2808  // All zeros are handled with pxor.
2809  if (ISD::isBuildVectorAllZeros(Op.Val))
2810    return Op;
2811
2812  // All ones are handled with pcmpeqd.
2813  if (ISD::isBuildVectorAllOnes(Op.Val))
2814    return Op;
2815
2816  MVT::ValueType VT = Op.getValueType();
2817  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
2818  unsigned EVTBits = MVT::getSizeInBits(EVT);
2819
2820  unsigned NumElems = Op.getNumOperands();
2821  unsigned NumZero  = 0;
2822  unsigned NumNonZero = 0;
2823  unsigned NonZeros = 0;
2824  std::set<SDOperand> Values;
2825  for (unsigned i = 0; i < NumElems; ++i) {
2826    SDOperand Elt = Op.getOperand(i);
2827    if (Elt.getOpcode() != ISD::UNDEF) {
2828      Values.insert(Elt);
2829      if (isZeroNode(Elt))
2830        NumZero++;
2831      else {
2832        NonZeros |= (1 << i);
2833        NumNonZero++;
2834      }
2835    }
2836  }
2837
2838  if (NumNonZero == 0)
2839    // Must be a mix of zero and undef. Return a zero vector.
2840    return getZeroVector(VT, DAG);
2841
2842  // Splat is obviously ok. Let the legalizer expand it to a shuffle.
2843  if (Values.size() == 1)
2844    return SDOperand();
2845
2846  // Special case for single non-zero element.
2847  if (NumNonZero == 1) {
2848    unsigned Idx = CountTrailingZeros_32(NonZeros);
2849    SDOperand Item = Op.getOperand(Idx);
2850    Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
2851    if (Idx == 0)
2852      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
2853      return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
2854                                         NumZero > 0, DAG);
2855
2856    if (EVTBits == 32) {
2857      // Turn it into a shuffle of zero and zero-extended scalar to vector.
2858      Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
2859                                         DAG);
2860      MVT::ValueType MaskVT  = MVT::getIntVectorWithNumElements(NumElems);
2861      MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
2862      std::vector<SDOperand> MaskVec;
2863      for (unsigned i = 0; i < NumElems; i++)
2864        MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
2865      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2866                                   &MaskVec[0], MaskVec.size());
2867      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
2868                         DAG.getNode(ISD::UNDEF, VT), Mask);
2869    }
2870  }
2871
2872  // Let the legalizer expand 2-wide build_vectors.
2873  if (EVTBits == 64)
2874    return SDOperand();
2875
2876  // If element VT is < 32 bits, convert it to inserts into a zero vector.
2877  if (EVTBits == 8) {
2878    SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
2879                                        *this);
2880    if (V.Val) return V;
2881  }
2882
2883  if (EVTBits == 16) {
2884    SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
2885                                        *this);
2886    if (V.Val) return V;
2887  }
2888
2889  // If the element VT is 32 bits, turn it into a number of shuffles.
2890  std::vector<SDOperand> V(NumElems);
2891  if (NumElems == 4 && NumZero > 0) {
2892    for (unsigned i = 0; i < 4; ++i) {
2893      bool isZero = !(NonZeros & (1 << i));
2894      if (isZero)
2895        V[i] = getZeroVector(VT, DAG);
2896      else
2897        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2898    }
2899
2900    for (unsigned i = 0; i < 2; ++i) {
2901      switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
2902        default: break;
2903        case 0:
2904          V[i] = V[i*2];  // Must be a zero vector.
2905          break;
2906        case 1:
2907          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
2908                             getMOVLMask(NumElems, DAG));
2909          break;
2910        case 2:
2911          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2912                             getMOVLMask(NumElems, DAG));
2913          break;
2914        case 3:
2915          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2916                             getUnpacklMask(NumElems, DAG));
2917          break;
2918      }
2919    }
2920
2921    // Take advantage of the fact that a GR32-to-VR128 scalar_to_vector
2922    // (i.e. movd) clears the upper bits.
2923    // FIXME: we can do the same for v4f32 case when we know both parts of
2924    // the lower half come from scalar_to_vector (loadf32). We should do
2925    // that in post legalizer dag combiner with target specific hooks.
2926    if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
2927      return V[0];
2928    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2929    MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
2930    std::vector<SDOperand> MaskVec;
2931    bool Reverse = (NonZeros & 0x3) == 2;
2932    for (unsigned i = 0; i < 2; ++i)
2933      if (Reverse)
2934        MaskVec.push_back(DAG.getConstant(1-i, EVT));
2935      else
2936        MaskVec.push_back(DAG.getConstant(i, EVT));
2937    Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
2938    for (unsigned i = 0; i < 2; ++i)
2939      if (Reverse)
2940        MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
2941      else
2942        MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
2943    SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2944                                     &MaskVec[0], MaskVec.size());
2945    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
2946  }
2947
2948  if (Values.size() > 2) {
2949    // Expand into a number of unpckl*.
2950    // e.g. for v4f32
2951    //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
2952    //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
2953    //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
2954    SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
2955    for (unsigned i = 0; i < NumElems; ++i)
2956      V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2957    NumElems >>= 1;
2958    while (NumElems != 0) {
2959      for (unsigned i = 0; i < NumElems; ++i)
2960        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
2961                           UnpckMask);
2962      NumElems >>= 1;
2963    }
2964    return V[0];
2965  }
2966
2967  return SDOperand();
2968}
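
// A scalar model of the unpckl expansion above (illustrative sketch only;
// hypothetical helper, v4f32 case). Each round interleaves V[i] with
// V[i + NumElems], halving the number of live vectors until one remains:
static void unpcklTreeSketch(const float In[4], float Out[4]) {
  // Round 1: unpcklps 0,2 => X: <?, ?, 2, 0>; unpcklps 1,3 => Y: <?, ?, 3, 1>
  float X[2] = { In[0], In[2] };
  float Y[2] = { In[1], In[3] };
  // Round 2: unpcklps X,Y interleaves the low halves, restoring <3, 2, 1, 0>.
  Out[0] = X[0]; Out[1] = Y[0]; Out[2] = X[1]; Out[3] = Y[1];
}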
2969
2970SDOperand
2971X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
2972  SDOperand V1 = Op.getOperand(0);
2973  SDOperand V2 = Op.getOperand(1);
2974  SDOperand PermMask = Op.getOperand(2);
2975  MVT::ValueType VT = Op.getValueType();
2976  unsigned NumElems = PermMask.getNumOperands();
2977  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
2978  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
2979
2980  if (isUndefShuffle(Op.Val))
2981    return DAG.getNode(ISD::UNDEF, VT);
2982
2983  if (isSplatMask(PermMask.Val)) {
2984    if (NumElems <= 4) return Op;
2985    // Promote it to a v4i32 splat.
2986    return PromoteSplat(Op, DAG);
2987  }
2988
2989  if (X86::isMOVLMask(PermMask.Val))
2990    return (V1IsUndef) ? V2 : Op;
2991
2992  if (X86::isMOVSHDUPMask(PermMask.Val) ||
2993      X86::isMOVSLDUPMask(PermMask.Val) ||
2994      X86::isMOVHLPSMask(PermMask.Val) ||
2995      X86::isMOVHPMask(PermMask.Val) ||
2996      X86::isMOVLPMask(PermMask.Val))
2997    return Op;
2998
2999  if (ShouldXformToMOVHLPS(PermMask.Val) ||
3000      ShouldXformToMOVLP(V1.Val, PermMask.Val))
3001    return CommuteVectorShuffle(Op, DAG);
3002
3003  bool V1IsSplat = isSplatVector(V1.Val);
3004  bool V2IsSplat = isSplatVector(V2.Val);
3005  if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
3006    Op = CommuteVectorShuffle(Op, DAG);
3007    V1 = Op.getOperand(0);
3008    V2 = Op.getOperand(1);
3009    PermMask = Op.getOperand(2);
3010    std::swap(V1IsSplat, V2IsSplat);
3011    std::swap(V1IsUndef, V2IsUndef);
3012  }
3013
3014  if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
3015    if (V2IsUndef) return V1;
3016    Op = CommuteVectorShuffle(Op, DAG);
3017    V1 = Op.getOperand(0);
3018    V2 = Op.getOperand(1);
3019    PermMask = Op.getOperand(2);
3020    if (V2IsSplat) {
3021      // V2 is a splat, so the mask may be malformed. That is, it may point
3022      // to any V2 element. The instruction selector won't like this. Get
3023      // a corrected mask and commute to form a proper MOVS{S|D}.
3024      SDOperand NewMask = getMOVLMask(NumElems, DAG);
3025      if (NewMask.Val != PermMask.Val)
3026        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3027    }
3028    return Op;
3029  }
3030
3031  if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
3032      X86::isUNPCKLMask(PermMask.Val) ||
3033      X86::isUNPCKHMask(PermMask.Val))
3034    return Op;
3035
3036  if (V2IsSplat) {
3037    // Normalize the mask so all entries that point to V2 point to its first
3038    // element, then try to match unpck{h|l} again. If it matches, return a
3039    // new vector_shuffle with the corrected mask.
3040    SDOperand NewMask = NormalizeMask(PermMask, DAG);
3041    if (NewMask.Val != PermMask.Val) {
3042      if (X86::isUNPCKLMask(PermMask.Val, true)) {
3043        SDOperand NewMask = getUnpacklMask(NumElems, DAG);
3044        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3045      } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
3046        SDOperand NewMask = getUnpackhMask(NumElems, DAG);
3047        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3048      }
3049    }
3050  }
3051
3052  // Normalize the node to match x86 shuffle ops if needed
3053  if (V2.getOpcode() != ISD::UNDEF)
3054    if (isCommutedSHUFP(PermMask.Val)) {
3055      Op = CommuteVectorShuffle(Op, DAG);
3056      V1 = Op.getOperand(0);
3057      V2 = Op.getOperand(1);
3058      PermMask = Op.getOperand(2);
3059    }
3060
3061  // If VT is integer, try PSHUF* first, then SHUFP*.
3062  if (MVT::isInteger(VT)) {
3063    if (X86::isPSHUFDMask(PermMask.Val) ||
3064        X86::isPSHUFHWMask(PermMask.Val) ||
3065        X86::isPSHUFLWMask(PermMask.Val)) {
3066      if (V2.getOpcode() != ISD::UNDEF)
3067        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3068                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
3069      return Op;
3070    }
3071
3072    if (X86::isSHUFPMask(PermMask.Val))
3073      return Op;
3074
3075    // Handle v8i16 shuffle high / low shuffle node pair.
3076    if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
3077      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3078      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
3079      std::vector<SDOperand> MaskVec;
3080      for (unsigned i = 0; i != 4; ++i)
3081        MaskVec.push_back(PermMask.getOperand(i));
3082      for (unsigned i = 4; i != 8; ++i)
3083        MaskVec.push_back(DAG.getConstant(i, BaseVT));
3084      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3085                                   &MaskVec[0], MaskVec.size());
3086      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
3087      MaskVec.clear();
3088      for (unsigned i = 0; i != 4; ++i)
3089        MaskVec.push_back(DAG.getConstant(i, BaseVT));
3090      for (unsigned i = 4; i != 8; ++i)
3091        MaskVec.push_back(PermMask.getOperand(i));
3092      Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
3093      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
3094    }
3095  } else {
3096    // Floating point cases in the other order.
3097    if (X86::isSHUFPMask(PermMask.Val))
3098      return Op;
3099    if (X86::isPSHUFDMask(PermMask.Val) ||
3100        X86::isPSHUFHWMask(PermMask.Val) ||
3101        X86::isPSHUFLWMask(PermMask.Val)) {
3102      if (V2.getOpcode() != ISD::UNDEF)
3103        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3104                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
3105      return Op;
3106    }
3107  }
3108
3109  if (NumElems == 4) {
3110    MVT::ValueType MaskVT = PermMask.getValueType();
3111    MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
3112    std::vector<std::pair<int, int> > Locs;
3113    Locs.resize(NumElems);  // Indexed directly below, so resize, not reserve.
3114    std::vector<SDOperand> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3115    std::vector<SDOperand> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3116    unsigned NumHi = 0;
3117    unsigned NumLo = 0;
3118    // If no more than two elements come from either vector, this can be
3119    // implemented with two shuffles. The first shuffle gathers the elements.
3120    // The second shuffle, which takes the first shuffle as both of its
3121    // vector operands, puts the elements into the right order.
3122    for (unsigned i = 0; i != NumElems; ++i) {
3123      SDOperand Elt = PermMask.getOperand(i);
3124      if (Elt.getOpcode() == ISD::UNDEF) {
3125        Locs[i] = std::make_pair(-1, -1);
3126      } else {
3127        unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
3128        if (Val < NumElems) {
3129          Locs[i] = std::make_pair(0, NumLo);
3130          Mask1[NumLo] = Elt;
3131          NumLo++;
3132        } else {
3133          Locs[i] = std::make_pair(1, NumHi);
3134          if (2+NumHi < NumElems)
3135            Mask1[2+NumHi] = Elt;
3136          NumHi++;
3137        }
3138      }
3139    }
3140    if (NumLo <= 2 && NumHi <= 2) {
3141      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3142                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3143                                   &Mask1[0], Mask1.size()));
3144      for (unsigned i = 0; i != NumElems; ++i) {
3145        if (Locs[i].first == -1)
3146          continue;
3147        else {
3148          unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
3149          Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
3150          Mask2[i] = DAG.getConstant(Idx, MaskEVT);
3151        }
3152      }
3153
3154      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
3155                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3156                                     &Mask2[0], Mask2.size()));
3157    }
3158
3159    // Break it into (shuffle shuffle_hi, shuffle_lo).
3160    Locs.assign(NumElems, std::make_pair(-1, -1));
3161    std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3162    std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3163    std::vector<SDOperand> *MaskPtr = &LoMask;
3164    unsigned MaskIdx = 0;
3165    unsigned LoIdx = 0;
3166    unsigned HiIdx = NumElems/2;
3167    for (unsigned i = 0; i != NumElems; ++i) {
3168      if (i == NumElems/2) {
3169        MaskPtr = &HiMask;
3170        MaskIdx = 1;
3171        LoIdx = 0;
3172        HiIdx = NumElems/2;
3173      }
3174      SDOperand Elt = PermMask.getOperand(i);
3175      if (Elt.getOpcode() == ISD::UNDEF) {
3176        Locs[i] = std::make_pair(-1, -1);
3177      } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
3178        Locs[i] = std::make_pair(MaskIdx, LoIdx);
3179        (*MaskPtr)[LoIdx] = Elt;
3180        LoIdx++;
3181      } else {
3182        Locs[i] = std::make_pair(MaskIdx, HiIdx);
3183        (*MaskPtr)[HiIdx] = Elt;
3184        HiIdx++;
3185      }
3186    }
3187
3188    SDOperand LoShuffle =
3189      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3190                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3191                              &LoMask[0], LoMask.size()));
3192    SDOperand HiShuffle =
3193      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3194                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3195                              &HiMask[0], HiMask.size()));
3196    std::vector<SDOperand> MaskOps;
3197    for (unsigned i = 0; i != NumElems; ++i) {
3198      if (Locs[i].first == -1) {
3199        MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
3200      } else {
3201        unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
3202        MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
3203      }
3204    }
3205    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
3206                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3207                                   &MaskOps[0], MaskOps.size()));
3208  }
3209
3210  return SDOperand();
3211}
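
// A scalar model of the two-shuffle decomposition above (illustrative
// sketch; hypothetical helper). It assumes every mask entry is defined and
// each source contributes at most two elements -- the case handled first
// above. The first shuffle parks the V1 picks in the low half and the V2
// picks in the high half; the second, reading the gathered vector through
// both operands, is a pure permute:
static void twoShuffleSketch(const int V1[4], const int V2[4],
                             const int Mask[4], int Out[4]) {
  int Gather[4] = { 0, 0, 0, 0 };
  int Loc[4][2];
  int NumLo = 0, NumHi = 0;
  for (int i = 0; i != 4; ++i) {
    if (Mask[i] < 4) {                    // element comes from V1
      Loc[i][0] = 0; Loc[i][1] = NumLo;
      Gather[NumLo++] = V1[Mask[i]];
    } else {                              // element comes from V2
      Loc[i][0] = 1; Loc[i][1] = NumHi;
      Gather[2 + NumHi++] = V2[Mask[i] - 4];
    }
  }
  for (int i = 0; i != 4; ++i)            // second shuffle: permute only
    Out[i] = Gather[Loc[i][0] * 2 + Loc[i][1]];
}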
3212
3213SDOperand
3214X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3215  if (!isa<ConstantSDNode>(Op.getOperand(1)))
3216    return SDOperand();
3217
3218  MVT::ValueType VT = Op.getValueType();
3219  // TODO: handle v16i8.
3220  if (MVT::getSizeInBits(VT) == 16) {
3221    // Transform it so it matches pextrw, which produces a 32-bit result.
3222    MVT::ValueType EVT = (MVT::ValueType)(VT+1);
3223    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
3224                                    Op.getOperand(0), Op.getOperand(1));
3225    SDOperand Assert  = DAG.getNode(ISD::AssertZext, EVT, Extract,
3226                                    DAG.getValueType(VT));
3227    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
3228  } else if (MVT::getSizeInBits(VT) == 32) {
3229    SDOperand Vec = Op.getOperand(0);
3230    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3231    if (Idx == 0)
3232      return Op;
3233    // SHUFPS the element to the lowest double word, then movss.
3234    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3235    std::vector<SDOperand> IdxVec;
3236    IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
3237    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3238    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3239    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3240    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3241                                 &IdxVec[0], IdxVec.size());
3242    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3243                      Vec, Vec, Mask);
3244    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3245                       DAG.getConstant(0, getPointerTy()));
3246  } else if (MVT::getSizeInBits(VT) == 64) {
3247    SDOperand Vec = Op.getOperand(0);
3248    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3249    if (Idx == 0)
3250      return Op;
3251
3252    // UNPCKHPD the element to the lowest double word, then movsd.
3253    // Note that if the lower 64 bits of the UNPCKHPD result are then stored
3254    // to a f64mem, the whole operation is folded into a single MOVHPDmr.
3255    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(2);
3256    std::vector<SDOperand> IdxVec;
3257    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
3258    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3259    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3260                                 &IdxVec[0], IdxVec.size());
3261    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3262                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3263    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3264                       DAG.getConstant(0, getPointerTy()));
3265  }
3266
3267  return SDOperand();
3268}
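
// The 32- and 64-bit paths above share one trick: the ISA extracts lane 0
// cheaply (movss / movsd), so a shuffle first moves the wanted lane into
// position 0 using a mask of { Idx, undef, ... }. A trivial scalar model
// (illustrative sketch; hypothetical helper):
static float extractViaShuffleSketch(const float Vec[4], unsigned Idx) {
  float ShuffledLane0 = Vec[Idx]; // lane 0 after SHUFPS; lanes 1-3 are undef
  return ShuffledLane0;           // EXTRACT_VECTOR_ELT of lane 0
}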
3269
3270SDOperand
3271X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3272  // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
3273  // as its second argument.
3274  MVT::ValueType VT = Op.getValueType();
3275  MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
3276  SDOperand N0 = Op.getOperand(0);
3277  SDOperand N1 = Op.getOperand(1);
3278  SDOperand N2 = Op.getOperand(2);
3279  if (MVT::getSizeInBits(BaseVT) == 16) {
3280    if (N1.getValueType() != MVT::i32)
3281      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
3282    if (N2.getValueType() != MVT::i32)
3283      N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
3284    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
3285  } else if (MVT::getSizeInBits(BaseVT) == 32) {
3286    unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
3287    if (Idx == 0) {
3288      // Use a movss.
3289      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
3290      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3291      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
3292      std::vector<SDOperand> MaskVec;
3293      MaskVec.push_back(DAG.getConstant(4, BaseVT));
3294      for (unsigned i = 1; i <= 3; ++i)
3295        MaskVec.push_back(DAG.getConstant(i, BaseVT));
3296      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
3297                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3298                                     &MaskVec[0], MaskVec.size()));
3299    } else {
3300      // Use two pinsrw instructions to insert a 32-bit value.
3301      Idx <<= 1;
3302      if (MVT::isFloatingPoint(N1.getValueType())) {
3303        if (N1.getOpcode() == ISD::LOAD) {
3304          // Just load directly from f32mem to GR32.
3305          N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1),
3306                           N1.getOperand(2));
3307        } else {
3308          N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
3309          N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
3310          N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
3311                           DAG.getConstant(0, getPointerTy()));
3312        }
3313      }
3314      N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
3315      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3316                       DAG.getConstant(Idx, getPointerTy()));
3317      N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
3318      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3319                       DAG.getConstant(Idx+1, getPointerTy()));
3320      return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
3321    }
3322  }
3323
3324  return SDOperand();
3325}
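
// A scalar model of the double-PINSRW path above (illustrative sketch;
// hypothetical helper, little-endian lanes assumed): inserting a 32-bit
// value at lane Idx of a v4i32 becomes two 16-bit inserts at lanes 2*Idx
// and 2*Idx+1 of the same register viewed as v8i16:
static void insert32ViaPinsrwSketch(unsigned short V8[8], unsigned Val,
                                    unsigned Idx) {
  unsigned Lane = Idx << 1;                       // the "Idx <<= 1" above
  V8[Lane]     = (unsigned short)(Val & 0xFFFF);  // first PINSRW: low half
  V8[Lane + 1] = (unsigned short)(Val >> 16);     // second PINSRW after SRL 16
}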
3326
3327SDOperand
3328X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
3329  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
3330  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
3331}
3332
3333// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3334// their target counterparts wrapped in the X86ISD::Wrapper node. Suppose N is
3335// one of the above-mentioned nodes. It has to be wrapped because otherwise
3336// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3337// be used to form addressing modes. These wrapped nodes will be selected
3338// into MOV32ri.
3339SDOperand
3340X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
3341  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3342  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
3343                                 DAG.getTargetConstantPool(CP->getConstVal(),
3344                                                           getPointerTy(),
3345                                                           CP->getAlignment()));
3346  if (Subtarget->isTargetDarwin()) {
3347    // With PIC, the address is actually $g + Offset.
3348    if (!Subtarget->is64Bit() &&
3349        getTargetMachine().getRelocationModel() == Reloc::PIC_)
3350      Result = DAG.getNode(ISD::ADD, getPointerTy(),
3351                    DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
3352  }
3353
3354  return Result;
3355}
3356
3357SDOperand
3358X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
3359  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3360  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
3361                                 DAG.getTargetGlobalAddress(GV,
3362                                                            getPointerTy()));
3363  if (Subtarget->isTargetDarwin()) {
3364    // With PIC, the address is actually $g + Offset.
3365    if (!Subtarget->is64Bit() &&
3366        getTargetMachine().getRelocationModel() == Reloc::PIC_)
3367      Result = DAG.getNode(ISD::ADD, getPointerTy(),
3368                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3369                           Result);
3370
3371    // For Darwin, external and weak symbols are indirect, so we want to load
3372    // the value at address GV, not the value of GV itself. This means that
3373    // the GlobalAddress must be in the base or index register of the address,
3374    // not the GV offset field.
3375    if (getTargetMachine().getRelocationModel() != Reloc::Static &&
3376        DarwinGVRequiresExtraLoad(GV))
3377      Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(),
3378                           Result, DAG.getSrcValue(NULL));
3379  }
3380
3381  return Result;
3382}
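
// An address-computation model of the Darwin PIC path above (illustrative
// sketch; hypothetical helper). The wrapped offset is added to the PIC
// base, and non-static external/weak symbols take one extra indirection:
static char *darwinGlobalAddressSketch(char *GlobalBaseReg, long WrappedOffset,
                                       bool RequiresExtraLoad) {
  char *Addr = GlobalBaseReg + WrappedOffset; // $g + Offset
  if (RequiresExtraLoad)                      // indirect symbol: load the
    Addr = *(char **)Addr;                    // real address from the stub
  return Addr;
}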
3383
3384SDOperand
3385X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
3386  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
3387  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
3388                                 DAG.getTargetExternalSymbol(Sym,
3389                                                             getPointerTy()));
3390  if (Subtarget->isTargetDarwin()) {
3391    // With PIC, the address is actually $g + Offset.
3392    if (!Subtarget->is64Bit() &&
3393        getTargetMachine().getRelocationModel() == Reloc::PIC_)
3394      Result = DAG.getNode(ISD::ADD, getPointerTy(),
3395                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3396                           Result);
3397  }
3398
3399  return Result;
3400}
3401
3402SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
3403    assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
3404           "Not an i64 shift!");
3405    bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
3406    SDOperand ShOpLo = Op.getOperand(0);
3407    SDOperand ShOpHi = Op.getOperand(1);
3408    SDOperand ShAmt  = Op.getOperand(2);
3409    SDOperand Tmp1 = isSRA ?
3410      DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
3411      DAG.getConstant(0, MVT::i32);
3412
3413    SDOperand Tmp2, Tmp3;
3414    if (Op.getOpcode() == ISD::SHL_PARTS) {
3415      Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
3416      Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
3417    } else {
3418      Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
3419      Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
3420    }
3421
3422    const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3423    SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
3424                                    DAG.getConstant(32, MVT::i8));
3425    SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)};
3426    SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1);
3427
3428    SDOperand Hi, Lo;
3429    SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
3430
3431    VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
3432    SmallVector<SDOperand, 4> Ops;
3433    if (Op.getOpcode() == ISD::SHL_PARTS) {
3434      Ops.push_back(Tmp2);
3435      Ops.push_back(Tmp3);
3436      Ops.push_back(CC);
3437      Ops.push_back(InFlag);
3438      Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3439      InFlag = Hi.getValue(1);
3440
3441      Ops.clear();
3442      Ops.push_back(Tmp3);
3443      Ops.push_back(Tmp1);
3444      Ops.push_back(CC);
3445      Ops.push_back(InFlag);
3446      Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3447    } else {
3448      Ops.push_back(Tmp2);
3449      Ops.push_back(Tmp3);
3450      Ops.push_back(CC);
3451      Ops.push_back(InFlag);
3452      Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3453      InFlag = Lo.getValue(1);
3454
3455      Ops.clear();
3456      Ops.push_back(Tmp3);
3457      Ops.push_back(Tmp1);
3458      Ops.push_back(CC);
3459      Ops.push_back(InFlag);
3460      Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3461    }
3462
3463    VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
3464    Ops.clear();
3465    Ops.push_back(Lo);
3466    Ops.push_back(Hi);
3467    return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
3468}
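
// A scalar model of the SHL_PARTS lowering above (illustrative sketch;
// hypothetical helper; the SRL/SRA_PARTS cases are symmetric). SHLD forms
// the straddling high word; the CMP of (ShAmt & 32) against zero and the
// two CMOVs pick between the "shift < 32" and "shift >= 32" results:
static void shl64PartsSketch(unsigned Lo, unsigned Hi, unsigned Amt,
                             unsigned &OutLo, unsigned &OutHi) {
  unsigned Amt31 = Amt & 31;    // hardware shifts mask the amount to 5 bits
  unsigned Tmp2 = (Hi << Amt31) | (Amt31 ? Lo >> (32 - Amt31) : 0); // SHLD
  unsigned Tmp3 = Lo << Amt31;                                      // SHL
  if (Amt & 32) { OutHi = Tmp3; OutLo = 0; }    // CMOV taken: amount >= 32
  else          { OutHi = Tmp2; OutLo = Tmp3; } // CMOV not taken
}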
3469
3470SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
3471  assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
3472         Op.getOperand(0).getValueType() >= MVT::i16 &&
3473         "Unknown SINT_TO_FP to lower!");
3474
3475  SDOperand Result;
3476  MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
3477  unsigned Size = MVT::getSizeInBits(SrcVT)/8;
3478  MachineFunction &MF = DAG.getMachineFunction();
3479  int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
3480  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3481  SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other,
3482                                DAG.getEntryNode(), Op.getOperand(0),
3483                                StackSlot, DAG.getSrcValue(NULL));
3484
3485  // Build the FILD
3486  std::vector<MVT::ValueType> Tys;
3487  Tys.push_back(MVT::f64);
3488  Tys.push_back(MVT::Other);
3489  if (X86ScalarSSE) Tys.push_back(MVT::Flag);
3490  std::vector<SDOperand> Ops;
3491  Ops.push_back(Chain);
3492  Ops.push_back(StackSlot);
3493  Ops.push_back(DAG.getValueType(SrcVT));
3494  Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
3495                       Tys, &Ops[0], Ops.size());
3496
3497  if (X86ScalarSSE) {
3498    Chain = Result.getValue(1);
3499    SDOperand InFlag = Result.getValue(2);
3500
3501    // FIXME: Currently the FST is flagged to the FILD_FLAG. This
3502    // shouldn't be necessary except that RFP cannot be live across
3503    // multiple blocks. When the stackifier is fixed, they can be uncoupled.
3504    MachineFunction &MF = DAG.getMachineFunction();
3505    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
3506    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3507    std::vector<MVT::ValueType> Tys;
3508    Tys.push_back(MVT::Other);
3509    std::vector<SDOperand> Ops;
3510    Ops.push_back(Chain);
3511    Ops.push_back(Result);
3512    Ops.push_back(StackSlot);
3513    Ops.push_back(DAG.getValueType(Op.getValueType()));
3514    Ops.push_back(InFlag);
3515    Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
3516    Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
3517                         DAG.getSrcValue(NULL));
3518  }
3519
3520  return Result;
3521}
3522
3523SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
3524  assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
3525         "Unknown FP_TO_SINT to lower!");
3526  // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
3527  // stack slot.
3528  MachineFunction &MF = DAG.getMachineFunction();
3529  unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
3530  int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
3531  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3532
3533  unsigned Opc;
3534  switch (Op.getValueType()) {
3535    default: assert(0 && "Invalid FP_TO_SINT to lower!");
3536    case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
3537    case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
3538    case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
3539  }
3540
3541  SDOperand Chain = DAG.getEntryNode();
3542  SDOperand Value = Op.getOperand(0);
3543  if (X86ScalarSSE) {
3544    assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
3545    Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot,
3546                        DAG.getSrcValue(0));
3547    std::vector<MVT::ValueType> Tys;
3548    Tys.push_back(MVT::f64);
3549    Tys.push_back(MVT::Other);
3550    std::vector<SDOperand> Ops;
3551    Ops.push_back(Chain);
3552    Ops.push_back(StackSlot);
3553    Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType()));
3554    Value = DAG.getNode(X86ISD::FLD, Tys, &Ops[0], Ops.size());
3555    Chain = Value.getValue(1);
3556    SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
3557    StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3558  }
3559
3560  // Build the FP_TO_INT*_IN_MEM
3561  std::vector<SDOperand> Ops;
3562  Ops.push_back(Chain);
3563  Ops.push_back(Value);
3564  Ops.push_back(StackSlot);
3565  SDOperand FIST = DAG.getNode(Opc, MVT::Other, &Ops[0], Ops.size());
3566
3567  // Load the result.
3568  return DAG.getLoad(Op.getValueType(), FIST, StackSlot,
3569                     DAG.getSrcValue(NULL));
3570}
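
// A data-flow model of the SSE path above (illustrative sketch;
// hypothetical helper): the SSE value can only reach the x87 unit through
// memory, and FP_TO_INT64_IN_MEM likewise writes its result through memory:
static long long fpToSint64Sketch(double V) {
  double Slot1 = V;                       // STORE: spill the SSE value
  double OnFPStack = Slot1;               // X86ISD::FLD: reload onto fp stack
  long long Slot2 = (long long)OnFPStack; // FP_TO_INT64_IN_MEM (fistp64)
  return Slot2;                           // final integer load from the slot
}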
3571
3572SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
3573  MVT::ValueType VT = Op.getValueType();
3574  const Type *OpNTy =  MVT::getTypeForValueType(VT);
3575  std::vector<Constant*> CV;
3576  if (VT == MVT::f64) {
3577    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
3578    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3579  } else {
3580    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
3581    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3582    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3583    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3584  }
3585  Constant *CS = ConstantStruct::get(CV);
3586  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3587  std::vector<MVT::ValueType> Tys;
3588  Tys.push_back(VT);
3589  Tys.push_back(MVT::Other);
3590  SmallVector<SDOperand, 3> Ops;
3591  Ops.push_back(DAG.getEntryNode());
3592  Ops.push_back(CPIdx);
3593  Ops.push_back(DAG.getSrcValue(NULL));
3594  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
3595  return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
3596}
3597
3598SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
3599  MVT::ValueType VT = Op.getValueType();
3600  const Type *OpNTy =  MVT::getTypeForValueType(VT);
3601  std::vector<Constant*> CV;
3602  if (VT == MVT::f64) {
3603    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
3604    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3605  } else {
3606    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
3607    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3608    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3609    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3610  }
3611  Constant *CS = ConstantStruct::get(CV);
3612  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3613  std::vector<MVT::ValueType> Tys;
3614  Tys.push_back(VT);
3615  Tys.push_back(MVT::Other);
3616  SmallVector<SDOperand, 3> Ops;
3617  Ops.push_back(DAG.getEntryNode());
3618  Ops.push_back(CPIdx);
3619  Ops.push_back(DAG.getSrcValue(NULL));
3620  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
3621  return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
3622}
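
// The two constant-pool vectors above are just sign-bit masks. At the bit
// level (illustrative sketch; hypothetical helpers operating on the raw bit
// pattern of a double, with the bit-casts left to the caller):
static unsigned long long fabsBitsSketch(unsigned long long Bits) {
  return Bits & ~(1ULL << 63);   // X86ISD::FAND with the ~(1 << 63) mask
}
static unsigned long long fnegBitsSketch(unsigned long long Bits) {
  return Bits ^ (1ULL << 63);    // X86ISD::FXOR with the (1 << 63) mask
}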
3623
3624SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
3625                                        SDOperand Chain) {
3626  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
3627  SDOperand Cond;
3628  SDOperand Op0 = Op.getOperand(0);
3629  SDOperand Op1 = Op.getOperand(1);
3630  SDOperand CC = Op.getOperand(2);
3631  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
3632  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3633  bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
3634  unsigned X86CC;
3635
3636  VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
3637  if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
3638                     Op0, Op1, DAG)) {
3639    SDOperand Ops1[] = { Chain, Op0, Op1 };
3640    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops1, 3).getValue(1);
3641    SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
3642    return DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
3643  }
3644
3645  assert(isFP && "Illegal integer SetCC!");
3646
3647  SDOperand COps[] = { Chain, Op0, Op1 };
3648  Cond = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1);
3649
3650  switch (SetCCOpcode) {
3651  default: assert(false && "Illegal floating point SetCC!");
3652  case ISD::SETOEQ: {  // !PF & ZF
3653    SDOperand Ops1[] = { DAG.getConstant(X86ISD::COND_NP, MVT::i8), Cond };
3654    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops1, 2);
3655    SDOperand Ops2[] = { DAG.getConstant(X86ISD::COND_E, MVT::i8),
3656                         Tmp1.getValue(1) };
3657    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
3658    return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
3659  }
3660  case ISD::SETUNE: {  // PF | !ZF
3661    SDOperand Ops1[] = { DAG.getConstant(X86ISD::COND_P, MVT::i8), Cond };
3662    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops1, 2);
3663    SDOperand Ops2[] = { DAG.getConstant(X86ISD::COND_NE, MVT::i8),
3664                         Tmp1.getValue(1) };
3665    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
3666    return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
3667  }
3668  }
3669}
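
// A truth-level model of the two flag combinations above (illustrative
// sketch; hypothetical helpers). After an FP compare, PF is set exactly
// when the operands were unordered, i.e. a NaN was involved:
static bool setoeqSketch(double A, double B) {
  bool PF = (A != A) || (B != B);   // unordered
  bool ZF = (A == B);               // equal
  return !PF && ZF;                 // SETOEQ: !PF & ZF
}
static bool setuneSketch(double A, double B) {
  bool PF = (A != A) || (B != B);
  bool ZF = (A == B);
  return PF || !ZF;                 // SETUNE: PF | !ZF
}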
3670
3671SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
3672  bool addTest = true;
3673  SDOperand Chain = DAG.getEntryNode();
3674  SDOperand Cond  = Op.getOperand(0);
3675  SDOperand CC;
3676  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3677
3678  if (Cond.getOpcode() == ISD::SETCC)
3679    Cond = LowerSETCC(Cond, DAG, Chain);
3680
3681  if (Cond.getOpcode() == X86ISD::SETCC) {
3682    CC = Cond.getOperand(0);
3683
3684    // If the condition flag is set by an X86ISD::CMP, then make a copy of it
3685    // (since the flag operand cannot be shared). Use it as the condition-
3686    // setting operand in place of the X86ISD::SETCC.
3687    // If the X86ISD::SETCC has more than one use, then perhaps it's better
3688    // to use a test instead of duplicating the X86ISD::CMP (for register
3689    // pressure reasons)?
3690    SDOperand Cmp = Cond.getOperand(1);
3691    unsigned Opc = Cmp.getOpcode();
3692    bool IllegalFPCMov = !X86ScalarSSE &&
3693      MVT::isFloatingPoint(Op.getValueType()) &&
3694      !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
3695    if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) &&
3696        !IllegalFPCMov) {
3697      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
3698      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
3699      addTest = false;
3700    }
3701  }
3702
3703  if (addTest) {
3704    CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
3705    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
3706    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
3707  }
3708
3709  VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag);
3710  SmallVector<SDOperand, 4> Ops;
3711  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
3712  // condition is true.
3713  Ops.push_back(Op.getOperand(2));
3714  Ops.push_back(Op.getOperand(1));
3715  Ops.push_back(CC);
3716  Ops.push_back(Cond.getValue(1));
3717  return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3718}
3719
3720SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
3721  bool addTest = true;
3722  SDOperand Chain = Op.getOperand(0);
3723  SDOperand Cond  = Op.getOperand(1);
3724  SDOperand Dest  = Op.getOperand(2);
3725  SDOperand CC;
3726  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3727
3728  if (Cond.getOpcode() == ISD::SETCC)
3729    Cond = LowerSETCC(Cond, DAG, Chain);
3730
3731  if (Cond.getOpcode() == X86ISD::SETCC) {
3732    CC = Cond.getOperand(0);
3733
3734    // If the condition flag is set by an X86ISD::CMP, then make a copy of it
3735    // (since the flag operand cannot be shared). Use it as the condition-
3736    // setting operand in place of the X86ISD::SETCC.
3737    // If the X86ISD::SETCC has more than one use, then perhaps it's better
3738    // to use a test instead of duplicating the X86ISD::CMP (for register
3739    // pressure reasons)?
3740    SDOperand Cmp = Cond.getOperand(1);
3741    unsigned Opc = Cmp.getOpcode();
3742    if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) {
3743      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
3744      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
3745      addTest = false;
3746    }
3747  }
3748
3749  if (addTest) {
3750    CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
3751    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
3752    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
3753  }
3754  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
3755                     Cond, Dest, CC, Cond.getValue(1));
3756}
3757
3758SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
3759  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3760  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
3761                                 DAG.getTargetJumpTable(JT->getIndex(),
3762                                                        getPointerTy()));
3763  if (Subtarget->isTargetDarwin()) {
3764    // With PIC, the address is actually $g + Offset.
3765    if (!Subtarget->is64Bit() &&
3766        getTargetMachine().getRelocationModel() == Reloc::PIC_)
3767      Result = DAG.getNode(ISD::ADD, getPointerTy(),
3768                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3769                           Result);
3770  }
3771
3772  return Result;
3773}
3774
3775SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
3776  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3777  if (Subtarget->is64Bit())
3778    return LowerX86_64CCCCallTo(Op, DAG);
3779  else if (CallingConv == CallingConv::Fast && EnableFastCC)
3780    return LowerFastCCCallTo(Op, DAG);
3781  else
3782    return LowerCCCCallTo(Op, DAG);
3783}
3784
3785SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
3786  SDOperand Copy;
3787
3788  switch(Op.getNumOperands()) {
3789    default:
3790      assert(0 && "Do not know how to return this many arguments!");
3791      abort();
3792    case 1:    // ret void.
3793      return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0),
3794                        DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
3795    case 3: {
3796      MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
3797
3798      if (MVT::isVector(ArgVT) ||
3799          (Subtarget->is64Bit() && MVT::isFloatingPoint(ArgVT))) {
3800        // Integer or FP vector result -> XMM0.
3801        if (DAG.getMachineFunction().liveout_empty())
3802          DAG.getMachineFunction().addLiveOut(X86::XMM0);
3803        Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1),
3804                                SDOperand());
3805      } else if (MVT::isInteger(ArgVT)) {
3806        // Integer result -> EAX / RAX.
3807        // The C calling convention guarantees the return value has been
3808        // promoted to at least MVT::i32. The X86-64 ABI doesn't require the
3809        // value to be promoted to MVT::i64. So we don't have to extend it to
3810        // 64-bit. Return the value in EAX, but mark RAX as liveout.
3811        unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
3812        if (DAG.getMachineFunction().liveout_empty())
3813          DAG.getMachineFunction().addLiveOut(Reg);
3814
3815        Reg = (ArgVT == MVT::i64) ? X86::RAX : X86::EAX;
3816        Copy = DAG.getCopyToReg(Op.getOperand(0), Reg, Op.getOperand(1),
3817                                SDOperand());
3818      } else if (!X86ScalarSSE) {
3819        // FP return with fp-stack value.
3820        if (DAG.getMachineFunction().liveout_empty())
3821          DAG.getMachineFunction().addLiveOut(X86::ST0);
3822
3823        std::vector<MVT::ValueType> Tys;
3824        Tys.push_back(MVT::Other);
3825        Tys.push_back(MVT::Flag);
3826        std::vector<SDOperand> Ops;
3827        Ops.push_back(Op.getOperand(0));
3828        Ops.push_back(Op.getOperand(1));
3829        Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, &Ops[0], Ops.size());
3830      } else {
3831        // FP return with ScalarSSE (return on fp-stack).
3832        if (DAG.getMachineFunction().liveout_empty())
3833          DAG.getMachineFunction().addLiveOut(X86::ST0);
3834
3835        SDOperand MemLoc;
3836        SDOperand Chain = Op.getOperand(0);
3837        SDOperand Value = Op.getOperand(1);
3838
3839        if (Value.getOpcode() == ISD::LOAD &&
3840            (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
3841          Chain  = Value.getOperand(0);
3842          MemLoc = Value.getOperand(1);
3843        } else {
3844          // Spill the value to memory and reload it into top of stack.
3845          unsigned Size = MVT::getSizeInBits(ArgVT)/8;
3846          MachineFunction &MF = DAG.getMachineFunction();
3847          int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
3848          MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
3849          Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
3850                              Value, MemLoc, DAG.getSrcValue(0));
3851        }
3852        std::vector<MVT::ValueType> Tys;
3853        Tys.push_back(MVT::f64);
3854        Tys.push_back(MVT::Other);
3855        std::vector<SDOperand> Ops;
3856        Ops.push_back(Chain);
3857        Ops.push_back(MemLoc);
3858        Ops.push_back(DAG.getValueType(ArgVT));
3859        Copy = DAG.getNode(X86ISD::FLD, Tys, &Ops[0], Ops.size());
3860        Tys.clear();
3861        Tys.push_back(MVT::Other);
3862        Tys.push_back(MVT::Flag);
3863        Ops.clear();
3864        Ops.push_back(Copy.getValue(1));
3865        Ops.push_back(Copy);
3866        Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, &Ops[0], Ops.size());
3867      }
3868      break;
3869    }
3870    case 5: {
3871      unsigned Reg1 = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
3872      unsigned Reg2 = Subtarget->is64Bit() ? X86::RDX : X86::EDX;
3873      if (DAG.getMachineFunction().liveout_empty()) {
3874        DAG.getMachineFunction().addLiveOut(Reg1);
3875        DAG.getMachineFunction().addLiveOut(Reg2);
3876      }
3877
3878      Copy = DAG.getCopyToReg(Op.getOperand(0), Reg2, Op.getOperand(3),
3879                              SDOperand());
3880      Copy = DAG.getCopyToReg(Copy, Reg1, Op.getOperand(1), Copy.getValue(1));
3881      break;
3882    }
3883  }
3884  return DAG.getNode(X86ISD::RET_FLAG, MVT::Other,
3885                     Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
3886                     Copy.getValue(1));
3887}
3888
3889SDOperand
3890X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
3891  MachineFunction &MF = DAG.getMachineFunction();
3892  const Function* Fn = MF.getFunction();
3893  if (Fn->hasExternalLinkage() &&
3894      Subtarget->TargetType == X86Subtarget::isCygwin &&
3895      Fn->getName() == "main")
3896    MF.getInfo<X86FunctionInfo>()->setForceFramePointer(true);
3897
3898  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3899  if (Subtarget->is64Bit())
3900    return LowerX86_64CCCArguments(Op, DAG);
3901  else if (CC == CallingConv::Fast && EnableFastCC)
3902    return LowerFastCCArguments(Op, DAG);
3903  else
3904    return LowerCCCArguments(Op, DAG);
3905}
3906
3907SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
3908  SDOperand InFlag(0, 0);
3909  SDOperand Chain = Op.getOperand(0);
3910  unsigned Align =
3911    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
3912  if (Align == 0) Align = 1;
3913
3914  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
3915  // If not DWORD aligned, call memset if size is less than the threshold.
3916  // The library call knows how to align to the right boundary first.
3917  if ((Align & 3) != 0 ||
3918      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
3919    MVT::ValueType IntPtr = getPointerTy();
3920    const Type *IntPtrTy = getTargetData()->getIntPtrType();
3921    std::vector<std::pair<SDOperand, const Type*> > Args;
3922    Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
3923    // Extend the ubyte argument to be an int value for the call.
3924    SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
3925    Args.push_back(std::make_pair(Val, IntPtrTy));
3926    Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
3927    std::pair<SDOperand,SDOperand> CallResult =
3928      LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
3929                  DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
3930    return CallResult.second;
3931  }
3932
3933  MVT::ValueType AVT;
3934  SDOperand Count;
3935  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3936  unsigned BytesLeft = 0;
3937  bool TwoRepStos = false;
3938  if (ValC) {
3939    unsigned ValReg;
3940    uint64_t Val = ValC->getValue() & 255;
3941
3942    // If the value is a constant, then we can potentially use larger sets.
3943    switch (Align & 3) {
3944      case 2:   // WORD aligned
3945        AVT = MVT::i16;
3946        ValReg = X86::AX;
3947        Val = (Val << 8) | Val;
3948        break;
3949      case 0:  // DWORD aligned
3950        AVT = MVT::i32;
3951        ValReg = X86::EAX;
3952        Val = (Val << 8)  | Val;
3953        Val = (Val << 16) | Val;
3954        if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) {  // QWORD aligned
3955          AVT = MVT::i64;
3956          ValReg = X86::RAX;
3957          Val = (Val << 32) | Val;
3958        }
3959        break;
3960      default:  // Byte aligned
3961        AVT = MVT::i8;
3962        ValReg = X86::AL;
3963        Count = Op.getOperand(3);
3964        break;
3965    }
3966
3967    if (AVT > MVT::i8) {
3968      if (I) {
3969        unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
3970        Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
3971        BytesLeft = I->getValue() % UBytes;
3972      } else {
3973        assert(AVT >= MVT::i32 &&
3974               "Do not use rep;stos if not at least DWORD aligned");
3975        Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
3976                            Op.getOperand(3), DAG.getConstant((AVT == MVT::i64) ? 3 : 2, MVT::i8));
3977        TwoRepStos = true;
3978      }
3979    }
3980
3981    Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
3982                              InFlag);
3983    InFlag = Chain.getValue(1);
3984  } else {
3985    AVT = MVT::i8;
3986    Count  = Op.getOperand(3);
3987    Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
3988    InFlag = Chain.getValue(1);
3989  }
3990
3991  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
3992                            Count, InFlag);
3993  InFlag = Chain.getValue(1);
3994  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
3995                            Op.getOperand(1), InFlag);
3996  InFlag = Chain.getValue(1);
3997
3998  std::vector<MVT::ValueType> Tys;
3999  Tys.push_back(MVT::Other);
4000  Tys.push_back(MVT::Flag);
4001  std::vector<SDOperand> Ops;
4002  Ops.push_back(Chain);
4003  Ops.push_back(DAG.getValueType(AVT));
4004  Ops.push_back(InFlag);
4005  Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
4006
4007  if (TwoRepStos) {
4008    InFlag = Chain.getValue(1);
4009    Count = Op.getOperand(3);
4010    MVT::ValueType CVT = Count.getValueType();
4011    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
4012                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
4013    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
4014                              Left, InFlag);
4015    InFlag = Chain.getValue(1);
4016    Tys.clear();
4017    Tys.push_back(MVT::Other);
4018    Tys.push_back(MVT::Flag);
4019    Ops.clear();
4020    Ops.push_back(Chain);
4021    Ops.push_back(DAG.getValueType(MVT::i8));
4022    Ops.push_back(InFlag);
4023    Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
4024  } else if (BytesLeft) {
4025    // Issue stores for the last 1 - 7 bytes.
4026    SDOperand Value;
4027    unsigned Val = ValC->getValue() & 255;
4028    unsigned Offset = I->getValue() - BytesLeft;
4029    SDOperand DstAddr = Op.getOperand(1);
4030    MVT::ValueType AddrVT = DstAddr.getValueType();
4031    if (BytesLeft >= 4) {
4032      Val = (Val << 8)  | Val;
4033      Val = (Val << 16) | Val;
4034      Value = DAG.getConstant(Val, MVT::i32);
4035      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
4036                          DAG.getNode(ISD::ADD, AddrVT, DstAddr,
4037                                      DAG.getConstant(Offset, AddrVT)),
4038                          DAG.getSrcValue(NULL));
4039      BytesLeft -= 4;
4040      Offset += 4;
4041    }
4042    if (BytesLeft >= 2) {
4043      Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
4044      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
4045                          DAG.getNode(ISD::ADD, AddrVT, DstAddr,
4046                                      DAG.getConstant(Offset, AddrVT)),
4047                          DAG.getSrcValue(NULL));
4048      BytesLeft -= 2;
4049      Offset += 2;
4050    }
4051    if (BytesLeft == 1) {
4052      Value = DAG.getConstant(Val, MVT::i8);
4053      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
4054                          DAG.getNode(ISD::ADD, AddrVT, DstAddr,
4055                                      DAG.getConstant(Offset, AddrVT)),
4056                          DAG.getSrcValue(NULL));
4057    }
4058  }
4059
4060  return Chain;
4061}
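
// A scalar sketch of the constant-splat and count computation above
// (illustrative only; hypothetical helper, register plumbing omitted).
// The byte is replicated to fill the widest aligned store unit, and the
// byte count splits into a rep-unit count plus a 0-7 byte remainder:
static void repStosParamsSketch(unsigned char Byte, unsigned long long Size,
                                unsigned UnitBytes, // 2, 4, or 8 by alignment
                                unsigned long long &Pattern,
                                unsigned long long &Count,
                                unsigned &BytesLeft) {
  Pattern = Byte;
  for (unsigned Filled = 1; Filled < UnitBytes; Filled <<= 1)
    Pattern = (Pattern << (8 * Filled)) | Pattern; // (Val << 8) | Val, etc.
  Count = Size / UnitBytes;                 // iterations for rep;stos
  BytesLeft = (unsigned)(Size % UnitBytes); // stored piecemeal afterwards
}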
4062
4063SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
4064  SDOperand Chain = Op.getOperand(0);
4065  unsigned Align =
4066    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
4067  if (Align == 0) Align = 1;
4068
4069  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
4070  // If not DWORD aligned, call memcpy if size is less than the threshold.
4071  // The library call knows how to align to the right boundary first.
4072  if ((Align & 3) != 0 ||
4073      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
4074    MVT::ValueType IntPtr = getPointerTy();
4075    const Type *IntPtrTy = getTargetData()->getIntPtrType();
4076    std::vector<std::pair<SDOperand, const Type*> > Args;
4077    Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
4078    Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy));
4079    Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
4080    std::pair<SDOperand,SDOperand> CallResult =
4081      LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
4082                  DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
4083    return CallResult.second;
4084  }
4085
4086  MVT::ValueType AVT;
4087  SDOperand Count;
4088  unsigned BytesLeft = 0;
4089  bool TwoRepMovs = false;
4090  switch (Align & 3) {
4091    case 2:   // WORD aligned
4092      AVT = MVT::i16;
4093      break;
4094    case 0:  // DWORD aligned
4095      AVT = MVT::i32;
4096      if (Subtarget->is64Bit() && ((Align & 0xF) == 0))  // QWORD aligned
4097        AVT = MVT::i64;
4098      break;
4099    default:  // Byte aligned
4100      AVT = MVT::i8;
4101      Count = Op.getOperand(3);
4102      break;
4103  }
4104
4105  if (AVT > MVT::i8) {
4106    if (I) {
4107      unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
4108      Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
4109      BytesLeft = I->getValue() % UBytes;
4110    } else {
4111      assert(AVT >= MVT::i32 &&
4112             "Do not use rep;movs if not at least DWORD aligned");
4113      Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
4114                          Op.getOperand(3), DAG.getConstant((AVT == MVT::i64) ? 3 : 2, MVT::i8));
4115      TwoRepMovs = true;
4116    }
4117  }
4118
4119  SDOperand InFlag(0, 0);
4120  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
4121                            Count, InFlag);
4122  InFlag = Chain.getValue(1);
4123  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
4124                            Op.getOperand(1), InFlag);
4125  InFlag = Chain.getValue(1);
4126  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
4127                            Op.getOperand(2), InFlag);
4128  InFlag = Chain.getValue(1);
4129
4130  std::vector<MVT::ValueType> Tys;
4131  Tys.push_back(MVT::Other);
4132  Tys.push_back(MVT::Flag);
4133  std::vector<SDOperand> Ops;
4134  Ops.push_back(Chain);
4135  Ops.push_back(DAG.getValueType(AVT));
4136  Ops.push_back(InFlag);
4137  Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
4138
4139  if (TwoRepMovs) {
4140    InFlag = Chain.getValue(1);
4141    Count = Op.getOperand(3);
4142    MVT::ValueType CVT = Count.getValueType();
4143    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
4144                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
4145    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
4146                              Left, InFlag);
4147    InFlag = Chain.getValue(1);
4148    Tys.clear();
4149    Tys.push_back(MVT::Other);
4150    Tys.push_back(MVT::Flag);
4151    Ops.clear();
4152    Ops.push_back(Chain);
4153    Ops.push_back(DAG.getValueType(MVT::i8));
4154    Ops.push_back(InFlag);
4155    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
4156  } else if (BytesLeft) {
4157    // Issue loads and stores for the last 1 - 7 bytes.
4158    unsigned Offset = I->getValue() - BytesLeft;
4159    SDOperand DstAddr = Op.getOperand(1);
4160    MVT::ValueType DstVT = DstAddr.getValueType();
4161    SDOperand SrcAddr = Op.getOperand(2);
4162    MVT::ValueType SrcVT = SrcAddr.getValueType();
4163    SDOperand Value;
4164    if (BytesLeft >= 4) {
4165      Value = DAG.getLoad(MVT::i32, Chain,
4166                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
4167                                      DAG.getConstant(Offset, SrcVT)),
4168                          DAG.getSrcValue(NULL));
4169      Chain = Value.getValue(1);
4170      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
4171                          DAG.getNode(ISD::ADD, DstVT, DstAddr,
4172                                      DAG.getConstant(Offset, DstVT)),
4173                          DAG.getSrcValue(NULL));
4174      BytesLeft -= 4;
4175      Offset += 4;
4176    }
4177    if (BytesLeft >= 2) {
4178      Value = DAG.getLoad(MVT::i16, Chain,
4179                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
4180                                      DAG.getConstant(Offset, SrcVT)),
4181                          DAG.getSrcValue(NULL));
4182      Chain = Value.getValue(1);
4183      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
4184                          DAG.getNode(ISD::ADD, DstVT, DstAddr,
4185                                      DAG.getConstant(Offset, DstVT)),
4186                          DAG.getSrcValue(NULL));
4187      BytesLeft -= 2;
4188      Offset += 2;
4189    }
4190
4191    if (BytesLeft == 1) {
4192      Value = DAG.getLoad(MVT::i8, Chain,
4193                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
4194                                      DAG.getConstant(Offset, SrcVT)),
4195                          DAG.getSrcValue(NULL));
4196      Chain = Value.getValue(1);
4197      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
4198                          DAG.getNode(ISD::ADD, DstVT, DstAddr,
4199                                      DAG.getConstant(Offset, DstVT)),
4200                          DAG.getSrcValue(NULL));
4201    }
4202  }
4203
4204  return Chain;
4205}
4206
4207SDOperand
4208X86TargetLowering::LowerREADCYCLECOUNTER(SDOperand Op, SelectionDAG &DAG) {
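  // RDTSC returns the 64-bit timestamp counter in EDX:EAX.  Lower the
  // intrinsic to the flag-producing RDTSC_DAG node, then merge the two
  // 32-bit halves (plus the chain) into the result values.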
4209  std::vector<MVT::ValueType> Tys;
4210  Tys.push_back(MVT::Other);
4211  Tys.push_back(MVT::Flag);
4212  std::vector<SDOperand> Ops;
4213  Ops.push_back(Op.getOperand(0));
4214  SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &Ops[0], Ops.size());
4215  Ops.clear();
4216  Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)));
4217  Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX,
4218                                   MVT::i32, Ops[0].getValue(2)));
4219  Ops.push_back(Ops[1].getValue(1));
4220  Tys[0] = Tys[1] = MVT::i32;
4221  Tys.push_back(MVT::Other);
4222  return DAG.getNode(ISD::MERGE_VALUES, Tys, &Ops[0], Ops.size());
4223}
4224
4225SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
4226  if (!Subtarget->is64Bit()) {
4227    // vastart just stores the address of the VarArgsFrameIndex slot into the
4228    // memory location argument.
4229    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
4230    return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
4231                       Op.getOperand(1), Op.getOperand(2));
4232  }
4233
4234  // __va_list_tag:
4235  //   gp_offset         (0 - 6 * 8)
4236  //   fp_offset         (48 - 48 + 8 * 16)
4237  //   overflow_arg_area (points to parameters passed in memory).
4238  //   reg_save_area
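  //
  // Illustrative C-level equivalent of the layout stored below (x86-64
  // System V ABI):
  //   struct __va_list_tag {
  //     unsigned gp_offset;       // offset 0
  //     unsigned fp_offset;       // offset 4
  //     void *overflow_arg_area;  // offset 8
  //     void *reg_save_area;      // offset 16
  //   };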
4239  std::vector<SDOperand> MemOps;
4240  SDOperand FIN = Op.getOperand(1);
4241  // Store gp_offset
4242  SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
4243                                DAG.getConstant(VarArgsGPOffset, MVT::i32),
4244                                FIN, Op.getOperand(2));
4245  MemOps.push_back(Store);
4246
4247  // Store fp_offset
4248  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
4249                    DAG.getConstant(4, getPointerTy()));
4250  Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
4251                      DAG.getConstant(VarArgsFPOffset, MVT::i32),
4252                      FIN, Op.getOperand(2));
4253  MemOps.push_back(Store);
4254
4255  // Store ptr to overflow_arg_area
4256  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
4257                    DAG.getConstant(4, getPointerTy()));
4258  SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
4259  Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
4260                      OVFIN, FIN, Op.getOperand(2));
4261  MemOps.push_back(Store);
4262
4263  // Store ptr to reg_save_area.
4264  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
4265                    DAG.getConstant(8, getPointerTy()));
4266  SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
4267  Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
4268                      RSFIN, FIN, Op.getOperand(2));
4269  MemOps.push_back(Store);
4270  return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
4271}
4272
4273SDOperand
4274X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
4275  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
4276  switch (IntNo) {
4277  default: return SDOperand();    // Don't custom lower most intrinsics.
4278    // Comparison intrinsics.
4279  case Intrinsic::x86_sse_comieq_ss:
4280  case Intrinsic::x86_sse_comilt_ss:
4281  case Intrinsic::x86_sse_comile_ss:
4282  case Intrinsic::x86_sse_comigt_ss:
4283  case Intrinsic::x86_sse_comige_ss:
4284  case Intrinsic::x86_sse_comineq_ss:
4285  case Intrinsic::x86_sse_ucomieq_ss:
4286  case Intrinsic::x86_sse_ucomilt_ss:
4287  case Intrinsic::x86_sse_ucomile_ss:
4288  case Intrinsic::x86_sse_ucomigt_ss:
4289  case Intrinsic::x86_sse_ucomige_ss:
4290  case Intrinsic::x86_sse_ucomineq_ss:
4291  case Intrinsic::x86_sse2_comieq_sd:
4292  case Intrinsic::x86_sse2_comilt_sd:
4293  case Intrinsic::x86_sse2_comile_sd:
4294  case Intrinsic::x86_sse2_comigt_sd:
4295  case Intrinsic::x86_sse2_comige_sd:
4296  case Intrinsic::x86_sse2_comineq_sd:
4297  case Intrinsic::x86_sse2_ucomieq_sd:
4298  case Intrinsic::x86_sse2_ucomilt_sd:
4299  case Intrinsic::x86_sse2_ucomile_sd:
4300  case Intrinsic::x86_sse2_ucomigt_sd:
4301  case Intrinsic::x86_sse2_ucomige_sd:
4302  case Intrinsic::x86_sse2_ucomineq_sd: {
4303    unsigned Opc = 0;
4304    ISD::CondCode CC = ISD::SETCC_INVALID;
4305    switch (IntNo) {
4306    default: break;
4307    case Intrinsic::x86_sse_comieq_ss:
4308    case Intrinsic::x86_sse2_comieq_sd:
4309      Opc = X86ISD::COMI;
4310      CC = ISD::SETEQ;
4311      break;
4312    case Intrinsic::x86_sse_comilt_ss:
4313    case Intrinsic::x86_sse2_comilt_sd:
4314      Opc = X86ISD::COMI;
4315      CC = ISD::SETLT;
4316      break;
4317    case Intrinsic::x86_sse_comile_ss:
4318    case Intrinsic::x86_sse2_comile_sd:
4319      Opc = X86ISD::COMI;
4320      CC = ISD::SETLE;
4321      break;
4322    case Intrinsic::x86_sse_comigt_ss:
4323    case Intrinsic::x86_sse2_comigt_sd:
4324      Opc = X86ISD::COMI;
4325      CC = ISD::SETGT;
4326      break;
4327    case Intrinsic::x86_sse_comige_ss:
4328    case Intrinsic::x86_sse2_comige_sd:
4329      Opc = X86ISD::COMI;
4330      CC = ISD::SETGE;
4331      break;
4332    case Intrinsic::x86_sse_comineq_ss:
4333    case Intrinsic::x86_sse2_comineq_sd:
4334      Opc = X86ISD::COMI;
4335      CC = ISD::SETNE;
4336      break;
4337    case Intrinsic::x86_sse_ucomieq_ss:
4338    case Intrinsic::x86_sse2_ucomieq_sd:
4339      Opc = X86ISD::UCOMI;
4340      CC = ISD::SETEQ;
4341      break;
4342    case Intrinsic::x86_sse_ucomilt_ss:
4343    case Intrinsic::x86_sse2_ucomilt_sd:
4344      Opc = X86ISD::UCOMI;
4345      CC = ISD::SETLT;
4346      break;
4347    case Intrinsic::x86_sse_ucomile_ss:
4348    case Intrinsic::x86_sse2_ucomile_sd:
4349      Opc = X86ISD::UCOMI;
4350      CC = ISD::SETLE;
4351      break;
4352    case Intrinsic::x86_sse_ucomigt_ss:
4353    case Intrinsic::x86_sse2_ucomigt_sd:
4354      Opc = X86ISD::UCOMI;
4355      CC = ISD::SETGT;
4356      break;
4357    case Intrinsic::x86_sse_ucomige_ss:
4358    case Intrinsic::x86_sse2_ucomige_sd:
4359      Opc = X86ISD::UCOMI;
4360      CC = ISD::SETGE;
4361      break;
4362    case Intrinsic::x86_sse_ucomineq_ss:
4363    case Intrinsic::x86_sse2_ucomineq_sd:
4364      Opc = X86ISD::UCOMI;
4365      CC = ISD::SETNE;
4366      break;
4367    }
4368
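    // At this point Opc/CC describe the comparison; e.g. comieq.ss maps to
    // (X86ISD::COMI, ISD::SETEQ).  Schematically, the nodes built below
    // amount to a flag-producing compare followed by a setcc, roughly:
    //   comiss %xmm1, %xmm0
    //   sete   %al
    // with the i8 result then any-extended to i32.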
4369    unsigned X86CC;
4370    SDOperand LHS = Op.getOperand(1);
4371    SDOperand RHS = Op.getOperand(2);
4372    translateX86CC(CC, true, X86CC, LHS, RHS, DAG);
4373
4374    const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
4375    SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS };
4376    SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3);
4377    VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
4378    SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
4379    SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
4380    return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
4381  }
4382  }
4383}
4384
4385/// LowerOperation - Provide custom lowering hooks for some operations.
4386///
4387SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
4388  switch (Op.getOpcode()) {
4389  default: assert(0 && "Should not custom lower this!");
4390  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
4391  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
4392  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
4393  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
4394  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
4395  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
4396  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
4397  case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
4398  case ISD::SHL_PARTS:
4399  case ISD::SRA_PARTS:
4400  case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
4401  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
4402  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
4403  case ISD::FABS:               return LowerFABS(Op, DAG);
4404  case ISD::FNEG:               return LowerFNEG(Op, DAG);
4405  case ISD::SETCC:              return LowerSETCC(Op, DAG, DAG.getEntryNode());
4406  case ISD::SELECT:             return LowerSELECT(Op, DAG);
4407  case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
4408  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
4409  case ISD::CALL:               return LowerCALL(Op, DAG);
4410  case ISD::RET:                return LowerRET(Op, DAG);
4411  case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
4412  case ISD::MEMSET:             return LowerMEMSET(Op, DAG);
4413  case ISD::MEMCPY:             return LowerMEMCPY(Op, DAG);
4414  case ISD::READCYCLECOUNTER:   return LowerREADCYCLECOUNTER(Op, DAG);
4415  case ISD::VASTART:            return LowerVASTART(Op, DAG);
4416  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4417  }
4418}
4419
4420const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
4421  switch (Opcode) {
4422  default: return NULL;
4423  case X86ISD::SHLD:               return "X86ISD::SHLD";
4424  case X86ISD::SHRD:               return "X86ISD::SHRD";
4425  case X86ISD::FAND:               return "X86ISD::FAND";
4426  case X86ISD::FXOR:               return "X86ISD::FXOR";
4427  case X86ISD::FILD:               return "X86ISD::FILD";
4428  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
4429  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
4430  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
4431  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
4432  case X86ISD::FLD:                return "X86ISD::FLD";
4433  case X86ISD::FST:                return "X86ISD::FST";
4434  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
4435  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
4436  case X86ISD::CALL:               return "X86ISD::CALL";
4437  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
4438  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
4439  case X86ISD::CMP:                return "X86ISD::CMP";
4440  case X86ISD::COMI:               return "X86ISD::COMI";
4441  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
4442  case X86ISD::SETCC:              return "X86ISD::SETCC";
4443  case X86ISD::CMOV:               return "X86ISD::CMOV";
4444  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
4445  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
4446  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
4447  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
4448  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
4449  case X86ISD::LOAD_UA:            return "X86ISD::LOAD_UA";
4450  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
4451  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
4452  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
4453  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
4454  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
4455  }
4456}
4457
4458/// isLegalAddressImmediate - Return true if the integer value or
4459/// GlobalValue can be used as the offset of the target addressing mode.
4460bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const {
4461  // X86 allows a sign-extended 32-bit immediate field.
4462  return (V >= -(1LL << 31) && V <= (1LL << 31)-1);
4463}
4464
4465bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
4466  // GV is 64-bit but displacement field is 32-bit unless we are in small code
4467  // model. Mac OS X happens to support only small PIC code model.
4468  // FIXME: better support for other OS's.
4469  if (Subtarget->is64Bit() && !Subtarget->isTargetDarwin())
4470    return false;
4471  if (Subtarget->isTargetDarwin()) {
4472    Reloc::Model RModel = getTargetMachine().getRelocationModel();
4473    if (RModel == Reloc::Static)
4474      return true;
4475    else if (RModel == Reloc::DynamicNoPIC)
4476      return !DarwinGVRequiresExtraLoad(GV);
4477    else
4478      return false;
4479  } else
4480    return true;
4481}
4482
4483/// isShuffleMaskLegal - Targets can use this to indicate that they only
4484/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
4485/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
4486/// are assumed to be legal.
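/// For example (illustrative), 4-element masks and the splat / PSHUFHW /
/// PSHUFLW / UNPCKL / UNPCKH patterns tested below are reported as legal;
/// an arbitrary v16i8 permutation is not.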
4487bool
4488X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
4489  // Only do shuffles on 128-bit vector types for now.
4490  if (MVT::getSizeInBits(VT) == 64) return false;
4491  return (Mask.Val->getNumOperands() <= 4 ||
4492          isSplatMask(Mask.Val)  ||
4493          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
4494          X86::isUNPCKLMask(Mask.Val) ||
4495          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
4496          X86::isUNPCKHMask(Mask.Val));
4497}
4498
4499bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
4500                                               MVT::ValueType EVT,
4501                                               SelectionDAG &DAG) const {
4502  unsigned NumElts = BVOps.size();
4503  // Only do shuffles on 128-bit vector types for now.
4504  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
4505  if (NumElts == 2) return true;
4506  if (NumElts == 4) {
4507    return (isMOVLMask(BVOps)  || isCommutedMOVL(BVOps, true) ||
4508            isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps));
4509  }
4510  return false;
4511}
4512
4513//===----------------------------------------------------------------------===//
4514//                           X86 Scheduler Hooks
4515//===----------------------------------------------------------------------===//
4516
4517MachineBasicBlock *
4518X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
4519                                           MachineBasicBlock *BB) {
4520  switch (MI->getOpcode()) {
4521  default: assert(false && "Unexpected instr type to insert");
4522  case X86::CMOV_FR32:
4523  case X86::CMOV_FR64:
4524  case X86::CMOV_V4F32:
4525  case X86::CMOV_V2F64:
4526  case X86::CMOV_V2I64: {
4527    // To "insert" a SELECT_CC instruction, we actually have to insert the
4528    // diamond control-flow pattern.  The incoming instruction knows the
4529    // destination vreg to set, the condition code register to branch on, the
4530    // true/false values to select between, and a branch opcode to use.
4531    const BasicBlock *LLVM_BB = BB->getBasicBlock();
4532    ilist<MachineBasicBlock>::iterator It = BB;
4533    ++It;
4534
4535    //  thisMBB:
4536    //  ...
4537    //   TrueVal = ...
4538    //   cmpTY ccX, r1, r2
4539    //   bCC sinkMBB
4540    //   fallthrough --> copy0MBB
4541    MachineBasicBlock *thisMBB = BB;
4542    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
4543    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
4544    unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue());
4545    BuildMI(BB, Opc, 1).addMBB(sinkMBB);
4546    MachineFunction *F = BB->getParent();
4547    F->getBasicBlockList().insert(It, copy0MBB);
4548    F->getBasicBlockList().insert(It, sinkMBB);
4549    // Update machine-CFG edges by first adding all successors of the current
4550    // block to the new block which will contain the Phi node for the select.
4551    for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
4552        e = BB->succ_end(); i != e; ++i)
4553      sinkMBB->addSuccessor(*i);
4554    // Next, remove all successors of the current block, and add the true
4555    // and fallthrough blocks as its successors.
4556    while (!BB->succ_empty())
4557      BB->removeSuccessor(BB->succ_begin());
4558    BB->addSuccessor(copy0MBB);
4559    BB->addSuccessor(sinkMBB);
4560
4561    //  copy0MBB:
4562    //   %FalseValue = ...
4563    //   # fallthrough to sinkMBB
4564    BB = copy0MBB;
4565
4566    // Update machine-CFG edges
4567    BB->addSuccessor(sinkMBB);
4568
4569    //  sinkMBB:
4570    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
4571    //  ...
4572    BB = sinkMBB;
4573    BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg())
4574      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
4575      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
4576
4577    delete MI;   // The pseudo instruction is gone now.
4578    return BB;
4579  }
4580
4581  case X86::FP_TO_INT16_IN_MEM:
4582  case X86::FP_TO_INT32_IN_MEM:
4583  case X86::FP_TO_INT64_IN_MEM: {
4584    // Change the floating point control register to use "round towards zero"
4585    // mode when truncating to an integer value.
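    // Schematically, the code built below is (slot = a 2-byte stack
    // temporary, dest = the pseudo's address operand):
    //   fnstcw  [slot]              ; save the current control word
    //   mov     ax, [slot]          ; remember its original value
    //   mov     word [slot], 0xC7F  ; rounding control = round toward zero
    //   fldcw   [slot]              ; activate truncation
    //   mov     [slot], ax          ; restore the in-memory image
    //   fist*   [dest]              ; the truncating integer store
    //   fldcw   [slot]              ; reload the original control word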
4586    MachineFunction *F = BB->getParent();
4587    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
4588    addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
4589
4590    // Load the old value of the control word...
4591    unsigned OldCW =
4592      F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
4593    addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);
4594
4595    // Set the rounding control bits to "round toward zero"...
4596    addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);
4597
4598    // Reload the modified control word now...
4599    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
4600
4601    // Restore the memory image of control word to original value
4602    addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);
4603
4604    // Get the X86 opcode to use.
4605    unsigned Opc;
4606    switch (MI->getOpcode()) {
4607    default: assert(0 && "illegal opcode!");
4608    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
4609    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
4610    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
4611    }
4612
4613    X86AddressMode AM;
4614    MachineOperand &Op = MI->getOperand(0);
4615    if (Op.isRegister()) {
4616      AM.BaseType = X86AddressMode::RegBase;
4617      AM.Base.Reg = Op.getReg();
4618    } else {
4619      AM.BaseType = X86AddressMode::FrameIndexBase;
4620      AM.Base.FrameIndex = Op.getFrameIndex();
4621    }
4622    Op = MI->getOperand(1);
4623    if (Op.isImmediate())
4624      AM.Scale = Op.getImmedValue();
4625    Op = MI->getOperand(2);
4626    if (Op.isImmediate())
4627      AM.IndexReg = Op.getImmedValue();
4628    Op = MI->getOperand(3);
4629    if (Op.isGlobalAddress()) {
4630      AM.GV = Op.getGlobal();
4631    } else {
4632      AM.Disp = Op.getImmedValue();
4633    }
4634    addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());
4635
4636    // Reload the original control word now.
4637    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
4638
4639    delete MI;   // The pseudo instruction is gone now.
4640    return BB;
4641  }
4642  }
4643}
4644
4645//===----------------------------------------------------------------------===//
4646//                           X86 Optimization Hooks
4647//===----------------------------------------------------------------------===//
4648
4649void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
4650                                                       uint64_t Mask,
4651                                                       uint64_t &KnownZero,
4652                                                       uint64_t &KnownOne,
4653                                                       unsigned Depth) const {
4654  unsigned Opc = Op.getOpcode();
4655  assert((Opc >= ISD::BUILTIN_OP_END ||
4656          Opc == ISD::INTRINSIC_WO_CHAIN ||
4657          Opc == ISD::INTRINSIC_W_CHAIN ||
4658          Opc == ISD::INTRINSIC_VOID) &&
4659         "Should use MaskedValueIsZero if you don't know whether Op"
4660         " is a target node!");
4661
4662  KnownZero = KnownOne = 0;   // Don't know anything.
4663  switch (Opc) {
4664  default: break;
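  // X86ISD::SETCC produces a 0/1 value in an i8 register, so every bit above
  // the low bit is known to be zero.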
4665  case X86ISD::SETCC:
4666    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
4667    break;
4668  }
4669}
4670
4671/// getShuffleScalarElt - Returns the scalar element that will make up the ith
4672/// element of the result of the vector shuffle.
4673static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
4674  MVT::ValueType VT = N->getValueType(0);
4675  SDOperand PermMask = N->getOperand(2);
4676  unsigned NumElems = PermMask.getNumOperands();
4677  SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
4678  i %= NumElems;
4679  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
4680    return (i == 0)
4681      ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
4682  } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
4683    SDOperand Idx = PermMask.getOperand(i);
4684    if (Idx.getOpcode() == ISD::UNDEF)
4685      return DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
4686    return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
4687  }
4688  return SDOperand();
4689}
4690
4691/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
4692/// node is a GlobalAddress + an offset.
4693static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
4694  if (N->getOpcode() == X86ISD::Wrapper) {
4695    if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
4696      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
4697      return true;
4698    }
4699  } else if (N->getOpcode() == ISD::ADD) {
4700    SDOperand N1 = N->getOperand(0);
4701    SDOperand N2 = N->getOperand(1);
4702    if (isGAPlusOffset(N1.Val, GA, Offset)) {
4703      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
4704      if (V) {
4705        Offset += V->getSignExtended();
4706        return true;
4707      }
4708    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
4709      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
4710      if (V) {
4711        Offset += V->getSignExtended();
4712        return true;
4713      }
4714    }
4715  }
4716  return false;
4717}
4718
4719/// isConsecutiveLoad - Returns true if N is loading from an address of Base
4720/// + Dist * Size.
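/// For example (illustrative), if Base loads 4 bytes from frame index FI at
/// offset 0, then N is "consecutive" with Dist = 2, Size = 4 iff N loads
/// 4 bytes from FI at offset 8.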
4721static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
4722                              MachineFrameInfo *MFI) {
4723  if (N->getOperand(0).Val != Base->getOperand(0).Val)
4724    return false;
4725
4726  SDOperand Loc = N->getOperand(1);
4727  SDOperand BaseLoc = Base->getOperand(1);
4728  if (Loc.getOpcode() == ISD::FrameIndex) {
4729    if (BaseLoc.getOpcode() != ISD::FrameIndex)
4730      return false;
4731    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
4732    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
4733    int FS  = MFI->getObjectSize(FI);
4734    int BFS = MFI->getObjectSize(BFI);
4735    if (FS != BFS || FS != Size) return false;
4736    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
4737  } else {
4738    GlobalValue *GV1 = NULL;
4739    GlobalValue *GV2 = NULL;
4740    int64_t Offset1 = 0;
4741    int64_t Offset2 = 0;
4742    bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
4743    bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
4744    if (isGA1 && isGA2 && GV1 == GV2)
4745      return Offset1 == (Offset2 + Dist*Size);
4746  }
4747
4748  return false;
4749}
4750
4751static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
4752                              const X86Subtarget *Subtarget) {
4753  GlobalValue *GV;
4754  int64_t Offset;
4755  if (isGAPlusOffset(Base, GV, Offset))
4756    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
4757  else {
4758    assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
4759    int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
4760    if (BFI < 0)
4761      // Fixed objects do not specify alignment, however the offsets are known.
4762      return ((Subtarget->getStackAlignment() % 16) == 0 &&
4763              (MFI->getObjectOffset(BFI) % 16) == 0);
4764    else
4765      return MFI->getObjectAlignment(BFI) >= 16;
4766  }
4767  return false;
4768}
4769
4770
4771/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
4772/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
4773/// if the load addresses are consecutive, non-overlapping, and in the right
4774/// order.
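/// For example (illustrative): a v4f32 shuffle whose elements are loads of
/// A[0], A[1], A[2], A[3] with mask <0, 1, 2, 3> becomes one 16-byte load of
/// A -- an ordinary (aligned) vector load if A is known 16-byte aligned, an
/// unaligned LOAD_UA (movups) otherwise.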
4775static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
4776                                       const X86Subtarget *Subtarget) {
4777  MachineFunction &MF = DAG.getMachineFunction();
4778  MachineFrameInfo *MFI = MF.getFrameInfo();
4779  MVT::ValueType VT = N->getValueType(0);
4780  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
4781  SDOperand PermMask = N->getOperand(2);
4782  int NumElems = (int)PermMask.getNumOperands();
4783  SDNode *Base = NULL;
4784  for (int i = 0; i < NumElems; ++i) {
4785    SDOperand Idx = PermMask.getOperand(i);
4786    if (Idx.getOpcode() == ISD::UNDEF) {
4787      if (!Base) return SDOperand();
4788    } else {
4789      SDOperand Arg =
4790        getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
4791      if (!Arg.Val || Arg.getOpcode() != ISD::LOAD)
4792        return SDOperand();
4793      if (!Base)
4794        Base = Arg.Val;
4795      else if (!isConsecutiveLoad(Arg.Val, Base,
4796                                  i, MVT::getSizeInBits(EVT)/8,MFI))
4797        return SDOperand();
4798    }
4799  }
4800
4801  bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
4802  if (isAlign16)
4803    return DAG.getLoad(VT, Base->getOperand(0), Base->getOperand(1),
4804                       Base->getOperand(2));
4805  else {
4806    // Just use movups, it's shorter.
4807    std::vector<MVT::ValueType> Tys;
4808    Tys.push_back(MVT::v4f32);
4809    Tys.push_back(MVT::Other);
4810    SmallVector<SDOperand, 3> Ops;
4811    Ops.push_back(Base->getOperand(0));
4812    Ops.push_back(Base->getOperand(1));
4813    Ops.push_back(Base->getOperand(2));
4814    return DAG.getNode(ISD::BIT_CONVERT, VT,
4815                       DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size()));
4816  }
4817}
4818
4819SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
4820                                               DAGCombinerInfo &DCI) const {
4821  TargetMachine &TM = getTargetMachine();
4822  SelectionDAG &DAG = DCI.DAG;
4823  switch (N->getOpcode()) {
4824  default: break;
4825  case ISD::VECTOR_SHUFFLE:
4826    return PerformShuffleCombine(N, DAG, Subtarget);
4827  }
4828
4829  return SDOperand();
4830}
4831
4832//===----------------------------------------------------------------------===//
4833//                           X86 Inline Assembly Support
4834//===----------------------------------------------------------------------===//
4835
4836/// getConstraintType - Given a constraint letter, return the type of
4837/// constraint it is for this target.
4838X86TargetLowering::ConstraintType
4839X86TargetLowering::getConstraintType(char ConstraintLetter) const {
4840  switch (ConstraintLetter) {
4841  case 'A':
4842  case 'r':
4843  case 'R':
4844  case 'l':
4845  case 'q':
4846  case 'Q':
4847  case 'x':
4848  case 'Y':
4849    return C_RegisterClass;
4850  default: return TargetLowering::getConstraintType(ConstraintLetter);
4851  }
4852}
4853
4854std::vector<unsigned> X86TargetLowering::
4855getRegClassForInlineAsmConstraint(const std::string &Constraint,
4856                                  MVT::ValueType VT) const {
4857  if (Constraint.size() == 1) {
4858    // FIXME: not handling fp-stack yet!
4859    // FIXME: not handling MMX registers yet ('y' constraint).
4860    switch (Constraint[0]) {      // GCC X86 Constraint Letters
4861    default: break;  // Unknown constraint letter
4862    case 'A':   // EAX/EDX
4863      if (VT == MVT::i32 || VT == MVT::i64)
4864        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
4865      break;
4866    case 'r':   // GENERAL_REGS
4867    case 'R':   // LEGACY_REGS
4868      if (VT == MVT::i32)
4869        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
4870                                     X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
4871      else if (VT == MVT::i16)
4872        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
4873                                     X86::SI, X86::DI, X86::BP, X86::SP, 0);
4874      else if (VT == MVT::i8)
4875        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4876      break;
4877    case 'l':   // INDEX_REGS
4878      if (VT == MVT::i32)
4879        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
4880                                     X86::ESI, X86::EDI, X86::EBP, 0);
4881      else if (VT == MVT::i16)
4882        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
4883                                     X86::SI, X86::DI, X86::BP, 0);
4884      else if (VT == MVT::i8)
4885        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4886      break;
4887    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
4888    case 'Q':   // Q_REGS
4889      if (VT == MVT::i32)
4890        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
4891      else if (VT == MVT::i16)
4892        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
4893      else if (VT == MVT::i8)
4894        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4895      break;
4896    case 'x':   // SSE_REGS if SSE1 allowed
4897      if (Subtarget->hasSSE1())
4898        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4899                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
4900                                     0);
4901      return std::vector<unsigned>();
4902    case 'Y':   // SSE_REGS if SSE2 allowed
4903      if (Subtarget->hasSSE2())
4904        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4905                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
4906                                     0);
4907      return std::vector<unsigned>();
4908    }
4909  }
4910
4911  return std::vector<unsigned>();
4912}
4913
4914std::pair<unsigned, const TargetRegisterClass*>
4915X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
4916                                                MVT::ValueType VT) const {
4917  // Use the default implementation in TargetLowering to convert the register
4918  // constraint into a member of a register class.
4919  std::pair<unsigned, const TargetRegisterClass*> Res;
4920  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
4921
4922  // Not found?  Bail out.
4923  if (Res.second == 0) return Res;
4924
4925  // Otherwise, check to see if this is a register class of the wrong value
4926  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it to
4927  // turn into {ax},{dx}.
4928  if (Res.second->hasType(VT))
4929    return Res;   // Correct type already, nothing to do.
4930
4931  // All of the single-register GCC register classes map their values onto
4932  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
4933  // really want an 8-bit or 32-bit register, map to the appropriate register
4934  // class and return the appropriate register.
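  // Illustrative: an i32 operand constrained to "{ax}" first resolves to
  // (X86::AX, GR16) above; the code below remaps it to (X86::EAX, GR32).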
4935  if (Res.second != X86::GR16RegisterClass)
4936    return Res;
4937
4938  if (VT == MVT::i8) {
4939    unsigned DestReg = 0;
4940    switch (Res.first) {
4941    default: break;
4942    case X86::AX: DestReg = X86::AL; break;
4943    case X86::DX: DestReg = X86::DL; break;
4944    case X86::CX: DestReg = X86::CL; break;
4945    case X86::BX: DestReg = X86::BL; break;
4946    }
4947    if (DestReg) {
4948      Res.first = DestReg;
4949      Res.second = X86::GR8RegisterClass;
4950    }
4951  } else if (VT == MVT::i32) {
4952    unsigned DestReg = 0;
4953    switch (Res.first) {
4954    default: break;
4955    case X86::AX: DestReg = X86::EAX; break;
4956    case X86::DX: DestReg = X86::EDX; break;
4957    case X86::CX: DestReg = X86::ECX; break;
4958    case X86::BX: DestReg = X86::EBX; break;
4959    case X86::SI: DestReg = X86::ESI; break;
4960    case X86::DI: DestReg = X86::EDI; break;
4961    case X86::BP: DestReg = X86::EBP; break;
4962    case X86::SP: DestReg = X86::ESP; break;
4963    }
4964    if (DestReg) {
4965      Res.first = DestReg;
4966      Res.second = X86::GR32RegisterClass;
4967    }
4968  } else if (VT == MVT::i64) {
4969    unsigned DestReg = 0;
4970    switch (Res.first) {
4971    default: break;
4972    case X86::AX: DestReg = X86::RAX; break;
4973    case X86::DX: DestReg = X86::RDX; break;
4974    case X86::CX: DestReg = X86::RCX; break;
4975    case X86::BX: DestReg = X86::RBX; break;
4976    case X86::SI: DestReg = X86::RSI; break;
4977    case X86::DI: DestReg = X86::RDI; break;
4978    case X86::BP: DestReg = X86::RBP; break;
4979    case X86::SP: DestReg = X86::RSP; break;
4980    }
4981    if (DestReg) {
4982      Res.first = DestReg;
4983      Res.second = X86::GR64RegisterClass;
4984    }
4985  }
4986
4987  return Res;
4988}
4989
4990