X86ISelLowering.cpp revision 25ab690a43cbbb591b76d49e3595b019c32f4b3f
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: temporary.
#include "llvm/Support/CommandLine.h"
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));
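// (A hidden llc option; it must be requested explicitly, e.g.
// "llc -enable-x86-fastcc foo.bc".)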

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);
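  // (Scales 3, 5, and 9 are formed as base + index*{2,4,8} with the same
  // register used as both base and index, e.g. "lea eax, [eax + eax*4]"
  // computes x*5; they consume both address operands, hence the extra cost.)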

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSE)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32.
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  if (!Subtarget->is64Bit()) {
    // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
    setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
  }

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSE && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
  }

  setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
  setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);

  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::SEXTLOAD         , MVT::i1   , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);

  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP          , MVT::i64  , Expand);
    setOperationAction(ISD::CTTZ           , MVT::i64  , Expand);
    setOperationAction(ISD::CTLZ           , MVT::i64  , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT        , MVT::i64  , Custom);
    setOperationAction(ISD::SETCC         , MVT::i64  , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET             , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool  , MVT::i64  , Custom);
    setOperationAction(ISD::JumpTable     , MVT::i64  , Custom);
    setOperationAction(ISD::GlobalAddress , MVT::i64  , Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64  , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
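  // (On 32-bit x86 a 64-bit shift is split into two 32-bit halves; the
  // custom lowering emits an shld/shrd pair plus a check of bit 5 of the
  // shift amount to handle counts of 32 or more.)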
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetic
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::AND,                MVT::v4f32, Legal);
    setOperationAction(ISD::OR,                 MVT::v4f32, Legal);
    setOperationAction(ISD::XOR,                MVT::v4f32, Legal);
    setOperationAction(ISD::ADD,                MVT::v4f32, Legal);
    setOperationAction(ISD::SUB,                MVT::v4f32, Legal);
    setOperationAction(ISD::MUL,                MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,                MVT::v2f64, Legal);
    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v2f64, Legal);
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    setOperationAction(ISD::MUL,                MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are optimizing for size.
  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
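  // (With a threshold of 16 stores, small fixed-size memset/memcpy calls
  // are expanded inline into a short sequence of stores instead of a
  // library call.)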
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

//===----------------------------------------------------------------------===//
//                    C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value.  It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

/// HowToPassCCCArgument - Returns how a formal argument of the specified type
/// should be passed. If it is passed on the stack, returns the size of the
/// stack slot; if it is passed in an XMM register, returns the number of XMM
/// registers needed.
static void
HowToPassCCCArgument(MVT::ValueType ObjectVT, unsigned NumXMMRegs,
                     unsigned &ObjSize, unsigned &ObjXMMRegs) {
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:  ObjSize = 1; break;
  case MVT::i16: ObjSize = 2; break;
  case MVT::i32: ObjSize = 4; break;
  case MVT::i64: ObjSize = 8; break;
  case MVT::f32: ObjSize = 4; break;
  case MVT::f64: ObjSize = 8; break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 4)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}
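// (For example, once NumXMMRegs has reached 4 a v4f32 argument no longer
// fits in a register, so it comes back as ObjSize = 16 and is passed in a
// 16-byte stack slot.)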

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is <= 4 bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };
  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjXMMRegs = 0;
    HowToPassCCCArgument(ObjectVT, NumXMMRegs, ObjSize, ObjXMMRegs);
    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    SDOperand ArgValue;
    if (ObjXMMRegs) {
      // Passed in an XMM register.
      unsigned Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                               X86::VR128RegisterClass);
      ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
      ArgValues.push_back(ArgValue);
      NumXMMRegs += ObjXMMRegs;
    } else {
      // XMM arguments have to be aligned on a 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
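      // (Rounds ArgOffset up to the next multiple of 16, e.g. 20 -> 32.)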
      // Create the frame index object for this incoming parameter...
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                             DAG.getSrcValue(NULL));
      ArgValues.push_back(ArgValue);
      ArgOffset += ArgIncrement;   // Move on to the next argument...
    }
  }

  ArgValues.push_back(Root);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
  RegSaveFrameIndex = 0xAAAAAAA;  // X86-64 only.
  ReturnAddrIndex = 0;            // No return address slot generated yet.
  BytesToPopOnReturn = 0;         // Callee pops nothing.
  BytesCallerReserves = ArgOffset;

  // If this is a struct return on Darwin/X86, the callee pops the hidden struct
  // pointer.
  if (MF.getFunction()->getCallingConv() == CallingConv::CSRet &&
      Subtarget->isTargetDarwin())
    BytesToPopOnReturn = 4;

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0], ArgValues.size());
}


SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain     = Op.getOperand(0);
  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  MVT::ValueType RetVT= Op.Val->getValueType(0);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  // Keep track of the number of XMM regs passed so far.
  unsigned NumXMMRegs = 0;
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
    case MVT::f64:
      NumBytes += 8;
      break;
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4)
        ++NumXMMRegs;
      else {
        // XMM arguments have to be aligned on a 16-byte boundary.
        NumBytes = ((NumBytes + 15) / 16) * 16;
        NumBytes += 16;
      }
      break;
    }
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumXMMRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16: {
      // Promote the integer to 32 bits.  If the input type is signed, use a
      // sign extend; otherwise use a zero extend.
      unsigned ExtOp =
        cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue() ?
        ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, MVT::i32, Arg);
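      // (For example, a signext i8 argument is sign extended to i32 here
      // and then stored into a full 4-byte stack slot below.)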
    }
    // Fallthrough

    case MVT::i32:
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
      ArgOffset += 4;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
      ArgOffset += 8;
      break;
    }
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4) {
        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
        NumXMMRegs++;
      } else {
        // XMM arguments have to be aligned on a 16-byte boundary.
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
        ArgOffset += 16;
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  // If this is a call to a struct-return function on Darwin/X86, the callee
  // pops the hidden struct pointer, so we have to push it back.
  if (CallingConv == CallingConv::CSRet && Subtarget->isTargetDarwin())
    NumBytesForCalleeToPush = 4;

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(RetVT);
    break;
  case MVT::f32:
  case MVT::f64: {
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(InFlag);
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys,
                                   &Ops[0], Ops.size());
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      Tys.clear();
      Tys.push_back(MVT::Other);
      Ops.clear();
      Ops.push_back(Chain);
      Ops.push_back(RetVal);
      Ops.push_back(StackSlot);
      Ops.push_back(DAG.getValueType(RetVT));
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
      RetVal = DAG.getLoad(RetVT, Chain, StackSlot,
                           DAG.getSrcValue(NULL));
      Chain = RetVal.getValue(1);
    }

    if (RetVT == MVT::f32 && !X86ScalarSSE)
      // FIXME: we would really like to remember that this FP_ROUND
      // operation is okay to eliminate if we allow excess FP precision.
      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
    ResultVals.push_back(RetVal);
    NodeTys.push_back(RetVT);
    break;
  }
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                 X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// HowToPassX86_64CCCArgument - Returns how a formal argument of the specified
/// type should be passed. If it is passed on the stack, returns the size of
/// the stack slot; if it is passed in an integer or XMM register, returns the
/// number of integer or XMM registers needed.
static void
HowToPassX86_64CCCArgument(MVT::ValueType ObjectVT,
                           unsigned NumIntRegs, unsigned NumXMMRegs,
                           unsigned &ObjSize, unsigned &ObjIntRegs,
                           unsigned &ObjXMMRegs) {
  ObjSize = 0;
  ObjIntRegs = 0;
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    if (NumIntRegs < 6)
      ObjIntRegs = 1;
    else {
      switch (ObjectVT) {
      default: break;
      case MVT::i8:  ObjSize = 1; break;
      case MVT::i16: ObjSize = 2; break;
      case MVT::i32: ObjSize = 4; break;
      case MVT::i64: ObjSize = 8; break;
      }
    }
    break;
  case MVT::f32:
  case MVT::f64:
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 8)
      ObjXMMRegs = 1;
    else {
      switch (ObjectVT) {
      default: break;
      case MVT::f32:  ObjSize = 4; break;
      case MVT::f64:  ObjSize = 8; break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64: ObjSize = 16; break;
      }
    }
    break;
  }
}
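// (For example, the seventh integer argument arrives with NumIntRegs == 6,
// so it gets ObjSize = 8 and is passed on the stack rather than in a GPR.)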

SDOperand
X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [RSP] -- return address
  // [RSP + 8] -- first nonreg argument (leftmost lexically)
  // [RSP +16] -- second nonreg argument, if 1st argument is <= 8 bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  unsigned NumIntRegs = 0;  // Int regs used for parameter passing.
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  static const unsigned GPR8ArgRegs[] = {
    X86::DIL, X86::SIL, X86::DL,  X86::CL,  X86::R8B, X86::R9B
  };
  static const unsigned GPR16ArgRegs[] = {
    X86::DI,  X86::SI,  X86::DX,  X86::CX,  X86::R8W, X86::R9W
  };
  static const unsigned GPR32ArgRegs[] = {
    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
  };
  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8,  X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 8;
    unsigned ObjSize = 0;
    unsigned ObjIntRegs = 0;
    unsigned ObjXMMRegs = 0;

    // FIXME: __int128 and long double support?
    HowToPassX86_64CCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
                               ObjSize, ObjIntRegs, ObjXMMRegs);
    if (ObjSize > 8)
      ArgIncrement = ObjSize;

    unsigned Reg = 0;
    SDOperand ArgValue;
    if (ObjIntRegs || ObjXMMRegs) {
      switch (ObjectVT) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::i64: {
        TargetRegisterClass *RC = NULL;
        switch (ObjectVT) {
        default: break;
        case MVT::i8:
          RC = X86::GR8RegisterClass;
          Reg = GPR8ArgRegs[NumIntRegs];
          break;
        case MVT::i16:
          RC = X86::GR16RegisterClass;
          Reg = GPR16ArgRegs[NumIntRegs];
          break;
        case MVT::i32:
          RC = X86::GR32RegisterClass;
          Reg = GPR32ArgRegs[NumIntRegs];
          break;
        case MVT::i64:
          RC = X86::GR64RegisterClass;
          Reg = GPR64ArgRegs[NumIntRegs];
          break;
        }
        Reg = AddLiveIn(MF, Reg, RC);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      case MVT::f32:
      case MVT::f64:
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64: {
        TargetRegisterClass *RC = (ObjectVT == MVT::f32) ?
          X86::FR32RegisterClass : ((ObjectVT == MVT::f64) ?
                              X86::FR64RegisterClass : X86::VR128RegisterClass);
        Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], RC);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      }
      NumIntRegs += ObjIntRegs;
      NumXMMRegs += ObjXMMRegs;
    } else if (ObjSize) {
      // XMM arguments have to be aligned on a 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                             DAG.getSrcValue(NULL));
      ArgOffset += ArgIncrement;   // Move on to the next argument.
    }

    ArgValues.push_back(ArgValue);
  }

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so they
    // may be loaded by dereferencing the result of va_next.
    VarArgsGPOffset = NumIntRegs * 8;
    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
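    // (The register save area holds 6*8 = 48 bytes for the six GPR argument
    // registers followed by 8*16 = 128 bytes for the eight XMM registers,
    // 176 bytes in all, aligned to 16 bytes.)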

    // Store the integer parameter registers.
    std::vector<SDOperand> MemOps;
    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                              DAG.getConstant(VarArgsGPOffset, getPointerTy()));
    for (; NumIntRegs != 6; ++NumIntRegs) {
      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                X86::GR64RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
      SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
                                    Val, FIN, DAG.getSrcValue(NULL));
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(8, getPointerTy()));
    }

    // Now store the XMM (fp + vector) parameter registers.
    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
    for (; NumXMMRegs != 8; ++NumXMMRegs) {
      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                X86::VR128RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
      SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
                                    Val, FIN, DAG.getSrcValue(NULL));
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(16, getPointerTy()));
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = ArgOffset;

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0], ArgValues.size());
}

SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain     = Op.getOperand(0);
  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  MVT::ValueType RetVT= Op.Val->getValueType(0);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;
  unsigned NumIntRegs = 0;  // Int regs used for parameter passing.
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  static const unsigned GPR8ArgRegs[] = {
    X86::DIL, X86::SIL, X86::DL,  X86::CL,  X86::R8B, X86::R9B
  };
  static const unsigned GPR16ArgRegs[] = {
    X86::DI,  X86::SI,  X86::DX,  X86::CX,  X86::R8W, X86::R9W
  };
  static const unsigned GPR32ArgRegs[] = {
    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
  };
  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8,  X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    MVT::ValueType ArgVT = Arg.getValueType();

    switch (ArgVT) {
    default: assert(0 && "Unknown value type!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      if (NumIntRegs < 6)
        ++NumIntRegs;
      else
        NumBytes += 8;
      break;
    case MVT::f32:
    case MVT::f64:
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 8)
        NumXMMRegs++;
      else if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
        NumBytes += 8;
      else {
        // XMM arguments have to be aligned on a 16-byte boundary.
        NumBytes = ((NumBytes + 15) / 16) * 16;
        NumBytes += 16;
      }
      break;
    }
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumIntRegs = 0;
  NumXMMRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    MVT::ValueType ArgVT = Arg.getValueType();

    switch (ArgVT) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      if (NumIntRegs < 6) {
        unsigned Reg = 0;
        switch (ArgVT) {
        default: break;
        case MVT::i8:  Reg = GPR8ArgRegs[NumIntRegs];  break;
        case MVT::i16: Reg = GPR16ArgRegs[NumIntRegs]; break;
        case MVT::i32: Reg = GPR32ArgRegs[NumIntRegs]; break;
        case MVT::i64: Reg = GPR64ArgRegs[NumIntRegs]; break;
        }
        RegsToPass.push_back(std::make_pair(Reg, Arg));
        ++NumIntRegs;
      } else {
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
        ArgOffset += 8;
      }
      break;
    case MVT::f32:
    case MVT::f64:
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 8) {
        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
        NumXMMRegs++;
      } else {
        if (ArgVT != MVT::f32 && ArgVT != MVT::f64) {
          // XMM arguments have to be aligned on a 16-byte boundary.
          ArgOffset = ((ArgOffset + 15) / 16) * 16;
        }
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
        if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
          ArgOffset += 8;
        else
          ArgOffset += 16;
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isVarArg) {
    // From the AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as a hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
    // registers used and is in the range 0 - 8 inclusive.
    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
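    // (For example, a varargs call passing one double sets AL = 1; a callee
    // that reads AL = 0 knows no SSE argument registers are live and can
    // skip saving the XMM registers entirely.)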
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    if (Op.Val->getValueType(1) == MVT::i64) {
      // FIXME: __int128 support?
      Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::RDX, MVT::i64,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i64);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    // FIXME: long double support?
    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(RetVT);
    break;
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}

//===----------------------------------------------------------------------===//
//                    Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
//
// Note that this can be enhanced in the future to pass fp vals in registers
// (when we have a global fp allocator) and do other tricks.
//
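// (The 8n+4 bytes of arguments plus the 4-byte return address keep the
// stack pointer at an 8-byte multiple across the call, and give every
// fastcc function a compatible callee-pop shape for tail calls.)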
1306
1307/// HowToPassFastCCArgument - Returns how an formal argument of the specified
1308/// type should be passed. If it is through stack, returns the size of the stack
1309/// slot; if it is through integer or XMM register, returns the number of
1310/// integer or XMM registers are needed.
1311static void
1312HowToPassFastCCArgument(MVT::ValueType ObjectVT,
1313                        unsigned NumIntRegs, unsigned NumXMMRegs,
1314                        unsigned &ObjSize, unsigned &ObjIntRegs,
1315                        unsigned &ObjXMMRegs) {
1316  ObjSize = 0;
1317  ObjIntRegs = 0;
1318  ObjXMMRegs = 0;
1319
1320  switch (ObjectVT) {
1321  default: assert(0 && "Unhandled argument type!");
1322  case MVT::i8:
1323#if FASTCC_NUM_INT_ARGS_INREGS > 0
1324    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
1325      ObjIntRegs = 1;
1326    else
1327#endif
1328      ObjSize = 1;
1329    break;
1330  case MVT::i16:
1331#if FASTCC_NUM_INT_ARGS_INREGS > 0
1332    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
1333      ObjIntRegs = 1;
1334    else
1335#endif
1336      ObjSize = 2;
1337    break;
1338  case MVT::i32:
1339#if FASTCC_NUM_INT_ARGS_INREGS > 0
1340    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
1341      ObjIntRegs = 1;
1342    else
1343#endif
1344      ObjSize = 4;
1345    break;
1346  case MVT::i64:
1347#if FASTCC_NUM_INT_ARGS_INREGS > 0
1348    if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
1349      ObjIntRegs = 2;
1350    } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
1351      ObjIntRegs = 1;
1352      ObjSize = 4;
1353    } else
1354#endif
1355      ObjSize = 8;
    break;
1356  case MVT::f32:
1357    ObjSize = 4;
1358    break;
1359  case MVT::f64:
1360    ObjSize = 8;
1361    break;
1362  case MVT::v16i8:
1363  case MVT::v8i16:
1364  case MVT::v4i32:
1365  case MVT::v2i64:
1366  case MVT::v4f32:
1367  case MVT::v2f64:
1368    if (NumXMMRegs < 4)
1369      ObjXMMRegs = 1;
1370    else
1371      ObjSize = 16;
1372    break;
1373  }
1374}
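
// For example, with FASTCC_NUM_INT_ARGS_INREGS == 2 an i64 argument that
// arrives when one integer register is already taken (NumIntRegs == 1)
// yields ObjIntRegs = 1 and ObjSize = 4: the low half travels in the
// remaining register and the high half in a 4-byte stack slot.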
1375
1376SDOperand
1377X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
1378  unsigned NumArgs = Op.Val->getNumValues()-1;
1379  MachineFunction &MF = DAG.getMachineFunction();
1380  MachineFrameInfo *MFI = MF.getFrameInfo();
1381  SDOperand Root = Op.getOperand(0);
1382  std::vector<SDOperand> ArgValues;
1383
1384  // Add DAG nodes to load the arguments...  On entry to a function the stack
1385  // frame looks like this:
1386  //
1387  // [ESP] -- return address
1388  // [ESP + 4] -- first nonreg argument (leftmost lexically)
1389  // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size
1390  //    ...
1391  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
1392
1393  // Keep track of the number of integer regs passed so far.  This can be
1394  // 0 (neither EAX nor EDX is used), 1 (only EAX is used), or 2 (both EAX
1395  // and EDX are used).
1396  unsigned NumIntRegs = 0;
1397  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
1398
1399  static const unsigned XMMArgRegs[] = {
1400    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
1401  };
1402
1403  for (unsigned i = 0; i < NumArgs; ++i) {
1404    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
1405    unsigned ArgIncrement = 4;
1406    unsigned ObjSize = 0;
1407    unsigned ObjIntRegs = 0;
1408    unsigned ObjXMMRegs = 0;
1409
1410    HowToPassFastCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
1411                            ObjSize, ObjIntRegs, ObjXMMRegs);
1412    if (ObjSize > 4)
1413      ArgIncrement = ObjSize;
1414
1415    unsigned Reg = 0;
1416    SDOperand ArgValue;
1417    if (ObjIntRegs || ObjXMMRegs) {
1418      switch (ObjectVT) {
1419      default: assert(0 && "Unhandled argument type!");
1420      case MVT::i8:
1421        Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
1422                        X86::GR8RegisterClass);
1423        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i8);
1424        break;
1425      case MVT::i16:
1426        Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
1427                        X86::GR16RegisterClass);
1428        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i16);
1429        break;
1430      case MVT::i32:
1431        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
1432                        X86::GR32RegisterClass);
1433        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
1434        break;
1435      case MVT::i64:
1436        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
1437                        X86::GR32RegisterClass);
1438        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
1439        if (ObjIntRegs == 2) {
1440          Reg = AddLiveIn(MF, X86::EDX, X86::GR32RegisterClass);
1441          SDOperand ArgValue2 = DAG.getCopyFromReg(Root, Reg, MVT::i32);
1442          ArgValue= DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
1443        }
1444        break;
1445      case MVT::v16i8:
1446      case MVT::v8i16:
1447      case MVT::v4i32:
1448      case MVT::v2i64:
1449      case MVT::v4f32:
1450      case MVT::v2f64:
1451        Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass);
1452        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
1453        break;
1454      }
1455      NumIntRegs += ObjIntRegs;
1456      NumXMMRegs += ObjXMMRegs;
1457    }
1458
1459    if (ObjSize) {
1460      // XMM arguments have to be aligned on a 16-byte boundary.
1461      if (ObjSize == 16)
1462        ArgOffset = ((ArgOffset + 15) / 16) * 16;
1463      // Create the SelectionDAG nodes corresponding to a load from this
1464      // parameter.
1465      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1466      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
1467      if (ObjectVT == MVT::i64 && ObjIntRegs) {
        // The high half of the split i64 lives in a 4-byte stack slot; load
        // it as an i32 and pair it with the register half below.
1468        SDOperand ArgValue2 = DAG.getLoad(MVT::i32, Root, FIN,
1469                                          DAG.getSrcValue(NULL));
1470        ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
1471      } else
1472        ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
1473                               DAG.getSrcValue(NULL));
1474      ArgOffset += ArgIncrement;   // Move on to the next argument.
1475    }
1476
1477    ArgValues.push_back(ArgValue);
1478  }
1479
1480  ArgValues.push_back(Root);
1481
1482  // Make sure the callee pops 8n+4 bytes, so that the start of the argument
1483  // area stays 8-byte aligned after the return address has been pushed.
1484  if ((ArgOffset & 7) == 0)
1485    ArgOffset += 4;
1486
1487  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
1488  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
1489  ReturnAddrIndex = 0;             // No return address slot generated yet.
1490  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
1491  BytesCallerReserves = 0;
1492
1493  // Finally, inform the code generator which regs we return values in.
1494  switch (getValueType(MF.getFunction()->getReturnType())) {
1495  default: assert(0 && "Unknown type!");
1496  case MVT::isVoid: break;
1497  case MVT::i8:
1498  case MVT::i16:
1499  case MVT::i32:
1500    MF.addLiveOut(X86::EAX);
1501    break;
1502  case MVT::i64:
1503    MF.addLiveOut(X86::EAX);
1504    MF.addLiveOut(X86::EDX);
1505    break;
1506  case MVT::f32:
1507  case MVT::f64:
1508    MF.addLiveOut(X86::ST0);
1509    break;
1510  case MVT::v16i8:
1511  case MVT::v8i16:
1512  case MVT::v4i32:
1513  case MVT::v2i64:
1514  case MVT::v4f32:
1515  case MVT::v2f64:
1516    MF.addLiveOut(X86::XMM0);
1517    break;
1518  }
1519
1520  // Return the new list of results.
1521  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
1522                                     Op.Val->value_end());
1523  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size());
1524}
1525
1526SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG){
1527  SDOperand Chain     = Op.getOperand(0);
1528  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
1529  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
1530  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
1531  SDOperand Callee    = Op.getOperand(4);
1532  MVT::ValueType RetVT= Op.Val->getValueType(0);
1533  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;
1534
1535  // Count how many bytes are to be pushed on the stack.
1536  unsigned NumBytes = 0;
1537
1538  // Keep track of the number of integer regs passed so far.  This can be
1539  // 0 (neither EAX nor EDX is used), 1 (only EAX is used), or 2 (both EAX
1540  // and EDX are used).
1541  unsigned NumIntRegs = 0;
1542  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
1543
1544  static const unsigned GPRArgRegs[][2] = {
1545    { X86::AL,  X86::DL },
1546    { X86::AX,  X86::DX },
1547    { X86::EAX, X86::EDX }
1548  };
1549  static const unsigned XMMArgRegs[] = {
1550    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
1551  };
1552
1553  for (unsigned i = 0; i != NumOps; ++i) {
1554    SDOperand Arg = Op.getOperand(5+2*i);
1555
1556    switch (Arg.getValueType()) {
1557    default: assert(0 && "Unknown value type!");
1558    case MVT::i8:
1559    case MVT::i16:
1560    case MVT::i32:
1561#if FASTCC_NUM_INT_ARGS_INREGS > 0
1562      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
1563        ++NumIntRegs;
1564        break;
1565      }
1566#endif
1567      // Fall through
1568    case MVT::f32:
1569      NumBytes += 4;
1570      break;
1571    case MVT::f64:
1572      NumBytes += 8;
1573      break;
1574    case MVT::v16i8:
1575    case MVT::v8i16:
1576    case MVT::v4i32:
1577    case MVT::v2i64:
1578    case MVT::v4f32:
1579    case MVT::v2f64:
1580      if (NumXMMRegs < 4)
1581        NumXMMRegs++;
1582      else {
1583        // XMM arguments have to be aligned on a 16-byte boundary.
1584        NumBytes = ((NumBytes + 15) / 16) * 16;
1585        NumBytes += 16;
1586      }
1587      break;
1588    }
1589  }
1590
1591  // Make sure the call frame takes 8n+4 bytes, so that the start of the
1592  // argument area stays 8-byte aligned after the return address is pushed.
1593  if ((NumBytes & 7) == 0)
1594    NumBytes += 4;
1595
1596  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
1597
1598  // Arguments go on the stack in reverse order, as specified by the ABI.
1599  unsigned ArgOffset = 0;
1600  NumIntRegs = 0;
1601  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
1602  std::vector<SDOperand> MemOpChains;
1603  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
1604  for (unsigned i = 0; i != NumOps; ++i) {
1605    SDOperand Arg = Op.getOperand(5+2*i);
1606
1607    switch (Arg.getValueType()) {
1608    default: assert(0 && "Unexpected ValueType for argument!");
1609    case MVT::i8:
1610    case MVT::i16:
1611    case MVT::i32:
1612#if FASTCC_NUM_INT_ARGS_INREGS > 0
1613      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
1614        RegsToPass.push_back(
1615              std::make_pair(GPRArgRegs[Arg.getValueType()-MVT::i8][NumIntRegs],
1616                             Arg));
1617        ++NumIntRegs;
1618        break;
1619      }
1620#endif
1621      // Fall through
1622    case MVT::f32: {
1623      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1624      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1625      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1626                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
1627      ArgOffset += 4;
1628      break;
1629    }
1630    case MVT::f64: {
1631      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1632      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1633      MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1634                                        Arg, PtrOff, DAG.getSrcValue(NULL)));
1635      ArgOffset += 8;
1636      break;
1637    }
1638    case MVT::v16i8:
1639    case MVT::v8i16:
1640    case MVT::v4i32:
1641    case MVT::v2i64:
1642    case MVT::v4f32:
1643    case MVT::v2f64:
1644      if (NumXMMRegs < 4) {
1645        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
1646        NumXMMRegs++;
1647      } else {
1648        // XMM arguments have to be aligned on a 16-byte boundary.
1649        ArgOffset = ((ArgOffset + 15) / 16) * 16;
1650        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1651        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
1652        MemOpChains.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1653                                          Arg, PtrOff, DAG.getSrcValue(NULL)));
1654        ArgOffset += 16;
1655      }
1656    }
1657  }
1658
1659  if (!MemOpChains.empty())
1660    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
1661                        &MemOpChains[0], MemOpChains.size());
1662
1663  // Build a sequence of copy-to-reg nodes chained together with token chain
1664  // and flag operands which copy the outgoing args into registers.
1665  SDOperand InFlag;
1666  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1667    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
1668                             InFlag);
1669    InFlag = Chain.getValue(1);
1670  }
1671
1672  // If the callee is a GlobalAddress node (quite common, every direct call
1673  // is), turn it into a TargetGlobalAddress node so legalize doesn't hack it.
1674  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1675    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
1676  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1677    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
1678
1679  std::vector<MVT::ValueType> NodeTys;
1680  NodeTys.push_back(MVT::Other);   // Returns a chain
1681  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
1682  std::vector<SDOperand> Ops;
1683  Ops.push_back(Chain);
1684  Ops.push_back(Callee);
1685
1686  // Add argument registers to the end of the list so that they are known live
1687  // into the call.
1688  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1689    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1690                                  RegsToPass[i].second.getValueType()));
1691
1692  if (InFlag.Val)
1693    Ops.push_back(InFlag);
1694
1695  // FIXME: Do not generate X86ISD::TAILCALL for now.
1696  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
1697                      NodeTys, &Ops[0], Ops.size());
1698  InFlag = Chain.getValue(1);
1699
1700  NodeTys.clear();
1701  NodeTys.push_back(MVT::Other);   // Returns a chain
1702  if (RetVT != MVT::Other)
1703    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
1704  Ops.clear();
1705  Ops.push_back(Chain);
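  // CALLSEQ_END takes the adjustment amount and, separately, the number of
  // bytes the callee pops; for fastcc the callee pops all of the argument
  // bytes, so the two operands are the same here.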
1706  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
1707  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
1708  Ops.push_back(InFlag);
1709  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
1710  if (RetVT != MVT::Other)
1711    InFlag = Chain.getValue(1);
1712
1713  std::vector<SDOperand> ResultVals;
1714  NodeTys.clear();
1715  switch (RetVT) {
1716  default: assert(0 && "Unknown value type to return!");
1717  case MVT::Other: break;
1718  case MVT::i8:
1719    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
1720    ResultVals.push_back(Chain.getValue(0));
1721    NodeTys.push_back(MVT::i8);
1722    break;
1723  case MVT::i16:
1724    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
1725    ResultVals.push_back(Chain.getValue(0));
1726    NodeTys.push_back(MVT::i16);
1727    break;
1728  case MVT::i32:
1729    if (Op.Val->getValueType(1) == MVT::i32) {
1730      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
1731      ResultVals.push_back(Chain.getValue(0));
1732      Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
1733                                 Chain.getValue(2)).getValue(1);
1734      ResultVals.push_back(Chain.getValue(0));
1735      NodeTys.push_back(MVT::i32);
1736    } else {
1737      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
1738      ResultVals.push_back(Chain.getValue(0));
1739    }
1740    NodeTys.push_back(MVT::i32);
1741    break;
1742  case MVT::v16i8:
1743  case MVT::v8i16:
1744  case MVT::v4i32:
1745  case MVT::v2i64:
1746  case MVT::v4f32:
1747  case MVT::v2f64:
1748    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
1749    ResultVals.push_back(Chain.getValue(0));
1750    NodeTys.push_back(RetVT);
1751    break;
1752  case MVT::f32:
1753  case MVT::f64: {
1754    std::vector<MVT::ValueType> Tys;
1755    Tys.push_back(MVT::f64);
1756    Tys.push_back(MVT::Other);
1757    Tys.push_back(MVT::Flag);
1758    std::vector<SDOperand> Ops;
1759    Ops.push_back(Chain);
1760    Ops.push_back(InFlag);
1761    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys,
1762                                   &Ops[0], Ops.size());
1763    Chain  = RetVal.getValue(1);
1764    InFlag = RetVal.getValue(2);
1765    if (X86ScalarSSE) {
1766      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
1767      // shouldn't be necessary except that RFP cannot be live across
1768      // multiple blocks. When stackifier is fixed, they can be uncoupled.
1769      MachineFunction &MF = DAG.getMachineFunction();
1770      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
1771      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
1772      Tys.clear();
1773      Tys.push_back(MVT::Other);
1774      Ops.clear();
1775      Ops.push_back(Chain);
1776      Ops.push_back(RetVal);
1777      Ops.push_back(StackSlot);
1778      Ops.push_back(DAG.getValueType(RetVT));
1779      Ops.push_back(InFlag);
1780      Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
1781      RetVal = DAG.getLoad(RetVT, Chain, StackSlot,
1782                           DAG.getSrcValue(NULL));
1783      Chain = RetVal.getValue(1);
1784    }
1785
1786    if (RetVT == MVT::f32 && !X86ScalarSSE)
1787      // FIXME: we would really like to remember that this FP_ROUND
1788      // operation is okay to eliminate if we allow excess FP precision.
1789      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
1790    ResultVals.push_back(RetVal);
1791    NodeTys.push_back(RetVT);
1792    break;
1793  }
1794  }
1795
1796
1797  // If the function returns void, just return the chain.
1798  if (ResultVals.empty())
1799    return Chain;
1800
1801  // Otherwise, merge everything together with a MERGE_VALUES node.
1802  NodeTys.push_back(MVT::Other);
1803  ResultVals.push_back(Chain);
1804  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
1805                              &ResultVals[0], ResultVals.size());
1806  return Res.getValue(Op.ResNo);
1807}
1808
1809SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
1810  if (ReturnAddrIndex == 0) {
1811    // Set up a frame object for the return address.
1812    MachineFunction &MF = DAG.getMachineFunction();
1813    if (Subtarget->is64Bit())
1814      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
1815    else
1816      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
1817  }
1818
1819  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
1820}
1821
1822
1823
1824std::pair<SDOperand, SDOperand> X86TargetLowering::
1825LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
1826                        SelectionDAG &DAG) {
1827  SDOperand Result;
1828  if (Depth)        // Depths > 0 not supported yet!
1829    Result = DAG.getConstant(0, getPointerTy());
1830  else {
1831    SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
1832    if (!isFrameAddress)
1833      // Just load the return address
1834      Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI,
1835                           DAG.getSrcValue(NULL));
1836    else
1837      Result = DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
1838                           DAG.getConstant(4, getPointerTy()));
1839  }
1840  return std::make_pair(Result, Chain);
1841}
1842
1843/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode
1844/// which corresponds to the condition code.
1845static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) {
1846  switch (X86CC) {
1847  default: assert(0 && "Unknown X86 conditional code!");
1848  case X86ISD::COND_A:  return X86::JA;
1849  case X86ISD::COND_AE: return X86::JAE;
1850  case X86ISD::COND_B:  return X86::JB;
1851  case X86ISD::COND_BE: return X86::JBE;
1852  case X86ISD::COND_E:  return X86::JE;
1853  case X86ISD::COND_G:  return X86::JG;
1854  case X86ISD::COND_GE: return X86::JGE;
1855  case X86ISD::COND_L:  return X86::JL;
1856  case X86ISD::COND_LE: return X86::JLE;
1857  case X86ISD::COND_NE: return X86::JNE;
1858  case X86ISD::COND_NO: return X86::JNO;
1859  case X86ISD::COND_NP: return X86::JNP;
1860  case X86ISD::COND_NS: return X86::JNS;
1861  case X86ISD::COND_O:  return X86::JO;
1862  case X86ISD::COND_P:  return X86::JP;
1863  case X86ISD::COND_S:  return X86::JS;
1864  }
1865}
1866
1867/// translateX86CC - do a one-to-one translation of an ISD::CondCode to the
1868/// X86-specific condition code. It returns false if it cannot do a direct
1869/// translation. X86CC is the translated CondCode. Flip is set to true if
1870/// the order of the comparison operands should be flipped.
1871static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
1872                           unsigned &X86CC, bool &Flip) {
1873  Flip = false;
1874  X86CC = X86ISD::COND_INVALID;
1875  if (!isFP) {
1876    switch (SetCCOpcode) {
1877    default: break;
1878    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
1879    case ISD::SETGT:  X86CC = X86ISD::COND_G;  break;
1880    case ISD::SETGE:  X86CC = X86ISD::COND_GE; break;
1881    case ISD::SETLT:  X86CC = X86ISD::COND_L;  break;
1882    case ISD::SETLE:  X86CC = X86ISD::COND_LE; break;
1883    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
1884    case ISD::SETULT: X86CC = X86ISD::COND_B;  break;
1885    case ISD::SETUGT: X86CC = X86ISD::COND_A;  break;
1886    case ISD::SETULE: X86CC = X86ISD::COND_BE; break;
1887    case ISD::SETUGE: X86CC = X86ISD::COND_AE; break;
1888    }
1889  } else {
1890    // On a floating point condition, the flags are set as follows:
1891    // ZF  PF  CF   op
1892    //  0 | 0 | 0 | X > Y
1893    //  0 | 0 | 1 | X < Y
1894    //  1 | 0 | 0 | X == Y
1895    //  1 | 1 | 1 | unordered
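    //
    // This table drives the mapping below. SETOGT can use COND_A directly,
    // since CF == 0 and ZF == 0 excludes both X < Y and unordered. SETOLT
    // cannot use COND_B (CF is also 1 for unordered), so the operands are
    // flipped and COND_A is used instead.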
1896    switch (SetCCOpcode) {
1897    default: break;
1898    case ISD::SETUEQ:
1899    case ISD::SETEQ: X86CC = X86ISD::COND_E;  break;
1900    case ISD::SETOLT: Flip = true; // Fallthrough
1901    case ISD::SETOGT:
1902    case ISD::SETGT: X86CC = X86ISD::COND_A;  break;
1903    case ISD::SETOLE: Flip = true; // Fallthrough
1904    case ISD::SETOGE:
1905    case ISD::SETGE: X86CC = X86ISD::COND_AE; break;
1906    case ISD::SETUGT: Flip = true; // Fallthrough
1907    case ISD::SETULT:
1908    case ISD::SETLT: X86CC = X86ISD::COND_B;  break;
1909    case ISD::SETUGE: Flip = true; // Fallthrough
1910    case ISD::SETULE:
1911    case ISD::SETLE: X86CC = X86ISD::COND_BE; break;
1912    case ISD::SETONE:
1913    case ISD::SETNE: X86CC = X86ISD::COND_NE; break;
1914    case ISD::SETUO: X86CC = X86ISD::COND_P;  break;
1915    case ISD::SETO:  X86CC = X86ISD::COND_NP; break;
1916    }
1917  }
1918
1919  return X86CC != X86ISD::COND_INVALID;
1920}
1921
1922static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC,
1923                           bool &Flip) {
1924  return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip);
1925}
1926
1927/// hasFPCMov - is there a floating point cmov for the specific X86 condition
1928/// code. Current x86 isa includes the following FP cmov instructions:
1929/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
1930static bool hasFPCMov(unsigned X86CC) {
1931  switch (X86CC) {
1932  default:
1933    return false;
1934  case X86ISD::COND_B:
1935  case X86ISD::COND_BE:
1936  case X86ISD::COND_E:
1937  case X86ISD::COND_P:
1938  case X86ISD::COND_A:
1939  case X86ISD::COND_AE:
1940  case X86ISD::COND_NE:
1941  case X86ISD::COND_NP:
1942    return true;
1943  }
1944}
1945
1946/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra
1947/// load. For Darwin, external and weak symbols are indirect, loading the value
1948/// at address GV rather than the value of GV itself. This means that the
1949/// GlobalAddress must be in the base or index register of the address, not the
1950/// GV offset field.
1951static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
1952  return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
1953          (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
1954}
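
// For example (illustrative), a Darwin access to an external global foo is
// emitted through its non-lazy pointer stub, roughly:
//   movl L_foo$non_lazy_ptr, %eax
//   movl (%eax), %eax
// rather than folding _foo into the instruction's displacement field.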
1955
1956/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
1957/// true if Op is undef or if its value falls within the range [Low, Hi).
1958static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
1959  if (Op.getOpcode() == ISD::UNDEF)
1960    return true;
1961
1962  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
1963  return (Val >= Low && Val < Hi);
1964}
1965
1966/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
1967/// true if Op is undef or if its value equals the specified value.
1968static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
1969  if (Op.getOpcode() == ISD::UNDEF)
1970    return true;
1971  return cast<ConstantSDNode>(Op)->getValue() == Val;
1972}
1973
1974/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
1975/// specifies a shuffle of elements that is suitable for input to PSHUFD.
1976bool X86::isPSHUFDMask(SDNode *N) {
1977  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1978
1979  if (N->getNumOperands() != 4)
1980    return false;
1981
1982  // Check that the mask doesn't reference the second vector.
1983  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1984    SDOperand Arg = N->getOperand(i);
1985    if (Arg.getOpcode() == ISD::UNDEF) continue;
1986    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1987    if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
1988      return false;
1989  }
1990
1991  return true;
1992}
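
// For example, <2, 1, 0, 3> is a valid PSHUFD mask, while <0, 4, 1, 5> is
// not, since elements 4 and 5 reference the second vector.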
1993
1994/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
1995/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
1996bool X86::isPSHUFHWMask(SDNode *N) {
1997  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1998
1999  if (N->getNumOperands() != 8)
2000    return false;
2001
2002  // Lower quadword copied in order.
2003  for (unsigned i = 0; i != 4; ++i) {
2004    SDOperand Arg = N->getOperand(i);
2005    if (Arg.getOpcode() == ISD::UNDEF) continue;
2006    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2007    if (cast<ConstantSDNode>(Arg)->getValue() != i)
2008      return false;
2009  }
2010
2011  // Upper quadword shuffled.
2012  for (unsigned i = 4; i != 8; ++i) {
2013    SDOperand Arg = N->getOperand(i);
2014    if (Arg.getOpcode() == ISD::UNDEF) continue;
2015    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2016    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2017    if (Val < 4 || Val > 7)
2018      return false;
2019  }
2020
2021  return true;
2022}
2023
2024/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
2025/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
2026bool X86::isPSHUFLWMask(SDNode *N) {
2027  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2028
2029  if (N->getNumOperands() != 8)
2030    return false;
2031
2032  // Upper quadword copied in order.
2033  for (unsigned i = 4; i != 8; ++i)
2034    if (!isUndefOrEqual(N->getOperand(i), i))
2035      return false;
2036
2037  // Lower quadword shuffled.
2038  for (unsigned i = 0; i != 4; ++i)
2039    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
2040      return false;
2041
2042  return true;
2043}
2044
2045/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
2046/// specifies a shuffle of elements that is suitable for input to SHUFP*.
2047static bool isSHUFPMask(std::vector<SDOperand> &N) {
2048  unsigned NumElems = N.size();
2049  if (NumElems != 2 && NumElems != 4) return false;
2050
2051  unsigned Half = NumElems / 2;
2052  for (unsigned i = 0; i < Half; ++i)
2053    if (!isUndefOrInRange(N[i], 0, NumElems))
2054      return false;
2055  for (unsigned i = Half; i < NumElems; ++i)
2056    if (!isUndefOrInRange(N[i], NumElems, NumElems*2))
2057      return false;
2058
2059  return true;
2060}
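
// For example, with 4 elements the mask <2, 3, 4, 7> qualifies: the lower
// half selects from V1 (indices 0..3) and the upper half from V2 (4..7).
// isCommutedSHUFP below accepts the mirrored form, e.g. <6, 7, 0, 3>.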
2061
2062bool X86::isSHUFPMask(SDNode *N) {
2063  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2064  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
2065  return ::isSHUFPMask(Ops);
2066}
2067
2068/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
2069/// the reverse of what x86 shuffles want. x86 shuffles require the lower
2070/// half elements to come from vector 1 (which would equal the dest.) and
2071/// the upper half to come from vector 2.
2072static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) {
2073  unsigned NumElems = Ops.size();
2074  if (NumElems != 2 && NumElems != 4) return false;
2075
2076  unsigned Half = NumElems / 2;
2077  for (unsigned i = 0; i < Half; ++i)
2078    if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2))
2079      return false;
2080  for (unsigned i = Half; i < NumElems; ++i)
2081    if (!isUndefOrInRange(Ops[i], 0, NumElems))
2082      return false;
2083  return true;
2084}
2085
2086static bool isCommutedSHUFP(SDNode *N) {
2087  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2088  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
2089  return isCommutedSHUFP(Ops);
2090}
2091
2092/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
2093/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
2094bool X86::isMOVHLPSMask(SDNode *N) {
2095  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2096
2097  if (N->getNumOperands() != 4)
2098    return false;
2099
2100  // Expect elt0 == 6, elt1 == 7, elt2 == 2, elt3 == 3
2101  return isUndefOrEqual(N->getOperand(0), 6) &&
2102         isUndefOrEqual(N->getOperand(1), 7) &&
2103         isUndefOrEqual(N->getOperand(2), 2) &&
2104         isUndefOrEqual(N->getOperand(3), 3);
2105}
2106
2107/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
2108/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
2109bool X86::isMOVLPMask(SDNode *N) {
2110  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2111
2112  unsigned NumElems = N->getNumOperands();
2113  if (NumElems != 2 && NumElems != 4)
2114    return false;
2115
2116  for (unsigned i = 0; i < NumElems/2; ++i)
2117    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
2118      return false;
2119
2120  for (unsigned i = NumElems/2; i < NumElems; ++i)
2121    if (!isUndefOrEqual(N->getOperand(i), i))
2122      return false;
2123
2124  return true;
2125}
2126
2127/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
2128/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
2129/// and MOVLHPS.
2130bool X86::isMOVHPMask(SDNode *N) {
2131  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2132
2133  unsigned NumElems = N->getNumOperands();
2134  if (NumElems != 2 && NumElems != 4)
2135    return false;
2136
2137  for (unsigned i = 0; i < NumElems/2; ++i)
2138    if (!isUndefOrEqual(N->getOperand(i), i))
2139      return false;
2140
2141  for (unsigned i = 0; i < NumElems/2; ++i) {
2142    SDOperand Arg = N->getOperand(i + NumElems/2);
2143    if (!isUndefOrEqual(Arg, i + NumElems))
2144      return false;
2145  }
2146
2147  return true;
2148}
2149
2150/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
2151/// specifies a shuffle of elements that is suitable for input to UNPCKL.
2152static bool isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
2153  unsigned NumElems = N.size();
2154  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
2155    return false;
2156
2157  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
2158    SDOperand BitI  = N[i];
2159    SDOperand BitI1 = N[i+1];
2160    if (!isUndefOrEqual(BitI, j))
2161      return false;
2162    if (V2IsSplat) {
      // V2 is a splat, so a mask reference into V2 must point at its first
      // element (index NumElems); see NormalizeMask.
2163      if (!isUndefOrEqual(BitI1, NumElems))
2164        return false;
2165    } else {
2166      if (!isUndefOrEqual(BitI1, j + NumElems))
2167        return false;
2168    }
2169  }
2170
2171  return true;
2172}
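
// For example, with 4 elements the unpckl pattern is <0, 4, 1, 5>. With
// V2IsSplat, every odd position must point at V2's first element instead,
// as in <0, 4, 1, 4>.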
2173
2174bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
2175  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2176  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
2177  return ::isUNPCKLMask(Ops, V2IsSplat);
2178}
2179
2180/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
2181/// specifies a shuffle of elements that is suitable for input to UNPCKH.
2182static bool isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
2183  unsigned NumElems = N.size();
2184  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
2185    return false;
2186
2187  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
2188    SDOperand BitI  = N[i];
2189    SDOperand BitI1 = N[i+1];
2190    if (!isUndefOrEqual(BitI, j + NumElems/2))
2191      return false;
2192    if (V2IsSplat) {
2193      if (!isUndefOrEqual(BitI1, NumElems))
2194        return false;
2195    } else {
2196      if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems))
2197        return false;
2198    }
2199  }
2200
2201  return true;
2202}
2203
2204bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
2205  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2206  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
2207  return ::isUNPCKHMask(Ops, V2IsSplat);
2208}
2209
2210/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
2211/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
2212/// <0, 0, 1, 1>
2213bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
2214  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2215
2216  unsigned NumElems = N->getNumOperands();
2217  if (NumElems != 4 && NumElems != 8 && NumElems != 16)
2218    return false;
2219
2220  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
2221    SDOperand BitI  = N->getOperand(i);
2222    SDOperand BitI1 = N->getOperand(i+1);
2223
2224    if (!isUndefOrEqual(BitI, j))
2225      return false;
2226    if (!isUndefOrEqual(BitI1, j))
2227      return false;
2228  }
2229
2230  return true;
2231}
2232
2233/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
2234/// specifies a shuffle of elements that is suitable for input to MOVSS,
2235/// MOVSD, and MOVD, i.e. setting the lowest element.
2236static bool isMOVLMask(std::vector<SDOperand> &N) {
2237  unsigned NumElems = N.size();
2238  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
2239    return false;
2240
2241  if (!isUndefOrEqual(N[0], NumElems))
2242    return false;
2243
2244  for (unsigned i = 1; i < NumElems; ++i) {
2245    SDOperand Arg = N[i];
2246    if (!isUndefOrEqual(Arg, i))
2247      return false;
2248  }
2249
2250  return true;
2251}
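
// For example, <4, 1, 2, 3> is a MOVL mask for 4 elements: the lowest
// element comes from V2 and the rest pass through from V1, which is
// exactly the movss/movsd behavior.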
2252
2253bool X86::isMOVLMask(SDNode *N) {
2254  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2255  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
2256  return ::isMOVLMask(Ops);
2257}
2258
2259/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
2260/// of what x86 movss wants: the lowest element must be the lowest element of
2261/// vector 2, and the other elements must come from vector 1 in order.
2262static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false,
2263                           bool V2IsUndef = false) {
2264  unsigned NumElems = Ops.size();
2265  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
2266    return false;
2267
2268  if (!isUndefOrEqual(Ops[0], 0))
2269    return false;
2270
2271  for (unsigned i = 1; i < NumElems; ++i) {
2272    SDOperand Arg = Ops[i];
2273    if (!(isUndefOrEqual(Arg, i+NumElems) ||
2274          (V2IsUndef && isUndefOrInRange(Arg, NumElems, NumElems*2)) ||
2275          (V2IsSplat && isUndefOrEqual(Arg, NumElems))))
2276      return false;
2277  }
2278
2279  return true;
2280}
2281
2282static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
2283                           bool V2IsUndef = false) {
2284  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2285  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
2286  return isCommutedMOVL(Ops, V2IsSplat, V2IsUndef);
2287}
2288
2289/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
2290/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
2291bool X86::isMOVSHDUPMask(SDNode *N) {
2292  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2293
2294  if (N->getNumOperands() != 4)
2295    return false;
2296
2297  // Expect 1, 1, 3, 3
2298  for (unsigned i = 0; i < 2; ++i) {
2299    SDOperand Arg = N->getOperand(i);
2300    if (Arg.getOpcode() == ISD::UNDEF) continue;
2301    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2302    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2303    if (Val != 1) return false;
2304  }
2305
2306  bool HasHi = false;
2307  for (unsigned i = 2; i < 4; ++i) {
2308    SDOperand Arg = N->getOperand(i);
2309    if (Arg.getOpcode() == ISD::UNDEF) continue;
2310    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2311    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2312    if (Val != 3) return false;
2313    HasHi = true;
2314  }
2315
2316  // Don't use movshdup if it can be done with a shufps.
2317  return HasHi;
2318}
2319
2320/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
2321/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
2322bool X86::isMOVSLDUPMask(SDNode *N) {
2323  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2324
2325  if (N->getNumOperands() != 4)
2326    return false;
2327
2328  // Expect 0, 0, 2, 2
2329  for (unsigned i = 0; i < 2; ++i) {
2330    SDOperand Arg = N->getOperand(i);
2331    if (Arg.getOpcode() == ISD::UNDEF) continue;
2332    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2333    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2334    if (Val != 0) return false;
2335  }
2336
2337  bool HasHi = false;
2338  for (unsigned i = 2; i < 4; ++i) {
2339    SDOperand Arg = N->getOperand(i);
2340    if (Arg.getOpcode() == ISD::UNDEF) continue;
2341    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2342    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2343    if (Val != 2) return false;
2344    HasHi = true;
2345  }
2346
2347  // Don't use movsldup if it can be done with a shufps.
2348  return HasHi;
2349}
2350
2351/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2352/// a splat of a single element.
2353static bool isSplatMask(SDNode *N) {
2354  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2355
2356  // This is a splat operation if each element of the permute is the same, and
2357  // if the value doesn't reference the second vector.
2358  unsigned NumElems = N->getNumOperands();
2359  SDOperand ElementBase;
2360  unsigned i = 0;
2361  for (; i != NumElems; ++i) {
2362    SDOperand Elt = N->getOperand(i);
2363    if (isa<ConstantSDNode>(Elt)) {
2364      ElementBase = Elt;
2365      break;
2366    }
2367  }
2368
2369  if (!ElementBase.Val)
2370    return false;
2371
2372  for (; i != NumElems; ++i) {
2373    SDOperand Arg = N->getOperand(i);
2374    if (Arg.getOpcode() == ISD::UNDEF) continue;
2375    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2376    if (Arg != ElementBase) return false;
2377  }
2378
2379  // Make sure it is a splat of the first vector operand.
2380  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
2381}
2382
2383/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2384/// a splat of a single element and it's a 2 or 4 element mask.
2385bool X86::isSplatMask(SDNode *N) {
2386  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2387
2388  // We can only splat 64-bit and 32-bit quantities with a single instruction.
2389  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
2390    return false;
2391  return ::isSplatMask(N);
2392}
2393
2394/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
2395/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
2396/// instructions.
2397unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
2398  unsigned NumOperands = N->getNumOperands();
2399  unsigned Shift = (NumOperands == 4) ? 2 : 1;
2400  unsigned Mask = 0;
2401  for (unsigned i = 0; i < NumOperands; ++i) {
2402    unsigned Val = 0;
2403    SDOperand Arg = N->getOperand(NumOperands-i-1);
2404    if (Arg.getOpcode() != ISD::UNDEF)
2405      Val = cast<ConstantSDNode>(Arg)->getValue();
2406    if (Val >= NumOperands) Val -= NumOperands;
2407    Mask |= Val;
2408    if (i != NumOperands - 1)
2409      Mask <<= Shift;
2410  }
2411
2412  return Mask;
2413}
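
// Worked example: for the 4-element mask <3, 1, 2, 0> the loop visits the
// operands in reverse (values 0, 2, 1, 3), packing two bits per element
// into the immediate 0b00100111 = 0x27, with element 0's source in the
// lowest two bits.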
2414
2415/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
2416/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
2417/// instructions.
2418unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
2419  unsigned Mask = 0;
2420  // 8 nodes, but we only care about the last 4.
2421  for (unsigned i = 7; i >= 4; --i) {
2422    unsigned Val = 4;  // Any value works for UNDEF; 4 keeps (Val - 4) at 0.
2423    SDOperand Arg = N->getOperand(i);
2424    if (Arg.getOpcode() != ISD::UNDEF)
2425      Val = cast<ConstantSDNode>(Arg)->getValue();
2426    Mask |= (Val - 4);
2427    if (i != 4)
2428      Mask <<= 2;
2429  }
2430
2431  return Mask;
2432}
2433
2434/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
2435/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
2436/// instructions.
2437unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
2438  unsigned Mask = 0;
2439  // 8 nodes, but we only care about the first 4.
2440  for (int i = 3; i >= 0; --i) {
2441    unsigned Val = 0;
2442    SDOperand Arg = N->getOperand(i);
2443    if (Arg.getOpcode() != ISD::UNDEF)
2444      Val = cast<ConstantSDNode>(Arg)->getValue();
2445    Mask |= Val;
2446    if (i != 0)
2447      Mask <<= 2;
2448  }
2449
2450  return Mask;
2451}
2452
2453/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
2454/// specifies a 8 element shuffle that can be broken into a pair of
2455/// PSHUFHW and PSHUFLW.
2456static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
2457  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2458
2459  if (N->getNumOperands() != 8)
2460    return false;
2461
2462  // Lower quadword shuffled.
2463  for (unsigned i = 0; i != 4; ++i) {
2464    SDOperand Arg = N->getOperand(i);
2465    if (Arg.getOpcode() == ISD::UNDEF) continue;
2466    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2467    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2468    if (Val >= 4)
2469      return false;
2470  }
2471
2472  // Upper quadword shuffled.
2473  for (unsigned i = 4; i != 8; ++i) {
2474    SDOperand Arg = N->getOperand(i);
2475    if (Arg.getOpcode() == ISD::UNDEF) continue;
2476    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2477    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2478    if (Val < 4 || Val > 7)
2479      return false;
2480  }
2481
2482  return true;
2483}
2484
2485/// CommuteVectorShuffle - Swap vector_shuffle operands as well as the
2486/// values in the permute mask.
2487static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) {
2488  SDOperand V1 = Op.getOperand(0);
2489  SDOperand V2 = Op.getOperand(1);
2490  SDOperand Mask = Op.getOperand(2);
2491  MVT::ValueType VT = Op.getValueType();
2492  MVT::ValueType MaskVT = Mask.getValueType();
2493  MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
2494  unsigned NumElems = Mask.getNumOperands();
2495  std::vector<SDOperand> MaskVec;
2496
2497  for (unsigned i = 0; i != NumElems; ++i) {
2498    SDOperand Arg = Mask.getOperand(i);
2499    if (Arg.getOpcode() == ISD::UNDEF) {
2500      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
2501      continue;
2502    }
2503    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2504    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2505    if (Val < NumElems)
2506      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
2507    else
2508      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
2509  }
2510
2511  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2512  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
2513}
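
// For example, shuffle(V1, V2, <0, 5, 2, 7>) commutes to
// shuffle(V2, V1, <4, 1, 6, 3>): the operands are swapped and each mask
// entry is rebased onto the other vector.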
2514
2515/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
2516/// match movhlps. The lower half elements should come from upper half of
2517/// V1 (and in order), and the upper half elements should come from the upper
2518/// half of V2 (and in order).
2519static bool ShouldXformToMOVHLPS(SDNode *Mask) {
2520  unsigned NumElems = Mask->getNumOperands();
2521  if (NumElems != 4)
2522    return false;
2523  for (unsigned i = 0, e = 2; i != e; ++i)
2524    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
2525      return false;
2526  for (unsigned i = 2; i != 4; ++i)
2527    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
2528      return false;
2529  return true;
2530}
2531
2532/// isScalarLoadToVector - Returns true if the node is a scalar load that
2533/// is promoted to a vector.
2534static inline bool isScalarLoadToVector(SDNode *N) {
2535  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
2536    N = N->getOperand(0).Val;
2537    return (N->getOpcode() == ISD::LOAD);
2538  }
2539  return false;
2540}
2541
2542/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
2543/// match movlp{s|d}. The lower half elements should come from the lower half
2543/// of
2544/// V1 (and in order), and the upper half elements should come from the upper
2545/// half of V2 (and in order). And since V1 will become the source of the
2546/// MOVLP, it must be either a vector load or a scalar load to vector.
2547static bool ShouldXformToMOVLP(SDNode *V1, SDNode *Mask) {
2548  if (V1->getOpcode() != ISD::LOAD && !isScalarLoadToVector(V1))
2549    return false;
2550
2551  unsigned NumElems = Mask->getNumOperands();
2552  if (NumElems != 2 && NumElems != 4)
2553    return false;
2554  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
2555    if (!isUndefOrEqual(Mask->getOperand(i), i))
2556      return false;
2557  for (unsigned i = NumElems/2; i != NumElems; ++i)
2558    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
2559      return false;
2560  return true;
2561}
2562
2563/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
2564/// all the same.
2565static bool isSplatVector(SDNode *N) {
2566  if (N->getOpcode() != ISD::BUILD_VECTOR)
2567    return false;
2568
2569  SDOperand SplatValue = N->getOperand(0);
2570  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
2571    if (N->getOperand(i) != SplatValue)
2572      return false;
2573  return true;
2574}
2575
2576/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
2577/// to an undef.
2578static bool isUndefShuffle(SDNode *N) {
2579  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
2580    return false;
2581
2582  SDOperand V1 = N->getOperand(0);
2583  SDOperand V2 = N->getOperand(1);
2584  SDOperand Mask = N->getOperand(2);
2585  unsigned NumElems = Mask.getNumOperands();
2586  for (unsigned i = 0; i != NumElems; ++i) {
2587    SDOperand Arg = Mask.getOperand(i);
2588    if (Arg.getOpcode() != ISD::UNDEF) {
2589      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2590      if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
2591        return false;
2592      else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
2593        return false;
2594    }
2595  }
2596  return true;
2597}
2598
2599/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
2600/// that point to V2 point to its first element.
2601static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
2602  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
2603
2604  bool Changed = false;
2605  std::vector<SDOperand> MaskVec;
2606  unsigned NumElems = Mask.getNumOperands();
2607  for (unsigned i = 0; i != NumElems; ++i) {
2608    SDOperand Arg = Mask.getOperand(i);
2609    if (Arg.getOpcode() != ISD::UNDEF) {
2610      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2611      if (Val > NumElems) {
2612        Arg = DAG.getConstant(NumElems, Arg.getValueType());
2613        Changed = true;
2614      }
2615    }
2616    MaskVec.push_back(Arg);
2617  }
2618
2619  if (Changed)
2620    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
2621                       &MaskVec[0], MaskVec.size());
2622  return Mask;
2623}
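
// For example, with a splatted V2 the mask <0, 5, 2, 7> is normalized to
// <0, 4, 2, 4>: every reference into V2 is redirected to element NumElems.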
2624
2625/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d} or movd
2626/// operation of specified width.
2627static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
2628  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2629  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2630
2631  std::vector<SDOperand> MaskVec;
2632  MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
2633  for (unsigned i = 1; i != NumElems; ++i)
2634    MaskVec.push_back(DAG.getConstant(i, BaseVT));
2635  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2636}
2637
2638/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
2639/// of specified width.
2640static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
2641  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2642  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2643  std::vector<SDOperand> MaskVec;
2644  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
2645    MaskVec.push_back(DAG.getConstant(i,            BaseVT));
2646    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
2647  }
2648  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2649}
2650
2651/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
2652/// of specified width.
2653static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
2654  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2655  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2656  unsigned Half = NumElems/2;
2657  std::vector<SDOperand> MaskVec;
2658  for (unsigned i = 0; i != Half; ++i) {
2659    MaskVec.push_back(DAG.getConstant(i + Half,            BaseVT));
2660    MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
2661  }
2662  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2663}
2664
2665/// getZeroVector - Returns a vector of specified type with all zero elements.
2666///
2667static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
2668  assert(MVT::isVector(VT) && "Expected a vector type");
2669  unsigned NumElems = getVectorNumElements(VT);
2670  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
2671  bool isFP = MVT::isFloatingPoint(EVT);
2672  SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
2673  std::vector<SDOperand> ZeroVec(NumElems, Zero);
2674  return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
2675}
2676
2677/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
2678///
2679static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
2680  SDOperand V1 = Op.getOperand(0);
2681  SDOperand Mask = Op.getOperand(2);
2682  MVT::ValueType VT = Op.getValueType();
2683  unsigned NumElems = Mask.getNumOperands();
2684  Mask = getUnpacklMask(NumElems, DAG);
2685  while (NumElems != 4) {
2686    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
2687    NumElems >>= 1;
2688  }
2689  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
2690
2691  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2692  Mask = getZeroVector(MaskVT, DAG);
2693  SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
2694                                  DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
2695  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
2696}
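
// For example (a rough trace for v16i8): one unpackl of V1 with itself
// yields bytes <0,0,1,1,...,7,7>, a second yields <0,0,0,0,1,1,1,1,...>,
// and the final v4i32 shuffle with an all-zero mask replicates the low
// 32 bits (four copies of byte 0) across the whole vector.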
2697
2698/// isZeroNode - Returns true if Elt is a constant zero or a floating point
2699/// constant +0.0.
2700static inline bool isZeroNode(SDOperand Elt) {
2701  return ((isa<ConstantSDNode>(Elt) &&
2702           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
2703          (isa<ConstantFPSDNode>(Elt) &&
2704           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
2705}
2706
2707/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
2708/// vector and a zero or undef vector.
2709static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
2710                                             unsigned NumElems, unsigned Idx,
2711                                             bool isZero, SelectionDAG &DAG) {
2712  SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
2713  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2714  MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
2715  SDOperand Zero = DAG.getConstant(0, EVT);
2716  std::vector<SDOperand> MaskVec(NumElems, Zero);
2717  MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
2718  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2719                               &MaskVec[0], MaskVec.size());
2720  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2721}
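
// For example, with NumElems == 4, Idx == 0 and isZero set, this builds
// shuffle(zerovector, V2, <4, 0, 0, 0>): lane 0 takes V2's first element
// and the remaining lanes take zeros from V1.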
2722
2723/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
2724///
2725static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
2726                                       unsigned NumNonZero, unsigned NumZero,
2727                                       SelectionDAG &DAG, TargetLowering &TLI) {
2728  if (NumNonZero > 8)
2729    return SDOperand();
2730
2731  SDOperand V(0, 0);
2732  bool First = true;
2733  for (unsigned i = 0; i < 16; ++i) {
2734    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
2735    if (ThisIsNonZero && First) {
2736      if (NumZero)
2737        V = getZeroVector(MVT::v8i16, DAG);
2738      else
2739        V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
2740      First = false;
2741    }
2742
2743    if ((i & 1) != 0) {
2744      SDOperand ThisElt(0, 0), LastElt(0, 0);
2745      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
2746      if (LastIsNonZero) {
2747        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
2748      }
2749      if (ThisIsNonZero) {
2750        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
2751        ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
2752                              ThisElt, DAG.getConstant(8, MVT::i8));
2753        if (LastIsNonZero)
2754          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
2755      } else
2756        ThisElt = LastElt;
2757
2758      if (ThisElt.Val)
2759        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
2760                        DAG.getConstant(i/2, TLI.getPointerTy()));
2761    }
2762  }
2763
2764  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
2765}
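
// For example, byte elements 0 and 1 are combined into the 16-bit word
// (zext(e1) << 8) | zext(e0) and inserted as word 0 of the v8i16, matching
// x86's little-endian byte order; the result is bitcast back to v16i8.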
2766
2767/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
2768///
2769static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
2770                                       unsigned NumNonZero, unsigned NumZero,
2771                                       SelectionDAG &DAG, TargetLowering &TLI) {
2772  if (NumNonZero > 4)
2773    return SDOperand();
2774
2775  SDOperand V(0, 0);
2776  bool First = true;
2777  for (unsigned i = 0; i < 8; ++i) {
2778    bool isNonZero = (NonZeros & (1 << i)) != 0;
2779    if (isNonZero) {
2780      if (First) {
2781        if (NumZero)
2782          V = getZeroVector(MVT::v8i16, DAG);
2783        else
2784          V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
2785        First = false;
2786      }
2787      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
2788                      DAG.getConstant(i, TLI.getPointerTy()));
2789    }
2790  }
2791
2792  return V;
2793}
2794
2795SDOperand
2796X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
2797  // All zeros are handled with pxor.
2798  if (ISD::isBuildVectorAllZeros(Op.Val))
2799    return Op;
2800
2801  // All ones are handled with pcmpeqd.
2802  if (ISD::isBuildVectorAllOnes(Op.Val))
2803    return Op;
2804
2805  MVT::ValueType VT = Op.getValueType();
2806  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
2807  unsigned EVTBits = MVT::getSizeInBits(EVT);
2808
2809  unsigned NumElems = Op.getNumOperands();
2810  unsigned NumZero  = 0;
2811  unsigned NumNonZero = 0;
2812  unsigned NonZeros = 0;
2813  std::set<SDOperand> Values;
2814  for (unsigned i = 0; i < NumElems; ++i) {
2815    SDOperand Elt = Op.getOperand(i);
2816    if (Elt.getOpcode() != ISD::UNDEF) {
2817      Values.insert(Elt);
2818      if (isZeroNode(Elt))
2819        NumZero++;
2820      else {
2821        NonZeros |= (1 << i);
2822        NumNonZero++;
2823      }
2824    }
2825  }
2826
2827  if (NumNonZero == 0)
2828    // Must be a mix of zero and undef. Return a zero vector.
2829    return getZeroVector(VT, DAG);
2830
2831  // Splat is obviously ok. Let the legalizer expand it to a shuffle.
2832  if (Values.size() == 1)
2833    return SDOperand();
2834
2835  // Special case for single non-zero element.
2836  if (NumNonZero == 1) {
2837    unsigned Idx = CountTrailingZeros_32(NonZeros);
2838    SDOperand Item = Op.getOperand(Idx);
2839    Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
2840    if (Idx == 0)
2841      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
2842      return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
2843                                         NumZero > 0, DAG);
2844
2845    if (EVTBits == 32) {
2846      // Turn it into a shuffle of zero and zero-extended scalar to vector.
2847      Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
2848                                         DAG);
2849      MVT::ValueType MaskVT  = MVT::getIntVectorWithNumElements(NumElems);
2850      MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
2851      std::vector<SDOperand> MaskVec;
2852      for (unsigned i = 0; i < NumElems; i++)
2853        MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
2854      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2855                                   &MaskVec[0], MaskVec.size());
2856      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
2857                         DAG.getNode(ISD::UNDEF, VT), Mask);
2858    }
2859  }
2860
2861  // Let the legalizer expand 2-wide build_vectors.
2862  if (EVTBits == 64)
2863    return SDOperand();
2864
2865  // If element VT is < 32 bits, convert it to inserts into a zero vector.
2866  if (EVTBits == 8) {
2867    SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
2868                                        *this);
2869    if (V.Val) return V;
2870  }
2871
2872  if (EVTBits == 16) {
2873    SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
2874                                        *this);
2875    if (V.Val) return V;
2876  }
2877
2878  // If element VT is == 32 bits, turn it into a number of shuffles.
2879  std::vector<SDOperand> V(NumElems);
2880  if (NumElems == 4 && NumZero > 0) {
2881    for (unsigned i = 0; i < 4; ++i) {
2882      bool isZero = !(NonZeros & (1 << i));
2883      if (isZero)
2884        V[i] = getZeroVector(VT, DAG);
2885      else
2886        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2887    }
2888
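    // Combine the vectors pairwise. The two NonZeros bits for each pair
    // select how V[i*2] and V[i*2+1] are merged: both zero (keep the zero
    // vector), exactly one non-zero (movl the scalar over the zero vector;
    // the Reverse masks below fix up the element order), or both non-zero
    // (unpckl them).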
2889    for (unsigned i = 0; i < 2; ++i) {
2890      switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
2891        default: break;
2892        case 0:
2893          V[i] = V[i*2];  // Must be a zero vector.
2894          break;
2895        case 1:
2896          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
2897                             getMOVLMask(NumElems, DAG));
2898          break;
2899        case 2:
2900          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2901                             getMOVLMask(NumElems, DAG));
2902          break;
2903        case 3:
2904          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2905                             getUnpacklMask(NumElems, DAG));
2906          break;
2907      }
2908    }
2909
2910    // Take advantage of the fact that a GR32 to VR128 scalar_to_vector
2911    // (i.e. movd) clears the upper bits.
2912    // FIXME: we can do the same for the v4f32 case when we know both parts
2913    // of the lower half come from scalar_to_vector (loadf32). We should do
2914    // that in a post-legalizer dag combiner with target-specific hooks.
2915    if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
2916      return V[0];
2917    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2918    MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
2919    std::vector<SDOperand> MaskVec;
2920    bool Reverse = (NonZeros & 0x3) == 2;
2921    for (unsigned i = 0; i < 2; ++i)
2922      if (Reverse)
2923        MaskVec.push_back(DAG.getConstant(1-i, MaskEVT));
2924      else
2925        MaskVec.push_back(DAG.getConstant(i, MaskEVT));
2926    Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
2927    for (unsigned i = 0; i < 2; ++i)
2928      if (Reverse)
2929        MaskVec.push_back(DAG.getConstant(1-i+NumElems, MaskEVT));
2930      else
2931        MaskVec.push_back(DAG.getConstant(i+NumElems, MaskEVT));
2932    SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2933                                     &MaskVec[0], MaskVec.size());
2934    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
2935  }
2936
2937  if (Values.size() > 2) {
2938    // Expand into a number of unpckl*.
2939    // e.g. for v4f32
2940    //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
2941    //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
2942    //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
2943    SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
2944    for (unsigned i = 0; i < NumElems; ++i)
2945      V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2946    NumElems >>= 1;
2947    while (NumElems != 0) {
2948      for (unsigned i = 0; i < NumElems; ++i)
2949        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
2950                           UnpckMask);
2951      NumElems >>= 1;
2952    }
2953    return V[0];
2954  }
2955
2956  return SDOperand();
2957}
2958
2959SDOperand
2960X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
2961  SDOperand V1 = Op.getOperand(0);
2962  SDOperand V2 = Op.getOperand(1);
2963  SDOperand PermMask = Op.getOperand(2);
2964  MVT::ValueType VT = Op.getValueType();
2965  unsigned NumElems = PermMask.getNumOperands();
2966  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
2967  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
2968
2969  if (isUndefShuffle(Op.Val))
2970    return DAG.getNode(ISD::UNDEF, VT);
2971
2972  if (isSplatMask(PermMask.Val)) {
2973    if (NumElems <= 4) return Op;
2974    // Promote it to a v4i32 splat.
2975    return PromoteSplat(Op, DAG);
2976  }
2977
2978  if (X86::isMOVLMask(PermMask.Val))
2979    return (V1IsUndef) ? V2 : Op;
2980
2981  if (X86::isMOVSHDUPMask(PermMask.Val) ||
2982      X86::isMOVSLDUPMask(PermMask.Val) ||
2983      X86::isMOVHLPSMask(PermMask.Val) ||
2984      X86::isMOVHPMask(PermMask.Val) ||
2985      X86::isMOVLPMask(PermMask.Val))
2986    return Op;
2987
2988  if (ShouldXformToMOVHLPS(PermMask.Val) ||
2989      ShouldXformToMOVLP(V1.Val, PermMask.Val))
2990    return CommuteVectorShuffle(Op, DAG);
2991
2992  bool V1IsSplat = isSplatVector(V1.Val);
2993  bool V2IsSplat = isSplatVector(V2.Val);
2994  if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
2995    Op = CommuteVectorShuffle(Op, DAG);
2996    V1 = Op.getOperand(0);
2997    V2 = Op.getOperand(1);
2998    PermMask = Op.getOperand(2);
2999    std::swap(V1IsSplat, V2IsSplat);
3000    std::swap(V1IsUndef, V2IsUndef);
3001  }
3002
3003  if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
3004    if (V2IsUndef) return V1;
3005    Op = CommuteVectorShuffle(Op, DAG);
3006    V1 = Op.getOperand(0);
3007    V2 = Op.getOperand(1);
3008    PermMask = Op.getOperand(2);
3009    if (V2IsSplat) {
3010      // V2 is a splat, so the mask may be malformed. That is, it may point
3011      // to any V2 element. The instruction selector won't like this. Get
3012      // a corrected mask and commute to form a proper MOVS{S|D}.
3013      SDOperand NewMask = getMOVLMask(NumElems, DAG);
3014      if (NewMask.Val != PermMask.Val)
3015        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3016    }
3017    return Op;
3018  }
3019
3020  if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
3021      X86::isUNPCKLMask(PermMask.Val) ||
3022      X86::isUNPCKHMask(PermMask.Val))
3023    return Op;
3024
3025  if (V2IsSplat) {
3026    // Normalize the mask so all entries that point to V2 point to its first
3027    // element, then try to match unpck{h|l} again. If they match, return a
3028    // new vector_shuffle with the corrected mask.
3029    SDOperand NewMask = NormalizeMask(PermMask, DAG);
3030    if (NewMask.Val != PermMask.Val) {
3031      if (X86::isUNPCKLMask(PermMask.Val, true)) {
3032        SDOperand NewMask = getUnpacklMask(NumElems, DAG);
3033        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3034      } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
3035        SDOperand NewMask = getUnpackhMask(NumElems, DAG);
3036        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3037      }
3038    }
3039  }
3040
3041  // Normalize the node to match x86 shuffle ops if needed
3042  if (V2.getOpcode() != ISD::UNDEF)
3043    if (isCommutedSHUFP(PermMask.Val)) {
3044      Op = CommuteVectorShuffle(Op, DAG);
3045      V1 = Op.getOperand(0);
3046      V2 = Op.getOperand(1);
3047      PermMask = Op.getOperand(2);
3048    }
3049
3050  // If VT is integer, try PSHUF* first, then SHUFP*.
3051  if (MVT::isInteger(VT)) {
3052    if (X86::isPSHUFDMask(PermMask.Val) ||
3053        X86::isPSHUFHWMask(PermMask.Val) ||
3054        X86::isPSHUFLWMask(PermMask.Val)) {
3055      if (V2.getOpcode() != ISD::UNDEF)
3056        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3057                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
3058      return Op;
3059    }
3060
3061    if (X86::isSHUFPMask(PermMask.Val))
3062      return Op;
3063
3064    // Handle a v8i16 shuffle that can be done as a pshuflw / pshufhw pair.
3065    if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
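      // The first shuffle applies the low-half permutation and passes the
      // high four elements through unchanged; the second passes the low four
      // through and applies the high-half permutation.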
3066      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3067      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
3068      std::vector<SDOperand> MaskVec;
3069      for (unsigned i = 0; i != 4; ++i)
3070        MaskVec.push_back(PermMask.getOperand(i));
3071      for (unsigned i = 4; i != 8; ++i)
3072        MaskVec.push_back(DAG.getConstant(i, BaseVT));
3073      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3074                                   &MaskVec[0], MaskVec.size());
3075      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
3076      MaskVec.clear();
3077      for (unsigned i = 0; i != 4; ++i)
3078        MaskVec.push_back(DAG.getConstant(i, BaseVT));
3079      for (unsigned i = 4; i != 8; ++i)
3080        MaskVec.push_back(PermMask.getOperand(i));
3081      Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
3082      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
3083    }
3084  } else {
3085    // Floating point cases in the other order.
3086    if (X86::isSHUFPMask(PermMask.Val))
3087      return Op;
3088    if (X86::isPSHUFDMask(PermMask.Val) ||
3089        X86::isPSHUFHWMask(PermMask.Val) ||
3090        X86::isPSHUFLWMask(PermMask.Val)) {
3091      if (V2.getOpcode() != ISD::UNDEF)
3092        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3093                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
3094      return Op;
3095    }
3096  }
3097
3098  if (NumElems == 4) {
3099    MVT::ValueType MaskVT = PermMask.getValueType();
3100    MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
3101    std::vector<std::pair<int, int> > Locs;
3102    Locs.resize(NumElems);  // resized, not reserved: filled via operator[]
3103    std::vector<SDOperand> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3104    std::vector<SDOperand> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3105    unsigned NumHi = 0;
3106    unsigned NumLo = 0;
3107    // If no more than two elements come from either vector, this can be
3108    // implemented with two shuffles. The first shuffle gathers the
3109    // elements. The second shuffle, which takes the first shuffle as both
3110    // of its vector operands, puts the elements into the right order.
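    // e.g. for the mask <1,7,2,4>, the first shuffle computes
    //   X = shuffle(V1, V2, <1,2,7,4>)  ==>  <V1[1], V1[2], V2[3], V2[0]>
    // and the second puts the elements into place:
    //   shuffle(X, X, <0,2,5,7>)        ==>  <V1[1], V2[3], V1[2], V2[0]>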
3111    for (unsigned i = 0; i != NumElems; ++i) {
3112      SDOperand Elt = PermMask.getOperand(i);
3113      if (Elt.getOpcode() == ISD::UNDEF) {
3114        Locs[i] = std::make_pair(-1, -1);
3115      } else {
3116        unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
3117        if (Val < NumElems) {
3118          Locs[i] = std::make_pair(0, NumLo);
3119          Mask1[NumLo] = Elt;
3120          NumLo++;
3121        } else {
3122          Locs[i] = std::make_pair(1, NumHi);
3123          if (2+NumHi < NumElems)
3124            Mask1[2+NumHi] = Elt;
3125          NumHi++;
3126        }
3127      }
3128    }
3129    if (NumLo <= 2 && NumHi <= 2) {
3130      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3131                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3132                                   &Mask1[0], Mask1.size()));
3133      for (unsigned i = 0; i != NumElems; ++i) {
3134        if (Locs[i].first == -1)
3135          continue;
3136        else {
3137          unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
3138          Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
3139          Mask2[i] = DAG.getConstant(Idx, MaskEVT);
3140        }
3141      }
3142
3143      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
3144                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3145                                     &Mask2[0], Mask2.size()));
3146    }
3147
3148    // Break it into (shuffle shuffle_hi, shuffle_lo).
3149    Locs.assign(NumElems, std::make_pair(-1, -1));
3150    std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3151    std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3152    std::vector<SDOperand> *MaskPtr = &LoMask;
3153    unsigned MaskIdx = 0;
3154    unsigned LoIdx = 0;
3155    unsigned HiIdx = NumElems/2;
3156    for (unsigned i = 0; i != NumElems; ++i) {
3157      if (i == NumElems/2) {
3158        MaskPtr = &HiMask;
3159        MaskIdx = 1;
3160        LoIdx = 0;
3161        HiIdx = NumElems/2;
3162      }
3163      SDOperand Elt = PermMask.getOperand(i);
3164      if (Elt.getOpcode() == ISD::UNDEF) {
3165        Locs[i] = std::make_pair(-1, -1);
3166      } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
3167        Locs[i] = std::make_pair(MaskIdx, LoIdx);
3168        (*MaskPtr)[LoIdx] = Elt;
3169        LoIdx++;
3170      } else {
3171        Locs[i] = std::make_pair(MaskIdx, HiIdx);
3172        (*MaskPtr)[HiIdx] = Elt;
3173        HiIdx++;
3174      }
3175    }
3176
3177    SDOperand LoShuffle =
3178      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3179                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3180                              &LoMask[0], LoMask.size()));
3181    SDOperand HiShuffle =
3182      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3183                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3184                              &HiMask[0], HiMask.size()));
3185    std::vector<SDOperand> MaskOps;
3186    for (unsigned i = 0; i != NumElems; ++i) {
3187      if (Locs[i].first == -1) {
3188        MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
3189      } else {
3190        unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
3191        MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
3192      }
3193    }
3194    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
3195                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3196                                   &MaskOps[0], MaskOps.size()));
3197  }
3198
3199  return SDOperand();
3200}
3201
3202SDOperand
3203X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3204  if (!isa<ConstantSDNode>(Op.getOperand(1)))
3205    return SDOperand();
3206
3207  MVT::ValueType VT = Op.getValueType();
3208  // TODO: handle v16i8.
3209  if (MVT::getSizeInBits(VT) == 16) {
3210    // Transform it so it matches pextrw, which produces a 32-bit result.
3211    MVT::ValueType EVT = (MVT::ValueType)(VT+1);
3212    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
3213                                    Op.getOperand(0), Op.getOperand(1));
3214    SDOperand Assert  = DAG.getNode(ISD::AssertZext, EVT, Extract,
3215                                    DAG.getValueType(VT));
3216    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
3217  } else if (MVT::getSizeInBits(VT) == 32) {
3218    SDOperand Vec = Op.getOperand(0);
3219    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3220    if (Idx == 0)
3221      return Op;
3222    // SHUFPS the element to the lowest double word, then movss.
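    // e.g. to extract element 2 of a v4f32, shufps with mask <2,u,u,u> moves
    // it into element 0, which is then extracted directly.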
3223    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3224    std::vector<SDOperand> IdxVec;
3225    IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
3226    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3227    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3228    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3229    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3230                                 &IdxVec[0], IdxVec.size());
3231    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3232                      Vec, Vec, Mask);
3233    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3234                       DAG.getConstant(0, getPointerTy()));
3235  } else if (MVT::getSizeInBits(VT) == 64) {
3236    SDOperand Vec = Op.getOperand(0);
3237    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3238    if (Idx == 0)
3239      return Op;
3240
3241    // UNPCKHPD the element to the lowest double word, then movsd.
3242    // Note that if the lower 64 bits of the result of the UNPCKHPD are stored
3243    // to a f64mem, the whole operation is folded into a single MOVHPDmr.
3244    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(2);
3245    std::vector<SDOperand> IdxVec;
3246    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
3247    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3248    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3249                                 &IdxVec[0], IdxVec.size());
3250    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3251                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3252    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3253                       DAG.getConstant(0, getPointerTy()));
3254  }
3255
3256  return SDOperand();
3257}
3258
3259SDOperand
3260X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3261  // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
3262  // as its second argument.
3263  MVT::ValueType VT = Op.getValueType();
3264  MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
3265  SDOperand N0 = Op.getOperand(0);
3266  SDOperand N1 = Op.getOperand(1);
3267  SDOperand N2 = Op.getOperand(2);
3268  if (MVT::getSizeInBits(BaseVT) == 16) {
3269    if (N1.getValueType() != MVT::i32)
3270      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
3271    if (N2.getValueType() != MVT::i32)
3272      N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
3273    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
3274  } else if (MVT::getSizeInBits(BaseVT) == 32) {
3275    unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
3276    if (Idx == 0) {
3277      // Use a movss.
3278      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
3279      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3280      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
3281      std::vector<SDOperand> MaskVec;
3282      MaskVec.push_back(DAG.getConstant(4, BaseVT));
3283      for (unsigned i = 1; i <= 3; ++i)
3284        MaskVec.push_back(DAG.getConstant(i, BaseVT));
3285      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
3286                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3287                                     &MaskVec[0], MaskVec.size()));
3288    } else {
3289      // Use two pinsrw instructions to insert a 32-bit value.
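      // e.g. to insert a 32-bit value at element Idx of a v4i32, write its
      // low 16 bits at word 2*Idx and its high 16 bits at word 2*Idx+1 of
      // the v8i16 view of the vector.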
3290      Idx <<= 1;
3291      if (MVT::isFloatingPoint(N1.getValueType())) {
3292        if (N1.getOpcode() == ISD::LOAD) {
3293          // Just load directly from f32mem to GR32.
3294          N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1),
3295                           N1.getOperand(2));
3296        } else {
3297          N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
3298          N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
3299          N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
3300                           DAG.getConstant(0, getPointerTy()));
3301        }
3302      }
3303      N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
3304      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3305                       DAG.getConstant(Idx, getPointerTy()));
3306      N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
3307      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3308                       DAG.getConstant(Idx+1, getPointerTy()));
3309      return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
3310    }
3311  }
3312
3313  return SDOperand();
3314}
3315
3316SDOperand
3317X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
3318  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
3319  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
3320}
3321
3322// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3323// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
3324// one of the above-mentioned nodes. It has to be wrapped because otherwise
3325// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3326// be used to form addressing modes. These wrapped nodes will be selected
3327// into MOV32ri.
3328SDOperand
3329X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
3330  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3331  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
3332                            DAG.getTargetConstantPool(CP->get(), getPointerTy(),
3333                                                      CP->getAlignment()));
3334  if (Subtarget->isTargetDarwin()) {
3335    // With PIC, the address is actually $g + Offset.
3336    if (!Subtarget->is64Bit() &&
3337        getTargetMachine().getRelocationModel() == Reloc::PIC_)
3338      Result = DAG.getNode(ISD::ADD, getPointerTy(),
3339                    DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
3340  }
3341
3342  return Result;
3343}
3344
3345SDOperand
3346X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
3347  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3348  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
3349                                 DAG.getTargetGlobalAddress(GV,
3350                                                            getPointerTy()));
3351  if (Subtarget->isTargetDarwin()) {
3352    // With PIC, the address is actually $g + Offset.
3353    if (!Subtarget->is64Bit() &&
3354        getTargetMachine().getRelocationModel() == Reloc::PIC_)
3355      Result = DAG.getNode(ISD::ADD, getPointerTy(),
3356                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3357                           Result);
3358
3359    // For Darwin, external and weak symbols are indirect, so we want to load
3360    // the value at address GV, not the value of GV itself. This means that
3361    // the GlobalAddress must be in the base or index register of the address,
3362    // not the GV offset field.
3363    if (getTargetMachine().getRelocationModel() != Reloc::Static &&
3364        DarwinGVRequiresExtraLoad(GV))
3365      Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(),
3366                           Result, DAG.getSrcValue(NULL));
3367  }
3368
3369  return Result;
3370}
3371
3372SDOperand
3373X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
3374  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
3375  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
3376                                 DAG.getTargetExternalSymbol(Sym,
3377                                                             getPointerTy()));
3378  if (Subtarget->isTargetDarwin()) {
3379    // With PIC, the address is actually $g + Offset.
3380    if (!Subtarget->is64Bit() &&
3381        getTargetMachine().getRelocationModel() == Reloc::PIC_)
3382      Result = DAG.getNode(ISD::ADD, getPointerTy(),
3383                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3384                           Result);
3385  }
3386
3387  return Result;
3388}
3389
3390SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
3391    assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
3392           "Not an i64 shift!");
3393    bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
3394    SDOperand ShOpLo = Op.getOperand(0);
3395    SDOperand ShOpHi = Op.getOperand(1);
3396    SDOperand ShAmt  = Op.getOperand(2);
3397    SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi,
3398                                         DAG.getConstant(31, MVT::i8))
3399                           : DAG.getConstant(0, MVT::i32);
3400
3401    SDOperand Tmp2, Tmp3;
3402    if (Op.getOpcode() == ISD::SHL_PARTS) {
3403      Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
3404      Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
3405    } else {
3406      Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
3407      Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
3408    }
3409
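    // SHLD/SHRD (like the other x86 32-bit shifts) only use the shift amount
    // modulo 32, so test bit 5 of the amount (ShAmt & 32) explicitly and use
    // CMOV to select the corrected halves when the shift amount is 32 or more.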
3410    SDOperand InFlag =
3411      DAG.getNode(X86ISD::CMP, MVT::Flag,
3412                  DAG.getNode(ISD::AND, MVT::i8,
3413                              ShAmt, DAG.getConstant(32, MVT::i8)),
3414                  DAG.getConstant(0, MVT::i8));
3415
3416    SDOperand Hi, Lo;
3417    SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
3418
3419    std::vector<MVT::ValueType> Tys;
3420    Tys.push_back(MVT::i32);
3421    Tys.push_back(MVT::Flag);
3422    std::vector<SDOperand> Ops;
3423    if (Op.getOpcode() == ISD::SHL_PARTS) {
3424      Ops.push_back(Tmp2);
3425      Ops.push_back(Tmp3);
3426      Ops.push_back(CC);
3427      Ops.push_back(InFlag);
3428      Hi = DAG.getNode(X86ISD::CMOV, Tys, &Ops[0], Ops.size());
3429      InFlag = Hi.getValue(1);
3430
3431      Ops.clear();
3432      Ops.push_back(Tmp3);
3433      Ops.push_back(Tmp1);
3434      Ops.push_back(CC);
3435      Ops.push_back(InFlag);
3436      Lo = DAG.getNode(X86ISD::CMOV, Tys, &Ops[0], Ops.size());
3437    } else {
3438      Ops.push_back(Tmp2);
3439      Ops.push_back(Tmp3);
3440      Ops.push_back(CC);
3441      Ops.push_back(InFlag);
3442      Lo = DAG.getNode(X86ISD::CMOV, Tys, &Ops[0], Ops.size());
3443      InFlag = Lo.getValue(1);
3444
3445      Ops.clear();
3446      Ops.push_back(Tmp3);
3447      Ops.push_back(Tmp1);
3448      Ops.push_back(CC);
3449      Ops.push_back(InFlag);
3450      Hi = DAG.getNode(X86ISD::CMOV, Tys, &Ops[0], Ops.size());
3451    }
3452
3453    Tys.clear();
3454    Tys.push_back(MVT::i32);
3455    Tys.push_back(MVT::i32);
3456    Ops.clear();
3457    Ops.push_back(Lo);
3458    Ops.push_back(Hi);
3459    return DAG.getNode(ISD::MERGE_VALUES, Tys, &Ops[0], Ops.size());
3460}
3461
3462SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
3463  assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
3464         Op.getOperand(0).getValueType() >= MVT::i16 &&
3465         "Unknown SINT_TO_FP to lower!");
3466
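  // SINT_TO_FP is lowered by spilling the integer to a stack slot and
  // reloading it with FILD. With SSE, the f64 result is additionally stored
  // back to memory and reloaded as the SSE value type.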
3467  SDOperand Result;
3468  MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
3469  unsigned Size = MVT::getSizeInBits(SrcVT)/8;
3470  MachineFunction &MF = DAG.getMachineFunction();
3471  int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
3472  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3473  SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other,
3474                                DAG.getEntryNode(), Op.getOperand(0),
3475                                StackSlot, DAG.getSrcValue(NULL));
3476
3477  // Build the FILD
3478  std::vector<MVT::ValueType> Tys;
3479  Tys.push_back(MVT::f64);
3480  Tys.push_back(MVT::Other);
3481  if (X86ScalarSSE) Tys.push_back(MVT::Flag);
3482  std::vector<SDOperand> Ops;
3483  Ops.push_back(Chain);
3484  Ops.push_back(StackSlot);
3485  Ops.push_back(DAG.getValueType(SrcVT));
3486  Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
3487                       Tys, &Ops[0], Ops.size());
3488
3489  if (X86ScalarSSE) {
3490    Chain = Result.getValue(1);
3491    SDOperand InFlag = Result.getValue(2);
3492
3493    // FIXME: Currently the FST is flagged to the FILD_FLAG. This
3494    // shouldn't be necessary except that RFP cannot be live across
3495    // multiple blocks. When the stackifier is fixed, they can be uncoupled.
3496    MachineFunction &MF = DAG.getMachineFunction();
3497    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
3498    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3499    std::vector<MVT::ValueType> Tys;
3500    Tys.push_back(MVT::Other);
3501    std::vector<SDOperand> Ops;
3502    Ops.push_back(Chain);
3503    Ops.push_back(Result);
3504    Ops.push_back(StackSlot);
3505    Ops.push_back(DAG.getValueType(Op.getValueType()));
3506    Ops.push_back(InFlag);
3507    Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
3508    Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
3509                         DAG.getSrcValue(NULL));
3510  }
3511
3512  return Result;
3513}
3514
3515SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
3516  assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
3517         "Unknown FP_TO_SINT to lower!");
3518  // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
3519  // stack slot.
3520  MachineFunction &MF = DAG.getMachineFunction();
3521  unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
3522  int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
3523  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3524
3525  unsigned Opc;
3526  switch (Op.getValueType()) {
3527    default: assert(0 && "Invalid FP_TO_SINT to lower!");
3528    case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
3529    case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
3530    case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
3531  }
3532
3533  SDOperand Chain = DAG.getEntryNode();
3534  SDOperand Value = Op.getOperand(0);
3535  if (X86ScalarSSE) {
3536    assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
3537    Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot,
3538                        DAG.getSrcValue(0));
3539    std::vector<MVT::ValueType> Tys;
3540    Tys.push_back(MVT::f64);
3541    Tys.push_back(MVT::Other);
3542    std::vector<SDOperand> Ops;
3543    Ops.push_back(Chain);
3544    Ops.push_back(StackSlot);
3545    Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType()));
3546    Value = DAG.getNode(X86ISD::FLD, Tys, &Ops[0], Ops.size());
3547    Chain = Value.getValue(1);
3548    SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
3549    StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3550  }
3551
3552  // Build the FP_TO_INT*_IN_MEM
3553  std::vector<SDOperand> Ops;
3554  Ops.push_back(Chain);
3555  Ops.push_back(Value);
3556  Ops.push_back(StackSlot);
3557  SDOperand FIST = DAG.getNode(Opc, MVT::Other, &Ops[0], Ops.size());
3558
3559  // Load the result.
3560  return DAG.getLoad(Op.getValueType(), FIST, StackSlot,
3561                     DAG.getSrcValue(NULL));
3562}
3563
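/// LowerFABS - Lower FABS to an FAND with a sign-bit-clearing mask
/// (~(1 << 63) for f64, ~(1 << 31) for f32) loaded from the constant pool.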
3564SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
3565  MVT::ValueType VT = Op.getValueType();
3566  const Type *OpNTy =  MVT::getTypeForValueType(VT);
3567  std::vector<Constant*> CV;
3568  if (VT == MVT::f64) {
3569    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
3570    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3571  } else {
3572    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
3573    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3574    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3575    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3576  }
3577  Constant *CS = ConstantStruct::get(CV);
3578  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3579  std::vector<MVT::ValueType> Tys;
3580  Tys.push_back(VT);
3581  Tys.push_back(MVT::Other);
3582  SmallVector<SDOperand, 3> Ops;
3583  Ops.push_back(DAG.getEntryNode());
3584  Ops.push_back(CPIdx);
3585  Ops.push_back(DAG.getSrcValue(NULL));
3586  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
3587  return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
3588}
3589
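/// LowerFNEG - Lower FNEG to an FXOR with a sign-bit mask ((1 << 63) for
/// f64, (1 << 31) for f32) loaded from the constant pool.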
3590SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
3591  MVT::ValueType VT = Op.getValueType();
3592  const Type *OpNTy =  MVT::getTypeForValueType(VT);
3593  std::vector<Constant*> CV;
3594  if (VT == MVT::f64) {
3595    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
3596    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3597  } else {
3598    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
3599    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3600    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3601    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3602  }
3603  Constant *CS = ConstantStruct::get(CV);
3604  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3605  std::vector<MVT::ValueType> Tys;
3606  Tys.push_back(VT);
3607  Tys.push_back(MVT::Other);
3608  SmallVector<SDOperand, 3> Ops;
3609  Ops.push_back(DAG.getEntryNode());
3610  Ops.push_back(CPIdx);
3611  Ops.push_back(DAG.getSrcValue(NULL));
3612  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
3613  return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
3614}
3615
3616SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
3617  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
3618  SDOperand Cond;
3619  SDOperand CC = Op.getOperand(2);
3620  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
3621  bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
3622  bool Flip;
3623  unsigned X86CC;
3624  if (translateX86CC(CC, isFP, X86CC, Flip)) {
3625    if (Flip)
3626      Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
3627                         Op.getOperand(1), Op.getOperand(0));
3628    else
3629      Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
3630                         Op.getOperand(0), Op.getOperand(1));
3631    return DAG.getNode(X86ISD::SETCC, MVT::i8,
3632                       DAG.getConstant(X86CC, MVT::i8), Cond);
3633  } else {
3634    assert(isFP && "Illegal integer SetCC!");
3635
3636    Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
3637                       Op.getOperand(0), Op.getOperand(1));
3638    std::vector<MVT::ValueType> Tys;
3639    std::vector<SDOperand> Ops;
3640    switch (SetCCOpcode) {
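      // A comi/ucomi of unordered operands sets ZF, PF, and CF all to 1, so
      // a plain "equal"/"not equal" flag test is not sufficient here: SETOEQ
      // must also check !PF and SETUNE must also check PF.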
3641      default: assert(false && "Illegal floating point SetCC!");
3642      case ISD::SETOEQ: {  // !PF & ZF
3643        Tys.push_back(MVT::i8);
3644        Tys.push_back(MVT::Flag);
3645        Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8));
3646        Ops.push_back(Cond);
3647        SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, &Ops[0], Ops.size());
3648        SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
3649                                     DAG.getConstant(X86ISD::COND_E, MVT::i8),
3650                                     Tmp1.getValue(1));
3651        return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
3652      }
3653      case ISD::SETUNE: {  // PF | !ZF
3654        Tys.push_back(MVT::i8);
3655        Tys.push_back(MVT::Flag);
3656        Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8));
3657        Ops.push_back(Cond);
3658        SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, &Ops[0], Ops.size());
3659        SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
3660                                     DAG.getConstant(X86ISD::COND_NE, MVT::i8),
3661                                     Tmp1.getValue(1));
3662        return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
3663      }
3664    }
3665  }
3666}
3667
3668SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
3669  MVT::ValueType VT = Op.getValueType();
3670  bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE;
3671  bool addTest   = false;
3672  SDOperand Op0 = Op.getOperand(0);
3673  SDOperand Cond, CC;
3674  if (Op0.getOpcode() == ISD::SETCC)
3675    Op0 = LowerOperation(Op0, DAG);
3676
3677  if (Op0.getOpcode() == X86ISD::SETCC) {
3678    // If the condition flag is set by an X86ISD::CMP, then make a copy of
3679    // it (since the flag operand cannot be shared). If the X86ISD::SETCC
3680    // does not have another use it will be eliminated.
3681    // If the X86ISD::SETCC has more than one use, then it's probably better
3682    // to use a test instead of duplicating the X86ISD::CMP (for register
3683    // pressure reasons).
3684    unsigned CmpOpc = Op0.getOperand(1).getOpcode();
3685    if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
3686        CmpOpc == X86ISD::UCOMI) {
3687      if (!Op0.hasOneUse()) {
3688        std::vector<MVT::ValueType> Tys;
3689        for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i)
3690          Tys.push_back(Op0.Val->getValueType(i));
3691        std::vector<SDOperand> Ops;
3692        for (unsigned i = 0; i < Op0.getNumOperands(); ++i)
3693          Ops.push_back(Op0.getOperand(i));
3694        Op0 = DAG.getNode(X86ISD::SETCC, Tys, &Ops[0], Ops.size());
3695      }
3696
3697      CC   = Op0.getOperand(0);
3698      Cond = Op0.getOperand(1);
3699      // Make a copy as flag result cannot be used by more than one.
3700      Cond = DAG.getNode(CmpOpc, MVT::Flag,
3701                         Cond.getOperand(0), Cond.getOperand(1));
3702      addTest =
3703        isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
3704    } else
3705      addTest = true;
3706  } else
3707    addTest = true;
3708
3709  if (addTest) {
3710    CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
3711    Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, Op0,
3712                       DAG.getConstant(0, MVT::i8));
3713  }
3714
3715  std::vector<MVT::ValueType> Tys;
3716  Tys.push_back(Op.getValueType());
3717  Tys.push_back(MVT::Flag);
3718  std::vector<SDOperand> Ops;
3719  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
3720  // condition is true.
3721  Ops.push_back(Op.getOperand(2));
3722  Ops.push_back(Op.getOperand(1));
3723  Ops.push_back(CC);
3724  Ops.push_back(Cond);
3725  return DAG.getNode(X86ISD::CMOV, Tys, &Ops[0], Ops.size());
3726}
3727
3728SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
3729  bool addTest = false;
3730  SDOperand Cond  = Op.getOperand(1);
3731  SDOperand Dest  = Op.getOperand(2);
3732  SDOperand CC;
3733  if (Cond.getOpcode() == ISD::SETCC)
3734    Cond = LowerOperation(Cond, DAG);
3735
3736  if (Cond.getOpcode() == X86ISD::SETCC) {
3737    // If the condition flag is set by an X86ISD::CMP, then make a copy of
3738    // it (since the flag operand cannot be shared). If the X86ISD::SETCC
3739    // does not have another use it will be eliminated.
3740    // If the X86ISD::SETCC has more than one use, then it's probably better
3741    // to use a test instead of duplicating the X86ISD::CMP (for register
3742    // pressure reasons).
3743    unsigned CmpOpc = Cond.getOperand(1).getOpcode();
3744    if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
3745        CmpOpc == X86ISD::UCOMI) {
3746      if (!Cond.hasOneUse()) {
3747        std::vector<MVT::ValueType> Tys;
3748        for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i)
3749          Tys.push_back(Cond.Val->getValueType(i));
3750        std::vector<SDOperand> Ops;
3751        for (unsigned i = 0; i < Cond.getNumOperands(); ++i)
3752          Ops.push_back(Cond.getOperand(i));
3753        Cond = DAG.getNode(X86ISD::SETCC, Tys, &Ops[0], Ops.size());
3754      }
3755
3756      CC   = Cond.getOperand(0);
3757      Cond = Cond.getOperand(1);
3758      // Make a copy as flag result cannot be used by more than one.
3759      Cond = DAG.getNode(CmpOpc, MVT::Flag,
3760                         Cond.getOperand(0), Cond.getOperand(1));
3761    } else
3762      addTest = true;
3763  } else
3764    addTest = true;
3765
3766  if (addTest) {
3767    CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
3768    Cond = DAG.getNode(X86ISD::CMP, MVT::Flag, Cond,
3769                       DAG.getConstant(0, MVT::i8));
3770  }
3771  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
3772                     Op.getOperand(0), Dest, CC, Cond);
3773}
3774
3775SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
3776  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3777  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
3778                                 DAG.getTargetJumpTable(JT->getIndex(),
3779                                                        getPointerTy()));
3780  if (Subtarget->isTargetDarwin()) {
3781    // With PIC, the address is actually $g + Offset.
3782    if (!Subtarget->is64Bit() &&
3783        getTargetMachine().getRelocationModel() == Reloc::PIC_)
3784      Result = DAG.getNode(ISD::ADD, getPointerTy(),
3785                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3786                           Result);
3787  }
3788
3789  return Result;
3790}
3791
3792SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
3793  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3794  if (Subtarget->is64Bit())
3795    return LowerX86_64CCCCallTo(Op, DAG);
3796  else if (CallingConv == CallingConv::Fast && EnableFastCC)
3797    return LowerFastCCCallTo(Op, DAG);
3798  else
3799    return LowerCCCCallTo(Op, DAG);
3800}
3801
3802SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
3803  SDOperand Copy;
3804
3805  switch (Op.getNumOperands()) {
3806    default:
3807      assert(0 && "Do not know how to return this many arguments!");
3808      abort();
3809    case 1:    // ret void.
3810      return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0),
3811                        DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
3812    case 3: {
3813      MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
3814
3815      if (MVT::isVector(ArgVT) ||
3816          (Subtarget->is64Bit() && MVT::isFloatingPoint(ArgVT))) {
3817        // Integer or FP vector result -> XMM0.
3818        if (DAG.getMachineFunction().liveout_empty())
3819          DAG.getMachineFunction().addLiveOut(X86::XMM0);
3820        Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1),
3821                                SDOperand());
3822      } else if (MVT::isInteger(ArgVT)) {
3823        // Integer result -> EAX / RAX.
3824        // The C calling convention guarantees the return value has been
3825        // promoted to at least MVT::i32. The X86-64 ABI doesn't require the
3826        // value to be promoted to MVT::i64, so we don't have to extend it
3827        // to 64 bits. Return the value in EAX, but mark RAX as liveout.
3828        unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
3829        if (DAG.getMachineFunction().liveout_empty())
3830          DAG.getMachineFunction().addLiveOut(Reg);
3831
3832        Reg = (ArgVT == MVT::i64) ? X86::RAX : X86::EAX;
3833        Copy = DAG.getCopyToReg(Op.getOperand(0), Reg, Op.getOperand(1),
3834                                SDOperand());
3835      } else if (!X86ScalarSSE) {
3836        // FP return with fp-stack value.
3837        if (DAG.getMachineFunction().liveout_empty())
3838          DAG.getMachineFunction().addLiveOut(X86::ST0);
3839
3840        std::vector<MVT::ValueType> Tys;
3841        Tys.push_back(MVT::Other);
3842        Tys.push_back(MVT::Flag);
3843        std::vector<SDOperand> Ops;
3844        Ops.push_back(Op.getOperand(0));
3845        Ops.push_back(Op.getOperand(1));
3846        Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, &Ops[0], Ops.size());
3847      } else {
3848        // FP return with ScalarSSE (return on fp-stack).
3849        if (DAG.getMachineFunction().liveout_empty())
3850          DAG.getMachineFunction().addLiveOut(X86::ST0);
3851
3852        SDOperand MemLoc;
3853        SDOperand Chain = Op.getOperand(0);
3854        SDOperand Value = Op.getOperand(1);
3855
3856        if (Value.getOpcode() == ISD::LOAD &&
3857            (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
3858          Chain  = Value.getOperand(0);
3859          MemLoc = Value.getOperand(1);
3860        } else {
3861          // Spill the value to memory and reload it into the top of the stack.
3862          unsigned Size = MVT::getSizeInBits(ArgVT)/8;
3863          MachineFunction &MF = DAG.getMachineFunction();
3864          int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
3865          MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
3866          Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
3867                              Value, MemLoc, DAG.getSrcValue(0));
3868        }
3869        std::vector<MVT::ValueType> Tys;
3870        Tys.push_back(MVT::f64);
3871        Tys.push_back(MVT::Other);
3872        std::vector<SDOperand> Ops;
3873        Ops.push_back(Chain);
3874        Ops.push_back(MemLoc);
3875        Ops.push_back(DAG.getValueType(ArgVT));
3876        Copy = DAG.getNode(X86ISD::FLD, Tys, &Ops[0], Ops.size());
3877        Tys.clear();
3878        Tys.push_back(MVT::Other);
3879        Tys.push_back(MVT::Flag);
3880        Ops.clear();
3881        Ops.push_back(Copy.getValue(1));
3882        Ops.push_back(Copy);
3883        Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, &Ops[0], Ops.size());
3884      }
3885      break;
3886    }
3887    case 5: {
3888      unsigned Reg1 = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
3889      unsigned Reg2 = Subtarget->is64Bit() ? X86::RDX : X86::EDX;
3890      if (DAG.getMachineFunction().liveout_empty()) {
3891        DAG.getMachineFunction().addLiveOut(Reg1);
3892        DAG.getMachineFunction().addLiveOut(Reg2);
3893      }
3894
3895      Copy = DAG.getCopyToReg(Op.getOperand(0), Reg2, Op.getOperand(3),
3896                              SDOperand());
3897      Copy = DAG.getCopyToReg(Copy, Reg1, Op.getOperand(1), Copy.getValue(1));
3898      break;
3899    }
3900  }
3901  return DAG.getNode(X86ISD::RET_FLAG, MVT::Other,
3902                     Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
3903                     Copy.getValue(1));
3904}
3905
3906SDOperand
3907X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
3908  MachineFunction &MF = DAG.getMachineFunction();
3909  const Function* Fn = MF.getFunction();
3910  if (Fn->hasExternalLinkage() &&
3911      Subtarget->TargetType == X86Subtarget::isCygwin &&
3912      Fn->getName() == "main")
3913    MF.getInfo<X86FunctionInfo>()->setForceFramePointer(true);
3914
3915  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3916  if (Subtarget->is64Bit())
3917    return LowerX86_64CCCArguments(Op, DAG);
3918  else if (CC == CallingConv::Fast && EnableFastCC)
3919    return LowerFastCCArguments(Op, DAG);
3920  else
3921    return LowerCCCArguments(Op, DAG);
3922}
3923
3924SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
3925  SDOperand InFlag(0, 0);
3926  SDOperand Chain = Op.getOperand(0);
3927  unsigned Align =
3928    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
3929  if (Align == 0) Align = 1;
3930
3931  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
3932  // If not DWORD aligned, call memset if size is less than the threshold.
3933  // It knows how to align to the right boundary first.
3934  if ((Align & 3) != 0 ||
3935      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
3936    MVT::ValueType IntPtr = getPointerTy();
3937    const Type *IntPtrTy = getTargetData()->getIntPtrType();
3938    std::vector<std::pair<SDOperand, const Type*> > Args;
3939    Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
3940    // Extend the ubyte argument to be an int value for the call.
3941    SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
3942    Args.push_back(std::make_pair(Val, IntPtrTy));
3943    Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
3944    std::pair<SDOperand,SDOperand> CallResult =
3945      LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
3946                  DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
3947    return CallResult.second;
3948  }
3949
3950  MVT::ValueType AVT;
3951  SDOperand Count;
3952  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3953  unsigned BytesLeft = 0;
3954  bool TwoRepStos = false;
3955  if (ValC) {
3956    unsigned ValReg;
3957    uint64_t Val = ValC->getValue() & 255;
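    // Replicate the byte across the widest store unit the alignment allows,
    // e.g. 0xAB -> 0xABABABAB for a DWORD-aligned rep;stos.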
3958
3959    // If the value is a constant, then we can potentially use larger stores.
3960    switch (Align & 3) {
3961      case 2:   // WORD aligned
3962        AVT = MVT::i16;
3963        ValReg = X86::AX;
3964        Val = (Val << 8) | Val;
3965        break;
3966      case 0:  // DWORD aligned
3967        AVT = MVT::i32;
3968        ValReg = X86::EAX;
3969        Val = (Val << 8)  | Val;
3970        Val = (Val << 16) | Val;
3971        if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) {  // QWORD aligned
3972          AVT = MVT::i64;
3973          ValReg = X86::RAX;
3974          Val = (Val << 32) | Val;
3975        }
3976        break;
3977      default:  // Byte aligned
3978        AVT = MVT::i8;
3979        ValReg = X86::AL;
3980        Count = Op.getOperand(3);
3981        break;
3982    }
3983
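    // With a store unit wider than a byte, the count is the size divided by
    // the unit width. A constant remainder is handled with trailing scalar
    // stores below; a variable size instead gets a second byte-wide rep;stos.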
3984    if (AVT > MVT::i8) {
3985      if (I) {
3986        unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
3987        Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
3988        BytesLeft = I->getValue() % UBytes;
3989      } else {
3990        assert(AVT >= MVT::i32 &&
3991               "Do not use rep;stos if not at least DWORD aligned");
3992        Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
3993                            Op.getOperand(3), DAG.getConstant(2, MVT::i8));
3994        TwoRepStos = true;
3995      }
3996    }
3997
3998    Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
3999                              InFlag);
4000    InFlag = Chain.getValue(1);
4001  } else {
4002    AVT = MVT::i8;
4003    Count  = Op.getOperand(3);
4004    Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
4005    InFlag = Chain.getValue(1);
4006  }
4007
4008  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
4009                            Count, InFlag);
4010  InFlag = Chain.getValue(1);
4011  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
4012                            Op.getOperand(1), InFlag);
4013  InFlag = Chain.getValue(1);
4014
4015  std::vector<MVT::ValueType> Tys;
4016  Tys.push_back(MVT::Other);
4017  Tys.push_back(MVT::Flag);
4018  std::vector<SDOperand> Ops;
4019  Ops.push_back(Chain);
4020  Ops.push_back(DAG.getValueType(AVT));
4021  Ops.push_back(InFlag);
4022  Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
4023
4024  if (TwoRepStos) {
4025    InFlag = Chain.getValue(1);
4026    Count = Op.getOperand(3);
4027    MVT::ValueType CVT = Count.getValueType();
4028    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
4029                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
4030    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
4031                              Left, InFlag);
4032    InFlag = Chain.getValue(1);
4033    Tys.clear();
4034    Tys.push_back(MVT::Other);
4035    Tys.push_back(MVT::Flag);
4036    Ops.clear();
4037    Ops.push_back(Chain);
4038    Ops.push_back(DAG.getValueType(MVT::i8));
4039    Ops.push_back(InFlag);
4040    Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
4041  } else if (BytesLeft) {
4042    // Issue stores for the last 1 - 7 bytes.
4043    SDOperand Value;
4044    unsigned Val = ValC->getValue() & 255;
4045    unsigned Offset = I->getValue() - BytesLeft;
4046    SDOperand DstAddr = Op.getOperand(1);
4047    MVT::ValueType AddrVT = DstAddr.getValueType();
4048    if (BytesLeft >= 4) {
4049      Val = (Val << 8)  | Val;
4050      Val = (Val << 16) | Val;
4051      Value = DAG.getConstant(Val, MVT::i32);
4052      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
4053                          DAG.getNode(ISD::ADD, AddrVT, DstAddr,
4054                                      DAG.getConstant(Offset, AddrVT)),
4055                          DAG.getSrcValue(NULL));
4056      BytesLeft -= 4;
4057      Offset += 4;
4058    }
4059    if (BytesLeft >= 2) {
4060      Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
4061      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
4062                          DAG.getNode(ISD::ADD, AddrVT, DstAddr,
4063                                      DAG.getConstant(Offset, AddrVT)),
4064                          DAG.getSrcValue(NULL));
4065      BytesLeft -= 2;
4066      Offset += 2;
4067    }
4068    if (BytesLeft == 1) {
4069      Value = DAG.getConstant(Val, MVT::i8);
4070      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
4071                          DAG.getNode(ISD::ADD, AddrVT, DstAddr,
4072                                      DAG.getConstant(Offset, AddrVT)),
4073                          DAG.getSrcValue(NULL));
4074    }
4075  }
4076
4077  return Chain;
4078}
4079
4080SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
4081  SDOperand Chain = Op.getOperand(0);
4082  unsigned Align =
4083    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
4084  if (Align == 0) Align = 1;
4085
4086  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
4087  // If not DWORD aligned, call memcpy if size is less than the threshold.
4088  // It knows how to align to the right boundary first.
4089  if ((Align & 3) != 0 ||
4090      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
4091    MVT::ValueType IntPtr = getPointerTy();
4092    const Type *IntPtrTy = getTargetData()->getIntPtrType();
4093    std::vector<std::pair<SDOperand, const Type*> > Args;
4094    Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
4095    Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy));
4096    Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
4097    std::pair<SDOperand,SDOperand> CallResult =
4098      LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
4099                  DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
4100    return CallResult.second;
4101  }
4102
4103  MVT::ValueType AVT;
4104  SDOperand Count;
4105  unsigned BytesLeft = 0;
4106  bool TwoRepMovs = false;
4107  switch (Align & 3) {
4108    case 2:   // WORD aligned
4109      AVT = MVT::i16;
4110      break;
4111    case 0:  // DWORD aligned
4112      AVT = MVT::i32;
4113      if (Subtarget->is64Bit() && ((Align & 0xF) == 0))  // QWORD aligned
4114        AVT = MVT::i64;
4115      break;
4116    default:  // Byte aligned
4117      AVT = MVT::i8;
4118      Count = Op.getOperand(3);
4119      break;
4120  }
4121
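  // As in LowerMEMSET: with a wider move unit, a constant size leaves up to
  // UBytes-1 trailing bytes for scalar loads and stores, while a variable
  // size gets a second byte-wide rep;movs for the remainder.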
4122  if (AVT > MVT::i8) {
4123    if (I) {
4124      unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
4125      Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
4126      BytesLeft = I->getValue() % UBytes;
4127    } else {
4128      assert(AVT >= MVT::i32 &&
4129             "Do not use rep;movs if not at least DWORD aligned");
4130      Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
4131                          Op.getOperand(3), DAG.getConstant(2, MVT::i8));
4132      TwoRepMovs = true;
4133    }
4134  }
4135
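  // rep;movs implicitly takes its operands in fixed registers: the trip count
  // in {E,R}CX, the destination in {E,R}DI and the source in {E,R}SI.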
4136  SDOperand InFlag(0, 0);
4137  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
4138                            Count, InFlag);
4139  InFlag = Chain.getValue(1);
4140  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
4141                            Op.getOperand(1), InFlag);
4142  InFlag = Chain.getValue(1);
4143  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
4144                            Op.getOperand(2), InFlag);
4145  InFlag = Chain.getValue(1);
4146
4147  std::vector<MVT::ValueType> Tys;
4148  Tys.push_back(MVT::Other);
4149  Tys.push_back(MVT::Flag);
4150  std::vector<SDOperand> Ops;
4151  Ops.push_back(Chain);
4152  Ops.push_back(DAG.getValueType(AVT));
4153  Ops.push_back(InFlag);
4154  Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
4155
4156  if (TwoRepMovs) {
4157    InFlag = Chain.getValue(1);
4158    Count = Op.getOperand(3);
4159    MVT::ValueType CVT = Count.getValueType();
4160    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
4161                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
4162    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
4163                              Left, InFlag);
4164    InFlag = Chain.getValue(1);
4165    Tys.clear();
4166    Tys.push_back(MVT::Other);
4167    Tys.push_back(MVT::Flag);
4168    Ops.clear();
4169    Ops.push_back(Chain);
4170    Ops.push_back(DAG.getValueType(MVT::i8));
4171    Ops.push_back(InFlag);
4172    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
4173  } else if (BytesLeft) {
4174    // Issue loads and stores for the last 1 - 7 bytes.
4175    unsigned Offset = I->getValue() - BytesLeft;
4176    SDOperand DstAddr = Op.getOperand(1);
4177    MVT::ValueType DstVT = DstAddr.getValueType();
4178    SDOperand SrcAddr = Op.getOperand(2);
4179    MVT::ValueType SrcVT = SrcAddr.getValueType();
4180    SDOperand Value;
4181    if (BytesLeft >= 4) {
4182      Value = DAG.getLoad(MVT::i32, Chain,
4183                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
4184                                      DAG.getConstant(Offset, SrcVT)),
4185                          DAG.getSrcValue(NULL));
4186      Chain = Value.getValue(1);
4187      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
4188                          DAG.getNode(ISD::ADD, DstVT, DstAddr,
4189                                      DAG.getConstant(Offset, DstVT)),
4190                          DAG.getSrcValue(NULL));
4191      BytesLeft -= 4;
4192      Offset += 4;
4193    }
4194    if (BytesLeft >= 2) {
4195      Value = DAG.getLoad(MVT::i16, Chain,
4196                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
4197                                      DAG.getConstant(Offset, SrcVT)),
4198                          DAG.getSrcValue(NULL));
4199      Chain = Value.getValue(1);
4200      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
4201                          DAG.getNode(ISD::ADD, DstVT, DstAddr,
4202                                      DAG.getConstant(Offset, DstVT)),
4203                          DAG.getSrcValue(NULL));
4204      BytesLeft -= 2;
4205      Offset += 2;
4206    }
4207
4208    if (BytesLeft == 1) {
4209      Value = DAG.getLoad(MVT::i8, Chain,
4210                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
4211                                      DAG.getConstant(Offset, SrcVT)),
4212                          DAG.getSrcValue(NULL));
4213      Chain = Value.getValue(1);
4214      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
4215                          DAG.getNode(ISD::ADD, DstVT, DstAddr,
4216                                      DAG.getConstant(Offset, DstVT)),
4217                          DAG.getSrcValue(NULL));
4218    }
4219  }
4220
4221  return Chain;
4222}
4223
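/// LowerREADCYCLECOUNTER - Lower READCYCLECOUNTER to the RDTSC instruction,
/// which deposits the 64-bit time-stamp counter in the fixed register pair
/// EDX:EAX; the two halves are copied out and merged into the i32 results.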
4224SDOperand
4225X86TargetLowering::LowerREADCYCLECOUNTER(SDOperand Op, SelectionDAG &DAG) {
4226  std::vector<MVT::ValueType> Tys;
4227  Tys.push_back(MVT::Other);
4228  Tys.push_back(MVT::Flag);
4229  std::vector<SDOperand> Ops;
4230  Ops.push_back(Op.getOperand(0));
4231  SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &Ops[0], Ops.size());
4232  Ops.clear();
4233  Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)));
4234  Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX,
4235                                   MVT::i32, Ops[0].getValue(2)));
4236  Ops.push_back(Ops[1].getValue(1));
4237  Tys[0] = Tys[1] = MVT::i32;
4238  Tys.push_back(MVT::Other);
4239  return DAG.getNode(ISD::MERGE_VALUES, Tys, &Ops[0], Ops.size());
4240}
4241
4242SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
4243  if (!Subtarget->is64Bit()) {
4244    // vastart just stores the address of the VarArgsFrameIndex slot into the
4245    // memory location argument.
4246    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
4247    return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
4248                       Op.getOperand(1), Op.getOperand(2));
4249  }
4250
4251  // __va_list_tag:
4252  //   gp_offset         (0 .. 6 * 8: offset into the GP register save area)
4253  //   fp_offset         (48 .. 48 + 8 * 16: offset into the XMM save area)
4254  //   overflow_arg_area (points to parameters passed in memory).
4255  //   reg_save_area
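  //
  // In C terms the layout is roughly:
  //   struct __va_list_tag {
  //     unsigned gp_offset;         // byte offset 0
  //     unsigned fp_offset;         // byte offset 4
  //     void *overflow_arg_area;    // byte offset 8
  //     void *reg_save_area;        // byte offset 16
  //   };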
4256  std::vector<SDOperand> MemOps;
4257  SDOperand FIN = Op.getOperand(1);
4258  // Store gp_offset
4259  SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
4260                                DAG.getConstant(VarArgsGPOffset, MVT::i32),
4261                                FIN, Op.getOperand(2));
4262  MemOps.push_back(Store);
4263
4264  // Store fp_offset
4265  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
4266                    DAG.getConstant(4, getPointerTy()));
4267  Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
4268                      DAG.getConstant(VarArgsFPOffset, MVT::i32),
4269                      FIN, Op.getOperand(2));
4270  MemOps.push_back(Store);
4271
4272  // Store ptr to overflow_arg_area
4273  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
4274                    DAG.getConstant(4, getPointerTy()));
4275  SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
4276  Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
4277                      OVFIN, FIN, Op.getOperand(2));
4278  MemOps.push_back(Store);
4279
4280  // Store ptr to reg_save_area.
4281  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
4282                    DAG.getConstant(8, getPointerTy()));
4283  SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
4284  Store = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
4285                      RSFIN, FIN, Op.getOperand(2));
4286  MemOps.push_back(Store);
4287  return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
4288}
4289
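/// LowerINTRINSIC_WO_CHAIN - Custom lower chainless target intrinsics. The
/// SSE/SSE2 (u)comi comparison intrinsics are lowered to an X86ISD::COMI or
/// X86ISD::UCOMI compare feeding an X86ISD::SETCC; roughly
/// "comiss %xmm0, %xmm1; sete %al" for comieq, extended to the i32 result.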
4290SDOperand
4291X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
4292  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
4293  switch (IntNo) {
4294  default: return SDOperand();    // Don't custom lower most intrinsics.
4295    // Comparison intrinsics.
4296  case Intrinsic::x86_sse_comieq_ss:
4297  case Intrinsic::x86_sse_comilt_ss:
4298  case Intrinsic::x86_sse_comile_ss:
4299  case Intrinsic::x86_sse_comigt_ss:
4300  case Intrinsic::x86_sse_comige_ss:
4301  case Intrinsic::x86_sse_comineq_ss:
4302  case Intrinsic::x86_sse_ucomieq_ss:
4303  case Intrinsic::x86_sse_ucomilt_ss:
4304  case Intrinsic::x86_sse_ucomile_ss:
4305  case Intrinsic::x86_sse_ucomigt_ss:
4306  case Intrinsic::x86_sse_ucomige_ss:
4307  case Intrinsic::x86_sse_ucomineq_ss:
4308  case Intrinsic::x86_sse2_comieq_sd:
4309  case Intrinsic::x86_sse2_comilt_sd:
4310  case Intrinsic::x86_sse2_comile_sd:
4311  case Intrinsic::x86_sse2_comigt_sd:
4312  case Intrinsic::x86_sse2_comige_sd:
4313  case Intrinsic::x86_sse2_comineq_sd:
4314  case Intrinsic::x86_sse2_ucomieq_sd:
4315  case Intrinsic::x86_sse2_ucomilt_sd:
4316  case Intrinsic::x86_sse2_ucomile_sd:
4317  case Intrinsic::x86_sse2_ucomigt_sd:
4318  case Intrinsic::x86_sse2_ucomige_sd:
4319  case Intrinsic::x86_sse2_ucomineq_sd: {
4320    unsigned Opc = 0;
4321    ISD::CondCode CC = ISD::SETCC_INVALID;
4322    switch (IntNo) {
4323    default: break;
4324    case Intrinsic::x86_sse_comieq_ss:
4325    case Intrinsic::x86_sse2_comieq_sd:
4326      Opc = X86ISD::COMI;
4327      CC = ISD::SETEQ;
4328      break;
4329    case Intrinsic::x86_sse_comilt_ss:
4330    case Intrinsic::x86_sse2_comilt_sd:
4331      Opc = X86ISD::COMI;
4332      CC = ISD::SETLT;
4333      break;
4334    case Intrinsic::x86_sse_comile_ss:
4335    case Intrinsic::x86_sse2_comile_sd:
4336      Opc = X86ISD::COMI;
4337      CC = ISD::SETLE;
4338      break;
4339    case Intrinsic::x86_sse_comigt_ss:
4340    case Intrinsic::x86_sse2_comigt_sd:
4341      Opc = X86ISD::COMI;
4342      CC = ISD::SETGT;
4343      break;
4344    case Intrinsic::x86_sse_comige_ss:
4345    case Intrinsic::x86_sse2_comige_sd:
4346      Opc = X86ISD::COMI;
4347      CC = ISD::SETGE;
4348      break;
4349    case Intrinsic::x86_sse_comineq_ss:
4350    case Intrinsic::x86_sse2_comineq_sd:
4351      Opc = X86ISD::COMI;
4352      CC = ISD::SETNE;
4353      break;
4354    case Intrinsic::x86_sse_ucomieq_ss:
4355    case Intrinsic::x86_sse2_ucomieq_sd:
4356      Opc = X86ISD::UCOMI;
4357      CC = ISD::SETEQ;
4358      break;
4359    case Intrinsic::x86_sse_ucomilt_ss:
4360    case Intrinsic::x86_sse2_ucomilt_sd:
4361      Opc = X86ISD::UCOMI;
4362      CC = ISD::SETLT;
4363      break;
4364    case Intrinsic::x86_sse_ucomile_ss:
4365    case Intrinsic::x86_sse2_ucomile_sd:
4366      Opc = X86ISD::UCOMI;
4367      CC = ISD::SETLE;
4368      break;
4369    case Intrinsic::x86_sse_ucomigt_ss:
4370    case Intrinsic::x86_sse2_ucomigt_sd:
4371      Opc = X86ISD::UCOMI;
4372      CC = ISD::SETGT;
4373      break;
4374    case Intrinsic::x86_sse_ucomige_ss:
4375    case Intrinsic::x86_sse2_ucomige_sd:
4376      Opc = X86ISD::UCOMI;
4377      CC = ISD::SETGE;
4378      break;
4379    case Intrinsic::x86_sse_ucomineq_ss:
4380    case Intrinsic::x86_sse2_ucomineq_sd:
4381      Opc = X86ISD::UCOMI;
4382      CC = ISD::SETNE;
4383      break;
4384    }
4385    bool Flip;
4386    unsigned X86CC;
4387    translateX86CC(CC, true, X86CC, Flip);
4388    SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1),
4389                                 Op.getOperand(Flip?1:2));
4390    SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8,
4391                                  DAG.getConstant(X86CC, MVT::i8), Cond);
4392    return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
4393  }
4394  }
4395}
4396
4397/// LowerOperation - Provide custom lowering hooks for some operations.
4398///
4399SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
4400  switch (Op.getOpcode()) {
4401  default: assert(0 && "Should not custom lower this!");
4402  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
4403  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
4404  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
4405  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
4406  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
4407  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
4408  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
4409  case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
4410  case ISD::SHL_PARTS:
4411  case ISD::SRA_PARTS:
4412  case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
4413  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
4414  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
4415  case ISD::FABS:               return LowerFABS(Op, DAG);
4416  case ISD::FNEG:               return LowerFNEG(Op, DAG);
4417  case ISD::SETCC:              return LowerSETCC(Op, DAG);
4418  case ISD::SELECT:             return LowerSELECT(Op, DAG);
4419  case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
4420  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
4421  case ISD::CALL:               return LowerCALL(Op, DAG);
4422  case ISD::RET:                return LowerRET(Op, DAG);
4423  case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
4424  case ISD::MEMSET:             return LowerMEMSET(Op, DAG);
4425  case ISD::MEMCPY:             return LowerMEMCPY(Op, DAG);
4426  case ISD::READCYCLECOUNTER:   return LowerREADCYCLECOUNTER(Op, DAG);
4427  case ISD::VASTART:            return LowerVASTART(Op, DAG);
4428  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4429  }
4430}
4431
4432const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
4433  switch (Opcode) {
4434  default: return NULL;
4435  case X86ISD::SHLD:               return "X86ISD::SHLD";
4436  case X86ISD::SHRD:               return "X86ISD::SHRD";
4437  case X86ISD::FAND:               return "X86ISD::FAND";
4438  case X86ISD::FXOR:               return "X86ISD::FXOR";
4439  case X86ISD::FILD:               return "X86ISD::FILD";
4440  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
4441  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
4442  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
4443  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
4444  case X86ISD::FLD:                return "X86ISD::FLD";
4445  case X86ISD::FST:                return "X86ISD::FST";
4446  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
4447  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
4448  case X86ISD::CALL:               return "X86ISD::CALL";
4449  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
4450  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
4451  case X86ISD::CMP:                return "X86ISD::CMP";
4452  case X86ISD::COMI:               return "X86ISD::COMI";
4453  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
4454  case X86ISD::SETCC:              return "X86ISD::SETCC";
4455  case X86ISD::CMOV:               return "X86ISD::CMOV";
4456  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
4457  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
4458  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
4459  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
4460  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
4461  case X86ISD::LOAD_UA:            return "X86ISD::LOAD_UA";
4462  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
4463  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
4464  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
4465  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
4466  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
4467  }
4468}
4469
4470/// isLegalAddressImmediate - Return true if the integer value or
4471/// GlobalValue can be used as the offset of the target addressing mode.
4472bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const {
4473  // X86 allows a sign-extended 32-bit immediate field.
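  // E.g. a displacement of 0x7FFFFFFF can be folded into the addressing mode,
  // while 0x100000000 cannot be encoded in the instruction.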
4474  return (V >= -(1LL << 31) && V <= (1LL << 31)-1);
4475}
4476
4477bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
4478  // GV is 64-bit but displacement field is 32-bit unless we are in small code
4479  // model. Mac OS X happens to support only small PIC code model.
4480  // FIXME: better support for other OS's.
4481  if (Subtarget->is64Bit() && !Subtarget->isTargetDarwin())
4482    return false;
4483  if (Subtarget->isTargetDarwin()) {
4484    Reloc::Model RModel = getTargetMachine().getRelocationModel();
4485    if (RModel == Reloc::Static)
4486      return true;
4487    else if (RModel == Reloc::DynamicNoPIC)
4488      return !DarwinGVRequiresExtraLoad(GV);
4489    else
4490      return false;
4491  } else
4492    return true;
4493}
4494
4495/// isShuffleMaskLegal - Targets can use this to indicate that they only
4496/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
4497/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
4498/// are assumed to be legal.
4499bool
4500X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
4501  // Only do shuffles on 128-bit vector types for now.
4502  if (MVT::getSizeInBits(VT) == 64) return false;
4503  return (Mask.Val->getNumOperands() <= 4 ||
4504          isSplatMask(Mask.Val)  ||
4505          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
4506          X86::isUNPCKLMask(Mask.Val) ||
4507          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
4508          X86::isUNPCKHMask(Mask.Val));
4509}
4510
4511bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
4512                                               MVT::ValueType EVT,
4513                                               SelectionDAG &DAG) const {
4514  unsigned NumElts = BVOps.size();
4515  // Only do shuffles on 128-bit vector types for now.
4516  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
4517  if (NumElts == 2) return true;
4518  if (NumElts == 4) {
4519    return (isMOVLMask(BVOps)  || isCommutedMOVL(BVOps, true) ||
4520            isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps));
4521  }
4522  return false;
4523}
4524
4525//===----------------------------------------------------------------------===//
4526//                           X86 Scheduler Hooks
4527//===----------------------------------------------------------------------===//
4528
4529MachineBasicBlock *
4530X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
4531                                           MachineBasicBlock *BB) {
4532  switch (MI->getOpcode()) {
4533  default: assert(false && "Unexpected instr type to insert");
4534  case X86::CMOV_FR32:
4535  case X86::CMOV_FR64:
4536  case X86::CMOV_V4F32:
4537  case X86::CMOV_V2F64:
4538  case X86::CMOV_V2I64: {
4539    // To "insert" a SELECT_CC instruction, we actually have to insert the
4540    // diamond control-flow pattern.  The incoming instruction knows the
4541    // destination vreg to set, the condition code register to branch on, the
4542    // true/false values to select between, and a branch opcode to use.
4543    const BasicBlock *LLVM_BB = BB->getBasicBlock();
4544    ilist<MachineBasicBlock>::iterator It = BB;
4545    ++It;
4546
4547    //  thisMBB:
4548    //  ...
4549    //   TrueVal = ...
4550    //   cmpTY ccX, r1, r2
4551    //   bCC copy1MBB
4552    //   fallthrough --> copy0MBB
4553    MachineBasicBlock *thisMBB = BB;
4554    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
4555    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
4556    unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue());
4557    BuildMI(BB, Opc, 1).addMBB(sinkMBB);
4558    MachineFunction *F = BB->getParent();
4559    F->getBasicBlockList().insert(It, copy0MBB);
4560    F->getBasicBlockList().insert(It, sinkMBB);
4561    // Update machine-CFG edges by first adding all successors of the current
4562    // block to the new block which will contain the Phi node for the select.
4563    for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
4564        e = BB->succ_end(); i != e; ++i)
4565      sinkMBB->addSuccessor(*i);
4566    // Next, remove all successors of the current block, and add the true
4567    // and fallthrough blocks as its successors.
4568    while (!BB->succ_empty())
4569      BB->removeSuccessor(BB->succ_begin());
4570    BB->addSuccessor(copy0MBB);
4571    BB->addSuccessor(sinkMBB);
4572
4573    //  copy0MBB:
4574    //   %FalseValue = ...
4575    //   # fallthrough to sinkMBB
4576    BB = copy0MBB;
4577
4578    // Update machine-CFG edges
4579    BB->addSuccessor(sinkMBB);
4580
4581    //  sinkMBB:
4582    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
4583    //  ...
4584    BB = sinkMBB;
4585    BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg())
4586      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
4587      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
4588
4589    delete MI;   // The pseudo instruction is gone now.
4590    return BB;
4591  }
4592
4593  case X86::FP_TO_INT16_IN_MEM:
4594  case X86::FP_TO_INT32_IN_MEM:
4595  case X86::FP_TO_INT64_IN_MEM: {
4596    // Change the floating point control register to use "round towards zero"
4597    // mode when truncating to an integer value.
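    // The emitted sequence is roughly:
    //   fnstcw  [slot]          ; save the current control word
    //   mov     ax, [slot]      ; remember its old value
    //   mov     word [slot], 0xC7F
    //   fldcw   [slot]          ; switch rounding control to truncate
    //   mov     [slot], ax      ; restore the memory image for later
    //   fistp   [dest]          ; the truncating store itself
    //   fldcw   [slot]          ; reload the original control word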
4598    MachineFunction *F = BB->getParent();
4599    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
4600    addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
4601
4602    // Load the old value of the control word so it can be restored later...
4603    unsigned OldCW =
4604      F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
4605    addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);
4606
4607    // Set the rounding control to round-towards-zero (control word 0xC7F)...
4608    addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);
4609
4610    // Reload the modified control word now...
4611    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
4612
4613    // Restore the memory image of control word to original value
4614    addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);
4615
4616    // Get the X86 opcode to use.
4617    unsigned Opc;
4618    switch (MI->getOpcode()) {
4619    default: assert(0 && "illegal opcode!");
4620    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
4621    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
4622    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
4623    }
4624
4625    X86AddressMode AM;
4626    MachineOperand &Op = MI->getOperand(0);
4627    if (Op.isRegister()) {
4628      AM.BaseType = X86AddressMode::RegBase;
4629      AM.Base.Reg = Op.getReg();
4630    } else {
4631      AM.BaseType = X86AddressMode::FrameIndexBase;
4632      AM.Base.FrameIndex = Op.getFrameIndex();
4633    }
4634    Op = MI->getOperand(1);
4635    if (Op.isImmediate())
4636      AM.Scale = Op.getImmedValue();
4637    Op = MI->getOperand(2);
4638    if (Op.isImmediate())
4639      AM.IndexReg = Op.getImmedValue();
4640    Op = MI->getOperand(3);
4641    if (Op.isGlobalAddress()) {
4642      AM.GV = Op.getGlobal();
4643    } else {
4644      AM.Disp = Op.getImmedValue();
4645    }
4646    addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());
4647
4648    // Reload the original control word now.
4649    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
4650
4651    delete MI;   // The pseudo instruction is gone now.
4652    return BB;
4653  }
4654  }
4655}
4656
4657//===----------------------------------------------------------------------===//
4658//                           X86 Optimization Hooks
4659//===----------------------------------------------------------------------===//
4660
4661void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
4662                                                       uint64_t Mask,
4663                                                       uint64_t &KnownZero,
4664                                                       uint64_t &KnownOne,
4665                                                       unsigned Depth) const {
4666  unsigned Opc = Op.getOpcode();
4667  assert((Opc >= ISD::BUILTIN_OP_END ||
4668          Opc == ISD::INTRINSIC_WO_CHAIN ||
4669          Opc == ISD::INTRINSIC_W_CHAIN ||
4670          Opc == ISD::INTRINSIC_VOID) &&
4671         "Should use MaskedValueIsZero if you don't know whether Op"
4672         " is a target node!");
4673
4674  KnownZero = KnownOne = 0;   // Don't know anything.
4675  switch (Opc) {
4676  default: break;
4677  case X86ISD::SETCC:
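    // X86ISD::SETCC materializes 0 or 1 in an i8 register, so every bit
    // above bit zero of the result is known to be zero.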
4678    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
4679    break;
4680  }
4681}
4682
4683/// getShuffleScalarElt - Returns the scalar element that will make up the ith
4684/// element of the result of the vector shuffle.
4685static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
4686  MVT::ValueType VT = N->getValueType(0);
4687  SDOperand PermMask = N->getOperand(2);
4688  unsigned NumElems = PermMask.getNumOperands();
4689  SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
4690  i %= NumElems;
4691  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
4692    return (i == 0)
4693      ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
4694  } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
4695    SDOperand Idx = PermMask.getOperand(i);
4696    if (Idx.getOpcode() == ISD::UNDEF)
4697      return DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
4698    return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
4699  }
4700  return SDOperand();
4701}
4702
4703/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
4704/// node is a GlobalAddress + an offset.
4705static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
4706  if (N->getOpcode() == X86ISD::Wrapper) {
4707    if (dyn_cast<GlobalAddressSDNode>(N->getOperand(0))) {
4708      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
4709      return true;
4710    }
4711  } else if (N->getOpcode() == ISD::ADD) {
4712    SDOperand N1 = N->getOperand(0);
4713    SDOperand N2 = N->getOperand(1);
4714    if (isGAPlusOffset(N1.Val, GA, Offset)) {
4715      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
4716      if (V) {
4717        Offset += V->getSignExtended();
4718        return true;
4719      }
4720    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
4721      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
4722      if (V) {
4723        Offset += V->getSignExtended();
4724        return true;
4725      }
4726    }
4727  }
4728  return false;
4729}
4730
4731/// isConsecutiveLoad - Returns true if N is loading from an address of Base
4732/// + Dist * Size.
4733static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
4734                              MachineFrameInfo *MFI) {
4735  if (N->getOperand(0).Val != Base->getOperand(0).Val)
4736    return false;
4737
4738  SDOperand Loc = N->getOperand(1);
4739  SDOperand BaseLoc = Base->getOperand(1);
4740  if (Loc.getOpcode() == ISD::FrameIndex) {
4741    if (BaseLoc.getOpcode() != ISD::FrameIndex)
4742      return false;
4743    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
4744    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
4745    int FS  = MFI->getObjectSize(FI);
4746    int BFS = MFI->getObjectSize(BFI);
4747    if (FS != BFS || FS != Size) return false;
4748    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
4749  } else {
4750    GlobalValue *GV1 = NULL;
4751    GlobalValue *GV2 = NULL;
4752    int64_t Offset1 = 0;
4753    int64_t Offset2 = 0;
4754    bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
4755    bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
4756    if (isGA1 && isGA2 && GV1 == GV2)
4757      return Offset1 == (Offset2 + Dist*Size);
4758  }
4759
4760  return false;
4761}
4762
4763static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
4764                              const X86Subtarget *Subtarget) {
4765  GlobalValue *GV = NULL;
4766  int64_t Offset = 0;
4767  if (isGAPlusOffset(Base, GV, Offset))
4768    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
4769  else {
4770    assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
4771    int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
4772    if (BFI < 0)
4773      // Fixed objects do not specify alignment, however the offsets are known.
4774      return ((Subtarget->getStackAlignment() % 16) == 0 &&
4775              (MFI->getObjectOffset(BFI) % 16) == 0);
4776    else
4777      return MFI->getObjectAlignment(BFI) >= 16;
4778  }
4779  return false;
4780}
4781
4782
4783/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
4784/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
4785/// if the load addresses are consecutive, non-overlapping, and in the right
4786/// order.
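/// E.g. four in-order f32 element loads from addr, addr+4, addr+8 and
/// addr+12 collapse into a single 16-byte load: movaps when the base is
/// known 16-byte aligned, movups otherwise.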
4787static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
4788                                       const X86Subtarget *Subtarget) {
4789  MachineFunction &MF = DAG.getMachineFunction();
4790  MachineFrameInfo *MFI = MF.getFrameInfo();
4791  MVT::ValueType VT = N->getValueType(0);
4792  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
4793  SDOperand PermMask = N->getOperand(2);
4794  int NumElems = (int)PermMask.getNumOperands();
4795  SDNode *Base = NULL;
4796  for (int i = 0; i < NumElems; ++i) {
4797    SDOperand Idx = PermMask.getOperand(i);
4798    if (Idx.getOpcode() == ISD::UNDEF) {
4799      if (!Base) return SDOperand();
4800    } else {
4801      SDOperand Arg =
4802        getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
4803      if (!Arg.Val || Arg.getOpcode() != ISD::LOAD)
4804        return SDOperand();
4805      if (!Base)
4806        Base = Arg.Val;
4807      else if (!isConsecutiveLoad(Arg.Val, Base,
4808                                  i, MVT::getSizeInBits(EVT)/8,MFI))
4809        return SDOperand();
4810    }
4811  }
4812
4813  bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
4814  if (isAlign16)
4815    return DAG.getLoad(VT, Base->getOperand(0), Base->getOperand(1),
4816                       Base->getOperand(2));
4817  else {
4818    // Just use movups; it's shorter.
4819    std::vector<MVT::ValueType> Tys;
4820    Tys.push_back(MVT::v4f32);
4821    Tys.push_back(MVT::Other);
4822    SmallVector<SDOperand, 3> Ops;
4823    Ops.push_back(Base->getOperand(0));
4824    Ops.push_back(Base->getOperand(1));
4825    Ops.push_back(Base->getOperand(2));
4826    return DAG.getNode(ISD::BIT_CONVERT, VT,
4827                       DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size()));
4828  }
4829}
4830
4831SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
4832                                               DAGCombinerInfo &DCI) const {
4833  TargetMachine &TM = getTargetMachine();
4834  SelectionDAG &DAG = DCI.DAG;
4835  switch (N->getOpcode()) {
4836  default: break;
4837  case ISD::VECTOR_SHUFFLE:
4838    return PerformShuffleCombine(N, DAG, Subtarget);
4839  }
4840
4841  return SDOperand();
4842}
4843
4844//===----------------------------------------------------------------------===//
4845//                           X86 Inline Assembly Support
4846//===----------------------------------------------------------------------===//
4847
4848/// getConstraintType - Given a constraint letter, return the type of
4849/// constraint it is for this target.
4850X86TargetLowering::ConstraintType
4851X86TargetLowering::getConstraintType(char ConstraintLetter) const {
4852  switch (ConstraintLetter) {
4853  case 'A':
4854  case 'r':
4855  case 'R':
4856  case 'l':
4857  case 'q':
4858  case 'Q':
4859  case 'x':
4860  case 'Y':
4861    return C_RegisterClass;
4862  default: return TargetLowering::getConstraintType(ConstraintLetter);
4863  }
4864}
4865
4866std::vector<unsigned> X86TargetLowering::
4867getRegClassForInlineAsmConstraint(const std::string &Constraint,
4868                                  MVT::ValueType VT) const {
4869  if (Constraint.size() == 1) {
4870    // FIXME: not handling fp-stack yet!
4871    // FIXME: not handling MMX registers yet ('y' constraint).
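    // E.g. the GCC-style constraint in asm("rdtsc" : "=A"(t)) selects the
    // EAX/EDX register pair handled by the 'A' case below.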
4872    switch (Constraint[0]) {      // GCC X86 Constraint Letters
4873    default: break;  // Unknown constraint letter
4874    case 'A':   // EAX/EDX
4875      if (VT == MVT::i32 || VT == MVT::i64)
4876        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
4877      break;
4878    case 'r':   // GENERAL_REGS
4879    case 'R':   // LEGACY_REGS
4880      if (VT == MVT::i32)
4881        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
4882                                     X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
4883      else if (VT == MVT::i16)
4884        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
4885                                     X86::SI, X86::DI, X86::BP, X86::SP, 0);
4886      else if (VT == MVT::i8)
4887        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4888      break;
4889    case 'l':   // INDEX_REGS
4890      if (VT == MVT::i32)
4891        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
4892                                     X86::ESI, X86::EDI, X86::EBP, 0);
4893      else if (VT == MVT::i16)
4894        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
4895                                     X86::SI, X86::DI, X86::BP, 0);
4896      else if (VT == MVT::i8)
4897        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4898      break;
4899    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
4900    case 'Q':   // Q_REGS
4901      if (VT == MVT::i32)
4902        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
4903      else if (VT == MVT::i16)
4904        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
4905      else if (VT == MVT::i8)
4906        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4907      break;
4908    case 'x':   // SSE_REGS if SSE1 allowed
4909      if (Subtarget->hasSSE1())
4910        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4911                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
4912                                     0);
4913      return std::vector<unsigned>();
4914    case 'Y':   // SSE_REGS if SSE2 allowed
4915      if (Subtarget->hasSSE2())
4916        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4917                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
4918                                     0);
4919      return std::vector<unsigned>();
4920    }
4921  }
4922
4923  return std::vector<unsigned>();
4924}
4925
4926std::pair<unsigned, const TargetRegisterClass*>
4927X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
4928                                                MVT::ValueType VT) const {
4929  // Use the default implementation in TargetLowering to convert the register
4930  // constraint into a member of a register class.
4931  std::pair<unsigned, const TargetRegisterClass*> Res;
4932  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
4933
4934  // Not found?  Bail out.
4935  if (Res.second == 0) return Res;
4936
4937  // Otherwise, check to see if this is a register class of the wrong value
4938  // type.  For example, we want to map "{ax},i32" -> {eax}; we don't want
4939  // it to turn into {ax},{dx}.
4940  if (Res.second->hasType(VT))
4941    return Res;   // Correct type already, nothing to do.
4942
4943  // All of the single-register GCC register classes map their values onto
4944  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
4945  // really want an 8-bit or 32-bit register, map to the appropriate register
4946  // class and return the appropriate register.
4947  if (Res.second != X86::GR16RegisterClass)
4948    return Res;
4949
4950  if (VT == MVT::i8) {
4951    unsigned DestReg = 0;
4952    switch (Res.first) {
4953    default: break;
4954    case X86::AX: DestReg = X86::AL; break;
4955    case X86::DX: DestReg = X86::DL; break;
4956    case X86::CX: DestReg = X86::CL; break;
4957    case X86::BX: DestReg = X86::BL; break;
4958    }
4959    if (DestReg) {
4960      Res.first = DestReg;
4961      Res.second = X86::GR8RegisterClass;
4962    }
4963  } else if (VT == MVT::i32) {
4964    unsigned DestReg = 0;
4965    switch (Res.first) {
4966    default: break;
4967    case X86::AX: DestReg = X86::EAX; break;
4968    case X86::DX: DestReg = X86::EDX; break;
4969    case X86::CX: DestReg = X86::ECX; break;
4970    case X86::BX: DestReg = X86::EBX; break;
4971    case X86::SI: DestReg = X86::ESI; break;
4972    case X86::DI: DestReg = X86::EDI; break;
4973    case X86::BP: DestReg = X86::EBP; break;
4974    case X86::SP: DestReg = X86::ESP; break;
4975    }
4976    if (DestReg) {
4977      Res.first = DestReg;
4978      Res.second = X86::GR32RegisterClass;
4979    }
4980  } else if (VT == MVT::i64) {
4981    unsigned DestReg = 0;
4982    switch (Res.first) {
4983    default: break;
4984    case X86::AX: DestReg = X86::RAX; break;
4985    case X86::DX: DestReg = X86::RDX; break;
4986    case X86::CX: DestReg = X86::RCX; break;
4987    case X86::BX: DestReg = X86::RBX; break;
4988    case X86::SI: DestReg = X86::RSI; break;
4989    case X86::DI: DestReg = X86::RDI; break;
4990    case X86::BP: DestReg = X86::RBP; break;
4991    case X86::SP: DestReg = X86::RSP; break;
4992    }
4993    if (DestReg) {
4994      Res.first = DestReg;
4995      Res.second = X86::GR64RegisterClass;
4996    }
4997  }
4998
4999  return Res;
5000}
5001
5002