X86ISelLowering.cpp revision 1b7a81d3aefbbdd0681c78fa00261e3d39454360
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  // Set up the TargetLowering object.

  // X86 is weird: it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // The MS runtime is weird: it exports _setjmp, but plain longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSE)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  if (!Subtarget->is64Bit()) {
    // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
    setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
  }

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSE && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
  }
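  // For example, with SSE3 an f64 -> u32 conversion can be promoted to a
  // signed i64 conversion and done with a single fisttpll, whose low 32 bits
  // hold the unsigned result for in-range inputs (a sketch of the intent; the
  // legalizer picks the actual expansion).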

  // TODO: when we have SSE, these could be more efficient by using movd/movq.
  if (!X86ScalarSSE) {
    setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
    setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);
  }

  setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);

  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP          , MVT::i64  , Expand);
    setOperationAction(ISD::CTTZ           , MVT::i64  , Expand);
    setOperationAction(ISD::CTLZ           , MVT::i64  , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT        , MVT::i64  , Custom);
    setOperationAction(ISD::SETCC         , MVT::i64  , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET             , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool  , MVT::i64  , Custom);
    setOperationAction(ISD::JumpTable     , MVT::i64  , Custom);
    setOperationAction(ISD::GlobalAddress , MVT::i64  , Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64  , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
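  // For example, on 32-bit x86 a 64-bit shift-left is typically expanded into
  // an SHLD/SHL pair plus a test on bit 5 of the shift amount (a sketch of the
  // usual expansion; the custom handlers for these *_PARTS nodes live later in
  // this file).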
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing())
    setOperationAction(ISD::LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::VACOPY          , MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY          , MVT::Other, Expand);

  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetic

    setOperationAction(ISD::ADD,                MVT::v8i8,  Legal);
    setOperationAction(ISD::ADD,                MVT::v4i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v2i32, Legal);

    setOperationAction(ISD::SUB,                MVT::v8i8,  Legal);
    setOperationAction(ISD::SUB,                MVT::v4i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v2i32, Legal);

    setOperationAction(ISD::MULHS,              MVT::v4i16, Legal);
    setOperationAction(ISD::MUL,                MVT::v4i16, Legal);

    setOperationAction(ISD::AND,                MVT::v8i8,  Promote);
    AddPromotedToType (ISD::AND,                MVT::v8i8,  MVT::v2i32);
    setOperationAction(ISD::AND,                MVT::v4i16, Promote);
    AddPromotedToType (ISD::AND,                MVT::v4i16, MVT::v2i32);
    setOperationAction(ISD::AND,                MVT::v2i32, Legal);

    setOperationAction(ISD::OR,                 MVT::v8i8,  Promote);
    AddPromotedToType (ISD::OR,                 MVT::v8i8,  MVT::v2i32);
    setOperationAction(ISD::OR,                 MVT::v4i16, Promote);
    AddPromotedToType (ISD::OR,                 MVT::v4i16, MVT::v2i32);
    setOperationAction(ISD::OR,                 MVT::v2i32, Legal);

    setOperationAction(ISD::XOR,                MVT::v8i8,  Promote);
    AddPromotedToType (ISD::XOR,                MVT::v8i8,  MVT::v2i32);
    setOperationAction(ISD::XOR,                MVT::v4i16, Promote);
    AddPromotedToType (ISD::XOR,                MVT::v4i16, MVT::v2i32);
    setOperationAction(ISD::XOR,                MVT::v2i32, Legal);

    setOperationAction(ISD::LOAD,               MVT::v8i8,  Promote);
    AddPromotedToType (ISD::LOAD,               MVT::v8i8,  MVT::v2i32);
    setOperationAction(ISD::LOAD,               MVT::v4i16, Promote);
    AddPromotedToType (ISD::LOAD,               MVT::v4i16, MVT::v2i32);
    setOperationAction(ISD::LOAD,               MVT::v2i32, Legal);

    setOperationAction(ISD::BUILD_VECTOR,       MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
    setOperationAction(ISD::ADD,                MVT::v2i64, Legal);
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v2i64, Legal);
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    setOperationAction(ISD::FADD,               MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB,               MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL,               MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV,               MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are optimizing for size.
  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}


//===----------------------------------------------------------------------===//
//               Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "X86GenCallingConv.inc"

/// LowerRET - Lower an ISD::RET node.
SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  assert((Op.getNumOperands() & 1) == 1 && "ISD::RET should have odd # args");

  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  CCState CCInfo(CC, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_X86);


  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  if (RVLocs.size() != 1 || !RVLocs[0].isRegLoc() ||
      RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      assert(VA.isRegLoc() && "Can only return in registers!");
      Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1),
                               Flag);
      Flag = Chain.getValue(1);
    }
  } else {
    // We need to handle a destination of ST0 specially, because it isn't really
    // a register.
    SDOperand Value = Op.getOperand(1);

    // If this is an FP return with ScalarSSE, we need to move the value from
    // an XMM register onto the fp-stack.
    if (X86ScalarSSE) {
      SDOperand MemLoc;

      // If this is a load into a scalarsse value, don't store the loaded value
      // back to the stack, only to reload it: just replace the scalar-sse load.
      if (ISD::isNON_EXTLoad(Value.Val) &&
          (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
        Chain  = Value.getOperand(0);
        MemLoc = Value.getOperand(1);
      } else {
        // Spill the value to memory and reload it into top of stack.
        unsigned Size = MVT::getSizeInBits(RVLocs[0].getValVT())/8;
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
        MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
        Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
      }
      SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other);
      SDOperand Ops[] = {Chain, MemLoc, DAG.getValueType(RVLocs[0].getValVT())};
      Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
      Chain = Value.getValue(1);
    }

    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDOperand Ops[] = { Chain, Value };
    Chain = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops, 2);
    Flag = Chain.getValue(1);
  }

  SDOperand BytesToPop = DAG.getConstant(getBytesToPopOnReturn(), MVT::i16);
  if (Flag.Val)
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop, Flag);
  else
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Chain, BytesToPop);
}


/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers.  This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
/// being lowered.  This returns an SDNode with the same number of values as the
/// ISD::CALL.
SDNode *X86TargetLowering::
LowerCallResult(SDOperand Chain, SDOperand InFlag, SDNode *TheCall,
                unsigned CallingConv, SelectionDAG &DAG) {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallingConv, getTargetMachine(), RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_X86);


  SmallVector<SDOperand, 8> ResultVals;

  // Copy all of the result registers out of their specified physreg.
  if (RVLocs.size() != 1 || RVLocs[0].getLocReg() != X86::ST0) {
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      Chain = DAG.getCopyFromReg(Chain, RVLocs[i].getLocReg(),
                                 RVLocs[i].getValVT(), InFlag).getValue(1);
      InFlag = Chain.getValue(2);
      ResultVals.push_back(Chain.getValue(0));
    }
  } else {
    // Copies from the FP stack are special, as ST0 isn't a valid register
    // before the fp stackifier runs.

    // Copy ST0 into an RFP register with FP_GET_RESULT.
    SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
    SDOperand GROps[] = { Chain, InFlag };
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, GROps, 2);
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);

    // If we are using ScalarSSE, store ST(0) to the stack and reload it into
    // an XMM register.
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      SDOperand Ops[] = {
        Chain, RetVal, StackSlot, DAG.getValueType(RVLocs[0].getValVT()), InFlag
      };
      Chain = DAG.getNode(X86ISD::FST, MVT::Other, Ops, 5);
      RetVal = DAG.getLoad(RVLocs[0].getValVT(), Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }

    if (RVLocs[0].getValVT() == MVT::f32 && !X86ScalarSSE)
      // FIXME: we would really like to remember that this FP_ROUND
      // operation is okay to eliminate if we allow excess FP precision.
      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
    ResultVals.push_back(RetVal);
  }

  // Merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  return DAG.getNode(ISD::MERGE_VALUES, TheCall->getVTList(),
                     &ResultVals[0], ResultVals.size()).Val;
}


//===----------------------------------------------------------------------===//
//                C & StdCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention is the standard for most Windows API
//  routines. It differs from the C calling convention only slightly: the
//  callee cleans up the stack rather than the caller, and symbols are
//  decorated (see the example below). It doesn't support any vector arguments.
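//  For example, a function declared 'int __stdcall f(int, int)' is emitted as
//  _f@8 and returns with 'ret 8', popping its own arguments (a sketch of the
//  usual Win32 decoration, not something enforced by this file).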

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value.  It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          const TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG,
                                               bool isStdCall) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(),
                 ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());

      // Create the nodes corresponding to a load from this parameter slot.
      int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                      VA.getLocMemOffset());
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
    }
  }

684
685  ArgValues.push_back(Root);
686
687  // If the function takes variable number of arguments, make a frame index for
688  // the start of the first vararg value... for expansion of llvm.va_start.
689  if (isVarArg)
690    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
691
692  if (isStdCall && !isVarArg) {
693    BytesToPopOnReturn  = StackSize;    // Callee pops everything..
694    BytesCallerReserves = 0;
695  } else {
696    BytesToPopOnReturn  = 0; // Callee pops nothing.
697
698    // If this is an sret function, the return should pop the hidden pointer.
699    if (NumArgs &&
700        (cast<ConstantSDNode>(Op.getOperand(3))->getValue() &
701         ISD::ParamFlags::StructReturn))
702      BytesToPopOnReturn = 4;
703
704    BytesCallerReserves = StackSize;
705  }
706

  RegSaveFrameIndex = 0xAAAAAAA;  // X86-64 only.
  ReturnAddrIndex = 0;            // No return address slot generated yet.

  MF.getInfo<X86FunctionInfo>()->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                            unsigned CC) {
  SDOperand Chain     = Op.getOperand(0);
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    }
  }

  // If the first argument is an sret pointer, remember it.
  bool isSRet = NumOps &&
    (cast<ConstantSDNode>(Op.getOperand(6))->getValue() &
     ISD::ParamFlags::StructReturn);

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // ELF / PIC requires the GOT pointer to be in the EBX register before
  // function calls via the PLT.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  if (CC == CallingConv::X86_StdCall) {
    if (isVarArg)
      NumBytesForCalleeToPush = isSRet ? 4 : 0;
    else
      NumBytesForCalleeToPush = NumBytes;
  } else {
    // If this is a call to a struct-return function, the callee
    // pops the hidden struct pointer, so we have to push it back.
    // This is common for Darwin/X86, Linux & Mingw32 targets.
    NumBytesForCalleeToPush = isSRet ? 4 : 0;
  }

  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                   FastCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fastcall' calling convention passes up to two integer arguments in
// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
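// For example, under MS __fastcall a call f(1, 2, 3) passes 1 in ECX, 2 in
// EDX, and 3 on the stack, and f returns with 'ret 4' (a sketch of the usual
// fastcall behavior; the 8n+4 padding applied below is an LLVM arrangement
// that is skipped on Windows targets).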
SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(),
                 ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_32_FastCall);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());

      // Create the nodes corresponding to a load from this parameter slot.
      int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                      VA.getLocMemOffset());
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
    }
  }

  ArgValues.push_back(Root);

  unsigned StackSize = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Pad the stack size to 8n+4 bytes so that the argument area stays
    // 8-byte aligned after the 4-byte return address is pushed.
    if ((StackSize & 7) == 0)
      StackSize += 4;
  }
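  // (Worked example: 16 bytes of arguments become 20; once the 4-byte return
  // address is pushed the total is 24, which keeps the stack 8-byte aligned.)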

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = StackSize;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  MF.getInfo<X86FunctionInfo>()->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                               unsigned CC) {
  SDOperand Chain     = Op.getOperand(0);
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_32_FastCall);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  if (!Subtarget->isTargetCygMing() && !Subtarget->isTargetWindows()) {
    // Pad the stack size to 8n+4 bytes so that the argument area stays
    // 8-byte aligned after the 4-byte return address is pushed.
    if ((NumBytes & 7) == 0)
      NumBytes += 4;
  }

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full: break;
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
        break;
      case CCValAssign::AExt:
        Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
        break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions in
    // non-JIT mode.
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(),
                                        getTargetMachine(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  // ELF / PIC requires the GOT pointer to be in the EBX register before
  // function calls via the PLT.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    Chain = DAG.getCopyToReg(Chain, X86::EBX,
                             DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  SmallVector<SDOperand, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add an implicit use of the GOT pointer in EBX.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Returns a flag for retval copy to use.
  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                 X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//

SDOperand
X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8,  X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };


  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(MF.getFunction()->getCallingConv(), getTargetMachine(),
                 ArgLocs);
  CCInfo.AnalyzeFormalArguments(Op.Val, CC_X86_64_C);

  SmallVector<SDOperand, 8> ArgValues;
  unsigned LastVal = ~0U;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
    // places.
    assert(VA.getValNo() != LastVal &&
           "Don't support value assigned to multiple locs yet");
    LastVal = VA.getValNo();

    if (VA.isRegLoc()) {
      MVT::ValueType RegVT = VA.getLocVT();
      TargetRegisterClass *RC;
      if (RegVT == MVT::i32)
        RC = X86::GR32RegisterClass;
      else if (RegVT == MVT::i64)
        RC = X86::GR64RegisterClass;
      else if (RegVT == MVT::f32)
        RC = X86::FR32RegisterClass;
      else if (RegVT == MVT::f64)
        RC = X86::FR64RegisterClass;
      else {
        assert(MVT::isVector(RegVT));
        RC = X86::VR128RegisterClass;
      }

      unsigned Reg = AddLiveIn(DAG.getMachineFunction(), VA.getLocReg(), RC);
      SDOperand ArgValue = DAG.getCopyFromReg(Root, Reg, RegVT);

      // If this is an 8 or 16-bit value, it is really passed promoted to 32
      // bits.  Insert an assert[sz]ext to capture this, then truncate to the
      // right size.
      if (VA.getLocInfo() == CCValAssign::SExt)
        ArgValue = DAG.getNode(ISD::AssertSext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));
      else if (VA.getLocInfo() == CCValAssign::ZExt)
        ArgValue = DAG.getNode(ISD::AssertZext, RegVT, ArgValue,
                               DAG.getValueType(VA.getValVT()));

      if (VA.getLocInfo() != CCValAssign::Full)
        ArgValue = DAG.getNode(ISD::TRUNCATE, VA.getValVT(), ArgValue);

      ArgValues.push_back(ArgValue);
    } else {
      assert(VA.isMemLoc());

      // Create the nodes corresponding to a load from this parameter slot.
      int FI = MFI->CreateFixedObject(MVT::getSizeInBits(VA.getValVT())/8,
                                      VA.getLocMemOffset());
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValues.push_back(DAG.getLoad(VA.getValVT(), Root, FIN, NULL, 0));
    }
  }

  unsigned StackSize = CCInfo.getNextStackOffset();

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs, 6);
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);

    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so they
    // may be loaded by dereferencing the result of va_next.
    VarArgsGPOffset = NumIntRegs * 8;
    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);
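    // The 176-byte register save area is laid out as the 6 integer registers
    // (48 bytes) followed by the 8 XMM registers (128 bytes); the va_list's
    // gp_offset and fp_offset fields index into it, per the AMD64 ABI.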

    // Store the integer parameter registers.
    SmallVector<SDOperand, 8> MemOps;
    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                              DAG.getConstant(VarArgsGPOffset, getPointerTy()));
    for (; NumIntRegs != 6; ++NumIntRegs) {
      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                X86::GR64RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(8, getPointerTy()));
    }

    // Now store the XMM (fp + vector) parameter registers.
    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
    for (; NumXMMRegs != 8; ++NumXMMRegs) {
      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                X86::VR128RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(16, getPointerTy()));
    }
    if (!MemOps.empty())
        Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                           &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = StackSize;

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                     &ArgValues[0], ArgValues.size()).getValue(Op.ResNo);
}

SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                        unsigned CC) {
  SDOperand Chain     = Op.getOperand(0);
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, getTargetMachine(), ArgLocs);
  CCInfo.AnalyzeCallOperands(Op.Val, CC_X86_64_C);

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;

  SDOperand StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, VA.getLocVT(), Arg);
      break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      assert(VA.isMemLoc());
      if (StackPtr.Val == 0)
        StackPtr = DAG.getRegister(getStackPtrReg(), getPointerTy());
      SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

1302  if (isVarArg) {
1303    // From AMD64 ABI document:
1304    // For calls that may call functions that use varargs or stdargs
1305    // (prototype-less calls or calls to functions containing ellipsis (...) in
1306    // the declaration) %al is used as hidden argument to specify the number
1307    // of SSE registers used. The contents of %al do not need to match exactly
1308    // the number of registers, but must be an ubound on the number of SSE
1309    // registers used and is in the range 0 - 8 inclusive.
1310
1311    // Count the number of XMM registers allocated.
1312    static const unsigned XMMArgRegs[] = {
1313      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1314      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1315    };
1316    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
1317
1318    Chain = DAG.getCopyToReg(Chain, X86::AL,
1319                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
1320    InFlag = Chain.getValue(1);
1321  }
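  // For example, a varargs call passing one double in %xmm0 reaches here with
  // NumXMMRegs == 1, so %al is set to 1 before the call; since %al only needs
  // to be an upper bound, any larger value up to 8 would also be legal.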
1322
1323  // If the callee is a GlobalAddress node (quite common; every direct call
1324  // is), turn it into a TargetGlobalAddress node so legalize doesn't hack it.
1325  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1326    // We should use extra load for direct calls to dllimported functions in
1327    // non-JIT mode.
1328    if (getTargetMachine().getCodeModel() != CodeModel::Large &&
1329        !Subtarget->GVRequiresExtraLoad(G->getGlobal(),
1330                                        getTargetMachine(), true))
1331      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
1332  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
1333    if (getTargetMachine().getCodeModel() != CodeModel::Large)
1334      Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
1335
1336  // Returns a chain & a flag for retval copy to use.
1337  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1338  SmallVector<SDOperand, 8> Ops;
1339  Ops.push_back(Chain);
1340  Ops.push_back(Callee);
1341
1342  // Add argument registers to the end of the list so that they are known live
1343  // into the call.
1344  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1345    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1346                                  RegsToPass[i].second.getValueType()));
1347
1348  if (InFlag.Val)
1349    Ops.push_back(InFlag);
1350
1351  // FIXME: Do not generate X86ISD::TAILCALL for now.
1352  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
1353                      NodeTys, &Ops[0], Ops.size());
1354  InFlag = Chain.getValue(1);
1355
1356  // Returns a flag for retval copy to use.
1357  NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
1358  Ops.clear();
1359  Ops.push_back(Chain);
1360  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
1361  Ops.push_back(DAG.getConstant(0, getPointerTy()));
1362  Ops.push_back(InFlag);
1363  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
1364  InFlag = Chain.getValue(1);
1365
1366  // Handle result values, copying them out of physregs into vregs that we
1367  // return.
1368  return SDOperand(LowerCallResult(Chain, InFlag, Op.Val, CC, DAG), Op.ResNo);
1369}
1370
1371
1372//===----------------------------------------------------------------------===//
1373//                           Other Lowering Hooks
1374//===----------------------------------------------------------------------===//
1375
1376
1377SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
1378  if (ReturnAddrIndex == 0) {
1379    // Set up a frame object for the return address.
1380    MachineFunction &MF = DAG.getMachineFunction();
1381    if (Subtarget->is64Bit())
1382      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
1383    else
1384      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
1385  }
1386
1387  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
1388}
1389
1390
1391
1392/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86
1393/// specific condition code. It returns false if it cannot do a direct
1394/// translation. X86CC is the translated CondCode.  LHS/RHS are modified as
1395/// needed.
1396static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
1397                           unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
1398                           SelectionDAG &DAG) {
1399  X86CC = X86::COND_INVALID;
1400  if (!isFP) {
1401    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
1402      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
1403        // X > -1  -> compare X against 0, jump if !sign.
1404        RHS = DAG.getConstant(0, RHS.getValueType());
1405        X86CC = X86::COND_NS;
1406        return true;
1407      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
1408        // X < 0   -> compare X against 0, jump on sign.
1409        X86CC = X86::COND_S;
1410        return true;
1411      }
1412    }
1413
1414    switch (SetCCOpcode) {
1415    default: break;
1416    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
1417    case ISD::SETGT:  X86CC = X86::COND_G;  break;
1418    case ISD::SETGE:  X86CC = X86::COND_GE; break;
1419    case ISD::SETLT:  X86CC = X86::COND_L;  break;
1420    case ISD::SETLE:  X86CC = X86::COND_LE; break;
1421    case ISD::SETNE:  X86CC = X86::COND_NE; break;
1422    case ISD::SETULT: X86CC = X86::COND_B;  break;
1423    case ISD::SETUGT: X86CC = X86::COND_A;  break;
1424    case ISD::SETULE: X86CC = X86::COND_BE; break;
1425    case ISD::SETUGE: X86CC = X86::COND_AE; break;
1426    }
1427  } else {
1428    // On a floating point condition, the flags are set as follows:
1429    // ZF  PF  CF   op
1430    //  0 | 0 | 0 | X > Y
1431    //  0 | 0 | 1 | X < Y
1432    //  1 | 0 | 0 | X == Y
1433    //  1 | 1 | 1 | unordered
1434    bool Flip = false;
1435    switch (SetCCOpcode) {
1436    default: break;
1437    case ISD::SETUEQ:
1438    case ISD::SETEQ: X86CC = X86::COND_E;  break;
1439    case ISD::SETOLT: Flip = true; // Fallthrough
1440    case ISD::SETOGT:
1441    case ISD::SETGT: X86CC = X86::COND_A;  break;
1442    case ISD::SETOLE: Flip = true; // Fallthrough
1443    case ISD::SETOGE:
1444    case ISD::SETGE: X86CC = X86::COND_AE; break;
1445    case ISD::SETUGT: Flip = true; // Fallthrough
1446    case ISD::SETULT:
1447    case ISD::SETLT: X86CC = X86::COND_B;  break;
1448    case ISD::SETUGE: Flip = true; // Fallthrough
1449    case ISD::SETULE:
1450    case ISD::SETLE: X86CC = X86::COND_BE; break;
1451    case ISD::SETONE:
1452    case ISD::SETNE: X86CC = X86::COND_NE; break;
1453    case ISD::SETUO: X86CC = X86::COND_P;  break;
1454    case ISD::SETO:  X86CC = X86::COND_NP; break;
1455    }
1456    if (Flip)
1457      std::swap(LHS, RHS);
1458  }
1459
1460  return X86CC != X86::COND_INVALID;
1461}
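// Two worked examples of the translation above:
//   setgt X, -1 (integer): RHS is rewritten to 0 and X86::COND_NS is used,
//     so "X > -1" becomes "compare X against 0, jump if the sign flag is clear".
//   setolt X, Y (FP): there is no direct flag pattern for it, so the operands
//     are swapped and X86::COND_A ("above") is used, turning X < Y into Y > X.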
1462
1463/// hasFPCMov - is there a floating point cmov for the specific X86 condition
1464/// code? The current x86 ISA includes the following FP cmov instructions:
1465/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
1466static bool hasFPCMov(unsigned X86CC) {
1467  switch (X86CC) {
1468  default:
1469    return false;
1470  case X86::COND_B:
1471  case X86::COND_BE:
1472  case X86::COND_E:
1473  case X86::COND_P:
1474  case X86::COND_A:
1475  case X86::COND_AE:
1476  case X86::COND_NE:
1477  case X86::COND_NP:
1478    return true;
1479  }
1480}
1481
1482/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
1483/// true if Op is undef or if its value falls within the half-open range [Low, Hi).
1484static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
1485  if (Op.getOpcode() == ISD::UNDEF)
1486    return true;
1487
1488  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
1489  return (Val >= Low && Val < Hi);
1490}
1491
1492/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
1493/// true if Op is undef or if its value equals the specified value.
1494static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
1495  if (Op.getOpcode() == ISD::UNDEF)
1496    return true;
1497  return cast<ConstantSDNode>(Op)->getValue() == Val;
1498}
1499
1500/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
1501/// specifies a shuffle of elements that is suitable for input to PSHUFD.
1502bool X86::isPSHUFDMask(SDNode *N) {
1503  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1504
1505  if (N->getNumOperands() != 4)
1506    return false;
1507
1508  // Check that the mask doesn't reference the second vector.
1509  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1510    SDOperand Arg = N->getOperand(i);
1511    if (Arg.getOpcode() == ISD::UNDEF) continue;
1512    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1513    if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
1514      return false;
1515  }
1516
1517  return true;
1518}
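// For example, <2, 1, undef, 0> is a valid PSHUFD mask (every defined index is
// less than 4), while <0, 1, 4, 5> is not, since 4 and 5 would reference the
// second vector.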
1519
1520/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
1521/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
1522bool X86::isPSHUFHWMask(SDNode *N) {
1523  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1524
1525  if (N->getNumOperands() != 8)
1526    return false;
1527
1528  // Lower quadword copied in order.
1529  for (unsigned i = 0; i != 4; ++i) {
1530    SDOperand Arg = N->getOperand(i);
1531    if (Arg.getOpcode() == ISD::UNDEF) continue;
1532    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1533    if (cast<ConstantSDNode>(Arg)->getValue() != i)
1534      return false;
1535  }
1536
1537  // Upper quadword shuffled.
1538  for (unsigned i = 4; i != 8; ++i) {
1539    SDOperand Arg = N->getOperand(i);
1540    if (Arg.getOpcode() == ISD::UNDEF) continue;
1541    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1542    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1543    if (Val < 4 || Val > 7)
1544      return false;
1545  }
1546
1547  return true;
1548}
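// e.g. <0, 1, 2, 3, 7, 6, 5, 4> is a valid PSHUFHW mask: the low quadword is
// copied in order and every defined high index lies in [4, 8).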
1549
1550/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
1551/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
1552bool X86::isPSHUFLWMask(SDNode *N) {
1553  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1554
1555  if (N->getNumOperands() != 8)
1556    return false;
1557
1558  // Upper quadword copied in order.
1559  for (unsigned i = 4; i != 8; ++i)
1560    if (!isUndefOrEqual(N->getOperand(i), i))
1561      return false;
1562
1563  // Lower quadword shuffled.
1564  for (unsigned i = 0; i != 4; ++i)
1565    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
1566      return false;
1567
1568  return true;
1569}
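// e.g. <3, 2, 1, 0, 4, 5, 6, 7> is a valid PSHUFLW mask: the low indices lie
// in [0, 4) and the high quadword is copied in order.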
1570
1571/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
1572/// specifies a shuffle of elements that is suitable for input to SHUFP*.
1573static bool isSHUFPMask(const SDOperand *Elems, unsigned NumElems) {
1574  if (NumElems != 2 && NumElems != 4) return false;
1575
1576  unsigned Half = NumElems / 2;
1577  for (unsigned i = 0; i < Half; ++i)
1578    if (!isUndefOrInRange(Elems[i], 0, NumElems))
1579      return false;
1580  for (unsigned i = Half; i < NumElems; ++i)
1581    if (!isUndefOrInRange(Elems[i], NumElems, NumElems*2))
1582      return false;
1583
1584  return true;
1585}
1586
1587bool X86::isSHUFPMask(SDNode *N) {
1588  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1589  return ::isSHUFPMask(N->op_begin(), N->getNumOperands());
1590}
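// For a 4-element shuffle, <0, 3, 4, 7> is a valid SHUFP* mask: the low half
// selects from V1 (indices 0-3) and the high half from V2 (indices 4-7).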
1591
1592/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
1593/// the reverse of what x86 shuffles want. x86 shuffles require the lower
1594/// half elements to come from vector 1 (which would equal the destination)
1595/// and the upper half to come from vector 2.
1596static bool isCommutedSHUFP(const SDOperand *Ops, unsigned NumOps) {
1597  if (NumOps != 2 && NumOps != 4) return false;
1598
1599  unsigned Half = NumOps / 2;
1600  for (unsigned i = 0; i < Half; ++i)
1601    if (!isUndefOrInRange(Ops[i], NumOps, NumOps*2))
1602      return false;
1603  for (unsigned i = Half; i < NumOps; ++i)
1604    if (!isUndefOrInRange(Ops[i], 0, NumOps))
1605      return false;
1606  return true;
1607}
1608
1609static bool isCommutedSHUFP(SDNode *N) {
1610  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1611  return isCommutedSHUFP(N->op_begin(), N->getNumOperands());
1612}
1613
1614/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
1615/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
1616bool X86::isMOVHLPSMask(SDNode *N) {
1617  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1618
1619  if (N->getNumOperands() != 4)
1620    return false;
1621
1622  // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
1623  return isUndefOrEqual(N->getOperand(0), 6) &&
1624         isUndefOrEqual(N->getOperand(1), 7) &&
1625         isUndefOrEqual(N->getOperand(2), 2) &&
1626         isUndefOrEqual(N->getOperand(3), 3);
1627}
1628
1629/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
1630/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
1631/// <2, 3, 2, 3>
1632bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
1633  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1634
1635  if (N->getNumOperands() != 4)
1636    return false;
1637
1638  // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3
1639  return isUndefOrEqual(N->getOperand(0), 2) &&
1640         isUndefOrEqual(N->getOperand(1), 3) &&
1641         isUndefOrEqual(N->getOperand(2), 2) &&
1642         isUndefOrEqual(N->getOperand(3), 3);
1643}
1644
1645/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
1646/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
1647bool X86::isMOVLPMask(SDNode *N) {
1648  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1649
1650  unsigned NumElems = N->getNumOperands();
1651  if (NumElems != 2 && NumElems != 4)
1652    return false;
1653
1654  for (unsigned i = 0; i < NumElems/2; ++i)
1655    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
1656      return false;
1657
1658  for (unsigned i = NumElems/2; i < NumElems; ++i)
1659    if (!isUndefOrEqual(N->getOperand(i), i))
1660      return false;
1661
1662  return true;
1663}
1664
1665/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
1666/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
1667/// and MOVLHPS.
1668bool X86::isMOVHPMask(SDNode *N) {
1669  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1670
1671  unsigned NumElems = N->getNumOperands();
1672  if (NumElems != 2 && NumElems != 4)
1673    return false;
1674
1675  for (unsigned i = 0; i < NumElems/2; ++i)
1676    if (!isUndefOrEqual(N->getOperand(i), i))
1677      return false;
1678
1679  for (unsigned i = 0; i < NumElems/2; ++i) {
1680    SDOperand Arg = N->getOperand(i + NumElems/2);
1681    if (!isUndefOrEqual(Arg, i + NumElems))
1682      return false;
1683  }
1684
1685  return true;
1686}
1687
1688/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
1689/// specifies a shuffle of elements that is suitable for input to UNPCKL.
1690bool static isUNPCKLMask(const SDOperand *Elts, unsigned NumElts,
1691                         bool V2IsSplat = false) {
1692  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
1693    return false;
1694
1695  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
1696    SDOperand BitI  = Elts[i];
1697    SDOperand BitI1 = Elts[i+1];
1698    if (!isUndefOrEqual(BitI, j))
1699      return false;
1700    if (V2IsSplat) {
1701      if (isUndefOrEqual(BitI1, NumElts))
1702        return false;
1703    } else {
1704      if (!isUndefOrEqual(BitI1, j + NumElts))
1705        return false;
1706    }
1707  }
1708
1709  return true;
1710}
1711
1712bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
1713  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1714  return ::isUNPCKLMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
1715}
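// e.g. for 4 elements the canonical UNPCKL mask is <0, 4, 1, 5>, interleaving
// the low halves of V1 and V2.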
1716
1717/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
1718/// specifies a shuffle of elements that is suitable for input to UNPCKH.
1719bool static isUNPCKHMask(const SDOperand *Elts, unsigned NumElts,
1720                         bool V2IsSplat = false) {
1721  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
1722    return false;
1723
1724  for (unsigned i = 0, j = 0; i != NumElts; i += 2, ++j) {
1725    SDOperand BitI  = Elts[i];
1726    SDOperand BitI1 = Elts[i+1];
1727    if (!isUndefOrEqual(BitI, j + NumElts/2))
1728      return false;
1729    if (V2IsSplat) {
1730      if (isUndefOrEqual(BitI1, NumElts))
1731        return false;
1732    } else {
1733      if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
1734        return false;
1735    }
1736  }
1737
1738  return true;
1739}
1740
1741bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
1742  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1743  return ::isUNPCKHMask(N->op_begin(), N->getNumOperands(), V2IsSplat);
1744}
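// e.g. for 4 elements the canonical UNPCKH mask is <2, 6, 3, 7>, interleaving
// the high halves of V1 and V2.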
1745
1746/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
1747/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
1748/// <0, 0, 1, 1>
1749bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
1750  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1751
1752  unsigned NumElems = N->getNumOperands();
1753  if (NumElems != 4 && NumElems != 8 && NumElems != 16)
1754    return false;
1755
1756  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
1757    SDOperand BitI  = N->getOperand(i);
1758    SDOperand BitI1 = N->getOperand(i+1);
1759
1760    if (!isUndefOrEqual(BitI, j))
1761      return false;
1762    if (!isUndefOrEqual(BitI1, j))
1763      return false;
1764  }
1765
1766  return true;
1767}
1768
1769/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
1770/// specifies a shuffle of elements that is suitable for input to MOVSS,
1771/// MOVSD, and MOVD, i.e. setting the lowest element.
1772static bool isMOVLMask(const SDOperand *Elts, unsigned NumElts) {
1773  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
1774    return false;
1775
1776  if (!isUndefOrEqual(Elts[0], NumElts))
1777    return false;
1778
1779  for (unsigned i = 1; i < NumElts; ++i) {
1780    if (!isUndefOrEqual(Elts[i], i))
1781      return false;
1782  }
1783
1784  return true;
1785}
1786
1787bool X86::isMOVLMask(SDNode *N) {
1788  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1789  return ::isMOVLMask(N->op_begin(), N->getNumOperands());
1790}
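// e.g. <4, 1, 2, 3> is a MOVL mask for 4 elements: element 0 is taken from V2
// and the rest stay V1's elements in place (a movss-style insertion).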
1791
1792/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
1793/// of what x86 movss wants: the lowest element must be the lowest element
1794/// of vector 2, and the other elements must come from vector 1 in order.
1795static bool isCommutedMOVL(const SDOperand *Ops, unsigned NumOps,
1796                           bool V2IsSplat = false,
1797                           bool V2IsUndef = false) {
1798  if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
1799    return false;
1800
1801  if (!isUndefOrEqual(Ops[0], 0))
1802    return false;
1803
1804  for (unsigned i = 1; i < NumOps; ++i) {
1805    SDOperand Arg = Ops[i];
1806    if (!(isUndefOrEqual(Arg, i+NumOps) ||
1807          (V2IsUndef && isUndefOrInRange(Arg, NumOps, NumOps*2)) ||
1808          (V2IsSplat && isUndefOrEqual(Arg, NumOps))))
1809      return false;
1810  }
1811
1812  return true;
1813}
1814
1815static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
1816                           bool V2IsUndef = false) {
1817  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1818  return isCommutedMOVL(N->op_begin(), N->getNumOperands(),
1819                        V2IsSplat, V2IsUndef);
1820}
1821
1822/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
1823/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
1824bool X86::isMOVSHDUPMask(SDNode *N) {
1825  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1826
1827  if (N->getNumOperands() != 4)
1828    return false;
1829
1830  // Expect 1, 1, 3, 3
1831  for (unsigned i = 0; i < 2; ++i) {
1832    SDOperand Arg = N->getOperand(i);
1833    if (Arg.getOpcode() == ISD::UNDEF) continue;
1834    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1835    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1836    if (Val != 1) return false;
1837  }
1838
1839  bool HasHi = false;
1840  for (unsigned i = 2; i < 4; ++i) {
1841    SDOperand Arg = N->getOperand(i);
1842    if (Arg.getOpcode() == ISD::UNDEF) continue;
1843    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1844    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1845    if (Val != 3) return false;
1846    HasHi = true;
1847  }
1848
1849  // Don't use movshdup if it can be done with a shufps.
1850  return HasHi;
1851}
1852
1853/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
1854/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
1855bool X86::isMOVSLDUPMask(SDNode *N) {
1856  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1857
1858  if (N->getNumOperands() != 4)
1859    return false;
1860
1861  // Expect 0, 0, 2, 2
1862  for (unsigned i = 0; i < 2; ++i) {
1863    SDOperand Arg = N->getOperand(i);
1864    if (Arg.getOpcode() == ISD::UNDEF) continue;
1865    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1866    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1867    if (Val != 0) return false;
1868  }
1869
1870  bool HasHi = false;
1871  for (unsigned i = 2; i < 4; ++i) {
1872    SDOperand Arg = N->getOperand(i);
1873    if (Arg.getOpcode() == ISD::UNDEF) continue;
1874    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1875    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1876    if (Val != 2) return false;
1877    HasHi = true;
1878  }
1879
1880  // Don't use movsldup if it can be done with a shufps.
1881  return HasHi;
1882}
1883
1884/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
1885/// a splat of a single element.
1886static bool isSplatMask(SDNode *N) {
1887  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1888
1889  // This is a splat operation if each element of the permute is the same, and
1890  // if the value doesn't reference the second vector.
1891  unsigned NumElems = N->getNumOperands();
1892  SDOperand ElementBase;
1893  unsigned i = 0;
1894  for (; i != NumElems; ++i) {
1895    SDOperand Elt = N->getOperand(i);
1896    if (isa<ConstantSDNode>(Elt)) {
1897      ElementBase = Elt;
1898      break;
1899    }
1900  }
1901
1902  if (!ElementBase.Val)
1903    return false;
1904
1905  for (; i != NumElems; ++i) {
1906    SDOperand Arg = N->getOperand(i);
1907    if (Arg.getOpcode() == ISD::UNDEF) continue;
1908    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1909    if (Arg != ElementBase) return false;
1910  }
1911
1912  // Make sure it is a splat of the first vector operand.
1913  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
1914}
1915
1916/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
1917/// a splat of a single element and it's a 2 or 4 element mask.
1918bool X86::isSplatMask(SDNode *N) {
1919  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1920
1921  // We can only splat 64-bit and 32-bit quantities with a single instruction.
1922  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
1923    return false;
1924  return ::isSplatMask(N);
1925}
1926
1927/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
1928/// specifies a splat of element zero.
1929bool X86::isSplatLoMask(SDNode *N) {
1930  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1931
1932  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
1933    if (!isUndefOrEqual(N->getOperand(i), 0))
1934      return false;
1935  return true;
1936}
1937
1938/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
1939/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
1940/// instructions.
1941unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
1942  unsigned NumOperands = N->getNumOperands();
1943  unsigned Shift = (NumOperands == 4) ? 2 : 1;
1944  unsigned Mask = 0;
1945  for (unsigned i = 0; i < NumOperands; ++i) {
1946    unsigned Val = 0;
1947    SDOperand Arg = N->getOperand(NumOperands-i-1);
1948    if (Arg.getOpcode() != ISD::UNDEF)
1949      Val = cast<ConstantSDNode>(Arg)->getValue();
1950    if (Val >= NumOperands) Val -= NumOperands;
1951    Mask |= Val;
1952    if (i != NumOperands - 1)
1953      Mask <<= Shift;
1954  }
1955
1956  return Mask;
1957}
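// Worked example: the 4-element identity mask <0, 1, 2, 3> produces
// (3<<6)|(2<<4)|(1<<2)|0 = 0xE4, while the reversal <3, 2, 1, 0> produces 0x1B.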
1958
1959/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
1960/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
1961/// instructions.
1962unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
1963  unsigned Mask = 0;
1964  // 8 nodes, but we only care about the last 4.
1965  for (unsigned i = 7; i >= 4; --i) {
1966    unsigned Val = 0;
1967    SDOperand Arg = N->getOperand(i);
1968    if (Arg.getOpcode() != ISD::UNDEF)
1969      Val = cast<ConstantSDNode>(Arg)->getValue();
1970    Mask |= (Val - 4);
1971    if (i != 4)
1972      Mask <<= 2;
1973  }
1974
1975  return Mask;
1976}
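// e.g. a mask whose high quadword is the identity <..., 4, 5, 6, 7> also
// produces 0xE4, since each operand contributes (Val - 4) two bits at a time.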
1977
1978/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
1979/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
1980/// instructions.
1981unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
1982  unsigned Mask = 0;
1983  // 8 nodes, but we only care about the first 4.
1984  for (int i = 3; i >= 0; --i) {
1985    unsigned Val = 0;
1986    SDOperand Arg = N->getOperand(i);
1987    if (Arg.getOpcode() != ISD::UNDEF)
1988      Val = cast<ConstantSDNode>(Arg)->getValue();
1989    Mask |= Val;
1990    if (i != 0)
1991      Mask <<= 2;
1992  }
1993
1994  return Mask;
1995}
1996
1997/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
1998/// specifies an 8-element shuffle that can be broken into a pair of
1999/// PSHUFHW and PSHUFLW.
2000static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
2001  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2002
2003  if (N->getNumOperands() != 8)
2004    return false;
2005
2006  // Lower quadword shuffled.
2007  for (unsigned i = 0; i != 4; ++i) {
2008    SDOperand Arg = N->getOperand(i);
2009    if (Arg.getOpcode() == ISD::UNDEF) continue;
2010    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2011    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2012    if (Val >= 4)  // Elements 4-7 belong to the upper quadword.
2013      return false;
2014  }
2015
2016  // Upper quadword shuffled.
2017  for (unsigned i = 4; i != 8; ++i) {
2018    SDOperand Arg = N->getOperand(i);
2019    if (Arg.getOpcode() == ISD::UNDEF) continue;
2020    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2021    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2022    if (Val < 4 || Val > 7)
2023      return false;
2024  }
2025
2026  return true;
2027}
2028
2029/// CommuteVectorShuffle - Swap vector_shuffle operands as well as the
2030/// values in their permute mask.
2031static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
2032                                      SDOperand &V2, SDOperand &Mask,
2033                                      SelectionDAG &DAG) {
2034  MVT::ValueType VT = Op.getValueType();
2035  MVT::ValueType MaskVT = Mask.getValueType();
2036  MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
2037  unsigned NumElems = Mask.getNumOperands();
2038  SmallVector<SDOperand, 8> MaskVec;
2039
2040  for (unsigned i = 0; i != NumElems; ++i) {
2041    SDOperand Arg = Mask.getOperand(i);
2042    if (Arg.getOpcode() == ISD::UNDEF) {
2043      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
2044      continue;
2045    }
2046    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2047    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2048    if (Val < NumElems)
2049      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
2050    else
2051      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
2052  }
2053
2054  std::swap(V1, V2);
2055  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2056  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2057}
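// e.g. commuting vector_shuffle V1, V2, <0, 5, 2, 7> yields
// vector_shuffle V2, V1, <4, 1, 6, 3>: indices below NumElems gain NumElems
// and indices at or above it lose NumElems.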
2058
2059/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
2060/// match movhlps. The lower half elements should come from the upper half of
2061/// V1 (and in order), and the upper half elements should come from the upper
2062/// half of V2 (and in order).
2063static bool ShouldXformToMOVHLPS(SDNode *Mask) {
2064  unsigned NumElems = Mask->getNumOperands();
2065  if (NumElems != 4)
2066    return false;
2067  for (unsigned i = 0, e = 2; i != e; ++i)
2068    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
2069      return false;
2070  for (unsigned i = 2; i != 4; ++i)
2071    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
2072      return false;
2073  return true;
2074}
2075
2076/// isScalarLoadToVector - Returns true if the node is a scalar load that
2077/// is promoted to a vector.
2078static inline bool isScalarLoadToVector(SDNode *N) {
2079  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
2080    N = N->getOperand(0).Val;
2081    return ISD::isNON_EXTLoad(N);
2082  }
2083  return false;
2084}
2085
2086/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
2087/// match movlp{s|d}. The lower half elements should come from the lower half of
2088/// V1 (and in order), and the upper half elements should come from the upper
2089/// half of V2 (and in order). And since V1 will become the source of the
2090/// MOVLP, it must be either a vector load or a scalar load to vector.
2091static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
2092  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
2093    return false;
2094  // If V2 is a vector load, don't do this transformation. We will try to use
2095  // a load-folding shufps op instead.
2096  if (ISD::isNON_EXTLoad(V2))
2097    return false;
2098
2099  unsigned NumElems = Mask->getNumOperands();
2100  if (NumElems != 2 && NumElems != 4)
2101    return false;
2102  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
2103    if (!isUndefOrEqual(Mask->getOperand(i), i))
2104      return false;
2105  for (unsigned i = NumElems/2; i != NumElems; ++i)
2106    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
2107      return false;
2108  return true;
2109}
2110
2111/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
2112/// all the same.
2113static bool isSplatVector(SDNode *N) {
2114  if (N->getOpcode() != ISD::BUILD_VECTOR)
2115    return false;
2116
2117  SDOperand SplatValue = N->getOperand(0);
2118  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
2119    if (N->getOperand(i) != SplatValue)
2120      return false;
2121  return true;
2122}
2123
2124/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
2125/// to an undef.
2126static bool isUndefShuffle(SDNode *N) {
2127  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
2128    return false;
2129
2130  SDOperand V1 = N->getOperand(0);
2131  SDOperand V2 = N->getOperand(1);
2132  SDOperand Mask = N->getOperand(2);
2133  unsigned NumElems = Mask.getNumOperands();
2134  for (unsigned i = 0; i != NumElems; ++i) {
2135    SDOperand Arg = Mask.getOperand(i);
2136    if (Arg.getOpcode() != ISD::UNDEF) {
2137      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2138      if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
2139        return false;
2140      else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
2141        return false;
2142    }
2143  }
2144  return true;
2145}
2146
2147/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
2148/// that point to V2 point to its first element.
2149static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
2150  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
2151
2152  bool Changed = false;
2153  SmallVector<SDOperand, 8> MaskVec;
2154  unsigned NumElems = Mask.getNumOperands();
2155  for (unsigned i = 0; i != NumElems; ++i) {
2156    SDOperand Arg = Mask.getOperand(i);
2157    if (Arg.getOpcode() != ISD::UNDEF) {
2158      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2159      if (Val > NumElems) {
2160        Arg = DAG.getConstant(NumElems, Arg.getValueType());
2161        Changed = true;
2162      }
2163    }
2164    MaskVec.push_back(Arg);
2165  }
2166
2167  if (Changed)
2168    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
2169                       &MaskVec[0], MaskVec.size());
2170  return Mask;
2171}
2172
2173/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d} or movd
2174/// operation of specified width.
2175static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
2176  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2177  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2178
2179  SmallVector<SDOperand, 8> MaskVec;
2180  MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
2181  for (unsigned i = 1; i != NumElems; ++i)
2182    MaskVec.push_back(DAG.getConstant(i, BaseVT));
2183  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2184}
2185
2186/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
2187/// of specified width.
2188static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
2189  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2190  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2191  SmallVector<SDOperand, 8> MaskVec;
2192  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
2193    MaskVec.push_back(DAG.getConstant(i,            BaseVT));
2194    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
2195  }
2196  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2197}
2198
2199/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
2200/// of specified width.
2201static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
2202  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2203  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2204  unsigned Half = NumElems/2;
2205  SmallVector<SDOperand, 8> MaskVec;
2206  for (unsigned i = 0; i != Half; ++i) {
2207    MaskVec.push_back(DAG.getConstant(i + Half,            BaseVT));
2208    MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
2209  }
2210  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
2211}
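// e.g. for NumElems == 4, getUnpacklMask returns <0, 4, 1, 5> and
// getUnpackhMask returns <2, 6, 3, 7>.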
2212
2213/// getZeroVector - Returns a vector of specified type with all zero elements.
2214///
2215static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
2216  assert(MVT::isVector(VT) && "Expected a vector type");
2217  unsigned NumElems = getVectorNumElements(VT);
2218  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
2219  bool isFP = MVT::isFloatingPoint(EVT);
2220  SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
2221  SmallVector<SDOperand, 8> ZeroVec(NumElems, Zero);
2222  return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
2223}
2224
2225/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
2226///
2227static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
2228  SDOperand V1 = Op.getOperand(0);
2229  SDOperand Mask = Op.getOperand(2);
2230  MVT::ValueType VT = Op.getValueType();
2231  unsigned NumElems = Mask.getNumOperands();
2232  Mask = getUnpacklMask(NumElems, DAG);
2233  while (NumElems != 4) {
2234    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
2235    NumElems >>= 1;
2236  }
2237  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
2238
2239  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2240  Mask = getZeroVector(MaskVT, DAG);
2241  SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
2242                                  DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
2243  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
2244}
2245
2246/// isZeroNode - Returns true if Elt is a constant zero or a floating point
2247/// constant +0.0.
2248static inline bool isZeroNode(SDOperand Elt) {
2249  return ((isa<ConstantSDNode>(Elt) &&
2250           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
2251          (isa<ConstantFPSDNode>(Elt) &&
2252           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
2253}
2254
2255/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
2256/// vector and zero or undef vector.
2257static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
2258                                             unsigned NumElems, unsigned Idx,
2259                                             bool isZero, SelectionDAG &DAG) {
2260  SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
2261  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2262  MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
2263  SDOperand Zero = DAG.getConstant(0, EVT);
2264  SmallVector<SDOperand, 8> MaskVec(NumElems, Zero);
2265  MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
2266  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2267                               &MaskVec[0], MaskVec.size());
2268  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2269}
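// e.g. with NumElems == 4 and Idx == 2 the mask is <0, 0, 4, 0>: lane 2 takes
// V2's first element and every other lane takes element 0 of the zero (or
// undef) vector V1.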
2270
2271/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
2272///
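/// The strategy, sketched: bytes are combined pairwise into i16 lanes. For
/// each odd index i, byte i is zero-extended to i16, shifted left by 8, and
/// OR'd with the (zero-extended) byte i-1 when that one is nonzero; the
/// result is inserted as element i/2 of a v8i16, which is bitcast to v16i8.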
2273static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
2274                                       unsigned NumNonZero, unsigned NumZero,
2275                                       SelectionDAG &DAG, TargetLowering &TLI) {
2276  if (NumNonZero > 8)
2277    return SDOperand();
2278
2279  SDOperand V(0, 0);
2280  bool First = true;
2281  for (unsigned i = 0; i < 16; ++i) {
2282    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
2283    if (ThisIsNonZero && First) {
2284      if (NumZero)
2285        V = getZeroVector(MVT::v8i16, DAG);
2286      else
2287        V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
2288      First = false;
2289    }
2290
2291    if ((i & 1) != 0) {
2292      SDOperand ThisElt(0, 0), LastElt(0, 0);
2293      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
2294      if (LastIsNonZero) {
2295        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
2296      }
2297      if (ThisIsNonZero) {
2298        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
2299        ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
2300                              ThisElt, DAG.getConstant(8, MVT::i8));
2301        if (LastIsNonZero)
2302          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
2303      } else
2304        ThisElt = LastElt;
2305
2306      if (ThisElt.Val)
2307        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
2308                        DAG.getConstant(i/2, TLI.getPointerTy()));
2309    }
2310  }
2311
2312  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
2313}
2314
2315/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
2316///
2317static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
2318                                       unsigned NumNonZero, unsigned NumZero,
2319                                       SelectionDAG &DAG, TargetLowering &TLI) {
2320  if (NumNonZero > 4)
2321    return SDOperand();
2322
2323  SDOperand V(0, 0);
2324  bool First = true;
2325  for (unsigned i = 0; i < 8; ++i) {
2326    bool isNonZero = (NonZeros & (1 << i)) != 0;
2327    if (isNonZero) {
2328      if (First) {
2329        if (NumZero)
2330          V = getZeroVector(MVT::v8i16, DAG);
2331        else
2332          V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
2333        First = false;
2334      }
2335      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
2336                      DAG.getConstant(i, TLI.getPointerTy()));
2337    }
2338  }
2339
2340  return V;
2341}
2342
2343SDOperand
2344X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
2345  // All zeros are handled with pxor.
2346  if (ISD::isBuildVectorAllZeros(Op.Val))
2347    return Op;
2348
2349  // All ones are handled with pcmpeqd.
2350  if (ISD::isBuildVectorAllOnes(Op.Val))
2351    return Op;
2352
2353  MVT::ValueType VT = Op.getValueType();
2354  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
2355  unsigned EVTBits = MVT::getSizeInBits(EVT);
2356
2357  unsigned NumElems = Op.getNumOperands();
2358  unsigned NumZero  = 0;
2359  unsigned NumNonZero = 0;
2360  unsigned NonZeros = 0;
2361  std::set<SDOperand> Values;
2362  for (unsigned i = 0; i < NumElems; ++i) {
2363    SDOperand Elt = Op.getOperand(i);
2364    if (Elt.getOpcode() != ISD::UNDEF) {
2365      Values.insert(Elt);
2366      if (isZeroNode(Elt))
2367        NumZero++;
2368      else {
2369        NonZeros |= (1 << i);
2370        NumNonZero++;
2371      }
2372    }
2373  }
2374
2375  if (NumNonZero == 0)
2376    // Must be a mix of zero and undef. Return a zero vector.
2377    return getZeroVector(VT, DAG);
2378
2379  // Splat is obviously ok. Let legalizer expand it to a shuffle.
2380  if (Values.size() == 1)
2381    return SDOperand();
2382
2383  // Special case for single non-zero element.
2384  if (NumNonZero == 1) {
2385    unsigned Idx = CountTrailingZeros_32(NonZeros);
2386    SDOperand Item = Op.getOperand(Idx);
2387    Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
2388    if (Idx == 0)
2389      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
2390      return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
2391                                         NumZero > 0, DAG);
2392
2393    if (EVTBits == 32) {
2394      // Turn it into a shuffle of zero and zero-extended scalar to vector.
2395      Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
2396                                         DAG);
2397      MVT::ValueType MaskVT  = MVT::getIntVectorWithNumElements(NumElems);
2398      MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
2399      SmallVector<SDOperand, 8> MaskVec;
2400      for (unsigned i = 0; i < NumElems; i++)
2401        MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
2402      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2403                                   &MaskVec[0], MaskVec.size());
2404      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
2405                         DAG.getNode(ISD::UNDEF, VT), Mask);
2406    }
2407  }
2408
2409  // Let the legalizer expand 2-wide build_vectors.
2410  if (EVTBits == 64)
2411    return SDOperand();
2412
2413  // If element VT is < 32 bits, convert it to inserts into a zero vector.
2414  if (EVTBits == 8) {
2415    SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
2416                                        *this);
2417    if (V.Val) return V;
2418  }
2419
2420  if (EVTBits == 16) {
2421    SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
2422                                        *this);
2423    if (V.Val) return V;
2424  }
2425
2426  // If element VT is == 32 bits, turn it into a number of shuffles.
2427  SmallVector<SDOperand, 8> V;
2428  V.resize(NumElems);
2429  if (NumElems == 4 && NumZero > 0) {
2430    for (unsigned i = 0; i < 4; ++i) {
2431      bool isZero = !(NonZeros & (1 << i));
2432      if (isZero)
2433        V[i] = getZeroVector(VT, DAG);
2434      else
2435        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2436    }
2437
2438    for (unsigned i = 0; i < 2; ++i) {
2439      switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
2440        default: break;
2441        case 0:
2442          V[i] = V[i*2];  // Must be a zero vector.
2443          break;
2444        case 1:
2445          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
2446                             getMOVLMask(NumElems, DAG));
2447          break;
2448        case 2:
2449          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2450                             getMOVLMask(NumElems, DAG));
2451          break;
2452        case 3:
2453          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2454                             getUnpacklMask(NumElems, DAG));
2455          break;
2456      }
2457    }
2458
2459    // Take advantage of the fact that a GR32 to VR128 scalar_to_vector
2460    // (i.e. movd) clears the upper bits.
2461    // FIXME: we can do the same for v4f32 case when we know both parts of
2462    // the lower half come from scalar_to_vector (loadf32). We should do
2463    // that in post legalizer dag combiner with target specific hooks.
2464    if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
2465      return V[0];
2466    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2467    MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
2468    SmallVector<SDOperand, 8> MaskVec;
2469    bool Reverse = (NonZeros & 0x3) == 2;
2470    for (unsigned i = 0; i < 2; ++i)
2471      if (Reverse)
2472        MaskVec.push_back(DAG.getConstant(1-i, EVT));
2473      else
2474        MaskVec.push_back(DAG.getConstant(i, EVT));
2475    Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
2476    for (unsigned i = 0; i < 2; ++i)
2477      if (Reverse)
2478        MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
2479      else
2480        MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
2481    SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2482                                     &MaskVec[0], MaskVec.size());
2483    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
2484  }
2485
2486  if (Values.size() > 2) {
2487    // Expand into a number of unpckl*.
2488    // e.g. for v4f32
2489    //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
2490    //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
2491    //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
2492    SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
2493    for (unsigned i = 0; i < NumElems; ++i)
2494      V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2495    NumElems >>= 1;
2496    while (NumElems != 0) {
2497      for (unsigned i = 0; i < NumElems; ++i)
2498        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
2499                           UnpckMask);
2500      NumElems >>= 1;
2501    }
2502    return V[0];
2503  }
2504
2505  return SDOperand();
2506}
2507
2508SDOperand
2509X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
2510  SDOperand V1 = Op.getOperand(0);
2511  SDOperand V2 = Op.getOperand(1);
2512  SDOperand PermMask = Op.getOperand(2);
2513  MVT::ValueType VT = Op.getValueType();
2514  unsigned NumElems = PermMask.getNumOperands();
2515  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
2516  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
2517  bool V1IsSplat = false;
2518  bool V2IsSplat = false;
2519
2520  if (isUndefShuffle(Op.Val))
2521    return DAG.getNode(ISD::UNDEF, VT);
2522
2523  if (isSplatMask(PermMask.Val)) {
2524    if (NumElems <= 4) return Op;
2525    // Promote it to a v4i32 splat.
2526    return PromoteSplat(Op, DAG);
2527  }
2528
2529  if (X86::isMOVLMask(PermMask.Val))
2530    return (V1IsUndef) ? V2 : Op;
2531
2532  if (X86::isMOVSHDUPMask(PermMask.Val) ||
2533      X86::isMOVSLDUPMask(PermMask.Val) ||
2534      X86::isMOVHLPSMask(PermMask.Val) ||
2535      X86::isMOVHPMask(PermMask.Val) ||
2536      X86::isMOVLPMask(PermMask.Val))
2537    return Op;
2538
2539  if (ShouldXformToMOVHLPS(PermMask.Val) ||
2540      ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
2541    return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2542
2543  bool Commuted = false;
2544  V1IsSplat = isSplatVector(V1.Val);
2545  V2IsSplat = isSplatVector(V2.Val);
2546  if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
2547    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2548    std::swap(V1IsSplat, V2IsSplat);
2549    std::swap(V1IsUndef, V2IsUndef);
2550    Commuted = true;
2551  }
2552
2553  if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
2554    if (V2IsUndef) return V1;
2555    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2556    if (V2IsSplat) {
2557      // V2 is a splat, so the mask may be malformed. That is, it may point
2558      // to any V2 element. The instruction selector won't like this. Get
2559      // a corrected mask and commute to form a proper MOVS{S|D}.
2560      SDOperand NewMask = getMOVLMask(NumElems, DAG);
2561      if (NewMask.Val != PermMask.Val)
2562        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2563    }
2564    return Op;
2565  }
2566
2567  if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
2568      X86::isUNPCKLMask(PermMask.Val) ||
2569      X86::isUNPCKHMask(PermMask.Val))
2570    return Op;
2571
2572  if (V2IsSplat) {
2573    // Normalize the mask so all entries that point to V2 point to its first
2574    // element, then try to match unpck{h|l} again. If it matches, return a
2575    // new vector_shuffle with the corrected mask.
2576    SDOperand NewMask = NormalizeMask(PermMask, DAG);
2577    if (NewMask.Val != PermMask.Val) {
2578      if (X86::isUNPCKLMask(PermMask.Val, true)) {
2579        SDOperand NewMask = getUnpacklMask(NumElems, DAG);
2580        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2581      } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
2582        SDOperand NewMask = getUnpackhMask(NumElems, DAG);
2583        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2584      }
2585    }
2586  }
2587
2588  // Normalize the node to match x86 shuffle ops if needed
2589  if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
2590      Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2591
2592  if (Commuted) {
2593    // Commute it back and try unpck* again.
2594    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
2595    if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
2596        X86::isUNPCKLMask(PermMask.Val) ||
2597        X86::isUNPCKHMask(PermMask.Val))
2598      return Op;
2599  }
2600
2601  // If VT is integer, try PSHUF* first, then SHUFP*.
2602  if (MVT::isInteger(VT)) {
2603    if (X86::isPSHUFDMask(PermMask.Val) ||
2604        X86::isPSHUFHWMask(PermMask.Val) ||
2605        X86::isPSHUFLWMask(PermMask.Val)) {
2606      if (V2.getOpcode() != ISD::UNDEF)
2607        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
2608                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
2609      return Op;
2610    }
2611
2612    if (X86::isSHUFPMask(PermMask.Val))
2613      return Op;
2614
2615    // Handle v8i16 as a PSHUFLW / PSHUFHW shuffle node pair.
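    // e.g. <1, 0, 3, 2, 5, 4, 7, 6> is lowered as a PSHUFLW-style shuffle
    // with mask <1, 0, 3, 2, 4, 5, 6, 7> followed by a PSHUFHW-style shuffle
    // with mask <0, 1, 2, 3, 5, 4, 7, 6>.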
2616    if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
2617      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2618      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2619      SmallVector<SDOperand, 8> MaskVec;
2620      for (unsigned i = 0; i != 4; ++i)
2621        MaskVec.push_back(PermMask.getOperand(i));
2622      for (unsigned i = 4; i != 8; ++i)
2623        MaskVec.push_back(DAG.getConstant(i, BaseVT));
2624      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2625                                   &MaskVec[0], MaskVec.size());
2626      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2627      MaskVec.clear();
2628      for (unsigned i = 0; i != 4; ++i)
2629        MaskVec.push_back(DAG.getConstant(i, BaseVT));
2630      for (unsigned i = 4; i != 8; ++i)
2631        MaskVec.push_back(PermMask.getOperand(i));
2632      Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
2633      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2634    }
2635  } else {
2636    // Floating point cases in the other order.
2637    if (X86::isSHUFPMask(PermMask.Val))
2638      return Op;
2639    if (X86::isPSHUFDMask(PermMask.Val) ||
2640        X86::isPSHUFHWMask(PermMask.Val) ||
2641        X86::isPSHUFLWMask(PermMask.Val)) {
2642      if (V2.getOpcode() != ISD::UNDEF)
2643        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
2644                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
2645      return Op;
2646    }
2647  }
2648
2649  if (NumElems == 4) {
2650    MVT::ValueType MaskVT = PermMask.getValueType();
2651    MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
2652    SmallVector<std::pair<int, int>, 8> Locs;
2653    Locs.resize(NumElems);  // resize, not reserve: entries are assigned by index.
2654    SmallVector<SDOperand, 8> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2655    SmallVector<SDOperand, 8> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2656    unsigned NumHi = 0;
2657    unsigned NumLo = 0;
2658    // If no more than two elements come from either vector, this can be
2659    // implemented with two shuffles. The first shuffle gathers the elements.
2660    // The second shuffle, which takes the first shuffle as both of its
2661    // vector operands, puts the elements into the right order.
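    // e.g. for the mask <0, 4, 2, 6> the first shuffle gathers with
    // <0, 2, 4, 6>, producing <V1[0], V1[2], V2[0], V2[2]>, and the second
    // shuffle reorders that result with <0, 2, 5, 7>.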
2662    for (unsigned i = 0; i != NumElems; ++i) {
2663      SDOperand Elt = PermMask.getOperand(i);
2664      if (Elt.getOpcode() == ISD::UNDEF) {
2665        Locs[i] = std::make_pair(-1, -1);
2666      } else {
2667        unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
2668        if (Val < NumElems) {
2669          Locs[i] = std::make_pair(0, NumLo);
2670          Mask1[NumLo] = Elt;
2671          NumLo++;
2672        } else {
2673          Locs[i] = std::make_pair(1, NumHi);
2674          if (2+NumHi < NumElems)
2675            Mask1[2+NumHi] = Elt;
2676          NumHi++;
2677        }
2678      }
2679    }
2680    if (NumLo <= 2 && NumHi <= 2) {
2681      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2682                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2683                                   &Mask1[0], Mask1.size()));
2684      for (unsigned i = 0; i != NumElems; ++i) {
2685        if (Locs[i].first == -1)
2686          continue;
2687        else {
2688          unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
2689          Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
2690          Mask2[i] = DAG.getConstant(Idx, MaskEVT);
2691        }
2692      }
2693
2694      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
2695                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2696                                     &Mask2[0], Mask2.size()));
2697    }
2698
2699    // Break it into (shuffle shuffle_hi, shuffle_lo).
2700    Locs.clear();
        Locs.resize(NumElems);  // Restore a valid size before indexing below.
2701    SmallVector<SDOperand,8> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2702    SmallVector<SDOperand,8> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2703    SmallVector<SDOperand,8> *MaskPtr = &LoMask;
2704    unsigned MaskIdx = 0;
2705    unsigned LoIdx = 0;
2706    unsigned HiIdx = NumElems/2;
2707    for (unsigned i = 0; i != NumElems; ++i) {
2708      if (i == NumElems/2) {
2709        MaskPtr = &HiMask;
2710        MaskIdx = 1;
2711        LoIdx = 0;
2712        HiIdx = NumElems/2;
2713      }
2714      SDOperand Elt = PermMask.getOperand(i);
2715      if (Elt.getOpcode() == ISD::UNDEF) {
2716        Locs[i] = std::make_pair(-1, -1);
2717      } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
2718        Locs[i] = std::make_pair(MaskIdx, LoIdx);
2719        (*MaskPtr)[LoIdx] = Elt;
2720        LoIdx++;
2721      } else {
2722        Locs[i] = std::make_pair(MaskIdx, HiIdx);
2723        (*MaskPtr)[HiIdx] = Elt;
2724        HiIdx++;
2725      }
2726    }
2727
2728    SDOperand LoShuffle =
2729      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2730                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2731                              &LoMask[0], LoMask.size()));
2732    SDOperand HiShuffle =
2733      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2734                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2735                              &HiMask[0], HiMask.size()));
2736    SmallVector<SDOperand, 8> MaskOps;
2737    for (unsigned i = 0; i != NumElems; ++i) {
2738      if (Locs[i].first == -1) {
2739        MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
2740      } else {
2741        unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
2742        MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
2743      }
2744    }
2745    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
2746                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2747                                   &MaskOps[0], MaskOps.size()));
2748  }
2749
2750  return SDOperand();
2751}
2752
2753SDOperand
2754X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2755  if (!isa<ConstantSDNode>(Op.getOperand(1)))
2756    return SDOperand();
2757
2758  MVT::ValueType VT = Op.getValueType();
2759  // TODO: handle v16i8.
2760  if (MVT::getSizeInBits(VT) == 16) {
2761    // Transform it so it matches pextrw, which produces a 32-bit result.
2762    MVT::ValueType EVT = (MVT::ValueType)(VT+1);
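        // (VT+1 maps MVT::i16 to MVT::i32, relying on their adjacency in the
        // MVT::ValueType enumeration.)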
2763    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
2764                                    Op.getOperand(0), Op.getOperand(1));
2765    SDOperand Assert  = DAG.getNode(ISD::AssertZext, EVT, Extract,
2766                                    DAG.getValueType(VT));
2767    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
2768  } else if (MVT::getSizeInBits(VT) == 32) {
2769    SDOperand Vec = Op.getOperand(0);
2770    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
2771    if (Idx == 0)
2772      return Op;
2773    // SHUFPS the element to the lowest double word, then movss.
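        // For example, to extract element 2 of a v4f32, shuffle it down with
        // mask <2,u,u,u>; extracting element 0 of the result then selects a
        // movss.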
2774    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2775    SmallVector<SDOperand, 8> IdxVec;
2776    IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
2777    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
2778    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
2779    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
2780    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2781                                 &IdxVec[0], IdxVec.size());
2782    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
2783                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
2784    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
2785                       DAG.getConstant(0, getPointerTy()));
2786  } else if (MVT::getSizeInBits(VT) == 64) {
2787    SDOperand Vec = Op.getOperand(0);
2788    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
2789    if (Idx == 0)
2790      return Op;
2791
2792    // UNPCKHPD the element to the lowest double word, then movsd.
2793    // Note that if the lower 64 bits of the UNPCKHPD result are then stored
2794    // to an f64mem, the whole operation is folded into a single MOVHPDmr.
2795    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2796    SmallVector<SDOperand, 8> IdxVec;
2797    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
2798    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
2799    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2800                                 &IdxVec[0], IdxVec.size());
2801    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
2802                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
2803    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
2804                       DAG.getConstant(0, getPointerTy()));
2805  }
2806
2807  return SDOperand();
2808}
2809
2810SDOperand
2811X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2812  // Transform it so it matches pinsrw, which expects a 16-bit value in a
2813  // GR32 as its second argument.
2814  MVT::ValueType VT = Op.getValueType();
2815  MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
2816  SDOperand N0 = Op.getOperand(0);
2817  SDOperand N1 = Op.getOperand(1);
2818  SDOperand N2 = Op.getOperand(2);
2819  if (MVT::getSizeInBits(BaseVT) == 16) {
2820    if (N1.getValueType() != MVT::i32)
2821      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
2822    if (N2.getValueType() != MVT::i32)
2823      N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
2824    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
2825  } else if (MVT::getSizeInBits(BaseVT) == 32) {
2826    unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
2827    if (Idx == 0) {
2828      // Use a movss.
2829      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
2830      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2831      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2832      SmallVector<SDOperand, 8> MaskVec;
2833      MaskVec.push_back(DAG.getConstant(4, BaseVT));
2834      for (unsigned i = 1; i <= 3; ++i)
2835        MaskVec.push_back(DAG.getConstant(i, BaseVT));
2836      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
2837                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
2838                                     &MaskVec[0], MaskVec.size()));
2839    } else {
2840      // Use two pinsrw instructions to insert a 32-bit value.
2841      Idx <<= 1;
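          // After doubling, Idx and Idx+1 are the two 16-bit lanes that the
          // 32-bit element occupies in the v8i16 view of the vector.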
2842      if (MVT::isFloatingPoint(N1.getValueType())) {
2843        if (ISD::isNON_EXTLoad(N1.Val)) {
2844          // Just load directly from f32mem to GR32.
2845          LoadSDNode *LD = cast<LoadSDNode>(N1);
2846          N1 = DAG.getLoad(MVT::i32, LD->getChain(), LD->getBasePtr(),
2847                           LD->getSrcValue(), LD->getSrcValueOffset());
2848        } else {
2849          N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
2850          N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
2851          N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
2852                           DAG.getConstant(0, getPointerTy()));
2853        }
2854      }
2855      N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
2856      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
2857                       DAG.getConstant(Idx, getPointerTy()));
2858      N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
2859      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
2860                       DAG.getConstant(Idx+1, getPointerTy()));
2861      return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
2862    }
2863  }
2864
2865  return SDOperand();
2866}
2867
2868SDOperand
2869X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
2870  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
2871  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
2872}
2873
2874// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2875// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
2876// one of the above-mentioned nodes. It has to be wrapped because otherwise
2877// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2878// be used to form an addressing mode. These wrapped nodes will be selected
2879// into MOV32ri.
2880SDOperand
2881X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
2882  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2883  SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
2884                                               getPointerTy(),
2885                                               CP->getAlignment());
2886  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
2887  // With PIC, the address is actually $g + Offset.
2888  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
2889      !Subtarget->isPICStyleRIPRel()) {
2890    Result = DAG.getNode(ISD::ADD, getPointerTy(),
2891                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
2892                         Result);
2893  }
2894
2895  return Result;
2896}
2897
2898SDOperand
2899X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
2900  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2901  SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
2902  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
2903  // With PIC, the address is actually $g + Offset.
2904  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
2905      !Subtarget->isPICStyleRIPRel()) {
2906    Result = DAG.getNode(ISD::ADD, getPointerTy(),
2907                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
2908                         Result);
2909  }
2910
2911  // For Darwin & Mingw32, external and weak symbols are indirect, so we want to
2912  // load the value at address GV, not the value of GV itself. This means that
2913  // the GlobalAddress must be in the base or index register of the address, not
2914  // the GV offset field. The platform check is inside the GVRequiresExtraLoad()
2915  // call. The same applies to external symbols during PIC codegen.
2916  if (Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false))
2917    Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
2918
2919  return Result;
2920}
2921
2922SDOperand
2923X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
2924  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
2925  SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
2926  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
2927  // With PIC, the address is actually $g + Offset.
2928  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
2929      !Subtarget->isPICStyleRIPRel()) {
2930    Result = DAG.getNode(ISD::ADD, getPointerTy(),
2931                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
2932                         Result);
2933  }
2934
2935  return Result;
2936}
2937
2938SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
2939  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2940  SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
2941  Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
2942  // With PIC, the address is actually $g + Offset.
2943  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
2944      !Subtarget->isPICStyleRIPRel()) {
2945    Result = DAG.getNode(ISD::ADD, getPointerTy(),
2946                         DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
2947                         Result);
2948  }
2949
2950  return Result;
2951}
2952
2953SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
2954    assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
2955           "Not an i64 shift!");
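        // SHLD/SHRD plus a plain shift compute the result for amounts < 32.
        // Since the hardware masks the shift amount to 5 bits, bit 5 of the
        // amount (ShAmt & 32) is tested explicitly, and CMOVs patch in the
        // >= 32 case: the shifted half crosses over and the vacated half
        // becomes 0 (or the sign fill for SRA).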
2956    bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
2957    SDOperand ShOpLo = Op.getOperand(0);
2958    SDOperand ShOpHi = Op.getOperand(1);
2959    SDOperand ShAmt  = Op.getOperand(2);
2960    SDOperand Tmp1 = isSRA ?
2961      DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
2962      DAG.getConstant(0, MVT::i32);
2963
2964    SDOperand Tmp2, Tmp3;
2965    if (Op.getOpcode() == ISD::SHL_PARTS) {
2966      Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
2967      Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
2968    } else {
2969      Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
2970      Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
2971    }
2972
2973    const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
2974    SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
2975                                    DAG.getConstant(32, MVT::i8));
2976    SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)};
2977    SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1);
2978
2979    SDOperand Hi, Lo;
2980    SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
2981
2982    VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
2983    SmallVector<SDOperand, 4> Ops;
2984    if (Op.getOpcode() == ISD::SHL_PARTS) {
2985      Ops.push_back(Tmp2);
2986      Ops.push_back(Tmp3);
2987      Ops.push_back(CC);
2988      Ops.push_back(InFlag);
2989      Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
2990      InFlag = Hi.getValue(1);
2991
2992      Ops.clear();
2993      Ops.push_back(Tmp3);
2994      Ops.push_back(Tmp1);
2995      Ops.push_back(CC);
2996      Ops.push_back(InFlag);
2997      Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
2998    } else {
2999      Ops.push_back(Tmp2);
3000      Ops.push_back(Tmp3);
3001      Ops.push_back(CC);
3002      Ops.push_back(InFlag);
3003      Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3004      InFlag = Lo.getValue(1);
3005
3006      Ops.clear();
3007      Ops.push_back(Tmp3);
3008      Ops.push_back(Tmp1);
3009      Ops.push_back(CC);
3010      Ops.push_back(InFlag);
3011      Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3012    }
3013
3014    VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
3015    Ops.clear();
3016    Ops.push_back(Lo);
3017    Ops.push_back(Hi);
3018    return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
3019}
3020
3021SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
3022  assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
3023         Op.getOperand(0).getValueType() >= MVT::i16 &&
3024         "Unknown SINT_TO_FP to lower!");
3025
3026  SDOperand Result;
3027  MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
3028  unsigned Size = MVT::getSizeInBits(SrcVT)/8;
3029  MachineFunction &MF = DAG.getMachineFunction();
3030  int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
3031  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3032  SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
3033                                 StackSlot, NULL, 0);
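      // Spill the integer to a stack slot so the x87 FILD below can load it;
      // there is no direct move from a general register to the FP stack.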
3034
3035  // Build the FILD
3036  SDVTList Tys;
3037  if (X86ScalarSSE)
3038    Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
3039  else
3040    Tys = DAG.getVTList(MVT::f64, MVT::Other);
3041  SmallVector<SDOperand, 8> Ops;
3042  Ops.push_back(Chain);
3043  Ops.push_back(StackSlot);
3044  Ops.push_back(DAG.getValueType(SrcVT));
3045  Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
3046                       Tys, &Ops[0], Ops.size());
3047
3048  if (X86ScalarSSE) {
3049    Chain = Result.getValue(1);
3050    SDOperand InFlag = Result.getValue(2);
3051
3052    // FIXME: Currently the FST is flagged to the FILD_FLAG. This
3053    // shouldn't be necessary except that RFP cannot be live across
3054    // multiple blocks. When the stackifier is fixed, they can be uncoupled.
3055    MachineFunction &MF = DAG.getMachineFunction();
3056    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
3057    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3058    Tys = DAG.getVTList(MVT::Other);
3059    SmallVector<SDOperand, 8> Ops;
3060    Ops.push_back(Chain);
3061    Ops.push_back(Result);
3062    Ops.push_back(StackSlot);
3063    Ops.push_back(DAG.getValueType(Op.getValueType()));
3064    Ops.push_back(InFlag);
3065    Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
3066    Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0);
3067  }
3068
3069  return Result;
3070}
3071
3072SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
3073  assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
3074         "Unknown FP_TO_SINT to lower!");
3075  // We lower FP->sint into a FIST*_IN_MEM node (fistp), followed by a load
3076  // from a temporary stack slot.
3077  MachineFunction &MF = DAG.getMachineFunction();
3078  unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
3079  int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
3080  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3081
3082  unsigned Opc;
3083  switch (Op.getValueType()) {
3084    default: assert(0 && "Invalid FP_TO_SINT to lower!");
3085    case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
3086    case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
3087    case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
3088  }
3089
3090  SDOperand Chain = DAG.getEntryNode();
3091  SDOperand Value = Op.getOperand(0);
3092  if (X86ScalarSSE) {
3093    assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
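        // The SSE value must go through the x87 unit to produce an i64: spill
        // it and reload it onto the FP stack for the FIST emitted below.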
3094    Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
3095    SDVTList Tys = DAG.getVTList(MVT::f64, MVT::Other);
3096    SDOperand Ops[] = {
3097      Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
3098    };
3099    Value = DAG.getNode(X86ISD::FLD, Tys, Ops, 3);
3100    Chain = Value.getValue(1);
3101    SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
3102    StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3103  }
3104
3105  // Build the FP_TO_INT*_IN_MEM
3106  SDOperand Ops[] = { Chain, Value, StackSlot };
3107  SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops, 3);
3108
3109  // Load the result.
3110  return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
3111}
3112
3113SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
3114  MVT::ValueType VT = Op.getValueType();
3115  const Type *OpNTy =  MVT::getTypeForValueType(VT);
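      // fabs is implemented as an FAND with a constant-pool mask that clears
      // only the sign bit: ~(1ULL << 63) for f64, ~(1U << 31) for f32.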
3116  std::vector<Constant*> CV;
3117  if (VT == MVT::f64) {
3118    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
3119    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3120  } else {
3121    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
3122    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3123    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3124    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3125  }
3126  Constant *CS = ConstantStruct::get(CV);
3127  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3128  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3129  SmallVector<SDOperand, 3> Ops;
3130  Ops.push_back(DAG.getEntryNode());
3131  Ops.push_back(CPIdx);
3132  Ops.push_back(DAG.getSrcValue(NULL));
3133  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
3134  return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
3135}
3136
3137SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
3138  MVT::ValueType VT = Op.getValueType();
3139  const Type *OpNTy =  MVT::getTypeForValueType(VT);
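      // fneg is implemented as an FXOR with a constant-pool mask that flips
      // only the sign bit: 1ULL << 63 for f64, 1U << 31 for f32.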
3140  std::vector<Constant*> CV;
3141  if (VT == MVT::f64) {
3142    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
3143    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3144  } else {
3145    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
3146    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3147    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3148    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3149  }
3150  Constant *CS = ConstantStruct::get(CV);
3151  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3152  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
3153  SmallVector<SDOperand, 3> Ops;
3154  Ops.push_back(DAG.getEntryNode());
3155  Ops.push_back(CPIdx);
3156  Ops.push_back(DAG.getSrcValue(NULL));
3157  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
3158  return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
3159}
3160
3161SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
3162  SDOperand Op0 = Op.getOperand(0);
3163  SDOperand Op1 = Op.getOperand(1);
3164  MVT::ValueType VT = Op.getValueType();
3165  MVT::ValueType SrcVT = Op1.getValueType();
3166  const Type *SrcTy =  MVT::getTypeForValueType(SrcVT);
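      // copysign is computed bitwise: (Op0 & ~SignMask) | (Op1 & SignMask),
      // using constant-pool masks with the FAND / FOR nodes below.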
3167
3168  // If the second operand is smaller, extend it first.
3169  if (MVT::getSizeInBits(SrcVT) < MVT::getSizeInBits(VT)) {
3170    Op1 = DAG.getNode(ISD::FP_EXTEND, VT, Op1);
3171    SrcVT = VT;
3172  }
3173
3174  // First get the sign bit of the second operand.
3175  std::vector<Constant*> CV;
3176  if (SrcVT == MVT::f64) {
3177    CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(1ULL << 63)));
3178    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3179  } else {
3180    CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(1U << 31)));
3181    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3182    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3183    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3184  }
3185  Constant *CS = ConstantStruct::get(CV);
3186  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3187  SDVTList Tys = DAG.getVTList(SrcVT, MVT::Other);
3188  SmallVector<SDOperand, 3> Ops;
3189  Ops.push_back(DAG.getEntryNode());
3190  Ops.push_back(CPIdx);
3191  Ops.push_back(DAG.getSrcValue(NULL));
3192  SDOperand Mask1 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
3193  SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
3194
3195  // Shift sign bit right or left if the two operands have different types.
3196  if (MVT::getSizeInBits(SrcVT) > MVT::getSizeInBits(VT)) {
3197    // Op0 is MVT::f32, Op1 is MVT::f64.
3198    SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, SignBit);
3199    SignBit = DAG.getNode(X86ISD::FSRL, MVT::v2f64, SignBit,
3200                          DAG.getConstant(32, MVT::i32));
3201    SignBit = DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, SignBit);
3202    SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::f32, SignBit,
3203                          DAG.getConstant(0, getPointerTy()));
3204  }
3205
3206  // Clear the sign bit of the first operand.
3207  CV.clear();
3208  if (VT == MVT::f64) {
3209    CV.push_back(ConstantFP::get(SrcTy, BitsToDouble(~(1ULL << 63))));
3210    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3211  } else {
3212    CV.push_back(ConstantFP::get(SrcTy, BitsToFloat(~(1U << 31))));
3213    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3214    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3215    CV.push_back(ConstantFP::get(SrcTy, 0.0));
3216  }
3217  CS = ConstantStruct::get(CV);
3218  CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3219  Tys = DAG.getVTList(VT, MVT::Other);
3220  Ops.clear();
3221  Ops.push_back(DAG.getEntryNode());
3222  Ops.push_back(CPIdx);
3223  Ops.push_back(DAG.getSrcValue(NULL));
3224  SDOperand Mask2 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
3225  SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
3226
3227  // Or the value with the sign bit.
3228  return DAG.getNode(X86ISD::FOR, VT, Val, SignBit);
3229}
3230
3231SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
3232                                        SDOperand Chain) {
3233  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
3234  SDOperand Cond;
3235  SDOperand Op0 = Op.getOperand(0);
3236  SDOperand Op1 = Op.getOperand(1);
3237  SDOperand CC = Op.getOperand(2);
3238  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
3239  const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3240  const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
3241  bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
3242  unsigned X86CC;
3243
3244  if (translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC,
3245                     Op0, Op1, DAG)) {
3246    SDOperand Ops1[] = { Chain, Op0, Op1 };
3247    Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1);
3248    SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
3249    return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
3250  }
3251
3252  assert(isFP && "Illegal integer SetCC!");
3253
3254  SDOperand COps[] = { Chain, Op0, Op1 };
3255  Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1);
3256
3257  switch (SetCCOpcode) {
3258  default: assert(false && "Illegal floating point SetCC!");
3259  case ISD::SETOEQ: {  // !PF & ZF
3260    SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond };
3261    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
3262    SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8),
3263                         Tmp1.getValue(1) };
3264    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
3265    return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
3266  }
3267  case ISD::SETUNE: {  // PF | !ZF
3268    SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond };
3269    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
3270    SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8),
3271                         Tmp1.getValue(1) };
3272    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
3273    return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
3274  }
3275  }
3276}
3277
3278SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
3279  bool addTest = true;
3280  SDOperand Chain = DAG.getEntryNode();
3281  SDOperand Cond  = Op.getOperand(0);
3282  SDOperand CC;
3283  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3284
3285  if (Cond.getOpcode() == ISD::SETCC)
3286    Cond = LowerSETCC(Cond, DAG, Chain);
3287
3288  if (Cond.getOpcode() == X86ISD::SETCC) {
3289    CC = Cond.getOperand(0);
3290
3291    // If the condition flag is set by an X86ISD::CMP, make a copy of it (the
3292    // flag operand cannot be shared) and use it as the condition-setting
3293    // operand in place of the X86ISD::SETCC.
3294    // If the X86ISD::SETCC has more than one use, then perhaps it's better
3295    // to use a test instead of duplicating the X86ISD::CMP (for register
3296    // pressure reasons)?
3297    SDOperand Cmp = Cond.getOperand(1);
3298    unsigned Opc = Cmp.getOpcode();
3299    bool IllegalFPCMov = !X86ScalarSSE &&
3300      MVT::isFloatingPoint(Op.getValueType()) &&
3301      !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
3302    if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) &&
3303        !IllegalFPCMov) {
3304      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
3305      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
3306      addTest = false;
3307    }
3308  }
3309
3310  if (addTest) {
3311    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
3312    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
3313    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
3314  }
3315
3316  VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag);
3317  SmallVector<SDOperand, 4> Ops;
3318  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
3319  // the condition is true.
3320  Ops.push_back(Op.getOperand(2));
3321  Ops.push_back(Op.getOperand(1));
3322  Ops.push_back(CC);
3323  Ops.push_back(Cond.getValue(1));
3324  return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3325}
3326
3327SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
3328  bool addTest = true;
3329  SDOperand Chain = Op.getOperand(0);
3330  SDOperand Cond  = Op.getOperand(1);
3331  SDOperand Dest  = Op.getOperand(2);
3332  SDOperand CC;
3333  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3334
3335  if (Cond.getOpcode() == ISD::SETCC)
3336    Cond = LowerSETCC(Cond, DAG, Chain);
3337
3338  if (Cond.getOpcode() == X86ISD::SETCC) {
3339    CC = Cond.getOperand(0);
3340
3341    // If the condition flag is set by an X86ISD::CMP, make a copy of it (the
3342    // flag operand cannot be shared) and use it as the condition-setting
3343    // operand in place of the X86ISD::SETCC.
3344    // If the X86ISD::SETCC has more than one use, then perhaps it's better
3345    // to use a test instead of duplicating the X86ISD::CMP (for register
3346    // pressure reasons)?
3347    SDOperand Cmp = Cond.getOperand(1);
3348    unsigned Opc = Cmp.getOpcode();
3349    if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) {
3350      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
3351      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
3352      addTest = false;
3353    }
3354  }
3355
3356  if (addTest) {
3357    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
3358    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
3359    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
3360  }
3361  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
3362                     Cond, Dest, CC, Cond.getValue(1));
3363}
3364
3365SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
3366  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3367
3368  if (Subtarget->is64Bit())
3369    return LowerX86_64CCCCallTo(Op, DAG, CallingConv);
3370  else
3371    switch (CallingConv) {
3372    default:
3373      assert(0 && "Unsupported calling convention");
3374    case CallingConv::Fast:
3375      // TODO: Implement fastcc
3376      // Falls through
3377    case CallingConv::C:
3378    case CallingConv::X86_StdCall:
3379      return LowerCCCCallTo(Op, DAG, CallingConv);
3380    case CallingConv::X86_FastCall:
3381      return LowerFastCCCallTo(Op, DAG, CallingConv);
3382    }
3383}
3384
3385SDOperand
3386X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
3387  MachineFunction &MF = DAG.getMachineFunction();
3388  const Function* Fn = MF.getFunction();
3389  if (Fn->hasExternalLinkage() &&
3390      Subtarget->isTargetCygMing() &&
3391      Fn->getName() == "main")
3392    MF.getInfo<X86FunctionInfo>()->setForceFramePointer(true);
3393
3394  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3395  if (Subtarget->is64Bit())
3396    return LowerX86_64CCCArguments(Op, DAG);
3397  else
3398    switch(CC) {
3399    default:
3400      assert(0 && "Unsupported calling convention");
3401    case CallingConv::Fast:
3402      // TODO: implement fastcc.
3403
3404      // Falls through
3405    case CallingConv::C:
3406      return LowerCCCArguments(Op, DAG);
3407    case CallingConv::X86_StdCall:
3408      MF.getInfo<X86FunctionInfo>()->setDecorationStyle(StdCall);
3409      return LowerCCCArguments(Op, DAG, true);
3410    case CallingConv::X86_FastCall:
3411      MF.getInfo<X86FunctionInfo>()->setDecorationStyle(FastCall);
3412      return LowerFastCCArguments(Op, DAG);
3413    }
3414}
3415
3416SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
3417  SDOperand InFlag(0, 0);
3418  SDOperand Chain = Op.getOperand(0);
3419  unsigned Align =
3420    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
3421  if (Align == 0) Align = 1;
3422
3423  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
3424  // If not DWORD aligned, or if the size is below the threshold, call the
3425  // memset library function; it knows how to align to the right boundary first.
3426  if ((Align & 3) != 0 ||
3427      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
3428    MVT::ValueType IntPtr = getPointerTy();
3429    const Type *IntPtrTy = getTargetData()->getIntPtrType();
3430    TargetLowering::ArgListTy Args;
3431    TargetLowering::ArgListEntry Entry;
3432    Entry.Node = Op.getOperand(1);
3433    Entry.Ty = IntPtrTy;
3434    Args.push_back(Entry);
3435    // Extend the unsigned i8 argument to be an int value for the call.
3436    Entry.Node = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
3437    Entry.Ty = IntPtrTy;
3438    Args.push_back(Entry);
3439    Entry.Node = Op.getOperand(3);
3440    Args.push_back(Entry);
3441    std::pair<SDOperand,SDOperand> CallResult =
3442      LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
3443                  DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
3444    return CallResult.second;
3445  }
3446
3447  MVT::ValueType AVT;
3448  SDOperand Count;
3449  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3450  unsigned BytesLeft = 0;
3451  bool TwoRepStos = false;
3452  if (ValC) {
3453    unsigned ValReg;
3454    uint64_t Val = ValC->getValue() & 255;
3455
3456    // If the value is a constant, then we can potentially use larger sets.
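        // For example, the byte 0xAB is widened to 0xABAB (WORD), 0xABABABAB
        // (DWORD), or 0xABABABABABABABAB (QWORD) so each rep;stos element
        // stores the full pattern.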
3457    switch (Align & 3) {
3458      case 2:   // WORD aligned
3459        AVT = MVT::i16;
3460        ValReg = X86::AX;
3461        Val = (Val << 8) | Val;
3462        break;
3463      case 0:  // DWORD aligned
3464        AVT = MVT::i32;
3465        ValReg = X86::EAX;
3466        Val = (Val << 8)  | Val;
3467        Val = (Val << 16) | Val;
3468        if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) {  // QWORD aligned
3469          AVT = MVT::i64;
3470          ValReg = X86::RAX;
3471          Val = (Val << 32) | Val;
3472        }
3473        break;
3474      default:  // Byte aligned
3475        AVT = MVT::i8;
3476        ValReg = X86::AL;
3477        Count = Op.getOperand(3);
3478        break;
3479    }
3480
3481    if (AVT > MVT::i8) {
3482      if (I) {
3483        unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
3484        Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
3485        BytesLeft = I->getValue() % UBytes;
3486      } else {
3487        assert(AVT >= MVT::i32 &&
3488               "Do not use rep;stos if not at least DWORD aligned");
3489        Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
3490                            Op.getOperand(3),
                                DAG.getConstant((AVT == MVT::i64) ? 3 : 2, MVT::i8));
3491        TwoRepStos = true;
3492      }
3493    }
3494
3495    Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
3496                              InFlag);
3497    InFlag = Chain.getValue(1);
3498  } else {
3499    AVT = MVT::i8;
3500    Count  = Op.getOperand(3);
3501    Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
3502    InFlag = Chain.getValue(1);
3503  }
3504
3505  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
3506                            Count, InFlag);
3507  InFlag = Chain.getValue(1);
3508  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
3509                            Op.getOperand(1), InFlag);
3510  InFlag = Chain.getValue(1);
3511
3512  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
3513  SmallVector<SDOperand, 8> Ops;
3514  Ops.push_back(Chain);
3515  Ops.push_back(DAG.getValueType(AVT));
3516  Ops.push_back(InFlag);
3517  Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
3518
3519  if (TwoRepStos) {
3520    InFlag = Chain.getValue(1);
3521    Count = Op.getOperand(3);
3522    MVT::ValueType CVT = Count.getValueType();
3523    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
3524                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
3525    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
3526                              Left, InFlag);
3527    InFlag = Chain.getValue(1);
3528    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
3529    Ops.clear();
3530    Ops.push_back(Chain);
3531    Ops.push_back(DAG.getValueType(MVT::i8));
3532    Ops.push_back(InFlag);
3533    Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
3534  } else if (BytesLeft) {
3535    // Issue stores for the last 1 - 7 bytes.
3536    SDOperand Value;
3537    unsigned Val = ValC->getValue() & 255;
3538    unsigned Offset = I->getValue() - BytesLeft;
3539    SDOperand DstAddr = Op.getOperand(1);
3540    MVT::ValueType AddrVT = DstAddr.getValueType();
3541    if (BytesLeft >= 4) {
3542      Val = (Val << 8)  | Val;
3543      Val = (Val << 16) | Val;
3544      Value = DAG.getConstant(Val, MVT::i32);
3545      Chain = DAG.getStore(Chain, Value,
3546                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
3547                                       DAG.getConstant(Offset, AddrVT)),
3548                           NULL, 0);
3549      BytesLeft -= 4;
3550      Offset += 4;
3551    }
3552    if (BytesLeft >= 2) {
3553      Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
3554      Chain = DAG.getStore(Chain, Value,
3555                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
3556                                       DAG.getConstant(Offset, AddrVT)),
3557                           NULL, 0);
3558      BytesLeft -= 2;
3559      Offset += 2;
3560    }
3561    if (BytesLeft == 1) {
3562      Value = DAG.getConstant(Val, MVT::i8);
3563      Chain = DAG.getStore(Chain, Value,
3564                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
3565                                       DAG.getConstant(Offset, AddrVT)),
3566                           NULL, 0);
3567    }
3568  }
3569
3570  return Chain;
3571}
3572
3573SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
3574  SDOperand Chain = Op.getOperand(0);
3575  unsigned Align =
3576    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
3577  if (Align == 0) Align = 1;
3578
3579  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
3580  // If not DWORD aligned, or if the size is below the threshold, call the
3581  // memcpy library function; it knows how to align to the right boundary first.
3582  if ((Align & 3) != 0 ||
3583      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
3584    MVT::ValueType IntPtr = getPointerTy();
3585    TargetLowering::ArgListTy Args;
3586    TargetLowering::ArgListEntry Entry;
3587    Entry.Ty = getTargetData()->getIntPtrType();
3588    Entry.Node = Op.getOperand(1); Args.push_back(Entry);
3589    Entry.Node = Op.getOperand(2); Args.push_back(Entry);
3590    Entry.Node = Op.getOperand(3); Args.push_back(Entry);
3591    std::pair<SDOperand,SDOperand> CallResult =
3592      LowerCallTo(Chain, Type::VoidTy, false, false, CallingConv::C, false,
3593                  DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
3594    return CallResult.second;
3595  }
3596
3597  MVT::ValueType AVT;
3598  SDOperand Count;
3599  unsigned BytesLeft = 0;
3600  bool TwoRepMovs = false;
3601  switch (Align & 3) {
3602    case 2:   // WORD aligned
3603      AVT = MVT::i16;
3604      break;
3605    case 0:  // DWORD aligned
3606      AVT = MVT::i32;
3607      if (Subtarget->is64Bit() && ((Align & 0xF) == 0))  // QWORD aligned
3608        AVT = MVT::i64;
3609      break;
3610    default:  // Byte aligned
3611      AVT = MVT::i8;
3612      Count = Op.getOperand(3);
3613      break;
3614  }
3615
3616  if (AVT > MVT::i8) {
3617    if (I) {
3618      unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
3619      Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
3620      BytesLeft = I->getValue() % UBytes;
3621    } else {
3622      assert(AVT >= MVT::i32 &&
3623             "Do not use rep;movs if not at least DWORD aligned");
3624      Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
3625                          Op.getOperand(3),
                              DAG.getConstant((AVT == MVT::i64) ? 3 : 2, MVT::i8));
3626      TwoRepMovs = true;
3627    }
3628  }
3629
3630  SDOperand InFlag(0, 0);
3631  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
3632                            Count, InFlag);
3633  InFlag = Chain.getValue(1);
3634  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
3635                            Op.getOperand(1), InFlag);
3636  InFlag = Chain.getValue(1);
3637  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
3638                            Op.getOperand(2), InFlag);
3639  InFlag = Chain.getValue(1);
3640
3641  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
3642  SmallVector<SDOperand, 8> Ops;
3643  Ops.push_back(Chain);
3644  Ops.push_back(DAG.getValueType(AVT));
3645  Ops.push_back(InFlag);
3646  Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
3647
3648  if (TwoRepMovs) {
3649    InFlag = Chain.getValue(1);
3650    Count = Op.getOperand(3);
3651    MVT::ValueType CVT = Count.getValueType();
3652    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
3653                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
3654    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
3655                              Left, InFlag);
3656    InFlag = Chain.getValue(1);
3657    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
3658    Ops.clear();
3659    Ops.push_back(Chain);
3660    Ops.push_back(DAG.getValueType(MVT::i8));
3661    Ops.push_back(InFlag);
3662    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
3663  } else if (BytesLeft) {
3664    // Issue loads and stores for the last 1 - 7 bytes.
3665    unsigned Offset = I->getValue() - BytesLeft;
3666    SDOperand DstAddr = Op.getOperand(1);
3667    MVT::ValueType DstVT = DstAddr.getValueType();
3668    SDOperand SrcAddr = Op.getOperand(2);
3669    MVT::ValueType SrcVT = SrcAddr.getValueType();
3670    SDOperand Value;
3671    if (BytesLeft >= 4) {
3672      Value = DAG.getLoad(MVT::i32, Chain,
3673                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
3674                                      DAG.getConstant(Offset, SrcVT)),
3675                          NULL, 0);
3676      Chain = Value.getValue(1);
3677      Chain = DAG.getStore(Chain, Value,
3678                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
3679                                       DAG.getConstant(Offset, DstVT)),
3680                           NULL, 0);
3681      BytesLeft -= 4;
3682      Offset += 4;
3683    }
3684    if (BytesLeft >= 2) {
3685      Value = DAG.getLoad(MVT::i16, Chain,
3686                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
3687                                      DAG.getConstant(Offset, SrcVT)),
3688                          NULL, 0);
3689      Chain = Value.getValue(1);
3690      Chain = DAG.getStore(Chain, Value,
3691                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
3692                                       DAG.getConstant(Offset, DstVT)),
3693                           NULL, 0);
3694      BytesLeft -= 2;
3695      Offset += 2;
3696    }
3697
3698    if (BytesLeft == 1) {
3699      Value = DAG.getLoad(MVT::i8, Chain,
3700                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
3701                                      DAG.getConstant(Offset, SrcVT)),
3702                          NULL, 0);
3703      Chain = Value.getValue(1);
3704      Chain = DAG.getStore(Chain, Value,
3705                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
3706                                       DAG.getConstant(Offset, DstVT)),
3707                           NULL, 0);
3708    }
3709  }
3710
3711  return Chain;
3712}
3713
3714SDOperand
3715X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
3716  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
3717  SDOperand TheOp = Op.getOperand(0);
3718  SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &TheOp, 1);
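      // RDTSC returns the timestamp split across EDX:EAX (RDX:RAX in 64-bit
      // mode); copy out both halves and recombine or return them below.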
3719  if (Subtarget->is64Bit()) {
3720    SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
3721    SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
3722                                         MVT::i64, Copy1.getValue(2));
3723    SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
3724                                DAG.getConstant(32, MVT::i8));
3725    SDOperand Ops[] = {
3726      DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp), Copy2.getValue(1)
3727    };
3728
3729    Tys = DAG.getVTList(MVT::i64, MVT::Other);
3730    return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 2);
3731  }
3732
3733  SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
3734  SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX,
3735                                       MVT::i32, Copy1.getValue(2));
3736  SDOperand Ops[] = { Copy1, Copy2, Copy2.getValue(1) };
3737  Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
3738  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops, 3);
3739}
3740
3741SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
3742  SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));
3743
3744  if (!Subtarget->is64Bit()) {
3745    // vastart just stores the address of the VarArgsFrameIndex slot into the
3746    // memory location argument.
3747    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
3748    return DAG.getStore(Op.getOperand(0), FR,Op.getOperand(1), SV->getValue(),
3749                        SV->getOffset());
3750  }
3751
3752  // __va_list_tag:
3753  //   gp_offset         (0 .. 48: byte offset into the GP register save area)
3754  //   fp_offset         (48 .. 176: byte offset into the FP register save area)
3755  //   overflow_arg_area (points to parameters passed in memory)
3756  //   reg_save_area     (points to the register save area)
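      // i.e. struct { i32 gp_offset; i32 fp_offset;
      //               i8 *overflow_arg_area; i8 *reg_save_area; }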
3757  SmallVector<SDOperand, 8> MemOps;
3758  SDOperand FIN = Op.getOperand(1);
3759  // Store gp_offset
3760  SDOperand Store = DAG.getStore(Op.getOperand(0),
3761                                 DAG.getConstant(VarArgsGPOffset, MVT::i32),
3762                                 FIN, SV->getValue(), SV->getOffset());
3763  MemOps.push_back(Store);
3764
3765  // Store fp_offset
3766  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
3767                    DAG.getConstant(4, getPointerTy()));
3768  Store = DAG.getStore(Op.getOperand(0),
3769                       DAG.getConstant(VarArgsFPOffset, MVT::i32),
3770                       FIN, SV->getValue(), SV->getOffset());
3771  MemOps.push_back(Store);
3772
3773  // Store ptr to overflow_arg_area
3774  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
3775                    DAG.getConstant(4, getPointerTy()));
3776  SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
3777  Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
3778                       SV->getOffset());
3779  MemOps.push_back(Store);
3780
3781  // Store ptr to reg_save_area.
3782  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
3783                    DAG.getConstant(8, getPointerTy()));
3784  SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
3785  Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
3786                       SV->getOffset());
3787  MemOps.push_back(Store);
3788  return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
3789}
3790
3791SDOperand X86TargetLowering::LowerVACOPY(SDOperand Op, SelectionDAG &DAG) {
3792  // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
3793  SDOperand Chain = Op.getOperand(0);
3794  SDOperand DstPtr = Op.getOperand(1);
3795  SDOperand SrcPtr = Op.getOperand(2);
3796  SrcValueSDNode *DstSV = cast<SrcValueSDNode>(Op.getOperand(3));
3797  SrcValueSDNode *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4));
3798
3799  SrcPtr = DAG.getLoad(getPointerTy(), Chain, SrcPtr,
3800                       SrcSV->getValue(), SrcSV->getOffset());
3801  Chain = SrcPtr.getValue(1);
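      // Copy the 24-byte va_list struct as three i64 load/store pairs.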
3802  for (unsigned i = 0; i < 3; ++i) {
3803    SDOperand Val = DAG.getLoad(MVT::i64, Chain, SrcPtr,
3804                                SrcSV->getValue(), SrcSV->getOffset());
3805    Chain = Val.getValue(1);
3806    Chain = DAG.getStore(Chain, Val, DstPtr,
3807                         DstSV->getValue(), DstSV->getOffset());
3808    if (i == 2)
3809      break;
3810    SrcPtr = DAG.getNode(ISD::ADD, getPointerTy(), SrcPtr,
3811                         DAG.getConstant(8, getPointerTy()));
3812    DstPtr = DAG.getNode(ISD::ADD, getPointerTy(), DstPtr,
3813                         DAG.getConstant(8, getPointerTy()));
3814  }
3815  return Chain;
3816}
3817
3818SDOperand
3819X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
3820  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
3821  switch (IntNo) {
3822  default: return SDOperand();    // Don't custom lower most intrinsics.
3823    // Comparison intrinsics.
3824  case Intrinsic::x86_sse_comieq_ss:
3825  case Intrinsic::x86_sse_comilt_ss:
3826  case Intrinsic::x86_sse_comile_ss:
3827  case Intrinsic::x86_sse_comigt_ss:
3828  case Intrinsic::x86_sse_comige_ss:
3829  case Intrinsic::x86_sse_comineq_ss:
3830  case Intrinsic::x86_sse_ucomieq_ss:
3831  case Intrinsic::x86_sse_ucomilt_ss:
3832  case Intrinsic::x86_sse_ucomile_ss:
3833  case Intrinsic::x86_sse_ucomigt_ss:
3834  case Intrinsic::x86_sse_ucomige_ss:
3835  case Intrinsic::x86_sse_ucomineq_ss:
3836  case Intrinsic::x86_sse2_comieq_sd:
3837  case Intrinsic::x86_sse2_comilt_sd:
3838  case Intrinsic::x86_sse2_comile_sd:
3839  case Intrinsic::x86_sse2_comigt_sd:
3840  case Intrinsic::x86_sse2_comige_sd:
3841  case Intrinsic::x86_sse2_comineq_sd:
3842  case Intrinsic::x86_sse2_ucomieq_sd:
3843  case Intrinsic::x86_sse2_ucomilt_sd:
3844  case Intrinsic::x86_sse2_ucomile_sd:
3845  case Intrinsic::x86_sse2_ucomigt_sd:
3846  case Intrinsic::x86_sse2_ucomige_sd:
3847  case Intrinsic::x86_sse2_ucomineq_sd: {
3848    unsigned Opc = 0;
3849    ISD::CondCode CC = ISD::SETCC_INVALID;
3850    switch (IntNo) {
3851    default: break;
3852    case Intrinsic::x86_sse_comieq_ss:
3853    case Intrinsic::x86_sse2_comieq_sd:
3854      Opc = X86ISD::COMI;
3855      CC = ISD::SETEQ;
3856      break;
3857    case Intrinsic::x86_sse_comilt_ss:
3858    case Intrinsic::x86_sse2_comilt_sd:
3859      Opc = X86ISD::COMI;
3860      CC = ISD::SETLT;
3861      break;
3862    case Intrinsic::x86_sse_comile_ss:
3863    case Intrinsic::x86_sse2_comile_sd:
3864      Opc = X86ISD::COMI;
3865      CC = ISD::SETLE;
3866      break;
3867    case Intrinsic::x86_sse_comigt_ss:
3868    case Intrinsic::x86_sse2_comigt_sd:
3869      Opc = X86ISD::COMI;
3870      CC = ISD::SETGT;
3871      break;
3872    case Intrinsic::x86_sse_comige_ss:
3873    case Intrinsic::x86_sse2_comige_sd:
3874      Opc = X86ISD::COMI;
3875      CC = ISD::SETGE;
3876      break;
3877    case Intrinsic::x86_sse_comineq_ss:
3878    case Intrinsic::x86_sse2_comineq_sd:
3879      Opc = X86ISD::COMI;
3880      CC = ISD::SETNE;
3881      break;
3882    case Intrinsic::x86_sse_ucomieq_ss:
3883    case Intrinsic::x86_sse2_ucomieq_sd:
3884      Opc = X86ISD::UCOMI;
3885      CC = ISD::SETEQ;
3886      break;
3887    case Intrinsic::x86_sse_ucomilt_ss:
3888    case Intrinsic::x86_sse2_ucomilt_sd:
3889      Opc = X86ISD::UCOMI;
3890      CC = ISD::SETLT;
3891      break;
3892    case Intrinsic::x86_sse_ucomile_ss:
3893    case Intrinsic::x86_sse2_ucomile_sd:
3894      Opc = X86ISD::UCOMI;
3895      CC = ISD::SETLE;
3896      break;
3897    case Intrinsic::x86_sse_ucomigt_ss:
3898    case Intrinsic::x86_sse2_ucomigt_sd:
3899      Opc = X86ISD::UCOMI;
3900      CC = ISD::SETGT;
3901      break;
3902    case Intrinsic::x86_sse_ucomige_ss:
3903    case Intrinsic::x86_sse2_ucomige_sd:
3904      Opc = X86ISD::UCOMI;
3905      CC = ISD::SETGE;
3906      break;
3907    case Intrinsic::x86_sse_ucomineq_ss:
3908    case Intrinsic::x86_sse2_ucomineq_sd:
3909      Opc = X86ISD::UCOMI;
3910      CC = ISD::SETNE;
3911      break;
3912    }
3913
3914    unsigned X86CC;
3915    SDOperand LHS = Op.getOperand(1);
3916    SDOperand RHS = Op.getOperand(2);
3917    translateX86CC(CC, true, X86CC, LHS, RHS, DAG);
3918
3919    const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3920    SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS };
3921    SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3);
3922    VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
3923    SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
3924    SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
3925    return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
3926  }
3927  }
3928}
3929
3930SDOperand X86TargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
3931  // Depths > 0 not supported yet!
3932  if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
3933    return SDOperand();
3934
3935  // Just load the return address
3936  SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
3937  return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
3938}
3939
3940SDOperand X86TargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
3941  // Depths > 0 not supported yet!
3942  if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
3943    return SDOperand();
3944
3945  SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
3946  return DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
3947                     DAG.getConstant(4, getPointerTy()));
3948}
3949
3950/// LowerOperation - Provide custom lowering hooks for some operations.
3951///
3952SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
3953  switch (Op.getOpcode()) {
3954  default: assert(0 && "Should not custom lower this!");
3955  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
3956  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
3957  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3958  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
3959  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
3960  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
3961  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
3962  case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
3963  case ISD::SHL_PARTS:
3964  case ISD::SRA_PARTS:
3965  case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
3966  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
3967  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
3968  case ISD::FABS:               return LowerFABS(Op, DAG);
3969  case ISD::FNEG:               return LowerFNEG(Op, DAG);
3970  case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
3971  case ISD::SETCC:              return LowerSETCC(Op, DAG, DAG.getEntryNode());
3972  case ISD::SELECT:             return LowerSELECT(Op, DAG);
3973  case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
3974  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
3975  case ISD::CALL:               return LowerCALL(Op, DAG);
3976  case ISD::RET:                return LowerRET(Op, DAG);
3977  case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
3978  case ISD::MEMSET:             return LowerMEMSET(Op, DAG);
3979  case ISD::MEMCPY:             return LowerMEMCPY(Op, DAG);
3980  case ISD::READCYCLECOUNTER:   return LowerREADCYCLCECOUNTER(Op, DAG);
3981  case ISD::VASTART:            return LowerVASTART(Op, DAG);
3982  case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
3983  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3984  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
3985  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
3986  }
3987  return SDOperand();
3988}
3989
3990const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
3991  switch (Opcode) {
3992  default: return NULL;
3993  case X86ISD::SHLD:               return "X86ISD::SHLD";
3994  case X86ISD::SHRD:               return "X86ISD::SHRD";
3995  case X86ISD::FAND:               return "X86ISD::FAND";
3996  case X86ISD::FOR:                return "X86ISD::FOR";
3997  case X86ISD::FXOR:               return "X86ISD::FXOR";
3998  case X86ISD::FSRL:               return "X86ISD::FSRL";
3999  case X86ISD::FILD:               return "X86ISD::FILD";
4000  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
4001  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
4002  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
4003  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
4004  case X86ISD::FLD:                return "X86ISD::FLD";
4005  case X86ISD::FST:                return "X86ISD::FST";
4006  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
4007  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
4008  case X86ISD::CALL:               return "X86ISD::CALL";
4009  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
4010  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
4011  case X86ISD::CMP:                return "X86ISD::CMP";
4012  case X86ISD::COMI:               return "X86ISD::COMI";
4013  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
4014  case X86ISD::SETCC:              return "X86ISD::SETCC";
4015  case X86ISD::CMOV:               return "X86ISD::CMOV";
4016  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
4017  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
4018  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
4019  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
4020  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
4021  case X86ISD::LOAD_UA:            return "X86ISD::LOAD_UA";
4022  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
4023  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
4024  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
4025  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
4026  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
4027  case X86ISD::FMAX:               return "X86ISD::FMAX";
4028  case X86ISD::FMIN:               return "X86ISD::FMIN";
4029  }
4030}
4031
4032/// isLegalAddressImmediate - Return true if the integer value can be used
4033/// as the offset of the target addressing mode for load / store of the
4034/// given type.
4035bool X86TargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{
4036  // X86 allows a sign-extended 32-bit immediate displacement field.
4037  return (V >= -(1LL << 31) && V < (1LL << 31));
4038}
4039
4040/// isLegalAddressImmediate - Return true if the GlobalValue can be used as
4041/// the offset of the target addressing mode.
4042bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
4043  // In 64-bit mode, GV is 64-bit so it won't fit in the 32-bit displacement
4044  // field unless we are in small code model.
4045  if (Subtarget->is64Bit() &&
4046      getTargetMachine().getCodeModel() != CodeModel::Small)
4047    return false;
4048
4049  return (!Subtarget->GVRequiresExtraLoad(GV, getTargetMachine(), false));
4050}
4051
4052/// isLegalAddressScale - Return true if the integer value can be used as the
4053/// scale of the target addressing mode for load / store of the given type.
4054bool X86TargetLowering::isLegalAddressScale(int64_t S, const Type *Ty) const {
4055  switch (S) {
4056  default:
4057    return false;
4058  case 2: case 4: case 8:
4059    return true;
4060  // FIXME: These need base + scaled index (e.g. 5*x is x + 4*x) and are
4061  // thus more expensive to encode. How do we tell LSR to try 2, 4, 8 first?
4062  case 3: case 5: case 9:
4063    return true;
4064  }
4065}
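
// For reference, the scales above correspond to the x86 addressing form
//
//   [Base + Index*Scale + Disp]
//
// where the hardware encodes only Scale = 1, 2, 4 or 8.  Scales of 3, 5 and
// 9 are still accepted because LEA can synthesize them by reusing the index
// register as the base (illustrative):
//
//   leal (%eax,%eax,4), %ecx    # ecx = 5*eax
//
// at the cost of tying up the base-register slot.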
4066
4067/// isShuffleMaskLegal - Targets can use this to indicate that they only
4068/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
4069/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
4070/// are assumed to be legal.
4071bool
4072X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
4073  // Only do shuffles on 128-bit vector types for now.
4074  if (MVT::getSizeInBits(VT) == 64) return false;
4075  return (Mask.Val->getNumOperands() <= 4 ||
4076          isSplatMask(Mask.Val) ||
4077          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
4078          X86::isUNPCKLMask(Mask.Val) ||
4079          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
4080          X86::isUNPCKHMask(Mask.Val));
4081}
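
// For example (illustrative): on v4f32, the mask <0, 4, 1, 5> matches
// X86::isUNPCKLMask and is accepted above, while any mask on a 64-bit
// (MMX-sized) vector type is rejected and left to the legalizer.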
4082
4083bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
4084                                               MVT::ValueType EVT,
4085                                               SelectionDAG &DAG) const {
4086  unsigned NumElts = BVOps.size();
4087  // Only do shuffles on 128-bit vector types for now.
4088  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
4089  if (NumElts == 2) return true;
4090  if (NumElts == 4) {
4091    return (isMOVLMask(&BVOps[0], 4)  ||
4092            isCommutedMOVL(&BVOps[0], 4, true) ||
4093            isSHUFPMask(&BVOps[0], 4) ||
4094            isCommutedSHUFP(&BVOps[0], 4));
4095  }
4096  return false;
4097}
4098
4099//===----------------------------------------------------------------------===//
4100//                           X86 Scheduler Hooks
4101//===----------------------------------------------------------------------===//
4102
4103MachineBasicBlock *
4104X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
4105                                           MachineBasicBlock *BB) {
4106  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
4107  switch (MI->getOpcode()) {
4108  default: assert(false && "Unexpected instr type to insert");
4109  case X86::CMOV_FR32:
4110  case X86::CMOV_FR64:
4111  case X86::CMOV_V4F32:
4112  case X86::CMOV_V2F64:
4113  case X86::CMOV_V2I64: {
4114    // To "insert" a SELECT_CC instruction, we actually have to insert the
4115    // diamond control-flow pattern.  The incoming instruction knows the
4116    // destination vreg to set, the condition code register to branch on, the
4117    // true/false values to select between, and a branch opcode to use.
4118    const BasicBlock *LLVM_BB = BB->getBasicBlock();
4119    ilist<MachineBasicBlock>::iterator It = BB;
4120    ++It;
4121
4122    //  thisMBB:
4123    //  ...
4124    //   TrueVal = ...
4125    //   cmpTY ccX, r1, r2
4126    //   bCC copy1MBB
4127    //   fallthrough --> copy0MBB
4128    MachineBasicBlock *thisMBB = BB;
4129    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
4130    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
4131    unsigned Opc =
4132      X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
4133    BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
4134    MachineFunction *F = BB->getParent();
4135    F->getBasicBlockList().insert(It, copy0MBB);
4136    F->getBasicBlockList().insert(It, sinkMBB);
4137    // Update machine-CFG edges by first adding all successors of the current
4138    // block to the new block, which will contain the PHI node for the select.
4139    for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
4140        e = BB->succ_end(); i != e; ++i)
4141      sinkMBB->addSuccessor(*i);
4142    // Next, remove all successors of the current block, and add the true
4143    // and fallthrough blocks as its successors.
4144    while (!BB->succ_empty())
4145      BB->removeSuccessor(BB->succ_begin());
4146    BB->addSuccessor(copy0MBB);
4147    BB->addSuccessor(sinkMBB);
4148
4149    //  copy0MBB:
4150    //   %FalseValue = ...
4151    //   # fallthrough to sinkMBB
4152    BB = copy0MBB;
4153
4154    // Update machine-CFG edges
4155    BB->addSuccessor(sinkMBB);
4156
4157    //  sinkMBB:
4158    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
4159    //  ...
4160    BB = sinkMBB;
4161    BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
4162      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
4163      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
4164
4165    delete MI;   // The pseudo instruction is gone now.
4166    return BB;
4167  }
4168
4169  case X86::FP_TO_INT16_IN_MEM:
4170  case X86::FP_TO_INT32_IN_MEM:
4171  case X86::FP_TO_INT64_IN_MEM: {
4172    // Change the floating point control register to use "round towards zero"
4173    // mode when truncating to an integer value.
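    // Roughly, the expansion below is the classic x87 truncation idiom
    // (illustrative; actual registers and slots are assigned by codegen):
    //   fnstcw  (slot)          # save the current control word
    //   movw    (slot), %ax     # remember its old value
    //   movw    $0xC7F, (slot)  # RC = 11b -> round toward zero
    //   fldcw   (slot)          # activate truncating mode
    //   movw    %ax, (slot)     # put the old word back in memory
    //   fistp   (dest)          # store the integer result
    //   fldcw   (slot)          # restore the original rounding mode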
4174    MachineFunction *F = BB->getParent();
4175    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
4176    addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);
4177
4178    // Load the old value of the 16-bit control word into a register...
4179    unsigned OldCW =
4180      F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
4181    addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);
4182
4183    // Store a control word whose RC bits (11b) select round toward zero...
4184    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
4185      .addImm(0xC7F);
4186
4187    // Reload the modified control word now...
4188    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
4189
4190    // Restore the in-memory image of the control word to its original value
4191    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
4192      .addReg(OldCW);
4193
4194    // Get the X86 opcode to use.
4195    unsigned Opc;
4196    switch (MI->getOpcode()) {
4197    default: assert(0 && "illegal opcode!");
4198    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
4199    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
4200    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
4201    }
4202
4203    X86AddressMode AM;   // (base, scale, index, disp) from the pseudo
4204    MachineOperand &BaseOp = MI->getOperand(0);
4205    if (BaseOp.isRegister()) {
4206      AM.BaseType = X86AddressMode::RegBase;
4207      AM.Base.Reg = BaseOp.getReg();
4208    } else {
4209      AM.BaseType = X86AddressMode::FrameIndexBase;
4210      AM.Base.FrameIndex = BaseOp.getFrameIndex();
4211    }
4212    MachineOperand &ScaleOp = MI->getOperand(1);
4213    if (ScaleOp.isImmediate())
4214      AM.Scale = ScaleOp.getImm();
4215    MachineOperand &IndexOp = MI->getOperand(2);
4216    if (IndexOp.isImmediate())
4217      AM.IndexReg = IndexOp.getImm();
4218    MachineOperand &DispOp = MI->getOperand(3);
4219    if (DispOp.isGlobalAddress()) {
4220      AM.GV = DispOp.getGlobal();
4221    } else {
4222      AM.Disp = DispOp.getImm();
4223    }
4224    addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
4225                      .addReg(MI->getOperand(4).getReg());
4226
4227    // Reload the original control word now.
4228    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);
4229
4230    delete MI;   // The pseudo instruction is gone now.
4231    return BB;
4232  }
4233  }
4234}
4235
4236//===----------------------------------------------------------------------===//
4237//                           X86 Optimization Hooks
4238//===----------------------------------------------------------------------===//
4239
4240void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
4241                                                       uint64_t Mask,
4242                                                       uint64_t &KnownZero,
4243                                                       uint64_t &KnownOne,
4244                                                       unsigned Depth) const {
4245  unsigned Opc = Op.getOpcode();
4246  assert((Opc >= ISD::BUILTIN_OP_END ||
4247          Opc == ISD::INTRINSIC_WO_CHAIN ||
4248          Opc == ISD::INTRINSIC_W_CHAIN ||
4249          Opc == ISD::INTRINSIC_VOID) &&
4250         "Should use MaskedValueIsZero if you don't know whether Op"
4251         " is a target node!");
4252
4253  KnownZero = KnownOne = 0;   // Don't know anything.
4254  switch (Opc) {
4255  default: break;
4256  case X86ISD::SETCC:
4257    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
4258    break;
4259  }
4260}
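
// For example, an i8 X86ISD::SETCC node only ever produces 0 or 1, so the
// hook above reports bits 1-7 as known zero; the DAG combiner can then
// remove redundant masking such as (and (x86_setcc ...), 1) (illustrative).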
4261
4262/// getShuffleScalarElt - Returns the scalar element that will make up the ith
4263/// element of the result of the vector shuffle.
4264static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
4265  MVT::ValueType VT = N->getValueType(0);
4266  SDOperand PermMask = N->getOperand(2);
4267  unsigned NumElems = PermMask.getNumOperands();
4268  SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
4269  i %= NumElems;
4270  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
4271    return (i == 0)
4272      ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
4273  } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
4274    SDOperand Idx = PermMask.getOperand(i);
4275    if (Idx.getOpcode() == ISD::UNDEF)
4276      return DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
4277    return getShuffleScalarElt(V.Val,cast<ConstantSDNode>(Idx)->getValue(),DAG);
4278  }
4279  return SDOperand();
4280}
4281
4282/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
4283/// node is a GlobalAddress + an offset.
4284static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
4285  unsigned Opc = N->getOpcode();
4286  if (Opc == X86ISD::Wrapper) {
4287    if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
4288      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
4289      return true;
4290    }
4291  } else if (Opc == ISD::ADD) {
4292    SDOperand N1 = N->getOperand(0);
4293    SDOperand N2 = N->getOperand(1);
4294    if (isGAPlusOffset(N1.Val, GA, Offset)) {
4295      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
4296      if (V) {
4297        Offset += V->getSignExtended();
4298        return true;
4299      }
4300    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
4301      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
4302      if (V) {
4303        Offset += V->getSignExtended();
4304        return true;
4305      }
4306    }
4307  }
4308  return false;
4309}
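
// Illustrative example: the address DAG
//   (add (add (X86ISD::Wrapper (TargetGlobalAddress @G)), 8), 4)
// is decomposed by the recursion above into GA = @G and Offset += 12.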
4310
4311/// isConsecutiveLoad - Returns true if N is loading from an address of Base
4312/// + Dist * Size.
4313static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
4314                              MachineFrameInfo *MFI) {
4315  if (N->getOperand(0).Val != Base->getOperand(0).Val)
4316    return false;
4317
4318  SDOperand Loc = N->getOperand(1);
4319  SDOperand BaseLoc = Base->getOperand(1);
4320  if (Loc.getOpcode() == ISD::FrameIndex) {
4321    if (BaseLoc.getOpcode() != ISD::FrameIndex)
4322      return false;
4323    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
4324    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
4325    int FS  = MFI->getObjectSize(FI);
4326    int BFS = MFI->getObjectSize(BFI);
4327    if (FS != BFS || FS != Size) return false;
4328    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
4329  } else {
4330    GlobalValue *GV1 = NULL;
4331    GlobalValue *GV2 = NULL;
4332    int64_t Offset1 = 0;
4333    int64_t Offset2 = 0;
4334    bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
4335    bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
4336    if (isGA1 && isGA2 && GV1 == GV2)
4337      return Offset1 == (Offset2 + Dist*Size);
4338  }
4339
4340  return false;
4341}
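
// Illustrative example: two 4-byte spill slots FI#1 and FI#0 satisfy
// isConsecutiveLoad(load FI#1, load FI#0, Dist=1, Size=4, MFI) exactly when
// both objects are 4 bytes and FI#1's offset equals FI#0's offset plus 4.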
4342
4343static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
4344                              const X86Subtarget *Subtarget) {
4345  GlobalValue *GV = NULL;
4346  int64_t Offset = 0;   // isGAPlusOffset accumulates into this
4347  if (isGAPlusOffset(Base, GV, Offset))
4348    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
4349  else {
4350    assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
4351    int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
4352    if (BFI < 0)
4353      // Fixed objects do not record an alignment, but their offsets are known.
4354      return ((Subtarget->getStackAlignment() % 16) == 0 &&
4355              (MFI->getObjectOffset(BFI) % 16) == 0);
4356    else
4357      return MFI->getObjectAlignment(BFI) >= 16;
4358  }
4359  return false;
4360}
4361
4362
4363/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
4364/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
4365/// if the load addresses are consecutive, non-overlapping, and in the right
4366/// order.
4367static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
4368                                       const X86Subtarget *Subtarget) {
4369  MachineFunction &MF = DAG.getMachineFunction();
4370  MachineFrameInfo *MFI = MF.getFrameInfo();
4371  MVT::ValueType VT = N->getValueType(0);
4372  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
4373  SDOperand PermMask = N->getOperand(2);
4374  int NumElems = (int)PermMask.getNumOperands();
4375  SDNode *Base = NULL;
4376  for (int i = 0; i < NumElems; ++i) {
4377    SDOperand Idx = PermMask.getOperand(i);
4378    if (Idx.getOpcode() == ISD::UNDEF) {
4379      if (!Base) return SDOperand();
4380    } else {
4381      SDOperand Arg =
4382        getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
4383      if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
4384        return SDOperand();
4385      if (!Base)
4386        Base = Arg.Val;
4387      else if (!isConsecutiveLoad(Arg.Val, Base,
4388                                  i, MVT::getSizeInBits(EVT)/8,MFI))
4389        return SDOperand();
4390    }
4391  }
4392
4393  bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
4394  if (isAlign16) {
4395    LoadSDNode *LD = cast<LoadSDNode>(Base);
4396    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
4397                       LD->getSrcValueOffset());
4398  } else {
4399    // Just use movups; it handles unaligned addresses and is shorter.
4400    SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other);
4401    SmallVector<SDOperand, 3> Ops;
4402    Ops.push_back(Base->getOperand(0));
4403    Ops.push_back(Base->getOperand(1));
4404    Ops.push_back(Base->getOperand(2));
4405    return DAG.getNode(ISD::BIT_CONVERT, VT,
4406                       DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size()));
4407  }
4408}
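
// Illustrative effect of this combine: a shuffle equivalent to
//   build_vector (load a), (load a+4), (load a+8), (load a+12), <0,1,2,3>
// becomes one 128-bit load: movaps when the base is known 16-byte aligned,
// otherwise movups via X86ISD::LOAD_UA.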
4409
4410/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
4411static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
4412                                      const X86Subtarget *Subtarget) {
4413  SDOperand Cond = N->getOperand(0);
4414
4415  // If we have SSE[12] support, try to form min/max nodes.
4416  if (Subtarget->hasSSE2() &&
4417      (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
4418    if (Cond.getOpcode() == ISD::SETCC) {
4419      // Get the LHS/RHS of the select.
4420      SDOperand LHS = N->getOperand(1);
4421      SDOperand RHS = N->getOperand(2);
4422      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
4423
4424      unsigned Opcode = 0;
4425      if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
4426        switch (CC) {
4427        default: break;
4428        case ISD::SETOLE: // (X <= Y) ? X : Y -> min
4429        case ISD::SETULE:
4430        case ISD::SETLE:
4431          if (!UnsafeFPMath) break;
4432          // FALL THROUGH.
4433        case ISD::SETOLT:  // (X olt/lt Y) ? X : Y -> min
4434        case ISD::SETLT:
4435          Opcode = X86ISD::FMIN;
4436          break;
4437
4438        case ISD::SETOGT: // (X > Y) ? X : Y -> max
4439        case ISD::SETUGT:
4440        case ISD::SETGT:
4441          if (!UnsafeFPMath) break;
4442          // FALL THROUGH.
4443        case ISD::SETUGE:  // (X uge/ge Y) ? X : Y -> max
4444        case ISD::SETGE:
4445          Opcode = X86ISD::FMAX;
4446          break;
4447        }
4448      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
4449        switch (CC) {
4450        default: break;
4451        case ISD::SETOGT: // (X > Y) ? Y : X -> min
4452        case ISD::SETUGT:
4453        case ISD::SETGT:
4454          if (!UnsafeFPMath) break;
4455          // FALL THROUGH.
4456        case ISD::SETUGE:  // (X uge/ge Y) ? Y : X -> min
4457        case ISD::SETGE:
4458          Opcode = X86ISD::FMIN;
4459          break;
4460
4461        case ISD::SETOLE:   // (X <= Y) ? Y : X -> max
4462        case ISD::SETULE:
4463        case ISD::SETLE:
4464          if (!UnsafeFPMath) break;
4465          // FALL THROUGH.
4466        case ISD::SETOLT:   // (X olt/lt Y) ? Y : X -> max
4467        case ISD::SETLT:
4468          Opcode = X86ISD::FMAX;
4469          break;
4470        }
4471      }
4472
4473      if (Opcode)
4474        return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
4475    }
4476
4477  }
4478
4479  return SDOperand();
4480}
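
// Illustrative result of this combine: IR such as
//   %c = fcmp olt float %x, %y
//   %r = select i1 %c, float %x, float %y
// becomes a single X86ISD::FMIN node and ultimately one minss instruction,
// with no compare-and-branch or cmov sequence.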
4481
4482
4483SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
4484                                               DAGCombinerInfo &DCI) const {
4485  SelectionDAG &DAG = DCI.DAG;
4486  switch (N->getOpcode()) {
4487  default: break;
4488  case ISD::VECTOR_SHUFFLE:
4489    return PerformShuffleCombine(N, DAG, Subtarget);
4490  case ISD::SELECT:
4491    return PerformSELECTCombine(N, DAG, Subtarget);
4492  }
4493
4494  return SDOperand();
4495}
4496
4497//===----------------------------------------------------------------------===//
4498//                           X86 Inline Assembly Support
4499//===----------------------------------------------------------------------===//
4500
4501/// getConstraintType - Given a constraint letter, return the type of
4502/// constraint it is for this target.
4503X86TargetLowering::ConstraintType
4504X86TargetLowering::getConstraintType(char ConstraintLetter) const {
4505  switch (ConstraintLetter) {
4506  case 'A':
4507  case 'r':
4508  case 'R':
4509  case 'l':
4510  case 'q':
4511  case 'Q':
4512  case 'x':
4513  case 'Y':
4514    return C_RegisterClass;
4515  default: return TargetLowering::getConstraintType(ConstraintLetter);
4516  }
4517}
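
// For example (illustrative), GCC-style inline assembly such as
//   asm("rdtsc" : "=A"(val));   // 'A' = the EAX:EDX register pair
// reaches this hook with ConstraintLetter == 'A' and is classified as a
// register-class constraint, resolved by the routines below.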
4518
4519/// isOperandValidForConstraint - Return the specified operand (possibly
4520/// modified) if the specified SDOperand is valid for the specified target
4521/// constraint letter, otherwise return null.
4522SDOperand X86TargetLowering::
4523isOperandValidForConstraint(SDOperand Op, char Constraint, SelectionDAG &DAG) {
4524  switch (Constraint) {
4525  default: break;
4526  case 'i':
4527    // Literal immediates are always ok.
4528    if (isa<ConstantSDNode>(Op)) return Op;
4529
4530    // If we are in non-pic codegen mode, we allow the address of a global to
4531    // be used with 'i'.
4532    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
4533      if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
4534        return SDOperand(0, 0);
4535
4536      if (GA->getOpcode() != ISD::TargetGlobalAddress)
4537        Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
4538                                        GA->getOffset());
4539      return Op;
4540    }
4541
4542    // Otherwise, not valid for this mode.
4543    return SDOperand(0, 0);
4544  }
4545  return TargetLowering::isOperandValidForConstraint(Op, Constraint, DAG);
4546}
4547
4548
4549std::vector<unsigned> X86TargetLowering::
4550getRegClassForInlineAsmConstraint(const std::string &Constraint,
4551                                  MVT::ValueType VT) const {
4552  if (Constraint.size() == 1) {
4553    // FIXME: not handling fp-stack yet!
4554    // FIXME: not handling MMX registers yet ('y' constraint).
4555    switch (Constraint[0]) {      // GCC X86 Constraint Letters
4556    default: break;  // Unknown constraint letter
4557    case 'A':   // EAX/EDX
4558      if (VT == MVT::i32 || VT == MVT::i64)
4559        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
4560      break;
4561    case 'r':   // GENERAL_REGS
4562    case 'R':   // LEGACY_REGS
4563      if (VT == MVT::i64 && Subtarget->is64Bit())
4564        return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX,
4565                                     X86::RSI, X86::RDI, X86::RBP, X86::RSP,
4566                                     X86::R8,  X86::R9,  X86::R10, X86::R11,
4567                                     X86::R12, X86::R13, X86::R14, X86::R15, 0);
4568      if (VT == MVT::i32)
4569        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
4570                                     X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
4571      else if (VT == MVT::i16)
4572        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
4573                                     X86::SI, X86::DI, X86::BP, X86::SP, 0);
4574      else if (VT == MVT::i8)
4575        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4576      break;
4577    case 'l':   // INDEX_REGS
4578      if (VT == MVT::i32)
4579        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
4580                                     X86::ESI, X86::EDI, X86::EBP, 0);
4581      else if (VT == MVT::i16)
4582        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
4583                                     X86::SI, X86::DI, X86::BP, 0);
4584      else if (VT == MVT::i8)
4585        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4586      break;
4587    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
4588    case 'Q':   // Q_REGS
4589      if (VT == MVT::i32)
4590        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
4591      else if (VT == MVT::i16)
4592        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
4593      else if (VT == MVT::i8)
4594        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
4595      break;
4596    case 'x':   // SSE_REGS if SSE1 allowed
4597      if (Subtarget->hasSSE1())
4598        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4599                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
4600                                     0);
4601      return std::vector<unsigned>();
4602    case 'Y':   // SSE_REGS if SSE2 allowed
4603      if (Subtarget->hasSSE2())
4604        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4605                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
4606                                     0);
4607      return std::vector<unsigned>();
4608    }
4609  }
4610
4611  return std::vector<unsigned>();
4612}
4613
4614std::pair<unsigned, const TargetRegisterClass*>
4615X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
4616                                                MVT::ValueType VT) const {
4617  // Use the default implementation in TargetLowering to convert the register
4618  // constraint into a member of a register class.
4619  std::pair<unsigned, const TargetRegisterClass*> Res;
4620  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
4621
4622  // Not found as a standard register?
4623  if (Res.second == 0) {
4624    // GCC calls "st(0)" just plain "st".
4625    if (StringsEqualNoCase("{st}", Constraint)) {
4626      Res.first = X86::ST0;
4627      Res.second = X86::RSTRegisterClass;
4628    }
4629
4630    return Res;
4631  }
4632
4633  // Otherwise, check to see if this is a register class of the wrong value
4634  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it to
4635  // turn into {ax},{dx}.
4636  if (Res.second->hasType(VT))
4637    return Res;   // Correct type already, nothing to do.
4638
4639  // All of the single-register GCC register classes map their values onto
4640  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
4641  // really want an 8-bit or 32-bit register, map to the appropriate register
4642  // class and return the appropriate register.
4643  if (Res.second != X86::GR16RegisterClass)
4644    return Res;
4645
4646  if (VT == MVT::i8) {
4647    unsigned DestReg = 0;
4648    switch (Res.first) {
4649    default: break;
4650    case X86::AX: DestReg = X86::AL; break;
4651    case X86::DX: DestReg = X86::DL; break;
4652    case X86::CX: DestReg = X86::CL; break;
4653    case X86::BX: DestReg = X86::BL; break;
4654    }
4655    if (DestReg) {
4656      Res.first = DestReg;
4657      Res.second = X86::GR8RegisterClass;
4658    }
4659  } else if (VT == MVT::i32) {
4660    unsigned DestReg = 0;
4661    switch (Res.first) {
4662    default: break;
4663    case X86::AX: DestReg = X86::EAX; break;
4664    case X86::DX: DestReg = X86::EDX; break;
4665    case X86::CX: DestReg = X86::ECX; break;
4666    case X86::BX: DestReg = X86::EBX; break;
4667    case X86::SI: DestReg = X86::ESI; break;
4668    case X86::DI: DestReg = X86::EDI; break;
4669    case X86::BP: DestReg = X86::EBP; break;
4670    case X86::SP: DestReg = X86::ESP; break;
4671    }
4672    if (DestReg) {
4673      Res.first = DestReg;
4674      Res.second = X86::GR32RegisterClass;
4675    }
4676  } else if (VT == MVT::i64) {
4677    unsigned DestReg = 0;
4678    switch (Res.first) {
4679    default: break;
4680    case X86::AX: DestReg = X86::RAX; break;
4681    case X86::DX: DestReg = X86::RDX; break;
4682    case X86::CX: DestReg = X86::RCX; break;
4683    case X86::BX: DestReg = X86::RBX; break;
4684    case X86::SI: DestReg = X86::RSI; break;
4685    case X86::DI: DestReg = X86::RDI; break;
4686    case X86::BP: DestReg = X86::RBP; break;
4687    case X86::SP: DestReg = X86::RSP; break;
4688    }
4689    if (DestReg) {
4690      Res.first = DestReg;
4691      Res.second = X86::GR64RegisterClass;
4692    }
4693  }
4694
4695  return Res;
4696}
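
// Illustrative example of the remapping above: for
//   asm("" : "=a"(x));   // where x is an i32
// TargetLowering first resolves {ax} to (X86::AX, GR16RegisterClass); the
// code above then rewrites that to (X86::EAX, GR32RegisterClass) so the
// value is not split across a 16-bit register pair.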
4697