X86ISelLowering.cpp revision a083af14c8130e7cb1f4812b6d6fdcf2cc21882f
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation ---*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: temporary.
#include "llvm/Support/CommandLine.h"
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();

  // Set up the TargetLowering object.

  // X86 is weird; it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86::ESP);

  if (!Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);
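  // Scales of 3, 5, and 9 work because they are formed as base + 2/4/8*index
  // with the base and index registers equal, e.g. "leal (%eax,%eax,2), %ecx"
  // computes 3*%eax; that is why they need both a scale and an index.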

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::R8RegisterClass);
  addRegisterClass(MVT::i16, X86::R16RegisterClass);
  addRegisterClass(MVT::i32, X86::R32RegisterClass);

  // Promote all UINT_TO_FP to larger SINT_TO_FPs, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (X86ScalarSSE)
    // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Expand);
  else
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
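  // In the Promote case the i32 operand is widened to i64 and converted via
  // the (custom-lowered) i64 SINT_TO_FP below; the x87 fild instruction can
  // load a 64-bit integer exactly, so no precision is lost.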

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FPs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64
  // isn't legal.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (X86ScalarSSE && !Subtarget->hasSSE3())
    // Expand FP_TO_UINT into a select.
    // FIXME: We would like to use a Custom expander here eventually to do
    // the optimal thing for SSE vs. the default expansion in the legalizer.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Expand);
  else
    // With SSE3 we can use fisttpll to convert to a signed i64.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
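  // The select-based expansion computes, roughly,
  //   x < 2^31 ? (u32)fptosi(x) : (u32)fptosi(x - 2^31) + 2^31
  // since the hardware only provides signed conversions.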

  setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
  setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);

  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::SEXTLOAD         , MVT::i1   , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);

  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET             , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
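  // The *_PARTS nodes split an i64 shift into operations on the two i32
  // halves; on x86 these map onto SHLD/SHRD plus a fixup for shift amounts
  // of 32 or more.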
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // SSE has no load+extend ops
    setOperationAction(ISD::EXTLOAD,  MVT::f32, Expand);
    setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetic
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::AND,                MVT::v4f32, Legal);
    setOperationAction(ISD::OR,                 MVT::v4f32, Legal);
    setOperationAction(ISD::XOR,                MVT::v4f32, Legal);
    setOperationAction(ISD::ADD,                MVT::v4f32, Legal);
    setOperationAction(ISD::SUB,                MVT::v4f32, Legal);
    setOperationAction(ISD::MUL,                MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,                MVT::v2f64, Legal);
    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v2f64, Legal);
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    setOperationAction(ISD::MUL,                MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    }
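    // Note: this loop and the promotion loop below rely on v16i8, v8i16 and
    // v4i32 being contiguous in the MVT enum, immediately before v2i64.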
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }
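    // Bitwise ops, loads, and selects are insensitive to the element type,
    // so a single set of v2i64 patterns covers every 128-bit integer type.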

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when optimizing for size.
  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

std::vector<SDOperand>
X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
  if (F.getCallingConv() == CallingConv::Fast && EnableFastCC)
    return LowerFastCCArguments(F, DAG);
  return LowerCCCArguments(F, DAG);
}

std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
                               bool isVarArg, unsigned CallingConv,
                               bool isTailCall,
                               SDOperand Callee, ArgListTy &Args,
                               SelectionDAG &DAG) {
  assert((!isVarArg || CallingConv == CallingConv::C) &&
         "Only C takes varargs!");

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  if (CallingConv == CallingConv::Fast && EnableFastCC)
    return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG);
  return LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG);
}

//===----------------------------------------------------------------------===//
//                    C Calling Convention implementation
//===----------------------------------------------------------------------===//

std::vector<SDOperand>
X86TargetLowering::LowerCCCArguments(Function &F, SelectionDAG &DAG) {
  std::vector<SDOperand> ArgValues;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is four bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
    MVT::ValueType ObjectVT = getValueType(I->getType());
    unsigned ArgIncrement = 4;
    unsigned ObjSize;
    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i1:
    case MVT::i8:  ObjSize = 1;                break;
    case MVT::i16: ObjSize = 2;                break;
    case MVT::i32: ObjSize = 4;                break;
    case MVT::i64: ObjSize = ArgIncrement = 8; break;
    case MVT::f32: ObjSize = 4;                break;
    case MVT::f64: ObjSize = ArgIncrement = 8; break;
    }
    // Create the frame index object for this incoming parameter...
    int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);

    // Create the SelectionDAG nodes corresponding to a load from this parameter
    SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);

    // Don't codegen dead arguments.  FIXME: remove this check when we can nuke
    // dead loads.
    SDOperand ArgValue;
    if (!I->use_empty())
      ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
                             DAG.getSrcValue(NULL));
    else {
      if (MVT::isInteger(ObjectVT))
        ArgValue = DAG.getConstant(0, ObjectVT);
      else
        ArgValue = DAG.getConstantFP(0, ObjectVT);
    }
    ArgValues.push_back(ArgValue);

    ArgOffset += ArgIncrement;   // Move on to the next argument...
  }

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (F.isVarArg())
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = ArgOffset;
  return ArgValues;
}

std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy,
                                  bool isVarArg, bool isTailCall,
                                  SDOperand Callee, ArgListTy &Args,
                                  SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  if (Args.empty()) {
    // Save zero bytes.
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(0, getPointerTy()));
  } else {
    for (unsigned i = 0, e = Args.size(); i != e; ++i)
      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unknown value type!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::f32:
        NumBytes += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        NumBytes += 8;
        break;
      }

    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, getPointerTy()));

    // Arguments go on the stack in reverse order, as specified by the ABI.
    unsigned ArgOffset = 0;
    SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
    std::vector<SDOperand> Stores;

    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);

      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unexpected ValueType for argument!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
        // Promote the integer to 32 bits.  If the input type is signed use a
        // sign extend, otherwise use a zero extend.
        if (Args[i].second->isSigned())
          Args[i].first = DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
        else
          Args[i].first = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);

        // FALL THROUGH
      case MVT::i32:
      case MVT::f32:
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 8;
        break;
      }
    }
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);
  }

  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);
  RetVals.push_back(MVT::Other);

  // The result values produced have to be legal.  Promote the result.
  switch (RetTyVT) {
  case MVT::isVoid: break;
  default:
    RetVals.push_back(RetTyVT);
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    RetVals.push_back(MVT::i32);
    break;
  case MVT::f32:
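    // Without SSE the result comes back on the x87 stack in ST0, so an f32
    // value is produced as f64 and rounded back down afterwards (see the
    // FP_ROUND below).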
    if (X86ScalarSSE)
      RetVals.push_back(MVT::f32);
    else
      RetVals.push_back(MVT::f64);
    break;
  case MVT::i64:
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
    break;
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
  SDOperand InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  SDOperand RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT) {
    default: assert(0 && "Unknown value type to return!");
    case MVT::i1:
    case MVT::i8:
      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
      Chain = RetVal.getValue(1);
      if (RetTyVT == MVT::i1)
        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
      break;
    case MVT::i16:
      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i64: {
      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
                                        Lo.getValue(2));
      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
      Chain = Hi.getValue(1);
      break;
    }
    case MVT::f32:
    case MVT::f64: {
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(InFlag);
      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
      Chain  = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);
      if (X86ScalarSSE) {
        // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
        // shouldn't be necessary except that RFP cannot be live across
        // multiple blocks. When stackifier is fixed, they can be uncoupled.
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
        Tys.clear();
        Tys.push_back(MVT::Other);
        Ops.clear();
        Ops.push_back(Chain);
        Ops.push_back(RetVal);
        Ops.push_back(StackSlot);
        Ops.push_back(DAG.getValueType(RetTyVT));
        Ops.push_back(InFlag);
        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
                             DAG.getSrcValue(NULL));
        Chain = RetVal.getValue(1);
      }

      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
        // FIXME: we would really like to remember that this FP_ROUND
        // operation is okay to eliminate if we allow excess FP precision.
        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
      break;
    }
    }
  }

  return std::make_pair(RetVal, Chain);
}

//===----------------------------------------------------------------------===//
//                    Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
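// For example, a call whose stack arguments occupy 8 bytes is padded so the
// callee pops 12; keeping the total at 8n+4 means ESP stays 8-byte aligned
// once the 4-byte return address has been pushed.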
//
// Note that this can be enhanced in the future to pass fp vals in registers
// (when we have a global fp allocator) and do other tricks.
//

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value.  It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

// FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments
// to pass in registers.  0 is none, 1 is "use EAX", 2 is "use EAX and
// EDX".  Anything more is illegal.
//
// FIXME: The linscan register allocator currently has problems with
// coalescing.  At the time of this writing, whenever it decides to coalesce
// a physreg with a virtreg, this increases the size of the physreg's live
// range, and the live range cannot ever be reduced.  This causes problems if
// too many physregs are coalesced with virtregs, which can cause the register
// allocator to wedge itself.
//
// This code triggers this problem more often if we pass args in registers,
// so disable it until this is fixed.
//
// NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings
// about code being dead.
//
static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0;


std::vector<SDOperand>
X86TargetLowering::LowerFastCCArguments(Function &F, SelectionDAG &DAG) {
  std::vector<SDOperand> ArgValues;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Add DAG nodes to load the arguments...  On entry to a function the stack
  // frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first nonreg argument (leftmost lexically)
  // [ESP + 8] -- second nonreg argument, if first argument is 4 bytes in size
  //    ...
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot

  // Keep track of the number of integer regs passed so far.  This can be
  // either 0 (neither EAX nor EDX is used), 1 (EAX is used) or 2 (both EAX
  // and EDX are used).
  unsigned NumIntRegs = 0;

  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
    MVT::ValueType ObjectVT = getValueType(I->getType());
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    SDOperand ArgValue;

    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i1:
    case MVT::i8:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
                                    X86::R8RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i8);
          DAG.setRoot(ArgValue.getValue(1));
          if (ObjectVT == MVT::i1)
            // FIXME: Should insert an AssertZext here.
            ArgValue = DAG.getNode(ISD::TRUNCATE, MVT::i1, ArgValue);
        }
        ++NumIntRegs;
        break;
      }

      ObjSize = 1;
      break;
    case MVT::i16:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
                                    X86::R16RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i16);
          DAG.setRoot(ArgValue.getValue(1));
        }
        ++NumIntRegs;
        break;
      }
      ObjSize = 2;
      break;
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                                    X86::R32RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
          DAG.setRoot(ArgValue.getValue(1));
        }
        ++NumIntRegs;
        break;
      }
      ObjSize = 4;
      break;
    case MVT::i64:
      if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned BotReg = AddLiveIn(MF, X86::EAX, X86::R32RegisterClass);
          unsigned TopReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);

          SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32);
          SDOperand Hi  = DAG.getCopyFromReg(Low.getValue(1), TopReg, MVT::i32);
          DAG.setRoot(Hi.getValue(1));

          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
        }
        NumIntRegs += 2;
        break;
      } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned BotReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);
          SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32);
          DAG.setRoot(Low.getValue(1));

          // Load the high part from memory.
          // Create the frame index object for this incoming parameter...
          int FI = MFI->CreateFixedObject(4, ArgOffset);
          SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
          SDOperand Hi = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
                                     DAG.getSrcValue(NULL));
          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
        }
        ArgOffset += 4;
        NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
        break;
      }
      ObjSize = ArgIncrement = 8;
      break;
    case MVT::f32: ObjSize = 4;                break;
    case MVT::f64: ObjSize = ArgIncrement = 8; break;
    }

    // Don't codegen dead arguments.  FIXME: remove this check when we can nuke
    // dead loads.
    if (ObjSize && !I->use_empty()) {
      // Create the frame index object for this incoming parameter...
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);

      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);

      ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
                             DAG.getSrcValue(NULL));
    } else if (ArgValue.Val == 0) {
      if (MVT::isInteger(ObjectVT))
        ArgValue = DAG.getConstant(0, ObjectVT);
      else
        ArgValue = DAG.getConstantFP(0, ObjectVT);
    }
    ArgValues.push_back(ArgValue);

    if (ObjSize)
      ArgOffset += ArgIncrement;   // Move on to the next argument.
  }

  // Round the argument area up to 8n+4 bytes so that, together with the
  // 4-byte return address, the stack stays 8-byte aligned across the call.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(F.getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  }
  return ArgValues;
}

std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy,
                                     bool isTailCall, SDOperand Callee,
                                     ArgListTy &Args, SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  // Keep track of the number of integer regs passed so far.  This can be
  // either 0 (neither EAX nor EDX is used), 1 (EAX is used) or 2 (both EAX
  // and EDX are used).
  unsigned NumIntRegs = 0;

  for (unsigned i = 0, e = Args.size(); i != e; ++i)
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unknown value type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        ++NumIntRegs;
        break;
      }
      // fall through
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
      if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
        NumIntRegs += 2;
        break;
      } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
        NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
        NumBytes += 4;
        break;
      }

      // fall through
    case MVT::f64:
      NumBytes += 8;
      break;
    }

  // Round the byte count up to 8n+4 so that the argument area, together with
  // the 4-byte return address, keeps the stack 8-byte aligned.
  if ((NumBytes & 7) == 0)
    NumBytes += 4;

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
  NumIntRegs = 0;
  std::vector<SDOperand> Stores;
  std::vector<SDOperand> RegValuesToPass;
  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i1:
      Args[i].first = DAG.getNode(ISD::ANY_EXTEND, MVT::i8, Args[i].first);
      // Fall through.
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        RegValuesToPass.push_back(Args[i].first);
        ++NumIntRegs;
        break;
      }
      // Fall through
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                   Args[i].first, PtrOff,
                                   DAG.getSrcValue(NULL)));
      ArgOffset += 4;
      break;
    }
    case MVT::i64:
      // Can pass (at least) part of it in regs?
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                   Args[i].first, DAG.getConstant(1, MVT::i32));
        SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                   Args[i].first, DAG.getConstant(0, MVT::i32));
        RegValuesToPass.push_back(Lo);
        ++NumIntRegs;

        // Pass both parts in regs?
        if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
          RegValuesToPass.push_back(Hi);
          ++NumIntRegs;
        } else {
          // Pass the high part in memory.
          SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
          PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
          Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Hi, PtrOff, DAG.getSrcValue(NULL)));
          ArgOffset += 4;
        }
        break;
      }
      // Fall through
    case MVT::f64:
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                   Args[i].first, PtrOff,
                                   DAG.getSrcValue(NULL)));
      ArgOffset += 8;
      break;
    }
  }
  if (!Stores.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);

  // Round the offset up to 8n+4 so that the argument area, together with the
  // 4-byte return address, keeps the stack 8-byte aligned.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);

  RetVals.push_back(MVT::Other);

  // The result values produced have to be legal.  Promote the result.
  switch (RetTyVT) {
  case MVT::isVoid: break;
  default:
    RetVals.push_back(RetTyVT);
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    RetVals.push_back(MVT::i32);
    break;
  case MVT::f32:
    if (X86ScalarSSE)
      RetVals.push_back(MVT::f32);
    else
      RetVals.push_back(MVT::f64);
    break;
  case MVT::i64:
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
    break;
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
    unsigned CCReg;
    SDOperand RegToPass = RegValuesToPass[i];
    switch (RegToPass.getValueType()) {
    default: assert(0 && "Bad thing to pass in regs");
    case MVT::i8:
      CCReg = (i == 0) ? X86::AL  : X86::DL;
      break;
    case MVT::i16:
      CCReg = (i == 0) ? X86::AX  : X86::DX;
      break;
    case MVT::i32:
      CCReg = (i == 0) ? X86::EAX : X86::EDX;
      break;
    }

    Chain = DAG.getCopyToReg(Chain, CCReg, RegToPass, InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
  Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  SDOperand RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT) {
    default: assert(0 && "Unknown value type to return!");
    case MVT::i1:
    case MVT::i8:
      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
      Chain = RetVal.getValue(1);
      if (RetTyVT == MVT::i1)
        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
      break;
    case MVT::i16:
      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i64: {
      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
                                        Lo.getValue(2));
      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
      Chain = Hi.getValue(1);
      break;
    }
    case MVT::f32:
    case MVT::f64: {
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(InFlag);
      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
      Chain  = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);
      if (X86ScalarSSE) {
        // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
        // shouldn't be necessary except that RFP cannot be live across
        // multiple blocks. When stackifier is fixed, they can be uncoupled.
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
        Tys.clear();
        Tys.push_back(MVT::Other);
        Ops.clear();
        Ops.push_back(Chain);
        Ops.push_back(RetVal);
        Ops.push_back(StackSlot);
        Ops.push_back(DAG.getValueType(RetTyVT));
        Ops.push_back(InFlag);
        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
                             DAG.getSrcValue(NULL));
        Chain = RetVal.getValue(1);
      }

      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
        // FIXME: we would really like to remember that this FP_ROUND
        // operation is okay to eliminate if we allow excess FP precision.
        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
      break;
    }
    }
  }

  return std::make_pair(RetVal, Chain);
}

SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
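    // The return address is the word immediately below the first fixed
    // argument ([ESP] on entry), hence a 4-byte object at offset -4.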
    MachineFunction &MF = DAG.getMachineFunction();
    ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32);
}



std::pair<SDOperand, SDOperand> X86TargetLowering::
LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
                        SelectionDAG &DAG) {
  SDOperand Result;
  if (Depth)        // Depths > 0 not supported yet!
    Result = DAG.getConstant(0, getPointerTy());
  else {
    SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
    if (!isFrameAddress)
      // Just load the return address
      Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI,
                           DAG.getSrcValue(NULL));
    else
      Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI,
                           DAG.getConstant(4, MVT::i32));
  }
  return std::make_pair(Result, Chain);
}

/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode
/// which corresponds to the condition code.
static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) {
  switch (X86CC) {
  default: assert(0 && "Unknown X86 conditional code!");
  case X86ISD::COND_A:  return X86::JA;
  case X86ISD::COND_AE: return X86::JAE;
  case X86ISD::COND_B:  return X86::JB;
  case X86ISD::COND_BE: return X86::JBE;
  case X86ISD::COND_E:  return X86::JE;
  case X86ISD::COND_G:  return X86::JG;
  case X86ISD::COND_GE: return X86::JGE;
  case X86ISD::COND_L:  return X86::JL;
  case X86ISD::COND_LE: return X86::JLE;
  case X86ISD::COND_NE: return X86::JNE;
  case X86ISD::COND_NO: return X86::JNO;
  case X86ISD::COND_NP: return X86::JNP;
  case X86ISD::COND_NS: return X86::JNS;
  case X86ISD::COND_O:  return X86::JO;
  case X86ISD::COND_P:  return X86::JP;
  case X86ISD::COND_S:  return X86::JS;
  }
}

/// translateX86CC - Do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode. Flip is set to true if
/// the order of the comparison operands should be flipped.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, bool &Flip) {
  Flip = false;
  X86CC = X86ISD::COND_INVALID;
  if (!isFP) {
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
    case ISD::SETGT:  X86CC = X86ISD::COND_G;  break;
    case ISD::SETGE:  X86CC = X86ISD::COND_GE; break;
    case ISD::SETLT:  X86CC = X86ISD::COND_L;  break;
    case ISD::SETLE:  X86CC = X86ISD::COND_LE; break;
    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
    case ISD::SETULT: X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGT: X86CC = X86ISD::COND_A;  break;
    case ISD::SETULE: X86CC = X86ISD::COND_BE; break;
    case ISD::SETUGE: X86CC = X86ISD::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    // ZF  PF  CF   op
    //  0 | 0 | 0 | X > Y
    //  0 | 0 | 1 | X < Y
    //  1 | 0 | 0 | X == Y
    //  1 | 1 | 1 | unordered
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ: X86CC = X86ISD::COND_E;  break;
    case ISD::SETOLT: Flip = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT: X86CC = X86ISD::COND_A;  break;
    case ISD::SETOLE: Flip = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE: X86CC = X86ISD::COND_AE; break;
    case ISD::SETUGT: Flip = true; // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT: X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGE: Flip = true; // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE: X86CC = X86ISD::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE: X86CC = X86ISD::COND_NE; break;
    case ISD::SETUO: X86CC = X86ISD::COND_P;  break;
    case ISD::SETO:  X86CC = X86ISD::COND_NP; break;
    }
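    // The Flip cases above exist because the flags encode only one operand
    // order: e.g. X <o Y has no direct flag test, so the operands are
    // swapped and Y >o X is checked with COND_A instead.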
  }

  return X86CC != X86ISD::COND_INVALID;
}

static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC,
                           bool &Flip) {
  return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip);
}

/// hasFPCMov - Is there a floating point cmov for the specific X86 condition
/// code?  The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86ISD::COND_B:
  case X86ISD::COND_BE:
  case X86ISD::COND_E:
  case X86ISD::COND_P:
  case X86ISD::COND_A:
  case X86ISD::COND_AE:
  case X86ISD::COND_NE:
  case X86ISD::COND_NP:
    return true;
  }
}

MachineBasicBlock *
X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    ilist<MachineBasicBlock>::iterator It = BB;
    ++It;

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC sinkMBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
    unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue());
    BuildMI(BB, Opc, 1).addMBB(sinkMBB);
    MachineFunction *F = BB->getParent();
    F->getBasicBlockList().insert(It, copy0MBB);
    F->getBasicBlockList().insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block, which will contain the Phi node for the select.
    for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
         e = BB->succ_end(); i != e; ++i)
      sinkMBB->addSuccessor(*i);
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while (!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }

  case X86::FP_TO_INT16_IN_MEM:
  case X86::FP_TO_INT32_IN_MEM:
  case X86::FP_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
1342
1343    // Load the old value of the high byte of the control word...
1344    unsigned OldCW =
1345      F->getSSARegMap()->createVirtualRegister(X86::R16RegisterClass);
1346    addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);
1347
1348    // Store a new control word that selects round-toward-zero...
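    // The value 0xC7F keeps the exception-mask bits (bits 0-5) set and
    // programs the rounding-control field (bits 10-11) to 11b, i.e. truncate
    // toward zero.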
1349    addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);
1350
1351    // Reload the modified control word now...
1352    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
1353
1354    // Restore the memory image of control word to original value
1355    addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);
1356
1357    // Get the X86 opcode to use.
1358    unsigned Opc;
1359    switch (MI->getOpcode()) {
1360    default: assert(0 && "illegal opcode!");
1361    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
1362    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
1363    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
1364    }
1365
1366    X86AddressMode AM;
1367    MachineOperand &Op = MI->getOperand(0);
1368    if (Op.isRegister()) {
1369      AM.BaseType = X86AddressMode::RegBase;
1370      AM.Base.Reg = Op.getReg();
1371    } else {
1372      AM.BaseType = X86AddressMode::FrameIndexBase;
1373      AM.Base.FrameIndex = Op.getFrameIndex();
1374    }
1375    Op = MI->getOperand(1);
1376    if (Op.isImmediate())
1377      AM.Scale = Op.getImmedValue();
1378    Op = MI->getOperand(2);
1379    if (Op.isImmediate())
1380      AM.IndexReg = Op.getImmedValue();
1381    Op = MI->getOperand(3);
1382    if (Op.isGlobalAddress()) {
1383      AM.GV = Op.getGlobal();
1384    } else {
1385      AM.Disp = Op.getImmedValue();
1386    }
1387    addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());
1388
1389    // Reload the original control word now.
1390    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
1391
1392    delete MI;   // The pseudo instruction is gone now.
1393    return BB;
1394  }
1395  }
1396}
1397
1398
1399//===----------------------------------------------------------------------===//
1400//                           X86 Custom Lowering Hooks
1401//===----------------------------------------------------------------------===//
1402
1403/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra
1404/// load. For Darwin, external and weak symbols are indirect, loading the value
1405/// at address GV rather than the value of GV itself. This means that the
1406/// GlobalAddress must be in the base or index register of the address, not the
1407/// GV offset field.
1408static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
1409  return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
1410          (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
1411}
1412
1413/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
1414/// true if Op is undef or if its value falls within the half-open range [Low, Hi).
1415static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
1416  if (Op.getOpcode() == ISD::UNDEF)
1417    return true;
1418
1419  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
1420  return (Val >= Low && Val < Hi);
1421}
1422
1423/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
1424/// true if Op is undef or if its value is equal to the specified value.
1425static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
1426  if (Op.getOpcode() == ISD::UNDEF)
1427    return true;
1428  return cast<ConstantSDNode>(Op)->getValue() == Val;
1429}
1430
1431/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
1432/// specifies a shuffle of elements that is suitable for input to PSHUFD.
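/// For example, the 4-element mask <3, 1, 2, 0> only references elements of
/// the first vector and is a valid PSHUFD mask, while <4, 1, 2, 0> pulls
/// element 0 of the second vector and is rejected.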
1433bool X86::isPSHUFDMask(SDNode *N) {
1434  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1435
1436  if (N->getNumOperands() != 4)
1437    return false;
1438
1439  // Check that the mask does not reference the second vector.
1440  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1441    SDOperand Arg = N->getOperand(i);
1442    if (Arg.getOpcode() == ISD::UNDEF) continue;
1443    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1444    if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
1445      return false;
1446  }
1447
1448  return true;
1449}
1450
1451/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
1452/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
1453bool X86::isPSHUFHWMask(SDNode *N) {
1454  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1455
1456  if (N->getNumOperands() != 8)
1457    return false;
1458
1459  // Lower quadword copied in order.
1460  for (unsigned i = 0; i != 4; ++i) {
1461    SDOperand Arg = N->getOperand(i);
1462    if (Arg.getOpcode() == ISD::UNDEF) continue;
1463    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1464    if (cast<ConstantSDNode>(Arg)->getValue() != i)
1465      return false;
1466  }
1467
1468  // Upper quadword shuffled.
1469  for (unsigned i = 4; i != 8; ++i) {
1470    SDOperand Arg = N->getOperand(i);
1471    if (Arg.getOpcode() == ISD::UNDEF) continue;
1472    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1473    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1474    if (Val < 4 || Val > 7)
1475      return false;
1476  }
1477
1478  return true;
1479}
1480
1481/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
1482/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
1483bool X86::isPSHUFLWMask(SDNode *N) {
1484  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1485
1486  if (N->getNumOperands() != 8)
1487    return false;
1488
1489  // Upper quadword copied in order.
1490  for (unsigned i = 4; i != 8; ++i)
1491    if (!isUndefOrEqual(N->getOperand(i), i))
1492      return false;
1493
1494  // Lower quadword shuffled.
1495  for (unsigned i = 0; i != 4; ++i)
1496    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
1497      return false;
1498
1499  return true;
1500}
1501
1502/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
1503/// specifies a shuffle of elements that is suitable for input to SHUFP*.
1504static bool isSHUFPMask(std::vector<SDOperand> &N) {
1505  unsigned NumElems = N.size();
1506  if (NumElems != 2 && NumElems != 4) return false;
1507
1508  unsigned Half = NumElems / 2;
1509  for (unsigned i = 0; i < Half; ++i)
1510    if (!isUndefOrInRange(N[i], 0, NumElems))
1511      return false;
1512  for (unsigned i = Half; i < NumElems; ++i)
1513    if (!isUndefOrInRange(N[i], NumElems, NumElems*2))
1514      return false;
1515
1516  return true;
1517}
1518
1519bool X86::isSHUFPMask(SDNode *N) {
1520  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1521  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1522  return ::isSHUFPMask(Ops);
1523}
1524
1525/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
1526/// the reverse of what x86 shuffles want. x86 shuffles require the lower
1527/// half elements to come from vector 1 (which would equal the destination)
1528/// and the upper half to come from vector 2.
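/// For example, with 4 elements, <4, 5, 0, 1> is a commuted SHUFP mask: the
/// lower half references vector 2 and the upper half references vector 1.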
1529static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) {
1530  unsigned NumElems = Ops.size();
1531  if (NumElems != 2 && NumElems != 4) return false;
1532
1533  unsigned Half = NumElems / 2;
1534  for (unsigned i = 0; i < Half; ++i)
1535    if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2))
1536      return false;
1537  for (unsigned i = Half; i < NumElems; ++i)
1538    if (!isUndefOrInRange(Ops[i], 0, NumElems))
1539      return false;
1540  return true;
1541}
1542
1543static bool isCommutedSHUFP(SDNode *N) {
1544  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1545  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1546  return isCommutedSHUFP(Ops);
1547}
1548
1549/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
1550/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
1551bool X86::isMOVHLPSMask(SDNode *N) {
1552  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1553
1554  if (N->getNumOperands() != 4)
1555    return false;
1556
1557  // Expect elt0 == 6, elt1 == 7, elt2 == 2, elt3 == 3
1558  return isUndefOrEqual(N->getOperand(0), 6) &&
1559         isUndefOrEqual(N->getOperand(1), 7) &&
1560         isUndefOrEqual(N->getOperand(2), 2) &&
1561         isUndefOrEqual(N->getOperand(3), 3);
1562}
1563
1564/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
1565/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
1566bool X86::isMOVLPMask(SDNode *N) {
1567  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1568
1569  unsigned NumElems = N->getNumOperands();
1570  if (NumElems != 2 && NumElems != 4)
1571    return false;
1572
1573  for (unsigned i = 0; i < NumElems/2; ++i)
1574    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
1575      return false;
1576
1577  for (unsigned i = NumElems/2; i < NumElems; ++i)
1578    if (!isUndefOrEqual(N->getOperand(i), i))
1579      return false;
1580
1581  return true;
1582}
1583
1584/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
1585/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
1586/// and MOVLHPS.
1587bool X86::isMOVHPMask(SDNode *N) {
1588  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1589
1590  unsigned NumElems = N->getNumOperands();
1591  if (NumElems != 2 && NumElems != 4)
1592    return false;
1593
1594  for (unsigned i = 0; i < NumElems/2; ++i)
1595    if (!isUndefOrEqual(N->getOperand(i), i))
1596      return false;
1597
1598  for (unsigned i = 0; i < NumElems/2; ++i) {
1599    SDOperand Arg = N->getOperand(i + NumElems/2);
1600    if (!isUndefOrEqual(Arg, i + NumElems))
1601      return false;
1602  }
1603
1604  return true;
1605}
1606
1607/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
1608/// specifies a shuffle of elements that is suitable for input to UNPCKL.
1609static bool isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
1610  unsigned NumElems = N.size();
1611  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1612    return false;
1613
1614  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
1615    SDOperand BitI  = N[i];
1616    SDOperand BitI1 = N[i+1];
1617    if (!isUndefOrEqual(BitI, j))
1618      return false;
1619    if (V2IsSplat) {
1620      if (!isUndefOrEqual(BitI1, NumElems))
1621        return false;
1622    } else {
1623      if (!isUndefOrEqual(BitI1, j + NumElems))
1624        return false;
1625    }
1626  }
1627
1628  return true;
1629}
1630
1631bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
1632  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1633  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1634  return ::isUNPCKLMask(Ops, V2IsSplat);
1635}
1636
1637/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
1638/// specifies a shuffle of elements that is suitable for input to UNPCKH.
1639static bool isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
1640  unsigned NumElems = N.size();
1641  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1642    return false;
1643
1644  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
1645    SDOperand BitI  = N[i];
1646    SDOperand BitI1 = N[i+1];
1647    if (!isUndefOrEqual(BitI, j + NumElems/2))
1648      return false;
1649    if (V2IsSplat) {
1650      if (!isUndefOrEqual(BitI1, NumElems))
1651        return false;
1652    } else {
1653      if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems))
1654        return false;
1655    }
1656  }
1657
1658  return true;
1659}
1660
1661bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
1662  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1663  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1664  return ::isUNPCKHMask(Ops, V2IsSplat);
1665}
1666
1667/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
1668/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
1669/// <0, 0, 1, 1>
1670bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
1671  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1672
1673  unsigned NumElems = N->getNumOperands();
1674  if (NumElems != 4 && NumElems != 8 && NumElems != 16)
1675    return false;
1676
1677  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
1678    SDOperand BitI  = N->getOperand(i);
1679    SDOperand BitI1 = N->getOperand(i+1);
1680
1681    if (!isUndefOrEqual(BitI, j))
1682      return false;
1683    if (!isUndefOrEqual(BitI1, j))
1684      return false;
1685  }
1686
1687  return true;
1688}
1689
1690/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
1691/// specifies a shuffle of elements that is suitable for input to MOVSS,
1692/// MOVSD, and MOVD, i.e. setting the lowest element.
1693static bool isMOVLMask(std::vector<SDOperand> &N) {
1694  unsigned NumElems = N.size();
1695  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1696    return false;
1697
1698  if (!isUndefOrEqual(N[0], NumElems))
1699    return false;
1700
1701  for (unsigned i = 1; i < NumElems; ++i) {
1702    SDOperand Arg = N[i];
1703    if (!isUndefOrEqual(Arg, i))
1704      return false;
1705  }
1706
1707  return true;
1708}
1709
1710bool X86::isMOVLMask(SDNode *N) {
1711  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1712  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1713  return ::isMOVLMask(Ops);
1714}
1715
1716/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
1717/// of what x86 movss wants: the lowest element must be the lowest element of
1718/// vector 2, and the remaining elements must come from vector 1 in order.
1719static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false) {
1720  unsigned NumElems = Ops.size();
1721  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1722    return false;
1723
1724  if (!isUndefOrEqual(Ops[0], 0))
1725    return false;
1726
1727  for (unsigned i = 1; i < NumElems; ++i) {
1728    SDOperand Arg = Ops[i];
1729    if (V2IsSplat) {
1730      if (!isUndefOrEqual(Arg, NumElems))
1731        return false;
1732    } else {
1733      if (!isUndefOrEqual(Arg, i+NumElems))
1734        return false;
1735    }
1736  }
1737
1738  return true;
1739}
1740
1741static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false) {
1742  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1743  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1744  return isCommutedMOVL(Ops, V2IsSplat);
1745}
1746
1747/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
1748/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
1749bool X86::isMOVSHDUPMask(SDNode *N) {
1750  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1751
1752  if (N->getNumOperands() != 4)
1753    return false;
1754
1755  // Expect 1, 1, 3, 3
1756  for (unsigned i = 0; i < 2; ++i) {
1757    SDOperand Arg = N->getOperand(i);
1758    if (Arg.getOpcode() == ISD::UNDEF) continue;
1759    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1760    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1761    if (Val != 1) return false;
1762  }
1763
1764  bool HasHi = false;
1765  for (unsigned i = 2; i < 4; ++i) {
1766    SDOperand Arg = N->getOperand(i);
1767    if (Arg.getOpcode() == ISD::UNDEF) continue;
1768    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1769    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1770    if (Val != 3) return false;
1771    HasHi = true;
1772  }
1773
1774  // Don't use movshdup if it can be done with a shufps.
1775  return HasHi;
1776}
1777
1778/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
1779/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
1780bool X86::isMOVSLDUPMask(SDNode *N) {
1781  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1782
1783  if (N->getNumOperands() != 4)
1784    return false;
1785
1786  // Expect 0, 0, 2, 2
1787  for (unsigned i = 0; i < 2; ++i) {
1788    SDOperand Arg = N->getOperand(i);
1789    if (Arg.getOpcode() == ISD::UNDEF) continue;
1790    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1791    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1792    if (Val != 0) return false;
1793  }
1794
1795  bool HasHi = false;
1796  for (unsigned i = 2; i < 4; ++i) {
1797    SDOperand Arg = N->getOperand(i);
1798    if (Arg.getOpcode() == ISD::UNDEF) continue;
1799    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1800    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1801    if (Val != 2) return false;
1802    HasHi = true;
1803  }
1804
1805  // Don't use movsldup if it can be done with a shufps.
1806  return HasHi;
1807}
1808
1809/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
1810/// a splat of a single element.
1811static bool isSplatMask(SDNode *N) {
1812  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1813
1814  // This is a splat operation if each element of the permute is the same, and
1815  // if the value doesn't reference the second vector.
1816  unsigned NumElems = N->getNumOperands();
1817  SDOperand ElementBase;
1818  unsigned i = 0;
1819  for (; i != NumElems; ++i) {
1820    SDOperand Elt = N->getOperand(i);
1821    if (isa<ConstantSDNode>(Elt)) {
1822      ElementBase = Elt;
1823      break;
1824    }
1825  }
1826
1827  if (!ElementBase.Val)
1828    return false;
1829
1830  for (; i != NumElems; ++i) {
1831    SDOperand Arg = N->getOperand(i);
1832    if (Arg.getOpcode() == ISD::UNDEF) continue;
1833    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1834    if (Arg != ElementBase) return false;
1835  }
1836
1837  // Make sure it is a splat of the first vector operand.
1838  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
1839}
1840
1841/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
1842/// a splat of a single element and it's a 2 or 4 element mask.
1843bool X86::isSplatMask(SDNode *N) {
1844  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1845
1846  // We can only splat 64-bit and 32-bit quantities with a single instruction.
1847  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
1848    return false;
1849  return ::isSplatMask(N);
1850}
1851
1852/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
1853/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
1854/// instructions.
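/// For example, the 4-element identity mask <0, 1, 2, 3> encodes to 0xE4 and
/// the full reversal <3, 2, 1, 0> encodes to 0x1B (two bits per element, with
/// the highest element in the most significant bits).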
1855unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
1856  unsigned NumOperands = N->getNumOperands();
1857  unsigned Shift = (NumOperands == 4) ? 2 : 1;
1858  unsigned Mask = 0;
1859  for (unsigned i = 0; i < NumOperands; ++i) {
1860    unsigned Val = 0;
1861    SDOperand Arg = N->getOperand(NumOperands-i-1);
1862    if (Arg.getOpcode() != ISD::UNDEF)
1863      Val = cast<ConstantSDNode>(Arg)->getValue();
1864    if (Val >= NumOperands) Val -= NumOperands;
1865    Mask |= Val;
1866    if (i != NumOperands - 1)
1867      Mask <<= Shift;
1868  }
1869
1870  return Mask;
1871}
1872
1873/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
1874/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
1875/// instructions.
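/// For example, the v8i16 mask <0, 1, 2, 3, 4, 5, 6, 7> (upper half in order)
/// encodes to 0xE4 and <0, 1, 2, 3, 7, 6, 5, 4> (upper half reversed) encodes
/// to 0x1B; only the upper four mask elements participate.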
1876unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
1877  unsigned Mask = 0;
1878  // 8 nodes, but we only care about the last 4.
1879  for (unsigned i = 7; i >= 4; --i) {
1880    unsigned Val = 0;
1881    SDOperand Arg = N->getOperand(i);
1882    if (Arg.getOpcode() != ISD::UNDEF)
1883      Val = cast<ConstantSDNode>(Arg)->getValue();
1884    Mask |= (Val - 4);
1885    if (i != 4)
1886      Mask <<= 2;
1887  }
1888
1889  return Mask;
1890}
1891
1892/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
1893/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
1894/// instructions.
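/// For example, the v8i16 mask <0, 1, 2, 3, 4, 5, 6, 7> (lower half in order)
/// encodes to 0xE4 and <3, 2, 1, 0, 4, 5, 6, 7> (lower half reversed) encodes
/// to 0x1B; only the lower four mask elements participate.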
1895unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
1896  unsigned Mask = 0;
1897  // 8 nodes, but we only care about the first 4.
1898  for (int i = 3; i >= 0; --i) {
1899    unsigned Val = 0;
1900    SDOperand Arg = N->getOperand(i);
1901    if (Arg.getOpcode() != ISD::UNDEF)
1902      Val = cast<ConstantSDNode>(Arg)->getValue();
1903    Mask |= Val;
1904    if (i != 0)
1905      Mask <<= 2;
1906  }
1907
1908  return Mask;
1909}
1910
1911/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
1912/// specifies an 8-element shuffle that can be broken into a pair of
1913/// PSHUFHW and PSHUFLW.
1914static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
1915  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1916
1917  if (N->getNumOperands() != 8)
1918    return false;
1919
1920  // Lower quadword shuffled.
1921  for (unsigned i = 0; i != 4; ++i) {
1922    SDOperand Arg = N->getOperand(i);
1923    if (Arg.getOpcode() == ISD::UNDEF) continue;
1924    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1925    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1926    if (Val >= 4)
1927      return false;
1928  }
1929
1930  // Upper quadword shuffled.
1931  for (unsigned i = 4; i != 8; ++i) {
1932    SDOperand Arg = N->getOperand(i);
1933    if (Arg.getOpcode() == ISD::UNDEF) continue;
1934    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1935    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1936    if (Val < 4 || Val > 7)
1937      return false;
1938  }
1939
1940  return true;
1941}
1942
1943/// CommuteVectorShuffle - Swap vector_shuffle operands as well as
1944/// the values in their permute mask.
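/// For example, shuffle(V1, V2, <0, 1, 4, 5>) becomes
/// shuffle(V2, V1, <4, 5, 0, 1>).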
1945static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) {
1946  SDOperand V1 = Op.getOperand(0);
1947  SDOperand V2 = Op.getOperand(1);
1948  SDOperand Mask = Op.getOperand(2);
1949  MVT::ValueType VT = Op.getValueType();
1950  MVT::ValueType MaskVT = Mask.getValueType();
1951  MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
1952  unsigned NumElems = Mask.getNumOperands();
1953  std::vector<SDOperand> MaskVec;
1954
1955  for (unsigned i = 0; i != NumElems; ++i) {
1956    SDOperand Arg = Mask.getOperand(i);
1957    if (Arg.getOpcode() == ISD::UNDEF) {
1958      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
1959      continue;
1960    }
1961    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1962    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1963    if (Val < NumElems)
1964      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
1965    else
1966      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
1967  }
1968
1969  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
1970  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
1971}
1972
1973/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
1974/// match movhlps. The lower half elements should come from upper half of
1975/// V1 (and in order), and the upper half elements should come from the upper
1976/// half of V2 (and in order).
1977static bool ShouldXformToMOVHLPS(SDNode *Mask) {
1978  unsigned NumElems = Mask->getNumOperands();
1979  if (NumElems != 4)
1980    return false;
1981  for (unsigned i = 0, e = 2; i != e; ++i)
1982    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
1983      return false;
1984  for (unsigned i = 2; i != 4; ++i)
1985    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
1986      return false;
1987  return true;
1988}
1989
1990/// isScalarLoadToVector - Returns true if the node is a scalar load that
1991/// is promoted to a vector.
1992static inline bool isScalarLoadToVector(SDNode *N) {
1993  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
1994    N = N->getOperand(0).Val;
1995    return (N->getOpcode() == ISD::LOAD);
1996  }
1997  return false;
1998}
1999
2000/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
2001/// match movlp{s|d}. The lower half elements should come from lower half of
2002/// V1 (and in order), and the upper half elements should come from the upper
2003/// half of V2 (and in order). And since V1 will become the source of the
2004/// MOVLP, it must be either a vector load or a scalar load to vector.
2005static bool ShouldXformToMOVLP(SDNode *V1, SDNode *Mask) {
2006  if (V1->getOpcode() != ISD::LOAD && !isScalarLoadToVector(V1))
2007    return false;
2008
2009  unsigned NumElems = Mask->getNumOperands();
2010  if (NumElems != 2 && NumElems != 4)
2011    return false;
2012  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
2013    if (!isUndefOrEqual(Mask->getOperand(i), i))
2014      return false;
2015  for (unsigned i = NumElems/2; i != NumElems; ++i)
2016    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
2017      return false;
2018  return true;
2019}
2020
2021/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
2022/// all the same.
2023static bool isSplatVector(SDNode *N) {
2024  if (N->getOpcode() != ISD::BUILD_VECTOR)
2025    return false;
2026
2027  SDOperand SplatValue = N->getOperand(0);
2028  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
2029    if (N->getOperand(i) != SplatValue)
2030      return false;
2031  return true;
2032}
2033
2034/// NormalizeMask - V2 is a splat; modify the mask (if needed) so all elements
2035/// that point to V2 point to its first element.
2036static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
2037  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
2038
2039  bool Changed = false;
2040  std::vector<SDOperand> MaskVec;
2041  unsigned NumElems = Mask.getNumOperands();
2042  for (unsigned i = 0; i != NumElems; ++i) {
2043    SDOperand Arg = Mask.getOperand(i);
2044    if (Arg.getOpcode() != ISD::UNDEF) {
2045      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2046      if (Val > NumElems) {
2047        Arg = DAG.getConstant(NumElems, Arg.getValueType());
2048        Changed = true;
2049      }
2050    }
2051    MaskVec.push_back(Arg);
2052  }
2053
2054  if (Changed)
2055    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec);
2056  return Mask;
2057}
2058
2059/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d} or movd
2060/// operation of the specified width.
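/// For example, with 4 elements this returns <4, 1, 2, 3>: the low element
/// comes from V2 and the remaining elements pass through from V1.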
2061static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
2062  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2063  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2064
2065  std::vector<SDOperand> MaskVec;
2066  MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
2067  for (unsigned i = 1; i != NumElems; ++i)
2068    MaskVec.push_back(DAG.getConstant(i, BaseVT));
2069  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2070}
2071
2072/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
2073/// of specified width.
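/// For example, with 4 elements this returns <0, 4, 1, 5>, interleaving the
/// low halves of the two input vectors.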
2074static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
2075  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2076  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2077  std::vector<SDOperand> MaskVec;
2078  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
2079    MaskVec.push_back(DAG.getConstant(i,            BaseVT));
2080    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
2081  }
2082  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2083}
2084
2085/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
2086/// of specified width.
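/// For example, with 4 elements this returns <2, 6, 3, 7>, interleaving the
/// high halves of the two input vectors.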
2087static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
2088  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2089  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2090  unsigned Half = NumElems/2;
2091  std::vector<SDOperand> MaskVec;
2092  for (unsigned i = 0; i != Half; ++i) {
2093    MaskVec.push_back(DAG.getConstant(i + Half,            BaseVT));
2094    MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
2095  }
2096  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2097}
2098
2099/// getZeroVector - Returns a vector of specified type with all zero elements.
2100///
2101static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
2102  assert(MVT::isVector(VT) && "Expected a vector type");
2103  unsigned NumElems = getVectorNumElements(VT);
2104  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
2105  bool isFP = MVT::isFloatingPoint(EVT);
2106  SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
2107  std::vector<SDOperand> ZeroVec(NumElems, Zero);
2108  return DAG.getNode(ISD::BUILD_VECTOR, VT, ZeroVec);
2109}
2110
2111/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
2112///
2113static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
2114  SDOperand V1 = Op.getOperand(0);
2115  SDOperand Mask = Op.getOperand(2);
2116  MVT::ValueType VT = Op.getValueType();
2117  unsigned NumElems = Mask.getNumOperands();
2118  Mask = getUnpacklMask(NumElems, DAG);
2119  while (NumElems != 4) {
2120    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
2121    NumElems >>= 1;
2122  }
2123  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
2124
2125  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2126  Mask = getZeroVector(MaskVT, DAG);
2127  SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
2128                                  DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
2129  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
2130}
2131
2132/// isZeroNode - Returns true if Elt is a constant zero or a floating point
2133/// constant +0.0.
2134static inline bool isZeroNode(SDOperand Elt) {
2135  return ((isa<ConstantSDNode>(Elt) &&
2136           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
2137          (isa<ConstantFPSDNode>(Elt) &&
2138           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
2139}
2140
2141/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
2142/// vector and zero or undef vector.
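/// For example, NumElems = 4, Idx = 0 and isZero = true produce
/// shuffle(zerovector, V2, <4, 0, 0, 0>), i.e. <V2[0], 0, 0, 0>.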
2143static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
2144                                             unsigned NumElems, unsigned Idx,
2145                                             bool isZero, SelectionDAG &DAG) {
2146  SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
2147  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2148  MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
2149  SDOperand Zero = DAG.getConstant(0, EVT);
2150  std::vector<SDOperand> MaskVec(NumElems, Zero);
2151  MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
2152  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2153  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2154}
2155
2156/// LowerOperation - Provide custom lowering hooks for some operations.
2157///
2158SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
2159  switch (Op.getOpcode()) {
2160  default: assert(0 && "Should not custom lower this!");
2161  case ISD::SHL_PARTS:
2162  case ISD::SRA_PARTS:
2163  case ISD::SRL_PARTS: {
2164    assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
2165           "Not an i64 shift!");
2166    bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
2167    SDOperand ShOpLo = Op.getOperand(0);
2168    SDOperand ShOpHi = Op.getOperand(1);
2169    SDOperand ShAmt  = Op.getOperand(2);
2170    SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi,
2171                                         DAG.getConstant(31, MVT::i8))
2172                           : DAG.getConstant(0, MVT::i32);
2173
2174    SDOperand Tmp2, Tmp3;
2175    if (Op.getOpcode() == ISD::SHL_PARTS) {
2176      Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
2177      Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
2178    } else {
2179      Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
2180      Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
2181    }
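    // For a shift amount Amt < 32 the (Hi, Lo) result is (Tmp2, Tmp3) for
    // SHL_PARTS and (Tmp3, Tmp2) for SRA/SRL_PARTS. When bit 5 of Amt is set
    // (Amt >= 32), the 32-bit shifts above only used Amt & 31, so the correct
    // result is (Tmp3, 0) for SHL and (Tmp1, Tmp3) for SRA/SRL. The TEST of
    // bit 5 and the CMOVs below select between the two cases without a branch.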
2182
2183    SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag,
2184                                   ShAmt, DAG.getConstant(32, MVT::i8));
2185
2186    SDOperand Hi, Lo;
2187    SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
2188
2189    std::vector<MVT::ValueType> Tys;
2190    Tys.push_back(MVT::i32);
2191    Tys.push_back(MVT::Flag);
2192    std::vector<SDOperand> Ops;
2193    if (Op.getOpcode() == ISD::SHL_PARTS) {
2194      Ops.push_back(Tmp2);
2195      Ops.push_back(Tmp3);
2196      Ops.push_back(CC);
2197      Ops.push_back(InFlag);
2198      Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
2199      InFlag = Hi.getValue(1);
2200
2201      Ops.clear();
2202      Ops.push_back(Tmp3);
2203      Ops.push_back(Tmp1);
2204      Ops.push_back(CC);
2205      Ops.push_back(InFlag);
2206      Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
2207    } else {
2208      Ops.push_back(Tmp2);
2209      Ops.push_back(Tmp3);
2210      Ops.push_back(CC);
2211      Ops.push_back(InFlag);
2212      Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
2213      InFlag = Lo.getValue(1);
2214
2215      Ops.clear();
2216      Ops.push_back(Tmp3);
2217      Ops.push_back(Tmp1);
2218      Ops.push_back(CC);
2219      Ops.push_back(InFlag);
2220      Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
2221    }
2222
2223    Tys.clear();
2224    Tys.push_back(MVT::i32);
2225    Tys.push_back(MVT::i32);
2226    Ops.clear();
2227    Ops.push_back(Lo);
2228    Ops.push_back(Hi);
2229    return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
2230  }
2231  case ISD::SINT_TO_FP: {
2232    assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
2233           Op.getOperand(0).getValueType() >= MVT::i16 &&
2234           "Unknown SINT_TO_FP to lower!");
2235
2236    SDOperand Result;
2237    MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
2238    unsigned Size = MVT::getSizeInBits(SrcVT)/8;
2239    MachineFunction &MF = DAG.getMachineFunction();
2240    int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
2241    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
2242    SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other,
2243                                  DAG.getEntryNode(), Op.getOperand(0),
2244                                  StackSlot, DAG.getSrcValue(NULL));
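    // The integer is first spilled to a stack slot because the x87 FILD
    // instruction can only read its source operand from memory.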
2245
2246    // Build the FILD
2247    std::vector<MVT::ValueType> Tys;
2248    Tys.push_back(MVT::f64);
2249    Tys.push_back(MVT::Other);
2250    if (X86ScalarSSE) Tys.push_back(MVT::Flag);
2251    std::vector<SDOperand> Ops;
2252    Ops.push_back(Chain);
2253    Ops.push_back(StackSlot);
2254    Ops.push_back(DAG.getValueType(SrcVT));
2255    Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
2256                         Tys, Ops);
2257
2258    if (X86ScalarSSE) {
2259      Chain = Result.getValue(1);
2260      SDOperand InFlag = Result.getValue(2);
2261
2262      // FIXME: Currently the FST is flagged to the FILD_FLAG. This
2263      // shouldn't be necessary except that RFP cannot be live across
2264      // multiple blocks. When stackifier is fixed, they can be uncoupled.
2265      MachineFunction &MF = DAG.getMachineFunction();
2266      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
2267      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
2268      std::vector<MVT::ValueType> Tys;
2269      Tys.push_back(MVT::Other);
2270      std::vector<SDOperand> Ops;
2271      Ops.push_back(Chain);
2272      Ops.push_back(Result);
2273      Ops.push_back(StackSlot);
2274      Ops.push_back(DAG.getValueType(Op.getValueType()));
2275      Ops.push_back(InFlag);
2276      Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
2277      Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
2278                           DAG.getSrcValue(NULL));
2279    }
2280
2281    return Result;
2282  }
2283  case ISD::FP_TO_SINT: {
2284    assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
2285           "Unknown FP_TO_SINT to lower!");
2286    // We lower FP->sint into a FIST* store to a temporary stack slot,
2287    // followed by a load from that slot.
2288    MachineFunction &MF = DAG.getMachineFunction();
2289    unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
2290    int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
2291    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
2292
2293    unsigned Opc;
2294    switch (Op.getValueType()) {
2295    default: assert(0 && "Invalid FP_TO_SINT to lower!");
2296    case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
2297    case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
2298    case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
2299    }
2300
2301    SDOperand Chain = DAG.getEntryNode();
2302    SDOperand Value = Op.getOperand(0);
2303    if (X86ScalarSSE) {
2304      assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
2305      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot,
2306                          DAG.getSrcValue(0));
2307      std::vector<MVT::ValueType> Tys;
2308      Tys.push_back(MVT::f64);
2309      Tys.push_back(MVT::Other);
2310      std::vector<SDOperand> Ops;
2311      Ops.push_back(Chain);
2312      Ops.push_back(StackSlot);
2313      Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType()));
2314      Value = DAG.getNode(X86ISD::FLD, Tys, Ops);
2315      Chain = Value.getValue(1);
2316      SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
2317      StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
2318    }
2319
2320    // Build the FP_TO_INT*_IN_MEM
2321    std::vector<SDOperand> Ops;
2322    Ops.push_back(Chain);
2323    Ops.push_back(Value);
2324    Ops.push_back(StackSlot);
2325    SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops);
2326
2327    // Load the result.
2328    return DAG.getLoad(Op.getValueType(), FIST, StackSlot,
2329                       DAG.getSrcValue(NULL));
2330  }
2331  case ISD::READCYCLECOUNTER: {
2332    std::vector<MVT::ValueType> Tys;
2333    Tys.push_back(MVT::Other);
2334    Tys.push_back(MVT::Flag);
2335    std::vector<SDOperand> Ops;
2336    Ops.push_back(Op.getOperand(0));
2337    SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops);
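    // RDTSC leaves the 64-bit timestamp in EDX:EAX; copy each half out of its
    // fixed register, threading the flag through so the copies stay glued to
    // the RDTSC node.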
2338    Ops.clear();
2339    Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)));
2340    Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX,
2341                                     MVT::i32, Ops[0].getValue(2)));
2342    Ops.push_back(Ops[1].getValue(1));
2343    Tys[0] = Tys[1] = MVT::i32;
2344    Tys.push_back(MVT::Other);
2345    return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
2346  }
2347  case ISD::FABS: {
2348    MVT::ValueType VT = Op.getValueType();
2349    const Type *OpNTy =  MVT::getTypeForValueType(VT);
2350    std::vector<Constant*> CV;
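    // Build a constant-pool mask whose first element has every bit set except
    // the sign bit (bit 63 for f64, bit 31 for f32); FABS is then a single
    // FAND with this mask. The zero fields pad the entry to the width of the
    // packed load.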
2351    if (VT == MVT::f64) {
2352      CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
2353      CV.push_back(ConstantFP::get(OpNTy, 0.0));
2354    } else {
2355      CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
2356      CV.push_back(ConstantFP::get(OpNTy, 0.0));
2357      CV.push_back(ConstantFP::get(OpNTy, 0.0));
2358      CV.push_back(ConstantFP::get(OpNTy, 0.0));
2359    }
2360    Constant *CS = ConstantStruct::get(CV);
2361    SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
2362    SDOperand Mask
2363      = DAG.getNode(X86ISD::LOAD_PACK,
2364                    VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
2365    return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
2366  }
2367  case ISD::FNEG: {
2368    MVT::ValueType VT = Op.getValueType();
2369    const Type *OpNTy =  MVT::getTypeForValueType(VT);
2370    std::vector<Constant*> CV;
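    // Same constant-pool trick as FABS above, but the mask has only the sign
    // bit set, so a single FXOR flips the sign of the operand.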
2371    if (VT == MVT::f64) {
2372      CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
2373      CV.push_back(ConstantFP::get(OpNTy, 0.0));
2374    } else {
2375      CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
2376      CV.push_back(ConstantFP::get(OpNTy, 0.0));
2377      CV.push_back(ConstantFP::get(OpNTy, 0.0));
2378      CV.push_back(ConstantFP::get(OpNTy, 0.0));
2379    }
2380    Constant *CS = ConstantStruct::get(CV);
2381    SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
2382    SDOperand Mask
2383      = DAG.getNode(X86ISD::LOAD_PACK,
2384                    VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
2385    return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
2386  }
2387  case ISD::SETCC: {
2388    assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
2389    SDOperand Cond;
2390    SDOperand CC = Op.getOperand(2);
2391    ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
2392    bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
2393    bool Flip;
2394    unsigned X86CC;
2395    if (translateX86CC(CC, isFP, X86CC, Flip)) {
2396      if (Flip)
2397        Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
2398                           Op.getOperand(1), Op.getOperand(0));
2399      else
2400        Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
2401                           Op.getOperand(0), Op.getOperand(1));
2402      return DAG.getNode(X86ISD::SETCC, MVT::i8,
2403                         DAG.getConstant(X86CC, MVT::i8), Cond);
2404    } else {
2405      assert(isFP && "Illegal integer SetCC!");
2406
2407      Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
2408                         Op.getOperand(0), Op.getOperand(1));
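      // A floating point compare sets ZF, PF and CF, and PF is set on an
      // unordered result. "Ordered and equal" is ZF == 1 with PF == 0, hence
      // the two SETCCs combined with AND below; "unordered or not equal" is
      // PF == 1 or ZF == 0, combined with OR.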
2409      std::vector<MVT::ValueType> Tys;
2410      std::vector<SDOperand> Ops;
2411      switch (SetCCOpcode) {
2412      default: assert(false && "Illegal floating point SetCC!");
2413      case ISD::SETOEQ: {  // !PF & ZF
2414        Tys.push_back(MVT::i8);
2415        Tys.push_back(MVT::Flag);
2416        Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8));
2417        Ops.push_back(Cond);
2418        SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
2419        SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
2420                                     DAG.getConstant(X86ISD::COND_E, MVT::i8),
2421                                     Tmp1.getValue(1));
2422        return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
2423      }
2424      case ISD::SETUNE: {  // PF | !ZF
2425        Tys.push_back(MVT::i8);
2426        Tys.push_back(MVT::Flag);
2427        Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8));
2428        Ops.push_back(Cond);
2429        SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
2430        SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
2431                                     DAG.getConstant(X86ISD::COND_NE, MVT::i8),
2432                                     Tmp1.getValue(1));
2433        return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
2434      }
2435      }
2436    }
2437  }
2438  case ISD::SELECT: {
2439    MVT::ValueType VT = Op.getValueType();
2440    bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE;
2441    bool addTest   = false;
2442    SDOperand Op0 = Op.getOperand(0);
2443    SDOperand Cond, CC;
2444    if (Op0.getOpcode() == ISD::SETCC)
2445      Op0 = LowerOperation(Op0, DAG);
2446
2447    if (Op0.getOpcode() == X86ISD::SETCC) {
2448      // If the condition flag is set by an X86ISD::CMP, then make a copy of
2449      // it (since a flag operand cannot be shared). If the X86ISD::SETCC has
2450      // no other use it will be eliminated.
2451      // If the X86ISD::SETCC has more than one use, then it's probably better
2452      // to use a test instead of duplicating the X86ISD::CMP (for register
2453      // pressure reasons).
2454      unsigned CmpOpc = Op0.getOperand(1).getOpcode();
2455      if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
2456          CmpOpc == X86ISD::UCOMI) {
2457        if (!Op0.hasOneUse()) {
2458          std::vector<MVT::ValueType> Tys;
2459          for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i)
2460            Tys.push_back(Op0.Val->getValueType(i));
2461          std::vector<SDOperand> Ops;
2462          for (unsigned i = 0; i < Op0.getNumOperands(); ++i)
2463            Ops.push_back(Op0.getOperand(i));
2464          Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
2465        }
2466
2467        CC   = Op0.getOperand(0);
2468        Cond = Op0.getOperand(1);
2469        // Make a copy as flag result cannot be used by more than one.
2470        Cond = DAG.getNode(CmpOpc, MVT::Flag,
2471                           Cond.getOperand(0), Cond.getOperand(1));
2472        addTest =
2473          isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
2474      } else
2475        addTest = true;
2476    } else
2477      addTest = true;
2478
2479    if (addTest) {
2480      CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
2481      Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0);
2482    }
2483
2484    std::vector<MVT::ValueType> Tys;
2485    Tys.push_back(Op.getValueType());
2486    Tys.push_back(MVT::Flag);
2487    std::vector<SDOperand> Ops;
2488    // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
2489    // condition is true.
2490    Ops.push_back(Op.getOperand(2));
2491    Ops.push_back(Op.getOperand(1));
2492    Ops.push_back(CC);
2493    Ops.push_back(Cond);
2494    return DAG.getNode(X86ISD::CMOV, Tys, Ops);
2495  }
2496  case ISD::BRCOND: {
2497    bool addTest = false;
2498    SDOperand Cond  = Op.getOperand(1);
2499    SDOperand Dest  = Op.getOperand(2);
2500    SDOperand CC;
2501    if (Cond.getOpcode() == ISD::SETCC)
2502      Cond = LowerOperation(Cond, DAG);
2503
2504    if (Cond.getOpcode() == X86ISD::SETCC) {
2505      // If the condition flag is set by an X86ISD::CMP, then make a copy of
2506      // it (since a flag operand cannot be shared). If the X86ISD::SETCC has
2507      // no other use it will be eliminated.
2508      // If the X86ISD::SETCC has more than one use, then it's probably better
2509      // to use a test instead of duplicating the X86ISD::CMP (for register
2510      // pressure reasons).
2511      unsigned CmpOpc = Cond.getOperand(1).getOpcode();
2512      if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
2513          CmpOpc == X86ISD::UCOMI) {
2514        if (!Cond.hasOneUse()) {
2515          std::vector<MVT::ValueType> Tys;
2516          for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i)
2517            Tys.push_back(Cond.Val->getValueType(i));
2518          std::vector<SDOperand> Ops;
2519          for (unsigned i = 0; i < Cond.getNumOperands(); ++i)
2520            Ops.push_back(Cond.getOperand(i));
2521          Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops);
2522        }
2523
2524        CC   = Cond.getOperand(0);
2525        Cond = Cond.getOperand(1);
2526        // Make a copy as flag result cannot be used by more than one.
2527        Cond = DAG.getNode(CmpOpc, MVT::Flag,
2528                           Cond.getOperand(0), Cond.getOperand(1));
2529      } else
2530        addTest = true;
2531    } else
2532      addTest = true;
2533
2534    if (addTest) {
2535      CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
2536      Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond);
2537    }
2538    return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
2539                       Op.getOperand(0), Dest, CC, Cond);
2540  }
2541  case ISD::MEMSET: {
2542    SDOperand InFlag(0, 0);
2543    SDOperand Chain = Op.getOperand(0);
2544    unsigned Align =
2545      (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
2546    if (Align == 0) Align = 1;
2547
2548    ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
2549    // If not DWORD aligned, call memset if size is less than the threshold.
2550    // It knows how to align to the right boundary first.
2551    if ((Align & 3) != 0 ||
2552        (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
2553      MVT::ValueType IntPtr = getPointerTy();
2554      const Type *IntPtrTy = getTargetData().getIntPtrType();
2555      std::vector<std::pair<SDOperand, const Type*> > Args;
2556      Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
2557      // Extend the ubyte argument to be an int value for the call.
2558      SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
2559      Args.push_back(std::make_pair(Val, IntPtrTy));
2560      Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
2561      std::pair<SDOperand,SDOperand> CallResult =
2562        LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
2563                    DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
2564      return CallResult.second;
2565    }
2566
2567    MVT::ValueType AVT;
2568    SDOperand Count;
2569    ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
2570    unsigned BytesLeft = 0;
2571    bool TwoRepStos = false;
2572    if (ValC) {
2573      unsigned ValReg;
2574      unsigned Val = ValC->getValue() & 255;
2575
2576      // If the value is a constant, then we can potentially use wider stores.
2577      switch (Align & 3) {
2578      case 2:   // WORD aligned
2579        AVT = MVT::i16;
2580        Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
2581        BytesLeft = I->getValue() % 2;
2582        Val    = (Val << 8) | Val;
2583        ValReg = X86::AX;
2584        break;
2585      case 0:   // DWORD aligned
2586        AVT = MVT::i32;
2587        if (I) {
2588          Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
2589          BytesLeft = I->getValue() % 4;
2590        } else {
2591          Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
2592                              DAG.getConstant(2, MVT::i8));
2593          TwoRepStos = true;
2594        }
2595        Val = (Val << 8)  | Val;
2596        Val = (Val << 16) | Val;
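      // e.g. a byte value of 0xAB is replicated to 0xABAB and then to
      // 0xABABABAB, so each REP STOS iteration stores 2 or 4 copies at once.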
2597        ValReg = X86::EAX;
2598        break;
2599      default:  // Byte aligned
2600        AVT = MVT::i8;
2601        Count = Op.getOperand(3);
2602        ValReg = X86::AL;
2603        break;
2604      }
2605
2606      Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
2607                                InFlag);
2608      InFlag = Chain.getValue(1);
2609    } else {
2610      AVT = MVT::i8;
2611      Count  = Op.getOperand(3);
2612      Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
2613      InFlag = Chain.getValue(1);
2614    }
2615
2616    Chain  = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
2617    InFlag = Chain.getValue(1);
2618    Chain  = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
2619    InFlag = Chain.getValue(1);
2620
2621    std::vector<MVT::ValueType> Tys;
2622    Tys.push_back(MVT::Other);
2623    Tys.push_back(MVT::Flag);
2624    std::vector<SDOperand> Ops;
2625    Ops.push_back(Chain);
2626    Ops.push_back(DAG.getValueType(AVT));
2627    Ops.push_back(InFlag);
2628    Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);
2629
2630    if (TwoRepStos) {
2631      InFlag = Chain.getValue(1);
2632      Count = Op.getOperand(3);
2633      MVT::ValueType CVT = Count.getValueType();
2634      SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
2635                                   DAG.getConstant(3, CVT));
2636      Chain  = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
2637      InFlag = Chain.getValue(1);
2638      Tys.clear();
2639      Tys.push_back(MVT::Other);
2640      Tys.push_back(MVT::Flag);
2641      Ops.clear();
2642      Ops.push_back(Chain);
2643      Ops.push_back(DAG.getValueType(MVT::i8));
2644      Ops.push_back(InFlag);
2645      Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);
2646    } else if (BytesLeft) {
2647      // Issue stores for the last 1 - 3 bytes.
2648      SDOperand Value;
2649      unsigned Val = ValC->getValue() & 255;
2650      unsigned Offset = I->getValue() - BytesLeft;
2651      SDOperand DstAddr = Op.getOperand(1);
2652      MVT::ValueType AddrVT = DstAddr.getValueType();
2653      if (BytesLeft >= 2) {
2654        Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
2655        Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
2656                            DAG.getNode(ISD::ADD, AddrVT, DstAddr,
2657                                        DAG.getConstant(Offset, AddrVT)),
2658                            DAG.getSrcValue(NULL));
2659        BytesLeft -= 2;
2660        Offset += 2;
2661      }
2662
2663      if (BytesLeft == 1) {
2664        Value = DAG.getConstant(Val, MVT::i8);
2665        Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
2666                            DAG.getNode(ISD::ADD, AddrVT, DstAddr,
2667                                        DAG.getConstant(Offset, AddrVT)),
2668                            DAG.getSrcValue(NULL));
2669      }
2670    }
2671
2672    return Chain;
2673  }
2674  case ISD::MEMCPY: {
2675    SDOperand Chain = Op.getOperand(0);
2676    unsigned Align =
2677      (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
2678    if (Align == 0) Align = 1;
2679
2680    ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
2681    // If not DWORD aligned, call memcpy if size is less than the threshold.
2682    // It knows how to align to the right boundary first.
2683    if ((Align & 3) != 0 ||
2684        (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
2685      MVT::ValueType IntPtr = getPointerTy();
2686      const Type *IntPtrTy = getTargetData().getIntPtrType();
2687      std::vector<std::pair<SDOperand, const Type*> > Args;
2688      Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
2689      Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy));
2690      Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
2691      std::pair<SDOperand,SDOperand> CallResult =
2692        LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
2693                    DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
2694      return CallResult.second;
2695    }
2696
2697    MVT::ValueType AVT;
2698    SDOperand Count;
2699    unsigned BytesLeft = 0;
2700    bool TwoRepMovs = false;
2701    switch (Align & 3) {
2702    case 2:   // WORD aligned
2703      AVT = MVT::i16;
2704      Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
2705      BytesLeft = I->getValue() % 2;
2706      break;
2707    case 0:   // DWORD aligned
2708      AVT = MVT::i32;
2709      if (I) {
2710        Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
2711        BytesLeft = I->getValue() % 4;
2712      } else {
2713        Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
2714                            DAG.getConstant(2, MVT::i8));
2715        TwoRepMovs = true;
2716      }
2717      break;
2718    default:  // Byte aligned
2719      AVT = MVT::i8;
2720      Count = Op.getOperand(3);
2721      break;
2722    }
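    // e.g. a DWORD-aligned copy of 10 constant bytes uses REP MOVSD with
    // ECX = 2, and the remaining 2 bytes are handled by the explicit trailing
    // load/store emitted below.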
2723
2724    SDOperand InFlag(0, 0);
2725    Chain  = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
2726    InFlag = Chain.getValue(1);
2727    Chain  = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
2728    InFlag = Chain.getValue(1);
2729    Chain  = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag);
2730    InFlag = Chain.getValue(1);
2731
2732    std::vector<MVT::ValueType> Tys;
2733    Tys.push_back(MVT::Other);
2734    Tys.push_back(MVT::Flag);
2735    std::vector<SDOperand> Ops;
2736    Ops.push_back(Chain);
2737    Ops.push_back(DAG.getValueType(AVT));
2738    Ops.push_back(InFlag);
2739    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);
2740
2741    if (TwoRepMovs) {
2742      InFlag = Chain.getValue(1);
2743      Count = Op.getOperand(3);
2744      MVT::ValueType CVT = Count.getValueType();
2745      SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
2746                                   DAG.getConstant(3, CVT));
2747      Chain  = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
2748      InFlag = Chain.getValue(1);
2749      Tys.clear();
2750      Tys.push_back(MVT::Other);
2751      Tys.push_back(MVT::Flag);
2752      Ops.clear();
2753      Ops.push_back(Chain);
2754      Ops.push_back(DAG.getValueType(MVT::i8));
2755      Ops.push_back(InFlag);
2756      Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);
2757    } else if (BytesLeft) {
2758      // Issue loads and stores for the last 1 - 3 bytes.
2759      unsigned Offset = I->getValue() - BytesLeft;
2760      SDOperand DstAddr = Op.getOperand(1);
2761      MVT::ValueType DstVT = DstAddr.getValueType();
2762      SDOperand SrcAddr = Op.getOperand(2);
2763      MVT::ValueType SrcVT = SrcAddr.getValueType();
2764      SDOperand Value;
2765      if (BytesLeft >= 2) {
2766        Value = DAG.getLoad(MVT::i16, Chain,
2767                            DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
2768                                        DAG.getConstant(Offset, SrcVT)),
2769                            DAG.getSrcValue(NULL));
2770        Chain = Value.getValue(1);
2771        Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
2772                            DAG.getNode(ISD::ADD, DstVT, DstAddr,
2773                                        DAG.getConstant(Offset, DstVT)),
2774                            DAG.getSrcValue(NULL));
2775        BytesLeft -= 2;
2776        Offset += 2;
2777      }
2778
2779      if (BytesLeft == 1) {
2780        Value = DAG.getLoad(MVT::i8, Chain,
2781                            DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
2782                                        DAG.getConstant(Offset, SrcVT)),
2783                            DAG.getSrcValue(NULL));
2784        Chain = Value.getValue(1);
2785        Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
2786                            DAG.getNode(ISD::ADD, DstVT, DstAddr,
2787                                        DAG.getConstant(Offset, DstVT)),
2788                            DAG.getSrcValue(NULL));
2789      }
2790    }
2791
2792    return Chain;
2793  }
2794
2795  // ConstantPool, GlobalAddress, and ExternalSymbol are lowered as their
2796  // target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
2797  // one of the above-mentioned nodes. It has to be wrapped because otherwise
2798  // Select(N) would return N. So the raw TargetGlobalAddress nodes, etc. can
2799  // only be used to form addressing modes. These wrapped nodes will be selected
2800  // into MOV32ri.
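      // For example (a sketch): a wrapped node such as
      //   (i32 (X86ISD::Wrapper (TargetGlobalAddress @G)))
      // selects to "MOV32ri @G", while the bare TargetGlobalAddress can still
      // be folded into a load or store's address.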
2801  case ISD::ConstantPool: {
2802    ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2803    SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
2804                         DAG.getTargetConstantPool(CP->get(), getPointerTy(),
2805                                                   CP->getAlignment()));
2806    if (Subtarget->isTargetDarwin()) {
2807      // With PIC, the address is actually $g + Offset.
2808      if (getTargetMachine().getRelocationModel() == Reloc::PIC)
2809        Result = DAG.getNode(ISD::ADD, getPointerTy(),
2810                DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
2811    }
2812
2813    return Result;
2814  }
2815  case ISD::GlobalAddress: {
2816    GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2817    SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
2818                         DAG.getTargetGlobalAddress(GV, getPointerTy()));
2819    if (Subtarget->isTargetDarwin()) {
2820      // With PIC, the address is actually $g + Offset.
2821      if (getTargetMachine().getRelocationModel() == Reloc::PIC)
2822        Result = DAG.getNode(ISD::ADD, getPointerTy(),
2823                    DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
2824
2825      // For Darwin, external and weak symbols are indirect, so we want to load
2826      // the value at address GV, not the value of GV itself. This means that
2827      // the GlobalAddress must be in the base or index register of the address,
2828      // not the GV offset field.
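          // (This is the Darwin "$non_lazy_ptr" indirection: the load fetches
          // the symbol's real address from a non-lazy pointer stub.)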
2829      if (getTargetMachine().getRelocationModel() != Reloc::Static &&
2830          DarwinGVRequiresExtraLoad(GV))
2831        Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(),
2832                             Result, DAG.getSrcValue(NULL));
2833    }
2834
2835    return Result;
2836  }
2837  case ISD::ExternalSymbol: {
2838    const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
2839    SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
2840                         DAG.getTargetExternalSymbol(Sym, getPointerTy()));
2841    if (Subtarget->isTargetDarwin()) {
2842      // With PIC, the address is actually $g + Offset.
2843      if (getTargetMachine().getRelocationModel() == Reloc::PIC)
2844        Result = DAG.getNode(ISD::ADD, getPointerTy(),
2845                    DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
2846    }
2847
2848    return Result;
2849  }
2850  case ISD::VASTART: {
2851    // vastart just stores the address of the VarArgsFrameIndex slot into the
2852    // memory location argument.
2853    // FIXME: Replace MVT::i32 with PointerTy
2854    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
2855    return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
2856                       Op.getOperand(1), Op.getOperand(2));
2857  }
2858  case ISD::RET: {
2859    SDOperand Copy;
2860
2861    switch(Op.getNumOperands()) {
2862    default:
2863      assert(0 && "Do not know how to return this many arguments!");
2864      abort();
2865    case 1:    // ret void.
2866      return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0),
2867                         DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
2868    case 2: {
2869      MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
2870
2871      if (MVT::isVector(ArgVT)) {
2872        // Integer or FP vector result -> XMM0.
2873        if (DAG.getMachineFunction().liveout_empty())
2874          DAG.getMachineFunction().addLiveOut(X86::XMM0);
2875        Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1),
2876                                SDOperand());
2877      } else if (MVT::isInteger(ArgVT)) {
2878        // Integer result -> EAX
2879        if (DAG.getMachineFunction().liveout_empty())
2880          DAG.getMachineFunction().addLiveOut(X86::EAX);
2881
2882        Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1),
2883                                SDOperand());
2884      } else if (!X86ScalarSSE) {
2885        // FP return with fp-stack value.
2886        if (DAG.getMachineFunction().liveout_empty())
2887          DAG.getMachineFunction().addLiveOut(X86::ST0);
2888
2889        std::vector<MVT::ValueType> Tys;
2890        Tys.push_back(MVT::Other);
2891        Tys.push_back(MVT::Flag);
2892        std::vector<SDOperand> Ops;
2893        Ops.push_back(Op.getOperand(0));
2894        Ops.push_back(Op.getOperand(1));
2895        Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
2896      } else {
2897        // FP return with ScalarSSE (return on fp-stack).
2898        if (DAG.getMachineFunction().liveout_empty())
2899          DAG.getMachineFunction().addLiveOut(X86::ST0);
2900
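            // With scalar SSE the value lives in an XMM register, but the ABI
            // returns floating point in ST(0). If the value is a load that is
            // already chained to this return, FLD directly from its address;
            // otherwise spill it to a fresh stack slot and FLD from there.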
2901        SDOperand MemLoc;
2902        SDOperand Chain = Op.getOperand(0);
2903        SDOperand Value = Op.getOperand(1);
2904
2905        if (Value.getOpcode() == ISD::LOAD &&
2906            (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
2907          Chain  = Value.getOperand(0);
2908          MemLoc = Value.getOperand(1);
2909        } else {
2910          // Spill the value to memory and reload it into top of stack.
2911          unsigned Size = MVT::getSizeInBits(ArgVT)/8;
2912          MachineFunction &MF = DAG.getMachineFunction();
2913          int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
2914          MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
2915          Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
2916                              Value, MemLoc, DAG.getSrcValue(0));
2917        }
2918        std::vector<MVT::ValueType> Tys;
2919        Tys.push_back(MVT::f64);
2920        Tys.push_back(MVT::Other);
2921        std::vector<SDOperand> Ops;
2922        Ops.push_back(Chain);
2923        Ops.push_back(MemLoc);
2924        Ops.push_back(DAG.getValueType(ArgVT));
2925        Copy = DAG.getNode(X86ISD::FLD, Tys, Ops);
2926        Tys.clear();
2927        Tys.push_back(MVT::Other);
2928        Tys.push_back(MVT::Flag);
2929        Ops.clear();
2930        Ops.push_back(Copy.getValue(1));
2931        Ops.push_back(Copy);
2932        Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
2933      }
2934      break;
2935    }
2936    case 3:
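        // Two integer results: the second goes in EDX and the first in EAX,
        // e.g. an i64 returned as two i32 halves in EDX:EAX.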
2937      if (DAG.getMachineFunction().liveout_empty()) {
2938        DAG.getMachineFunction().addLiveOut(X86::EAX);
2939        DAG.getMachineFunction().addLiveOut(X86::EDX);
2940      }
2941
2942      Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2),
2943                              SDOperand());
2944      Copy = DAG.getCopyToReg(Copy, X86::EAX, Op.getOperand(1),
                                  Copy.getValue(1));
2945      break;
2946    }
2947    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other,
2948                       Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
2949                       Copy.getValue(1));
2950  }
2951  case ISD::SCALAR_TO_VECTOR: {
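        // S2VEC typically selects to a movd, which only takes a 32-bit GPR, so
        // any-extend narrower integers to i32 first; the high bits are
        // don't-cares.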
2952    SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
2953    return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
2954  }
2955  case ISD::VECTOR_SHUFFLE: {
2956    SDOperand V1 = Op.getOperand(0);
2957    SDOperand V2 = Op.getOperand(1);
2958    SDOperand PermMask = Op.getOperand(2);
2959    MVT::ValueType VT = Op.getValueType();
2960    unsigned NumElems = PermMask.getNumOperands();
2961
2962    if (isSplatMask(PermMask.Val)) {
2963      if (NumElems <= 4) return Op;
2964      // Promote it to a v4i32 splat.
2965      return PromoteSplat(Op, DAG);
2966    }
2967
2968    if (X86::isMOVLMask(PermMask.Val) ||
2969        X86::isMOVSHDUPMask(PermMask.Val) ||
2970        X86::isMOVSLDUPMask(PermMask.Val) ||
2971        X86::isMOVHLPSMask(PermMask.Val) ||
2972        X86::isMOVHPMask(PermMask.Val) ||
2973        X86::isMOVLPMask(PermMask.Val))
2974      return Op;
2975
2976    if (ShouldXformToMOVHLPS(PermMask.Val) ||
2977        ShouldXformToMOVLP(V1.Val, PermMask.Val))
2978      return CommuteVectorShuffle(Op, DAG);
2979
2980    bool V1IsSplat = isSplatVector(V1.Val);
2981    bool V2IsSplat = isSplatVector(V2.Val);
2982    if (V1IsSplat && !V2IsSplat) {
2983      Op = CommuteVectorShuffle(Op, DAG);
2984      V1 = Op.getOperand(0);
2985      V2 = Op.getOperand(1);
2986      PermMask = Op.getOperand(2);
2987      V2IsSplat = true;
2988    }
2989
2990    if (isCommutedMOVL(PermMask.Val, V2IsSplat)) {
2991      Op = CommuteVectorShuffle(Op, DAG);
2992      V1 = Op.getOperand(0);
2993      V2 = Op.getOperand(1);
2994      PermMask = Op.getOperand(2);
2995      if (V2IsSplat) {
2996        // V2 is a splat, so the mask may be malformed. That is, it may point
2997        // to any V2 element. The instruction selector won't like this. Get
2998        // a corrected mask and commute to form a proper MOVS{S|D}.
2999        SDOperand NewMask = getMOVLMask(NumElems, DAG);
3000        if (NewMask.Val != PermMask.Val)
3001          Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3002      }
3003      return Op;
3004    }
3005
3006    if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
3007        X86::isUNPCKLMask(PermMask.Val) ||
3008        X86::isUNPCKHMask(PermMask.Val))
3009      return Op;
3010
3011    if (V2IsSplat) {
3012      // Normalize the mask so all entries that point to V2 point to its first
3013      // element, then try to match unpck{h|l} again. If a match is found,
3014      // return a new vector_shuffle with the corrected mask.
3015      SDOperand NewMask = NormalizeMask(PermMask, DAG);
3016      if (NewMask.Val != PermMask.Val) {
3017        if (X86::isUNPCKLMask(PermMask.Val, true)) {
3018          SDOperand NewMask = getUnpacklMask(NumElems, DAG);
3019          return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3020        } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
3021          SDOperand NewMask = getUnpackhMask(NumElems, DAG);
3022          return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3023        }
3024      }
3025    }
3026
3027    // Normalize the node to match x86 shuffle ops if needed
3028    if (V2.getOpcode() != ISD::UNDEF)
3029      if (isCommutedSHUFP(PermMask.Val)) {
3030        Op = CommuteVectorShuffle(Op, DAG);
3031        V1 = Op.getOperand(0);
3032        V2 = Op.getOperand(1);
3033        PermMask = Op.getOperand(2);
3034      }
3035
3036    // If VT is integer, try PSHUF* first, then SHUFP*.
3037    if (MVT::isInteger(VT)) {
3038      if (X86::isPSHUFDMask(PermMask.Val) ||
3039          X86::isPSHUFHWMask(PermMask.Val) ||
3040          X86::isPSHUFLWMask(PermMask.Val)) {
3041        if (V2.getOpcode() != ISD::UNDEF)
3042          return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3043                             DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
3044        return Op;
3045      }
3046
3047      if (X86::isSHUFPMask(PermMask.Val))
3048        return Op;
3049
3050      // Handle v8i16 shuffle high / low shuffle node pair.
3051      if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
3052        MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3053        MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
3054        std::vector<SDOperand> MaskVec;
3055        for (unsigned i = 0; i != 4; ++i)
3056          MaskVec.push_back(PermMask.getOperand(i));
3057        for (unsigned i = 4; i != 8; ++i)
3058          MaskVec.push_back(DAG.getConstant(i, BaseVT));
3059        SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
3060        V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
3061        MaskVec.clear();
3062        for (unsigned i = 0; i != 4; ++i)
3063          MaskVec.push_back(DAG.getConstant(i, BaseVT));
3064        for (unsigned i = 4; i != 8; ++i)
3065          MaskVec.push_back(PermMask.getOperand(i));
3066        Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
3067        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
3068      }
3069    } else {
3070      // Floating point cases in the other order.
3071      if (X86::isSHUFPMask(PermMask.Val))
3072        return Op;
3073      if (X86::isPSHUFDMask(PermMask.Val) ||
3074          X86::isPSHUFHWMask(PermMask.Val) ||
3075          X86::isPSHUFLWMask(PermMask.Val)) {
3076        if (V2.getOpcode() != ISD::UNDEF)
3077          return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3078                             DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
3079        return Op;
3080      }
3081    }
3082
3083    if (NumElems == 4) {
3084      // Break it into (shuffle shuffle_hi, shuffle_lo).
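          // For example (a sketch): the v4 mask <0,4,2,6> yields
          // LoMask = <0,4,u,u>, HiMask = <2,6,u,u>, and a final mask <0,1,4,5>
          // that interleaves the two half-shuffles.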
3085      MVT::ValueType MaskVT = PermMask.getValueType();
3086      MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
3087      std::map<unsigned, std::pair<int, int> > Locs;
3088      std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3089      std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3090      std::vector<SDOperand> *MaskPtr = &LoMask;
3091      unsigned MaskIdx = 0;
3092      unsigned LoIdx = 0;
3093      unsigned HiIdx = NumElems/2;
3094      for (unsigned i = 0; i != NumElems; ++i) {
3095        if (i == NumElems/2) {
3096          MaskPtr = &HiMask;
3097          MaskIdx = 1;
3098          LoIdx = 0;
3099          HiIdx = NumElems/2;
3100        }
3101        SDOperand Elt = PermMask.getOperand(i);
3102        if (Elt.getOpcode() == ISD::UNDEF) {
3103          Locs[i] = std::make_pair(-1, -1);
3104        } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
3105          Locs[i] = std::make_pair(MaskIdx, LoIdx);
3106          (*MaskPtr)[LoIdx] = Elt;
3107          LoIdx++;
3108        } else {
3109          Locs[i] = std::make_pair(MaskIdx, HiIdx);
3110          (*MaskPtr)[HiIdx] = Elt;
3111          HiIdx++;
3112        }
3113      }
3114
3115      SDOperand LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3116                                DAG.getNode(ISD::BUILD_VECTOR, MaskVT, LoMask));
3117      SDOperand HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3118                                DAG.getNode(ISD::BUILD_VECTOR, MaskVT, HiMask));
3119      std::vector<SDOperand> MaskOps;
3120      for (unsigned i = 0; i != NumElems; ++i) {
3121        if (Locs[i].first == -1) {
3122          MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
3123        } else {
3124          unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
3125          MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
3126        }
3127      }
3128      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
3129                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskOps));
3130    }
3131
3132    return SDOperand();
3133  }
3134  case ISD::BUILD_VECTOR: {
3135    // All-ones vectors are handled with pcmpeqd.
3136    if (ISD::isBuildVectorAllOnes(Op.Val))
3137      return Op;
3138
3139    unsigned NumElems = Op.getNumOperands();
3140    unsigned Half = NumElems/2;
3141    MVT::ValueType VT = Op.getValueType();
3142    MVT::ValueType EVT = MVT::getVectorBaseType(VT);
3143    unsigned NumZero  = 0;
3144    unsigned NonZeros = 0;
3145    std::set<SDOperand> Values;
3146    for (unsigned i = 0; i < NumElems; ++i) {
3147      SDOperand Elt = Op.getOperand(i);
3148      Values.insert(Elt);
3149      if (isZeroNode(Elt))
3150        NumZero++;
3151      else if (Elt.getOpcode() != ISD::UNDEF)
3152        NonZeros |= (1 << i);
3153    }
3154
3155    unsigned NumNonZero = CountPopulation_32(NonZeros);
3156    if (NumNonZero == 0)
3157      return Op;
3158
3159    // Splat is obviously ok. Let legalizer expand it to a shuffle.
3160    if (Values.size() == 1)
3161      return SDOperand();
3162
3163    // Special case: a single non-zero element. Lower it as a MOVL into a
    // zero vector if it belongs in lane 0; otherwise, for elements of at
    // least 32 bits, as a shuffle of zero and the zero-extended scalar.
3164    if (NumNonZero == 1) {
3165      unsigned Idx = CountTrailingZeros_32(NonZeros);
3166      SDOperand Item = Op.getOperand(Idx);
3167      Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
3168      if (Idx == 0)
3169        // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
3170        return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
3171                                           NumZero > 0, DAG);
3172
3173      if (MVT::getSizeInBits(EVT) >= 32) {
3174        // Turn it into a shuffle of zero and zero-extended scalar to vector.
3175        Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
3176                                           DAG);
3177        MVT::ValueType MaskVT  = MVT::getIntVectorWithNumElements(NumElems);
3178        MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
3179        std::vector<SDOperand> MaskVec;
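            // The zero-extended scalar now sits in lane 0, and lane 1 of Item
            // is a known-zero (or undef) lane, so a mask that is 0 at position
            // Idx and 1 everywhere else moves the scalar into place and fills
            // the remaining lanes from lane 1.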
3180        for (unsigned i = 0; i < NumElems; i++)
3181          MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
3182        SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
3183        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
3184                           DAG.getNode(ISD::UNDEF, VT), Mask);
3185      }
3186    }
3187
3188    // If the element type is 8 or 16 bits, convert it to inserts into a
    // zero (or undef) vector.
3189    if (MVT::getSizeInBits(EVT) <= 16) {
3190      if (NumNonZero <= Half) {
3191        SDOperand V(0, 0);
3192
3193        for (unsigned i = 0; i < NumNonZero; ++i) {
3194          unsigned Idx = CountTrailingZeros_32(NonZeros);
3195          NonZeros ^= (1 << Idx);
3196          SDOperand Item = Op.getOperand(Idx);
3197          if (i == 0) {
3198            if (NumZero)
3199              V = getZeroVector(MVT::v8i16, DAG);
3200            else
3201              V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
3202          }
3203          if (EVT == MVT::i8) {
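              // Two i8 elements share an i16 lane: widen the byte, shift it
              // into the high half when its index is odd, and insert at lane
              // Idx/2. This assumes the lane's other byte is zero or undef.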
3204            Item = DAG.getNode(ISD::ANY_EXTEND, MVT::i16, Item);
3205            if ((Idx % 2) != 0)
3206              Item = DAG.getNode(ISD::SHL, MVT::i16,
3207                                 Item, DAG.getConstant(8, MVT::i8));
3208            Idx /= 2;
3209          }
3210          V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Item,
3211                          DAG.getConstant(Idx, MVT::i32));
3212        }
3213
3214        if (EVT == MVT::i8)
3215          V = DAG.getNode(ISD::BIT_CONVERT, VT, V);
3216        return V;
3217      }
3218    }
3219
3220    std::vector<SDOperand> V(NumElems);
3221    if (NumElems == 4 && NumZero > 0) {
3222      for (unsigned i = 0; i < 4; ++i) {
3223        bool isZero = !(NonZeros & (1 << i));
3224        if (isZero)
3225          V[i] = getZeroVector(VT, DAG);
3226        else
3227          V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
3228      }
3229
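          // Combine the four lane vectors pairwise, using the two NonZeros
          // bits of each pair to pick the combiner: 0 reuses the shared zero
          // vector, 1 and 2 MOVL the single non-zero lane onto zero (the final
          // shuffle below fixes the pair's order), and 3 unpacks the two
          // non-zero lanes together.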
3230      for (unsigned i = 0; i < 2; ++i) {
3231        switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
3232        default: break;
3233        case 0:
3234          V[i] = V[i*2];  // Must be a zero vector.
3235          break;
3236        case 1:
3237          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
3238                             getMOVLMask(NumElems, DAG));
3239          break;
3240        case 2:
3241          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
3242                             getMOVLMask(NumElems, DAG));
3243          break;
3244        case 3:
3245          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
3246                             getUnpacklMask(NumElems, DAG));
3247          break;
3248        }
3249      }
3250
3251      // Take advantage of the fact that an R32-to-VR128 scalar_to_vector
3252      // (i.e. movd) clears the upper bits.
3253      // FIXME: we can do the same for v4f32 case when we know both parts of
3254      // the lower half come from scalar_to_vector (loadf32). We should do
3255      // that in post legalizer dag combiner with target specific hooks.
3256      if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
3257        return V[0];
3258      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3259      MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
3260      std::vector<SDOperand> MaskVec;
3261      bool Reverse = (NonZeros & 0x3) == 2;
3262      for (unsigned i = 0; i < 2; ++i)
3263        if (Reverse)
3264          MaskVec.push_back(DAG.getConstant(1-i, MaskEVT));
3265        else
3266          MaskVec.push_back(DAG.getConstant(i, MaskEVT));
3267      Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
3268      for (unsigned i = 0; i < 2; ++i)
3269        if (Reverse)
3270          MaskVec.push_back(DAG.getConstant(1-i+NumElems, MaskEVT));
3271        else
3272          MaskVec.push_back(DAG.getConstant(i+NumElems, MaskEVT));
3273      SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
3274      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
3275    }
3276
3277    if (Values.size() > 2) {
3278      // Expand into a number of unpckl*.
3279      // e.g. for v4f32
3280      //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
3281      //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
3282      //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
3283      SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
3284      for (unsigned i = 0; i < NumElems; ++i)
3285        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
3286      NumElems >>= 1;
3287      while (NumElems != 0) {
3288        for (unsigned i = 0; i < NumElems; ++i)
3289          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
3290                             UnpckMask);
3291        NumElems >>= 1;
3292      }
3293      return V[0];
3294    }
3295
3296    return SDOperand();
3297  }
3298  case ISD::EXTRACT_VECTOR_ELT: {
3299    if (!isa<ConstantSDNode>(Op.getOperand(1)))
3300      return SDOperand();
3301
3302    MVT::ValueType VT = Op.getValueType();
3303    // TODO: handle v16i8.
3304    if (MVT::getSizeInBits(VT) == 16) {
3305      // Transform it so it matches pextrw, which produces a 32-bit result.
          // (VT+1) is the next wider integer type in the MVT enum (i16 -> i32).
3306      MVT::ValueType EVT = (MVT::ValueType)(VT+1);
3307      SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
3308                                      Op.getOperand(0), Op.getOperand(1));
3309      SDOperand Assert  = DAG.getNode(ISD::AssertZext, EVT, Extract,
3310                                      DAG.getValueType(VT));
3311      return DAG.getNode(ISD::TRUNCATE, VT, Assert);
3312    } else if (MVT::getSizeInBits(VT) == 32) {
3313      SDOperand Vec = Op.getOperand(0);
3314      unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3315      if (Idx == 0)
3316        return Op;
3317
3318      // SHUFPS the element to the lowest double word, then movss.
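          // A mask of <Idx, undef, undef, undef> brings the wanted element to
          // lane 0, where the extract is just a movss / movd.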
3319      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3322      std::vector<SDOperand> IdxVec;
3323      IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
3324      IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3325      IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3326      IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3327      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
3328      Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3329                        Vec, Vec, Mask);
3330      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3331                         DAG.getConstant(0, MVT::i32));
3332    } else if (MVT::getSizeInBits(VT) == 64) {
3333      SDOperand Vec = Op.getOperand(0);
3334      unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3335      if (Idx == 0)
3336        return Op;
3337
3338      // UNPCKHPD the element to the lowest double word, then movsd.
3339      // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
3340      // to a f64mem, the whole operation is folded into a single MOVHPDmr.
          // The shuffled vector has two 64-bit elements, so the mask is v2i32.
3341      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(2);
3342      std::vector<SDOperand> IdxVec;
3343      IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
3344      IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3345      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
3346      Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3347                        Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3348      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3349                         DAG.getConstant(0, MVT::i32));
3350    }
3351
3352    return SDOperand();
3353  }
3354  case ISD::INSERT_VECTOR_ELT: {
3355    // Transform it so it matches pinsrw, which expects a 16-bit value in an
3356    // R32 as its second argument.
3357    MVT::ValueType VT = Op.getValueType();
3358    MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
3359    SDOperand N0 = Op.getOperand(0);
3360    SDOperand N1 = Op.getOperand(1);
3361    SDOperand N2 = Op.getOperand(2);
3362    if (MVT::getSizeInBits(BaseVT) == 16) {
3363      if (N1.getValueType() != MVT::i32)
3364        N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
3365      if (N2.getValueType() != MVT::i32)
3366        N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
3367      return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
3368    } else if (MVT::getSizeInBits(BaseVT) == 32) {
3369      unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
3370      if (Idx == 0) {
3371        // Use a movss.
3372        N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
3373        MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3374        MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
3375        std::vector<SDOperand> MaskVec;
3376        MaskVec.push_back(DAG.getConstant(4, BaseVT));
3377        for (unsigned i = 1; i <= 3; ++i)
3378          MaskVec.push_back(DAG.getConstant(i, BaseVT));
3379        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
3380                           DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec));
3381      } else {
3382        // Use two pinsrw instructions to insert a 32-bit value.
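            // The value occupies two adjacent i16 lanes of the v8i16 view, so
            // double the index, insert the low 16 bits there, insert the high
            // 16 bits (value >> 16) one lane up, and bitcast back.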
3383        Idx <<= 1;
3384        if (MVT::isFloatingPoint(N1.getValueType())) {
3385          if (N1.getOpcode() == ISD::LOAD) {
3386            // Just load directly from f32mem to R32.
3387            N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1),
3388                             N1.getOperand(2));
3389          } else {
3390            N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
3391            N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
3392            N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
3393                             DAG.getConstant(0, MVT::i32));
3394          }
3395        }
3396        N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
3397        N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3398                         DAG.getConstant(Idx, MVT::i32));
3399        N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
3400        N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3401                         DAG.getConstant(Idx+1, MVT::i32));
3402        return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
3403      }
3404    }
3405
3406    return SDOperand();
3407  }
3408  case ISD::INTRINSIC_WO_CHAIN: {
3409    unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
3410    switch (IntNo) {
3411    default: return SDOperand();    // Don't custom lower most intrinsics.
3412    // Comparison intrinsics.
3413    case Intrinsic::x86_sse_comieq_ss:
3414    case Intrinsic::x86_sse_comilt_ss:
3415    case Intrinsic::x86_sse_comile_ss:
3416    case Intrinsic::x86_sse_comigt_ss:
3417    case Intrinsic::x86_sse_comige_ss:
3418    case Intrinsic::x86_sse_comineq_ss:
3419    case Intrinsic::x86_sse_ucomieq_ss:
3420    case Intrinsic::x86_sse_ucomilt_ss:
3421    case Intrinsic::x86_sse_ucomile_ss:
3422    case Intrinsic::x86_sse_ucomigt_ss:
3423    case Intrinsic::x86_sse_ucomige_ss:
3424    case Intrinsic::x86_sse_ucomineq_ss:
3425    case Intrinsic::x86_sse2_comieq_sd:
3426    case Intrinsic::x86_sse2_comilt_sd:
3427    case Intrinsic::x86_sse2_comile_sd:
3428    case Intrinsic::x86_sse2_comigt_sd:
3429    case Intrinsic::x86_sse2_comige_sd:
3430    case Intrinsic::x86_sse2_comineq_sd:
3431    case Intrinsic::x86_sse2_ucomieq_sd:
3432    case Intrinsic::x86_sse2_ucomilt_sd:
3433    case Intrinsic::x86_sse2_ucomile_sd:
3434    case Intrinsic::x86_sse2_ucomigt_sd:
3435    case Intrinsic::x86_sse2_ucomige_sd:
3436    case Intrinsic::x86_sse2_ucomineq_sd: {
3437      unsigned Opc = 0;
3438      ISD::CondCode CC = ISD::SETCC_INVALID;
3439      switch (IntNo) {
3440        default: break;
3441        case Intrinsic::x86_sse_comieq_ss:
3442        case Intrinsic::x86_sse2_comieq_sd:
3443          Opc = X86ISD::COMI;
3444          CC = ISD::SETEQ;
3445          break;
3446        case Intrinsic::x86_sse_comilt_ss:
3447        case Intrinsic::x86_sse2_comilt_sd:
3448          Opc = X86ISD::COMI;
3449          CC = ISD::SETLT;
3450          break;
3451        case Intrinsic::x86_sse_comile_ss:
3452        case Intrinsic::x86_sse2_comile_sd:
3453          Opc = X86ISD::COMI;
3454          CC = ISD::SETLE;
3455          break;
3456        case Intrinsic::x86_sse_comigt_ss:
3457        case Intrinsic::x86_sse2_comigt_sd:
3458          Opc = X86ISD::COMI;
3459          CC = ISD::SETGT;
3460          break;
3461        case Intrinsic::x86_sse_comige_ss:
3462        case Intrinsic::x86_sse2_comige_sd:
3463          Opc = X86ISD::COMI;
3464          CC = ISD::SETGE;
3465          break;
3466        case Intrinsic::x86_sse_comineq_ss:
3467        case Intrinsic::x86_sse2_comineq_sd:
3468          Opc = X86ISD::COMI;
3469          CC = ISD::SETNE;
3470          break;
3471        case Intrinsic::x86_sse_ucomieq_ss:
3472        case Intrinsic::x86_sse2_ucomieq_sd:
3473          Opc = X86ISD::UCOMI;
3474          CC = ISD::SETEQ;
3475          break;
3476        case Intrinsic::x86_sse_ucomilt_ss:
3477        case Intrinsic::x86_sse2_ucomilt_sd:
3478          Opc = X86ISD::UCOMI;
3479          CC = ISD::SETLT;
3480          break;
3481        case Intrinsic::x86_sse_ucomile_ss:
3482        case Intrinsic::x86_sse2_ucomile_sd:
3483          Opc = X86ISD::UCOMI;
3484          CC = ISD::SETLE;
3485          break;
3486        case Intrinsic::x86_sse_ucomigt_ss:
3487        case Intrinsic::x86_sse2_ucomigt_sd:
3488          Opc = X86ISD::UCOMI;
3489          CC = ISD::SETGT;
3490          break;
3491        case Intrinsic::x86_sse_ucomige_ss:
3492        case Intrinsic::x86_sse2_ucomige_sd:
3493          Opc = X86ISD::UCOMI;
3494          CC = ISD::SETGE;
3495          break;
3496        case Intrinsic::x86_sse_ucomineq_ss:
3497        case Intrinsic::x86_sse2_ucomineq_sd:
3498          Opc = X86ISD::UCOMI;
3499          CC = ISD::SETNE;
3500          break;
3501      }
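          // comiss / ucomiss set ZF, PF, and CF like an unsigned compare, so
          // some predicates (e.g. SETLT) are only expressible after swapping
          // the operands; translateX86CC reports that with Flip.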
3502      bool Flip;
3503      unsigned X86CC;
3504      translateX86CC(CC, true, X86CC, Flip);
3505      SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1),
3506                                   Op.getOperand(Flip?1:2));
3507      SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8,
3508                                    DAG.getConstant(X86CC, MVT::i8), Cond);
3509      return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
3510    }
3511    }
3512  }
3513  }
3514}
3515
3516const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
3517  switch (Opcode) {
3518  default: return NULL;
3519  case X86ISD::SHLD:               return "X86ISD::SHLD";
3520  case X86ISD::SHRD:               return "X86ISD::SHRD";
3521  case X86ISD::FAND:               return "X86ISD::FAND";
3522  case X86ISD::FXOR:               return "X86ISD::FXOR";
3523  case X86ISD::FILD:               return "X86ISD::FILD";
3524  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
3525  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
3526  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
3527  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
3528  case X86ISD::FLD:                return "X86ISD::FLD";
3529  case X86ISD::FST:                return "X86ISD::FST";
3530  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
3531  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
3532  case X86ISD::CALL:               return "X86ISD::CALL";
3533  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
3534  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
3535  case X86ISD::CMP:                return "X86ISD::CMP";
3536  case X86ISD::TEST:               return "X86ISD::TEST";
3537  case X86ISD::COMI:               return "X86ISD::COMI";
3538  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
3539  case X86ISD::SETCC:              return "X86ISD::SETCC";
3540  case X86ISD::CMOV:               return "X86ISD::CMOV";
3541  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
3542  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
3543  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
3544  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
3545  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
3546  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
3547  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
3548  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
3549  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
3550  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
3551  }
3552}
3553
3554void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
3555                                                       uint64_t Mask,
3556                                                       uint64_t &KnownZero,
3557                                                       uint64_t &KnownOne,
3558                                                       unsigned Depth) const {
3559  unsigned Opc = Op.getOpcode();
3560  assert((Opc >= ISD::BUILTIN_OP_END ||
3561          Opc == ISD::INTRINSIC_WO_CHAIN ||
3562          Opc == ISD::INTRINSIC_W_CHAIN ||
3563          Opc == ISD::INTRINSIC_VOID) &&
3564         "Should use MaskedValueIsZero if you don't know whether Op"
3565         " is a target node!");
3566
3567  KnownZero = KnownOne = 0;   // Don't know anything.
3568  switch (Opc) {
3569  default: break;
3570  case X86ISD::SETCC:
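      // SETCC materializes only 0 or 1 in its i8 result, so every bit above
      // bit zero is known to be zero.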
3571    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
3572    break;
3573  }
3574}
3575
3576std::vector<unsigned> X86TargetLowering::
3577getRegClassForInlineAsmConstraint(const std::string &Constraint,
3578                                  MVT::ValueType VT) const {
3579  if (Constraint.size() == 1) {
3580    // FIXME: not handling fp-stack yet!
3581    // FIXME: not handling MMX registers yet ('y' constraint).
3582    switch (Constraint[0]) {      // GCC X86 Constraint Letters
3583    default: break;  // Unknown constraint letter.
3584    case 'r':   // GENERAL_REGS
3585    case 'R':   // LEGACY_REGS
3586      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX,
3587                                   X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
3588    case 'l':   // INDEX_REGS
3589      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX,
3590                                   X86::ESI, X86::EDI, X86::EBP, 0);
3591    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
3592    case 'Q':   // Q_REGS
3593      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 0);
3594    case 'x':   // SSE_REGS if SSE1 allowed
3595      if (Subtarget->hasSSE1())
3596        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3597                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
3598                                     0);
3599      return std::vector<unsigned>();
3600    case 'Y':   // SSE_REGS if SSE2 allowed
3601      if (Subtarget->hasSSE2())
3602        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3603                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
3604                                     0);
3605      return std::vector<unsigned>();
3606    }
3607  }
3608
3609  return std::vector<unsigned>();
3610}
3611
3612/// isLegalAddressImmediate - Return true if the integer value or
3613/// GlobalValue can be used as the offset of the target addressing mode.
3614bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const {
3615  // X86 allows a sign-extended 32-bit immediate field.
3616  return (V > -(1LL << 32) && V < (1LL << 32)-1);
3617}
3618
3619bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
3620  if (Subtarget->isTargetDarwin()) {
3621    Reloc::Model RModel = getTargetMachine().getRelocationModel();
3622    if (RModel == Reloc::Static)
3623      return true;
3624    else if (RModel == Reloc::DynamicNoPIC)
3625      return !DarwinGVRequiresExtraLoad(GV);
3626    else
3627      return false;
3628  } else
3629    return true;
3630}
3631
3632/// isShuffleMaskLegal - Targets can use this to indicate that they only
3633/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
3634/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
3635/// are assumed to be legal.
3636bool
3637X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
3638  // Only do shuffles on 128-bit vector types for now.
3639  if (MVT::getSizeInBits(VT) == 64) return false;
3640  return (Mask.Val->getNumOperands() <= 4 ||
3641          isSplatMask(Mask.Val)  ||
3642          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
3643          X86::isUNPCKLMask(Mask.Val) ||
3644          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
3645          X86::isUNPCKHMask(Mask.Val));
3646}
3647
3648bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
3649                                               MVT::ValueType EVT,
3650                                               SelectionDAG &DAG) const {
3651  unsigned NumElts = BVOps.size();
3652  // Only do shuffles on 128-bit vector types for now.
3653  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
3654  if (NumElts == 2) return true;
3655  if (NumElts == 4) {
3656    return (isMOVLMask(BVOps)  || isCommutedMOVL(BVOps, true) ||
3657            isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps));
3658  }
3659  return false;
3660}
3661