X86ISelLowering.cpp revision 85e3800e427fd5367df7a46ce4ad37ad901f894c
1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation --*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that X86 uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "X86.h"
16#include "X86InstrBuilder.h"
17#include "X86ISelLowering.h"
18#include "X86TargetMachine.h"
19#include "llvm/CallingConv.h"
20#include "llvm/Constants.h"
21#include "llvm/Function.h"
22#include "llvm/Intrinsics.h"
23#include "llvm/ADT/VectorExtras.h"
24#include "llvm/Analysis/ScalarEvolutionExpressions.h"
25#include "llvm/CodeGen/MachineFrameInfo.h"
26#include "llvm/CodeGen/MachineFunction.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/SelectionDAG.h"
29#include "llvm/CodeGen/SSARegMap.h"
30#include "llvm/Support/MathExtras.h"
31#include "llvm/Target/TargetOptions.h"
32using namespace llvm;
33
34// FIXME: temporary.
35#include "llvm/Support/CommandLine.h"
36static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
37                                  cl::desc("Enable fastcc on X86"));
38
39X86TargetLowering::X86TargetLowering(TargetMachine &TM)
40  : TargetLowering(TM) {
41  Subtarget = &TM.getSubtarget<X86Subtarget>();
42  X86ScalarSSE = Subtarget->hasSSE2();
43
44  // Set up the TargetLowering object.
45
46  // X86 is weird, it always uses i8 for shift amounts and setcc results.
47  setShiftAmountType(MVT::i8);
48  setSetCCResultType(MVT::i8);
49  setSetCCResultContents(ZeroOrOneSetCCResult);
50  setSchedulingPreference(SchedulingForRegPressure);
51  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
52  setStackPointerRegisterToSaveRestore(X86::ESP);
53
54  if (!Subtarget->isTargetDarwin())
55    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
56    setUseUnderscoreSetJmpLongJmp(true);
57
58  // Add legal addressing mode scale values.
59  addLegalAddressScale(8);
60  addLegalAddressScale(4);
61  addLegalAddressScale(2);
62  // Enter the ones which require both scale + index last. These are more
63  // expensive.
64  addLegalAddressScale(9);
65  addLegalAddressScale(5);
66  addLegalAddressScale(3);
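  // For illustration: a scale of 2, 4 or 8 maps directly onto the index field
  // of an x86 address, while 3, 5 and 9 are only reachable by reusing the same
  // register as both base and index (e.g. lea (%eax,%eax,2) computes eax*3),
  // which is why they are listed after the cheaper forms.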
67
68  // Set up the register classes.
69  addRegisterClass(MVT::i8, X86::R8RegisterClass);
70  addRegisterClass(MVT::i16, X86::R16RegisterClass);
71  addRegisterClass(MVT::i32, X86::R32RegisterClass);
72
73  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
74  // operation.
75  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
76  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
77  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);
78
79  if (X86ScalarSSE)
80    // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
81    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Expand);
82  else
83    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
84
85  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
86  // this operation.
87  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
88  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
89  // SSE has no i16 to fp conversion, only i32
90  if (X86ScalarSSE)
91    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
92  else {
93    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
94    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
95  }
96
97  // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64
98  // isn't legal.
99  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);
100  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);
101
102  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
103  // this operation.
104  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
105  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);
106
107  if (X86ScalarSSE) {
108    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
109  } else {
110    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
111    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
112  }
113
114  // Handle FP_TO_UINT by promoting the destination to a larger signed
115  // conversion.
116  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
117  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
118  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);
119
120  if (X86ScalarSSE && !Subtarget->hasSSE3())
121    // Expand FP_TO_UINT into a select.
122    // FIXME: We would like to use a Custom expander here eventually to do
123    // the optimal thing for SSE vs. the default expansion in the legalizer.
124    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Expand);
125  else
126    // With SSE3 we can use fisttpll to convert to a signed i64.
127    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
128
129  setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
130  setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);
131
132  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
133  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
134  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
135  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
136  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
137  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
138  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
139  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
140  setOperationAction(ISD::SEXTLOAD         , MVT::i1   , Expand);
141  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
142  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
143  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
144  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
145  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
146  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
147  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
148  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
149  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
150  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
151  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
152  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);
153
154  // These should be promoted to a larger select which is supported.
155  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
156  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
157
158  // X86 wants to expand cmov itself.
159  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
160  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
161  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
162  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
163  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
164  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
165  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
166  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
167  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
168  // X86 ret instruction may pop stack.
169  setOperationAction(ISD::RET             , MVT::Other, Custom);
170  // Darwin ABI issue.
171  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
172  setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
173  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
174  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
175  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
176  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
177  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
178  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
179  // X86 wants to expand memset / memcpy itself.
180  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
181  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);
182
183  // We don't have line number support yet.
184  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
185  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
186  // FIXME - use subtarget debug flags
187  if (!Subtarget->isTargetDarwin())
188    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);
189
190  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
191  setOperationAction(ISD::VASTART           , MVT::Other, Custom);
192
193  // Use the default implementation.
194  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
195  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
196  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
197  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
198  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
199  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
200
201  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
202  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
203
204  if (X86ScalarSSE) {
205    // Set up the FP register classes.
206    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
207    addRegisterClass(MVT::f64, X86::FR64RegisterClass);
208
209    // SSE has no load+extend ops
210    setOperationAction(ISD::EXTLOAD,  MVT::f32, Expand);
211    setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand);
212
213    // Use ANDPD to simulate FABS.
214    setOperationAction(ISD::FABS , MVT::f64, Custom);
215    setOperationAction(ISD::FABS , MVT::f32, Custom);
216
217    // Use XORP to simulate FNEG.
218    setOperationAction(ISD::FNEG , MVT::f64, Custom);
219    setOperationAction(ISD::FNEG , MVT::f32, Custom);
220
221    // We don't support sin/cos/fmod
222    setOperationAction(ISD::FSIN , MVT::f64, Expand);
223    setOperationAction(ISD::FCOS , MVT::f64, Expand);
224    setOperationAction(ISD::FREM , MVT::f64, Expand);
225    setOperationAction(ISD::FSIN , MVT::f32, Expand);
226    setOperationAction(ISD::FCOS , MVT::f32, Expand);
227    setOperationAction(ISD::FREM , MVT::f32, Expand);
228
229    // Expand FP immediates into loads from the stack, except for the special
230    // cases we handle.
231    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
232    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
233    addLegalFPImmediate(+0.0); // xorps / xorpd
234  } else {
235    // Set up the FP register classes.
236    addRegisterClass(MVT::f64, X86::RFPRegisterClass);
237
238    setOperationAction(ISD::UNDEF, MVT::f64, Expand);
239
240    if (!UnsafeFPMath) {
241      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
242      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
243    }
244
245    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
246    addLegalFPImmediate(+0.0); // FLD0
247    addLegalFPImmediate(+1.0); // FLD1
248    addLegalFPImmediate(-0.0); // FLD0/FCHS
249    addLegalFPImmediate(-1.0); // FLD1/FCHS
250  }
251
252  // First set operation action for all vector types to expand. Then we
253  // will selectively turn on ones that can be effectively codegen'd.
254  for (unsigned VT = (unsigned)MVT::Vector + 1;
255       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
256    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
257    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
258    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
259    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
260    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
261    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
262    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
263  }
264
265  if (Subtarget->hasMMX()) {
266    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
267    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
268    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
269
270    // FIXME: add MMX packed arithmetic
271    setOperationAction(ISD::BUILD_VECTOR,     MVT::v8i8,  Expand);
272    setOperationAction(ISD::BUILD_VECTOR,     MVT::v4i16, Expand);
273    setOperationAction(ISD::BUILD_VECTOR,     MVT::v2i32, Expand);
274  }
275
276  if (Subtarget->hasSSE1()) {
277    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);
278
279    setOperationAction(ISD::AND,                MVT::v4f32, Legal);
280    setOperationAction(ISD::OR,                 MVT::v4f32, Legal);
281    setOperationAction(ISD::XOR,                MVT::v4f32, Legal);
282    setOperationAction(ISD::ADD,                MVT::v4f32, Legal);
283    setOperationAction(ISD::SUB,                MVT::v4f32, Legal);
284    setOperationAction(ISD::MUL,                MVT::v4f32, Legal);
285    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
286    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
287    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
288    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
289    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
290  }
291
292  if (Subtarget->hasSSE2()) {
293    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
294    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
295    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
296    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
297    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);
298
299    setOperationAction(ISD::ADD,                MVT::v2f64, Legal);
300    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
301    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
302    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
303    setOperationAction(ISD::SUB,                MVT::v2f64, Legal);
304    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
305    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
306    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
307    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
308    setOperationAction(ISD::MUL,                MVT::v2f64, Legal);
309
310    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
311    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
312    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
313    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
314    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
315    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);
316
317    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
318    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
319      setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
320      setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
321      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
322    }
323    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
324    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
325    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
326    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
327    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
328    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
329
330    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
331    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
332      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
333      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
334      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
335      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
336      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
337      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
338      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
339      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
340      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
341      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
342    }
343
344    // Custom lower v2i64 and v2f64 selects.
345    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
346    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
347    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
348    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
349  }
350
351  // We want to custom lower some of our intrinsics.
352  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
353
354  computeRegisterProperties();
355
356  // FIXME: These should be based on subtarget info. Plus, the values should
357  // be smaller when we are optimizing for size.
358  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
359  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
360  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
361  allowUnalignedMemoryAccesses = true; // x86 supports it!
362}
363
364std::vector<SDOperand>
365X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
366  std::vector<SDOperand> Args = TargetLowering::LowerArguments(F, DAG);
367
368  FormalArgs.clear();
369  FormalArgLocs.clear();
370
371  // This sets BytesToPopOnReturn, BytesCallerReserves, etc. which have to be set
372  // before the rest of the function can be lowered.
373  if (F.getCallingConv() == CallingConv::Fast && EnableFastCC)
374    PreprocessFastCCArguments(Args, F, DAG);
375  else
376    PreprocessCCCArguments(Args, F, DAG);
377  return Args;
378}
379
380std::pair<SDOperand, SDOperand>
381X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
382                               bool isVarArg, unsigned CallingConv,
383                               bool isTailCall,
384                               SDOperand Callee, ArgListTy &Args,
385                               SelectionDAG &DAG) {
386  assert((!isVarArg || CallingConv == CallingConv::C) &&
387         "Only C takes varargs!");
388
389  // If the callee is a GlobalAddress node (quite common, every direct call is)
390  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
391  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
392    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
393  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
394    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
395
396  if (CallingConv == CallingConv::Fast && EnableFastCC)
397    return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG);
398  return  LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG);
399}
400
401//===----------------------------------------------------------------------===//
402//                    C Calling Convention implementation
403//===----------------------------------------------------------------------===//
404
405/// AddLiveIn - This helper function adds the specified physical register to the
406/// MachineFunction as a live in value.  It also creates a corresponding virtual
407/// register for it.
408static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
409                          TargetRegisterClass *RC) {
410  assert(RC->contains(PReg) && "Not the correct regclass!");
411  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
412  MF.addLiveIn(PReg, VReg);
413  return VReg;
414}
415
416/// getFormalArgSize - Return the minimum number of bytes of stack space needed
417/// to hold an incoming argument of the specified type.
418static unsigned getFormalArgSize(MVT::ValueType ObjectVT) {
419  unsigned ObjSize = 0;
420  switch (ObjectVT) {
421  default: assert(0 && "Unhandled argument type!");
422  case MVT::i1:
423  case MVT::i8:  ObjSize = 1; break;
424  case MVT::i16: ObjSize = 2; break;
425  case MVT::i32: ObjSize = 4; break;
426  case MVT::i64: ObjSize = 8; break;
427  case MVT::f32: ObjSize = 4; break;
428  case MVT::f64: ObjSize = 8; break;
429  }
430  return ObjSize;
431}
432
433/// getFormalArgObjects - If Op wraps formal argument values (via a TRUNCATE,
434/// FP_ROUND or BUILD_PAIR), return those underlying values; else return Op.
435static std::vector<SDOperand> getFormalArgObjects(SDOperand Op) {
436  unsigned Opc = Op.getOpcode();
437  std::vector<SDOperand> Objs;
438  if (Opc == ISD::TRUNCATE) {
439    Op = Op.getOperand(0);
440    assert(Op.getOpcode() == ISD::AssertSext ||
441           Op.getOpcode() == ISD::AssertZext);
442    Objs.push_back(Op.getOperand(0));
443  } else if (Opc == ISD::FP_ROUND) {
444    Objs.push_back(Op.getOperand(0));
445  } else if (Opc == ISD::BUILD_PAIR) {
446    Objs.push_back(Op.getOperand(0));
447    Objs.push_back(Op.getOperand(1));
448  } else {
449    Objs.push_back(Op);
450  }
451  return Objs;
452}
453
454void X86TargetLowering::PreprocessCCCArguments(std::vector<SDOperand> Args,
455                                               Function &F, SelectionDAG &DAG) {
456  unsigned NumArgs = Args.size();
457  MachineFunction &MF = DAG.getMachineFunction();
458  MachineFrameInfo *MFI = MF.getFrameInfo();
459
460  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
461  // the stack frame looks like this:
462  //
463  // [ESP] -- return address
464  // [ESP + 4] -- first argument (leftmost lexically)
465  // [ESP + 8] -- second argument, if first argument is four bytes in size
466  //    ...
467  //
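  // For example, for a function declared as  void f(int a, double b, int c),
  // 'a' is found at [ESP + 4], 'b' at [ESP + 8] and 'c' at [ESP + 16]: every
  // argument is rounded up to a 4-byte slot, and i64/f64 arguments take 8 bytes.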
468  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
469  for (unsigned i = 0; i < NumArgs; ++i) {
470    SDOperand Op = Args[i];
471    std::vector<SDOperand> Objs = getFormalArgObjects(Op);
472    for (std::vector<SDOperand>::iterator I = Objs.begin(), E = Objs.end();
473         I != E; ++I) {
474      SDOperand Obj = *I;
475      MVT::ValueType ObjectVT = Obj.getValueType();
476      unsigned ArgIncrement = 4;
477      unsigned ObjSize = getFormalArgSize(ObjectVT);
478      if (ObjSize == 8)
479        ArgIncrement = 8;
480
481      // Create the frame index object for this incoming parameter...
482      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
483      std::pair<FALocInfo, FALocInfo> Loc =
484        std::make_pair(FALocInfo(FALocInfo::StackFrameLoc, FI), FALocInfo());
485      FormalArgLocs.push_back(Loc);
486      ArgOffset += ArgIncrement;   // Move on to the next argument...
487    }
488  }
489
490  // If the function takes a variable number of arguments, make a frame index for
491  // the start of the first vararg value... for expansion of llvm.va_start.
492  if (F.isVarArg())
493    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
494  ReturnAddrIndex = 0;     // No return address slot generated yet.
495  BytesToPopOnReturn = 0;  // Callee pops nothing.
496  BytesCallerReserves = ArgOffset;
497}
498
499void X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) {
500  unsigned NumArgs = Op.Val->getNumValues();
501  MachineFunction &MF = DAG.getMachineFunction();
502  MachineFrameInfo *MFI = MF.getFrameInfo();
503
504  for (unsigned i = 0; i < NumArgs; ++i) {
505    // Create the SelectionDAG nodes corresponding to a load from this parameter
506    unsigned FI = FormalArgLocs[i].first.Loc;
507    SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
508    SDOperand ArgValue = DAG.getLoad(Op.Val->getValueType(i),DAG.getEntryNode(),
509                                     FIN, DAG.getSrcValue(NULL));
510    FormalArgs.push_back(ArgValue);
511  }
512}
513
514std::pair<SDOperand, SDOperand>
515X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy,
516                                  bool isVarArg, bool isTailCall,
517                                  SDOperand Callee, ArgListTy &Args,
518                                  SelectionDAG &DAG) {
519  // Count how many bytes are to be pushed on the stack.
520  unsigned NumBytes = 0;
521
522  if (Args.empty()) {
523    // Save zero bytes.
524    Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(0, getPointerTy()));
525  } else {
526    for (unsigned i = 0, e = Args.size(); i != e; ++i)
527      switch (getValueType(Args[i].second)) {
528      default: assert(0 && "Unknown value type!");
529      case MVT::i1:
530      case MVT::i8:
531      case MVT::i16:
532      case MVT::i32:
533      case MVT::f32:
534        NumBytes += 4;
535        break;
536      case MVT::i64:
537      case MVT::f64:
538        NumBytes += 8;
539        break;
540      }
541
542    Chain = DAG.getCALLSEQ_START(Chain,
543                                 DAG.getConstant(NumBytes, getPointerTy()));
544
545    // Arguments go on the stack in reverse order, as specified by the ABI.
546    unsigned ArgOffset = 0;
547    SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
548    std::vector<SDOperand> Stores;
549
550    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
551      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
552      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
553
554      switch (getValueType(Args[i].second)) {
555      default: assert(0 && "Unexpected ValueType for argument!");
556      case MVT::i1:
557      case MVT::i8:
558      case MVT::i16:
559        // Promote the integer to 32 bits.  If the input type is signed use a
560        // sign extend, otherwise use a zero extend.
561        if (Args[i].second->isSigned())
562          Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
563        else
564          Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);
565
566        // FALL THROUGH
567      case MVT::i32:
568      case MVT::f32:
569        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
570                                     Args[i].first, PtrOff,
571                                     DAG.getSrcValue(NULL)));
572        ArgOffset += 4;
573        break;
574      case MVT::i64:
575      case MVT::f64:
576        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
577                                     Args[i].first, PtrOff,
578                                     DAG.getSrcValue(NULL)));
579        ArgOffset += 8;
580        break;
581      }
582    }
583    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);
584  }
585
586  std::vector<MVT::ValueType> RetVals;
587  MVT::ValueType RetTyVT = getValueType(RetTy);
588  RetVals.push_back(MVT::Other);
589
590  // The result values produced have to be legal.  Promote the result.
591  switch (RetTyVT) {
592  case MVT::isVoid: break;
593  default:
594    RetVals.push_back(RetTyVT);
595    break;
596  case MVT::i1:
597  case MVT::i8:
598  case MVT::i16:
599    RetVals.push_back(MVT::i32);
600    break;
601  case MVT::f32:
602    if (X86ScalarSSE)
603      RetVals.push_back(MVT::f32);
604    else
605      RetVals.push_back(MVT::f64);
606    break;
607  case MVT::i64:
608    RetVals.push_back(MVT::i32);
609    RetVals.push_back(MVT::i32);
610    break;
611  }
612
613  std::vector<MVT::ValueType> NodeTys;
614  NodeTys.push_back(MVT::Other);   // Returns a chain
615  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
616  std::vector<SDOperand> Ops;
617  Ops.push_back(Chain);
618  Ops.push_back(Callee);
619
620  // FIXME: Do not generate X86ISD::TAILCALL for now.
621  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
622  SDOperand InFlag = Chain.getValue(1);
623
624  NodeTys.clear();
625  NodeTys.push_back(MVT::Other);   // Returns a chain
626  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
627  Ops.clear();
628  Ops.push_back(Chain);
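  // The first constant is the size of the argument area being released; the
  // second is how many of those bytes the callee itself pops (none here, since
  // under the C calling convention the caller cleans up the stack).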
629  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
630  Ops.push_back(DAG.getConstant(0, getPointerTy()));
631  Ops.push_back(InFlag);
632  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
633  InFlag = Chain.getValue(1);
634
635  SDOperand RetVal;
636  if (RetTyVT != MVT::isVoid) {
637    switch (RetTyVT) {
638    default: assert(0 && "Unknown value type to return!");
639    case MVT::i1:
640    case MVT::i8:
641      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
642      Chain = RetVal.getValue(1);
643      if (RetTyVT == MVT::i1)
644        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
645      break;
646    case MVT::i16:
647      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
648      Chain = RetVal.getValue(1);
649      break;
650    case MVT::i32:
651      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
652      Chain = RetVal.getValue(1);
653      break;
654    case MVT::i64: {
655      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
656      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
657                                        Lo.getValue(2));
658      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
659      Chain = Hi.getValue(1);
660      break;
661    }
662    case MVT::f32:
663    case MVT::f64: {
664      std::vector<MVT::ValueType> Tys;
665      Tys.push_back(MVT::f64);
666      Tys.push_back(MVT::Other);
667      Tys.push_back(MVT::Flag);
668      std::vector<SDOperand> Ops;
669      Ops.push_back(Chain);
670      Ops.push_back(InFlag);
671      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
672      Chain  = RetVal.getValue(1);
673      InFlag = RetVal.getValue(2);
674      if (X86ScalarSSE) {
675        // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
676        // shouldn't be necessary except that RFP cannot be live across
677        // multiple blocks. When stackifier is fixed, they can be uncoupled.
678        MachineFunction &MF = DAG.getMachineFunction();
679        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
680        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
681        Tys.clear();
682        Tys.push_back(MVT::Other);
683        Ops.clear();
684        Ops.push_back(Chain);
685        Ops.push_back(RetVal);
686        Ops.push_back(StackSlot);
687        Ops.push_back(DAG.getValueType(RetTyVT));
688        Ops.push_back(InFlag);
689        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
690        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
691                             DAG.getSrcValue(NULL));
692        Chain = RetVal.getValue(1);
693      }
694
695      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
696        // FIXME: we would really like to remember that this FP_ROUND
697        // operation is okay to eliminate if we allow excess FP precision.
698        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
699      break;
700    }
701    }
702  }
703
704  return std::make_pair(RetVal, Chain);
705}
706
707//===----------------------------------------------------------------------===//
708//                    Fast Calling Convention implementation
709//===----------------------------------------------------------------------===//
710//
711// The X86 'fast' calling convention passes up to two integer arguments in
712// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
713// and requires that the callee pop its arguments off the stack (allowing proper
714// tail calls), and has the same return value conventions as C calling convs.
715//
716// This calling convention always arranges for the callee pop value to be 8n+4
717// bytes, which is needed for tail recursion elimination and stack alignment
718// reasons.
719//
720// Note that this can be enhanced in the future to pass fp vals in registers
721// (when we have a global fp allocator) and do other tricks.
722//
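// For example, a fastcc function whose stack arguments occupy 8 bytes has its
// callee-pop amount rounded up to 12 (8n+4 with n = 1); pushing the 4-byte
// return address on top of that keeps the stack 8-byte aligned on entry.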
723
724// FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments
725// to pass in registers.  0 is none, 1 is "use EAX", 2 is "use EAX and
726// EDX".  Anything more is illegal.
727//
728// FIXME: The linscan register allocator currently has problems with
729// coalescing.  At the time of this writing, whenever it decides to coalesce
730// a physreg with a virtreg, this increases the size of the physreg's live
731// range, and the live range cannot ever be reduced.  This causes problems if
732// too many physregs are coalesced with virtregs, which can cause the register
733// allocator to wedge itself.
734//
735// This code triggers this problem more often if we pass args in registers,
736// so disable it until this is fixed.
737//
738// NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings
739// about code being dead.
740//
741static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0;
742
743
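/// HowToPassFastCCArgument - Decide how a single fastcc argument of type
/// ObjectVT should be passed, given that NumIntRegs integer registers have
/// already been used.  On return, ObjIntRegs is the number of integer
/// registers the argument occupies and ObjSize is the number of stack bytes it
/// needs; an i64 may use both when it is split between a register and memory.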
744static void
745HowToPassFastCCArgument(MVT::ValueType ObjectVT, unsigned NumIntRegs,
746                        unsigned &ObjSize, unsigned &ObjIntRegs) {
747  ObjSize = 0;
748  ObjIntRegs = 0;
749
750  switch (ObjectVT) {
751  default: assert(0 && "Unhandled argument type!");
752  case MVT::i1:
753  case MVT::i8:
754    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
755      ObjIntRegs = 1;
756    else
757      ObjSize = 1;
758    break;
759  case MVT::i16:
760    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
761      ObjIntRegs = 1;
762    else
763      ObjSize = 2;
764    break;
765  case MVT::i32:
766    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
767      ObjIntRegs = 1;
768    else
769      ObjSize = 4;
770    break;
771  case MVT::i64:
772    if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
773      ObjIntRegs = 2;
774    } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
775      ObjIntRegs = 1;
776      ObjSize = 4;
777    } else
778      ObjSize = 8;
    break;
779  case MVT::f32:
780    ObjSize = 4;
781    break;
782  case MVT::f64:
783    ObjSize = 8;
784    break;
785  }
786}
787
788void
789X86TargetLowering::PreprocessFastCCArguments(std::vector<SDOperand> Args,
790                                             Function &F, SelectionDAG &DAG) {
791  unsigned NumArgs = Args.size();
792  MachineFunction &MF = DAG.getMachineFunction();
793  MachineFrameInfo *MFI = MF.getFrameInfo();
794
795  // Add DAG nodes to load the arguments...  On entry to a function the stack
796  // frame looks like this:
797  //
798  // [ESP] -- return address
799  // [ESP + 4] -- first nonreg argument (leftmost lexically)
800  // [ESP + 8] -- second nonreg argument, if first argument is 4 bytes in size
801  //    ...
802  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
803
804  // Keep track of the number of integer regs passed so far.  This can be either
805  // 0 (neither EAX nor EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
806  // used).
807  unsigned NumIntRegs = 0;
808
809  for (unsigned i = 0; i < NumArgs; ++i) {
810    SDOperand Op = Args[i];
811    std::vector<SDOperand> Objs = getFormalArgObjects(Op);
812    for (std::vector<SDOperand>::iterator I = Objs.begin(), E = Objs.end();
813         I != E; ++I) {
814      SDOperand Obj = *I;
815      MVT::ValueType ObjectVT = Obj.getValueType();
816      unsigned ArgIncrement = 4;
817      unsigned ObjSize = 0;
818      unsigned ObjIntRegs = 0;
819
820      HowToPassFastCCArgument(ObjectVT, NumIntRegs, ObjSize, ObjIntRegs);
821      if (ObjSize == 8)
822        ArgIncrement = 8;
823
824      unsigned Reg;
825      std::pair<FALocInfo,FALocInfo> Loc = std::make_pair(FALocInfo(),
826                                                          FALocInfo());
827      if (ObjIntRegs) {
829        switch (ObjectVT) {
830        default: assert(0 && "Unhandled argument type!");
831        case MVT::i1:
832        case MVT::i8:
833          Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
834                          X86::R8RegisterClass);
835          Loc.first.Kind = FALocInfo::LiveInRegLoc;
836          Loc.first.Loc = Reg;
837          Loc.first.Typ = MVT::i8;
838          break;
839        case MVT::i16:
840          Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
841                          X86::R16RegisterClass);
842          Loc.first.Kind = FALocInfo::LiveInRegLoc;
843          Loc.first.Loc = Reg;
844          Loc.first.Typ = MVT::i16;
845          break;
846        case MVT::i32:
847          Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
848                          X86::R32RegisterClass);
849          Loc.first.Kind = FALocInfo::LiveInRegLoc;
850          Loc.first.Loc = Reg;
851          Loc.first.Typ = MVT::i32;
852          break;
853        case MVT::i64:
854          Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
855                          X86::R32RegisterClass);
856          Loc.first.Kind = FALocInfo::LiveInRegLoc;
857          Loc.first.Loc = Reg;
858          Loc.first.Typ = MVT::i32;
859          if (ObjIntRegs == 2) {
860            Reg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);
861            Loc.second.Kind = FALocInfo::LiveInRegLoc;
862            Loc.second.Loc = Reg;
863            Loc.second.Typ = MVT::i32;
864          }
865          break;
866        }
        // Only after the physical registers for this argument have been chosen
        // do we consume them from the running count.
        NumIntRegs += ObjIntRegs;
867      }
868      if (ObjSize) {
869        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
870        if (ObjectVT == MVT::i64 && ObjIntRegs) {
871          Loc.second.Kind = FALocInfo::StackFrameLoc;
872          Loc.second.Loc = FI;
873        } else {
874          Loc.first.Kind = FALocInfo::StackFrameLoc;
875          Loc.first.Loc = FI;
876        }
877        ArgOffset += ArgIncrement;   // Move on to the next argument.
878      }
879
880      FormalArgLocs.push_back(Loc);
881    }
882  }
883
884  // Make sure the incoming argument area occupies 8n+4 bytes, so that the stack
885  // stays 8-byte aligned once the 4-byte return address has been pushed.
886  if ((ArgOffset & 7) == 0)
887    ArgOffset += 4;
888
889  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
890  ReturnAddrIndex = 0;             // No return address slot generated yet.
891  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
892  BytesCallerReserves = 0;
893
894  // Finally, inform the code generator which regs we return values in.
895  switch (getValueType(F.getReturnType())) {
896  default: assert(0 && "Unknown type!");
897  case MVT::isVoid: break;
898  case MVT::i1:
899  case MVT::i8:
900  case MVT::i16:
901  case MVT::i32:
902    MF.addLiveOut(X86::EAX);
903    break;
904  case MVT::i64:
905    MF.addLiveOut(X86::EAX);
906    MF.addLiveOut(X86::EDX);
907    break;
908  case MVT::f32:
909  case MVT::f64:
910    MF.addLiveOut(X86::ST0);
911    break;
912  }
913}
914void
915X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
916  unsigned NumArgs = Op.Val->getNumValues();
917  MachineFunction &MF = DAG.getMachineFunction();
918  MachineFrameInfo *MFI = MF.getFrameInfo();
919
920  for (unsigned i = 0; i < NumArgs; ++i) {
921    MVT::ValueType VT = Op.Val->getValueType(i);
922    std::pair<FALocInfo, FALocInfo> Loc = FormalArgLocs[i];
923    SDOperand ArgValue;
924    if (Loc.first.Kind == FALocInfo::StackFrameLoc) {
925      // Create the SelectionDAG nodes corresponding to a load from this parameter
926      SDOperand FIN = DAG.getFrameIndex(Loc.first.Loc, MVT::i32);
927      ArgValue = DAG.getLoad(Op.Val->getValueType(i),DAG.getEntryNode(), FIN,
928                             DAG.getSrcValue(NULL));
929    } else {
930      // Must be a CopyFromReg
931      ArgValue= DAG.getCopyFromReg(DAG.getRoot(), Loc.first.Loc, Loc.first.Typ);
932    }
933
934    if (Loc.second.Kind != FALocInfo::None) {
935      SDOperand ArgValue2;
936      if (Loc.second.Kind == FALocInfo::StackFrameLoc) {
937        // Create the SelectionDAG nodes corresponding to a load from this parameter
938        SDOperand FIN = DAG.getFrameIndex(Loc.second.Loc, MVT::i32);
939        ArgValue2 = DAG.getLoad(Op.Val->getValueType(i),DAG.getEntryNode(), FIN,
940                                DAG.getSrcValue(NULL));
941      } else {
942        // Must be a CopyFromReg
943        ArgValue2 = DAG.getCopyFromReg(DAG.getRoot(),
944                                       Loc.second.Loc, Loc.second.Typ);
945      }
946      ArgValue = DAG.getNode(ISD::BUILD_PAIR, VT, ArgValue, ArgValue2);
947    }
948    FormalArgs.push_back(ArgValue);
949  }
950}
951
952std::pair<SDOperand, SDOperand>
953X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy,
954                                     bool isTailCall, SDOperand Callee,
955                                     ArgListTy &Args, SelectionDAG &DAG) {
956  // Count how many bytes are to be pushed on the stack.
957  unsigned NumBytes = 0;
958
959  // Keep track of the number of integer regs passed so far.  This can be either
960  // 0 (neither EAX nor EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
961  // used).
962  unsigned NumIntRegs = 0;
963
964  for (unsigned i = 0, e = Args.size(); i != e; ++i)
965    switch (getValueType(Args[i].second)) {
966    default: assert(0 && "Unknown value type!");
967    case MVT::i1:
968    case MVT::i8:
969    case MVT::i16:
970    case MVT::i32:
971      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
972        ++NumIntRegs;
973        break;
974      }
975      // fall through
976    case MVT::f32:
977      NumBytes += 4;
978      break;
979    case MVT::i64:
980      if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
981        NumIntRegs += 2;
982        break;
983      } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
984        NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
985        NumBytes += 4;
986        break;
987      }
988
989      // fall through
990    case MVT::f64:
991      NumBytes += 8;
992      break;
993    }
994
995  // Make sure the outgoing argument area occupies 8n+4 bytes, so that the stack
996  // stays 8-byte aligned once the 4-byte return address has been pushed.
997  if ((NumBytes & 7) == 0)
998    NumBytes += 4;
999
1000  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));
1001
1002  // Arguments go on the stack in reverse order, as specified by the ABI.
1003  unsigned ArgOffset = 0;
1004  SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
1005  NumIntRegs = 0;
1006  std::vector<SDOperand> Stores;
1007  std::vector<SDOperand> RegValuesToPass;
1008  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
1009    switch (getValueType(Args[i].second)) {
1010    default: assert(0 && "Unexpected ValueType for argument!");
1011    case MVT::i1:
1012      Args[i].first = DAG.getNode(ISD::ANY_EXTEND, MVT::i8, Args[i].first);
1013      // Fall through.
1014    case MVT::i8:
1015    case MVT::i16:
1016    case MVT::i32:
1017      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
1018        RegValuesToPass.push_back(Args[i].first);
1019        ++NumIntRegs;
1020        break;
1021      }
1022      // Fall through
1023    case MVT::f32: {
1024      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1025      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
1026      Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1027                                   Args[i].first, PtrOff,
1028                                   DAG.getSrcValue(NULL)));
1029      ArgOffset += 4;
1030      break;
1031    }
1032    case MVT::i64:
1033       // Can pass (at least) part of it in regs?
1034      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
1035        SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
1036                                   Args[i].first, DAG.getConstant(1, MVT::i32));
1037        SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
1038                                   Args[i].first, DAG.getConstant(0, MVT::i32));
1039        RegValuesToPass.push_back(Lo);
1040        ++NumIntRegs;
1041
1042        // Pass both parts in regs?
1043        if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
1044          RegValuesToPass.push_back(Hi);
1045          ++NumIntRegs;
1046        } else {
1047          // Pass the high part in memory.
1048          SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1049          PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
1050          Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1051                                       Hi, PtrOff, DAG.getSrcValue(NULL)));
1052          ArgOffset += 4;
1053        }
1054        break;
1055      }
1056      // Fall through
1057    case MVT::f64:
1058      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
1059      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
1060      Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
1061                                   Args[i].first, PtrOff,
1062                                   DAG.getSrcValue(NULL)));
1063      ArgOffset += 8;
1064      break;
1065    }
1066  }
1067  if (!Stores.empty())
1068    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);
1069
1070  // Make sure the outgoing argument area occupies 8n+4 bytes, so that the stack
1071  // stays 8-byte aligned once the 4-byte return address has been pushed.
1072  if ((ArgOffset & 7) == 0)
1073    ArgOffset += 4;
1074
1075  std::vector<MVT::ValueType> RetVals;
1076  MVT::ValueType RetTyVT = getValueType(RetTy);
1077
1078  RetVals.push_back(MVT::Other);
1079
1080  // The result values produced have to be legal.  Promote the result.
1081  switch (RetTyVT) {
1082  case MVT::isVoid: break;
1083  default:
1084    RetVals.push_back(RetTyVT);
1085    break;
1086  case MVT::i1:
1087  case MVT::i8:
1088  case MVT::i16:
1089    RetVals.push_back(MVT::i32);
1090    break;
1091  case MVT::f32:
1092    if (X86ScalarSSE)
1093      RetVals.push_back(MVT::f32);
1094    else
1095      RetVals.push_back(MVT::f64);
1096    break;
1097  case MVT::i64:
1098    RetVals.push_back(MVT::i32);
1099    RetVals.push_back(MVT::i32);
1100    break;
1101  }
1102
1103  // Build a sequence of copy-to-reg nodes chained together with token chain
1104  // and flag operands which copy the outgoing args into registers.
1105  SDOperand InFlag;
1106  for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
1107    unsigned CCReg;
1108    SDOperand RegToPass = RegValuesToPass[i];
1109    switch (RegToPass.getValueType()) {
1110    default: assert(0 && "Bad thing to pass in regs");
1111    case MVT::i8:
1112      CCReg = (i == 0) ? X86::AL  : X86::DL;
1113      break;
1114    case MVT::i16:
1115      CCReg = (i == 0) ? X86::AX  : X86::DX;
1116      break;
1117    case MVT::i32:
1118      CCReg = (i == 0) ? X86::EAX : X86::EDX;
1119      break;
1120    }
1121
1122    Chain = DAG.getCopyToReg(Chain, CCReg, RegToPass, InFlag);
1123    InFlag = Chain.getValue(1);
1124  }
1125
1126  std::vector<MVT::ValueType> NodeTys;
1127  NodeTys.push_back(MVT::Other);   // Returns a chain
1128  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
1129  std::vector<SDOperand> Ops;
1130  Ops.push_back(Chain);
1131  Ops.push_back(Callee);
1132  if (InFlag.Val)
1133    Ops.push_back(InFlag);
1134
1135  // FIXME: Do not generate X86ISD::TAILCALL for now.
1136  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
1137  InFlag = Chain.getValue(1);
1138
1139  NodeTys.clear();
1140  NodeTys.push_back(MVT::Other);   // Returns a chain
1141  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
1142  Ops.clear();
1143  Ops.push_back(Chain);
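  // Unlike the C convention, a fastcc callee pops its own arguments, so the
  // callee-pop amount recorded here is the entire argument area.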
1144  Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
1145  Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
1146  Ops.push_back(InFlag);
1147  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
1148  InFlag = Chain.getValue(1);
1149
1150  SDOperand RetVal;
1151  if (RetTyVT != MVT::isVoid) {
1152    switch (RetTyVT) {
1153    default: assert(0 && "Unknown value type to return!");
1154    case MVT::i1:
1155    case MVT::i8:
1156      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
1157      Chain = RetVal.getValue(1);
1158      if (RetTyVT == MVT::i1)
1159        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
1160      break;
1161    case MVT::i16:
1162      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
1163      Chain = RetVal.getValue(1);
1164      break;
1165    case MVT::i32:
1166      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
1167      Chain = RetVal.getValue(1);
1168      break;
1169    case MVT::i64: {
1170      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
1171      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
1172                                        Lo.getValue(2));
1173      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
1174      Chain = Hi.getValue(1);
1175      break;
1176    }
1177    case MVT::f32:
1178    case MVT::f64: {
1179      std::vector<MVT::ValueType> Tys;
1180      Tys.push_back(MVT::f64);
1181      Tys.push_back(MVT::Other);
1182      Tys.push_back(MVT::Flag);
1183      std::vector<SDOperand> Ops;
1184      Ops.push_back(Chain);
1185      Ops.push_back(InFlag);
1186      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
1187      Chain  = RetVal.getValue(1);
1188      InFlag = RetVal.getValue(2);
1189      if (X86ScalarSSE) {
1190        // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
1191        // shouldn't be necessary except that RFP cannot be live across
1192        // multiple blocks. When stackifier is fixed, they can be uncoupled.
1193        MachineFunction &MF = DAG.getMachineFunction();
1194        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
1195        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
1196        Tys.clear();
1197        Tys.push_back(MVT::Other);
1198        Ops.clear();
1199        Ops.push_back(Chain);
1200        Ops.push_back(RetVal);
1201        Ops.push_back(StackSlot);
1202        Ops.push_back(DAG.getValueType(RetTyVT));
1203        Ops.push_back(InFlag);
1204        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
1205        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
1206                             DAG.getSrcValue(NULL));
1207        Chain = RetVal.getValue(1);
1208      }
1209
1210      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
1211        // FIXME: we would really like to remember that this FP_ROUND
1212        // operation is okay to eliminate if we allow excess FP precision.
1213        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
1214      break;
1215    }
1216    }
1217  }
1218
1219  return std::make_pair(RetVal, Chain);
1220}
1221
1222SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
1223  if (ReturnAddrIndex == 0) {
1224    // Set up a frame object for the return address.
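    // The return address sits immediately below the first incoming argument on
    // the stack, hence a 4-byte fixed object at offset -4.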
1225    MachineFunction &MF = DAG.getMachineFunction();
1226    ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
1227  }
1228
1229  return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32);
1230}
1231
1232
1233
1234std::pair<SDOperand, SDOperand> X86TargetLowering::
1235LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
1236                        SelectionDAG &DAG) {
1237  SDOperand Result;
1238  if (Depth)        // Depths > 0 not supported yet!
1239    Result = DAG.getConstant(0, getPointerTy());
1240  else {
1241    SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
1242    if (!isFrameAddress)
1243      // Just load the return address
1244      Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI,
1245                           DAG.getSrcValue(NULL));
1246    else
1247      Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI,
1248                           DAG.getConstant(4, MVT::i32));
1249  }
1250  return std::make_pair(Result, Chain);
1251}
1252
1253/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode
1254/// which corresponds to the condition code.
1255static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) {
1256  switch (X86CC) {
1257  default: assert(0 && "Unknown X86 conditional code!");
1258  case X86ISD::COND_A:  return X86::JA;
1259  case X86ISD::COND_AE: return X86::JAE;
1260  case X86ISD::COND_B:  return X86::JB;
1261  case X86ISD::COND_BE: return X86::JBE;
1262  case X86ISD::COND_E:  return X86::JE;
1263  case X86ISD::COND_G:  return X86::JG;
1264  case X86ISD::COND_GE: return X86::JGE;
1265  case X86ISD::COND_L:  return X86::JL;
1266  case X86ISD::COND_LE: return X86::JLE;
1267  case X86ISD::COND_NE: return X86::JNE;
1268  case X86ISD::COND_NO: return X86::JNO;
1269  case X86ISD::COND_NP: return X86::JNP;
1270  case X86ISD::COND_NS: return X86::JNS;
1271  case X86ISD::COND_O:  return X86::JO;
1272  case X86ISD::COND_P:  return X86::JP;
1273  case X86ISD::COND_S:  return X86::JS;
1274  }
1275}
1276
1277/// translateX86CC - Do a one-to-one translation of an ISD::CondCode to the
1278/// X86-specific condition code, returning false if no direct translation
1279/// exists. X86CC is the translated CondCode. Flip is set to true if the
1280/// order of the comparison operands should be flipped.
1281static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
1282                           unsigned &X86CC, bool &Flip) {
1283  Flip = false;
1284  X86CC = X86ISD::COND_INVALID;
1285  if (!isFP) {
1286    switch (SetCCOpcode) {
1287    default: break;
1288    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
1289    case ISD::SETGT:  X86CC = X86ISD::COND_G;  break;
1290    case ISD::SETGE:  X86CC = X86ISD::COND_GE; break;
1291    case ISD::SETLT:  X86CC = X86ISD::COND_L;  break;
1292    case ISD::SETLE:  X86CC = X86ISD::COND_LE; break;
1293    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
1294    case ISD::SETULT: X86CC = X86ISD::COND_B;  break;
1295    case ISD::SETUGT: X86CC = X86ISD::COND_A;  break;
1296    case ISD::SETULE: X86CC = X86ISD::COND_BE; break;
1297    case ISD::SETUGE: X86CC = X86ISD::COND_AE; break;
1298    }
1299  } else {
1300    // On a floating point condition, the flags are set as follows:
1301    // ZF  PF  CF   op
1302    //  0 | 0 | 0 | X > Y
1303    //  0 | 0 | 1 | X < Y
1304    //  1 | 0 | 0 | X == Y
1305    //  1 | 1 | 1 | unordered
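    //
    // Since these flags only distinguish "above", "below", "equal" and
    // "unordered", the ordered "less than" style predicates (e.g. SETOLT) are
    // handled below by flipping the comparison operands and reusing the
    // "above" condition codes.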
1306    switch (SetCCOpcode) {
1307    default: break;
1308    case ISD::SETUEQ:
1309    case ISD::SETEQ: X86CC = X86ISD::COND_E;  break;
1310    case ISD::SETOLT: Flip = true; // Fallthrough
1311    case ISD::SETOGT:
1312    case ISD::SETGT: X86CC = X86ISD::COND_A;  break;
1313    case ISD::SETOLE: Flip = true; // Fallthrough
1314    case ISD::SETOGE:
1315    case ISD::SETGE: X86CC = X86ISD::COND_AE; break;
1316    case ISD::SETUGT: Flip = true; // Fallthrough
1317    case ISD::SETULT:
1318    case ISD::SETLT: X86CC = X86ISD::COND_B;  break;
1319    case ISD::SETUGE: Flip = true; // Fallthrough
1320    case ISD::SETULE:
1321    case ISD::SETLE: X86CC = X86ISD::COND_BE; break;
1322    case ISD::SETONE:
1323    case ISD::SETNE: X86CC = X86ISD::COND_NE; break;
1324    case ISD::SETUO: X86CC = X86ISD::COND_P;  break;
1325    case ISD::SETO:  X86CC = X86ISD::COND_NP; break;
1326    }
1327  }
1328
1329  return X86CC != X86ISD::COND_INVALID;
1330}
1331
1332static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC,
1333                           bool &Flip) {
1334  return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip);
1335}
1336
1337/// hasFPCMov - Is there a floating point cmov for the specific X86 condition
1338/// code?  The current X86 ISA includes the following FP cmov instructions:
1339/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
1340static bool hasFPCMov(unsigned X86CC) {
1341  switch (X86CC) {
1342  default:
1343    return false;
1344  case X86ISD::COND_B:
1345  case X86ISD::COND_BE:
1346  case X86ISD::COND_E:
1347  case X86ISD::COND_P:
1348  case X86ISD::COND_A:
1349  case X86ISD::COND_AE:
1350  case X86ISD::COND_NE:
1351  case X86ISD::COND_NP:
1352    return true;
1353  }
1354}
1355
1356MachineBasicBlock *
1357X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
1358                                           MachineBasicBlock *BB) {
1359  switch (MI->getOpcode()) {
1360  default: assert(false && "Unexpected instr type to insert");
1361  case X86::CMOV_FR32:
1362  case X86::CMOV_FR64:
1363  case X86::CMOV_V4F32:
1364  case X86::CMOV_V2F64:
1365  case X86::CMOV_V2I64: {
1366    // To "insert" a SELECT_CC instruction, we actually have to insert the
1367    // diamond control-flow pattern.  The incoming instruction knows the
1368    // destination vreg to set, the condition code register to branch on, the
1369    // true/false values to select between, and a branch opcode to use.
1370    const BasicBlock *LLVM_BB = BB->getBasicBlock();
1371    ilist<MachineBasicBlock>::iterator It = BB;
1372    ++It;
1373
1374    //  thisMBB:
1375    //  ...
1376    //   TrueVal = ...
1377    //   cmpTY ccX, r1, r2
1378    //   bCC copy1MBB
1379    //   fallthrough --> copy0MBB
1380    MachineBasicBlock *thisMBB = BB;
1381    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
1382    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
1383    unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue());
1384    BuildMI(BB, Opc, 1).addMBB(sinkMBB);
1385    MachineFunction *F = BB->getParent();
1386    F->getBasicBlockList().insert(It, copy0MBB);
1387    F->getBasicBlockList().insert(It, sinkMBB);
1388    // Update machine-CFG edges by first adding all successors of the current
1389    // block to the new block which will contain the Phi node for the select.
1390    for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
1391        e = BB->succ_end(); i != e; ++i)
1392      sinkMBB->addSuccessor(*i);
1393    // Next, remove all successors of the current block, and add the true
1394    // and fallthrough blocks as its successors.
1395    while(!BB->succ_empty())
1396      BB->removeSuccessor(BB->succ_begin());
1397    BB->addSuccessor(copy0MBB);
1398    BB->addSuccessor(sinkMBB);
1399
1400    //  copy0MBB:
1401    //   %FalseValue = ...
1402    //   # fallthrough to sinkMBB
1403    BB = copy0MBB;
1404
1405    // Update machine-CFG edges
1406    BB->addSuccessor(sinkMBB);
1407
1408    //  sinkMBB:
1409    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
1410    //  ...
1411    BB = sinkMBB;
1412    BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg())
1413      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
1414      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
1415
1416    delete MI;   // The pseudo instruction is gone now.
1417    return BB;
1418  }
1419
1420  case X86::FP_TO_INT16_IN_MEM:
1421  case X86::FP_TO_INT32_IN_MEM:
1422  case X86::FP_TO_INT64_IN_MEM: {
1423    // Change the floating point control register to use "round towards zero"
1424    // mode when truncating to an integer value.
1425    MachineFunction *F = BB->getParent();
1426    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
1427    addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
1428
1429    // Load the old value of the high byte of the control word...
1430    unsigned OldCW =
1431      F->getSSARegMap()->createVirtualRegister(X86::R16RegisterClass);
1432    addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);
1433
1434    // Set the high part to be round to zero...
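    // (0xC7F masks all x87 exceptions and sets the rounding-control field,
    // bits 10-11 of the control word, to 11b, i.e. truncate toward zero.)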
1435    addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);
1436
1437    // Reload the modified control word now...
1438    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
1439
1440    // Restore the memory image of control word to original value
1441    addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);
1442
1443    // Get the X86 opcode to use.
1444    unsigned Opc;
1445    switch (MI->getOpcode()) {
1446    default: assert(0 && "illegal opcode!");
1447    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
1448    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
1449    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
1450    }
1451
1452    X86AddressMode AM;
1453    MachineOperand &Op = MI->getOperand(0);
1454    if (Op.isRegister()) {
1455      AM.BaseType = X86AddressMode::RegBase;
1456      AM.Base.Reg = Op.getReg();
1457    } else {
1458      AM.BaseType = X86AddressMode::FrameIndexBase;
1459      AM.Base.FrameIndex = Op.getFrameIndex();
1460    }
1461    Op = MI->getOperand(1);
1462    if (Op.isImmediate())
1463      AM.Scale = Op.getImmedValue();
1464    Op = MI->getOperand(2);
1465    if (Op.isImmediate())
1466      AM.IndexReg = Op.getImmedValue();
1467    Op = MI->getOperand(3);
1468    if (Op.isGlobalAddress()) {
1469      AM.GV = Op.getGlobal();
1470    } else {
1471      AM.Disp = Op.getImmedValue();
1472    }
1473    addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());
1474
1475    // Reload the original control word now.
1476    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
1477
1478    delete MI;   // The pseudo instruction is gone now.
1479    return BB;
1480  }
1481  }
1482}
1483
1484
1485//===----------------------------------------------------------------------===//
1486//                           X86 Custom Lowering Hooks
1487//===----------------------------------------------------------------------===//
1488
1489/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra
1490/// load. For Darwin, external and weak symbols are indirect, loading the value
1491/// at address GV rather than the value of GV itself. This means that the
1492/// GlobalAddress must be in the base or index register of the address, not the
1493/// GV offset field.
1494static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
1495  return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
1496          (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
1497}
1498
1499/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
1500/// true if Op is undef or if its value falls within the specified range [Low, Hi).
1501static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
1502  if (Op.getOpcode() == ISD::UNDEF)
1503    return true;
1504
1505  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
1506  return (Val >= Low && Val < Hi);
1507}
1508
1509/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
1510/// true if Op is undef or if its value is equal to the specified value.
1511static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
1512  if (Op.getOpcode() == ISD::UNDEF)
1513    return true;
1514  return cast<ConstantSDNode>(Op)->getValue() == Val;
1515}
1516
1517/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
1518/// specifies a shuffle of elements that is suitable for input to PSHUFD.
1519bool X86::isPSHUFDMask(SDNode *N) {
1520  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1521
1522  if (N->getNumOperands() != 4)
1523    return false;
1524
1525  // Check if the value doesn't reference the second vector.
1526  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1527    SDOperand Arg = N->getOperand(i);
1528    if (Arg.getOpcode() == ISD::UNDEF) continue;
1529    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1530    if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
1531      return false;
1532  }
1533
1534  return true;
1535}
1536
1537/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
1538/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
1539bool X86::isPSHUFHWMask(SDNode *N) {
1540  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1541
1542  if (N->getNumOperands() != 8)
1543    return false;
1544
1545  // Lower quadword copied in order.
1546  for (unsigned i = 0; i != 4; ++i) {
1547    SDOperand Arg = N->getOperand(i);
1548    if (Arg.getOpcode() == ISD::UNDEF) continue;
1549    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1550    if (cast<ConstantSDNode>(Arg)->getValue() != i)
1551      return false;
1552  }
1553
1554  // Upper quadword shuffled.
1555  for (unsigned i = 4; i != 8; ++i) {
1556    SDOperand Arg = N->getOperand(i);
1557    if (Arg.getOpcode() == ISD::UNDEF) continue;
1558    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1559    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1560    if (Val < 4 || Val > 7)
1561      return false;
1562  }
1563
1564  return true;
1565}
1566
1567/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
1568/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
1569bool X86::isPSHUFLWMask(SDNode *N) {
1570  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1571
1572  if (N->getNumOperands() != 8)
1573    return false;
1574
1575  // Upper quadword copied in order.
1576  for (unsigned i = 4; i != 8; ++i)
1577    if (!isUndefOrEqual(N->getOperand(i), i))
1578      return false;
1579
1580  // Lower quadword shuffled.
1581  for (unsigned i = 0; i != 4; ++i)
1582    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
1583      return false;
1584
1585  return true;
1586}
1587
1588/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
1589/// specifies a shuffle of elements that is suitable for input to SHUFP*.
1590static bool isSHUFPMask(std::vector<SDOperand> &N) {
1591  unsigned NumElems = N.size();
1592  if (NumElems != 2 && NumElems != 4) return false;
1593
1594  unsigned Half = NumElems / 2;
1595  for (unsigned i = 0; i < Half; ++i)
1596    if (!isUndefOrInRange(N[i], 0, NumElems))
1597      return false;
1598  for (unsigned i = Half; i < NumElems; ++i)
1599    if (!isUndefOrInRange(N[i], NumElems, NumElems*2))
1600      return false;
1601
1602  return true;
1603}
1604
1605bool X86::isSHUFPMask(SDNode *N) {
1606  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1607  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1608  return ::isSHUFPMask(Ops);
1609}
1610
1611/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
1612/// the reverse of what x86 shuffles want. x86 shuffles require the lower
1613/// half elements to come from vector 1 (which would equal the destination) and
1614/// the upper half to come from vector 2.
1615static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) {
1616  unsigned NumElems = Ops.size();
1617  if (NumElems != 2 && NumElems != 4) return false;
1618
1619  unsigned Half = NumElems / 2;
1620  for (unsigned i = 0; i < Half; ++i)
1621    if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2))
1622      return false;
1623  for (unsigned i = Half; i < NumElems; ++i)
1624    if (!isUndefOrInRange(Ops[i], 0, NumElems))
1625      return false;
1626  return true;
1627}
1628
1629static bool isCommutedSHUFP(SDNode *N) {
1630  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1631  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1632  return isCommutedSHUFP(Ops);
1633}
1634
1635/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
1636/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
1637bool X86::isMOVHLPSMask(SDNode *N) {
1638  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1639
1640  if (N->getNumOperands() != 4)
1641    return false;
1642
1643  // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
1644  return isUndefOrEqual(N->getOperand(0), 6) &&
1645         isUndefOrEqual(N->getOperand(1), 7) &&
1646         isUndefOrEqual(N->getOperand(2), 2) &&
1647         isUndefOrEqual(N->getOperand(3), 3);
1648}
1649
1650/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
1651/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
1652bool X86::isMOVLPMask(SDNode *N) {
1653  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1654
1655  unsigned NumElems = N->getNumOperands();
1656  if (NumElems != 2 && NumElems != 4)
1657    return false;
1658
1659  for (unsigned i = 0; i < NumElems/2; ++i)
1660    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
1661      return false;
1662
1663  for (unsigned i = NumElems/2; i < NumElems; ++i)
1664    if (!isUndefOrEqual(N->getOperand(i), i))
1665      return false;
1666
1667  return true;
1668}
1669
1670/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
1671/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
1672/// and MOVLHPS.
1673bool X86::isMOVHPMask(SDNode *N) {
1674  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1675
1676  unsigned NumElems = N->getNumOperands();
1677  if (NumElems != 2 && NumElems != 4)
1678    return false;
1679
1680  for (unsigned i = 0; i < NumElems/2; ++i)
1681    if (!isUndefOrEqual(N->getOperand(i), i))
1682      return false;
1683
1684  for (unsigned i = 0; i < NumElems/2; ++i) {
1685    SDOperand Arg = N->getOperand(i + NumElems/2);
1686    if (!isUndefOrEqual(Arg, i + NumElems))
1687      return false;
1688  }
1689
1690  return true;
1691}
1692
1693/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
1694/// specifies a shuffle of elements that is suitable for input to UNPCKL.
1695bool static isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
1696  unsigned NumElems = N.size();
1697  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1698    return false;
1699
1700  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
1701    SDOperand BitI  = N[i];
1702    SDOperand BitI1 = N[i+1];
1703    if (!isUndefOrEqual(BitI, j))
1704      return false;
1705    if (V2IsSplat) {
1706      if (isUndefOrEqual(BitI1, NumElems))
1707        return false;
1708    } else {
1709      if (!isUndefOrEqual(BitI1, j + NumElems))
1710        return false;
1711    }
1712  }
1713
1714  return true;
1715}
1716
1717bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
1718  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1719  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1720  return ::isUNPCKLMask(Ops, V2IsSplat);
1721}
1722
1723/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
1724/// specifies a shuffle of elements that is suitable for input to UNPCKH.
1725bool static isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
1726  unsigned NumElems = N.size();
1727  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1728    return false;
1729
1730  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
1731    SDOperand BitI  = N[i];
1732    SDOperand BitI1 = N[i+1];
1733    if (!isUndefOrEqual(BitI, j + NumElems/2))
1734      return false;
1735    if (V2IsSplat) {
1736      if (isUndefOrEqual(BitI1, NumElems))
1737        return false;
1738    } else {
1739      if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems))
1740        return false;
1741    }
1742  }
1743
1744  return true;
1745}
1746
1747bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
1748  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1749  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1750  return ::isUNPCKHMask(Ops, V2IsSplat);
1751}
1752
1753/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
1754/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
1755/// <0, 0, 1, 1>
1756bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
1757  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1758
1759  unsigned NumElems = N->getNumOperands();
1760  if (NumElems != 4 && NumElems != 8 && NumElems != 16)
1761    return false;
1762
1763  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
1764    SDOperand BitI  = N->getOperand(i);
1765    SDOperand BitI1 = N->getOperand(i+1);
1766
1767    if (!isUndefOrEqual(BitI, j))
1768      return false;
1769    if (!isUndefOrEqual(BitI1, j))
1770      return false;
1771  }
1772
1773  return true;
1774}
1775
1776/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
1777/// specifies a shuffle of elements that is suitable for input to MOVSS,
1778/// MOVSD, and MOVD, i.e. setting the lowest element.
1779static bool isMOVLMask(std::vector<SDOperand> &N) {
1780  unsigned NumElems = N.size();
1781  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1782    return false;
1783
1784  if (!isUndefOrEqual(N[0], NumElems))
1785    return false;
1786
1787  for (unsigned i = 1; i < NumElems; ++i) {
1788    SDOperand Arg = N[i];
1789    if (!isUndefOrEqual(Arg, i))
1790      return false;
1791  }
1792
1793  return true;
1794}
1795
1796bool X86::isMOVLMask(SDNode *N) {
1797  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1798  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1799  return ::isMOVLMask(Ops);
1800}
1801
1802/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
1803/// of what x86 movss wants. X86 movss requires the lowest element to be the
1804/// lowest element of vector 2 and the other elements to come from vector 1 in order.
1805static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false) {
1806  unsigned NumElems = Ops.size();
1807  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1808    return false;
1809
1810  if (!isUndefOrEqual(Ops[0], 0))
1811    return false;
1812
1813  for (unsigned i = 1; i < NumElems; ++i) {
1814    SDOperand Arg = Ops[i];
1815    if (V2IsSplat) {
1816      if (!isUndefOrEqual(Arg, NumElems))
1817        return false;
1818    } else {
1819      if (!isUndefOrEqual(Arg, i+NumElems))
1820        return false;
1821    }
1822  }
1823
1824  return true;
1825}
1826
1827static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false) {
1828  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1829  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1830  return isCommutedMOVL(Ops, V2IsSplat);
1831}
1832
1833/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
1834/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
1835bool X86::isMOVSHDUPMask(SDNode *N) {
1836  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1837
1838  if (N->getNumOperands() != 4)
1839    return false;
1840
1841  // Expect 1, 1, 3, 3
1842  for (unsigned i = 0; i < 2; ++i) {
1843    SDOperand Arg = N->getOperand(i);
1844    if (Arg.getOpcode() == ISD::UNDEF) continue;
1845    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1846    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1847    if (Val != 1) return false;
1848  }
1849
1850  bool HasHi = false;
1851  for (unsigned i = 2; i < 4; ++i) {
1852    SDOperand Arg = N->getOperand(i);
1853    if (Arg.getOpcode() == ISD::UNDEF) continue;
1854    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1855    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1856    if (Val != 3) return false;
1857    HasHi = true;
1858  }
1859
1860  // Don't use movshdup if it can be done with a shufps.
1861  return HasHi;
1862}
1863
1864/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
1865/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
1866bool X86::isMOVSLDUPMask(SDNode *N) {
1867  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1868
1869  if (N->getNumOperands() != 4)
1870    return false;
1871
1872  // Expect 0, 0, 2, 2
1873  for (unsigned i = 0; i < 2; ++i) {
1874    SDOperand Arg = N->getOperand(i);
1875    if (Arg.getOpcode() == ISD::UNDEF) continue;
1876    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1877    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1878    if (Val != 0) return false;
1879  }
1880
1881  bool HasHi = false;
1882  for (unsigned i = 2; i < 4; ++i) {
1883    SDOperand Arg = N->getOperand(i);
1884    if (Arg.getOpcode() == ISD::UNDEF) continue;
1885    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1886    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1887    if (Val != 2) return false;
1888    HasHi = true;
1889  }
1890
1891  // Don't use movsldup if it can be done with a shufps.
1892  return HasHi;
1893}
1894
1895/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
1896/// a splat of a single element.
1897static bool isSplatMask(SDNode *N) {
1898  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1899
1900  // This is a splat operation if each element of the permute is the same, and
1901  // if the value doesn't reference the second vector.
1902  unsigned NumElems = N->getNumOperands();
1903  SDOperand ElementBase;
1904  unsigned i = 0;
1905  for (; i != NumElems; ++i) {
1906    SDOperand Elt = N->getOperand(i);
1907    if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt)) {
1908      ElementBase = Elt;
1909      break;
1910    }
1911  }
1912
1913  if (!ElementBase.Val)
1914    return false;
1915
1916  for (; i != NumElems; ++i) {
1917    SDOperand Arg = N->getOperand(i);
1918    if (Arg.getOpcode() == ISD::UNDEF) continue;
1919    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1920    if (Arg != ElementBase) return false;
1921  }
1922
1923  // Make sure it is a splat of the first vector operand.
1924  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
1925}
1926
1927/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
1928/// a splat of a single element and it's a 2 or 4 element mask.
1929bool X86::isSplatMask(SDNode *N) {
1930  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1931
1932  // We can only splat 64-bit, and 32-bit quantities with a single instruction.
1933  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
1934    return false;
1935  return ::isSplatMask(N);
1936}
1937
1938/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
1939/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
1940/// instructions.
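/// For example, a 4-element mask <3, 2, 1, 0> yields the immediate 0x1B
/// (0b00011011): the loop below walks the mask in reverse, packing two bits
/// per element.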
1941unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
1942  unsigned NumOperands = N->getNumOperands();
1943  unsigned Shift = (NumOperands == 4) ? 2 : 1;
1944  unsigned Mask = 0;
1945  for (unsigned i = 0; i < NumOperands; ++i) {
1946    unsigned Val = 0;
1947    SDOperand Arg = N->getOperand(NumOperands-i-1);
1948    if (Arg.getOpcode() != ISD::UNDEF)
1949      Val = cast<ConstantSDNode>(Arg)->getValue();
1950    if (Val >= NumOperands) Val -= NumOperands;
1951    Mask |= Val;
1952    if (i != NumOperands - 1)
1953      Mask <<= Shift;
1954  }
1955
1956  return Mask;
1957}
1958
1959/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
1960/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
1961/// instructions.
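/// For example, an identity upper half <..., 4, 5, 6, 7> yields 0xE4
/// (0b11100100), the "leave in place" immediate.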
1962unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
1963  unsigned Mask = 0;
1964  // 8 nodes, but we only care about the last 4.
1965  for (unsigned i = 7; i >= 4; --i) {
1966    unsigned Val = 0;
1967    SDOperand Arg = N->getOperand(i);
1968    if (Arg.getOpcode() != ISD::UNDEF)
1969      Val = cast<ConstantSDNode>(Arg)->getValue();
1970    Mask |= (Val - 4);
1971    if (i != 4)
1972      Mask <<= 2;
1973  }
1974
1975  return Mask;
1976}
1977
1978/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
1979/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
1980/// instructions.
1981unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
1982  unsigned Mask = 0;
1983  // 8 nodes, but we only care about the first 4.
1984  for (int i = 3; i >= 0; --i) {
1985    unsigned Val = 0;
1986    SDOperand Arg = N->getOperand(i);
1987    if (Arg.getOpcode() != ISD::UNDEF)
1988      Val = cast<ConstantSDNode>(Arg)->getValue();
1989    Mask |= Val;
1990    if (i != 0)
1991      Mask <<= 2;
1992  }
1993
1994  return Mask;
1995}
1996
1997/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
1998/// specifies an 8 element shuffle that can be broken into a pair of
1999/// PSHUFHW and PSHUFLW.
2000static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
2001  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2002
2003  if (N->getNumOperands() != 8)
2004    return false;
2005
2006  // Lower quadword shuffled.
2007  for (unsigned i = 0; i != 4; ++i) {
2008    SDOperand Arg = N->getOperand(i);
2009    if (Arg.getOpcode() == ISD::UNDEF) continue;
2010    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2011    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2012    if (Val > 4)
2013      return false;
2014  }
2015
2016  // Upper quadword shuffled.
2017  for (unsigned i = 4; i != 8; ++i) {
2018    SDOperand Arg = N->getOperand(i);
2019    if (Arg.getOpcode() == ISD::UNDEF) continue;
2020    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2021    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2022    if (Val < 4 || Val > 7)
2023      return false;
2024  }
2025
2026  return true;
2027}
2028
2029/// CommuteVectorShuffle - Swap vector_shuffle operands as well as
2030/// the values in the permute mask.
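/// For example, vector_shuffle V1, V2, <0, 1, 4, 5> becomes
/// vector_shuffle V2, V1, <4, 5, 0, 1>.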
2031static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) {
2032  SDOperand V1 = Op.getOperand(0);
2033  SDOperand V2 = Op.getOperand(1);
2034  SDOperand Mask = Op.getOperand(2);
2035  MVT::ValueType VT = Op.getValueType();
2036  MVT::ValueType MaskVT = Mask.getValueType();
2037  MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
2038  unsigned NumElems = Mask.getNumOperands();
2039  std::vector<SDOperand> MaskVec;
2040
2041  for (unsigned i = 0; i != NumElems; ++i) {
2042    SDOperand Arg = Mask.getOperand(i);
2043    if (Arg.getOpcode() == ISD::UNDEF) {
2044      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
2045      continue;
2046    }
2047    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2048    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2049    if (Val < NumElems)
2050      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
2051    else
2052      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
2053  }
2054
2055  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2056  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
2057}
2058
2059/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
2060/// match movhlps. The lower half elements should come from the upper half of
2061/// V1 (and in order), and the upper half elements should come from the upper
2062/// half of V2 (and in order).
2063static bool ShouldXformToMOVHLPS(SDNode *Mask) {
2064  unsigned NumElems = Mask->getNumOperands();
2065  if (NumElems != 4)
2066    return false;
2067  for (unsigned i = 0, e = 2; i != e; ++i)
2068    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
2069      return false;
2070  for (unsigned i = 2; i != 4; ++i)
2071    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
2072      return false;
2073  return true;
2074}
2075
2076/// isScalarLoadToVector - Returns true if the node is a scalar load that
2077/// is promoted to a vector.
2078static inline bool isScalarLoadToVector(SDNode *N) {
2079  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
2080    N = N->getOperand(0).Val;
2081    return (N->getOpcode() == ISD::LOAD);
2082  }
2083  return false;
2084}
2085
2086/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
2087/// match movlp{s|d}. The lower half elements should come from the lower half of
2088/// V1 (and in order), and the upper half elements should come from the upper
2089/// half of V2 (and in order). And since V1 will become the source of the
2090/// MOVLP, it must be either a vector load or a scalar load to vector.
2091static bool ShouldXformToMOVLP(SDNode *V1, SDNode *Mask) {
2092  if (V1->getOpcode() != ISD::LOAD && !isScalarLoadToVector(V1))
2093    return false;
2094
2095  unsigned NumElems = Mask->getNumOperands();
2096  if (NumElems != 2 && NumElems != 4)
2097    return false;
2098  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
2099    if (!isUndefOrEqual(Mask->getOperand(i), i))
2100      return false;
2101  for (unsigned i = NumElems/2; i != NumElems; ++i)
2102    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
2103      return false;
2104  return true;
2105}
2106
2107/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
2108/// all the same.
2109static bool isSplatVector(SDNode *N) {
2110  if (N->getOpcode() != ISD::BUILD_VECTOR)
2111    return false;
2112
2113  SDOperand SplatValue = N->getOperand(0);
2114  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
2115    if (N->getOperand(i) != SplatValue)
2116      return false;
2117  return true;
2118}
2119
2120/// NormalizeMask - V2 is a splat, so modify the mask (if needed) so that all
2121/// elements that point to V2 point to its first element.
2122static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
2123  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
2124
2125  bool Changed = false;
2126  std::vector<SDOperand> MaskVec;
2127  unsigned NumElems = Mask.getNumOperands();
2128  for (unsigned i = 0; i != NumElems; ++i) {
2129    SDOperand Arg = Mask.getOperand(i);
2130    if (Arg.getOpcode() != ISD::UNDEF) {
2131      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2132      if (Val > NumElems) {
2133        Arg = DAG.getConstant(NumElems, Arg.getValueType());
2134        Changed = true;
2135      }
2136    }
2137    MaskVec.push_back(Arg);
2138  }
2139
2140  if (Changed)
2141    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec);
2142  return Mask;
2143}
2144
2145/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d} or movd
2146/// operation of specified width.
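/// e.g. for 4 elements the mask is <4, 1, 2, 3>: take element 0 from the
/// second vector and the remaining elements from the first.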
2147static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
2148  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2149  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2150
2151  std::vector<SDOperand> MaskVec;
2152  MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
2153  for (unsigned i = 1; i != NumElems; ++i)
2154    MaskVec.push_back(DAG.getConstant(i, BaseVT));
2155  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2156}
2157
2158/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
2159/// of specified width.
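/// e.g. for 4 elements the mask is <0, 4, 1, 5>, interleaving the low halves
/// of the two vectors.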
2160static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
2161  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2162  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2163  std::vector<SDOperand> MaskVec;
2164  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
2165    MaskVec.push_back(DAG.getConstant(i,            BaseVT));
2166    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
2167  }
2168  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2169}
2170
2171/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
2172/// of specified width.
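/// e.g. for 4 elements the mask is <2, 6, 3, 7>, interleaving the high halves
/// of the two vectors.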
2173static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
2174  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2175  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2176  unsigned Half = NumElems/2;
2177  std::vector<SDOperand> MaskVec;
2178  for (unsigned i = 0; i != Half; ++i) {
2179    MaskVec.push_back(DAG.getConstant(i + Half,            BaseVT));
2180    MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
2181  }
2182  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2183}
2184
2185/// getZeroVector - Returns a vector of specified type with all zero elements.
2186///
2187static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
2188  assert(MVT::isVector(VT) && "Expected a vector type");
2189  unsigned NumElems = getVectorNumElements(VT);
2190  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
2191  bool isFP = MVT::isFloatingPoint(EVT);
2192  SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
2193  std::vector<SDOperand> ZeroVec(NumElems, Zero);
2194  return DAG.getNode(ISD::BUILD_VECTOR, VT, ZeroVec);
2195}
2196
2197/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
2198///
2199static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
2200  SDOperand V1 = Op.getOperand(0);
2201  SDOperand Mask = Op.getOperand(2);
2202  MVT::ValueType VT = Op.getValueType();
2203  unsigned NumElems = Mask.getNumOperands();
2204  Mask = getUnpacklMask(NumElems, DAG);
2205  while (NumElems != 4) {
2206    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
2207    NumElems >>= 1;
2208  }
2209  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
2210
2211  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2212  Mask = getZeroVector(MaskVT, DAG);
2213  SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
2214                                  DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
2215  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
2216}
2217
2218/// isZeroNode - Returns true if Elt is a constant zero or a floating point
2219/// constant +0.0.
2220static inline bool isZeroNode(SDOperand Elt) {
2221  return ((isa<ConstantSDNode>(Elt) &&
2222           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
2223          (isa<ConstantFPSDNode>(Elt) &&
2224           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
2225}
2226
2227/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
2228/// vector and zero or undef vector.
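/// For example, with NumElems == 4 and Idx == 0 the mask is <4, 0, 0, 0>:
/// lane 0 takes element 0 of V2 and every other lane takes element 0 of the
/// zero or undef vector.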
2229static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
2230                                             unsigned NumElems, unsigned Idx,
2231                                             bool isZero, SelectionDAG &DAG) {
2232  SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
2233  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2234  MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
2235  SDOperand Zero = DAG.getConstant(0, EVT);
2236  std::vector<SDOperand> MaskVec(NumElems, Zero);
2237  MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
2238  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2239  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2240}
2241
2242/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
2243///
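/// The bytes are combined pairwise: byte 2k+1 is zero-extended and shifted
/// left by 8, OR'd with zero-extended byte 2k, and the resulting i16 is
/// inserted as element k of a v8i16 which is finally bitcast back to v16i8.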
2244static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
2245                                       unsigned NumNonZero, unsigned NumZero,
2246                                       SelectionDAG &DAG) {
2247  if (NumNonZero > 8)
2248    return SDOperand();
2249
2250  SDOperand V(0, 0);
2251  bool First = true;
2252  for (unsigned i = 0; i < 16; ++i) {
2253    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
2254    if (ThisIsNonZero && First) {
2255      if (NumZero)
2256        V = getZeroVector(MVT::v8i16, DAG);
2257      else
2258        V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
2259      First = false;
2260    }
2261
2262    if ((i & 1) != 0) {
2263      SDOperand ThisElt(0, 0), LastElt(0, 0);
2264      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
2265      if (LastIsNonZero) {
2266        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
2267      }
2268      if (ThisIsNonZero) {
2269        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
2270        ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
2271                              ThisElt, DAG.getConstant(8, MVT::i8));
2272        if (LastIsNonZero)
2273          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
2274      } else
2275        ThisElt = LastElt;
2276
2277      if (ThisElt.Val)
2278        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
2279                        DAG.getConstant(i/2, MVT::i32));
2280    }
2281  }
2282
2283  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
2284}
2285
2286/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
2287///
2288static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
2289                                       unsigned NumNonZero, unsigned NumZero,
2290                                       SelectionDAG &DAG) {
2291  if (NumNonZero > 4)
2292    return SDOperand();
2293
2294  SDOperand V(0, 0);
2295  bool First = true;
2296  for (unsigned i = 0; i < 8; ++i) {
2297    bool isNonZero = (NonZeros & (1 << i)) != 0;
2298    if (isNonZero) {
2299      if (First) {
2300        if (NumZero)
2301          V = getZeroVector(MVT::v8i16, DAG);
2302        else
2303          V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
2304        First = false;
2305      }
2306      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
2307                      DAG.getConstant(i, MVT::i32));
2308    }
2309  }
2310
2311  return V;
2312}
2313
2314SDOperand
2315X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
2316  // All zero's are handled with pxor.
2317  if (ISD::isBuildVectorAllZeros(Op.Val))
2318    return Op;
2319
2320  // All one's are handled with pcmpeqd.
2321  if (ISD::isBuildVectorAllOnes(Op.Val))
2322    return Op;
2323
2324  MVT::ValueType VT = Op.getValueType();
2325  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
2326  unsigned EVTBits = MVT::getSizeInBits(EVT);
2327
2328  unsigned NumElems = Op.getNumOperands();
2329  unsigned NumZero  = 0;
2330  unsigned NumNonZero = 0;
2331  unsigned NonZeros = 0;
2332  std::set<SDOperand> Values;
2333  for (unsigned i = 0; i < NumElems; ++i) {
2334    SDOperand Elt = Op.getOperand(i);
2335    if (Elt.getOpcode() != ISD::UNDEF) {
2336      Values.insert(Elt);
2337      if (isZeroNode(Elt))
2338        NumZero++;
2339      else {
2340        NonZeros |= (1 << i);
2341        NumNonZero++;
2342      }
2343    }
2344  }
2345
2346  if (NumNonZero == 0)
2347    // Must be a mix of zero and undef. Return a zero vector.
2348    return getZeroVector(VT, DAG);
2349
2350  // Splat is obviously ok. Let legalizer expand it to a shuffle.
2351  if (Values.size() == 1)
2352    return SDOperand();
2353
2354  // Special case for single non-zero element.
2355  if (NumNonZero == 1) {
2356    unsigned Idx = CountTrailingZeros_32(NonZeros);
2357    SDOperand Item = Op.getOperand(Idx);
2358    Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
2359    if (Idx == 0)
2360      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
2361      return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
2362                                         NumZero > 0, DAG);
2363
2364    if (EVTBits == 32) {
2365      // Turn it into a shuffle of zero and zero-extended scalar to vector.
2366      Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
2367                                         DAG);
2368      MVT::ValueType MaskVT  = MVT::getIntVectorWithNumElements(NumElems);
2369      MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
2370      std::vector<SDOperand> MaskVec;
2371      for (unsigned i = 0; i < NumElems; i++)
2372        MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
2373      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2374      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
2375                         DAG.getNode(ISD::UNDEF, VT), Mask);
2376    }
2377  }
2378
2379  // Let the legalizer expand 2-wide build_vectors.
2380  if (EVTBits == 64)
2381    return SDOperand();
2382
2383  // If element VT is < 32 bits, convert it to inserts into a zero vector.
2384  if (EVTBits == 8) {
2385    SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG);
2386    if (V.Val) return V;
2387  }
2388
2389  if (EVTBits == 16) {
2390    SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG);
2391    if (V.Val) return V;
2392  }
2393
2394  // If element VT is == 32 bits, turn it into a number of shuffles.
2395  std::vector<SDOperand> V(NumElems);
2396  if (NumElems == 4 && NumZero > 0) {
2397    for (unsigned i = 0; i < 4; ++i) {
2398      bool isZero = !(NonZeros & (1 << i));
2399      if (isZero)
2400        V[i] = getZeroVector(VT, DAG);
2401      else
2402        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2403    }
2404
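    // Each 2-bit field of NonZeros describes one pair of elements: 0 means
    // both are zero, 1 means only the even element (2*i) is non-zero, 2 means
    // only the odd element is non-zero, and 3 means both are non-zero.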
2405    for (unsigned i = 0; i < 2; ++i) {
2406      switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
2407        default: break;
2408        case 0:
2409          V[i] = V[i*2];  // Must be a zero vector.
2410          break;
2411        case 1:
2412          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
2413                             getMOVLMask(NumElems, DAG));
2414          break;
2415        case 2:
2416          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2417                             getMOVLMask(NumElems, DAG));
2418          break;
2419        case 3:
2420          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2421                             getUnpacklMask(NumElems, DAG));
2422          break;
2423      }
2424    }
2425
2426    // Take advantage of the fact that an R32 to VR128 scalar_to_vector (i.e.
2427    // movd) clears the upper bits.
2428    // FIXME: we can do the same for the v4f32 case when we know both parts of
2429    // the lower half come from scalar_to_vector (loadf32). We should do
2430    // that in a post-legalizer dag combiner with target specific hooks.
2431    if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
2432      return V[0];
2433    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2434    MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
2435    std::vector<SDOperand> MaskVec;
2436    bool Reverse = (NonZeros & 0x3) == 2;
2437    for (unsigned i = 0; i < 2; ++i)
2438      if (Reverse)
2439        MaskVec.push_back(DAG.getConstant(1-i, EVT));
2440      else
2441        MaskVec.push_back(DAG.getConstant(i, EVT));
2442    Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
2443    for (unsigned i = 0; i < 2; ++i)
2444      if (Reverse)
2445        MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
2446      else
2447        MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
2448    SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2449    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
2450  }
2451
2452  if (Values.size() > 2) {
2453    // Expand into a number of unpckl*.
2454    // e.g. for v4f32
2455    //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
2456    //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
2457    //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
2458    SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
2459    for (unsigned i = 0; i < NumElems; ++i)
2460      V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2461    NumElems >>= 1;
2462    while (NumElems != 0) {
2463      for (unsigned i = 0; i < NumElems; ++i)
2464        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
2465                           UnpckMask);
2466      NumElems >>= 1;
2467    }
2468    return V[0];
2469  }
2470
2471  return SDOperand();
2472}
2473
2474SDOperand
2475X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
2476  SDOperand V1 = Op.getOperand(0);
2477  SDOperand V2 = Op.getOperand(1);
2478  SDOperand PermMask = Op.getOperand(2);
2479  MVT::ValueType VT = Op.getValueType();
2480  unsigned NumElems = PermMask.getNumOperands();
2481  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
2482  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
2483
2484  if (isSplatMask(PermMask.Val)) {
2485    if (NumElems <= 4) return Op;
2486    // Promote it to a v4i32 splat.
2487    return PromoteSplat(Op, DAG);
2488  }
2489
2490  if (X86::isMOVLMask(PermMask.Val))
2491    return (V1IsUndef) ? V2 : Op;
2492
2493  if (X86::isMOVSHDUPMask(PermMask.Val) ||
2494      X86::isMOVSLDUPMask(PermMask.Val) ||
2495      X86::isMOVHLPSMask(PermMask.Val) ||
2496      X86::isMOVHPMask(PermMask.Val) ||
2497      X86::isMOVLPMask(PermMask.Val))
2498    return Op;
2499
2500  if (ShouldXformToMOVHLPS(PermMask.Val) ||
2501      ShouldXformToMOVLP(V1.Val, PermMask.Val))
2502    return CommuteVectorShuffle(Op, DAG);
2503
2504  bool V1IsSplat = isSplatVector(V1.Val) || V1.getOpcode() == ISD::UNDEF;
2505  bool V2IsSplat = isSplatVector(V2.Val) || V2.getOpcode() == ISD::UNDEF;
2506  if (V1IsSplat && !V2IsSplat) {
2507    Op = CommuteVectorShuffle(Op, DAG);
2508    V1 = Op.getOperand(0);
2509    V2 = Op.getOperand(1);
2510    PermMask = Op.getOperand(2);
2511    V2IsSplat = true;
2512  }
2513
2514  if (isCommutedMOVL(PermMask.Val, V2IsSplat)) {
2515    if (V2IsUndef) return V1;
2516    Op = CommuteVectorShuffle(Op, DAG);
2517    V1 = Op.getOperand(0);
2518    V2 = Op.getOperand(1);
2519    PermMask = Op.getOperand(2);
2520    if (V2IsSplat) {
2521      // V2 is a splat, so the mask may be malformed. That is, it may point
2522      // to any V2 element. The instruction selector won't like this. Get
2523      // a corrected mask and commute to form a proper MOVS{S|D}.
2524      SDOperand NewMask = getMOVLMask(NumElems, DAG);
2525      if (NewMask.Val != PermMask.Val)
2526        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2527    }
2528    return Op;
2529  }
2530
2531  if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
2532      X86::isUNPCKLMask(PermMask.Val) ||
2533      X86::isUNPCKHMask(PermMask.Val))
2534    return Op;
2535
2536  if (V2IsSplat) {
2537    // Normalize the mask so all entries that point to V2 point to its first
2538    // element, then try to match unpck{h|l} again. If it matches, return a
2539    // new vector_shuffle with the corrected mask.
2540    SDOperand NewMask = NormalizeMask(PermMask, DAG);
2541    if (NewMask.Val != PermMask.Val) {
2542      if (X86::isUNPCKLMask(PermMask.Val, true)) {
2543        SDOperand NewMask = getUnpacklMask(NumElems, DAG);
2544        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2545      } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
2546        SDOperand NewMask = getUnpackhMask(NumElems, DAG);
2547        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2548      }
2549    }
2550  }
2551
2552  // Normalize the node to match x86 shuffle ops if needed
2553  if (V2.getOpcode() != ISD::UNDEF)
2554    if (isCommutedSHUFP(PermMask.Val)) {
2555      Op = CommuteVectorShuffle(Op, DAG);
2556      V1 = Op.getOperand(0);
2557      V2 = Op.getOperand(1);
2558      PermMask = Op.getOperand(2);
2559    }
2560
2561  // If VT is integer, try PSHUF* first, then SHUFP*.
2562  if (MVT::isInteger(VT)) {
2563    if (X86::isPSHUFDMask(PermMask.Val) ||
2564        X86::isPSHUFHWMask(PermMask.Val) ||
2565        X86::isPSHUFLWMask(PermMask.Val)) {
2566      if (V2.getOpcode() != ISD::UNDEF)
2567        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
2568                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
2569      return Op;
2570    }
2571
2572    if (X86::isSHUFPMask(PermMask.Val))
2573      return Op;
2574
2575    // Handle v8i16 shuffle high / low shuffle node pair.
2576    if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
2577      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2578      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2579      std::vector<SDOperand> MaskVec;
2580      for (unsigned i = 0; i != 4; ++i)
2581        MaskVec.push_back(PermMask.getOperand(i));
2582      for (unsigned i = 4; i != 8; ++i)
2583        MaskVec.push_back(DAG.getConstant(i, BaseVT));
2584      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2585      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2586      MaskVec.clear();
2587      for (unsigned i = 0; i != 4; ++i)
2588        MaskVec.push_back(DAG.getConstant(i, BaseVT));
2589      for (unsigned i = 4; i != 8; ++i)
2590        MaskVec.push_back(PermMask.getOperand(i));
2591      Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2592      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2593    }
2594  } else {
2595    // Floating point cases in the other order.
2596    if (X86::isSHUFPMask(PermMask.Val))
2597      return Op;
2598    if (X86::isPSHUFDMask(PermMask.Val) ||
2599        X86::isPSHUFHWMask(PermMask.Val) ||
2600        X86::isPSHUFLWMask(PermMask.Val)) {
2601      if (V2.getOpcode() != ISD::UNDEF)
2602        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
2603                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
2604      return Op;
2605    }
2606  }
2607
2608  if (NumElems == 4) {
2609    // Break it into (shuffle shuffle_hi, shuffle_lo).
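    // Each result element is produced by one of two intermediate shuffles
    // (one built from the first half of the mask, one from the second);
    // Locs[i] records which intermediate shuffle (first) and which lane of it
    // (second) holds element i, so the final shuffle can restore the order.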
2610    MVT::ValueType MaskVT = PermMask.getValueType();
2611    MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
2612    std::map<unsigned, std::pair<int, int> > Locs;
2613    std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2614    std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2615    std::vector<SDOperand> *MaskPtr = &LoMask;
2616    unsigned MaskIdx = 0;
2617    unsigned LoIdx = 0;
2618    unsigned HiIdx = NumElems/2;
2619    for (unsigned i = 0; i != NumElems; ++i) {
2620      if (i == NumElems/2) {
2621        MaskPtr = &HiMask;
2622        MaskIdx = 1;
2623        LoIdx = 0;
2624        HiIdx = NumElems/2;
2625      }
2626      SDOperand Elt = PermMask.getOperand(i);
2627      if (Elt.getOpcode() == ISD::UNDEF) {
2628        Locs[i] = std::make_pair(-1, -1);
2629      } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
2630        Locs[i] = std::make_pair(MaskIdx, LoIdx);
2631        (*MaskPtr)[LoIdx] = Elt;
2632        LoIdx++;
2633      } else {
2634        Locs[i] = std::make_pair(MaskIdx, HiIdx);
2635        (*MaskPtr)[HiIdx] = Elt;
2636        HiIdx++;
2637      }
2638    }
2639
2640    SDOperand LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2641                                      DAG.getNode(ISD::BUILD_VECTOR, MaskVT, LoMask));
2642    SDOperand HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2643                                      DAG.getNode(ISD::BUILD_VECTOR, MaskVT, HiMask));
2644    std::vector<SDOperand> MaskOps;
2645    for (unsigned i = 0; i != NumElems; ++i) {
2646      if (Locs[i].first == -1) {
2647        MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
2648      } else {
2649        unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
2650        MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
2651      }
2652    }
2653    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
2654                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskOps));
2655  }
2656
2657  return SDOperand();
2658}
2659
2660SDOperand
2661X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2662  if (!isa<ConstantSDNode>(Op.getOperand(1)))
2663    return SDOperand();
2664
2665  MVT::ValueType VT = Op.getValueType();
2666  // TODO: handle v16i8.
2667  if (MVT::getSizeInBits(VT) == 16) {
2668    // Transform it so it matches pextrw, which produces a 32-bit result.
2669    MVT::ValueType EVT = (MVT::ValueType)(VT+1);
2670    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
2671                                    Op.getOperand(0), Op.getOperand(1));
2672    SDOperand Assert  = DAG.getNode(ISD::AssertZext, EVT, Extract,
2673                                    DAG.getValueType(VT));
2674    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
2675  } else if (MVT::getSizeInBits(VT) == 32) {
2676    SDOperand Vec = Op.getOperand(0);
2677    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
2678    if (Idx == 0)
2679      return Op;
2680
2681    // SHUFPS the element to the lowest double word, then movss.
2682    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2683    SDOperand IdxNode = DAG.getConstant((Idx < 2) ? Idx : Idx+4,
2684                                        MVT::getVectorBaseType(MaskVT));
2685    std::vector<SDOperand> IdxVec;
2686    IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
2687    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
2688    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
2689    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
2690    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
2691    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
2692                      Vec, Vec, Mask);
2693    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
2694                       DAG.getConstant(0, MVT::i32));
2695  } else if (MVT::getSizeInBits(VT) == 64) {
2696    SDOperand Vec = Op.getOperand(0);
2697    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
2698    if (Idx == 0)
2699      return Op;
2700
2701    // UNPCKHPD the element to the lowest double word, then movsd.
2702    // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
2703    // to a f64mem, the whole operation is folded into a single MOVHPDmr.
2704    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2705    std::vector<SDOperand> IdxVec;
2706    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
2707    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
2708    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
2709    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
2710                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
2711    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
2712                       DAG.getConstant(0, MVT::i32));
2713  }
2714
2715  return SDOperand();
2716}
2717
2718SDOperand
2719X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2720  // Transform it so it matches pinsrw, which expects a 16-bit value in an R32
2721  // as its second argument.
2722  MVT::ValueType VT = Op.getValueType();
2723  MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
2724  SDOperand N0 = Op.getOperand(0);
2725  SDOperand N1 = Op.getOperand(1);
2726  SDOperand N2 = Op.getOperand(2);
2727  if (MVT::getSizeInBits(BaseVT) == 16) {
2728    if (N1.getValueType() != MVT::i32)
2729      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
2730    if (N2.getValueType() != MVT::i32)
2731      N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
2732    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
2733  } else if (MVT::getSizeInBits(BaseVT) == 32) {
2734    unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
2735    if (Idx == 0) {
2736      // Use a movss.
2737      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
2738      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2739      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2740      std::vector<SDOperand> MaskVec;
2741      MaskVec.push_back(DAG.getConstant(4, BaseVT));
2742      for (unsigned i = 1; i <= 3; ++i)
2743        MaskVec.push_back(DAG.getConstant(i, BaseVT));
2744      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
2745                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec));
2746    } else {
2747      // Use two pinsrw instructions to insert a 32 bit value.
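      // Doubling Idx converts the 32-bit lane index into the index of its
      // first 16-bit half; the low half of the value goes in lane Idx and the
      // high half (the value shifted right by 16) in lane Idx+1.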
2748      Idx <<= 1;
2749      if (MVT::isFloatingPoint(N1.getValueType())) {
2750        if (N1.getOpcode() == ISD::LOAD) {
2751          // Just load directly from f32mem to R32.
2752          N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1),
2753                           N1.getOperand(2));
2754        } else {
2755          N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
2756          N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
2757          N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
2758                           DAG.getConstant(0, MVT::i32));
2759        }
2760      }
2761      N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
2762      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
2763                       DAG.getConstant(Idx, MVT::i32));
2764      N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
2765      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
2766                       DAG.getConstant(Idx+1, MVT::i32));
2767      return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
2768    }
2769  }
2770
2771  return SDOperand();
2772}
2773
2774SDOperand
2775X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
2776  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
2777  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
2778}
2779
2780// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2781// their target counterparts wrapped in the X86ISD::Wrapper node. Suppose N is
2782// one of the above-mentioned nodes. It has to be wrapped because otherwise
2783// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2784// be used to form an addressing mode. These wrapped nodes will be selected
2785// into MOV32ri.
2786SDOperand
2787X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
2788  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2789  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
2790                            DAG.getTargetConstantPool(CP->get(), getPointerTy(),
2791                                                      CP->getAlignment()));
2792  if (Subtarget->isTargetDarwin()) {
2793    // With PIC, the address is actually $g + Offset.
2794    if (getTargetMachine().getRelocationModel() == Reloc::PIC)
2795      Result = DAG.getNode(ISD::ADD, getPointerTy(),
2796                    DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
2797  }
2798
2799  return Result;
2800}
2801
2802SDOperand
2803X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
2804  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
2805  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
2806                                 DAG.getTargetGlobalAddress(GV, getPointerTy()));
2807  if (Subtarget->isTargetDarwin()) {
2808    // With PIC, the address is actually $g + Offset.
2809    if (getTargetMachine().getRelocationModel() == Reloc::PIC)
2810      Result = DAG.getNode(ISD::ADD, getPointerTy(),
2811                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
2812
2813    // For Darwin, external and weak symbols are indirect, so we want to load
2814    // the value at address GV, not the value of GV itself. This means that
2815    // the GlobalAddress must be in the base or index register of the address,
2816    // not the GV offset field.
2817    if (getTargetMachine().getRelocationModel() != Reloc::Static &&
2818        DarwinGVRequiresExtraLoad(GV))
2819      Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(),
2820                           Result, DAG.getSrcValue(NULL));
2821  }
2822
2823  return Result;
2824}
2825
2826SDOperand
2827X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
2828  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
2829  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
2830                                 DAG.getTargetExternalSymbol(Sym, getPointerTy()));
2831  if (Subtarget->isTargetDarwin()) {
2832    // With PIC, the address is actually $g + Offset.
2833    if (getTargetMachine().getRelocationModel() == Reloc::PIC)
2834      Result = DAG.getNode(ISD::ADD, getPointerTy(),
2835                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
2836  }
2837
2838  return Result;
2839}
2840
2841SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
2842    assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
2843           "Not an i64 shift!");
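    // The i64 shift is lowered pairwise: SHLD/SHRD produces the combined
    // half, a plain shift produces the other, and a TEST of bit 5 of the
    // shift amount feeds CMOVs that pick the correct halves when the amount
    // is 32 or more.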
2844    bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
2845    SDOperand ShOpLo = Op.getOperand(0);
2846    SDOperand ShOpHi = Op.getOperand(1);
2847    SDOperand ShAmt  = Op.getOperand(2);
2848    SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi,
2849                                         DAG.getConstant(31, MVT::i8))
2850                           : DAG.getConstant(0, MVT::i32);
2851
2852    SDOperand Tmp2, Tmp3;
2853    if (Op.getOpcode() == ISD::SHL_PARTS) {
2854      Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
2855      Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
2856    } else {
2857      Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
2858      Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
2859    }
2860
2861    SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag,
2862                                   ShAmt, DAG.getConstant(32, MVT::i8));
2863
2864    SDOperand Hi, Lo;
2865    SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
2866
2867    std::vector<MVT::ValueType> Tys;
2868    Tys.push_back(MVT::i32);
2869    Tys.push_back(MVT::Flag);
2870    std::vector<SDOperand> Ops;
2871    if (Op.getOpcode() == ISD::SHL_PARTS) {
2872      Ops.push_back(Tmp2);
2873      Ops.push_back(Tmp3);
2874      Ops.push_back(CC);
2875      Ops.push_back(InFlag);
2876      Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
2877      InFlag = Hi.getValue(1);
2878
2879      Ops.clear();
2880      Ops.push_back(Tmp3);
2881      Ops.push_back(Tmp1);
2882      Ops.push_back(CC);
2883      Ops.push_back(InFlag);
2884      Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
2885    } else {
2886      Ops.push_back(Tmp2);
2887      Ops.push_back(Tmp3);
2888      Ops.push_back(CC);
2889      Ops.push_back(InFlag);
2890      Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
2891      InFlag = Lo.getValue(1);
2892
2893      Ops.clear();
2894      Ops.push_back(Tmp3);
2895      Ops.push_back(Tmp1);
2896      Ops.push_back(CC);
2897      Ops.push_back(InFlag);
2898      Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
2899    }
2900
2901    Tys.clear();
2902    Tys.push_back(MVT::i32);
2903    Tys.push_back(MVT::i32);
2904    Ops.clear();
2905    Ops.push_back(Lo);
2906    Ops.push_back(Hi);
2907    return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
2908}
2909
2910SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
2911  assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
2912         Op.getOperand(0).getValueType() >= MVT::i16 &&
2913         "Unknown SINT_TO_FP to lower!");
2914
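  // The integer operand is spilled to a stack slot and converted with FILD.
  // When scalar SSE is enabled, the x87 result is then stored back to memory
  // and reloaded into an SSE register (see below).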
2915  SDOperand Result;
2916  MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
2917  unsigned Size = MVT::getSizeInBits(SrcVT)/8;
2918  MachineFunction &MF = DAG.getMachineFunction();
2919  int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
2920  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
2921  SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other,
2922                                DAG.getEntryNode(), Op.getOperand(0),
2923                                StackSlot, DAG.getSrcValue(NULL));
2924
2925  // Build the FILD
2926  std::vector<MVT::ValueType> Tys;
2927  Tys.push_back(MVT::f64);
2928  Tys.push_back(MVT::Other);
2929  if (X86ScalarSSE) Tys.push_back(MVT::Flag);
2930  std::vector<SDOperand> Ops;
2931  Ops.push_back(Chain);
2932  Ops.push_back(StackSlot);
2933  Ops.push_back(DAG.getValueType(SrcVT));
2934  Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG : X86ISD::FILD,
2935                       Tys, Ops);
2936
2937  if (X86ScalarSSE) {
2938    Chain = Result.getValue(1);
2939    SDOperand InFlag = Result.getValue(2);
2940
2941    // FIXME: Currently the FST is flagged to the FILD_FLAG. This
2942    // shouldn't be necessary except that RFP cannot be live across
2943    // multiple blocks. When the stackifier is fixed, they can be uncoupled.
2944    MachineFunction &MF = DAG.getMachineFunction();
2945    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
2946    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
2947    std::vector<MVT::ValueType> Tys;
2948    Tys.push_back(MVT::Other);
2949    std::vector<SDOperand> Ops;
2950    Ops.push_back(Chain);
2951    Ops.push_back(Result);
2952    Ops.push_back(StackSlot);
2953    Ops.push_back(DAG.getValueType(Op.getValueType()));
2954    Ops.push_back(InFlag);
2955    Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
2956    Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
2957                         DAG.getSrcValue(NULL));
2958  }
2959
2960  return Result;
2961}
2962
2963SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
2964  assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
2965         "Unknown FP_TO_SINT to lower!");
2966  // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
2967  // stack slot.
2968  MachineFunction &MF = DAG.getMachineFunction();
2969  unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
2970  int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
2971  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
2972
2973  unsigned Opc;
2974  switch (Op.getValueType()) {
2975    default: assert(0 && "Invalid FP_TO_SINT to lower!");
2976    case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
2977    case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
2978    case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
2979  }
2980
2981  SDOperand Chain = DAG.getEntryNode();
2982  SDOperand Value = Op.getOperand(0);
2983  if (X86ScalarSSE) {
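    // With SSE the value lives in an XMM register; store it out and FLD it
    // onto the x87 stack so the FP_TO_INT*_IN_MEM node can consume it.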
2984    assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
2985    Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot,
2986                        DAG.getSrcValue(0));
2987    std::vector<MVT::ValueType> Tys;
2988    Tys.push_back(MVT::f64);
2989    Tys.push_back(MVT::Other);
2990    std::vector<SDOperand> Ops;
2991    Ops.push_back(Chain);
2992    Ops.push_back(StackSlot);
2993    Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType()));
2994    Value = DAG.getNode(X86ISD::FLD, Tys, Ops);
2995    Chain = Value.getValue(1);
2996    SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
2997    StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
2998  }
2999
3000  // Build the FP_TO_INT*_IN_MEM
3001  std::vector<SDOperand> Ops;
3002  Ops.push_back(Chain);
3003  Ops.push_back(Value);
3004  Ops.push_back(StackSlot);
3005  SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops);
3006
3007  // Load the result.
3008  return DAG.getLoad(Op.getValueType(), FIST, StackSlot,
3009                     DAG.getSrcValue(NULL));
3010}
3011
3012SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
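  // fabs is lowered as an AND with a constant whose only clear bit is the
  // sign bit; the extra 0.0 elements presumably just pad out the constant
  // pool entry read by the packed load.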
3013  MVT::ValueType VT = Op.getValueType();
3014  const Type *OpNTy = MVT::getTypeForValueType(VT);
3015  std::vector<Constant*> CV;
3016  if (VT == MVT::f64) {
3017    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
3018    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3019  } else {
3020    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
3021    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3022    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3023    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3024  }
3025  Constant *CS = ConstantStruct::get(CV);
3026  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3027  SDOperand Mask
3028    = DAG.getNode(X86ISD::LOAD_PACK,
3029                  VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
3030  return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
3031}
3032
3033SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
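  // fneg is lowered the same way as fabs above, but XORs with a constant
  // that has only the sign bit set.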
3034  MVT::ValueType VT = Op.getValueType();
3035  const Type *OpNTy = MVT::getTypeForValueType(VT);
3036  std::vector<Constant*> CV;
3037  if (VT == MVT::f64) {
3038    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
3039    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3040  } else {
3041    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
3042    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3043    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3044    CV.push_back(ConstantFP::get(OpNTy, 0.0));
3045  }
3046  Constant *CS = ConstantStruct::get(CV);
3047  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
3048  SDOperand Mask  = DAG.getNode(X86ISD::LOAD_PACK,
3049                          VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
3050  return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
3051}
3052
3053SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
3054  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
3055  SDOperand Cond;
3056  SDOperand CC = Op.getOperand(2);
3057  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
3058  bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
3059  bool Flip;
3060  unsigned X86CC;
3061  if (translateX86CC(CC, isFP, X86CC, Flip)) {
3062    if (Flip)
3063      Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
3064                         Op.getOperand(1), Op.getOperand(0));
3065    else
3066      Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
3067                         Op.getOperand(0), Op.getOperand(1));
3068    return DAG.getNode(X86ISD::SETCC, MVT::i8,
3069                       DAG.getConstant(X86CC, MVT::i8), Cond);
3070  } else {
3071    assert(isFP && "Illegal integer SetCC!");
3072
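    // The remaining FP conditions need two flags: ordered-equal is "ZF set
    // and PF clear", unordered-not-equal is "ZF clear or PF set", so two
    // SETCC nodes are combined with AND / OR below.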
3073    Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
3074                       Op.getOperand(0), Op.getOperand(1));
3075    std::vector<MVT::ValueType> Tys;
3076    std::vector<SDOperand> Ops;
3077    switch (SetCCOpcode) {
3078      default: assert(false && "Illegal floating point SetCC!");
3079      case ISD::SETOEQ: {  // !PF & ZF
3080        Tys.push_back(MVT::i8);
3081        Tys.push_back(MVT::Flag);
3082        Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8));
3083        Ops.push_back(Cond);
3084        SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
3085        SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
3086                                     DAG.getConstant(X86ISD::COND_E, MVT::i8),
3087                                     Tmp1.getValue(1));
3088        return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
3089      }
3090      case ISD::SETUNE: {  // PF | !ZF
3091        Tys.push_back(MVT::i8);
3092        Tys.push_back(MVT::Flag);
3093        Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8));
3094        Ops.push_back(Cond);
3095        SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
3096        SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
3097                                     DAG.getConstant(X86ISD::COND_NE, MVT::i8),
3098                                     Tmp1.getValue(1));
3099        return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
3100      }
3101    }
3102  }
3103}
3104
3105SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
3106  MVT::ValueType VT = Op.getValueType();
3107  bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE;
3108  bool addTest   = false;
3109  SDOperand Op0 = Op.getOperand(0);
3110  SDOperand Cond, CC;
3111  if (Op0.getOpcode() == ISD::SETCC)
3112    Op0 = LowerOperation(Op0, DAG);
3113
3114  if (Op0.getOpcode() == X86ISD::SETCC) {
3115    // If the condition flag is set by an X86ISD::CMP, make a copy of it
3116    // (since the flag operand cannot be shared). If the X86ISD::SETCC has
3117    // no other use, it will be eliminated.
3118    // If the X86ISD::SETCC has more than one use, it's probably better to
3119    // use a test instead of duplicating the X86ISD::CMP (for register
3120    // pressure reasons).
3121    unsigned CmpOpc = Op0.getOperand(1).getOpcode();
3122    if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
3123        CmpOpc == X86ISD::UCOMI) {
3124      if (!Op0.hasOneUse()) {
3125        std::vector<MVT::ValueType> Tys;
3126        for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i)
3127          Tys.push_back(Op0.Val->getValueType(i));
3128        std::vector<SDOperand> Ops;
3129        for (unsigned i = 0; i < Op0.getNumOperands(); ++i)
3130          Ops.push_back(Op0.getOperand(i));
3131        Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
3132      }
3133
3134      CC   = Op0.getOperand(0);
3135      Cond = Op0.getOperand(1);
3136      // Make a copy, as a flag result cannot be used by more than one node.
3137      Cond = DAG.getNode(CmpOpc, MVT::Flag,
3138                         Cond.getOperand(0), Cond.getOperand(1));
3139      addTest =
3140        isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
3141    } else
3142      addTest = true;
3143  } else
3144    addTest = true;
3145
3146  if (addTest) {
3147    CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
3148    Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0);
3149  }
3150
3151  std::vector<MVT::ValueType> Tys;
3152  Tys.push_back(Op.getValueType());
3153  Tys.push_back(MVT::Flag);
3154  std::vector<SDOperand> Ops;
3155  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if the
3156  // condition is true.
3157  Ops.push_back(Op.getOperand(2));
3158  Ops.push_back(Op.getOperand(1));
3159  Ops.push_back(CC);
3160  Ops.push_back(Cond);
3161  return DAG.getNode(X86ISD::CMOV, Tys, Ops);
3162}
3163
3164SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
3165  bool addTest = false;
3166  SDOperand Cond  = Op.getOperand(1);
3167  SDOperand Dest  = Op.getOperand(2);
3168  SDOperand CC;
3169  if (Cond.getOpcode() == ISD::SETCC)
3170    Cond = LowerOperation(Cond, DAG);
3171
3172  if (Cond.getOpcode() == X86ISD::SETCC) {
3173    // If the condition flag is set by an X86ISD::CMP, make a copy of it
3174    // (since the flag operand cannot be shared). If the X86ISD::SETCC has
3175    // no other use, it will be eliminated.
3176    // If the X86ISD::SETCC has more than one use, it's probably better to
3177    // use a test instead of duplicating the X86ISD::CMP (for register
3178    // pressure reasons).
3179    unsigned CmpOpc = Cond.getOperand(1).getOpcode();
3180    if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
3181        CmpOpc == X86ISD::UCOMI) {
3182      if (!Cond.hasOneUse()) {
3183        std::vector<MVT::ValueType> Tys;
3184        for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i)
3185          Tys.push_back(Cond.Val->getValueType(i));
3186        std::vector<SDOperand> Ops;
3187        for (unsigned i = 0; i < Cond.getNumOperands(); ++i)
3188          Ops.push_back(Cond.getOperand(i));
3189        Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops);
3190      }
3191
3192      CC   = Cond.getOperand(0);
3193      Cond = Cond.getOperand(1);
3194      // Make a copy, as a flag result cannot be used by more than one node.
3195      Cond = DAG.getNode(CmpOpc, MVT::Flag,
3196                         Cond.getOperand(0), Cond.getOperand(1));
3197    } else
3198      addTest = true;
3199  } else
3200    addTest = true;
3201
3202  if (addTest) {
3203    CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
3204    Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond);
3205  }
3206  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
3207                     Op.getOperand(0), Dest, CC, Cond);
3208}
3209
3210SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
3211  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3212  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
3213                                 DAG.getTargetJumpTable(JT->getIndex(),
3214                                                        getPointerTy()));
3215  if (Subtarget->isTargetDarwin()) {
3216    // With PIC, the address is actually $g + Offset.
3217    if (getTargetMachine().getRelocationModel() == Reloc::PIC)
3218      Result = DAG.getNode(ISD::ADD, getPointerTy(),
3219                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
3220  }
3221
3222  return Result;
3223}
3224
3225SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
3226  SDOperand Copy;
3227
3228  switch(Op.getNumOperands()) {
3229    default:
3230      assert(0 && "Do not know how to return this many arguments!");
3231      abort();
3232    case 1:    // ret void.
3233      return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0),
3234                        DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
3235    case 2: {
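      // A single return value is passed back in XMM0 (vectors), EAX
      // (integers), or ST(0) (floating point).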
3236      MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
3237
3238      if (MVT::isVector(ArgVT)) {
3239        // Integer or FP vector result -> XMM0.
3240        if (DAG.getMachineFunction().liveout_empty())
3241          DAG.getMachineFunction().addLiveOut(X86::XMM0);
3242        Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1),
3243                                SDOperand());
3244      } else if (MVT::isInteger(ArgVT)) {
3245        // Integer result -> EAX
3246        if (DAG.getMachineFunction().liveout_empty())
3247          DAG.getMachineFunction().addLiveOut(X86::EAX);
3248
3249        Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1),
3250                                SDOperand());
3251      } else if (!X86ScalarSSE) {
3252        // FP return with fp-stack value.
3253        if (DAG.getMachineFunction().liveout_empty())
3254          DAG.getMachineFunction().addLiveOut(X86::ST0);
3255
3256        std::vector<MVT::ValueType> Tys;
3257        Tys.push_back(MVT::Other);
3258        Tys.push_back(MVT::Flag);
3259        std::vector<SDOperand> Ops;
3260        Ops.push_back(Op.getOperand(0));
3261        Ops.push_back(Op.getOperand(1));
3262        Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
3263      } else {
3264        // FP return with ScalarSSE (return on fp-stack).
3265        if (DAG.getMachineFunction().liveout_empty())
3266          DAG.getMachineFunction().addLiveOut(X86::ST0);
3267
3268        SDOperand MemLoc;
3269        SDOperand Chain = Op.getOperand(0);
3270        SDOperand Value = Op.getOperand(1);
3271
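        // If the value is already the result of a load on the incoming
        // chain, FLD directly from its address instead of spilling it.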
3272        if (Value.getOpcode() == ISD::LOAD &&
3273            (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
3274          Chain  = Value.getOperand(0);
3275          MemLoc = Value.getOperand(1);
3276        } else {
3277          // Spill the value to memory and reload it into top of stack.
3278          unsigned Size = MVT::getSizeInBits(ArgVT)/8;
3279          MachineFunction &MF = DAG.getMachineFunction();
3280          int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
3281          MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
3282          Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
3283                              Value, MemLoc, DAG.getSrcValue(0));
3284        }
3285        std::vector<MVT::ValueType> Tys;
3286        Tys.push_back(MVT::f64);
3287        Tys.push_back(MVT::Other);
3288        std::vector<SDOperand> Ops;
3289        Ops.push_back(Chain);
3290        Ops.push_back(MemLoc);
3291        Ops.push_back(DAG.getValueType(ArgVT));
3292        Copy = DAG.getNode(X86ISD::FLD, Tys, Ops);
3293        Tys.clear();
3294        Tys.push_back(MVT::Other);
3295        Tys.push_back(MVT::Flag);
3296        Ops.clear();
3297        Ops.push_back(Copy.getValue(1));
3298        Ops.push_back(Copy);
3299        Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
3300      }
3301      break;
3302    }
3303    case 3:
3304      if (DAG.getMachineFunction().liveout_empty()) {
3305        DAG.getMachineFunction().addLiveOut(X86::EAX);
3306        DAG.getMachineFunction().addLiveOut(X86::EDX);
3307      }
3308
3309      Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2),
3310                              SDOperand());
3311      Copy = DAG.getCopyToReg(Copy, X86::EAX,Op.getOperand(1),Copy.getValue(1));
3312      break;
3313  }
3314  return DAG.getNode(X86ISD::RET_FLAG, MVT::Other,
3315                   Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
3316                     Copy.getValue(1));
3317}
3318
3319SDOperand
3320X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
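  // Argument lowering runs once per function, using either the C or the
  // fastcc convention, and caches the results in FormalArgs; each formal
  // argument node then simply returns its cached value by result number.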
3321  if (FormalArgs.size() == 0) {
3322    unsigned CC = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
3323    if (CC == CallingConv::Fast && EnableFastCC)
3324      LowerFastCCArguments(Op, DAG);
3325    else
3326      LowerCCCArguments(Op, DAG);
3327  }
3328  return FormalArgs[Op.ResNo];
3329}
3330
3331SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
3332  SDOperand InFlag(0, 0);
3333  SDOperand Chain = Op.getOperand(0);
3334  unsigned Align =
3335    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
3336  if (Align == 0) Align = 1;
3337
3338  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
3339  // If not DWORD aligned, or if the constant size is below the rep-string
3340  // threshold, call memset; it knows how to align to the right boundary first.
3341  if ((Align & 3) != 0 ||
3342      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
3343    MVT::ValueType IntPtr = getPointerTy();
3344    const Type *IntPtrTy = getTargetData().getIntPtrType();
3345    std::vector<std::pair<SDOperand, const Type*> > Args;
3346    Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
3347    // Extend the ubyte argument to be an int value for the call.
3348    SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
3349    Args.push_back(std::make_pair(Val, IntPtrTy));
3350    Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
3351    std::pair<SDOperand,SDOperand> CallResult =
3352      LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
3353                  DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
3354    return CallResult.second;
3355  }
3356
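  // Otherwise emit a rep;stos: splat the byte into AL/AX/EAX, put the count
  // in ECX and the destination in EDI. A non-constant count gets a second,
  // byte-sized rep;stos for the remainder; with a constant count the last
  // 1-3 bytes are stored directly.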
3357  MVT::ValueType AVT;
3358  SDOperand Count;
3359  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
3360  unsigned BytesLeft = 0;
3361  bool TwoRepStos = false;
3362  if (ValC) {
3363    unsigned ValReg;
3364    unsigned Val = ValC->getValue() & 255;
3365
3366    // If the value is a constant, then we can potentially use larger sets.
3367    switch (Align & 3) {
3368      case 2:   // WORD aligned
3369        AVT = MVT::i16;
3370        Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
3371        BytesLeft = I->getValue() % 2;
3372        Val    = (Val << 8) | Val;
3373        ValReg = X86::AX;
3374        break;
3375      case 0:   // DWORD aligned
3376        AVT = MVT::i32;
3377        if (I) {
3378          Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
3379          BytesLeft = I->getValue() % 4;
3380        } else {
3381          Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
3382                              DAG.getConstant(2, MVT::i8));
3383          TwoRepStos = true;
3384        }
3385        Val = (Val << 8)  | Val;
3386        Val = (Val << 16) | Val;
3387        ValReg = X86::EAX;
3388        break;
3389      default:  // Byte aligned
3390        AVT = MVT::i8;
3391        Count = Op.getOperand(3);
3392        ValReg = X86::AL;
3393        break;
3394    }
3395
3396    Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
3397                              InFlag);
3398    InFlag = Chain.getValue(1);
3399  } else {
3400    AVT = MVT::i8;
3401    Count  = Op.getOperand(3);
3402    Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
3403    InFlag = Chain.getValue(1);
3404  }
3405
3406  Chain  = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
3407  InFlag = Chain.getValue(1);
3408  Chain  = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
3409  InFlag = Chain.getValue(1);
3410
3411  std::vector<MVT::ValueType> Tys;
3412  Tys.push_back(MVT::Other);
3413  Tys.push_back(MVT::Flag);
3414  std::vector<SDOperand> Ops;
3415  Ops.push_back(Chain);
3416  Ops.push_back(DAG.getValueType(AVT));
3417  Ops.push_back(InFlag);
3418  Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);
3419
3420  if (TwoRepStos) {
3421    InFlag = Chain.getValue(1);
3422    Count = Op.getOperand(3);
3423    MVT::ValueType CVT = Count.getValueType();
3424    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
3425                                 DAG.getConstant(3, CVT));
3426    Chain  = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
3427    InFlag = Chain.getValue(1);
3428    Tys.clear();
3429    Tys.push_back(MVT::Other);
3430    Tys.push_back(MVT::Flag);
3431    Ops.clear();
3432    Ops.push_back(Chain);
3433    Ops.push_back(DAG.getValueType(MVT::i8));
3434    Ops.push_back(InFlag);
3435    Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);
3436  } else if (BytesLeft) {
3437    // Issue stores for the last 1 - 3 bytes.
3438    SDOperand Value;
3439    unsigned Val = ValC->getValue() & 255;
3440    unsigned Offset = I->getValue() - BytesLeft;
3441    SDOperand DstAddr = Op.getOperand(1);
3442    MVT::ValueType AddrVT = DstAddr.getValueType();
3443    if (BytesLeft >= 2) {
3444      Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
3445      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
3446                          DAG.getNode(ISD::ADD, AddrVT, DstAddr,
3447                                      DAG.getConstant(Offset, AddrVT)),
3448                          DAG.getSrcValue(NULL));
3449      BytesLeft -= 2;
3450      Offset += 2;
3451    }
3452
3453    if (BytesLeft == 1) {
3454      Value = DAG.getConstant(Val, MVT::i8);
3455      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
3456                          DAG.getNode(ISD::ADD, AddrVT, DstAddr,
3457                                      DAG.getConstant(Offset, AddrVT)),
3458                          DAG.getSrcValue(NULL));
3459    }
3460  }
3461
3462  return Chain;
3463}
3464
3465SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
3466  SDOperand Chain = Op.getOperand(0);
3467  unsigned Align =
3468    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
3469  if (Align == 0) Align = 1;
3470
3471  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
3472  // If not DWORD aligned, or if the constant size is below the rep-string
3473  // threshold, call memcpy; it knows how to align to the right boundary first.
3474  if ((Align & 3) != 0 ||
3475      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
3476    MVT::ValueType IntPtr = getPointerTy();
3477    const Type *IntPtrTy = getTargetData().getIntPtrType();
3478    std::vector<std::pair<SDOperand, const Type*> > Args;
3479    Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
3480    Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy));
3481    Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
3482    std::pair<SDOperand,SDOperand> CallResult =
3483      LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
3484                  DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
3485    return CallResult.second;
3486  }
3487
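  // Otherwise emit a rep;movs with the count in ECX, the destination in EDI
  // and the source in ESI; the trailing 1-3 bytes are copied with explicit
  // loads and stores, or with a second byte-sized rep;movs when the count is
  // not a compile-time constant.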
3488  MVT::ValueType AVT;
3489  SDOperand Count;
3490  unsigned BytesLeft = 0;
3491  bool TwoRepMovs = false;
3492  switch (Align & 3) {
3493    case 2:   // WORD aligned
3494      AVT = MVT::i16;
3495      Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
3496      BytesLeft = I->getValue() % 2;
3497      break;
3498    case 0:   // DWORD aligned
3499      AVT = MVT::i32;
3500      if (I) {
3501        Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
3502        BytesLeft = I->getValue() % 4;
3503      } else {
3504        Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
3505                            DAG.getConstant(2, MVT::i8));
3506        TwoRepMovs = true;
3507      }
3508      break;
3509    default:  // Byte aligned
3510      AVT = MVT::i8;
3511      Count = Op.getOperand(3);
3512      break;
3513  }
3514
3515  SDOperand InFlag(0, 0);
3516  Chain  = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
3517  InFlag = Chain.getValue(1);
3518  Chain  = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
3519  InFlag = Chain.getValue(1);
3520  Chain  = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag);
3521  InFlag = Chain.getValue(1);
3522
3523  std::vector<MVT::ValueType> Tys;
3524  Tys.push_back(MVT::Other);
3525  Tys.push_back(MVT::Flag);
3526  std::vector<SDOperand> Ops;
3527  Ops.push_back(Chain);
3528  Ops.push_back(DAG.getValueType(AVT));
3529  Ops.push_back(InFlag);
3530  Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);
3531
3532  if (TwoRepMovs) {
3533    InFlag = Chain.getValue(1);
3534    Count = Op.getOperand(3);
3535    MVT::ValueType CVT = Count.getValueType();
3536    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
3537                                 DAG.getConstant(3, CVT));
3538    Chain  = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
3539    InFlag = Chain.getValue(1);
3540    Tys.clear();
3541    Tys.push_back(MVT::Other);
3542    Tys.push_back(MVT::Flag);
3543    Ops.clear();
3544    Ops.push_back(Chain);
3545    Ops.push_back(DAG.getValueType(MVT::i8));
3546    Ops.push_back(InFlag);
3547    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);
3548  } else if (BytesLeft) {
3549    // Issue loads and stores for the last 1 - 3 bytes.
3550    unsigned Offset = I->getValue() - BytesLeft;
3551    SDOperand DstAddr = Op.getOperand(1);
3552    MVT::ValueType DstVT = DstAddr.getValueType();
3553    SDOperand SrcAddr = Op.getOperand(2);
3554    MVT::ValueType SrcVT = SrcAddr.getValueType();
3555    SDOperand Value;
3556    if (BytesLeft >= 2) {
3557      Value = DAG.getLoad(MVT::i16, Chain,
3558                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
3559                                      DAG.getConstant(Offset, SrcVT)),
3560                          DAG.getSrcValue(NULL));
3561      Chain = Value.getValue(1);
3562      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
3563                          DAG.getNode(ISD::ADD, DstVT, DstAddr,
3564                                      DAG.getConstant(Offset, DstVT)),
3565                          DAG.getSrcValue(NULL));
3566      BytesLeft -= 2;
3567      Offset += 2;
3568    }
3569
3570    if (BytesLeft == 1) {
3571      Value = DAG.getLoad(MVT::i8, Chain,
3572                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
3573                                      DAG.getConstant(Offset, SrcVT)),
3574                          DAG.getSrcValue(NULL));
3575      Chain = Value.getValue(1);
3576      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
3577                          DAG.getNode(ISD::ADD, DstVT, DstAddr,
3578                                      DAG.getConstant(Offset, DstVT)),
3579                          DAG.getSrcValue(NULL));
3580    }
3581  }
3582
3583  return Chain;
3584}
3585
3586SDOperand
3587X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
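  // RDTSC leaves the timestamp in EDX:EAX; copy both halves out of their
  // fixed registers and merge them into the two i32 results plus the chain.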
3588  std::vector<MVT::ValueType> Tys;
3589  Tys.push_back(MVT::Other);
3590  Tys.push_back(MVT::Flag);
3591  std::vector<SDOperand> Ops;
3592  Ops.push_back(Op.getOperand(0));
3593  SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops);
3594  Ops.clear();
3595  Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)));
3596  Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX,
3597                                   MVT::i32, Ops[0].getValue(2)));
3598  Ops.push_back(Ops[1].getValue(1));
3599  Tys[0] = Tys[1] = MVT::i32;
3600  Tys.push_back(MVT::Other);
3601  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
3602}
3603
3604SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
3605  // vastart just stores the address of the VarArgsFrameIndex slot into the
3606  // memory location argument.
3607  // FIXME: Replace MVT::i32 with PointerTy
3608  SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
3609  return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
3610                     Op.getOperand(1), Op.getOperand(2));
3611}
3612
3613SDOperand
3614X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
3615  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
3616  switch (IntNo) {
3617  default: return SDOperand();    // Don't custom lower most intrinsics.
3618    // Comparison intrinsics.
3619  case Intrinsic::x86_sse_comieq_ss:
3620  case Intrinsic::x86_sse_comilt_ss:
3621  case Intrinsic::x86_sse_comile_ss:
3622  case Intrinsic::x86_sse_comigt_ss:
3623  case Intrinsic::x86_sse_comige_ss:
3624  case Intrinsic::x86_sse_comineq_ss:
3625  case Intrinsic::x86_sse_ucomieq_ss:
3626  case Intrinsic::x86_sse_ucomilt_ss:
3627  case Intrinsic::x86_sse_ucomile_ss:
3628  case Intrinsic::x86_sse_ucomigt_ss:
3629  case Intrinsic::x86_sse_ucomige_ss:
3630  case Intrinsic::x86_sse_ucomineq_ss:
3631  case Intrinsic::x86_sse2_comieq_sd:
3632  case Intrinsic::x86_sse2_comilt_sd:
3633  case Intrinsic::x86_sse2_comile_sd:
3634  case Intrinsic::x86_sse2_comigt_sd:
3635  case Intrinsic::x86_sse2_comige_sd:
3636  case Intrinsic::x86_sse2_comineq_sd:
3637  case Intrinsic::x86_sse2_ucomieq_sd:
3638  case Intrinsic::x86_sse2_ucomilt_sd:
3639  case Intrinsic::x86_sse2_ucomile_sd:
3640  case Intrinsic::x86_sse2_ucomigt_sd:
3641  case Intrinsic::x86_sse2_ucomige_sd:
3642  case Intrinsic::x86_sse2_ucomineq_sd: {
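    // Each (u)comi intrinsic becomes an X86ISD::COMI/UCOMI compare feeding a
    // SETCC on the matching condition code, extended to the i32 result the
    // intrinsic returns.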
3643    unsigned Opc = 0;
3644    ISD::CondCode CC = ISD::SETCC_INVALID;
3645    switch (IntNo) {
3646    default: break;
3647    case Intrinsic::x86_sse_comieq_ss:
3648    case Intrinsic::x86_sse2_comieq_sd:
3649      Opc = X86ISD::COMI;
3650      CC = ISD::SETEQ;
3651      break;
3652    case Intrinsic::x86_sse_comilt_ss:
3653    case Intrinsic::x86_sse2_comilt_sd:
3654      Opc = X86ISD::COMI;
3655      CC = ISD::SETLT;
3656      break;
3657    case Intrinsic::x86_sse_comile_ss:
3658    case Intrinsic::x86_sse2_comile_sd:
3659      Opc = X86ISD::COMI;
3660      CC = ISD::SETLE;
3661      break;
3662    case Intrinsic::x86_sse_comigt_ss:
3663    case Intrinsic::x86_sse2_comigt_sd:
3664      Opc = X86ISD::COMI;
3665      CC = ISD::SETGT;
3666      break;
3667    case Intrinsic::x86_sse_comige_ss:
3668    case Intrinsic::x86_sse2_comige_sd:
3669      Opc = X86ISD::COMI;
3670      CC = ISD::SETGE;
3671      break;
3672    case Intrinsic::x86_sse_comineq_ss:
3673    case Intrinsic::x86_sse2_comineq_sd:
3674      Opc = X86ISD::COMI;
3675      CC = ISD::SETNE;
3676      break;
3677    case Intrinsic::x86_sse_ucomieq_ss:
3678    case Intrinsic::x86_sse2_ucomieq_sd:
3679      Opc = X86ISD::UCOMI;
3680      CC = ISD::SETEQ;
3681      break;
3682    case Intrinsic::x86_sse_ucomilt_ss:
3683    case Intrinsic::x86_sse2_ucomilt_sd:
3684      Opc = X86ISD::UCOMI;
3685      CC = ISD::SETLT;
3686      break;
3687    case Intrinsic::x86_sse_ucomile_ss:
3688    case Intrinsic::x86_sse2_ucomile_sd:
3689      Opc = X86ISD::UCOMI;
3690      CC = ISD::SETLE;
3691      break;
3692    case Intrinsic::x86_sse_ucomigt_ss:
3693    case Intrinsic::x86_sse2_ucomigt_sd:
3694      Opc = X86ISD::UCOMI;
3695      CC = ISD::SETGT;
3696      break;
3697    case Intrinsic::x86_sse_ucomige_ss:
3698    case Intrinsic::x86_sse2_ucomige_sd:
3699      Opc = X86ISD::UCOMI;
3700      CC = ISD::SETGE;
3701      break;
3702    case Intrinsic::x86_sse_ucomineq_ss:
3703    case Intrinsic::x86_sse2_ucomineq_sd:
3704      Opc = X86ISD::UCOMI;
3705      CC = ISD::SETNE;
3706      break;
3707    }
3708    bool Flip;
3709    unsigned X86CC;
3710    translateX86CC(CC, true, X86CC, Flip);
3711    SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1),
3712                                 Op.getOperand(Flip?1:2));
3713    SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8,
3714                                  DAG.getConstant(X86CC, MVT::i8), Cond);
3715    return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
3716  }
3717  }
3718}
3719
3720/// LowerOperation - Provide custom lowering hooks for some operations.
3721///
3722SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
3723  switch (Op.getOpcode()) {
3724  default: assert(0 && "Should not custom lower this!");
3725  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
3726  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
3727  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3728  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
3729  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
3730  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
3731  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
3732  case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
3733  case ISD::SHL_PARTS:
3734  case ISD::SRA_PARTS:
3735  case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
3736  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
3737  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
3738  case ISD::FABS:               return LowerFABS(Op, DAG);
3739  case ISD::FNEG:               return LowerFNEG(Op, DAG);
3740  case ISD::SETCC:              return LowerSETCC(Op, DAG);
3741  case ISD::SELECT:             return LowerSELECT(Op, DAG);
3742  case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
3743  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
3744  case ISD::RET:                return LowerRET(Op, DAG);
3745  case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
3746  case ISD::MEMSET:             return LowerMEMSET(Op, DAG);
3747  case ISD::MEMCPY:             return LowerMEMCPY(Op, DAG);
3748  case ISD::READCYCLECOUNTER:   return LowerREADCYCLCECOUNTER(Op, DAG);
3749  case ISD::VASTART:            return LowerVASTART(Op, DAG);
3750  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3751  }
3752}
3753
3754const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
3755  switch (Opcode) {
3756  default: return NULL;
3757  case X86ISD::SHLD:               return "X86ISD::SHLD";
3758  case X86ISD::SHRD:               return "X86ISD::SHRD";
3759  case X86ISD::FAND:               return "X86ISD::FAND";
3760  case X86ISD::FXOR:               return "X86ISD::FXOR";
3761  case X86ISD::FILD:               return "X86ISD::FILD";
3762  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
3763  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
3764  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
3765  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
3766  case X86ISD::FLD:                return "X86ISD::FLD";
3767  case X86ISD::FST:                return "X86ISD::FST";
3768  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
3769  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
3770  case X86ISD::CALL:               return "X86ISD::CALL";
3771  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
3772  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
3773  case X86ISD::CMP:                return "X86ISD::CMP";
3774  case X86ISD::TEST:               return "X86ISD::TEST";
3775  case X86ISD::COMI:               return "X86ISD::COMI";
3776  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
3777  case X86ISD::SETCC:              return "X86ISD::SETCC";
3778  case X86ISD::CMOV:               return "X86ISD::CMOV";
3779  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
3780  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
3781  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
3782  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
3783  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
3784  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
3785  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
3786  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
3787  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
3788  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
3789  }
3790}
3791
3792void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
3793                                                       uint64_t Mask,
3794                                                       uint64_t &KnownZero,
3795                                                       uint64_t &KnownOne,
3796                                                       unsigned Depth) const {
3797  unsigned Opc = Op.getOpcode();
3798  assert((Opc >= ISD::BUILTIN_OP_END ||
3799          Opc == ISD::INTRINSIC_WO_CHAIN ||
3800          Opc == ISD::INTRINSIC_W_CHAIN ||
3801          Opc == ISD::INTRINSIC_VOID) &&
3802         "Should use MaskedValueIsZero if you don't know whether Op"
3803         " is a target node!");
3804
3805  KnownZero = KnownOne = 0;   // Don't know anything.
3806  switch (Opc) {
3807  default: break;
3808  case X86ISD::SETCC:
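    // SETCC produces 0 or 1, so every bit above bit zero is known zero.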
3809    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
3810    break;
3811  }
3812}
3813
3814std::vector<unsigned> X86TargetLowering::
3815getRegClassForInlineAsmConstraint(const std::string &Constraint,
3816                                  MVT::ValueType VT) const {
3817  if (Constraint.size() == 1) {
3818    // FIXME: not handling fp-stack yet!
3819    // FIXME: not handling MMX registers yet ('y' constraint).
3820    switch (Constraint[0]) {      // GCC X86 Constraint Letters
3821    default: break;  // Unknown constraint letter
3822    case 'r':   // GENERAL_REGS
3823    case 'R':   // LEGACY_REGS
3824      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX,
3825                                   X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
3826    case 'l':   // INDEX_REGS
3827      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX,
3828                                   X86::ESI, X86::EDI, X86::EBP, 0);
3829    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
3830    case 'Q':   // Q_REGS
3831      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 0);
3832    case 'x':   // SSE_REGS if SSE1 allowed
3833      if (Subtarget->hasSSE1())
3834        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3835                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
3836                                     0);
3837      return std::vector<unsigned>();
3838    case 'Y':   // SSE_REGS if SSE2 allowed
3839      if (Subtarget->hasSSE2())
3840        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3841                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
3842                                     0);
3843      return std::vector<unsigned>();
3844    }
3845  }
3846
3847  return std::vector<unsigned>();
3848}
3849
3850/// isLegalAddressImmediate - Return true if the integer value or
3851/// GlobalValue can be used as the offset of the target addressing mode.
3852bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const {
3853  // X86 allows a sign-extended 32-bit immediate field.
3854  return (V > -(1LL << 32) && V < (1LL << 32)-1);
3855}
3856
3857bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
3858  if (Subtarget->isTargetDarwin()) {
3859    Reloc::Model RModel = getTargetMachine().getRelocationModel();
3860    if (RModel == Reloc::Static)
3861      return true;
3862    else if (RModel == Reloc::DynamicNoPIC)
3863      return !DarwinGVRequiresExtraLoad(GV);
3864    else
3865      return false;
3866  } else
3867    return true;
3868}
3869
3870/// isShuffleMaskLegal - Targets can use this to indicate that they only
3871/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
3872/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
3873/// are assumed to be legal.
3874bool
3875X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
3876  // Only do shuffles on 128-bit vector types for now.
3877  if (MVT::getSizeInBits(VT) == 64) return false;
3878  return (Mask.Val->getNumOperands() <= 4 ||
3879          isSplatMask(Mask.Val)  ||
3880          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
3881          X86::isUNPCKLMask(Mask.Val) ||
3882          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
3883          X86::isUNPCKHMask(Mask.Val));
3884}
3885
3886bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
3887                                               MVT::ValueType EVT,
3888                                               SelectionDAG &DAG) const {
3889  unsigned NumElts = BVOps.size();
3890  // Only do shuffles on 128-bit vector types for now.
3891  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
3892  if (NumElts == 2) return true;
3893  if (NumElts == 4) {
3894    return (isMOVLMask(BVOps)  || isCommutedMOVL(BVOps, true) ||
3895            isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps));
3896  }
3897  return false;
3898}
3899