// X86ISelLowering.cpp revision c575ca22eaf8a656f3fa2c3f0f75264c4c4fcd21
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation ---*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file was developed by Chris Lattner and is distributed under
6// the University of Illinois Open Source License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that X86 uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "X86.h"
16#include "X86InstrBuilder.h"
17#include "X86ISelLowering.h"
18#include "X86TargetMachine.h"
19#include "llvm/CallingConv.h"
20#include "llvm/Constants.h"
21#include "llvm/Function.h"
22#include "llvm/Intrinsics.h"
23#include "llvm/ADT/VectorExtras.h"
24#include "llvm/Analysis/ScalarEvolutionExpressions.h"
25#include "llvm/CodeGen/MachineFrameInfo.h"
26#include "llvm/CodeGen/MachineFunction.h"
27#include "llvm/CodeGen/MachineInstrBuilder.h"
28#include "llvm/CodeGen/SelectionDAG.h"
29#include "llvm/CodeGen/SSARegMap.h"
30#include "llvm/Support/MathExtras.h"
31#include "llvm/Target/TargetOptions.h"
32using namespace llvm;
33
34// FIXME: temporary.
35#include "llvm/Support/CommandLine.h"
// Command-line switch gating the experimental fastcc lowering paths
// (LowerFastCCArguments / LowerFastCCCallTo); off unless explicitly enabled.
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));
38
/// X86TargetLowering - Constructor.  Records which generic SelectionDAG
/// operations the X86 backend selects natively and how every other one must
/// be massaged (Promote / Expand / Custom) before instruction selection,
/// based on the active subtarget's SSE/MMX feature bits.
X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  // Scalar FP goes through the SSE unit (instead of the x87 stack) iff SSE2
  // is available; this flag steers FP register-class and lowering choices.
  X86ScalarSSE = Subtarget->hasSSE2();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86::ESP);

  // NOTE(review): the comment below says Darwin should use the underscore
  // variants, yet they are enabled only for non-Darwin targets here
  // (presumably because Darwin's assembler already prepends '_' to symbol
  // names) -- confirm the intended polarity.
  if (!Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::R8RegisterClass);
  addRegisterClass(MVT::i16, X86::R16RegisterClass);
  addRegisterClass(MVT::i32, X86::R32RegisterClass);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (X86ScalarSSE)
    // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Expand);
  else
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64
  // isn't legal.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (X86ScalarSSE && !Subtarget->hasSSE3())
    // Expand FP_TO_UINT into a select.
    // FIXME: We would like to use a Custom expander here eventually to do
    // the optimal thing for SSE vs. the default expansion in the legalizer.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Expand);
  else
    // With SSE3 we can use fisttpll to convert to a signed i64.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);

  setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
  setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);

  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::SEXTLOAD         , MVT::i1   , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
  // No native bit-count instructions for these widths (no popcnt/tzcnt yet).
  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);

  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET             , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
  // 64-bit addm sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Scalar FP lives in the XMM register file (SSE).
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // SSE has no load+extend ops
    setOperationAction(ISD::EXTLOAD,  MVT::f32, Expand);
    setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Scalar FP lives on the x87 floating-point stack.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // x87 fsin/fcos are inexact for large inputs, so only use them when the
    // user has opted into unsafe FP math.
    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::AND,                MVT::v4f32, Legal);
    setOperationAction(ISD::OR,                 MVT::v4f32, Legal);
    setOperationAction(ISD::XOR,                MVT::v4f32, Legal);
    setOperationAction(ISD::ADD,                MVT::v4f32, Legal);
    setOperationAction(ISD::SUB,                MVT::v4f32, Legal);
    setOperationAction(ISD::MUL,                MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,                MVT::v2f64, Legal);
    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v2f64, Legal);
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    setOperationAction(ISD::MUL,                MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    // (v2i64/v2f64 are not in this enum range and are handled explicitly
    // below.)
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}
359
360std::vector<SDOperand>
361X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
362  if (F.getCallingConv() == CallingConv::Fast && EnableFastCC)
363    return LowerFastCCArguments(F, DAG);
364  return LowerCCCArguments(F, DAG);
365}
366
367std::pair<SDOperand, SDOperand>
368X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
369                               bool isVarArg, unsigned CallingConv,
370                               bool isTailCall,
371                               SDOperand Callee, ArgListTy &Args,
372                               SelectionDAG &DAG) {
373  assert((!isVarArg || CallingConv == CallingConv::C) &&
374         "Only C takes varargs!");
375
376  // If the callee is a GlobalAddress node (quite common, every direct call is)
377  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
378  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
379    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
380  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
381    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
382
383  if (CallingConv == CallingConv::Fast && EnableFastCC)
384    return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG);
385  return  LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG);
386}
387
388//===----------------------------------------------------------------------===//
389//                    C Calling Convention implementation
390//===----------------------------------------------------------------------===//
391
392std::vector<SDOperand>
393X86TargetLowering::LowerCCCArguments(Function &F, SelectionDAG &DAG) {
394  std::vector<SDOperand> ArgValues;
395
396  MachineFunction &MF = DAG.getMachineFunction();
397  MachineFrameInfo *MFI = MF.getFrameInfo();
398
399  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
400  // the stack frame looks like this:
401  //
402  // [ESP] -- return address
403  // [ESP + 4] -- first argument (leftmost lexically)
404  // [ESP + 8] -- second argument, if first argument is four bytes in size
405  //    ...
406  //
407  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
408  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
409    MVT::ValueType ObjectVT = getValueType(I->getType());
410    unsigned ArgIncrement = 4;
411    unsigned ObjSize;
412    switch (ObjectVT) {
413    default: assert(0 && "Unhandled argument type!");
414    case MVT::i1:
415    case MVT::i8:  ObjSize = 1;                break;
416    case MVT::i16: ObjSize = 2;                break;
417    case MVT::i32: ObjSize = 4;                break;
418    case MVT::i64: ObjSize = ArgIncrement = 8; break;
419    case MVT::f32: ObjSize = 4;                break;
420    case MVT::f64: ObjSize = ArgIncrement = 8; break;
421    }
422    // Create the frame index object for this incoming parameter...
423    int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
424
425    // Create the SelectionDAG nodes corresponding to a load from this parameter
426    SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
427
428    // Don't codegen dead arguments.  FIXME: remove this check when we can nuke
429    // dead loads.
430    SDOperand ArgValue;
431    if (!I->use_empty())
432      ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
433                             DAG.getSrcValue(NULL));
434    else {
435      if (MVT::isInteger(ObjectVT))
436        ArgValue = DAG.getConstant(0, ObjectVT);
437      else
438        ArgValue = DAG.getConstantFP(0, ObjectVT);
439    }
440    ArgValues.push_back(ArgValue);
441
442    ArgOffset += ArgIncrement;   // Move on to the next argument...
443  }
444
445  // If the function takes variable number of arguments, make a frame index for
446  // the start of the first vararg value... for expansion of llvm.va_start.
447  if (F.isVarArg())
448    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
449  ReturnAddrIndex = 0;     // No return address slot generated yet.
450  BytesToPopOnReturn = 0;  // Callee pops nothing.
451  BytesCallerReserves = ArgOffset;
452  return ArgValues;
453}
454
455std::pair<SDOperand, SDOperand>
456X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy,
457                                  bool isVarArg, bool isTailCall,
458                                  SDOperand Callee, ArgListTy &Args,
459                                  SelectionDAG &DAG) {
460  // Count how many bytes are to be pushed on the stack.
461  unsigned NumBytes = 0;
462
463  if (Args.empty()) {
464    // Save zero bytes.
465    Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(0, getPointerTy()));
466  } else {
467    for (unsigned i = 0, e = Args.size(); i != e; ++i)
468      switch (getValueType(Args[i].second)) {
469      default: assert(0 && "Unknown value type!");
470      case MVT::i1:
471      case MVT::i8:
472      case MVT::i16:
473      case MVT::i32:
474      case MVT::f32:
475        NumBytes += 4;
476        break;
477      case MVT::i64:
478      case MVT::f64:
479        NumBytes += 8;
480        break;
481      }
482
483    Chain = DAG.getCALLSEQ_START(Chain,
484                                 DAG.getConstant(NumBytes, getPointerTy()));
485
486    // Arguments go on the stack in reverse order, as specified by the ABI.
487    unsigned ArgOffset = 0;
488    SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
489    std::vector<SDOperand> Stores;
490
491    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
492      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
493      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
494
495      switch (getValueType(Args[i].second)) {
496      default: assert(0 && "Unexpected ValueType for argument!");
497      case MVT::i1:
498      case MVT::i8:
499      case MVT::i16:
500        // Promote the integer to 32 bits.  If the input type is signed use a
501        // sign extend, otherwise use a zero extend.
502        if (Args[i].second->isSigned())
503          Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
504        else
505          Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);
506
507        // FALL THROUGH
508      case MVT::i32:
509      case MVT::f32:
510        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
511                                     Args[i].first, PtrOff,
512                                     DAG.getSrcValue(NULL)));
513        ArgOffset += 4;
514        break;
515      case MVT::i64:
516      case MVT::f64:
517        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
518                                     Args[i].first, PtrOff,
519                                     DAG.getSrcValue(NULL)));
520        ArgOffset += 8;
521        break;
522      }
523    }
524    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);
525  }
526
527  std::vector<MVT::ValueType> RetVals;
528  MVT::ValueType RetTyVT = getValueType(RetTy);
529  RetVals.push_back(MVT::Other);
530
531  // The result values produced have to be legal.  Promote the result.
532  switch (RetTyVT) {
533  case MVT::isVoid: break;
534  default:
535    RetVals.push_back(RetTyVT);
536    break;
537  case MVT::i1:
538  case MVT::i8:
539  case MVT::i16:
540    RetVals.push_back(MVT::i32);
541    break;
542  case MVT::f32:
543    if (X86ScalarSSE)
544      RetVals.push_back(MVT::f32);
545    else
546      RetVals.push_back(MVT::f64);
547    break;
548  case MVT::i64:
549    RetVals.push_back(MVT::i32);
550    RetVals.push_back(MVT::i32);
551    break;
552  }
553
554  std::vector<MVT::ValueType> NodeTys;
555  NodeTys.push_back(MVT::Other);   // Returns a chain
556  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
557  std::vector<SDOperand> Ops;
558  Ops.push_back(Chain);
559  Ops.push_back(Callee);
560
561  // FIXME: Do not generate X86ISD::TAILCALL for now.
562  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
563  SDOperand InFlag = Chain.getValue(1);
564
565  NodeTys.clear();
566  NodeTys.push_back(MVT::Other);   // Returns a chain
567  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
568  Ops.clear();
569  Ops.push_back(Chain);
570  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
571  Ops.push_back(DAG.getConstant(0, getPointerTy()));
572  Ops.push_back(InFlag);
573  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
574  InFlag = Chain.getValue(1);
575
576  SDOperand RetVal;
577  if (RetTyVT != MVT::isVoid) {
578    switch (RetTyVT) {
579    default: assert(0 && "Unknown value type to return!");
580    case MVT::i1:
581    case MVT::i8:
582      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
583      Chain = RetVal.getValue(1);
584      if (RetTyVT == MVT::i1)
585        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
586      break;
587    case MVT::i16:
588      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
589      Chain = RetVal.getValue(1);
590      break;
591    case MVT::i32:
592      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
593      Chain = RetVal.getValue(1);
594      break;
595    case MVT::i64: {
596      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
597      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
598                                        Lo.getValue(2));
599      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
600      Chain = Hi.getValue(1);
601      break;
602    }
603    case MVT::f32:
604    case MVT::f64: {
605      std::vector<MVT::ValueType> Tys;
606      Tys.push_back(MVT::f64);
607      Tys.push_back(MVT::Other);
608      Tys.push_back(MVT::Flag);
609      std::vector<SDOperand> Ops;
610      Ops.push_back(Chain);
611      Ops.push_back(InFlag);
612      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
613      Chain  = RetVal.getValue(1);
614      InFlag = RetVal.getValue(2);
615      if (X86ScalarSSE) {
616        // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
617        // shouldn't be necessary except that RFP cannot be live across
618        // multiple blocks. When stackifier is fixed, they can be uncoupled.
619        MachineFunction &MF = DAG.getMachineFunction();
620        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
621        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
622        Tys.clear();
623        Tys.push_back(MVT::Other);
624        Ops.clear();
625        Ops.push_back(Chain);
626        Ops.push_back(RetVal);
627        Ops.push_back(StackSlot);
628        Ops.push_back(DAG.getValueType(RetTyVT));
629        Ops.push_back(InFlag);
630        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
631        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
632                             DAG.getSrcValue(NULL));
633        Chain = RetVal.getValue(1);
634      }
635
636      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
637        // FIXME: we would really like to remember that this FP_ROUND
638        // operation is okay to eliminate if we allow excess FP precision.
639        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
640      break;
641    }
642    }
643  }
644
645  return std::make_pair(RetVal, Chain);
646}
647
648//===----------------------------------------------------------------------===//
649//                    Fast Calling Convention implementation
650//===----------------------------------------------------------------------===//
651//
652// The X86 'fast' calling convention passes up to two integer arguments in
653// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
654// and requires that the callee pop its arguments off the stack (allowing proper
655// tail calls), and has the same return value conventions as C calling convs.
656//
657// This calling convention always arranges for the callee pop value to be 8n+4
658// bytes, which is needed for tail recursion elimination and stack alignment
659// reasons.
660//
661// Note that this can be enhanced in the future to pass fp vals in registers
662// (when we have a global fp allocator) and do other tricks.
663//
664
665/// AddLiveIn - This helper function adds the specified physical register to the
666/// MachineFunction as a live in value.  It also creates a corresponding virtual
667/// register for it.
668static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
669                          TargetRegisterClass *RC) {
670  assert(RC->contains(PReg) && "Not the correct regclass!");
671  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
672  MF.addLiveIn(PReg, VReg);
673  return VReg;
674}
675
// FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments
// to pass in registers.  0 is none, 1 is "use EAX", 2 is "use EAX and
// EDX".  Anything more is illegal.
//
// FIXME: The linscan register allocator currently has problem with
// coalescing.  At the time of this writing, whenever it decides to coalesce
// a physreg with a virtreg, this increases the size of the physreg's live
// range, and the live range cannot ever be reduced.  This causes problems if
// too many physregs are coalesced with virtregs, which can cause the register
// allocator to wedge itself.
//
// This code triggers this problem more often if we pass args in registers,
// so disable it until this is fixed.
//
// NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings
// about code being dead.
//
static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0;
694
695
/// LowerFastCCArguments - Lower the incoming arguments of an x86 "fastcc"
/// function.  Up to FASTCC_NUM_INT_ARGS_INREGS integer words are taken from
/// EAX/EDX (currently zero, so in practice every argument arrives on the
/// stack); all remaining arguments are loaded from fixed frame-index slots.
/// Also records the callee-pop byte count and the live-out result registers.
/// Returns one SDOperand per formal argument, in order.
std::vector<SDOperand>
X86TargetLowering::LowerFastCCArguments(Function &F, SelectionDAG &DAG) {
  std::vector<SDOperand> ArgValues;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Add DAG nodes to load the arguments...  On entry to a function the stack
  // frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first nonreg argument (leftmost lexically)
  // [ESP + 8] -- second nonreg argument, if first argument is 4 bytes in size
  //    ...
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot

  // Keep track of the number of integer regs passed so far.  This can be either
  // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
  unsigned NumIntRegs = 0;

  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
    MVT::ValueType ObjectVT = getValueType(I->getType());
    unsigned ArgIncrement = 4;
    // ObjSize == 0 means the argument was fully passed in registers; a
    // nonzero value is the number of bytes to load from the stack below.
    unsigned ObjSize = 0;
    SDOperand ArgValue;

    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i1:
    case MVT::i8:
      // First byte-sized register argument goes in AL, the second in DL.
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
                                    X86::R8RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i8);
          DAG.setRoot(ArgValue.getValue(1));
          if (ObjectVT == MVT::i1)
            // FIXME: Should insert a assertzext here.
            ArgValue = DAG.getNode(ISD::TRUNCATE, MVT::i1, ArgValue);
        }
        ++NumIntRegs;
        break;
      }

      ObjSize = 1;
      break;
    case MVT::i16:
      // First 16-bit register argument goes in AX, the second in DX.
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
                                    X86::R16RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i16);
          DAG.setRoot(ArgValue.getValue(1));
        }
        ++NumIntRegs;
        break;
      }
      ObjSize = 2;
      break;
    case MVT::i32:
      // First 32-bit register argument goes in EAX, the second in EDX.
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned VReg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                                    X86::R32RegisterClass);
          ArgValue = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
          DAG.setRoot(ArgValue.getValue(1));
        }
        ++NumIntRegs;
        break;
      }
      ObjSize = 4;
      break;
    case MVT::i64:
      // Both halves fit in registers (only possible when no int regs have
      // been used yet): low half in EAX, high half in EDX.
      if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
        if (!I->use_empty()) {
          unsigned BotReg = AddLiveIn(MF, X86::EAX, X86::R32RegisterClass);
          unsigned TopReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);

          SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32);
          SDOperand Hi  = DAG.getCopyFromReg(Low.getValue(1), TopReg, MVT::i32);
          DAG.setRoot(Hi.getValue(1));

          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
        }
        NumIntRegs += 2;
        break;
      } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
        // Only one register is left (EDX): pass the low half in it and load
        // the high half from the stack.
        if (!I->use_empty()) {
          unsigned BotReg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);
          SDOperand Low = DAG.getCopyFromReg(DAG.getRoot(), BotReg, MVT::i32);
          DAG.setRoot(Low.getValue(1));

          // Load the high part from memory.
          // Create the frame index object for this incoming parameter...
          int FI = MFI->CreateFixedObject(4, ArgOffset);
          SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
          SDOperand Hi = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
                                     DAG.getSrcValue(NULL));
          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Low, Hi);
        }
        ArgOffset += 4;
        NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
        break;
      }
      ObjSize = ArgIncrement = 8;
      break;
    case MVT::f32: ObjSize = 4;                break;
    case MVT::f64: ObjSize = ArgIncrement = 8; break;
    }

    // Don't codegen dead arguments.  FIXME: remove this check when we can nuke
    // dead loads.
    if (ObjSize && !I->use_empty()) {
      // Create the frame index object for this incoming parameter...
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);

      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);

      ArgValue = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
                             DAG.getSrcValue(NULL));
    } else if (ArgValue.Val == 0) {
      // Dead argument for which no copy/load was emitted: plug in a zero of
      // the right type so ArgValues stays aligned with the formal list.
      if (MVT::isInteger(ObjectVT))
        ArgValue = DAG.getConstant(0, ObjectVT);
      else
        ArgValue = DAG.getConstantFP(0, ObjectVT);
    }
    ArgValues.push_back(ArgValue);

    if (ObjSize)
      ArgOffset += ArgIncrement;   // Move on to the next argument.
  }

  // Make sure the instruction takes 8n+4 bytes to make sure the start of the
  // arguments and the arguments after the retaddr has been pushed are aligned.
  // (Only a multiple-of-8 count is bumped; see the matching logic in
  // LowerFastCCCallTo.)
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(F.getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  }
  return ArgValues;
}
862
/// LowerFastCCCallTo - Lower an outgoing call using the x86 "fastcc"
/// convention: the first FASTCC_NUM_INT_ARGS_INREGS integer words travel in
/// EAX/EDX, all other arguments are stored to the outgoing-argument stack
/// area, and the callee pops the stack bytes on return.  Returns the
/// (result value, chain) pair.
std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy,
                                     bool isTailCall, SDOperand Callee,
                                     ArgListTy &Args, SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.  This first pass must
  // make exactly the same register-assignment decisions as the emission pass
  // below so the byte counts agree.
  unsigned NumBytes = 0;

  // Keep track of the number of integer regs passed so far.  This can be either
  // 0 (neither EAX or EDX used), 1 (EAX is used) or 2 (EAX and EDX are both
  // used).
  unsigned NumIntRegs = 0;

  for (unsigned i = 0, e = Args.size(); i != e; ++i)
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unknown value type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      // Small integers take a register if one is free; otherwise a stack word.
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        ++NumIntRegs;
        break;
      }
      // fall through
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
      // i64 may take two regs, one reg + one stack word, or two stack words.
      if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
        NumIntRegs += 2;
        break;
      } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
        NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
        NumBytes += 4;
        break;
      }

      // fall through
    case MVT::f64:
      NumBytes += 8;
      break;
    }

  // Make sure the instruction takes 8n+4 bytes to make sure the start of the
  // arguments and the arguments after the retaddr has been pushed are aligned.
  if ((NumBytes & 7) == 0)
    NumBytes += 4;

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
  NumIntRegs = 0;
  std::vector<SDOperand> Stores;
  std::vector<SDOperand> RegValuesToPass;
  // Second pass: emit the stores for stack arguments and collect the values
  // destined for registers (copied into EAX/EDX below).
  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i1:
      Args[i].first = DAG.getNode(ISD::ANY_EXTEND, MVT::i8, Args[i].first);
      // Fall through.
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        RegValuesToPass.push_back(Args[i].first);
        ++NumIntRegs;
        break;
      }
      // Fall through
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                   Args[i].first, PtrOff,
                                   DAG.getSrcValue(NULL)));
      ArgOffset += 4;
      break;
    }
    case MVT::i64:
       // Can pass (at least) part of it in regs?
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                   Args[i].first, DAG.getConstant(1, MVT::i32));
        SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                   Args[i].first, DAG.getConstant(0, MVT::i32));
        RegValuesToPass.push_back(Lo);
        ++NumIntRegs;

        // Pass both parts in regs?
        if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
          RegValuesToPass.push_back(Hi);
          ++NumIntRegs;
        } else {
          // Pass the high part in memory.
          SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
          PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
          Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Hi, PtrOff, DAG.getSrcValue(NULL)));
          ArgOffset += 4;
        }
        break;
      }
      // Fall through
    case MVT::f64:
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                   Args[i].first, PtrOff,
                                   DAG.getSrcValue(NULL)));
      ArgOffset += 8;
      break;
    }
  }
  if (!Stores.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);

  // Make sure the instruction takes 8n+4 bytes to make sure the start of the
  // arguments and the arguments after the retaddr has been pushed are aligned.
  // This mirrors the NumBytes adjustment above, so ArgOffset == NumBytes here.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);

  RetVals.push_back(MVT::Other);

  // The result values produced have to be legal.  Promote the result.
  switch (RetTyVT) {
  case MVT::isVoid: break;
  default:
    RetVals.push_back(RetTyVT);
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    RetVals.push_back(MVT::i32);
    break;
  case MVT::f32:
    // Without SSE the x87 stack always carries f64 precision.
    if (X86ScalarSSE)
      RetVals.push_back(MVT::f32);
    else
      RetVals.push_back(MVT::f64);
    break;
  case MVT::i64:
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
    break;
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
    unsigned CCReg;
    SDOperand RegToPass = RegValuesToPass[i];
    // The first register value goes in the A register of matching width, the
    // second in the D register.
    switch (RegToPass.getValueType()) {
    default: assert(0 && "Bad thing to pass in regs");
    case MVT::i8:
      CCReg = (i == 0) ? X86::AL  : X86::DL;
      break;
    case MVT::i16:
      CCReg = (i == 0) ? X86::AX  : X86::DX;
      break;
    case MVT::i32:
      CCReg = (i == 0) ? X86::EAX : X86::EDX;
      break;
    }

    Chain = DAG.getCopyToReg(Chain, CCReg, RegToPass, InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  // CALLSEQ_END takes (bytes allocated, bytes the callee pops); fastcc
  // callees pop everything, so both operands are ArgOffset.
  Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
  Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  // Copy the return value out of the fixed result register(s).
  SDOperand RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT) {
    default: assert(0 && "Unknown value type to return!");
    case MVT::i1:
    case MVT::i8:
      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
      Chain = RetVal.getValue(1);
      if (RetTyVT == MVT::i1)
        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
      break;
    case MVT::i16:
      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i64: {
      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
                                        Lo.getValue(2));
      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
      Chain = Hi.getValue(1);
      break;
    }
    case MVT::f32:
    case MVT::f64: {
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(InFlag);
      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
      Chain  = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);
      if (X86ScalarSSE) {
        // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
        // shouldn't be necessary except that RFP cannot be live across
        // multiple blocks. When stackifier is fixed, they can be uncoupled.
        // Spill the x87 result to a stack slot and reload it as an SSE value.
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
        Tys.clear();
        Tys.push_back(MVT::Other);
        Ops.clear();
        Ops.push_back(Chain);
        Ops.push_back(RetVal);
        Ops.push_back(StackSlot);
        Ops.push_back(DAG.getValueType(RetTyVT));
        Ops.push_back(InFlag);
        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
                             DAG.getSrcValue(NULL));
        Chain = RetVal.getValue(1);
      }

      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
        // FIXME: we would really like to remember that this FP_ROUND
        // operation is okay to eliminate if we allow excess FP precision.
        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
      break;
    }
    }
  }

  return std::make_pair(RetVal, Chain);
}
1132
1133SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
1134  if (ReturnAddrIndex == 0) {
1135    // Set up a frame object for the return address.
1136    MachineFunction &MF = DAG.getMachineFunction();
1137    ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
1138  }
1139
1140  return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32);
1141}
1142
1143
1144
1145std::pair<SDOperand, SDOperand> X86TargetLowering::
1146LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
1147                        SelectionDAG &DAG) {
1148  SDOperand Result;
1149  if (Depth)        // Depths > 0 not supported yet!
1150    Result = DAG.getConstant(0, getPointerTy());
1151  else {
1152    SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
1153    if (!isFrameAddress)
1154      // Just load the return address
1155      Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI,
1156                           DAG.getSrcValue(NULL));
1157    else
1158      Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI,
1159                           DAG.getConstant(4, MVT::i32));
1160  }
1161  return std::make_pair(Result, Chain);
1162}
1163
1164/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode
1165/// which corresponds to the condition code.
1166static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) {
1167  switch (X86CC) {
1168  default: assert(0 && "Unknown X86 conditional code!");
1169  case X86ISD::COND_A:  return X86::JA;
1170  case X86ISD::COND_AE: return X86::JAE;
1171  case X86ISD::COND_B:  return X86::JB;
1172  case X86ISD::COND_BE: return X86::JBE;
1173  case X86ISD::COND_E:  return X86::JE;
1174  case X86ISD::COND_G:  return X86::JG;
1175  case X86ISD::COND_GE: return X86::JGE;
1176  case X86ISD::COND_L:  return X86::JL;
1177  case X86ISD::COND_LE: return X86::JLE;
1178  case X86ISD::COND_NE: return X86::JNE;
1179  case X86ISD::COND_NO: return X86::JNO;
1180  case X86ISD::COND_NP: return X86::JNP;
1181  case X86ISD::COND_NS: return X86::JNS;
1182  case X86ISD::COND_O:  return X86::JO;
1183  case X86ISD::COND_P:  return X86::JP;
1184  case X86ISD::COND_S:  return X86::JS;
1185  }
1186}
1187
/// translateX86CC - do a one to one translation of a ISD::CondCode to the X86
/// specific condition code. It returns false if it cannot do a direct
/// translation (X86CC is then left as COND_INVALID). X86CC is the translated
/// CondCode. Flip is set to true if the order of the comparison operands
/// should be flipped.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, bool &Flip) {
  Flip = false;
  X86CC = X86ISD::COND_INVALID;
  if (!isFP) {
    // Integer compares map directly: signed forms use G/GE/L/LE, unsigned
    // forms use A/AE/B/BE.
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
    case ISD::SETGT:  X86CC = X86ISD::COND_G;  break;
    case ISD::SETGE:  X86CC = X86ISD::COND_GE; break;
    case ISD::SETLT:  X86CC = X86ISD::COND_L;  break;
    case ISD::SETLE:  X86CC = X86ISD::COND_LE; break;
    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
    case ISD::SETULT: X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGT: X86CC = X86ISD::COND_A;  break;
    case ISD::SETULE: X86CC = X86ISD::COND_BE; break;
    case ISD::SETUGE: X86CC = X86ISD::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    // ZF  PF  CF   op
    //  0 | 0 | 0 | X > Y
    //  0 | 0 | 1 | X < Y
    //  1 | 0 | 0 | X == Y
    //  1 | 1 | 1 | unordered
    //
    // SETOLT/SETOLE flip the operands (X < Y <=> Y > X) so the A/AE
    // conditions can be used, and SETUGT/SETUGE flip for B/BE; each such
    // case then falls through to the shared condition-code assignment.
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ: X86CC = X86ISD::COND_E;  break;
    case ISD::SETOLT: Flip = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT: X86CC = X86ISD::COND_A;  break;
    case ISD::SETOLE: Flip = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE: X86CC = X86ISD::COND_AE; break;
    case ISD::SETUGT: Flip = true; // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT: X86CC = X86ISD::COND_B;  break;
    case ISD::SETUGE: Flip = true; // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE: X86CC = X86ISD::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE: X86CC = X86ISD::COND_NE; break;
    case ISD::SETUO: X86CC = X86ISD::COND_P;  break;
    case ISD::SETO:  X86CC = X86ISD::COND_NP; break;
    }
  }

  return X86CC != X86ISD::COND_INVALID;
}
1242
1243static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC,
1244                           bool &Flip) {
1245  return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip);
1246}
1247
1248/// hasFPCMov - is there a floating point cmov for the specific X86 condition
1249/// code. Current x86 isa includes the following FP cmov instructions:
1250/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
1251static bool hasFPCMov(unsigned X86CC) {
1252  switch (X86CC) {
1253  default:
1254    return false;
1255  case X86ISD::COND_B:
1256  case X86ISD::COND_BE:
1257  case X86ISD::COND_E:
1258  case X86ISD::COND_P:
1259  case X86ISD::COND_A:
1260  case X86ISD::COND_AE:
1261  case X86ISD::COND_NE:
1262  case X86ISD::COND_NP:
1263    return true;
1264  }
1265}
1266
/// InsertAtEndOfBasicBlock - Expand pseudo instructions that need custom
/// basic-block surgery: the FP/vector CMOV pseudos become a branch diamond
/// with a PHI, and the FP_TO_INT*_IN_MEM pseudos become an FNSTCW/FLDCW
/// sequence that forces round-toward-zero around the integer store.
/// Returns the block that subsequent instructions should be appended to.
MachineBasicBlock *
X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    // It points just past BB; the new blocks are inserted there so they
    // follow BB in layout order.
    ilist<MachineBasicBlock>::iterator It = BB;
    ++It;

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
    // Operand 3 holds the X86 condition code for the select.
    unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue());
    BuildMI(BB, Opc, 1).addMBB(sinkMBB);
    MachineFunction *F = BB->getParent();
    F->getBasicBlockList().insert(It, copy0MBB);
    F->getBasicBlockList().insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block which will contain the Phi node for the select.
    for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
        e = BB->succ_end(); i != e; ++i)
      sinkMBB->addSuccessor(*i);
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while(!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }

  case X86::FP_TO_INT16_IN_MEM:
  case X86::FP_TO_INT32_IN_MEM:
  case X86::FP_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    // Save the current FP control word to the stack slot.
    addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);

    // Load the old value of the high byte of the control word...
    unsigned OldCW =
      F->getSSARegMap()->createVirtualRegister(X86::R16RegisterClass);
    addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);

    // Set the high part to be round to zero...
    // (0xC7F = rounding-control bits 10-11 set, all exceptions masked.)
    addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
    }

    // Reassemble the address mode from operands 0-3 (base, scale, index,
    // displacement/global).
    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getFrameIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImmedValue();
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      // NOTE(review): the index operand is read with getImmedValue() even
      // though AM.IndexReg normally holds a register number — confirm this
      // matches how the pseudo encodes its address operands.
      AM.IndexReg = Op.getImmedValue();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImmedValue();
    }
    // Emit the truncating FP store; operand 4 is the FP value register.
    addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
  }
}
1394
1395
1396//===----------------------------------------------------------------------===//
1397//                           X86 Custom Lowering Hooks
1398//===----------------------------------------------------------------------===//
1399
1400/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra
1401/// load. For Darwin, external and weak symbols are indirect, loading the value
1402/// at address GV rather then the value of GV itself. This means that the
1403/// GlobalAddress must be in the base or index register of the address, not the
1404/// GV offset field.
1405static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
1406  return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
1407          (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
1408}
1409
1410/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
1411/// true if Op is undef or if its value falls within the specified range (L, H].
1412static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
1413  if (Op.getOpcode() == ISD::UNDEF)
1414    return true;
1415
1416  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
1417  return (Val >= Low && Val < Hi);
1418}
1419
1420/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
1421/// true if Op is undef or if its value equal to the specified value.
1422static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
1423  if (Op.getOpcode() == ISD::UNDEF)
1424    return true;
1425  return cast<ConstantSDNode>(Op)->getValue() == Val;
1426}
1427
1428/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
1429/// specifies a shuffle of elements that is suitable for input to PSHUFD.
1430bool X86::isPSHUFDMask(SDNode *N) {
1431  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1432
1433  if (N->getNumOperands() != 4)
1434    return false;
1435
1436  // Check if the value doesn't reference the second vector.
1437  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1438    SDOperand Arg = N->getOperand(i);
1439    if (Arg.getOpcode() == ISD::UNDEF) continue;
1440    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1441    if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
1442      return false;
1443  }
1444
1445  return true;
1446}
1447
1448/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
1449/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
1450bool X86::isPSHUFHWMask(SDNode *N) {
1451  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1452
1453  if (N->getNumOperands() != 8)
1454    return false;
1455
1456  // Lower quadword copied in order.
1457  for (unsigned i = 0; i != 4; ++i) {
1458    SDOperand Arg = N->getOperand(i);
1459    if (Arg.getOpcode() == ISD::UNDEF) continue;
1460    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1461    if (cast<ConstantSDNode>(Arg)->getValue() != i)
1462      return false;
1463  }
1464
1465  // Upper quadword shuffled.
1466  for (unsigned i = 4; i != 8; ++i) {
1467    SDOperand Arg = N->getOperand(i);
1468    if (Arg.getOpcode() == ISD::UNDEF) continue;
1469    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1470    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1471    if (Val < 4 || Val > 7)
1472      return false;
1473  }
1474
1475  return true;
1476}
1477
1478/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
1479/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
1480bool X86::isPSHUFLWMask(SDNode *N) {
1481  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1482
1483  if (N->getNumOperands() != 8)
1484    return false;
1485
1486  // Upper quadword copied in order.
1487  for (unsigned i = 4; i != 8; ++i)
1488    if (!isUndefOrEqual(N->getOperand(i), i))
1489      return false;
1490
1491  // Lower quadword shuffled.
1492  for (unsigned i = 0; i != 4; ++i)
1493    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
1494      return false;
1495
1496  return true;
1497}
1498
1499/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
1500/// specifies a shuffle of elements that is suitable for input to SHUFP*.
1501bool X86::isSHUFPMask(SDNode *N) {
1502  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1503
1504  unsigned NumElems = N->getNumOperands();
1505  if (NumElems == 2) {
1506    // The only cases that ought be handled by SHUFPD is
1507    // Dest { 2, 1 } <=  shuffle( Dest { 1, 0 },  Src { 3, 2 }
1508    // Dest { 3, 0 } <=  shuffle( Dest { 1, 0 },  Src { 3, 2 }
1509    // Expect bit 0 == 1, bit1 == 2
1510    SDOperand Bit0 = N->getOperand(0);
1511    SDOperand Bit1 = N->getOperand(1);
1512    if (isUndefOrEqual(Bit0, 0) && isUndefOrEqual(Bit1, 3))
1513      return true;
1514    if (isUndefOrEqual(Bit0, 1) && isUndefOrEqual(Bit1, 2))
1515      return true;
1516    return false;
1517  }
1518
1519  if (NumElems != 4) return false;
1520
1521  // Each half must refer to only one of the vector.
1522  for (unsigned i = 0; i < 2; ++i) {
1523    SDOperand Arg = N->getOperand(i);
1524    if (Arg.getOpcode() == ISD::UNDEF) continue;
1525    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1526    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1527    if (Val >= 4) return false;
1528  }
1529  for (unsigned i = 2; i < 4; ++i) {
1530    SDOperand Arg = N->getOperand(i);
1531    if (Arg.getOpcode() == ISD::UNDEF) continue;
1532    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1533    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1534    if (Val < 4) return false;
1535  }
1536
1537  return true;
1538}
1539
1540/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
1541/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
1542bool X86::isMOVHLPSMask(SDNode *N) {
1543  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1544
1545  if (N->getNumOperands() != 4)
1546    return false;
1547
1548  // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
1549  return isUndefOrEqual(N->getOperand(0), 6) &&
1550         isUndefOrEqual(N->getOperand(1), 7) &&
1551         isUndefOrEqual(N->getOperand(2), 2) &&
1552         isUndefOrEqual(N->getOperand(3), 3);
1553}
1554
/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
bool X86::isMOVLHPSMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // MOVLHPS operates on 4-element (v4f32 / v4i32) vectors only.
  if (N->getNumOperands() != 4)
    return false;

  // Expect mask <0, 1, 4, 5>: keep the low half of the dest and move the
  // low half of the source into the high half of the result.
  return isUndefOrEqual(N->getOperand(0), 0) &&
         isUndefOrEqual(N->getOperand(1), 1) &&
         isUndefOrEqual(N->getOperand(2), 4) &&
         isUndefOrEqual(N->getOperand(3), 5);
}
1569
1570/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
1571/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
1572bool X86::isMOVLPMask(SDNode *N) {
1573  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1574
1575  unsigned NumElems = N->getNumOperands();
1576  if (NumElems != 2 && NumElems != 4)
1577    return false;
1578
1579  for (unsigned i = 0; i < NumElems/2; ++i)
1580    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
1581      return false;
1582
1583  for (unsigned i = NumElems/2; i < NumElems; ++i)
1584    if (!isUndefOrEqual(N->getOperand(i), i))
1585      return false;
1586
1587  return true;
1588}
1589
1590/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
1591/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}.
1592bool X86::isMOVHPMask(SDNode *N) {
1593  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1594
1595  unsigned NumElems = N->getNumOperands();
1596  if (NumElems != 2 && NumElems != 4)
1597    return false;
1598
1599  for (unsigned i = 0; i < NumElems/2; ++i)
1600    if (!isUndefOrEqual(N->getOperand(i), i))
1601      return false;
1602
1603  for (unsigned i = 0; i < NumElems/2; ++i) {
1604    SDOperand Arg = N->getOperand(i + NumElems/2);
1605    if (!isUndefOrEqual(Arg, i + NumElems))
1606      return false;
1607  }
1608
1609  return true;
1610}
1611
1612/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
1613/// specifies a shuffle of elements that is suitable for input to UNPCKL.
1614bool X86::isUNPCKLMask(SDNode *N) {
1615  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1616
1617  unsigned NumElems = N->getNumOperands();
1618  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1619    return false;
1620
1621  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
1622    SDOperand BitI  = N->getOperand(i);
1623    SDOperand BitI1 = N->getOperand(i+1);
1624    if (!isUndefOrEqual(BitI, j))
1625      return false;
1626    if (!isUndefOrEqual(BitI1, j + NumElems))
1627      return false;
1628  }
1629
1630  return true;
1631}
1632
1633/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
1634/// specifies a shuffle of elements that is suitable for input to UNPCKH.
1635bool X86::isUNPCKHMask(SDNode *N) {
1636  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1637
1638  unsigned NumElems = N->getNumOperands();
1639  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1640    return false;
1641
1642  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
1643    SDOperand BitI  = N->getOperand(i);
1644    SDOperand BitI1 = N->getOperand(i+1);
1645    if (!isUndefOrEqual(BitI, j + NumElems/2))
1646      return false;
1647    if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems))
1648      return false;
1649  }
1650
1651  return true;
1652}
1653
1654/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
1655/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
1656/// <0, 0, 1, 1>
1657bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
1658  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1659
1660  unsigned NumElems = N->getNumOperands();
1661  if (NumElems != 4 && NumElems != 8 && NumElems != 16)
1662    return false;
1663
1664  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
1665    SDOperand BitI  = N->getOperand(i);
1666    SDOperand BitI1 = N->getOperand(i+1);
1667
1668    if (!isUndefOrEqual(BitI, j))
1669      return false;
1670    if (!isUndefOrEqual(BitI1, j))
1671      return false;
1672  }
1673
1674  return true;
1675}
1676
1677/// isMOVSMask - Return true if the specified VECTOR_SHUFFLE operand
1678/// specifies a shuffle of elements that is suitable for input to MOVS{S|D}.
1679bool X86::isMOVSMask(SDNode *N) {
1680  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1681
1682  unsigned NumElems = N->getNumOperands();
1683  if (NumElems != 2 && NumElems != 4)
1684    return false;
1685
1686  if (!isUndefOrEqual(N->getOperand(0), NumElems))
1687    return false;
1688
1689  for (unsigned i = 1; i < NumElems; ++i) {
1690    SDOperand Arg = N->getOperand(i);
1691    if (!isUndefOrEqual(Arg, i))
1692      return false;
1693  }
1694
1695  return true;
1696}
1697
1698/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
1699/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
1700bool X86::isMOVSHDUPMask(SDNode *N) {
1701  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1702
1703  if (N->getNumOperands() != 4)
1704    return false;
1705
1706  // Expect 1, 1, 3, 3
1707  for (unsigned i = 0; i < 2; ++i) {
1708    SDOperand Arg = N->getOperand(i);
1709    if (Arg.getOpcode() == ISD::UNDEF) continue;
1710    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1711    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1712    if (Val != 1) return false;
1713  }
1714
1715  bool HasHi = false;
1716  for (unsigned i = 2; i < 4; ++i) {
1717    SDOperand Arg = N->getOperand(i);
1718    if (Arg.getOpcode() == ISD::UNDEF) continue;
1719    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1720    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1721    if (Val != 3) return false;
1722    HasHi = true;
1723  }
1724
1725  // Don't use movshdup if it can be done with a shufps.
1726  return HasHi;
1727}
1728
/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
bool X86::isMOVSLDUPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // MOVSLDUP operates on 4-element vectors only.
  if (N->getNumOperands() != 4)
    return false;

  // Expect mask <0, 0, 2, 2>: low half must reference element 0.
  for (unsigned i = 0; i < 2; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 0) return false;
  }

  // High half must reference element 2, and at least one slot must do so
  // non-undef for the transform to be worthwhile.
  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val != 2) return false;
    HasHi = true;
  }

  // Don't use movsldup if it can be done with a shufps.
  return HasHi;
}
1759
/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
/// a splat of a single element.
static bool isSplatMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  // NOTE(review): element 0 is required to be a constant here — an undef in
  // slot 0 trips the assert below.  Callers presumably never pass such
  // masks; confirm before using this on arbitrary shuffle masks.
  SDOperand Elt = N->getOperand(0);
  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    // Every non-undef element must be the identical constant node.
    if (Arg != Elt) return false;
  }

  // Make sure it is a splat of the first vector operand.
  return cast<ConstantSDNode>(Elt)->getValue() < N->getNumOperands();
}
1779
1780/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
1781/// a splat of a single element and it's a 2 or 4 element mask.
1782bool X86::isSplatMask(SDNode *N) {
1783  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1784
1785  // We can only splat 64-bit, and 32-bit quantities.
1786  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
1787    return false;
1788  return ::isSplatMask(N);
1789}
1790
1791/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
1792/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
1793/// instructions.
1794unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
1795  unsigned NumOperands = N->getNumOperands();
1796  unsigned Shift = (NumOperands == 4) ? 2 : 1;
1797  unsigned Mask = 0;
1798  for (unsigned i = 0; i < NumOperands; ++i) {
1799    unsigned Val = 0;
1800    SDOperand Arg = N->getOperand(NumOperands-i-1);
1801    if (Arg.getOpcode() != ISD::UNDEF)
1802      Val = cast<ConstantSDNode>(Arg)->getValue();
1803    if (Val >= NumOperands) Val -= NumOperands;
1804    Mask |= Val;
1805    if (i != NumOperands - 1)
1806      Mask <<= Shift;
1807  }
1808
1809  return Mask;
1810}
1811
1812/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
1813/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
1814/// instructions.
1815unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
1816  unsigned Mask = 0;
1817  // 8 nodes, but we only care about the last 4.
1818  for (unsigned i = 7; i >= 4; --i) {
1819    unsigned Val = 0;
1820    SDOperand Arg = N->getOperand(i);
1821    if (Arg.getOpcode() != ISD::UNDEF)
1822      Val = cast<ConstantSDNode>(Arg)->getValue();
1823    Mask |= (Val - 4);
1824    if (i != 4)
1825      Mask <<= 2;
1826  }
1827
1828  return Mask;
1829}
1830
1831/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
1832/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
1833/// instructions.
1834unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
1835  unsigned Mask = 0;
1836  // 8 nodes, but we only care about the first 4.
1837  for (int i = 3; i >= 0; --i) {
1838    unsigned Val = 0;
1839    SDOperand Arg = N->getOperand(i);
1840    if (Arg.getOpcode() != ISD::UNDEF)
1841      Val = cast<ConstantSDNode>(Arg)->getValue();
1842    Mask |= Val;
1843    if (i != 0)
1844      Mask <<= 2;
1845  }
1846
1847  return Mask;
1848}
1849
1850/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
1851/// specifies a 8 element shuffle that can be broken into a pair of
1852/// PSHUFHW and PSHUFLW.
1853static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
1854  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1855
1856  if (N->getNumOperands() != 8)
1857    return false;
1858
1859  // Lower quadword shuffled.
1860  for (unsigned i = 0; i != 4; ++i) {
1861    SDOperand Arg = N->getOperand(i);
1862    if (Arg.getOpcode() == ISD::UNDEF) continue;
1863    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1864    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1865    if (Val > 4)
1866      return false;
1867  }
1868
1869  // Upper quadword shuffled.
1870  for (unsigned i = 4; i != 8; ++i) {
1871    SDOperand Arg = N->getOperand(i);
1872    if (Arg.getOpcode() == ISD::UNDEF) continue;
1873    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1874    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1875    if (Val < 4 || Val > 7)
1876      return false;
1877  }
1878
1879  return true;
1880}
1881
1882/// CommuteVectorShuffle - Swap vector_shuffle operandsas well as
1883/// values in ther permute mask.
1884static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) {
1885  SDOperand V1 = Op.getOperand(0);
1886  SDOperand V2 = Op.getOperand(1);
1887  SDOperand Mask = Op.getOperand(2);
1888  MVT::ValueType VT = Op.getValueType();
1889  MVT::ValueType MaskVT = Mask.getValueType();
1890  MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
1891  unsigned NumElems = Mask.getNumOperands();
1892  std::vector<SDOperand> MaskVec;
1893
1894  for (unsigned i = 0; i != NumElems; ++i) {
1895    SDOperand Arg = Mask.getOperand(i);
1896    if (Arg.getOpcode() == ISD::UNDEF) continue;
1897    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1898    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1899    if (Val < NumElems)
1900      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
1901    else
1902      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
1903  }
1904
1905  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
1906  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
1907}
1908
1909/// isScalarLoadToVector - Returns true if the node is a scalar load that
1910/// is promoted to a vector.
1911static inline bool isScalarLoadToVector(SDOperand Op) {
1912  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR) {
1913    Op = Op.getOperand(0);
1914    return (Op.getOpcode() == ISD::LOAD);
1915  }
1916  return false;
1917}
1918
1919/// ShouldXformedToMOVLP - Return true if the node should be transformed to
1920/// match movlp{d|s}. The lower half elements should come from V1 (and in
1921/// order), and the upper half elements should come from the upper half of
1922/// V2 (not necessarily in order). And since V1 will become the source of
1923/// the MOVLP, it must be a scalar load.
1924static bool ShouldXformedToMOVLP(SDOperand V1, SDOperand V2, SDOperand Mask) {
1925  if (isScalarLoadToVector(V1)) {
1926    unsigned NumElems = Mask.getNumOperands();
1927    for (unsigned i = 0, e = NumElems/2; i != e; ++i)
1928      if (!isUndefOrEqual(Mask.getOperand(i), i))
1929        return false;
1930    for (unsigned i = NumElems/2; i != NumElems; ++i)
1931      if (!isUndefOrInRange(Mask.getOperand(i),
1932                            NumElems+NumElems/2, NumElems*2))
1933        return false;
1934    return true;
1935  }
1936
1937  return false;
1938}
1939
1940/// isLowerFromV2UpperFromV1 - Returns true if the shuffle mask is except
1941/// the reverse of what x86 shuffles want. x86 shuffles requires the lower
1942/// half elements to come from vector 1 (which would equal the dest.) and
1943/// the upper half to come from vector 2.
1944static bool isLowerFromV2UpperFromV1(SDOperand Op) {
1945  assert(Op.getOpcode() == ISD::BUILD_VECTOR);
1946
1947  unsigned NumElems = Op.getNumOperands();
1948  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
1949    if (!isUndefOrInRange(Op.getOperand(i), NumElems, NumElems*2))
1950      return false;
1951  for (unsigned i = NumElems/2; i != NumElems; ++i)
1952    if (!isUndefOrInRange(Op.getOperand(i), 0, NumElems))
1953      return false;
1954  return true;
1955}
1956
1957/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
1958/// of specified width.
1959static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
1960  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
1961  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
1962  std::vector<SDOperand> MaskVec;
1963  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
1964    MaskVec.push_back(DAG.getConstant(i,            BaseVT));
1965    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
1966  }
1967  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
1968}
1969
/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
///
static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand PermMask = Op.getOperand(2);
  MVT::ValueType VT = Op.getValueType();
  // The incoming permute mask is used only for its width; the mask value
  // itself is replaced with an unpackl mask below.
  unsigned NumElems = PermMask.getNumOperands();
  PermMask = getUnpacklMask(NumElems, DAG);
  // Repeatedly unpack V1 with itself; each pass duplicates elements,
  // presumably widening the splatted value until it fills a 32-bit lane
  // (4 elements) — TODO(review): confirm this replicates the intended
  // element for non-zero splat indices.
  while (NumElems != 4) {
    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, PermMask);
    NumElems >>= 1;
  }
  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);

  // Splat lane 0 of the v4i32 with a <0, 0, 0, 0> shuffle mask, then
  // convert back to the original vector type.
  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
  SDOperand Zero = DAG.getConstant(0, MVT::getVectorBaseType(MaskVT));
  std::vector<SDOperand> ZeroVec(4, Zero);
  SDOperand SplatMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, ZeroVec);
  SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
                                  DAG.getNode(ISD::UNDEF, MVT::v4i32),
                                  SplatMask);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
}
1993
1994/// LowerOperation - Provide custom lowering hooks for some operations.
1995///
1996SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
1997  switch (Op.getOpcode()) {
1998  default: assert(0 && "Should not custom lower this!");
1999  case ISD::SHL_PARTS:
2000  case ISD::SRA_PARTS:
2001  case ISD::SRL_PARTS: {
2002    assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
2003           "Not an i64 shift!");
2004    bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
2005    SDOperand ShOpLo = Op.getOperand(0);
2006    SDOperand ShOpHi = Op.getOperand(1);
2007    SDOperand ShAmt  = Op.getOperand(2);
2008    SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi,
2009                                         DAG.getConstant(31, MVT::i8))
2010                           : DAG.getConstant(0, MVT::i32);
2011
2012    SDOperand Tmp2, Tmp3;
2013    if (Op.getOpcode() == ISD::SHL_PARTS) {
2014      Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
2015      Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
2016    } else {
2017      Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
2018      Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
2019    }
2020
2021    SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag,
2022                                   ShAmt, DAG.getConstant(32, MVT::i8));
2023
2024    SDOperand Hi, Lo;
2025    SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
2026
2027    std::vector<MVT::ValueType> Tys;
2028    Tys.push_back(MVT::i32);
2029    Tys.push_back(MVT::Flag);
2030    std::vector<SDOperand> Ops;
2031    if (Op.getOpcode() == ISD::SHL_PARTS) {
2032      Ops.push_back(Tmp2);
2033      Ops.push_back(Tmp3);
2034      Ops.push_back(CC);
2035      Ops.push_back(InFlag);
2036      Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
2037      InFlag = Hi.getValue(1);
2038
2039      Ops.clear();
2040      Ops.push_back(Tmp3);
2041      Ops.push_back(Tmp1);
2042      Ops.push_back(CC);
2043      Ops.push_back(InFlag);
2044      Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
2045    } else {
2046      Ops.push_back(Tmp2);
2047      Ops.push_back(Tmp3);
2048      Ops.push_back(CC);
2049      Ops.push_back(InFlag);
2050      Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
2051      InFlag = Lo.getValue(1);
2052
2053      Ops.clear();
2054      Ops.push_back(Tmp3);
2055      Ops.push_back(Tmp1);
2056      Ops.push_back(CC);
2057      Ops.push_back(InFlag);
2058      Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
2059    }
2060
2061    Tys.clear();
2062    Tys.push_back(MVT::i32);
2063    Tys.push_back(MVT::i32);
2064    Ops.clear();
2065    Ops.push_back(Lo);
2066    Ops.push_back(Hi);
2067    return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
2068  }
2069  case ISD::SINT_TO_FP: {
2070    assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
2071           Op.getOperand(0).getValueType() >= MVT::i16 &&
2072           "Unknown SINT_TO_FP to lower!");
2073
2074    SDOperand Result;
2075    MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
2076    unsigned Size = MVT::getSizeInBits(SrcVT)/8;
2077    MachineFunction &MF = DAG.getMachineFunction();
2078    int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
2079    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
2080    SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other,
2081                                  DAG.getEntryNode(), Op.getOperand(0),
2082                                  StackSlot, DAG.getSrcValue(NULL));
2083
2084    // Build the FILD
2085    std::vector<MVT::ValueType> Tys;
2086    Tys.push_back(MVT::f64);
2087    Tys.push_back(MVT::Other);
2088    if (X86ScalarSSE) Tys.push_back(MVT::Flag);
2089    std::vector<SDOperand> Ops;
2090    Ops.push_back(Chain);
2091    Ops.push_back(StackSlot);
2092    Ops.push_back(DAG.getValueType(SrcVT));
2093    Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
2094                         Tys, Ops);
2095
2096    if (X86ScalarSSE) {
2097      Chain = Result.getValue(1);
2098      SDOperand InFlag = Result.getValue(2);
2099
2100      // FIXME: Currently the FST is flagged to the FILD_FLAG. This
2101      // shouldn't be necessary except that RFP cannot be live across
2102      // multiple blocks. When stackifier is fixed, they can be uncoupled.
2103      MachineFunction &MF = DAG.getMachineFunction();
2104      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
2105      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
2106      std::vector<MVT::ValueType> Tys;
2107      Tys.push_back(MVT::Other);
2108      std::vector<SDOperand> Ops;
2109      Ops.push_back(Chain);
2110      Ops.push_back(Result);
2111      Ops.push_back(StackSlot);
2112      Ops.push_back(DAG.getValueType(Op.getValueType()));
2113      Ops.push_back(InFlag);
2114      Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
2115      Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
2116                           DAG.getSrcValue(NULL));
2117    }
2118
2119    return Result;
2120  }
2121  case ISD::FP_TO_SINT: {
2122    assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
2123           "Unknown FP_TO_SINT to lower!");
2124    // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
2125    // stack slot.
2126    MachineFunction &MF = DAG.getMachineFunction();
2127    unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
2128    int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
2129    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
2130
2131    unsigned Opc;
2132    switch (Op.getValueType()) {
2133    default: assert(0 && "Invalid FP_TO_SINT to lower!");
2134    case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
2135    case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
2136    case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
2137    }
2138
2139    SDOperand Chain = DAG.getEntryNode();
2140    SDOperand Value = Op.getOperand(0);
2141    if (X86ScalarSSE) {
2142      assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
2143      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot,
2144                          DAG.getSrcValue(0));
2145      std::vector<MVT::ValueType> Tys;
2146      Tys.push_back(MVT::f64);
2147      Tys.push_back(MVT::Other);
2148      std::vector<SDOperand> Ops;
2149      Ops.push_back(Chain);
2150      Ops.push_back(StackSlot);
2151      Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType()));
2152      Value = DAG.getNode(X86ISD::FLD, Tys, Ops);
2153      Chain = Value.getValue(1);
2154      SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
2155      StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
2156    }
2157
2158    // Build the FP_TO_INT*_IN_MEM
2159    std::vector<SDOperand> Ops;
2160    Ops.push_back(Chain);
2161    Ops.push_back(Value);
2162    Ops.push_back(StackSlot);
2163    SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops);
2164
2165    // Load the result.
2166    return DAG.getLoad(Op.getValueType(), FIST, StackSlot,
2167                       DAG.getSrcValue(NULL));
2168  }
2169  case ISD::READCYCLECOUNTER: {
2170    std::vector<MVT::ValueType> Tys;
2171    Tys.push_back(MVT::Other);
2172    Tys.push_back(MVT::Flag);
2173    std::vector<SDOperand> Ops;
2174    Ops.push_back(Op.getOperand(0));
2175    SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops);
2176    Ops.clear();
2177    Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)));
2178    Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX,
2179                                     MVT::i32, Ops[0].getValue(2)));
2180    Ops.push_back(Ops[1].getValue(1));
2181    Tys[0] = Tys[1] = MVT::i32;
2182    Tys.push_back(MVT::Other);
2183    return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
2184  }
2185  case ISD::FABS: {
2186    MVT::ValueType VT = Op.getValueType();
2187    const Type *OpNTy =  MVT::getTypeForValueType(VT);
2188    std::vector<Constant*> CV;
2189    if (VT == MVT::f64) {
2190      CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
2191      CV.push_back(ConstantFP::get(OpNTy, 0.0));
2192    } else {
2193      CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
2194      CV.push_back(ConstantFP::get(OpNTy, 0.0));
2195      CV.push_back(ConstantFP::get(OpNTy, 0.0));
2196      CV.push_back(ConstantFP::get(OpNTy, 0.0));
2197    }
2198    Constant *CS = ConstantStruct::get(CV);
2199    SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
2200    SDOperand Mask
2201      = DAG.getNode(X86ISD::LOAD_PACK,
2202                    VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
2203    return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
2204  }
2205  case ISD::FNEG: {
2206    MVT::ValueType VT = Op.getValueType();
2207    const Type *OpNTy =  MVT::getTypeForValueType(VT);
2208    std::vector<Constant*> CV;
2209    if (VT == MVT::f64) {
2210      CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
2211      CV.push_back(ConstantFP::get(OpNTy, 0.0));
2212    } else {
2213      CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
2214      CV.push_back(ConstantFP::get(OpNTy, 0.0));
2215      CV.push_back(ConstantFP::get(OpNTy, 0.0));
2216      CV.push_back(ConstantFP::get(OpNTy, 0.0));
2217    }
2218    Constant *CS = ConstantStruct::get(CV);
2219    SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
2220    SDOperand Mask
2221      = DAG.getNode(X86ISD::LOAD_PACK,
2222                    VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
2223    return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
2224  }
2225  case ISD::SETCC: {
2226    assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
2227    SDOperand Cond;
2228    SDOperand CC = Op.getOperand(2);
2229    ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
2230    bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
2231    bool Flip;
2232    unsigned X86CC;
2233    if (translateX86CC(CC, isFP, X86CC, Flip)) {
2234      if (Flip)
2235        Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
2236                           Op.getOperand(1), Op.getOperand(0));
2237      else
2238        Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
2239                           Op.getOperand(0), Op.getOperand(1));
2240      return DAG.getNode(X86ISD::SETCC, MVT::i8,
2241                         DAG.getConstant(X86CC, MVT::i8), Cond);
2242    } else {
2243      assert(isFP && "Illegal integer SetCC!");
2244
2245      Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
2246                         Op.getOperand(0), Op.getOperand(1));
2247      std::vector<MVT::ValueType> Tys;
2248      std::vector<SDOperand> Ops;
2249      switch (SetCCOpcode) {
2250      default: assert(false && "Illegal floating point SetCC!");
2251      case ISD::SETOEQ: {  // !PF & ZF
2252        Tys.push_back(MVT::i8);
2253        Tys.push_back(MVT::Flag);
2254        Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8));
2255        Ops.push_back(Cond);
2256        SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
2257        SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
2258                                     DAG.getConstant(X86ISD::COND_E, MVT::i8),
2259                                     Tmp1.getValue(1));
2260        return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
2261      }
2262      case ISD::SETUNE: {  // PF | !ZF
2263        Tys.push_back(MVT::i8);
2264        Tys.push_back(MVT::Flag);
2265        Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8));
2266        Ops.push_back(Cond);
2267        SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
2268        SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
2269                                     DAG.getConstant(X86ISD::COND_NE, MVT::i8),
2270                                     Tmp1.getValue(1));
2271        return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
2272      }
2273      }
2274    }
2275  }
2276  case ISD::SELECT: {
2277    MVT::ValueType VT = Op.getValueType();
2278    bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE;
2279    bool addTest   = false;
2280    SDOperand Op0 = Op.getOperand(0);
2281    SDOperand Cond, CC;
2282    if (Op0.getOpcode() == ISD::SETCC)
2283      Op0 = LowerOperation(Op0, DAG);
2284
2285    if (Op0.getOpcode() == X86ISD::SETCC) {
2286      // If condition flag is set by a X86ISD::CMP, then make a copy of it
2287      // (since flag operand cannot be shared). If the X86ISD::SETCC does not
2288      // have another use it will be eliminated.
2289      // If the X86ISD::SETCC has more than one use, then it's probably better
2290      // to use a test instead of duplicating the X86ISD::CMP (for register
2291      // pressure reason).
2292      unsigned CmpOpc = Op0.getOperand(1).getOpcode();
2293      if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
2294          CmpOpc == X86ISD::UCOMI) {
2295        if (!Op0.hasOneUse()) {
2296          std::vector<MVT::ValueType> Tys;
2297          for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i)
2298            Tys.push_back(Op0.Val->getValueType(i));
2299          std::vector<SDOperand> Ops;
2300          for (unsigned i = 0; i < Op0.getNumOperands(); ++i)
2301            Ops.push_back(Op0.getOperand(i));
2302          Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
2303        }
2304
2305        CC   = Op0.getOperand(0);
2306        Cond = Op0.getOperand(1);
2307        // Make a copy as flag result cannot be used by more than one.
2308        Cond = DAG.getNode(CmpOpc, MVT::Flag,
2309                           Cond.getOperand(0), Cond.getOperand(1));
2310        addTest =
2311          isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
2312      } else
2313        addTest = true;
2314    } else
2315      addTest = true;
2316
2317    if (addTest) {
2318      CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
2319      Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0);
2320    }
2321
2322    std::vector<MVT::ValueType> Tys;
2323    Tys.push_back(Op.getValueType());
2324    Tys.push_back(MVT::Flag);
2325    std::vector<SDOperand> Ops;
2326    // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
2327    // condition is true.
2328    Ops.push_back(Op.getOperand(2));
2329    Ops.push_back(Op.getOperand(1));
2330    Ops.push_back(CC);
2331    Ops.push_back(Cond);
2332    return DAG.getNode(X86ISD::CMOV, Tys, Ops);
2333  }
  case ISD::BRCOND: {
    // Lower a conditional branch into X86ISD::BRCOND, which takes an explicit
    // X86 condition-code constant plus the flag-producing compare node.
    bool addTest = false;
    SDOperand Cond  = Op.getOperand(1);   // branch condition
    SDOperand Dest  = Op.getOperand(2);   // destination basic block
    SDOperand CC;
    // Lower a generic SETCC first so the X86ISD::SETCC pattern below can
    // recognize it.
    if (Cond.getOpcode() == ISD::SETCC)
      Cond = LowerOperation(Cond, DAG);

    if (Cond.getOpcode() == X86ISD::SETCC) {
      // If condition flag is set by a X86ISD::CMP, then make a copy of it
      // (since flag operand cannot be shared). If the X86ISD::SETCC does not
      // have another use it will be eliminated.
      // If the X86ISD::SETCC has more than one use, then it's probably better
      // to use a test instead of duplicating the X86ISD::CMP (for register
      // pressure reason).
      unsigned CmpOpc = Cond.getOperand(1).getOpcode();
      if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
          CmpOpc == X86ISD::UCOMI) {
        if (!Cond.hasOneUse()) {
          // Multi-use SETCC: re-emit an identical SETCC node so the original
          // stays intact for its other users; the flag copy below feeds only
          // this branch.
          std::vector<MVT::ValueType> Tys;
          for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i)
            Tys.push_back(Cond.Val->getValueType(i));
          std::vector<SDOperand> Ops;
          for (unsigned i = 0; i < Cond.getNumOperands(); ++i)
            Ops.push_back(Cond.getOperand(i));
          Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops);
        }

        CC   = Cond.getOperand(0);   // the X86 condition code constant
        Cond = Cond.getOperand(1);   // the compare feeding the SETCC
        // Make a copy as flag result cannot be used by more than one.
        Cond = DAG.getNode(CmpOpc, MVT::Flag,
                           Cond.getOperand(0), Cond.getOperand(1));
      } else
        addTest = true;
    } else
      addTest = true;

    if (addTest) {
      // No reusable compare: branch on (Cond != 0) via TEST Cond, Cond.
      CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
      Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond);
    }
    return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
                       Op.getOperand(0), Op.getOperand(2), CC, Cond);
  }
  case ISD::MEMSET: {
    // Lower llvm.memset.  Operands: 0 = chain, 1 = dest pointer,
    // 2 = value byte, 3 = length, 4 = alignment.
    SDOperand InFlag(0, 0);
    SDOperand Chain = Op.getOperand(0);
    unsigned Align =
      (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
    if (Align == 0) Align = 1;

    ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
    // If not DWORD aligned, call memset if size is less than the threshold.
    // It knows how to align to the right boundary first.
    if ((Align & 3) != 0 ||
        (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
      // Emit a call to the C library memset(dst, int(value), len).
      MVT::ValueType IntPtr = getPointerTy();
      const Type *IntPtrTy = getTargetData().getIntPtrType();
      std::vector<std::pair<SDOperand, const Type*> > Args;
      Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
      // Extend the ubyte argument to be an int value for the call.
      SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
      Args.push_back(std::make_pair(Val, IntPtrTy));
      Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
      std::pair<SDOperand,SDOperand> CallResult =
        LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
                    DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
      return CallResult.second;
    }

    // Otherwise expand inline as rep;stos.
    MVT::ValueType AVT;        // element type stored per iteration
    SDOperand Count;           // number of AVT-sized stores
    ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    unsigned BytesLeft = 0;    // tail bytes not covered by the rep;stos
    bool TwoRepStos = false;   // variable length: rep;stosd then rep;stosb
    if (ValC) {
      unsigned ValReg;
      unsigned Val = ValC->getValue() & 255;

      // If the value is a constant, then we can potentially use larger sets.
      switch (Align & 3) {
      case 2:   // WORD aligned
        // NOTE(review): since (Align & 3) != 0 was already routed to the
        // library call above, this case looks unreachable here, and it also
        // dereferences I without a null check -- confirm before relying on it.
        AVT = MVT::i16;
        Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
        BytesLeft = I->getValue() % 2;
        Val    = (Val << 8) | Val;   // splat the byte across the word
        ValReg = X86::AX;
        break;
      case 0:   // DWORD aligned
        AVT = MVT::i32;
        if (I) {
          Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
          BytesLeft = I->getValue() % 4;
        } else {
          // Non-constant length: store length/4 dwords now; the remaining
          // 0-3 bytes are handled by a second rep;stosb below.
          Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
                              DAG.getConstant(2, MVT::i8));
          TwoRepStos = true;
        }
        // Splat the byte across the dword.
        Val = (Val << 8)  | Val;
        Val = (Val << 16) | Val;
        ValReg = X86::EAX;
        break;
      default:  // Byte aligned
        AVT = MVT::i8;
        Count = Op.getOperand(3);
        ValReg = X86::AL;
        break;
      }

      // rep;stos stores from AL/AX/EAX, so pin the splatted value there.
      Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
                                InFlag);
      InFlag = Chain.getValue(1);
    } else {
      // Non-constant value: fall back to byte stores of the value in AL.
      AVT = MVT::i8;
      Count  = Op.getOperand(3);
      Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
      InFlag = Chain.getValue(1);
    }

    // rep;stos also implicitly uses ECX (count) and EDI (destination).
    Chain  = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
    InFlag = Chain.getValue(1);
    Chain  = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
    InFlag = Chain.getValue(1);

    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(AVT));
    Ops.push_back(InFlag);
    Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);

    if (TwoRepStos) {
      // Second rep;stosb for the remaining (length & 3) bytes.
      InFlag = Chain.getValue(1);
      Count = Op.getOperand(3);
      MVT::ValueType CVT = Count.getValueType();
      SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                                   DAG.getConstant(3, CVT));
      Chain  = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
      InFlag = Chain.getValue(1);
      Tys.clear();
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      Ops.clear();
      Ops.push_back(Chain);
      Ops.push_back(DAG.getValueType(MVT::i8));
      Ops.push_back(InFlag);
      Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);
    } else if (BytesLeft) {
      // Issue stores for the last 1 - 3 bytes.
      // (BytesLeft != 0 implies both ValC and I are non-null here: it is only
      // set on the constant-value, constant-length paths above.)
      SDOperand Value;
      unsigned Val = ValC->getValue() & 255;
      unsigned Offset = I->getValue() - BytesLeft;
      SDOperand DstAddr = Op.getOperand(1);
      MVT::ValueType AddrVT = DstAddr.getValueType();
      if (BytesLeft >= 2) {
        Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
        Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                            DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                        DAG.getConstant(Offset, AddrVT)),
                            DAG.getSrcValue(NULL));
        BytesLeft -= 2;
        Offset += 2;
      }

      if (BytesLeft == 1) {
        Value = DAG.getConstant(Val, MVT::i8);
        Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                            DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                        DAG.getConstant(Offset, AddrVT)),
                            DAG.getSrcValue(NULL));
      }
    }

    return Chain;
  }
  case ISD::MEMCPY: {
    // Lower llvm.memcpy.  Operands: 0 = chain, 1 = dest pointer,
    // 2 = source pointer, 3 = length, 4 = alignment.
    SDOperand Chain = Op.getOperand(0);
    unsigned Align =
      (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
    if (Align == 0) Align = 1;

    ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
    // If not DWORD aligned, call memcpy if size is less than the threshold.
    // It knows how to align to the right boundary first.
    if ((Align & 3) != 0 ||
        (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
      // Emit a call to the C library memcpy(dst, src, len).
      MVT::ValueType IntPtr = getPointerTy();
      const Type *IntPtrTy = getTargetData().getIntPtrType();
      std::vector<std::pair<SDOperand, const Type*> > Args;
      Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
      Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy));
      Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
      std::pair<SDOperand,SDOperand> CallResult =
        LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
                    DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
      return CallResult.second;
    }

    // Otherwise expand inline as rep;movs.
    MVT::ValueType AVT;        // element type moved per iteration
    SDOperand Count;           // number of AVT-sized moves
    unsigned BytesLeft = 0;    // tail bytes not covered by the rep;movs
    bool TwoRepMovs = false;   // variable length: rep;movsd then rep;movsb
    switch (Align & 3) {
    case 2:   // WORD aligned
      // NOTE(review): since (Align & 3) != 0 was already routed to the
      // library call above, this case looks unreachable here, and it also
      // dereferences I without a null check -- confirm before relying on it.
      AVT = MVT::i16;
      Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
      BytesLeft = I->getValue() % 2;
      break;
    case 0:   // DWORD aligned
      AVT = MVT::i32;
      if (I) {
        Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
        BytesLeft = I->getValue() % 4;
      } else {
        // Non-constant length: move length/4 dwords now; the remaining
        // 0-3 bytes are handled by a second rep;movsb below.
        Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
                            DAG.getConstant(2, MVT::i8));
        TwoRepMovs = true;
      }
      break;
    default:  // Byte aligned
      AVT = MVT::i8;
      Count = Op.getOperand(3);
      break;
    }

    // rep;movs implicitly uses ECX (count), EDI (dest) and ESI (source).
    SDOperand InFlag(0, 0);
    Chain  = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
    InFlag = Chain.getValue(1);
    Chain  = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
    InFlag = Chain.getValue(1);
    Chain  = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag);
    InFlag = Chain.getValue(1);

    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(AVT));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);

    if (TwoRepMovs) {
      // Second rep;movsb for the remaining (length & 3) bytes.
      InFlag = Chain.getValue(1);
      Count = Op.getOperand(3);
      MVT::ValueType CVT = Count.getValueType();
      SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                                   DAG.getConstant(3, CVT));
      Chain  = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
      InFlag = Chain.getValue(1);
      Tys.clear();
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      Ops.clear();
      Ops.push_back(Chain);
      Ops.push_back(DAG.getValueType(MVT::i8));
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);
    } else if (BytesLeft) {
      // Issue loads and stores for the last 1 - 3 bytes.
      // (BytesLeft != 0 implies I is non-null: it is only set on the
      // constant-length paths above.)
      unsigned Offset = I->getValue() - BytesLeft;
      SDOperand DstAddr = Op.getOperand(1);
      MVT::ValueType DstVT = DstAddr.getValueType();
      SDOperand SrcAddr = Op.getOperand(2);
      MVT::ValueType SrcVT = SrcAddr.getValueType();
      SDOperand Value;
      if (BytesLeft >= 2) {
        Value = DAG.getLoad(MVT::i16, Chain,
                            DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                        DAG.getConstant(Offset, SrcVT)),
                            DAG.getSrcValue(NULL));
        Chain = Value.getValue(1);
        Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                            DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                        DAG.getConstant(Offset, DstVT)),
                            DAG.getSrcValue(NULL));
        BytesLeft -= 2;
        Offset += 2;
      }

      if (BytesLeft == 1) {
        Value = DAG.getLoad(MVT::i8, Chain,
                            DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                        DAG.getConstant(Offset, SrcVT)),
                            DAG.getSrcValue(NULL));
        Chain = Value.getValue(1);
        Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                            DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                        DAG.getConstant(Offset, DstVT)),
                            DAG.getSrcValue(NULL));
      }
    }

    return Chain;
  }
2632
  // ConstantPool, GlobalAddress, and ExternalSymbol are lowered as their
  // target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
  // one of the above mentioned nodes. It has to be wrapped because otherwise
  // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
  // be used to form addressing mode. These wrapped nodes will be selected
  // into MOV32ri.
  case ISD::ConstantPool: {
    // Wrap the target constant-pool index in X86ISD::Wrapper so the raw
    // TargetConstantPool node is only used to form an addressing mode.
    ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
    SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                         DAG.getTargetConstantPool(CP->get(), getPointerTy(),
                                                   CP->getAlignment()));
    if (Subtarget->isTargetDarwin()) {
      // With PIC, the address is actually $g + Offset.
      if (getTargetMachine().getRelocationModel() == Reloc::PIC)
        Result = DAG.getNode(ISD::ADD, getPointerTy(),
                DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
    }

    return Result;
  }
  case ISD::GlobalAddress: {
    // Wrap the target global address in X86ISD::Wrapper so the raw
    // TargetGlobalAddress node is only used to form an addressing mode.
    GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
    SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                         DAG.getTargetGlobalAddress(GV, getPointerTy()));
    if (Subtarget->isTargetDarwin()) {
      // With PIC, the address is actually $g + Offset.
      if (getTargetMachine().getRelocationModel() == Reloc::PIC)
        Result = DAG.getNode(ISD::ADD, getPointerTy(),
                    DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);

      // For Darwin, external and weak symbols are indirect, so we want to load
      // the value at address GV, not the value of GV itself. This means that
      // the GlobalAddress must be in the base or index register of the address,
      // not the GV offset field.
      if (getTargetMachine().getRelocationModel() != Reloc::Static &&
          DarwinGVRequiresExtraLoad(GV))
        Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(),
                             Result, DAG.getSrcValue(NULL));
    }

    return Result;
  }
  case ISD::ExternalSymbol: {
    // Wrap the target external symbol in X86ISD::Wrapper so the raw
    // TargetExternalSymbol node is only used to form an addressing mode.
    const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
    SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                         DAG.getTargetExternalSymbol(Sym, getPointerTy()));
    if (Subtarget->isTargetDarwin()) {
      // With PIC, the address is actually $g + Offset.
      if (getTargetMachine().getRelocationModel() == Reloc::PIC)
        Result = DAG.getNode(ISD::ADD, getPointerTy(),
                    DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
    }

    return Result;
  }
  case ISD::VASTART: {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.  Operands: 0 = chain, 1 = va_list pointer,
    // 2 = source value for the store.
    // FIXME: Replace MVT::i32 with PointerTy
    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
    return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
                       Op.getOperand(1), Op.getOperand(2));
  }
  case ISD::RET: {
    // Lower a return: copy the return value(s) into the ABI-designated
    // location (EAX, EDX:EAX, XMM0 or the fp stack) and emit
    // X86ISD::RET_FLAG carrying the callee-pop byte count.
    SDOperand Copy;

    switch(Op.getNumOperands()) {
    default:
      assert(0 && "Do not know how to return this many arguments!");
      abort();
    case 1:    // ret void.
      return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0),
                         DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
    case 2: {  // Single return value.
      MVT::ValueType ArgVT = Op.getOperand(1).getValueType();

      if (MVT::isVector(ArgVT)) {
        // Integer or FP vector result -> XMM0.
        if (DAG.getMachineFunction().liveout_empty())
          DAG.getMachineFunction().addLiveOut(X86::XMM0);
        Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1),
                                SDOperand());
      } else if (MVT::isInteger(ArgVT)) {
        // Integer result -> EAX
        if (DAG.getMachineFunction().liveout_empty())
          DAG.getMachineFunction().addLiveOut(X86::EAX);

        Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1),
                                SDOperand());
      } else if (!X86ScalarSSE) {
        // FP return with fp-stack value.
        if (DAG.getMachineFunction().liveout_empty())
          DAG.getMachineFunction().addLiveOut(X86::ST0);

        std::vector<MVT::ValueType> Tys;
        Tys.push_back(MVT::Other);
        Tys.push_back(MVT::Flag);
        std::vector<SDOperand> Ops;
        Ops.push_back(Op.getOperand(0));
        Ops.push_back(Op.getOperand(1));
        Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
      } else {
        // FP return with ScalarSSE (return on fp-stack).
        // The value lives in an SSE register, but the ABI returns it in ST0,
        // so it has to go through memory and be FLD'd onto the fp stack.
        if (DAG.getMachineFunction().liveout_empty())
          DAG.getMachineFunction().addLiveOut(X86::ST0);

        SDOperand MemLoc;
        SDOperand Chain = Op.getOperand(0);
        SDOperand Value = Op.getOperand(1);

        if (Value.getOpcode() == ISD::LOAD &&
            (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
          // The value was just loaded; FLD directly from its memory location
          // instead of spilling.
          Chain  = Value.getOperand(0);
          MemLoc = Value.getOperand(1);
        } else {
          // Spill the value to memory and reload it into top of stack.
          unsigned Size = MVT::getSizeInBits(ArgVT)/8;
          MachineFunction &MF = DAG.getMachineFunction();
          int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
          MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
          Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
                              Value, MemLoc, DAG.getSrcValue(0));
        }
        std::vector<MVT::ValueType> Tys;
        Tys.push_back(MVT::f64);
        Tys.push_back(MVT::Other);
        std::vector<SDOperand> Ops;
        Ops.push_back(Chain);
        Ops.push_back(MemLoc);
        Ops.push_back(DAG.getValueType(ArgVT));
        Copy = DAG.getNode(X86ISD::FLD, Tys, Ops);
        Tys.clear();
        Tys.push_back(MVT::Other);
        Tys.push_back(MVT::Flag);
        Ops.clear();
        Ops.push_back(Copy.getValue(1));
        Ops.push_back(Copy);
        Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
      }
      break;
    }
    case 3:    // Two return values -> EDX:EAX.
      if (DAG.getMachineFunction().liveout_empty()) {
        DAG.getMachineFunction().addLiveOut(X86::EAX);
        DAG.getMachineFunction().addLiveOut(X86::EDX);
      }

      Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2),
                              SDOperand());
      Copy = DAG.getCopyToReg(Copy, X86::EAX,Op.getOperand(1),Copy.getValue(1));
      break;
    }
    return DAG.getNode(X86ISD::RET_FLAG, MVT::Other,
                       Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
                       Copy.getValue(1));
  }
  case ISD::SCALAR_TO_VECTOR: {
    // Any-extend the scalar to i32 first, then let the target node
    // X86ISD::S2VEC produce the vector from it.
    SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
    return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
  }
  case ISD::VECTOR_SHUFFLE: {
    // Canonicalize a VECTOR_SHUFFLE into a form that the instruction selector
    // can match to a single x86 shuffle instruction; return SDOperand() if no
    // such form exists.
    SDOperand V1 = Op.getOperand(0);
    SDOperand V2 = Op.getOperand(1);
    SDOperand PermMask = Op.getOperand(2);
    MVT::ValueType VT = Op.getValueType();
    unsigned NumElems = PermMask.getNumOperands();

    // Splats with <= 4 elements are matched directly; larger ones are
    // promoted to a v4i32 splat.
    if (isSplatMask(PermMask.Val)) {
      if (NumElems <= 4) return Op;
      // Promote it to a v4i32 splat.
      return PromoteSplat(Op, DAG);
    }

    // Normalize the node to match x86 shuffle ops if needed
    if (V2.getOpcode() != ISD::UNDEF) {
      bool DoSwap = false;

      if (ShouldXformedToMOVLP(V1, V2, PermMask))
        DoSwap = true;
      else if (isLowerFromV2UpperFromV1(PermMask))
        DoSwap = true;

      if (DoSwap) {
        // Swap V1/V2 and rewrite the mask accordingly.
        Op = CommuteVectorShuffle(Op, DAG);
        V1 = Op.getOperand(0);
        V2 = Op.getOperand(1);
        PermMask = Op.getOperand(2);
      }
    }

    if (NumElems == 2)
      return Op;

    // These masks are matched as-is by the instruction selector.
    if (X86::isMOVSMask(PermMask.Val) ||
        X86::isMOVSHDUPMask(PermMask.Val) ||
        X86::isMOVSLDUPMask(PermMask.Val))
      return Op;

    if (X86::isUNPCKLMask(PermMask.Val) ||
        X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
        X86::isUNPCKHMask(PermMask.Val))
      // Leave the VECTOR_SHUFFLE alone. It matches {P}UNPCKL*.
      return Op;

    // If VT is integer, try PSHUF* first, then SHUFP*.
    if (MVT::isInteger(VT)) {
      if (X86::isPSHUFDMask(PermMask.Val) ||
          X86::isPSHUFHWMask(PermMask.Val) ||
          X86::isPSHUFLWMask(PermMask.Val)) {
        // PSHUF* reads only one input; replace an unused V2 with UNDEF.
        if (V2.getOpcode() != ISD::UNDEF)
          return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                             DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
        return Op;
      }

      if (X86::isSHUFPMask(PermMask.Val))
        return Op;

      // Handle v8i16 shuffle high / low shuffle node pair.
      // The first shuffle applies the low-half mask and keeps elements 4-7 in
      // place; the second keeps elements 0-3 and applies the high-half mask.
      if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
        MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
        MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
        std::vector<SDOperand> MaskVec;
        for (unsigned i = 0; i != 4; ++i)
          MaskVec.push_back(PermMask.getOperand(i));
        for (unsigned i = 4; i != 8; ++i)
          MaskVec.push_back(DAG.getConstant(i, BaseVT));
        SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
        V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
        MaskVec.clear();
        for (unsigned i = 0; i != 4; ++i)
          MaskVec.push_back(DAG.getConstant(i, BaseVT));
        for (unsigned i = 4; i != 8; ++i)
          MaskVec.push_back(PermMask.getOperand(i));
        Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
      }
    } else {
      // Floating point cases in the other order.
      if (X86::isSHUFPMask(PermMask.Val))
        return Op;
      if (X86::isPSHUFDMask(PermMask.Val) ||
          X86::isPSHUFHWMask(PermMask.Val) ||
          X86::isPSHUFLWMask(PermMask.Val)) {
        if (V2.getOpcode() != ISD::UNDEF)
          return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
                             DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
        return Op;
      }
    }

    // Not handled here; caller falls back to default expansion.
    return SDOperand();
  }
  case ISD::BUILD_VECTOR: {
    // All one's are handled with pcmpeqd.
    if (ISD::isBuildVectorAllOnes(Op.Val))
      return Op;

    // Collect the distinct element values and determine whether every
    // element after the first is a (constant) zero.
    std::set<SDOperand> Values;
    SDOperand Elt0 = Op.getOperand(0);
    Values.insert(Elt0);
    bool Elt0IsZero = (isa<ConstantSDNode>(Elt0) &&
                       cast<ConstantSDNode>(Elt0)->getValue() == 0) ||
      (isa<ConstantFPSDNode>(Elt0) &&
       cast<ConstantFPSDNode>(Elt0)->isExactlyValue(0.0));
    bool RestAreZero = true;
    unsigned NumElems = Op.getNumOperands();
    for (unsigned i = 1; i < NumElems; ++i) {
      SDOperand Elt = Op.getOperand(i);
      if (ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Elt)) {
        if (!FPC->isExactlyValue(+0.0))
          RestAreZero = false;
      } else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
        if (!C->isNullValue())
          RestAreZero = false;
      } else
        RestAreZero = false;   // non-constant element: cannot prove zero
      Values.insert(Elt);
    }

    if (RestAreZero) {
      if (Elt0IsZero) return Op;   // entirely zero: leave as-is.

      // Zero extend a scalar to a vector.
      return DAG.getNode(X86ISD::ZEXT_S2VEC, Op.getValueType(), Elt0);
    }

    if (Values.size() > 2) {
      // Expand into a number of unpckl*.
      // e.g. for v4f32
      //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
      //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
      //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
      MVT::ValueType VT = Op.getValueType();
      SDOperand PermMask = getUnpacklMask(NumElems, DAG);
      std::vector<SDOperand> V(NumElems);
      for (unsigned i = 0; i < NumElems; ++i)
        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
      NumElems >>= 1;
      // Each pass halves the number of partial vectors by interleaving pairs.
      while (NumElems != 0) {
        for (unsigned i = 0; i < NumElems; ++i)
          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
                             PermMask);
        NumElems >>= 1;
      }
      return V[0];
    }

    // One or two distinct values: not handled here, use default expansion.
    return SDOperand();
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    // Only extracts with a constant index are custom-lowered here.
    if (!isa<ConstantSDNode>(Op.getOperand(1)))
        return SDOperand();

    MVT::ValueType VT = Op.getValueType();
    // TODO: handle v16i8.
    if (MVT::getSizeInBits(VT) == 16) {
      // Transform it so it match pextrw which produces a 32-bit result.
      // VT+1 relies on the MVT enum ordering to name the next wider integer
      // type (i16 -> i32).
      MVT::ValueType EVT = (MVT::ValueType)(VT+1);
      SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
                                      Op.getOperand(0), Op.getOperand(1));
      SDOperand Assert  = DAG.getNode(ISD::AssertZext, EVT, Extract,
                                      DAG.getValueType(VT));
      return DAG.getNode(ISD::TRUNCATE, VT, Assert);
    } else if (MVT::getSizeInBits(VT) == 32) {
      SDOperand Vec = Op.getOperand(0);
      unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
      if (Idx == 0)
        return Op;   // element 0 is already in the low position.

      // TODO: if Idx == 2, we can use unpckhps
      // SHUFPS the element to the lowest double word, then movss.
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
      SDOperand IdxNode = DAG.getConstant((Idx < 2) ? Idx : Idx+4,
                                          MVT::getVectorBaseType(MaskVT));
      std::vector<SDOperand> IdxVec;
      IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
      IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
      IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
      IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
      Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                        Vec, Vec, Mask);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                         DAG.getConstant(0, MVT::i32));
    } else if (MVT::getSizeInBits(VT) == 64) {
      SDOperand Vec = Op.getOperand(0);
      unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
      if (Idx == 0)
        return Op;   // element 0 is already in the low position.

      // UNPCKHPD the element to the lowest double word, then movsd.
      // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
      // to a f64mem, the whole operation is folded into a single MOVHPDmr.
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
      std::vector<SDOperand> IdxVec;
      IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
      IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
      Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                        Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                         DAG.getConstant(0, MVT::i32));
    }

    return SDOperand();
  }
  case ISD::INSERT_VECTOR_ELT: {
    // Transform it so it match pinsrw which expects a 16-bit value in a R32
    // as its second argument.
    MVT::ValueType VT = Op.getValueType();
    MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
    if (MVT::getSizeInBits(BaseVT) == 16) {
      SDOperand N1 = Op.getOperand(1);   // scalar value to insert
      SDOperand N2 = Op.getOperand(2);   // element index
      if (N1.getValueType() != MVT::i32)
        N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
      if (N2.getValueType() != MVT::i32)
        N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
      return DAG.getNode(X86ISD::PINSRW, VT, Op.getOperand(0), N1, N2);
    }

    // Other element sizes are not custom-lowered here.
    return SDOperand();
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    // Lower side-effect-free target intrinsics.  Only the SSE/SSE2 scalar
    // comparison intrinsics (comi*/ucomi*) are handled; everything else is
    // left to the default lowering.
    unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
    switch (IntNo) {
    default: return SDOperand();    // Don't custom lower most intrinsics.
    // Comparison intrinsics.
    case Intrinsic::x86_sse_comieq_ss:
    case Intrinsic::x86_sse_comilt_ss:
    case Intrinsic::x86_sse_comile_ss:
    case Intrinsic::x86_sse_comigt_ss:
    case Intrinsic::x86_sse_comige_ss:
    case Intrinsic::x86_sse_comineq_ss:
    case Intrinsic::x86_sse_ucomieq_ss:
    case Intrinsic::x86_sse_ucomilt_ss:
    case Intrinsic::x86_sse_ucomile_ss:
    case Intrinsic::x86_sse_ucomigt_ss:
    case Intrinsic::x86_sse_ucomige_ss:
    case Intrinsic::x86_sse_ucomineq_ss:
    case Intrinsic::x86_sse2_comieq_sd:
    case Intrinsic::x86_sse2_comilt_sd:
    case Intrinsic::x86_sse2_comile_sd:
    case Intrinsic::x86_sse2_comigt_sd:
    case Intrinsic::x86_sse2_comige_sd:
    case Intrinsic::x86_sse2_comineq_sd:
    case Intrinsic::x86_sse2_ucomieq_sd:
    case Intrinsic::x86_sse2_ucomilt_sd:
    case Intrinsic::x86_sse2_ucomile_sd:
    case Intrinsic::x86_sse2_ucomigt_sd:
    case Intrinsic::x86_sse2_ucomige_sd:
    case Intrinsic::x86_sse2_ucomineq_sd: {
      // Map each intrinsic to a compare node opcode (COMI or UCOMI) plus a
      // generic condition code.
      unsigned Opc = 0;
      ISD::CondCode CC = ISD::SETCC_INVALID;
      switch (IntNo) {
        default: break;
        case Intrinsic::x86_sse_comieq_ss:
        case Intrinsic::x86_sse2_comieq_sd:
          Opc = X86ISD::COMI;
          CC = ISD::SETEQ;
          break;
        case Intrinsic::x86_sse_comilt_ss:
        case Intrinsic::x86_sse2_comilt_sd:
          Opc = X86ISD::COMI;
          CC = ISD::SETLT;
          break;
        case Intrinsic::x86_sse_comile_ss:
        case Intrinsic::x86_sse2_comile_sd:
          Opc = X86ISD::COMI;
          CC = ISD::SETLE;
          break;
        case Intrinsic::x86_sse_comigt_ss:
        case Intrinsic::x86_sse2_comigt_sd:
          Opc = X86ISD::COMI;
          CC = ISD::SETGT;
          break;
        case Intrinsic::x86_sse_comige_ss:
        case Intrinsic::x86_sse2_comige_sd:
          Opc = X86ISD::COMI;
          CC = ISD::SETGE;
          break;
        case Intrinsic::x86_sse_comineq_ss:
        case Intrinsic::x86_sse2_comineq_sd:
          Opc = X86ISD::COMI;
          CC = ISD::SETNE;
          break;
        case Intrinsic::x86_sse_ucomieq_ss:
        case Intrinsic::x86_sse2_ucomieq_sd:
          Opc = X86ISD::UCOMI;
          CC = ISD::SETEQ;
          break;
        case Intrinsic::x86_sse_ucomilt_ss:
        case Intrinsic::x86_sse2_ucomilt_sd:
          Opc = X86ISD::UCOMI;
          CC = ISD::SETLT;
          break;
        case Intrinsic::x86_sse_ucomile_ss:
        case Intrinsic::x86_sse2_ucomile_sd:
          Opc = X86ISD::UCOMI;
          CC = ISD::SETLE;
          break;
        case Intrinsic::x86_sse_ucomigt_ss:
        case Intrinsic::x86_sse2_ucomigt_sd:
          Opc = X86ISD::UCOMI;
          CC = ISD::SETGT;
          break;
        case Intrinsic::x86_sse_ucomige_ss:
        case Intrinsic::x86_sse2_ucomige_sd:
          Opc = X86ISD::UCOMI;
          CC = ISD::SETGE;
          break;
        case Intrinsic::x86_sse_ucomineq_ss:
        case Intrinsic::x86_sse2_ucomineq_sd:
          Opc = X86ISD::UCOMI;
          CC = ISD::SETNE;
          break;
      }
      // Translate the generic condition code into an X86 condition code;
      // Flip indicates the compare operands must be swapped to express it.
      bool Flip;
      unsigned X86CC;
      translateX86CC(CC, true, X86CC, Flip);
      SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1),
                                   Op.getOperand(Flip?1:2));
      SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                    DAG.getConstant(X86CC, MVT::i8), Cond);
      // The intrinsics return i32; any-extend the i8 SETCC result.
      return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
    }
    }
  }
3122  }
3123}
3124
3125const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
3126  switch (Opcode) {
3127  default: return NULL;
3128  case X86ISD::SHLD:               return "X86ISD::SHLD";
3129  case X86ISD::SHRD:               return "X86ISD::SHRD";
3130  case X86ISD::FAND:               return "X86ISD::FAND";
3131  case X86ISD::FXOR:               return "X86ISD::FXOR";
3132  case X86ISD::FILD:               return "X86ISD::FILD";
3133  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
3134  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
3135  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
3136  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
3137  case X86ISD::FLD:                return "X86ISD::FLD";
3138  case X86ISD::FST:                return "X86ISD::FST";
3139  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
3140  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
3141  case X86ISD::CALL:               return "X86ISD::CALL";
3142  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
3143  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
3144  case X86ISD::CMP:                return "X86ISD::CMP";
3145  case X86ISD::TEST:               return "X86ISD::TEST";
3146  case X86ISD::COMI:               return "X86ISD::COMI";
3147  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
3148  case X86ISD::SETCC:              return "X86ISD::SETCC";
3149  case X86ISD::CMOV:               return "X86ISD::CMOV";
3150  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
3151  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
3152  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
3153  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
3154  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
3155  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
3156  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
3157  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
3158  case X86ISD::ZEXT_S2VEC:         return "X86ISD::ZEXT_S2VEC";
3159  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
3160  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
3161  }
3162}
3163
3164void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
3165                                                       uint64_t Mask,
3166                                                       uint64_t &KnownZero,
3167                                                       uint64_t &KnownOne,
3168                                                       unsigned Depth) const {
3169  unsigned Opc = Op.getOpcode();
3170  assert((Opc >= ISD::BUILTIN_OP_END ||
3171          Opc == ISD::INTRINSIC_WO_CHAIN ||
3172          Opc == ISD::INTRINSIC_W_CHAIN ||
3173          Opc == ISD::INTRINSIC_VOID) &&
3174         "Should use MaskedValueIsZero if you don't know whether Op"
3175         " is a target node!");
3176
3177  KnownZero = KnownOne = 0;   // Don't know anything.
3178  switch (Opc) {
3179  default: break;
3180  case X86ISD::SETCC:
3181    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
3182    break;
3183  }
3184}
3185
3186std::vector<unsigned> X86TargetLowering::
3187getRegClassForInlineAsmConstraint(const std::string &Constraint,
3188                                  MVT::ValueType VT) const {
3189  if (Constraint.size() == 1) {
3190    // FIXME: not handling fp-stack yet!
3191    // FIXME: not handling MMX registers yet ('y' constraint).
3192    switch (Constraint[0]) {      // GCC X86 Constraint Letters
3193    default: break;  // Unknown constriant letter
3194    case 'r':   // GENERAL_REGS
3195    case 'R':   // LEGACY_REGS
3196      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX,
3197                                   X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
3198    case 'l':   // INDEX_REGS
3199      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX,
3200                                   X86::ESI, X86::EDI, X86::EBP, 0);
3201    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
3202    case 'Q':   // Q_REGS
3203      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 0);
3204    case 'x':   // SSE_REGS if SSE1 allowed
3205      if (Subtarget->hasSSE1())
3206        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3207                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
3208                                     0);
3209      return std::vector<unsigned>();
3210    case 'Y':   // SSE_REGS if SSE2 allowed
3211      if (Subtarget->hasSSE2())
3212        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3213                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
3214                                     0);
3215      return std::vector<unsigned>();
3216    }
3217  }
3218
3219  return std::vector<unsigned>();
3220}
3221
3222/// isLegalAddressImmediate - Return true if the integer value or
3223/// GlobalValue can be used as the offset of the target addressing mode.
3224bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const {
3225  // X86 allows a sign-extended 32-bit immediate field.
3226  return (V > -(1LL << 32) && V < (1LL << 32)-1);
3227}
3228
3229bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
3230  if (Subtarget->isTargetDarwin()) {
3231    Reloc::Model RModel = getTargetMachine().getRelocationModel();
3232    if (RModel == Reloc::Static)
3233      return true;
3234    else if (RModel == Reloc::DynamicNoPIC)
3235      return !DarwinGVRequiresExtraLoad(GV);
3236    else
3237      return false;
3238  } else
3239    return true;
3240}
3241
3242/// isShuffleMaskLegal - Targets can use this to indicate that they only
3243/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
3244/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
3245/// are assumed to be legal.
3246bool
3247X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
3248  // Only do shuffles on 128-bit vector types for now.
3249  if (MVT::getSizeInBits(VT) == 64) return false;
3250  return (Mask.Val->getNumOperands() == 2 ||
3251          isSplatMask(Mask.Val)  ||
3252          X86::isMOVSMask(Mask.Val)   ||
3253          X86::isMOVSHDUPMask(Mask.Val) ||
3254          X86::isMOVSLDUPMask(Mask.Val) ||
3255          X86::isPSHUFDMask(Mask.Val) ||
3256          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
3257          X86::isSHUFPMask(Mask.Val)  ||
3258          X86::isUNPCKLMask(Mask.Val) ||
3259          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
3260          X86::isUNPCKHMask(Mask.Val));
3261}
3262