// X86ISelLowering.cpp revision a69571c7991813c93cba64e88eced6899ce93d81
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation ----*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// FIXME: temporary.
#include "llvm/Support/CommandLine.h"
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0; the hardware masks
                                // the shift count to 5 bits.
  setStackPointerRegisterToSaveRestore(X86::ESP);

  // Use _setjmp/_longjmp instead of setjmp/longjmp on non-Darwin targets.
  if (!Subtarget->isTargetDarwin())
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);
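  // Scales 3, 5, and 9 are only reachable by also using the index register
  // as the base (e.g. 9*x can be formed as "lea (%eax,%eax,8), %ecx");
  // needing both base and index is what makes them more expensive.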

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::R8RegisterClass);
  addRegisterClass(MVT::i16, X86::R16RegisterClass);
  addRegisterClass(MVT::i32, X86::R32RegisterClass);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (X86ScalarSSE)
    // No SSE i64 SINT_TO_FP, so expand i32 UINT_TO_FP instead.
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Expand);
  else
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
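  // Promote here means, roughly, that an unsigned i16 -> f64 convert is done
  // by zero-extending to i32 and using the signed i32 -> f64 convert that
  // the hardware supports; the zero-extended value always fits in the
  // signed i32 range, so the result is exact.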

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  // We can handle SINT_TO_FP and FP_TO_SINT from/to i64 even though i64
  // isn't legal.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (X86ScalarSSE && !Subtarget->hasSSE3())
    // Expand FP_TO_UINT into a select.
    // FIXME: We would like to use a Custom expander here eventually to do
    // the optimal thing for SSE vs. the default expansion in the legalizer.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Expand);
  else
    // With SSE3 we can use fisttpll to convert to a signed i64.
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
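  // Promoting FP_TO_UINT means, e.g., that f64 -> u16 is performed as the
  // signed f64 -> i32 conversion followed by a truncate: every u16 value is
  // representable in a signed i32, so no precision is lost.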

  setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
  setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);

  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::SEXTLOAD         , MVT::i1   , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);

  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET             , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // SSE has no load+extend ops
    setOperationAction(ISD::EXTLOAD,  MVT::f32, Expand);
    setOperationAction(ISD::ZEXTLOAD, MVT::f32, Expand);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }
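  // Anything still marked Expand when legalization runs is broken apart by
  // the legalizer, so the blocks below only re-enable the operations that
  // the MMX/SSE units can actually perform.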

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::AND,                MVT::v4f32, Legal);
    setOperationAction(ISD::OR,                 MVT::v4f32, Legal);
    setOperationAction(ISD::XOR,                MVT::v4f32, Legal);
    setOperationAction(ISD::ADD,                MVT::v4f32, Legal);
    setOperationAction(ISD::SUB,                MVT::v4f32, Legal);
    setOperationAction(ISD::MUL,                MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,                MVT::v2f64, Legal);
    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v2f64, Legal);
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    setOperationAction(ISD::MUL,                MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }
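    // After this loop a v4i32 AND, for example, is bitcast to v2i64,
    // executed as a single v2i64 AND, and bitcast back; the bit pattern is
    // identical, so the element width doesn't matter for logic operations
    // and whole-register loads.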

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

std::vector<SDOperand>
X86TargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
  std::vector<SDOperand> Args = TargetLowering::LowerArguments(F, DAG);

  FormalArgs.clear();
  FormalArgLocs.clear();

  // This sets BytesToPopOnReturn, BytesCallerReserves, etc. which have to be
  // set before the rest of the function can be lowered.
  if (F.getCallingConv() == CallingConv::Fast && EnableFastCC)
    PreprocessFastCCArguments(Args, F, DAG);
  else
    PreprocessCCCArguments(Args, F, DAG);
  return Args;
}

std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerCallTo(SDOperand Chain, const Type *RetTy,
                               bool isVarArg, unsigned CallingConv,
                               bool isTailCall,
                               SDOperand Callee, ArgListTy &Args,
                               SelectionDAG &DAG) {
  assert((!isVarArg || CallingConv == CallingConv::C) &&
         "Only C takes varargs!");

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't
  // hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  if (CallingConv == CallingConv::Fast && EnableFastCC)
    return LowerFastCCCallTo(Chain, RetTy, isTailCall, Callee, Args, DAG);
  return LowerCCCCallTo(Chain, RetTy, isVarArg, isTailCall, Callee, Args, DAG);
}

//===----------------------------------------------------------------------===//
//                    C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// AddLiveIn - This helper function adds the specified physical register to
/// the MachineFunction as a live-in value.  It also creates a corresponding
/// virtual register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

/// HowToPassCCCArgument - Returns how a formal argument of the specified type
/// should be passed. If it is passed on the stack, returns the size of the
/// stack slot; if it is passed in an XMM register, returns the number of XMM
/// registers needed.
static void
HowToPassCCCArgument(MVT::ValueType ObjectVT, unsigned NumXMMRegs,
                     unsigned &ObjSize, unsigned &ObjXMMRegs) {
  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i1:
  case MVT::i8:  ObjSize = 1; break;
  case MVT::i16: ObjSize = 2; break;
  case MVT::i32: ObjSize = 4; break;
  case MVT::i64: ObjSize = 8; break;
  case MVT::f32: ObjSize = 4; break;
  case MVT::f64: ObjSize = 8; break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 3)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}
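// For example, the first vector argument (NumXMMRegs == 0) comes back with
// ObjXMMRegs == 1 and ObjSize == 0, while the fourth one (NumXMMRegs == 3)
// is passed as a 16-byte stack object instead; only XMM0-XMM2 are used for
// parameter passing here.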

/// getFormalArgObjects - Returns Op itself if Op is a FORMAL_ARGUMENTS node;
/// otherwise returns the FORMAL_ARGUMENTS node(s) that made up parts of Op.
static std::vector<SDOperand> getFormalArgObjects(SDOperand Op) {
  unsigned Opc = Op.getOpcode();
  std::vector<SDOperand> Objs;
  if (Opc == ISD::TRUNCATE) {
    Op = Op.getOperand(0);
    assert(Op.getOpcode() == ISD::AssertSext ||
           Op.getOpcode() == ISD::AssertZext);
    Objs.push_back(Op.getOperand(0));
  } else if (Opc == ISD::FP_ROUND || Opc == ISD::VBIT_CONVERT) {
    Objs.push_back(Op.getOperand(0));
  } else if (Opc == ISD::BUILD_PAIR) {
    Objs.push_back(Op.getOperand(0));
    Objs.push_back(Op.getOperand(1));
  } else {
    Objs.push_back(Op);
  }
  return Objs;
}

void X86TargetLowering::PreprocessCCCArguments(std::vector<SDOperand> Args,
                                               Function &F, SelectionDAG &DAG) {
  unsigned NumArgs = Args.size();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Add DAG nodes to load the arguments...  On entry to a function on the
  // X86, the stack frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is four bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
  unsigned XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2 };
  for (unsigned i = 0; i < NumArgs; ++i) {
    SDOperand Op = Args[i];
    std::vector<SDOperand> Objs = getFormalArgObjects(Op);
    for (std::vector<SDOperand>::iterator I = Objs.begin(), E = Objs.end();
         I != E; ++I) {
      SDOperand Obj = *I;
      MVT::ValueType ObjectVT = Obj.getValueType();
      unsigned ArgIncrement = 4;
      unsigned ObjSize = 0;
      unsigned ObjXMMRegs = 0;
      HowToPassCCCArgument(ObjectVT, NumXMMRegs, ObjSize, ObjXMMRegs);
      if (ObjSize >= 8)
        ArgIncrement = ObjSize;

      if (ObjXMMRegs) {
        // Passed in an XMM register.
        unsigned Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                 X86::VR128RegisterClass);
        std::pair<FALocInfo, FALocInfo> Loc =
          std::make_pair(FALocInfo(FALocInfo::LiveInRegLoc, Reg, ObjectVT),
                         FALocInfo());
        FormalArgLocs.push_back(Loc);
        NumXMMRegs += ObjXMMRegs;
      } else {
        // Create the frame index object for this incoming parameter...
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        std::pair<FALocInfo, FALocInfo> Loc =
          std::make_pair(FALocInfo(FALocInfo::StackFrameLoc, FI), FALocInfo());
        FormalArgLocs.push_back(Loc);
        ArgOffset += ArgIncrement;   // Move on to the next argument...
      }
    }
  }

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (F.isVarArg())
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = ArgOffset;
}

void X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  for (unsigned i = 0; i < NumArgs; ++i) {
    std::pair<FALocInfo, FALocInfo> Loc = FormalArgLocs[i];
    SDOperand ArgValue;
    if (Loc.first.Kind == FALocInfo::StackFrameLoc) {
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      unsigned FI = FormalArgLocs[i].first.Loc;
      SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
      ArgValue = DAG.getLoad(Op.Val->getValueType(i),
                             DAG.getEntryNode(), FIN, DAG.getSrcValue(NULL));
    } else {
      // Must be a CopyFromReg
      ArgValue = DAG.getCopyFromReg(DAG.getEntryNode(), Loc.first.Loc,
                                    Loc.first.Typ);
    }
    FormalArgs.push_back(ArgValue);
  }
}

std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerCCCCallTo(SDOperand Chain, const Type *RetTy,
                                  bool isVarArg, bool isTailCall,
                                  SDOperand Callee, ArgListTy &Args,
                                  SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  // Keep track of the number of XMM regs passed so far.
  unsigned NumXMMRegs = 0;
  unsigned XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2 };

  std::vector<SDOperand> RegValuesToPass;
  if (Args.empty()) {
    // Save zero bytes.
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(0, getPointerTy()));
  } else {
    for (unsigned i = 0, e = Args.size(); i != e; ++i)
      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unknown value type!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::f32:
        NumBytes += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        NumBytes += 8;
        break;
      case MVT::Vector:
        if (NumXMMRegs < 3)
          ++NumXMMRegs;
        else
          NumBytes += 16;
        break;
      }

    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, getPointerTy()));

    // Arguments go on the stack in reverse order, as specified by the ABI.
    unsigned ArgOffset = 0;
    NumXMMRegs = 0;
    SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
    std::vector<SDOperand> Stores;
    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unexpected ValueType for argument!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
        // Promote the integer to 32 bits.  If the input type is signed use a
        // sign extend, otherwise use a zero extend.
        if (Args[i].second->isSigned())
          Args[i].first = DAG.getNode(ISD::SIGN_EXTEND, MVT::i32,
                                      Args[i].first);
        else
          Args[i].first = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32,
                                      Args[i].first);

        // FALL THROUGH
      case MVT::i32:
      case MVT::f32: {
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 4;
        break;
      }
      case MVT::i64:
      case MVT::f64: {
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
        Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                     Args[i].first, PtrOff,
                                     DAG.getSrcValue(NULL)));
        ArgOffset += 8;
        break;
      }
      case MVT::Vector:
        if (NumXMMRegs < 3) {
          RegValuesToPass.push_back(Args[i].first);
          NumXMMRegs++;
        } else {
          SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
          PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
          Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Args[i].first, PtrOff,
                                       DAG.getSrcValue(NULL)));
          ArgOffset += 16;
        }
      }
    }
    if (!Stores.empty())
      Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);
  }

  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);
  RetVals.push_back(MVT::Other);

  // The result values produced have to be legal.  Promote the result.
  switch (RetTyVT) {
  case MVT::isVoid: break;
  default:
    RetVals.push_back(RetTyVT);
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    RetVals.push_back(MVT::i32);
    break;
  case MVT::f32:
    if (X86ScalarSSE)
      RetVals.push_back(MVT::f32);
    else
      RetVals.push_back(MVT::f64);
    break;
  case MVT::i64:
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
    break;
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
    unsigned CCReg = XMMArgRegs[i];
    SDOperand RegToPass = RegValuesToPass[i];
    assert(RegToPass.getValueType() == MVT::Vector);
    unsigned NumElems =
      cast<ConstantSDNode>(*(RegToPass.Val->op_end()-2))->getValue();
    MVT::ValueType EVT = cast<VTSDNode>(*(RegToPass.Val->op_end()-1))->getVT();
    MVT::ValueType PVT = getVectorType(EVT, NumElems);
    SDOperand CCRegNode = DAG.getRegister(CCReg, PVT);
    RegToPass = DAG.getNode(ISD::VBIT_CONVERT, PVT, RegToPass);
    Chain = DAG.getCopyToReg(Chain, CCRegNode, RegToPass, InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(X86ISD::CALL, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  SDOperand RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT) {
    default: assert(0 && "Unknown value type to return!");
    case MVT::i1:
    case MVT::i8:
      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
      Chain = RetVal.getValue(1);
      if (RetTyVT == MVT::i1)
        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
      break;
    case MVT::i16:
      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i64: {
      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
                                        Lo.getValue(2));
      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
      Chain = Hi.getValue(1);
      break;
    }
    case MVT::f32:
    case MVT::f64: {
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(InFlag);
      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
      Chain  = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);
      if (X86ScalarSSE) {
        // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
        // shouldn't be necessary except that RFP cannot be live across
        // multiple blocks. When stackifier is fixed, they can be uncoupled.
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
        Tys.clear();
        Tys.push_back(MVT::Other);
        Ops.clear();
        Ops.push_back(Chain);
        Ops.push_back(RetVal);
        Ops.push_back(StackSlot);
        Ops.push_back(DAG.getValueType(RetTyVT));
        Ops.push_back(InFlag);
        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
                             DAG.getSrcValue(NULL));
        Chain = RetVal.getValue(1);
      }

      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
        // FIXME: we would really like to remember that this FP_ROUND
        // operation is okay to eliminate if we allow excess FP precision.
        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
      break;
    }
    case MVT::Vector: {
      const PackedType *PTy = cast<PackedType>(RetTy);
      MVT::ValueType EVT;
      MVT::ValueType LVT;
      unsigned NumRegs = getPackedTypeBreakdown(PTy, EVT, LVT);
      assert(NumRegs == 1 && "Unsupported type!");
      RetVal = DAG.getCopyFromReg(Chain, X86::XMM0, EVT, InFlag);
      Chain = RetVal.getValue(1);
      break;
    }
    }
  }

  return std::make_pair(RetVal, Chain);
}

//===----------------------------------------------------------------------===//
//                    Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as C calling convs.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
//
// Note that this can be enhanced in the future to pass fp vals in registers
// (when we have a global fp allocator) and do other tricks.
//
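// For illustration (a sketch of the convention as described above, not
// output captured from this compiler): a call to "fastcc i32 @f(i32 %a,
// i32 %b)" with FASTCC_NUM_INT_ARGS_INREGS == 2 would put %a in EAX and %b
// in EDX, use no stack bytes for the two arguments, and the callee would
// still pop the 4 bytes of padding that keep the popped amount at 8n+4.
//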

// FASTCC_NUM_INT_ARGS_INREGS - This is the max number of integer arguments
// to pass in registers.  0 is none, 1 is "use EAX", 2 is "use EAX and
// EDX".  Anything more is illegal.
//
// FIXME: The linscan register allocator currently has problems with
// coalescing.  At the time of this writing, whenever it decides to coalesce
// a physreg with a virtreg, this increases the size of the physreg's live
// range, and the live range cannot ever be reduced.  This causes problems if
// too many physregs are coalesced with virtregs, which can cause the register
// allocator to wedge itself.
//
// This code triggers this problem more often if we pass args in registers,
// so disable it until this is fixed.
//
// NOTE: this isn't marked const, so that GCC doesn't emit annoying warnings
// about code being dead.
//
static unsigned FASTCC_NUM_INT_ARGS_INREGS = 0;
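// With FASTCC_NUM_INT_ARGS_INREGS currently 0, every integer fastcc argument
// below in fact travels on the stack; the register paths are kept in place
// so they can be re-enabled once the coalescing issue above is fixed.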


/// HowToPassFastCCArgument - Returns how a formal argument of the specified
/// type should be passed. If it is passed on the stack, returns the size of
/// the stack slot; if it is passed in integer or XMM registers, returns the
/// number of integer or XMM registers needed.
static void
HowToPassFastCCArgument(MVT::ValueType ObjectVT,
                        unsigned NumIntRegs, unsigned NumXMMRegs,
                        unsigned &ObjSize, unsigned &ObjIntRegs,
                        unsigned &ObjXMMRegs) {
  ObjSize = 0;
  ObjIntRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i1:
  case MVT::i8:
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
      ObjSize = 1;
    break;
  case MVT::i16:
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
      ObjSize = 2;
    break;
  case MVT::i32:
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
      ObjSize = 4;
    break;
  case MVT::i64:
    if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
      ObjIntRegs = 2;
    } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
      ObjIntRegs = 1;
      ObjSize = 4;
    } else
      ObjSize = 8;
    break;
  case MVT::f32:
    ObjSize = 4;
    break;
  case MVT::f64:
    ObjSize = 8;
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 3)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}

void
X86TargetLowering::PreprocessFastCCArguments(std::vector<SDOperand> Args,
                                             Function &F, SelectionDAG &DAG) {
  unsigned NumArgs = Args.size();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Add DAG nodes to load the arguments...  On entry to a function the stack
  // frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first nonreg argument (leftmost lexically)
  // [ESP + 8] -- second nonreg argument, if first argument is 4 bytes in size
  //    ...
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot

  // Keep track of the number of integer regs passed so far.  This can be
  // either 0 (neither EAX nor EDX used), 1 (EAX is used) or 2 (EAX and EDX
  // are both used).
  unsigned NumIntRegs = 0;
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
  unsigned XMMArgRegs[] = { X86::XMM0, X86::XMM1, X86::XMM2 };

  for (unsigned i = 0; i < NumArgs; ++i) {
    SDOperand Op = Args[i];
    std::vector<SDOperand> Objs = getFormalArgObjects(Op);
    for (std::vector<SDOperand>::iterator I = Objs.begin(), E = Objs.end();
         I != E; ++I) {
      SDOperand Obj = *I;
      MVT::ValueType ObjectVT = Obj.getValueType();
      unsigned ArgIncrement = 4;
      unsigned ObjSize = 0;
      unsigned ObjIntRegs = 0;
      unsigned ObjXMMRegs = 0;

      HowToPassFastCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
                              ObjSize, ObjIntRegs, ObjXMMRegs);
      if (ObjSize >= 8)
        ArgIncrement = ObjSize;

      unsigned Reg;
      std::pair<FALocInfo,FALocInfo> Loc = std::make_pair(FALocInfo(),
                                                          FALocInfo());
      if (ObjIntRegs || ObjXMMRegs) {
        switch (ObjectVT) {
        default: assert(0 && "Unhandled argument type!");
        case MVT::i1:
        case MVT::i8:
          Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
                          X86::R8RegisterClass);
          Loc.first.Kind = FALocInfo::LiveInRegLoc;
          Loc.first.Loc = Reg;
          Loc.first.Typ = MVT::i8;
          break;
        case MVT::i16:
          Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
                          X86::R16RegisterClass);
          Loc.first.Kind = FALocInfo::LiveInRegLoc;
          Loc.first.Loc = Reg;
          Loc.first.Typ = MVT::i16;
          break;
        case MVT::i32:
          Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                          X86::R32RegisterClass);
          Loc.first.Kind = FALocInfo::LiveInRegLoc;
          Loc.first.Loc = Reg;
          Loc.first.Typ = MVT::i32;
          break;
        case MVT::i64:
          Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                          X86::R32RegisterClass);
          Loc.first.Kind = FALocInfo::LiveInRegLoc;
          Loc.first.Loc = Reg;
          Loc.first.Typ = MVT::i32;
          if (ObjIntRegs == 2) {
            Reg = AddLiveIn(MF, X86::EDX, X86::R32RegisterClass);
            Loc.second.Kind = FALocInfo::LiveInRegLoc;
            Loc.second.Loc = Reg;
            Loc.second.Typ = MVT::i32;
          }
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
        case MVT::v2i64:
        case MVT::v4f32:
        case MVT::v2f64:
          Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass);
          Loc.first.Kind = FALocInfo::LiveInRegLoc;
          Loc.first.Loc = Reg;
          Loc.first.Typ = ObjectVT;
          break;
        }
        NumIntRegs += ObjIntRegs;
        NumXMMRegs += ObjXMMRegs;
      }
      if (ObjSize) {
        int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
        if (ObjectVT == MVT::i64 && ObjIntRegs) {
          Loc.second.Kind = FALocInfo::StackFrameLoc;
          Loc.second.Loc = FI;
        } else {
          Loc.first.Kind = FALocInfo::StackFrameLoc;
          Loc.first.Loc = FI;
        }
        ArgOffset += ArgIncrement;   // Move on to the next argument.
      }

      FormalArgLocs.push_back(Loc);
    }
  }

  // Make sure the incoming argument area takes 8n+4 bytes so that, together
  // with the 4-byte return address, the start of the arguments stays 8-byte
  // aligned.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;
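  // (For example, 4 bytes of stack arguments stay at 4 == 8*0+4, while 8
  // bytes are padded out to 12 == 8*1+4.)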

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(F.getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  case MVT::Vector: {
    const PackedType *PTy = cast<PackedType>(F.getReturnType());
    MVT::ValueType EVT;
    MVT::ValueType LVT;
    unsigned NumRegs = getPackedTypeBreakdown(PTy, EVT, LVT);
    assert(NumRegs == 1 && "Unsupported type!");
    MF.addLiveOut(X86::XMM0);
    break;
  }
  }
}

void
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType VT = Op.Val->getValueType(i);
    std::pair<FALocInfo, FALocInfo> Loc = FormalArgLocs[i];
    SDOperand ArgValue;
    if (Loc.first.Kind == FALocInfo::StackFrameLoc) {
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      SDOperand FIN = DAG.getFrameIndex(Loc.first.Loc, MVT::i32);
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), DAG.getEntryNode(), FIN,
                             DAG.getSrcValue(NULL));
    } else {
      // Must be a CopyFromReg
      ArgValue = DAG.getCopyFromReg(DAG.getEntryNode(), Loc.first.Loc,
                                    Loc.first.Typ);
    }

    if (Loc.second.Kind != FALocInfo::None) {
      SDOperand ArgValue2;
      if (Loc.second.Kind == FALocInfo::StackFrameLoc) {
        // Create the SelectionDAG nodes corresponding to a load from this
        // parameter.
        SDOperand FIN = DAG.getFrameIndex(Loc.second.Loc, MVT::i32);
        ArgValue2 = DAG.getLoad(Op.Val->getValueType(i), DAG.getEntryNode(),
                                FIN, DAG.getSrcValue(NULL));
      } else {
        // Must be a CopyFromReg
        ArgValue2 = DAG.getCopyFromReg(DAG.getEntryNode(),
                                       Loc.second.Loc, Loc.second.Typ);
      }
      ArgValue = DAG.getNode(ISD::BUILD_PAIR, VT, ArgValue, ArgValue2);
    }
    FormalArgs.push_back(ArgValue);
  }
}

std::pair<SDOperand, SDOperand>
X86TargetLowering::LowerFastCCCallTo(SDOperand Chain, const Type *RetTy,
                                     bool isTailCall, SDOperand Callee,
                                     ArgListTy &Args, SelectionDAG &DAG) {
  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  // Keep track of the number of integer regs passed so far.  This can be
  // either 0 (neither EAX nor EDX used), 1 (EAX is used) or 2 (EAX and EDX
  // are both used).
  unsigned NumIntRegs = 0;

  for (unsigned i = 0, e = Args.size(); i != e; ++i)
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unknown value type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        ++NumIntRegs;
        break;
      }
      // fall through
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
      if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
        NumIntRegs += 2;
        break;
      } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
        NumIntRegs = FASTCC_NUM_INT_ARGS_INREGS;
        NumBytes += 4;
        break;
      }

      // fall through
    case MVT::f64:
      NumBytes += 8;
      break;
    }

  // Make sure the callee-popped amount comes to 8n+4 bytes so that, together
  // with the 4-byte return address, the start of the arguments stays 8-byte
  // aligned.
  if ((NumBytes & 7) == 0)
    NumBytes += 4;

  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  SDOperand StackPtr = DAG.getRegister(X86::ESP, MVT::i32);
  NumIntRegs = 0;
  std::vector<SDOperand> Stores;
  std::vector<SDOperand> RegValuesToPass;
  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
    switch (getValueType(Args[i].second)) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i1:
      Args[i].first = DAG.getNode(ISD::ANY_EXTEND, MVT::i8, Args[i].first);
      // Fall through.
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        RegValuesToPass.push_back(Args[i].first);
        ++NumIntRegs;
        break;
      }
      // Fall through
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                   Args[i].first, PtrOff,
                                   DAG.getSrcValue(NULL)));
      ArgOffset += 4;
      break;
    }
    case MVT::i64:
      // Can we pass (at least) part of it in regs?
      if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
        SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                   Args[i].first, DAG.getConstant(1, MVT::i32));
        SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                   Args[i].first, DAG.getConstant(0, MVT::i32));
        RegValuesToPass.push_back(Lo);
        ++NumIntRegs;

        // Pass both parts in regs?
        if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS) {
          RegValuesToPass.push_back(Hi);
          ++NumIntRegs;
        } else {
          // Pass the high part in memory.
          SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
          PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
          Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Hi, PtrOff, DAG.getSrcValue(NULL)));
          ArgOffset += 4;
        }
        break;
      }
      // Fall through
    case MVT::f64:
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      Stores.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                   Args[i].first, PtrOff,
                                   DAG.getSrcValue(NULL)));
      ArgOffset += 8;
      break;
    }
  }
  if (!Stores.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, Stores);

  // Make sure the callee-popped amount comes to 8n+4 bytes so that, together
  // with the 4-byte return address, the start of the arguments stays 8-byte
  // aligned.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);

  RetVals.push_back(MVT::Other);

  // The result values produced have to be legal.  Promote the result.
  switch (RetTyVT) {
  case MVT::isVoid: break;
  default:
    RetVals.push_back(RetTyVT);
    break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
    RetVals.push_back(MVT::i32);
    break;
  case MVT::f32:
    if (X86ScalarSSE)
      RetVals.push_back(MVT::f32);
    else
      RetVals.push_back(MVT::f64);
    break;
  case MVT::i64:
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
    break;
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegValuesToPass.size(); i != e; ++i) {
    unsigned CCReg;
    SDOperand RegToPass = RegValuesToPass[i];
    switch (RegToPass.getValueType()) {
    default: assert(0 && "Bad thing to pass in regs");
    case MVT::i8:
      CCReg = (i == 0) ? X86::AL  : X86::DL;
      break;
    case MVT::i16:
      CCReg = (i == 0) ? X86::AX  : X86::DX;
      break;
    case MVT::i32:
      CCReg = (i == 0) ? X86::EAX : X86::EDX;
      break;
    }

    Chain = DAG.getCopyToReg(Chain, CCReg, RegToPass, InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, Ops);
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
  Ops.push_back(DAG.getConstant(ArgOffset, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  SDOperand RetVal;
  if (RetTyVT != MVT::isVoid) {
    switch (RetTyVT) {
    default: assert(0 && "Unknown value type to return!");
    case MVT::i1:
    case MVT::i8:
      RetVal = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag);
      Chain = RetVal.getValue(1);
      if (RetTyVT == MVT::i1)
        RetVal = DAG.getNode(ISD::TRUNCATE, MVT::i1, RetVal);
      break;
    case MVT::i16:
      RetVal = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i32:
      RetVal = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      Chain = RetVal.getValue(1);
      break;
    case MVT::i64: {
      SDOperand Lo = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag);
      SDOperand Hi = DAG.getCopyFromReg(Lo.getValue(1), X86::EDX, MVT::i32,
                                        Lo.getValue(2));
      RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, Lo, Hi);
      Chain = Hi.getValue(1);
      break;
    }
    case MVT::f32:
    case MVT::f64: {
      std::vector<MVT::ValueType> Tys;
      Tys.push_back(MVT::f64);
      Tys.push_back(MVT::Other);
      Tys.push_back(MVT::Flag);
      std::vector<SDOperand> Ops;
      Ops.push_back(Chain);
      Ops.push_back(InFlag);
      RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys, Ops);
      Chain  = RetVal.getValue(1);
      InFlag = RetVal.getValue(2);
      if (X86ScalarSSE) {
        // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
        // shouldn't be necessary except that RFP cannot be live across
        // multiple blocks. When stackifier is fixed, they can be uncoupled.
        MachineFunction &MF = DAG.getMachineFunction();
        int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
        SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
        Tys.clear();
        Tys.push_back(MVT::Other);
        Ops.clear();
        Ops.push_back(Chain);
        Ops.push_back(RetVal);
        Ops.push_back(StackSlot);
        Ops.push_back(DAG.getValueType(RetTyVT));
        Ops.push_back(InFlag);
        Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
        RetVal = DAG.getLoad(RetTyVT, Chain, StackSlot,
                             DAG.getSrcValue(NULL));
        Chain = RetVal.getValue(1);
      }

      if (RetTyVT == MVT::f32 && !X86ScalarSSE)
        // FIXME: we would really like to remember that this FP_ROUND
        // operation is okay to eliminate if we allow excess FP precision.
        RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
      break;
    }
    }
  }

  return std::make_pair(RetVal, Chain);
}

SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
    MachineFunction &MF = DAG.getMachineFunction();
1362    ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
1363  }
1364
1365  return DAG.getFrameIndex(ReturnAddrIndex, MVT::i32);
1366}
1367
1370std::pair<SDOperand, SDOperand> X86TargetLowering::
1371LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
1372                        SelectionDAG &DAG) {
1373  SDOperand Result;
1374  if (Depth)        // Depths > 0 not supported yet!
1375    Result = DAG.getConstant(0, getPointerTy());
1376  else {
1377    SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
1378    if (!isFrameAddress)
1379      // Just load the return address
1380      Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(), RetAddrFI,
1381                           DAG.getSrcValue(NULL));
1382    else
1383      Result = DAG.getNode(ISD::SUB, MVT::i32, RetAddrFI,
1384                           DAG.getConstant(4, MVT::i32));
1385  }
1386  return std::make_pair(Result, Chain);
1387}
1388
1389/// getCondBrOpcodeForX86CC - Returns the X86 conditional branch opcode
1390/// which corresponds to the condition code.
1391static unsigned getCondBrOpcodeForX86CC(unsigned X86CC) {
1392  switch (X86CC) {
1393  default: assert(0 && "Unknown X86 conditional code!");
1394  case X86ISD::COND_A:  return X86::JA;
1395  case X86ISD::COND_AE: return X86::JAE;
1396  case X86ISD::COND_B:  return X86::JB;
1397  case X86ISD::COND_BE: return X86::JBE;
1398  case X86ISD::COND_E:  return X86::JE;
1399  case X86ISD::COND_G:  return X86::JG;
1400  case X86ISD::COND_GE: return X86::JGE;
1401  case X86ISD::COND_L:  return X86::JL;
1402  case X86ISD::COND_LE: return X86::JLE;
1403  case X86ISD::COND_NE: return X86::JNE;
1404  case X86ISD::COND_NO: return X86::JNO;
1405  case X86ISD::COND_NP: return X86::JNP;
1406  case X86ISD::COND_NS: return X86::JNS;
1407  case X86ISD::COND_O:  return X86::JO;
1408  case X86ISD::COND_P:  return X86::JP;
1409  case X86ISD::COND_S:  return X86::JS;
1410  }
1411}
1412
/// translateX86CC - do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode. Flip is set to true if
/// the order of the comparison operands should be flipped.
1417static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
1418                           unsigned &X86CC, bool &Flip) {
1419  Flip = false;
1420  X86CC = X86ISD::COND_INVALID;
1421  if (!isFP) {
1422    switch (SetCCOpcode) {
1423    default: break;
1424    case ISD::SETEQ:  X86CC = X86ISD::COND_E;  break;
1425    case ISD::SETGT:  X86CC = X86ISD::COND_G;  break;
1426    case ISD::SETGE:  X86CC = X86ISD::COND_GE; break;
1427    case ISD::SETLT:  X86CC = X86ISD::COND_L;  break;
1428    case ISD::SETLE:  X86CC = X86ISD::COND_LE; break;
1429    case ISD::SETNE:  X86CC = X86ISD::COND_NE; break;
1430    case ISD::SETULT: X86CC = X86ISD::COND_B;  break;
1431    case ISD::SETUGT: X86CC = X86ISD::COND_A;  break;
1432    case ISD::SETULE: X86CC = X86ISD::COND_BE; break;
1433    case ISD::SETUGE: X86CC = X86ISD::COND_AE; break;
1434    }
1435  } else {
1436    // On a floating point condition, the flags are set as follows:
1437    // ZF  PF  CF   op
1438    //  0 | 0 | 0 | X > Y
1439    //  0 | 0 | 1 | X < Y
1440    //  1 | 0 | 0 | X == Y
1441    //  1 | 1 | 1 | unordered
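    // Some ordered comparisons are only testable with the operands swapped:
    // e.g. X < Y (SETOLT) is checked as Y > X with COND_A (CF==0 && ZF==0),
    // which is correctly false on an unordered result, whereas testing CF
    // directly with COND_B would also succeed when the operands are unordered.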
1442    switch (SetCCOpcode) {
1443    default: break;
1444    case ISD::SETUEQ:
1445    case ISD::SETEQ: X86CC = X86ISD::COND_E;  break;
1446    case ISD::SETOLT: Flip = true; // Fallthrough
1447    case ISD::SETOGT:
1448    case ISD::SETGT: X86CC = X86ISD::COND_A;  break;
1449    case ISD::SETOLE: Flip = true; // Fallthrough
1450    case ISD::SETOGE:
1451    case ISD::SETGE: X86CC = X86ISD::COND_AE; break;
1452    case ISD::SETUGT: Flip = true; // Fallthrough
1453    case ISD::SETULT:
1454    case ISD::SETLT: X86CC = X86ISD::COND_B;  break;
1455    case ISD::SETUGE: Flip = true; // Fallthrough
1456    case ISD::SETULE:
1457    case ISD::SETLE: X86CC = X86ISD::COND_BE; break;
1458    case ISD::SETONE:
1459    case ISD::SETNE: X86CC = X86ISD::COND_NE; break;
1460    case ISD::SETUO: X86CC = X86ISD::COND_P;  break;
1461    case ISD::SETO:  X86CC = X86ISD::COND_NP; break;
1462    }
1463  }
1464
1465  return X86CC != X86ISD::COND_INVALID;
1466}
1467
1468static bool translateX86CC(SDOperand CC, bool isFP, unsigned &X86CC,
1469                           bool &Flip) {
1470  return translateX86CC(cast<CondCodeSDNode>(CC)->get(), isFP, X86CC, Flip);
1471}
1472
/// hasFPCMov - is there a floating point cmov for the specific X86 condition
/// code? The current X86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
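/// Conditions that test SF or OF (the signed compares and the overflow/sign
/// tests) have no fcmov form.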
1476static bool hasFPCMov(unsigned X86CC) {
1477  switch (X86CC) {
1478  default:
1479    return false;
1480  case X86ISD::COND_B:
1481  case X86ISD::COND_BE:
1482  case X86ISD::COND_E:
1483  case X86ISD::COND_P:
1484  case X86ISD::COND_A:
1485  case X86ISD::COND_AE:
1486  case X86ISD::COND_NE:
1487  case X86ISD::COND_NP:
1488    return true;
1489  }
1490}
1491
1492MachineBasicBlock *
1493X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
1494                                           MachineBasicBlock *BB) {
1495  switch (MI->getOpcode()) {
1496  default: assert(false && "Unexpected instr type to insert");
1497  case X86::CMOV_FR32:
1498  case X86::CMOV_FR64:
1499  case X86::CMOV_V4F32:
1500  case X86::CMOV_V2F64:
1501  case X86::CMOV_V2I64: {
1502    // To "insert" a SELECT_CC instruction, we actually have to insert the
1503    // diamond control-flow pattern.  The incoming instruction knows the
1504    // destination vreg to set, the condition code register to branch on, the
1505    // true/false values to select between, and a branch opcode to use.
1506    const BasicBlock *LLVM_BB = BB->getBasicBlock();
1507    ilist<MachineBasicBlock>::iterator It = BB;
1508    ++It;
1509
1510    //  thisMBB:
1511    //  ...
1512    //   TrueVal = ...
1513    //   cmpTY ccX, r1, r2
1514    //   bCC copy1MBB
1515    //   fallthrough --> copy0MBB
1516    MachineBasicBlock *thisMBB = BB;
1517    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
1518    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
1519    unsigned Opc = getCondBrOpcodeForX86CC(MI->getOperand(3).getImmedValue());
1520    BuildMI(BB, Opc, 1).addMBB(sinkMBB);
1521    MachineFunction *F = BB->getParent();
1522    F->getBasicBlockList().insert(It, copy0MBB);
1523    F->getBasicBlockList().insert(It, sinkMBB);
1524    // Update machine-CFG edges by first adding all successors of the current
1525    // block to the new block which will contain the Phi node for the select.
    for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
1527        e = BB->succ_end(); i != e; ++i)
1528      sinkMBB->addSuccessor(*i);
1529    // Next, remove all successors of the current block, and add the true
1530    // and fallthrough blocks as its successors.
    while (!BB->succ_empty())
1532      BB->removeSuccessor(BB->succ_begin());
1533    BB->addSuccessor(copy0MBB);
1534    BB->addSuccessor(sinkMBB);
1535
1536    //  copy0MBB:
1537    //   %FalseValue = ...
1538    //   # fallthrough to sinkMBB
1539    BB = copy0MBB;
1540
1541    // Update machine-CFG edges
1542    BB->addSuccessor(sinkMBB);
1543
1544    //  sinkMBB:
1545    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
1546    //  ...
1547    BB = sinkMBB;
1548    BuildMI(BB, X86::PHI, 4, MI->getOperand(0).getReg())
1549      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
1550      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
1551
1552    delete MI;   // The pseudo instruction is gone now.
1553    return BB;
1554  }
1555
1556  case X86::FP_TO_INT16_IN_MEM:
1557  case X86::FP_TO_INT32_IN_MEM:
1558  case X86::FP_TO_INT64_IN_MEM: {
1559    // Change the floating point control register to use "round towards zero"
1560    // mode when truncating to an integer value.
1561    MachineFunction *F = BB->getParent();
1562    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
1563    addFrameReference(BuildMI(BB, X86::FNSTCW16m, 4), CWFrameIdx);
1564
    // Load the old value of the control word so it can be restored later...
1566    unsigned OldCW =
1567      F->getSSARegMap()->createVirtualRegister(X86::R16RegisterClass);
1568    addFrameReference(BuildMI(BB, X86::MOV16rm, 4, OldCW), CWFrameIdx);
1569
1570    // Set the high part to be round to zero...
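    // (0xC7F sets the rounding control field, bits 10-11, to 11b = round
    // toward zero, and leaves all FP exceptions masked.)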
1571    addFrameReference(BuildMI(BB, X86::MOV16mi, 5), CWFrameIdx).addImm(0xC7F);
1572
1573    // Reload the modified control word now...
1574    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
1575
    // Restore the memory image of the control word to its original value.
1577    addFrameReference(BuildMI(BB, X86::MOV16mr, 5), CWFrameIdx).addReg(OldCW);
1578
1579    // Get the X86 opcode to use.
1580    unsigned Opc;
1581    switch (MI->getOpcode()) {
1582    default: assert(0 && "illegal opcode!");
1583    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
1584    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
1585    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
1586    }
1587
1588    X86AddressMode AM;
1589    MachineOperand &Op = MI->getOperand(0);
1590    if (Op.isRegister()) {
1591      AM.BaseType = X86AddressMode::RegBase;
1592      AM.Base.Reg = Op.getReg();
1593    } else {
1594      AM.BaseType = X86AddressMode::FrameIndexBase;
1595      AM.Base.FrameIndex = Op.getFrameIndex();
1596    }
1597    Op = MI->getOperand(1);
1598    if (Op.isImmediate())
1599      AM.Scale = Op.getImmedValue();
1600    Op = MI->getOperand(2);
1601    if (Op.isImmediate())
1602      AM.IndexReg = Op.getImmedValue();
1603    Op = MI->getOperand(3);
1604    if (Op.isGlobalAddress()) {
1605      AM.GV = Op.getGlobal();
1606    } else {
1607      AM.Disp = Op.getImmedValue();
1608    }
1609    addFullAddress(BuildMI(BB, Opc, 5), AM).addReg(MI->getOperand(4).getReg());
1610
1611    // Reload the original control word now.
1612    addFrameReference(BuildMI(BB, X86::FLDCW16m, 4), CWFrameIdx);
1613
1614    delete MI;   // The pseudo instruction is gone now.
1615    return BB;
1616  }
1617  }
1618}
1619
1620
1621//===----------------------------------------------------------------------===//
1622//                           X86 Custom Lowering Hooks
1623//===----------------------------------------------------------------------===//
1624
1625/// DarwinGVRequiresExtraLoad - true if accessing the GV requires an extra
1626/// load. For Darwin, external and weak symbols are indirect, loading the value
/// at address GV rather than the value of GV itself. This means that the
1628/// GlobalAddress must be in the base or index register of the address, not the
1629/// GV offset field.
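/// E.g. a weak or external global G is referenced through its non-lazy
/// pointer stub, so the address used is the value loaded from
/// L_G$non_lazy_ptr rather than the symbol G itself.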
1630static bool DarwinGVRequiresExtraLoad(GlobalValue *GV) {
1631  return (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
1632          (GV->isExternal() && !GV->hasNotBeenReadFromBytecode()));
1633}
1634
/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if its value falls within the half-open range
/// [Low, Hi).
1637static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
1638  if (Op.getOpcode() == ISD::UNDEF)
1639    return true;
1640
1641  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
1642  return (Val >= Low && Val < Hi);
1643}
1644
1645/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if its value equals the specified value.
1647static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
1648  if (Op.getOpcode() == ISD::UNDEF)
1649    return true;
1650  return cast<ConstantSDNode>(Op)->getValue() == Val;
1651}
1652
1653/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
1654/// specifies a shuffle of elements that is suitable for input to PSHUFD.
1655bool X86::isPSHUFDMask(SDNode *N) {
1656  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1657
1658  if (N->getNumOperands() != 4)
1659    return false;
1660
  // Check that the mask doesn't reference the second vector.
1662  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1663    SDOperand Arg = N->getOperand(i);
1664    if (Arg.getOpcode() == ISD::UNDEF) continue;
1665    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1666    if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
1667      return false;
1668  }
1669
1670  return true;
1671}
1672
1673/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
1674/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
1675bool X86::isPSHUFHWMask(SDNode *N) {
1676  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1677
1678  if (N->getNumOperands() != 8)
1679    return false;
1680
1681  // Lower quadword copied in order.
1682  for (unsigned i = 0; i != 4; ++i) {
1683    SDOperand Arg = N->getOperand(i);
1684    if (Arg.getOpcode() == ISD::UNDEF) continue;
1685    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1686    if (cast<ConstantSDNode>(Arg)->getValue() != i)
1687      return false;
1688  }
1689
1690  // Upper quadword shuffled.
1691  for (unsigned i = 4; i != 8; ++i) {
1692    SDOperand Arg = N->getOperand(i);
1693    if (Arg.getOpcode() == ISD::UNDEF) continue;
1694    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1695    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1696    if (Val < 4 || Val > 7)
1697      return false;
1698  }
1699
1700  return true;
1701}
1702
1703/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
1704/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
1705bool X86::isPSHUFLWMask(SDNode *N) {
1706  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1707
1708  if (N->getNumOperands() != 8)
1709    return false;
1710
1711  // Upper quadword copied in order.
1712  for (unsigned i = 4; i != 8; ++i)
1713    if (!isUndefOrEqual(N->getOperand(i), i))
1714      return false;
1715
1716  // Lower quadword shuffled.
1717  for (unsigned i = 0; i != 4; ++i)
1718    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
1719      return false;
1720
1721  return true;
1722}
1723
1724/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
1725/// specifies a shuffle of elements that is suitable for input to SHUFP*.
1726static bool isSHUFPMask(std::vector<SDOperand> &N) {
1727  unsigned NumElems = N.size();
1728  if (NumElems != 2 && NumElems != 4) return false;
1729
1730  unsigned Half = NumElems / 2;
1731  for (unsigned i = 0; i < Half; ++i)
1732    if (!isUndefOrInRange(N[i], 0, NumElems))
1733      return false;
1734  for (unsigned i = Half; i < NumElems; ++i)
1735    if (!isUndefOrInRange(N[i], NumElems, NumElems*2))
1736      return false;
1737
1738  return true;
1739}
1740
1741bool X86::isSHUFPMask(SDNode *N) {
1742  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1743  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1744  return ::isSHUFPMask(Ops);
1745}
1746
/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
/// the reverse of what x86 shuffles want. x86 shuffles require the lower
/// half elements to come from vector 1 (which would equal the dest.) and
/// the upper half to come from vector 2.
1751static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) {
1752  unsigned NumElems = Ops.size();
1753  if (NumElems != 2 && NumElems != 4) return false;
1754
1755  unsigned Half = NumElems / 2;
1756  for (unsigned i = 0; i < Half; ++i)
1757    if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2))
1758      return false;
1759  for (unsigned i = Half; i < NumElems; ++i)
1760    if (!isUndefOrInRange(Ops[i], 0, NumElems))
1761      return false;
1762  return true;
1763}
1764
1765static bool isCommutedSHUFP(SDNode *N) {
1766  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1767  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1768  return isCommutedSHUFP(Ops);
1769}
1770
1771/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
1772/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
1773bool X86::isMOVHLPSMask(SDNode *N) {
1774  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1775
1776  if (N->getNumOperands() != 4)
1777    return false;
1778
  // Expect element 0 == 6, element 1 == 7, element 2 == 2, element 3 == 3.
1780  return isUndefOrEqual(N->getOperand(0), 6) &&
1781         isUndefOrEqual(N->getOperand(1), 7) &&
1782         isUndefOrEqual(N->getOperand(2), 2) &&
1783         isUndefOrEqual(N->getOperand(3), 3);
1784}
1785
1786/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
1787/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
1788bool X86::isMOVLPMask(SDNode *N) {
1789  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1790
1791  unsigned NumElems = N->getNumOperands();
1792  if (NumElems != 2 && NumElems != 4)
1793    return false;
1794
1795  for (unsigned i = 0; i < NumElems/2; ++i)
1796    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
1797      return false;
1798
1799  for (unsigned i = NumElems/2; i < NumElems; ++i)
1800    if (!isUndefOrEqual(N->getOperand(i), i))
1801      return false;
1802
1803  return true;
1804}
1805
1806/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
1807/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
1808/// and MOVLHPS.
1809bool X86::isMOVHPMask(SDNode *N) {
1810  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1811
1812  unsigned NumElems = N->getNumOperands();
1813  if (NumElems != 2 && NumElems != 4)
1814    return false;
1815
1816  for (unsigned i = 0; i < NumElems/2; ++i)
1817    if (!isUndefOrEqual(N->getOperand(i), i))
1818      return false;
1819
1820  for (unsigned i = 0; i < NumElems/2; ++i) {
1821    SDOperand Arg = N->getOperand(i + NumElems/2);
1822    if (!isUndefOrEqual(Arg, i + NumElems))
1823      return false;
1824  }
1825
1826  return true;
1827}
1828
1829/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
1830/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
1832  unsigned NumElems = N.size();
1833  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1834    return false;
1835
1836  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
1837    SDOperand BitI  = N[i];
1838    SDOperand BitI1 = N[i+1];
1839    if (!isUndefOrEqual(BitI, j))
1840      return false;
1841    if (V2IsSplat) {
1842      if (isUndefOrEqual(BitI1, NumElems))
1843        return false;
1844    } else {
1845      if (!isUndefOrEqual(BitI1, j + NumElems))
1846        return false;
1847    }
1848  }
1849
1850  return true;
1851}
1852
1853bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
1854  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1855  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1856  return ::isUNPCKLMask(Ops, V2IsSplat);
1857}
1858
1859/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
1860/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
1862  unsigned NumElems = N.size();
1863  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1864    return false;
1865
1866  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
1867    SDOperand BitI  = N[i];
1868    SDOperand BitI1 = N[i+1];
1869    if (!isUndefOrEqual(BitI, j + NumElems/2))
1870      return false;
1871    if (V2IsSplat) {
1872      if (isUndefOrEqual(BitI1, NumElems))
1873        return false;
1874    } else {
1875      if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems))
1876        return false;
1877    }
1878  }
1879
1880  return true;
1881}
1882
1883bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
1884  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1885  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1886  return ::isUNPCKHMask(Ops, V2IsSplat);
1887}
1888
1889/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
1890/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
1891/// <0, 0, 1, 1>
1892bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
1893  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1894
1895  unsigned NumElems = N->getNumOperands();
1896  if (NumElems != 4 && NumElems != 8 && NumElems != 16)
1897    return false;
1898
1899  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
1900    SDOperand BitI  = N->getOperand(i);
1901    SDOperand BitI1 = N->getOperand(i+1);
1902
1903    if (!isUndefOrEqual(BitI, j))
1904      return false;
1905    if (!isUndefOrEqual(BitI1, j))
1906      return false;
1907  }
1908
1909  return true;
1910}
1911
1912/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
1913/// specifies a shuffle of elements that is suitable for input to MOVSS,
1914/// MOVSD, and MOVD, i.e. setting the lowest element.
1915static bool isMOVLMask(std::vector<SDOperand> &N) {
1916  unsigned NumElems = N.size();
1917  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1918    return false;
1919
1920  if (!isUndefOrEqual(N[0], NumElems))
1921    return false;
1922
1923  for (unsigned i = 1; i < NumElems; ++i) {
1924    SDOperand Arg = N[i];
1925    if (!isUndefOrEqual(Arg, i))
1926      return false;
1927  }
1928
1929  return true;
1930}
1931
1932bool X86::isMOVLMask(SDNode *N) {
1933  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1934  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1935  return ::isMOVLMask(Ops);
1936}
1937
/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
/// of what x86 movss wants. X86 movss requires the lowest element to be the
/// lowest element of vector 2 and the other elements to come from vector 1
/// in order.
1941static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false) {
1942  unsigned NumElems = Ops.size();
1943  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
1944    return false;
1945
1946  if (!isUndefOrEqual(Ops[0], 0))
1947    return false;
1948
1949  for (unsigned i = 1; i < NumElems; ++i) {
1950    SDOperand Arg = Ops[i];
1951    if (V2IsSplat) {
1952      if (!isUndefOrEqual(Arg, NumElems))
1953        return false;
1954    } else {
1955      if (!isUndefOrEqual(Arg, i+NumElems))
1956        return false;
1957    }
1958  }
1959
1960  return true;
1961}
1962
1963static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false) {
1964  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1965  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
1966  return isCommutedMOVL(Ops, V2IsSplat);
1967}
1968
1969/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
1970/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
1971bool X86::isMOVSHDUPMask(SDNode *N) {
1972  assert(N->getOpcode() == ISD::BUILD_VECTOR);
1973
1974  if (N->getNumOperands() != 4)
1975    return false;
1976
1977  // Expect 1, 1, 3, 3
1978  for (unsigned i = 0; i < 2; ++i) {
1979    SDOperand Arg = N->getOperand(i);
1980    if (Arg.getOpcode() == ISD::UNDEF) continue;
1981    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1982    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1983    if (Val != 1) return false;
1984  }
1985
1986  bool HasHi = false;
1987  for (unsigned i = 2; i < 4; ++i) {
1988    SDOperand Arg = N->getOperand(i);
1989    if (Arg.getOpcode() == ISD::UNDEF) continue;
1990    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
1991    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
1992    if (Val != 3) return false;
1993    HasHi = true;
1994  }
1995
1996  // Don't use movshdup if it can be done with a shufps.
1997  return HasHi;
1998}
1999
2000/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
2001/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
2002bool X86::isMOVSLDUPMask(SDNode *N) {
2003  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2004
2005  if (N->getNumOperands() != 4)
2006    return false;
2007
2008  // Expect 0, 0, 2, 2
2009  for (unsigned i = 0; i < 2; ++i) {
2010    SDOperand Arg = N->getOperand(i);
2011    if (Arg.getOpcode() == ISD::UNDEF) continue;
2012    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2013    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2014    if (Val != 0) return false;
2015  }
2016
2017  bool HasHi = false;
2018  for (unsigned i = 2; i < 4; ++i) {
2019    SDOperand Arg = N->getOperand(i);
2020    if (Arg.getOpcode() == ISD::UNDEF) continue;
2021    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2022    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2023    if (Val != 2) return false;
2024    HasHi = true;
2025  }
2026
  // Don't use movsldup if it can be done with a shufps.
2028  return HasHi;
2029}
2030
2031/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2032/// a splat of a single element.
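/// E.g. <2, 2, undef, 2> is a splat of element 2 of the first vector.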
2033static bool isSplatMask(SDNode *N) {
2034  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2035
2036  // This is a splat operation if each element of the permute is the same, and
2037  // if the value doesn't reference the second vector.
2038  unsigned NumElems = N->getNumOperands();
2039  SDOperand ElementBase;
2040  unsigned i = 0;
2041  for (; i != NumElems; ++i) {
2042    SDOperand Elt = N->getOperand(i);
    if (isa<ConstantSDNode>(Elt)) {
2044      ElementBase = Elt;
2045      break;
2046    }
2047  }
2048
2049  if (!ElementBase.Val)
2050    return false;
2051
2052  for (; i != NumElems; ++i) {
2053    SDOperand Arg = N->getOperand(i);
2054    if (Arg.getOpcode() == ISD::UNDEF) continue;
2055    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2056    if (Arg != ElementBase) return false;
2057  }
2058
2059  // Make sure it is a splat of the first vector operand.
2060  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
2061}
2062
2063/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2064/// a splat of a single element and it's a 2 or 4 element mask.
2065bool X86::isSplatMask(SDNode *N) {
2066  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2067
  // We can only splat 64-bit and 32-bit quantities with a single instruction.
2069  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
2070    return false;
2071  return ::isSplatMask(N);
2072}
2073
2074/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
2075/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
2076/// instructions.
2077unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
2078  unsigned NumOperands = N->getNumOperands();
2079  unsigned Shift = (NumOperands == 4) ? 2 : 1;
2080  unsigned Mask = 0;
2081  for (unsigned i = 0; i < NumOperands; ++i) {
2082    unsigned Val = 0;
2083    SDOperand Arg = N->getOperand(NumOperands-i-1);
2084    if (Arg.getOpcode() != ISD::UNDEF)
2085      Val = cast<ConstantSDNode>(Arg)->getValue();
2086    if (Val >= NumOperands) Val -= NumOperands;
2087    Mask |= Val;
2088    if (i != NumOperands - 1)
2089      Mask <<= Shift;
2090  }
2091
2092  return Mask;
2093}
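// Worked example of the encoding above: for the 4-element mask <3, 2, 1, 0>,
// element i lands in bits [2*i+1 : 2*i] of the immediate, giving 0b00011011
// == 0x1B, the classic "reverse" SHUFPS/PSHUFD immediate.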
2094
2095/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
2096/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
2097/// instructions.
2098unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
2099  unsigned Mask = 0;
2100  // 8 nodes, but we only care about the last 4.
2101  for (unsigned i = 7; i >= 4; --i) {
2102    unsigned Val = 0;
2103    SDOperand Arg = N->getOperand(i);
2104    if (Arg.getOpcode() != ISD::UNDEF)
2105      Val = cast<ConstantSDNode>(Arg)->getValue();
2106    Mask |= (Val - 4);
2107    if (i != 4)
2108      Mask <<= 2;
2109  }
2110
2111  return Mask;
2112}
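// Worked example: for high-half mask elements <7, 6, 5, 4>, element 4+k is
// encoded (biased by -4) into bits [2*k+1 : 2*k], which also yields 0x1B.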
2113
2114/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
2115/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
2116/// instructions.
2117unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
2118  unsigned Mask = 0;
2119  // 8 nodes, but we only care about the first 4.
2120  for (int i = 3; i >= 0; --i) {
2121    unsigned Val = 0;
2122    SDOperand Arg = N->getOperand(i);
2123    if (Arg.getOpcode() != ISD::UNDEF)
2124      Val = cast<ConstantSDNode>(Arg)->getValue();
2125    Mask |= Val;
2126    if (i != 0)
2127      Mask <<= 2;
2128  }
2129
2130  return Mask;
2131}
2132
2133/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
/// specifies an 8 element shuffle that can be broken into a pair of
2135/// PSHUFHW and PSHUFLW.
2136static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
2137  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2138
2139  if (N->getNumOperands() != 8)
2140    return false;
2141
2142  // Lower quadword shuffled.
2143  for (unsigned i = 0; i != 4; ++i) {
2144    SDOperand Arg = N->getOperand(i);
2145    if (Arg.getOpcode() == ISD::UNDEF) continue;
2146    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2147    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= 4)   // Lower-quadword elements must come from elements 0-3.
2149      return false;
2150  }
2151
2152  // Upper quadword shuffled.
2153  for (unsigned i = 4; i != 8; ++i) {
2154    SDOperand Arg = N->getOperand(i);
2155    if (Arg.getOpcode() == ISD::UNDEF) continue;
2156    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2157    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2158    if (Val < 4 || Val > 7)
2159      return false;
2160  }
2161
2162  return true;
2163}
2164
/// CommuteVectorShuffle - Swap vector_shuffle operands as well as
/// the values in the permute mask.
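/// E.g. vector_shuffle V1, V2, <0, 1, 4, 5> becomes
/// vector_shuffle V2, V1, <4, 5, 0, 1>.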
2167static SDOperand CommuteVectorShuffle(SDOperand Op, SelectionDAG &DAG) {
2168  SDOperand V1 = Op.getOperand(0);
2169  SDOperand V2 = Op.getOperand(1);
2170  SDOperand Mask = Op.getOperand(2);
2171  MVT::ValueType VT = Op.getValueType();
2172  MVT::ValueType MaskVT = Mask.getValueType();
2173  MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
2174  unsigned NumElems = Mask.getNumOperands();
2175  std::vector<SDOperand> MaskVec;
2176
2177  for (unsigned i = 0; i != NumElems; ++i) {
2178    SDOperand Arg = Mask.getOperand(i);
2179    if (Arg.getOpcode() == ISD::UNDEF) {
2180      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
2181      continue;
2182    }
2183    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2184    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2185    if (Val < NumElems)
2186      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
2187    else
2188      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
2189  }
2190
2191  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2192  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V2, V1, Mask);
2193}
2194
2195/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
2196/// match movhlps. The lower half elements should come from upper half of
2197/// V1 (and in order), and the upper half elements should come from the upper
2198/// half of V2 (and in order).
2199static bool ShouldXformToMOVHLPS(SDNode *Mask) {
2200  unsigned NumElems = Mask->getNumOperands();
2201  if (NumElems != 4)
2202    return false;
2203  for (unsigned i = 0, e = 2; i != e; ++i)
2204    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
2205      return false;
2206  for (unsigned i = 2; i != 4; ++i)
2207    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
2208      return false;
2209  return true;
2210}
2211
2212/// isScalarLoadToVector - Returns true if the node is a scalar load that
2213/// is promoted to a vector.
2214static inline bool isScalarLoadToVector(SDNode *N) {
2215  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
2216    N = N->getOperand(0).Val;
2217    return (N->getOpcode() == ISD::LOAD);
2218  }
2219  return false;
2220}
2221
2222/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
2223/// match movlp{s|d}. The lower half elements should come from lower half of
2224/// V1 (and in order), and the upper half elements should come from the upper
2225/// half of V2 (and in order). And since V1 will become the source of the
2226/// MOVLP, it must be either a vector load or a scalar load to vector.
2227static bool ShouldXformToMOVLP(SDNode *V1, SDNode *Mask) {
2228  if (V1->getOpcode() != ISD::LOAD && !isScalarLoadToVector(V1))
2229    return false;
2230
2231  unsigned NumElems = Mask->getNumOperands();
2232  if (NumElems != 2 && NumElems != 4)
2233    return false;
2234  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
2235    if (!isUndefOrEqual(Mask->getOperand(i), i))
2236      return false;
2237  for (unsigned i = NumElems/2; i != NumElems; ++i)
2238    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
2239      return false;
2240  return true;
2241}
2242
2243/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
2244/// all the same.
2245static bool isSplatVector(SDNode *N) {
2246  if (N->getOpcode() != ISD::BUILD_VECTOR)
2247    return false;
2248
2249  SDOperand SplatValue = N->getOperand(0);
2250  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
2251    if (N->getOperand(i) != SplatValue)
2252      return false;
2253  return true;
2254}
2255
2256/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 point to its first element.
2258static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
2259  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
2260
2261  bool Changed = false;
2262  std::vector<SDOperand> MaskVec;
2263  unsigned NumElems = Mask.getNumOperands();
2264  for (unsigned i = 0; i != NumElems; ++i) {
2265    SDOperand Arg = Mask.getOperand(i);
2266    if (Arg.getOpcode() != ISD::UNDEF) {
2267      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2268      if (Val > NumElems) {
2269        Arg = DAG.getConstant(NumElems, Arg.getValueType());
2270        Changed = true;
2271      }
2272    }
2273    MaskVec.push_back(Arg);
2274  }
2275
2276  if (Changed)
2277    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(), MaskVec);
2278  return Mask;
2279}
2280
/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d}, movd
2282/// operation of specified width.
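/// E.g. for NumElems == 4 this returns <4, 1, 2, 3>: element 0 is taken from
/// V2 and the remaining elements stay in place from V1, matching movss.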
2283static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
2284  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2285  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2286
2287  std::vector<SDOperand> MaskVec;
2288  MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
2289  for (unsigned i = 1; i != NumElems; ++i)
2290    MaskVec.push_back(DAG.getConstant(i, BaseVT));
2291  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2292}
2293
2294/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
2295/// of specified width.
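/// E.g. for NumElems == 4 this returns <0, 4, 1, 5>.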
2296static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
2297  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2298  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2299  std::vector<SDOperand> MaskVec;
2300  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
2301    MaskVec.push_back(DAG.getConstant(i,            BaseVT));
2302    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
2303  }
2304  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2305}
2306
2307/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
2308/// of specified width.
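/// E.g. for NumElems == 4 this returns <2, 6, 3, 7>.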
2309static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
2310  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2311  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2312  unsigned Half = NumElems/2;
2313  std::vector<SDOperand> MaskVec;
2314  for (unsigned i = 0; i != Half; ++i) {
2315    MaskVec.push_back(DAG.getConstant(i + Half,            BaseVT));
2316    MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
2317  }
2318  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2319}
2320
2321/// getZeroVector - Returns a vector of specified type with all zero elements.
2322///
2323static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
2324  assert(MVT::isVector(VT) && "Expected a vector type");
2325  unsigned NumElems = getVectorNumElements(VT);
2326  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
2327  bool isFP = MVT::isFloatingPoint(EVT);
2328  SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
2329  std::vector<SDOperand> ZeroVec(NumElems, Zero);
2330  return DAG.getNode(ISD::BUILD_VECTOR, VT, ZeroVec);
2331}
2332
2333/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
2334///
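/// The splat is first widened by repeated unpack-low shuffles (16 -> 8 -> 4
/// elements), then redone as a v4i32 shuffle with an all-zeros mask so that
/// one 32-bit element is broadcast to every lane.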
2335static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
2336  SDOperand V1 = Op.getOperand(0);
2337  SDOperand Mask = Op.getOperand(2);
2338  MVT::ValueType VT = Op.getValueType();
2339  unsigned NumElems = Mask.getNumOperands();
2340  Mask = getUnpacklMask(NumElems, DAG);
2341  while (NumElems != 4) {
2342    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
2343    NumElems >>= 1;
2344  }
2345  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
2346
2347  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
2348  Mask = getZeroVector(MaskVT, DAG);
2349  SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
2350                                  DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
2351  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
2352}
2353
2354/// isZeroNode - Returns true if Elt is a constant zero or a floating point
2355/// constant +0.0.
2356static inline bool isZeroNode(SDOperand Elt) {
2357  return ((isa<ConstantSDNode>(Elt) &&
2358           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
2359          (isa<ConstantFPSDNode>(Elt) &&
2360           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
2361}
2362
2363/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
2364/// vector and zero or undef vector.
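/// E.g. for NumElems == 4 and Idx == 0 the mask is <4, 0, 0, 0>: lane 0 comes
/// from V2 and every other lane takes element 0 of the zero (or undef) vector.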
2365static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
2366                                             unsigned NumElems, unsigned Idx,
2367                                             bool isZero, SelectionDAG &DAG) {
2368  SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
2369  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2370  MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
2371  SDOperand Zero = DAG.getConstant(0, EVT);
2372  std::vector<SDOperand> MaskVec(NumElems, Zero);
2373  MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
2374  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2375  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2376}
2377
2378/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
2379///
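/// Adjacent byte pairs are packed into 16-bit words (zext/shl/or) and then
/// inserted as v8i16 elements, since SSE2 has a word-sized element insert
/// (pinsrw) but no byte-sized one.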
2380static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
2381                                       unsigned NumNonZero, unsigned NumZero,
2382                                       SelectionDAG &DAG) {
2383  if (NumNonZero > 8)
2384    return SDOperand();
2385
2386  SDOperand V(0, 0);
2387  bool First = true;
2388  for (unsigned i = 0; i < 16; ++i) {
2389    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
2390    if (ThisIsNonZero && First) {
2391      if (NumZero)
2392        V = getZeroVector(MVT::v8i16, DAG);
2393      else
2394        V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
2395      First = false;
2396    }
2397
2398    if ((i & 1) != 0) {
2399      SDOperand ThisElt(0, 0), LastElt(0, 0);
2400      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
2401      if (LastIsNonZero) {
2402        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
2403      }
2404      if (ThisIsNonZero) {
2405        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
2406        ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
2407                              ThisElt, DAG.getConstant(8, MVT::i8));
2408        if (LastIsNonZero)
2409          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
2410      } else
2411        ThisElt = LastElt;
2412
2413      if (ThisElt.Val)
2414        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
2415                        DAG.getConstant(i/2, MVT::i32));
2416    }
2417  }
2418
2419  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
2420}
2421
/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
2423///
2424static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
2425                                       unsigned NumNonZero, unsigned NumZero,
2426                                       SelectionDAG &DAG) {
2427  if (NumNonZero > 4)
2428    return SDOperand();
2429
2430  SDOperand V(0, 0);
2431  bool First = true;
2432  for (unsigned i = 0; i < 8; ++i) {
2433    bool isNonZero = (NonZeros & (1 << i)) != 0;
2434    if (isNonZero) {
2435      if (First) {
2436        if (NumZero)
2437          V = getZeroVector(MVT::v8i16, DAG);
2438        else
2439          V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
2440        First = false;
2441      }
2442      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
2443                      DAG.getConstant(i, MVT::i32));
2444    }
2445  }
2446
2447  return V;
2448}
2449
2450SDOperand
2451X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  // All zeros are handled with pxor.
2453  if (ISD::isBuildVectorAllZeros(Op.Val))
2454    return Op;
2455
  // All ones are handled with pcmpeqd.
2457  if (ISD::isBuildVectorAllOnes(Op.Val))
2458    return Op;
2459
2460  MVT::ValueType VT = Op.getValueType();
2461  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
2462  unsigned EVTBits = MVT::getSizeInBits(EVT);
2463
2464  unsigned NumElems = Op.getNumOperands();
2465  unsigned NumZero  = 0;
2466  unsigned NumNonZero = 0;
2467  unsigned NonZeros = 0;
2468  std::set<SDOperand> Values;
2469  for (unsigned i = 0; i < NumElems; ++i) {
2470    SDOperand Elt = Op.getOperand(i);
2471    if (Elt.getOpcode() != ISD::UNDEF) {
2472      Values.insert(Elt);
2473      if (isZeroNode(Elt))
2474        NumZero++;
2475      else {
2476        NonZeros |= (1 << i);
2477        NumNonZero++;
2478      }
2479    }
2480  }
2481
2482  if (NumNonZero == 0)
2483    // Must be a mix of zero and undef. Return a zero vector.
2484    return getZeroVector(VT, DAG);
2485
  // Splat is obviously ok. Let the legalizer expand it to a shuffle.
2487  if (Values.size() == 1)
2488    return SDOperand();
2489
2490  // Special case for single non-zero element.
2491  if (NumNonZero == 1) {
2492    unsigned Idx = CountTrailingZeros_32(NonZeros);
2493    SDOperand Item = Op.getOperand(Idx);
2494    Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
2495    if (Idx == 0)
2496      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
2497      return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
2498                                         NumZero > 0, DAG);
2499
2500    if (EVTBits == 32) {
2501      // Turn it into a shuffle of zero and zero-extended scalar to vector.
2502      Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
2503                                         DAG);
2504      MVT::ValueType MaskVT  = MVT::getIntVectorWithNumElements(NumElems);
2505      MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
2506      std::vector<SDOperand> MaskVec;
2507      for (unsigned i = 0; i < NumElems; i++)
2508        MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
2509      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2510      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
2511                         DAG.getNode(ISD::UNDEF, VT), Mask);
2512    }
2513  }
2514
  // Let the legalizer expand 2-wide build_vectors.
2516  if (EVTBits == 64)
2517    return SDOperand();
2518
2519  // If element VT is < 32 bits, convert it to inserts into a zero vector.
2520  if (EVTBits == 8) {
2521    SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG);
2522    if (V.Val) return V;
2523  }
2524
2525  if (EVTBits == 16) {
2526    SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG);
2527    if (V.Val) return V;
2528  }
2529
  // If the element VT is 32 bits, turn it into a number of shuffles.
2531  std::vector<SDOperand> V(NumElems);
2532  if (NumElems == 4 && NumZero > 0) {
2533    for (unsigned i = 0; i < 4; ++i) {
2534      bool isZero = !(NonZeros & (1 << i));
2535      if (isZero)
2536        V[i] = getZeroVector(VT, DAG);
2537      else
2538        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2539    }
2540
2541    for (unsigned i = 0; i < 2; ++i) {
2542      switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
2543        default: break;
2544        case 0:
2545          V[i] = V[i*2];  // Must be a zero vector.
2546          break;
2547        case 1:
2548          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
2549                             getMOVLMask(NumElems, DAG));
2550          break;
2551        case 2:
2552          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2553                             getMOVLMask(NumElems, DAG));
2554          break;
2555        case 3:
2556          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
2557                             getUnpacklMask(NumElems, DAG));
2558          break;
2559      }
2560    }
2561
    // Take advantage of the fact that an R32 to VR128 scalar_to_vector
    // (i.e. movd) clears the upper bits.
2564    // FIXME: we can do the same for v4f32 case when we know both parts of
2565    // the lower half come from scalar_to_vector (loadf32). We should do
2566    // that in post legalizer dag combiner with target specific hooks.
2567    if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
2568      return V[0];
2569    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2570    MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
2571    std::vector<SDOperand> MaskVec;
2572    bool Reverse = (NonZeros & 0x3) == 2;
2573    for (unsigned i = 0; i < 2; ++i)
2574      if (Reverse)
2575        MaskVec.push_back(DAG.getConstant(1-i, EVT));
2576      else
2577        MaskVec.push_back(DAG.getConstant(i, EVT));
2578    Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
2579    for (unsigned i = 0; i < 2; ++i)
2580      if (Reverse)
2581        MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
2582      else
2583        MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
2584    SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2585    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
2586  }
2587
2588  if (Values.size() > 2) {
2589    // Expand into a number of unpckl*.
2590    // e.g. for v4f32
2591    //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
2592    //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
2593    //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
2594    SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
2595    for (unsigned i = 0; i < NumElems; ++i)
2596      V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
2597    NumElems >>= 1;
2598    while (NumElems != 0) {
2599      for (unsigned i = 0; i < NumElems; ++i)
2600        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
2601                           UnpckMask);
2602      NumElems >>= 1;
2603    }
2604    return V[0];
2605  }
2606
2607  return SDOperand();
2608}
2609
2610SDOperand
2611X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
2612  SDOperand V1 = Op.getOperand(0);
2613  SDOperand V2 = Op.getOperand(1);
2614  SDOperand PermMask = Op.getOperand(2);
2615  MVT::ValueType VT = Op.getValueType();
2616  unsigned NumElems = PermMask.getNumOperands();
2617  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
2618  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
2619
2620  if (isSplatMask(PermMask.Val)) {
2621    if (NumElems <= 4) return Op;
2622    // Promote it to a v4i32 splat.
2623    return PromoteSplat(Op, DAG);
2624  }
2625
2626  if (X86::isMOVLMask(PermMask.Val))
2627    return (V1IsUndef) ? V2 : Op;
2628
2629  if (X86::isMOVSHDUPMask(PermMask.Val) ||
2630      X86::isMOVSLDUPMask(PermMask.Val) ||
2631      X86::isMOVHLPSMask(PermMask.Val) ||
2632      X86::isMOVHPMask(PermMask.Val) ||
2633      X86::isMOVLPMask(PermMask.Val))
2634    return Op;
2635
2636  if (ShouldXformToMOVHLPS(PermMask.Val) ||
2637      ShouldXformToMOVLP(V1.Val, PermMask.Val))
2638    return CommuteVectorShuffle(Op, DAG);
2639
2640  bool V1IsSplat = isSplatVector(V1.Val) || V1.getOpcode() == ISD::UNDEF;
2641  bool V2IsSplat = isSplatVector(V2.Val) || V2.getOpcode() == ISD::UNDEF;
2642  if (V1IsSplat && !V2IsSplat) {
2643    Op = CommuteVectorShuffle(Op, DAG);
2644    V1 = Op.getOperand(0);
2645    V2 = Op.getOperand(1);
2646    PermMask = Op.getOperand(2);
2647    V2IsSplat = true;
2648  }
2649
2650  if (isCommutedMOVL(PermMask.Val, V2IsSplat)) {
2651    if (V2IsUndef) return V1;
2652    Op = CommuteVectorShuffle(Op, DAG);
2653    V1 = Op.getOperand(0);
2654    V2 = Op.getOperand(1);
2655    PermMask = Op.getOperand(2);
2656    if (V2IsSplat) {
      // V2 is a splat, so the mask may be malformed. That is, it may point
      // to any V2 element. The instruction selector won't like this. Get
      // a corrected mask and form a proper MOVS{S|D}.
2660      SDOperand NewMask = getMOVLMask(NumElems, DAG);
2661      if (NewMask.Val != PermMask.Val)
2662        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2663    }
2664    return Op;
2665  }
2666
2667  if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
2668      X86::isUNPCKLMask(PermMask.Val) ||
2669      X86::isUNPCKHMask(PermMask.Val))
2670    return Op;
2671
2672  if (V2IsSplat) {
2673    // Normalize mask so all entries that point to V2 points to its first
2674    // element then try to match unpck{h|l} again. If match, return a
2675    // new vector_shuffle with the corrected mask.
2676    SDOperand NewMask = NormalizeMask(PermMask, DAG);
2677    if (NewMask.Val != PermMask.Val) {
2678      if (X86::isUNPCKLMask(PermMask.Val, true)) {
2679        SDOperand NewMask = getUnpacklMask(NumElems, DAG);
2680        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2681      } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
2682        SDOperand NewMask = getUnpackhMask(NumElems, DAG);
2683        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
2684      }
2685    }
2686  }
2687
2688  // Normalize the node to match x86 shuffle ops if needed
2689  if (V2.getOpcode() != ISD::UNDEF)
2690    if (isCommutedSHUFP(PermMask.Val)) {
2691      Op = CommuteVectorShuffle(Op, DAG);
2692      V1 = Op.getOperand(0);
2693      V2 = Op.getOperand(1);
2694      PermMask = Op.getOperand(2);
2695    }
2696
2697  // If VT is integer, try PSHUF* first, then SHUFP*.
2698  if (MVT::isInteger(VT)) {
2699    if (X86::isPSHUFDMask(PermMask.Val) ||
2700        X86::isPSHUFHWMask(PermMask.Val) ||
2701        X86::isPSHUFLWMask(PermMask.Val)) {
2702      if (V2.getOpcode() != ISD::UNDEF)
2703        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
2704                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
2705      return Op;
2706    }
2707
2708    if (X86::isSHUFPMask(PermMask.Val))
2709      return Op;
2710
    // Break a v8i16 shuffle into a low-half / high-half shuffle node pair.
2712    if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
2713      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
2714      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
2715      std::vector<SDOperand> MaskVec;
2716      for (unsigned i = 0; i != 4; ++i)
2717        MaskVec.push_back(PermMask.getOperand(i));
2718      for (unsigned i = 4; i != 8; ++i)
2719        MaskVec.push_back(DAG.getConstant(i, BaseVT));
2720      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2721      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2722      MaskVec.clear();
2723      for (unsigned i = 0; i != 4; ++i)
2724        MaskVec.push_back(DAG.getConstant(i, BaseVT));
2725      for (unsigned i = 4; i != 8; ++i)
2726        MaskVec.push_back(PermMask.getOperand(i));
2727      Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec);
2728      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
2729    }
2730  } else {
2731    // Floating point cases in the other order.
2732    if (X86::isSHUFPMask(PermMask.Val))
2733      return Op;
2734    if (X86::isPSHUFDMask(PermMask.Val) ||
2735        X86::isPSHUFHWMask(PermMask.Val) ||
2736        X86::isPSHUFLWMask(PermMask.Val)) {
2737      if (V2.getOpcode() != ISD::UNDEF)
2738        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
2739                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
2740      return Op;
2741    }
2742  }
2743
2744  if (NumElems == 4) {
2745    MVT::ValueType MaskVT = PermMask.getValueType();
2746    MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
2747    std::vector<std::pair<int, int> > Locs;
    Locs.resize(NumElems);   // resize, not reserve: Locs is indexed below.
2749    std::vector<SDOperand> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2750    std::vector<SDOperand> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2751    unsigned NumHi = 0;
2752    unsigned NumLo = 0;
    // If no more than two elements come from either vector, this can be
    // implemented with two shuffles: the first gathers the elements, and
    // the second, which takes the first shuffle as both of its vector
    // operands, puts the elements into the right order.
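    // E.g. for the mask <0, 4, 1, 5> the first shuffle gathers the elements
    // as <0, 1, 4, 5>; the second shuffle then applies <0, 2, 5, 7> to the
    // gathered vector (used as both operands) to restore the requested order.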
2757    for (unsigned i = 0; i != NumElems; ++i) {
2758      SDOperand Elt = PermMask.getOperand(i);
2759      if (Elt.getOpcode() == ISD::UNDEF) {
2760        Locs[i] = std::make_pair(-1, -1);
2761      } else {
2762        unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
2763        if (Val < NumElems) {
2764          Locs[i] = std::make_pair(0, NumLo);
2765          Mask1[NumLo] = Elt;
2766          NumLo++;
2767        } else {
2768          Locs[i] = std::make_pair(1, NumHi);
2769          if (2+NumHi < NumElems)
2770            Mask1[2+NumHi] = Elt;
2771          NumHi++;
2772        }
2773      }
2774    }
2775    if (NumLo <= 2 && NumHi <= 2) {
2776      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2777                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT, Mask1));
2778      for (unsigned i = 0; i != NumElems; ++i) {
2779        if (Locs[i].first == -1)
2780          continue;
2781        else {
2782          unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
2783          Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
2784          Mask2[i] = DAG.getConstant(Idx, MaskEVT);
2785        }
2786      }
2787
2788      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
2789                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT, Mask2));
2790    }
2791
2792    // Break it into (shuffle shuffle_hi, shuffle_lo).
    Locs.clear();
    Locs.resize(NumElems);   // Every entry is rewritten in the loop below.
2794    std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2795    std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
2796    std::vector<SDOperand> *MaskPtr = &LoMask;
2797    unsigned MaskIdx = 0;
2798    unsigned LoIdx = 0;
2799    unsigned HiIdx = NumElems/2;
2800    for (unsigned i = 0; i != NumElems; ++i) {
2801      if (i == NumElems/2) {
2802        MaskPtr = &HiMask;
2803        MaskIdx = 1;
2804        LoIdx = 0;
2805        HiIdx = NumElems/2;
2806      }
2807      SDOperand Elt = PermMask.getOperand(i);
2808      if (Elt.getOpcode() == ISD::UNDEF) {
2809        Locs[i] = std::make_pair(-1, -1);
2810      } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
2811        Locs[i] = std::make_pair(MaskIdx, LoIdx);
2812        (*MaskPtr)[LoIdx] = Elt;
2813        LoIdx++;
2814      } else {
2815        Locs[i] = std::make_pair(MaskIdx, HiIdx);
2816        (*MaskPtr)[HiIdx] = Elt;
2817        HiIdx++;
2818      }
2819    }
2820
2821    SDOperand LoShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2822                                      DAG.getNode(ISD::BUILD_VECTOR, MaskVT, LoMask));
2823    SDOperand HiShuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
2824                                      DAG.getNode(ISD::BUILD_VECTOR, MaskVT, HiMask));
2825    std::vector<SDOperand> MaskOps;
2826    for (unsigned i = 0; i != NumElems; ++i) {
2827      if (Locs[i].first == -1) {
2828        MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
2829      } else {
2830        unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
2831        MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
2832      }
2833    }
2834    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
2835                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskOps));
2836  }
2837
2838  return SDOperand();
2839}
2840
2841SDOperand
2842X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2843  if (!isa<ConstantSDNode>(Op.getOperand(1)))
2844    return SDOperand();
2845
2846  MVT::ValueType VT = Op.getValueType();
2847  // TODO: handle v16i8.
2848  if (MVT::getSizeInBits(VT) == 16) {
2849    // Transform it so it match pextrw which produces a 32-bit result.
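    // Note: this relies on the MVT enum ordering, in which MVT::i32
    // immediately follows MVT::i16.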
    MVT::ValueType EVT = (MVT::ValueType)(VT+1);
    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
                                    Op.getOperand(0), Op.getOperand(1));
    SDOperand Assert  = DAG.getNode(ISD::AssertZext, EVT, Extract,
                                    DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
  } else if (MVT::getSizeInBits(VT) == 32) {
    SDOperand Vec = Op.getOperand(0);
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
    if (Idx == 0)
      return Op;

    // SHUFPS the element to the lowest double word, then movss.
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
    std::vector<SDOperand> IdxVec;
    IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, Vec, Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getConstant(0, MVT::i32));
  } else if (MVT::getSizeInBits(VT) == 64) {
    SDOperand Vec = Op.getOperand(0);
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
    if (Idx == 0)
      return Op;

    // UNPCKHPD the element to the lowest double word, then movsd.
    // Note if the lower 64 bits of the result of the UNPCKHPD are then stored
    // to a f64mem, the whole operation is folded into a single MOVHPDmr.
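    // The shuffle mask <1,undef> moves element 1 (the high 64 bits of the
    // source) into slot 0 of the result.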
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(2);
    std::vector<SDOperand> IdxVec;
    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, IdxVec);
    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
                       DAG.getConstant(0, MVT::i32));
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  // Transform it so it matches pinsrw, which expects a 16-bit value in an R32
  // as its second argument.
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
  SDOperand N0 = Op.getOperand(0);
  SDOperand N1 = Op.getOperand(1);
  SDOperand N2 = Op.getOperand(2);
  if (MVT::getSizeInBits(BaseVT) == 16) {
    if (N1.getValueType() != MVT::i32)
      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
    if (N2.getValueType() != MVT::i32)
      N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
  } else if (MVT::getSizeInBits(BaseVT) == 32) {
    unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
    if (Idx == 0) {
      // Use a movss.
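      // The shuffle mask is <4,1,2,3>: index 4 selects element 0 of the
      // second operand (the scalar), while indices 1-3 keep elements 1-3
      // of N0.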
      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
      std::vector<SDOperand> MaskVec;
      MaskVec.push_back(DAG.getConstant(4, BaseVT));
      for (unsigned i = 1; i <= 3; ++i)
        MaskVec.push_back(DAG.getConstant(i, BaseVT));
      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT, MaskVec));
    } else {
      // Use two pinsrw instructions to insert a 32-bit value.
      Idx <<= 1;
      if (MVT::isFloatingPoint(N1.getValueType())) {
        if (N1.getOpcode() == ISD::LOAD) {
          // Just load directly from f32mem to R32.
          N1 = DAG.getLoad(MVT::i32, N1.getOperand(0), N1.getOperand(1),
                           N1.getOperand(2));
        } else {
          N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
          N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
          N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
                           DAG.getConstant(0, MVT::i32));
        }
      }
      N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
                       DAG.getConstant(Idx, MVT::i32));
      N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
                       DAG.getConstant(Idx+1, MVT::i32));
      return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
    }
  }

  return SDOperand();
}

SDOperand
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterparts wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above-mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing modes. These wrapped nodes will be selected
// into MOV32ri.
SDOperand
X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                            DAG.getTargetConstantPool(CP->get(), getPointerTy(),
                                                      CP->getAlignment()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (getTargetMachine().getRelocationModel() == Reloc::PIC)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                    DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
  }

  return Result;
}

SDOperand
X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                                 DAG.getTargetGlobalAddress(GV, getPointerTy()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (getTargetMachine().getRelocationModel() == Reloc::PIC)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);

    // For Darwin, external and weak symbols are indirect, so we want to load
    // the value at address GV, not the value of GV itself. This means that
    // the GlobalAddress must be in the base or index register of the address,
    // not the GV offset field.
    if (getTargetMachine().getRelocationModel() != Reloc::Static &&
        DarwinGVRequiresExtraLoad(GV))
      Result = DAG.getLoad(MVT::i32, DAG.getEntryNode(),
                           Result, DAG.getSrcValue(NULL));
  }

  return Result;
}

SDOperand
X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                                 DAG.getTargetExternalSymbol(Sym, getPointerTy()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (getTargetMachine().getRelocationModel() == Reloc::PIC)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
  }

  return Result;
}

SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
    assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
           "Not an i64 shift!");
    bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
    SDOperand ShOpLo = Op.getOperand(0);
    SDOperand ShOpHi = Op.getOperand(1);
    SDOperand ShAmt  = Op.getOperand(2);
    SDOperand Tmp1 = isSRA ? DAG.getNode(ISD::SRA, MVT::i32, ShOpHi,
                                         DAG.getConstant(31, MVT::i8))
                           : DAG.getConstant(0, MVT::i32);

    SDOperand Tmp2, Tmp3;
    if (Op.getOpcode() == ISD::SHL_PARTS) {
      Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
      Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
    } else {
      Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
      Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
    }

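    // Bit 5 of the shift amount distinguishes shifts by 0-31 from shifts by
    // 32-63; when it is set (COND_NE below), the CMOVs select the results
    // for the "shifted past 32 bits" case.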
    SDOperand InFlag = DAG.getNode(X86ISD::TEST, MVT::Flag,
                                   ShAmt, DAG.getConstant(32, MVT::i8));

    SDOperand Hi, Lo;
    SDOperand CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);

    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::i32);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    if (Op.getOpcode() == ISD::SHL_PARTS) {
      Ops.push_back(Tmp2);
      Ops.push_back(Tmp3);
      Ops.push_back(CC);
      Ops.push_back(InFlag);
      Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
      InFlag = Hi.getValue(1);

      Ops.clear();
      Ops.push_back(Tmp3);
      Ops.push_back(Tmp1);
      Ops.push_back(CC);
      Ops.push_back(InFlag);
      Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
    } else {
      Ops.push_back(Tmp2);
      Ops.push_back(Tmp3);
      Ops.push_back(CC);
      Ops.push_back(InFlag);
      Lo = DAG.getNode(X86ISD::CMOV, Tys, Ops);
      InFlag = Lo.getValue(1);

      Ops.clear();
      Ops.push_back(Tmp3);
      Ops.push_back(Tmp1);
      Ops.push_back(CC);
      Ops.push_back(InFlag);
      Hi = DAG.getNode(X86ISD::CMOV, Tys, Ops);
    }

    Tys.clear();
    Tys.push_back(MVT::i32);
    Tys.push_back(MVT::i32);
    Ops.clear();
    Ops.push_back(Lo);
    Ops.push_back(Hi);
    return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
}

SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
         Op.getOperand(0).getValueType() >= MVT::i16 &&
         "Unknown SINT_TO_FP to lower!");

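  // The x87 FILD instruction only reads its integer operand from memory, so
  // spill the value to a stack slot first and load it back as an FP value.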
  SDOperand Result;
  MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
  unsigned Size = MVT::getSizeInBits(SrcVT)/8;
  MachineFunction &MF = DAG.getMachineFunction();
  int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
  SDOperand Chain = DAG.getNode(ISD::STORE, MVT::Other,
                                DAG.getEntryNode(), Op.getOperand(0),
                                StackSlot, DAG.getSrcValue(NULL));

  // Build the FILD.
  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::f64);
  Tys.push_back(MVT::Other);
  if (X86ScalarSSE) Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(StackSlot);
  Ops.push_back(DAG.getValueType(SrcVT));
  Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG : X86ISD::FILD,
                       Tys, Ops);

  if (X86ScalarSSE) {
    Chain = Result.getValue(1);
    SDOperand InFlag = Result.getValue(2);

    // FIXME: Currently the FST is flagged to the FILD_FLAG. This
    // shouldn't be necessary except that RFP cannot be live across
    // multiple blocks. When the stackifier is fixed, they can be uncoupled.
    MachineFunction &MF = DAG.getMachineFunction();
    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::Other);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(Result);
    Ops.push_back(StackSlot);
    Ops.push_back(DAG.getValueType(Op.getValueType()));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::FST, Tys, Ops);
    Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot,
                         DAG.getSrcValue(NULL));
  }

  return Result;
}

SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
         "Unknown FP_TO_SINT to lower!");
  // We lower FP->sint16/32/64 into a FIST* node, followed by a load from a
  // temporary stack slot.
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
  int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());

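  // The FP_TO_INT*_IN_MEM pseudo ops are expanded after selection into code
  // that temporarily switches the FPU control word to round-toward-zero
  // around the FIST store, since C's FP-to-int conversion truncates.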
  unsigned Opc;
  switch (Op.getValueType()) {
    default: assert(0 && "Invalid FP_TO_SINT to lower!");
    case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
    case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
    case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
  }

  SDOperand Chain = DAG.getEntryNode();
  SDOperand Value = Op.getOperand(0);
  if (X86ScalarSSE) {
    assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
    Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value, StackSlot,
                        DAG.getSrcValue(0));
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(StackSlot);
    Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType()));
    Value = DAG.getNode(X86ISD::FLD, Tys, Ops);
    Chain = Value.getValue(1);
    SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
    StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
  }

  // Build the FP_TO_INT*_IN_MEM node.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Value);
  Ops.push_back(StackSlot);
  SDOperand FIST = DAG.getNode(Opc, MVT::Other, Ops);

  // Load the result.
  return DAG.getLoad(Op.getValueType(), FIST, StackSlot,
                     DAG.getSrcValue(NULL));
}

SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  const Type *OpNTy = MVT::getTypeForValueType(VT);
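  // FABS is lowered as an AND with a constant that clears only the sign bit.
  // The mask is materialized in the constant pool, padded out to a full pack
  // so it can be loaded with a single packed load.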
  std::vector<Constant*> CV;
  if (VT == MVT::f64) {
    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
  } else {
    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
  }
  Constant *CS = ConstantStruct::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
  SDOperand Mask
    = DAG.getNode(X86ISD::LOAD_PACK,
                  VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
  return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
}

SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  const Type *OpNTy = MVT::getTypeForValueType(VT);
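  // FNEG is lowered as an XOR with a constant that has only the sign bit
  // set, which flips the sign of the operand.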
  std::vector<Constant*> CV;
  if (VT == MVT::f64) {
    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
  } else {
    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
  }
  Constant *CS = ConstantStruct::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
  SDOperand Mask  = DAG.getNode(X86ISD::LOAD_PACK,
                          VT, DAG.getEntryNode(), CPIdx, DAG.getSrcValue(NULL));
  return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
}

SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
  SDOperand Cond;
  SDOperand CC = Op.getOperand(2);
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  bool isFP = MVT::isFloatingPoint(Op.getOperand(1).getValueType());
  bool Flip;
  unsigned X86CC;
  if (translateX86CC(CC, isFP, X86CC, Flip)) {
    if (Flip)
      Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
                         Op.getOperand(1), Op.getOperand(0));
    else
      Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
                         Op.getOperand(0), Op.getOperand(1));
    return DAG.getNode(X86ISD::SETCC, MVT::i8,
                       DAG.getConstant(X86CC, MVT::i8), Cond);
  } else {
    assert(isFP && "Illegal integer SetCC!");

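    // SETOEQ and SETUNE have no single x86 condition code: after an FP
    // compare they need both the parity flag and the zero flag, so emit two
    // SETCCs on the same compare and combine them with AND/OR.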
    Cond = DAG.getNode(X86ISD::CMP, MVT::Flag,
                       Op.getOperand(0), Op.getOperand(1));
    std::vector<MVT::ValueType> Tys;
    std::vector<SDOperand> Ops;
    switch (SetCCOpcode) {
      default: assert(false && "Illegal floating point SetCC!");
      case ISD::SETOEQ: {  // !PF & ZF
        Tys.push_back(MVT::i8);
        Tys.push_back(MVT::Flag);
        Ops.push_back(DAG.getConstant(X86ISD::COND_NP, MVT::i8));
        Ops.push_back(Cond);
        SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
        SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                     DAG.getConstant(X86ISD::COND_E, MVT::i8),
                                     Tmp1.getValue(1));
        return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
      }
      case ISD::SETUNE: {  // PF | !ZF
        Tys.push_back(MVT::i8);
        Tys.push_back(MVT::Flag);
        Ops.push_back(DAG.getConstant(X86ISD::COND_P, MVT::i8));
        Ops.push_back(Cond);
        SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
        SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                     DAG.getConstant(X86ISD::COND_NE, MVT::i8),
                                     Tmp1.getValue(1));
        return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
      }
    }
  }
}

SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  bool isFPStack = MVT::isFloatingPoint(VT) && !X86ScalarSSE;
  bool addTest   = false;
  SDOperand Op0 = Op.getOperand(0);
  SDOperand Cond, CC;
  if (Op0.getOpcode() == ISD::SETCC)
    Op0 = LowerOperation(Op0, DAG);

  if (Op0.getOpcode() == X86ISD::SETCC) {
    // If the condition flag is set by a X86ISD::CMP, then make a copy of it
    // (since the flag operand cannot be shared). If the X86ISD::SETCC does
    // not have another use it will be eliminated.
    // If the X86ISD::SETCC has more than one use, then it's probably better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reasons).
    unsigned CmpOpc = Op0.getOperand(1).getOpcode();
    if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
        CmpOpc == X86ISD::UCOMI) {
      if (!Op0.hasOneUse()) {
        std::vector<MVT::ValueType> Tys;
        for (unsigned i = 0; i < Op0.Val->getNumValues(); ++i)
          Tys.push_back(Op0.Val->getValueType(i));
        std::vector<SDOperand> Ops;
        for (unsigned i = 0; i < Op0.getNumOperands(); ++i)
          Ops.push_back(Op0.getOperand(i));
        Op0 = DAG.getNode(X86ISD::SETCC, Tys, Ops);
      }

      CC   = Op0.getOperand(0);
      Cond = Op0.getOperand(1);
      // Make a copy, as the flag result cannot be used by more than one node.
      Cond = DAG.getNode(CmpOpc, MVT::Flag,
                         Cond.getOperand(0), Cond.getOperand(1));
      addTest =
        isFPStack && !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
    } else
      addTest = true;
  } else
    addTest = true;

  if (addTest) {
    CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
    Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Op0, Op0);
  }

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(Op.getValueType());
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  // X86ISD::CMOV selects its second value operand when the condition is
  // true, so push the false value first and the true value second.
  Ops.push_back(Op.getOperand(2));
  Ops.push_back(Op.getOperand(1));
  Ops.push_back(CC);
  Ops.push_back(Cond);
  return DAG.getNode(X86ISD::CMOV, Tys, Ops);
}

SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = false;
  SDOperand Cond  = Op.getOperand(1);
  SDOperand Dest  = Op.getOperand(2);
  SDOperand CC;
  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerOperation(Cond, DAG);

  if (Cond.getOpcode() == X86ISD::SETCC) {
    // If the condition flag is set by a X86ISD::CMP, then make a copy of it
    // (since the flag operand cannot be shared). If the X86ISD::SETCC does
    // not have another use it will be eliminated.
    // If the X86ISD::SETCC has more than one use, then it's probably better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reasons).
    unsigned CmpOpc = Cond.getOperand(1).getOpcode();
    if (CmpOpc == X86ISD::CMP || CmpOpc == X86ISD::COMI ||
        CmpOpc == X86ISD::UCOMI) {
      if (!Cond.hasOneUse()) {
        std::vector<MVT::ValueType> Tys;
        for (unsigned i = 0; i < Cond.Val->getNumValues(); ++i)
          Tys.push_back(Cond.Val->getValueType(i));
        std::vector<SDOperand> Ops;
        for (unsigned i = 0; i < Cond.getNumOperands(); ++i)
          Ops.push_back(Cond.getOperand(i));
        Cond = DAG.getNode(X86ISD::SETCC, Tys, Ops);
      }

      CC   = Cond.getOperand(0);
      Cond = Cond.getOperand(1);
      // Make a copy, as the flag result cannot be used by more than one node.
      Cond = DAG.getNode(CmpOpc, MVT::Flag,
                         Cond.getOperand(0), Cond.getOperand(1));
    } else
      addTest = true;
  } else
    addTest = true;

  if (addTest) {
    CC = DAG.getConstant(X86ISD::COND_NE, MVT::i8);
    Cond = DAG.getNode(X86ISD::TEST, MVT::Flag, Cond, Cond);
  }
  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
                     Op.getOperand(0), Dest, CC, Cond);
}

SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(),
                                 DAG.getTargetJumpTable(JT->getIndex(),
                                                        getPointerTy()));
  if (Subtarget->isTargetDarwin()) {
    // With PIC, the address is actually $g + Offset.
    if (getTargetMachine().getRelocationModel() == Reloc::PIC)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
  }

  return Result;
}

SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Copy;

  switch (Op.getNumOperands()) {
    default:
      assert(0 && "Do not know how to return this many arguments!");
      abort();
    case 1:    // ret void.
      return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0),
                         DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
    case 2: {
      MVT::ValueType ArgVT = Op.getOperand(1).getValueType();

      if (MVT::isVector(ArgVT)) {
        // Integer or FP vector result -> XMM0.
        if (DAG.getMachineFunction().liveout_empty())
          DAG.getMachineFunction().addLiveOut(X86::XMM0);
        Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1),
                                SDOperand());
      } else if (MVT::isInteger(ArgVT)) {
        // Integer result -> EAX.
        if (DAG.getMachineFunction().liveout_empty())
          DAG.getMachineFunction().addLiveOut(X86::EAX);

        Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EAX, Op.getOperand(1),
                                SDOperand());
      } else if (!X86ScalarSSE) {
        // FP return with fp-stack value.
        if (DAG.getMachineFunction().liveout_empty())
          DAG.getMachineFunction().addLiveOut(X86::ST0);

        std::vector<MVT::ValueType> Tys;
        Tys.push_back(MVT::Other);
        Tys.push_back(MVT::Flag);
        std::vector<SDOperand> Ops;
        Ops.push_back(Op.getOperand(0));
        Ops.push_back(Op.getOperand(1));
        Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
      } else {
        // FP return with ScalarSSE: the value must still be returned on the
        // fp-stack.
        if (DAG.getMachineFunction().liveout_empty())
          DAG.getMachineFunction().addLiveOut(X86::ST0);

        SDOperand MemLoc;
        SDOperand Chain = Op.getOperand(0);
        SDOperand Value = Op.getOperand(1);

        if (Value.getOpcode() == ISD::LOAD &&
            (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
          Chain  = Value.getOperand(0);
          MemLoc = Value.getOperand(1);
        } else {
          // Spill the value to memory and reload it into top of stack.
          unsigned Size = MVT::getSizeInBits(ArgVT)/8;
          MachineFunction &MF = DAG.getMachineFunction();
          int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
          MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
          Chain = DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0),
                              Value, MemLoc, DAG.getSrcValue(0));
        }
        std::vector<MVT::ValueType> Tys;
        Tys.push_back(MVT::f64);
        Tys.push_back(MVT::Other);
        std::vector<SDOperand> Ops;
        Ops.push_back(Chain);
        Ops.push_back(MemLoc);
        Ops.push_back(DAG.getValueType(ArgVT));
        Copy = DAG.getNode(X86ISD::FLD, Tys, Ops);
        Tys.clear();
        Tys.push_back(MVT::Other);
        Tys.push_back(MVT::Flag);
        Ops.clear();
        Ops.push_back(Copy.getValue(1));
        Ops.push_back(Copy);
        Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, Ops);
      }
      break;
    }
    case 3:
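      // Two integer results are returned in the EDX:EAX register pair.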
      if (DAG.getMachineFunction().liveout_empty()) {
        DAG.getMachineFunction().addLiveOut(X86::EAX);
        DAG.getMachineFunction().addLiveOut(X86::EDX);
      }

      Copy = DAG.getCopyToReg(Op.getOperand(0), X86::EDX, Op.getOperand(2),
                              SDOperand());
      Copy = DAG.getCopyToReg(Copy, X86::EAX, Op.getOperand(1),
                              Copy.getValue(1));
      break;
  }
  return DAG.getNode(X86ISD::RET_FLAG, MVT::Other,
                     Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
                     Copy.getValue(1));
}

SDOperand
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
  if (FormalArgs.empty()) {
    unsigned CC = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
    if (CC == CallingConv::Fast && EnableFastCC)
      LowerFastCCArguments(Op, DAG);
    else
      LowerCCCArguments(Op, DAG);
  }
  return FormalArgs[Op.ResNo];
}

SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand InFlag(0, 0);
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;

  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If not DWORD aligned, or if the constant size is below the rep-string
  // threshold, just call memset; it knows how to align to the right boundary
  // first.
  if ((Align & 3) != 0 ||
      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
    MVT::ValueType IntPtr = getPointerTy();
    const Type *IntPtrTy = getTargetData()->getIntPtrType();
    std::vector<std::pair<SDOperand, const Type*> > Args;
    Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
    // Extend the ubyte argument to be an int value for the call.
    SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
    Args.push_back(std::make_pair(Val, IntPtrTy));
    Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
    std::pair<SDOperand,SDOperand> CallResult =
      LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
                  DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
    return CallResult.second;
  }

  MVT::ValueType AVT;
  SDOperand Count;
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  unsigned BytesLeft = 0;
  bool TwoRepStos = false;
  if (ValC) {
    unsigned ValReg;
    unsigned Val = ValC->getValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
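    // For example, a DWORD-aligned memset of the constant byte 0xAB splats
    // the value to 0xABABABAB in EAX and issues a single REP STOSD, with any
    // 1-3 trailing bytes stored separately below.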
    switch (Align & 3) {
      case 2:   // WORD aligned
        if (!I) {
          // The size is not a known constant: fall back to byte stores.
          AVT = MVT::i8;
          Count = Op.getOperand(3);
          ValReg = X86::AL;
          break;
        }
        AVT = MVT::i16;
        Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
        BytesLeft = I->getValue() % 2;
        Val    = (Val << 8) | Val;
        ValReg = X86::AX;
        break;
      case 0:   // DWORD aligned
        AVT = MVT::i32;
        if (I) {
          Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
          BytesLeft = I->getValue() % 4;
        } else {
          Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
                              DAG.getConstant(2, MVT::i8));
          TwoRepStos = true;
        }
        Val = (Val << 8) | Val;
        Val = (Val << 16) | Val;
        ValReg = X86::EAX;
        break;
      default:  // Byte aligned
        AVT = MVT::i8;
        Count = Op.getOperand(3);
        ValReg = X86::AL;
        break;
    }

    Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
                              InFlag);
    InFlag = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count  = Op.getOperand(3);
    Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
    InFlag = Chain.getValue(1);
  }

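  // rep stos implicitly takes its count in ECX, its destination in EDI, and
  // its value in AL/AX/EAX.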
  Chain  = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
  InFlag = Chain.getValue(1);

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::Other);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getValueType(AVT));
  Ops.push_back(InFlag);
  Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);

  if (TwoRepStos) {
    InFlag = Chain.getValue(1);
    Count = Op.getOperand(3);
    MVT::ValueType CVT = Count.getValueType();
    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                                 DAG.getConstant(3, CVT));
    Chain  = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
    InFlag = Chain.getValue(1);
    Tys.clear();
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(MVT::i8));
    Ops.push_back(InFlag);
    Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, Ops);
  } else if (BytesLeft) {
    // Issue stores for the last 1 - 3 bytes.
    SDOperand Value;
    unsigned Val = ValC->getValue() & 255;
    unsigned Offset = I->getValue() - BytesLeft;
    SDOperand DstAddr = Op.getOperand(1);
    MVT::ValueType AddrVT = DstAddr.getValueType();
    if (BytesLeft >= 2) {
      Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                          DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                      DAG.getConstant(Offset, AddrVT)),
                          DAG.getSrcValue(NULL));
      BytesLeft -= 2;
      Offset += 2;
    }

    if (BytesLeft == 1) {
      Value = DAG.getConstant(Val, MVT::i8);
      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                          DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                      DAG.getConstant(Offset, AddrVT)),
                          DAG.getSrcValue(NULL));
    }
  }

  return Chain;
}

SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;

  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If not DWORD aligned, or if the constant size is below the rep-string
  // threshold, just call memcpy; it knows how to align to the right boundary
  // first.
  if ((Align & 3) != 0 ||
      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
    MVT::ValueType IntPtr = getPointerTy();
    const Type *IntPtrTy = getTargetData()->getIntPtrType();
    std::vector<std::pair<SDOperand, const Type*> > Args;
    Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
    Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy));
    Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
    std::pair<SDOperand,SDOperand> CallResult =
      LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
                  DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
    return CallResult.second;
  }

  MVT::ValueType AVT;
  SDOperand Count;
  unsigned BytesLeft = 0;
  bool TwoRepMovs = false;
  switch (Align & 3) {
    case 2:   // WORD aligned
      if (!I) {
        // The size is not a known constant: fall back to byte copies.
        AVT = MVT::i8;
        Count = Op.getOperand(3);
        break;
      }
      AVT = MVT::i16;
      Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
      BytesLeft = I->getValue() % 2;
      break;
    case 0:   // DWORD aligned
      AVT = MVT::i32;
      if (I) {
        Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
        BytesLeft = I->getValue() % 4;
      } else {
        Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
                            DAG.getConstant(2, MVT::i8));
        TwoRepMovs = true;
      }
      break;
    default:  // Byte aligned
      AVT = MVT::i8;
      Count = Op.getOperand(3);
      break;
  }

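  // rep movs implicitly takes its count in ECX, its destination in EDI, and
  // its source in ESI.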
  SDOperand InFlag(0, 0);
  Chain  = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag);
  InFlag = Chain.getValue(1);

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::Other);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getValueType(AVT));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);

  if (TwoRepMovs) {
    InFlag = Chain.getValue(1);
    Count = Op.getOperand(3);
    MVT::ValueType CVT = Count.getValueType();
    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                                 DAG.getConstant(3, CVT));
    Chain  = DAG.getCopyToReg(Chain, X86::ECX, Left, InFlag);
    InFlag = Chain.getValue(1);
    Tys.clear();
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(MVT::i8));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, Ops);
  } else if (BytesLeft) {
    // Issue loads and stores for the last 1 - 3 bytes.
    unsigned Offset = I->getValue() - BytesLeft;
    SDOperand DstAddr = Op.getOperand(1);
    MVT::ValueType DstVT = DstAddr.getValueType();
    SDOperand SrcAddr = Op.getOperand(2);
    MVT::ValueType SrcVT = SrcAddr.getValueType();
    SDOperand Value;
    if (BytesLeft >= 2) {
      Value = DAG.getLoad(MVT::i16, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          DAG.getSrcValue(NULL));
      Chain = Value.getValue(1);
      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                          DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                      DAG.getConstant(Offset, DstVT)),
                          DAG.getSrcValue(NULL));
      BytesLeft -= 2;
      Offset += 2;
    }

    if (BytesLeft == 1) {
      Value = DAG.getLoad(MVT::i8, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          DAG.getSrcValue(NULL));
      Chain = Value.getValue(1);
      Chain = DAG.getNode(ISD::STORE, MVT::Other, Chain, Value,
                          DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                      DAG.getConstant(Offset, DstVT)),
                          DAG.getSrcValue(NULL));
    }
  }

  return Chain;
}

SDOperand
X86TargetLowering::LowerREADCYCLCECOUNTER(SDOperand Op, SelectionDAG &DAG) {
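  // RDTSC places the 64-bit timestamp counter in EDX:EAX; copy both halves
  // out through the flag and merge them into the node's two i32 results
  // plus a chain.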
  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::Other);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Op.getOperand(0));
  SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, Ops);
  Ops.clear();
  Ops.push_back(DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1)));
  Ops.push_back(DAG.getCopyFromReg(Ops[0].getValue(1), X86::EDX,
                                   MVT::i32, Ops[0].getValue(2)));
  Ops.push_back(Ops[1].getValue(1));
  Tys[0] = Tys[1] = MVT::i32;
  Tys.push_back(MVT::Other);
  return DAG.getNode(ISD::MERGE_VALUES, Tys, Ops);
}

SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  // FIXME: Replace MVT::i32 with PointerTy.
  SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
  return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
                     Op.getOperand(1), Op.getOperand(2));
}

SDOperand
X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
  switch (IntNo) {
  default: return SDOperand();    // Don't custom lower most intrinsics.
  // Comparison intrinsics.
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomineq_sd: {
    unsigned Opc = 0;
    ISD::CondCode CC = ISD::SETCC_INVALID;
    switch (IntNo) {
    default: break;
    case Intrinsic::x86_sse_comieq_ss:
    case Intrinsic::x86_sse2_comieq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_comilt_ss:
    case Intrinsic::x86_sse2_comilt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_comile_ss:
    case Intrinsic::x86_sse2_comile_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_comigt_ss:
    case Intrinsic::x86_sse2_comigt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_comige_ss:
    case Intrinsic::x86_sse2_comige_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_comineq_ss:
    case Intrinsic::x86_sse2_comineq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETNE;
      break;
    case Intrinsic::x86_sse_ucomieq_ss:
    case Intrinsic::x86_sse2_ucomieq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_ucomilt_ss:
    case Intrinsic::x86_sse2_ucomilt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_ucomile_ss:
    case Intrinsic::x86_sse2_ucomile_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_ucomigt_ss:
    case Intrinsic::x86_sse2_ucomigt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_ucomige_ss:
    case Intrinsic::x86_sse2_ucomige_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_ucomineq_ss:
    case Intrinsic::x86_sse2_ucomineq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETNE;
      break;
    }
    bool Flip;
    unsigned X86CC;
    translateX86CC(CC, true, X86CC, Flip);
    SDOperand Cond = DAG.getNode(Opc, MVT::Flag, Op.getOperand(Flip?2:1),
                                 Op.getOperand(Flip?1:2));
    SDOperand SetCC = DAG.getNode(X86ISD::SETCC, MVT::i8,
                                  DAG.getConstant(X86CC, MVT::i8), Cond);
    return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
  }
  }
}

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getOpcode()) {
  default: assert(0 && "Should not custom lower this!");
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
  case ISD::SHL_PARTS:
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
  case ISD::FABS:               return LowerFABS(Op, DAG);
  case ISD::FNEG:               return LowerFNEG(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::SELECT:             return LowerSELECT(Op, DAG);
  case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::RET:                return LowerRET(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
  case ISD::MEMSET:             return LowerMEMSET(Op, DAG);
  case ISD::MEMCPY:             return LowerMEMCPY(Op, DAG);
  case ISD::READCYCLECOUNTER:   return LowerREADCYCLCECOUNTER(Op, DAG);
  case ISD::VASTART:            return LowerVASTART(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  }
}

const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return NULL;
  case X86ISD::SHLD:               return "X86ISD::SHLD";
  case X86ISD::SHRD:               return "X86ISD::SHRD";
  case X86ISD::FAND:               return "X86ISD::FAND";
  case X86ISD::FXOR:               return "X86ISD::FXOR";
  case X86ISD::FILD:               return "X86ISD::FILD";
  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
  case X86ISD::FLD:                return "X86ISD::FLD";
  case X86ISD::FST:                return "X86ISD::FST";
  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
  case X86ISD::CALL:               return "X86ISD::CALL";
  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
  case X86ISD::CMP:                return "X86ISD::CMP";
  case X86ISD::TEST:               return "X86ISD::TEST";
  case X86ISD::COMI:               return "X86ISD::COMI";
  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
  case X86ISD::SETCC:              return "X86ISD::SETCC";
  case X86ISD::CMOV:               return "X86ISD::CMOV";
  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
  }
}

void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       uint64_t Mask,
                                                       uint64_t &KnownZero,
                                                       uint64_t &KnownOne,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  KnownZero = KnownOne = 0;   // Don't know anything.
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
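    // X86ISD::SETCC produces 0 or 1 in an i8, so all bits above bit 0 are
    // known to be zero.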
    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    break;
  }
}

std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    // FIXME: not handling MMX registers yet ('y' constraint).
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX,
                                   X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
    case 'l':   // INDEX_REGS
      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX,
                                   X86::ESI, X86::EDI, X86::EBP, 0);
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      return make_vector<unsigned>(X86::EAX, X86::EBX, X86::ECX, X86::EDX, 0);
    case 'x':   // SSE_REGS if SSE1 allowed
      if (Subtarget->hasSSE1())
        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
                                     0);
      return std::vector<unsigned>();
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (Subtarget->hasSSE2())
        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
                                     0);
      return std::vector<unsigned>();
    }
  }

  return std::vector<unsigned>();
}

/// isLegalAddressImmediate - Return true if the integer value or
/// GlobalValue can be used as the offset of the target addressing mode.
bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const {
  // X86 allows a sign- or zero-extended 32-bit immediate displacement.
  return (V > -(1LL << 32) && V < (1LL << 32)-1);
}

bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
  if (Subtarget->isTargetDarwin()) {
    Reloc::Model RModel = getTargetMachine().getRelocationModel();
    if (RModel == Reloc::Static)
      return true;
    else if (RModel == Reloc::DynamicNoPIC)
      return !DarwinGVRequiresExtraLoad(GV);
    else
      return false;
  } else
    return true;
}

/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(VT) == 64) return false;
  return (Mask.Val->getNumOperands() <= 4 ||
          isSplatMask(Mask.Val) ||
          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
          X86::isUNPCKLMask(Mask.Val) ||
          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
          X86::isUNPCKHMask(Mask.Val));
}

bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
                                               MVT::ValueType EVT,
                                               SelectionDAG &DAG) const {
  unsigned NumElts = BVOps.size();
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
  if (NumElts == 2) return true;
  if (NumElts == 4) {
    return (isMOVLMask(BVOps)  || isCommutedMOVL(BVOps, true) ||
            isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps));
  }
  return false;
}