//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;

// FIXME: temporary.
static cl::opt<bool> EnableFastCC("enable-x86-fastcc", cl::Hidden,
                                  cl::desc("Enable fastcc on X86"));

X86TargetLowering::X86TargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSE = Subtarget->hasSSE2();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  // Set up the TargetLowering object.

  // X86 is weird: it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setSetCCResultType(MVT::i8);
  setSetCCResultContents(ZeroOrOneSetCCResult);
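  // (A SETcc on x86 materializes exactly 0 or 1 in a byte register, e.g. via
  // sete, which is what ZeroOrOneSetCCResult promises.)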
  setSchedulingPreference(SchedulingForRegPressure);
  setShiftAmountFlavor(Mask);   // shl X, 32 == shl X, 0
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (!Subtarget->isTargetDarwin())
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmpLongJmp(true);

  // Add legal addressing mode scale values.
  addLegalAddressScale(8);
  addLegalAddressScale(4);
  addLegalAddressScale(2);
  // Enter the ones which require both scale + index last. These are more
  // expensive.
  addLegalAddressScale(9);
  addLegalAddressScale(5);
  addLegalAddressScale(3);
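  // (Scales 3, 5 and 9 have no direct x86 encoding; presumably they are
  // matched as base + index*{2,4,8} with base == index, e.g.
  // lea (%reg,%reg,4) for a scale of 5 -- hence "scale + index" above.)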

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSE)
      // If SSE i64 SINT_TO_FP is not available, expand i32 UINT_TO_FP.
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Expand);
    else
      setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
  // SSE has no i16 to fp conversion, only i32
  if (X86ScalarSSE)
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
  else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
  }

  if (!Subtarget->is64Bit()) {
    // Custom lower SINT_TO_FP and FP_TO_SINT from/to i64 in 32-bit mode.
    setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
  }

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSE) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
  } else {
    if (X86ScalarSSE && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
  }

  setOperationAction(ISD::BIT_CONVERT      , MVT::f32  , Expand);
  setOperationAction(ISD::BIT_CONVERT      , MVT::i32  , Expand);

  setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  setOperationAction(ISD::MEMMOVE          , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
  setOperationAction(ISD::FREM             , MVT::f64  , Expand);

  setOperationAction(ISD::CTPOP            , MVT::i8   , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i8   , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i16  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i16  , Expand);
  setOperationAction(ISD::CTPOP            , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32  , Expand);
  setOperationAction(ISD::CTLZ             , MVT::i32  , Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP          , MVT::i64  , Expand);
    setOperationAction(ISD::CTTZ           , MVT::i64  , Expand);
    setOperationAction(ISD::CTLZ           , MVT::i64  , Expand);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16  , Expand);
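  // (The bswap instruction only operates on 32- and 64-bit registers, so the
  // i16 form is expanded -- typically to a rotate-by-8.)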

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1   , Promote);
  setOperationAction(ISD::SELECT           , MVT::i8   , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT          , MVT::i16  , Custom);
  setOperationAction(ISD::SELECT          , MVT::i32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f32  , Custom);
  setOperationAction(ISD::SELECT          , MVT::f64  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i8   , Custom);
  setOperationAction(ISD::SETCC           , MVT::i16  , Custom);
  setOperationAction(ISD::SETCC           , MVT::i32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f32  , Custom);
  setOperationAction(ISD::SETCC           , MVT::f64  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT        , MVT::i64  , Custom);
    setOperationAction(ISD::SETCC         , MVT::i64  , Custom);
  }
  // X86 ret instruction may pop stack.
  setOperationAction(ISD::RET             , MVT::Other, Custom);
  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool    , MVT::i32  , Custom);
  setOperationAction(ISD::JumpTable       , MVT::i32  , Custom);
  setOperationAction(ISD::GlobalAddress   , MVT::i32  , Custom);
  setOperationAction(ISD::ExternalSymbol  , MVT::i32  , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool  , MVT::i64  , Custom);
    setOperationAction(ISD::JumpTable     , MVT::i64  , Custom);
    setOperationAction(ISD::GlobalAddress , MVT::i64  , Custom);
    setOperationAction(ISD::ExternalSymbol, MVT::i64  , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRA_PARTS       , MVT::i32  , Custom);
  setOperationAction(ISD::SRL_PARTS       , MVT::i32  , Custom);
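  // (The *_PARTS nodes let 64-bit shifts on 32-bit x86 be emitted as
  // SHLD/SHRD pairs rather than a library call.)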
  // X86 wants to expand memset / memcpy itself.
  setOperationAction(ISD::MEMSET          , MVT::Other, Custom);
  setOperationAction(ISD::MEMCPY          , MVT::Other, Custom);

  // We don't have line number support yet.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (X86ScalarSSE) {
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // We don't support sin/cos/fmod
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FREM , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);
    setOperationAction(ISD::FREM , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
    addLegalFPImmediate(+0.0); // xorps / xorpd
  } else {
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFPRegisterClass);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN           , MVT::f64  , Expand);
      setOperationAction(ISD::FCOS           , MVT::f64  , Expand);
    }

    setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
    addLegalFPImmediate(+0.0); // FLD0
    addLegalFPImmediate(+1.0); // FLD1
    addLegalFPImmediate(-0.0); // FLD0/FCHS
    addLegalFPImmediate(-1.0); // FLD1/FCHS
  }

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::Vector + 1;
       VT != (unsigned)MVT::LAST_VALUETYPE; VT++) {
    setOperationAction(ISD::ADD , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE,     (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  (MVT::ValueType)VT, Expand);
  }

  if (Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);

    // FIXME: add MMX packed arithmetics
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v8i8,  Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v4i16, Expand);
    setOperationAction(ISD::BUILD_VECTOR,     MVT::v2i32, Expand);
  }

  if (Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
  }

  if (Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    setOperationAction(ISD::FADD,               MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB,               MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL,               MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV,               MVT::v2f64, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    // Implement v4f32 insert_vector_elt in terms of SSE2 v8i16 ones.
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::BUILD_VECTOR,        (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,      (MVT::ValueType)VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT,  (MVT::ValueType)VT, Custom);
    }
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
      setOperationAction(ISD::AND,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::OR,     (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR,     (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::XOR,    (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR,    (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD,   (MVT::ValueType)VT, MVT::v2i64);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v2i64);
    }
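    // (Bitwise ops and full-width loads are element-type agnostic on 128-bit
    // vectors, so a single set of v2i64 patterns can cover all of them.)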

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::SELECT);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are optimizing for size.
  maxStoresPerMemset = 16; // For %llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 16; // For %llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 16; // For %llvm.memmove -> sequence of stores
  allowUnalignedMemoryAccesses = true; // x86 supports it!
}

//===----------------------------------------------------------------------===//
//                    C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// AddLiveIn - This helper function adds the specified physical register to the
/// MachineFunction as a live in value.  It also creates a corresponding virtual
/// register for it.
static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
                          TargetRegisterClass *RC) {
  assert(RC->contains(PReg) && "Not the correct regclass!");
  unsigned VReg = MF.getSSARegMap()->createVirtualRegister(RC);
  MF.addLiveIn(PReg, VReg);
  return VReg;
}

/// HowToPassCCCArgument - Returns how a formal argument of the specified type
/// should be passed. If it is passed on the stack, returns the size of the
/// stack slot; if it is passed in an XMM register, returns the number of XMM
/// registers needed.
static void
HowToPassCCCArgument(MVT::ValueType ObjectVT, unsigned NumXMMRegs,
                     unsigned &ObjSize, unsigned &ObjXMMRegs) {
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:  ObjSize = 1; break;
  case MVT::i16: ObjSize = 2; break;
  case MVT::i32: ObjSize = 4; break;
  case MVT::i64: ObjSize = 8; break;
  case MVT::f32: ObjSize = 4; break;
  case MVT::f64: ObjSize = 8; break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 4)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}

SDOperand X86TargetLowering::LowerCCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is <= 4 bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };
  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjXMMRegs = 0;
    HowToPassCCCArgument(ObjectVT, NumXMMRegs, ObjSize, ObjXMMRegs);
    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    SDOperand ArgValue;
    if (ObjXMMRegs) {
      // Passed in an XMM register.
      unsigned Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                               X86::VR128RegisterClass);
      ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
      ArgValues.push_back(ArgValue);
      NumXMMRegs += ObjXMMRegs;
    } else {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
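      // (Rounds ArgOffset up to the next multiple of 16; e.g. an offset of 20
      // becomes 32.)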
      // Create the frame index object for this incoming parameter...
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
      ArgValues.push_back(ArgValue);
      ArgOffset += ArgIncrement;   // Move on to the next argument...
    }
  }

  ArgValues.push_back(Root);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (isVarArg)
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
  RegSaveFrameIndex = 0xAAAAAAA;  // X86-64 only.
  ReturnAddrIndex = 0;            // No return address slot generated yet.
  BytesToPopOnReturn = 0;         // Callee pops nothing.
  BytesCallerReserves = ArgOffset;

  // If this is a struct return, the callee pops the hidden struct
  // pointer. This is common for Darwin/X86, Linux & Mingw32 targets.
  if (MF.getFunction()->getCallingConv() == CallingConv::CSRet)
    BytesToPopOnReturn = 4;

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size());
}


SDOperand X86TargetLowering::LowerCCCCallTo(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain     = Op.getOperand(0);
  unsigned CallingConv= cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  MVT::ValueType RetVT= Op.Val->getValueType(0);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  // Keep track of the number of XMM regs passed so far.
  unsigned NumXMMRegs = 0;
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
    case MVT::f64:
      NumBytes += 8;
      break;
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4)
        ++NumXMMRegs;
      else {
        // XMM arguments have to be aligned on 16-byte boundary.
        NumBytes = ((NumBytes + 15) / 16) * 16;
        NumBytes += 16;
      }
      break;
    }
  }

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumXMMRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16: {
      // Promote the integer to 32 bits.  If the input type is signed use a
      // sign extend, otherwise use a zero extend.
      unsigned ExtOp =
        dyn_cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue() ?
        ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, MVT::i32, Arg);
    }
    // Fallthrough

    case MVT::i32:
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
      ArgOffset += 4;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
      ArgOffset += 8;
      break;
    }
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 4) {
        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
        NumXMMRegs++;
      } else {
        // XMM arguments have to be aligned on 16-byte boundary.
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += 16;
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush = 0;

  // If this is a call to a struct-return function, the callee
  // pops the hidden struct pointer, so we have to push it back.
  // This is common for Darwin/X86, Linux & Mingw32 targets.
  if (CallingConv == CallingConv::CSRet)
    NumBytesForCalleeToPush = 4;

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(RetVT);
    break;
  case MVT::f32:
  case MVT::f64: {
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(InFlag);
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys,
                                   &Ops[0], Ops.size());
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      Tys.clear();
      Tys.push_back(MVT::Other);
      Ops.clear();
      Ops.push_back(Chain);
      Ops.push_back(RetVal);
      Ops.push_back(StackSlot);
      Ops.push_back(DAG.getValueType(RetVT));
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
      RetVal = DAG.getLoad(RetVT, Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }

    if (RetVT == MVT::f32 && !X86ScalarSSE)
      // FIXME: we would really like to remember that this FP_ROUND
      // operation is okay to eliminate if we allow excess FP precision.
      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
    ResultVals.push_back(RetVal);
    NodeTys.push_back(RetVT);
    break;
  }
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}


//===----------------------------------------------------------------------===//
//                 X86-64 C Calling Convention implementation
//===----------------------------------------------------------------------===//

/// HowToPassX86_64CCCArgument - Returns how a formal argument of the specified
/// type should be passed. If it is passed on the stack, returns the size of the
/// stack slot; if it is passed in integer or XMM registers, returns the number
/// of integer or XMM registers needed.
static void
HowToPassX86_64CCCArgument(MVT::ValueType ObjectVT,
                           unsigned NumIntRegs, unsigned NumXMMRegs,
                           unsigned &ObjSize, unsigned &ObjIntRegs,
                           unsigned &ObjXMMRegs) {
  ObjSize = 0;
  ObjIntRegs = 0;
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    if (NumIntRegs < 6)
      ObjIntRegs = 1;
    else {
      switch (ObjectVT) {
      default: break;
      case MVT::i8:  ObjSize = 1; break;
      case MVT::i16: ObjSize = 2; break;
      case MVT::i32: ObjSize = 4; break;
      case MVT::i64: ObjSize = 8; break;
      }
    }
    break;
  case MVT::f32:
  case MVT::f64:
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 8)
      ObjXMMRegs = 1;
    else {
      switch (ObjectVT) {
      default: break;
      case MVT::f32:  ObjSize = 4; break;
      case MVT::f64:  ObjSize = 8; break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64: ObjSize = 16; break;
      }
    }
    break;
  }
}

SDOperand
X86TargetLowering::LowerX86_64CCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [RSP] -- return address
  // [RSP + 8] -- first nonreg argument (leftmost lexically)
  // [RSP +16] -- second nonreg argument, if 1st argument is <= 8 bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  unsigned NumIntRegs = 0;  // Int regs used for parameter passing.
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  static const unsigned GPR8ArgRegs[] = {
    X86::DIL, X86::SIL, X86::DL,  X86::CL,  X86::R8B, X86::R9B
  };
  static const unsigned GPR16ArgRegs[] = {
    X86::DI,  X86::SI,  X86::DX,  X86::CX,  X86::R8W, X86::R9W
  };
  static const unsigned GPR32ArgRegs[] = {
    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
  };
  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8,  X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 8;
    unsigned ObjSize = 0;
    unsigned ObjIntRegs = 0;
    unsigned ObjXMMRegs = 0;

    // FIXME: __int128 and long double support?
    HowToPassX86_64CCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
                               ObjSize, ObjIntRegs, ObjXMMRegs);
    if (ObjSize > 8)
      ArgIncrement = ObjSize;

    unsigned Reg = 0;
    SDOperand ArgValue;
    if (ObjIntRegs || ObjXMMRegs) {
      switch (ObjectVT) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::i64: {
        TargetRegisterClass *RC = NULL;
        switch (ObjectVT) {
        default: break;
        case MVT::i8:
          RC = X86::GR8RegisterClass;
          Reg = GPR8ArgRegs[NumIntRegs];
          break;
        case MVT::i16:
          RC = X86::GR16RegisterClass;
          Reg = GPR16ArgRegs[NumIntRegs];
          break;
        case MVT::i32:
          RC = X86::GR32RegisterClass;
          Reg = GPR32ArgRegs[NumIntRegs];
          break;
        case MVT::i64:
          RC = X86::GR64RegisterClass;
          Reg = GPR64ArgRegs[NumIntRegs];
          break;
        }
        Reg = AddLiveIn(MF, Reg, RC);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      case MVT::f32:
      case MVT::f64:
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64: {
        TargetRegisterClass *RC = (ObjectVT == MVT::f32) ?
          X86::FR32RegisterClass : ((ObjectVT == MVT::f64) ?
                              X86::FR64RegisterClass : X86::VR128RegisterClass);
        Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], RC);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      }
      NumIntRegs += ObjIntRegs;
      NumXMMRegs += ObjXMMRegs;
    } else if (ObjSize) {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
      ArgOffset += ArgIncrement;   // Move on to the next argument.
    }

    ArgValues.push_back(ArgValue);
  }

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    // For X86-64, if there are vararg parameters that are passed via
    // registers, then we must store them to their spots on the stack so they
    // may be loaded by dereferencing the result of va_next.
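    // (In the AMD64 ABI va_list, gp_offset and fp_offset index into the
    // register save area; the values computed here are their initial values.)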
    VarArgsGPOffset = NumIntRegs * 8;
    VarArgsFPOffset = 6 * 8 + NumXMMRegs * 16;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
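    // Register save area: 6 GPRs * 8 bytes + 8 XMM registers * 16 bytes =
    // 176 bytes, kept 16-byte aligned for the XMM stores below.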
    RegSaveFrameIndex = MFI->CreateStackObject(6 * 8 + 8 * 16, 16);

    // Store the integer parameter registers.
    std::vector<SDOperand> MemOps;
    SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
    SDOperand FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                              DAG.getConstant(VarArgsGPOffset, getPointerTy()));
    for (; NumIntRegs != 6; ++NumIntRegs) {
      unsigned VReg = AddLiveIn(MF, GPR64ArgRegs[NumIntRegs],
                                X86::GR64RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i64);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(8, getPointerTy()));
    }

    // Now store the XMM (fp + vector) parameter registers.
    FIN = DAG.getNode(ISD::ADD, getPointerTy(), RSFIN,
                      DAG.getConstant(VarArgsFPOffset, getPointerTy()));
    for (; NumXMMRegs != 8; ++NumXMMRegs) {
      unsigned VReg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs],
                                X86::VR128RegisterClass);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::v4f32);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                        DAG.getConstant(16, getPointerTy()));
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  ReturnAddrIndex = 0;     // No return address slot generated yet.
  BytesToPopOnReturn = 0;  // Callee pops nothing.
  BytesCallerReserves = ArgOffset;

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size());
}

SDOperand
X86TargetLowering::LowerX86_64CCCCallTo(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain     = Op.getOperand(0);
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  MVT::ValueType RetVT= Op.Val->getValueType(0);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;
  unsigned NumIntRegs = 0;  // Int regs used for parameter passing.
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  static const unsigned GPR8ArgRegs[] = {
    X86::DIL, X86::SIL, X86::DL,  X86::CL,  X86::R8B, X86::R9B
  };
  static const unsigned GPR16ArgRegs[] = {
    X86::DI,  X86::SI,  X86::DX,  X86::CX,  X86::R8W, X86::R9W
  };
  static const unsigned GPR32ArgRegs[] = {
    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
  };
  static const unsigned GPR64ArgRegs[] = {
    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8,  X86::R9
  };
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
    X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
  };

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    MVT::ValueType ArgVT = Arg.getValueType();

    switch (ArgVT) {
    default: assert(0 && "Unknown value type!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      if (NumIntRegs < 6)
        ++NumIntRegs;
      else
        NumBytes += 8;
      break;
    case MVT::f32:
    case MVT::f64:
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 8)
        NumXMMRegs++;
      else if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
        NumBytes += 8;
      else {
        // XMM arguments have to be aligned on 16-byte boundary.
        NumBytes = ((NumBytes + 15) / 16) * 16;
        NumBytes += 16;
      }
      break;
    }
  }

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumIntRegs = 0;
  NumXMMRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    MVT::ValueType ArgVT = Arg.getValueType();

    switch (ArgVT) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
      if (NumIntRegs < 6) {
        unsigned Reg = 0;
        switch (ArgVT) {
        default: break;
        case MVT::i8:  Reg = GPR8ArgRegs[NumIntRegs];  break;
        case MVT::i16: Reg = GPR16ArgRegs[NumIntRegs]; break;
        case MVT::i32: Reg = GPR32ArgRegs[NumIntRegs]; break;
        case MVT::i64: Reg = GPR64ArgRegs[NumIntRegs]; break;
        }
        RegsToPass.push_back(std::make_pair(Reg, Arg));
        ++NumIntRegs;
      } else {
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += 8;
      }
      break;
    case MVT::f32:
    case MVT::f64:
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
      if (NumXMMRegs < 8) {
        RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
        NumXMMRegs++;
      } else {
        if (ArgVT != MVT::f32 && ArgVT != MVT::f64) {
          // XMM arguments have to be aligned on 16-byte boundary.
          ArgOffset = ((ArgOffset + 15) / 16) * 16;
        }
        SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
          ArgOffset += 8;
        else
          ArgOffset += 16;
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isVarArg) {
    // From AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
1180    // the declaration) %al is used as hidden argument to specify the number
1181    // of SSE registers used. The contents of %al do not need to match exactly
1182    // the number of registers, but must be an ubound on the number of SSE
1183    // registers used and is in the range 0 - 8 inclusive.
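    // e.g. a varargs call that passes one double in %xmm0 can set %al to 1
    // ("movb $1, %al") immediately before the call instruction.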
    Chain = DAG.getCopyToReg(Chain, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(0, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    if (Op.Val->getValueType(1) == MVT::i64) {
      // FIXME: __int128 support?
      Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::RDX, MVT::i64,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i64);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::RAX, MVT::i64, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    // FIXME: long double support?
    Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(RetVT);
    break;
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}

//===----------------------------------------------------------------------===//
//                    Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fast' calling convention passes up to two integer arguments in
// registers (an appropriate portion of EAX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
//
// Note that this can be enhanced in the future to pass fp vals in registers
// (when we have a global fp allocator) and do other tricks.
//
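// For example, with FASTCC_NUM_INT_ARGS_INREGS == 2, fastcc
// 'int f(int a, int b, int c)' receives a in EAX, b in EDX, and c at
// [ESP+4], and the callee returns with 'ret 4' (4 = 8*0+4 bytes popped).
//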

/// HowToPassFastCCArgument - Returns how a formal argument of the specified
/// type should be passed. If it is passed on the stack, returns the size of
/// the stack slot; if it is passed in integer or XMM registers, returns the
/// number of integer or XMM registers needed.
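/// e.g. an MVT::i64 argument with NumIntRegs == 1 and
/// FASTCC_NUM_INT_ARGS_INREGS == 2 yields ObjIntRegs = 1 and ObjSize = 4:
/// the value is split between the last register and a 4-byte stack slot.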
static void
HowToPassFastCCArgument(MVT::ValueType ObjectVT,
                        unsigned NumIntRegs, unsigned NumXMMRegs,
                        unsigned &ObjSize, unsigned &ObjIntRegs,
                        unsigned &ObjXMMRegs) {
  ObjSize = 0;
  ObjIntRegs = 0;
  ObjXMMRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 1;
    break;
  case MVT::i16:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 2;
    break;
  case MVT::i32:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs < FASTCC_NUM_INT_ARGS_INREGS)
      ObjIntRegs = 1;
    else
#endif
      ObjSize = 4;
    break;
  case MVT::i64:
#if FASTCC_NUM_INT_ARGS_INREGS > 0
    if (NumIntRegs+2 <= FASTCC_NUM_INT_ARGS_INREGS) {
      ObjIntRegs = 2;
    } else if (NumIntRegs+1 <= FASTCC_NUM_INT_ARGS_INREGS) {
      ObjIntRegs = 1;
      ObjSize = 4;
    } else
#endif
      ObjSize = 8;
    break;
  case MVT::f32:
    ObjSize = 4;
    break;
  case MVT::f64:
    ObjSize = 8;
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    if (NumXMMRegs < 4)
      ObjXMMRegs = 1;
    else
      ObjSize = 16;
    break;
  }
}

SDOperand
X86TargetLowering::LowerFastCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues()-1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function the stack
  // frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first nonreg argument (leftmost lexically)
  // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size
  //    ...
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot

  // Keep track of the number of integer regs passed so far.  This can be
  // either 0 (neither EAX nor EDX used), 1 (EAX is used) or 2 (EAX and EDX are
  // both used).
  unsigned NumIntRegs = 0;
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjIntRegs = 0;
    unsigned ObjXMMRegs = 0;

    HowToPassFastCCArgument(ObjectVT, NumIntRegs, NumXMMRegs,
                            ObjSize, ObjIntRegs, ObjXMMRegs);
    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    unsigned Reg = 0;
    SDOperand ArgValue;
    if (ObjIntRegs || ObjXMMRegs) {
      switch (ObjectVT) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i8:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::AL,
                        X86::GR8RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i8);
        break;
      case MVT::i16:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::AX,
                        X86::GR16RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i16);
        break;
      case MVT::i32:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                        X86::GR32RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
        break;
      case MVT::i64:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::EAX,
                        X86::GR32RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
        if (ObjIntRegs == 2) {
          Reg = AddLiveIn(MF, X86::EDX, X86::GR32RegisterClass);
          SDOperand ArgValue2 = DAG.getCopyFromReg(Root, Reg, MVT::i32);
          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
        }
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        Reg = AddLiveIn(MF, XMMArgRegs[NumXMMRegs], X86::VR128RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, ObjectVT);
        break;
      }
      NumIntRegs += ObjIntRegs;
      NumXMMRegs += ObjXMMRegs;
    }

    if (ObjSize) {
      // XMM arguments have to be aligned on 16-byte boundary.
      if (ObjSize == 16)
        ArgOffset = ((ArgOffset + 15) / 16) * 16;
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      if (ObjectVT == MVT::i64 && ObjIntRegs) {
        SDOperand ArgValue2 = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                                          NULL, 0);
        ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
      } else
        ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
      ArgOffset += ArgIncrement;   // Move on to the next argument.
    }

    ArgValues.push_back(ArgValue);
  }

  ArgValues.push_back(Root);

  // Make sure the callee pop value is 8n+4 bytes so that the start of the
  // arguments remains aligned after the return address has been pushed.
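  // e.g. ArgOffset == 8 is bumped to 12, and 12 = 8*1+4.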
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(MF.getFunction()->getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::EAX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::EAX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
    MF.addLiveOut(X86::XMM0);
    break;
  }

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size());
}

SDOperand X86TargetLowering::LowerFastCCCallTo(SDOperand Op, SelectionDAG &DAG,
                                               bool isFastCall) {
  SDOperand Chain     = Op.getOperand(0);
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  MVT::ValueType RetVT= Op.Val->getValueType(0);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;

  // Keep track of the number of integer regs passed so far.  This can be
  // either 0 (neither EAX nor EDX used), 1 (EAX is used) or 2 (EAX and EDX are
  // both used).
  unsigned NumIntRegs = 0;
  unsigned NumXMMRegs = 0;  // XMM regs used for parameter passing.

  static const unsigned GPRArgRegs[][2] = {
    { X86::AL,  X86::DL },
    { X86::AX,  X86::DX },
    { X86::EAX, X86::EDX }
  };
#if 0
  static const unsigned FastCallGPRArgRegs[][2] = {
    { X86::CL,  X86::DL },
    { X86::CX,  X86::DX },
    { X86::ECX, X86::EDX }
  };
#endif
  static const unsigned XMMArgRegs[] = {
    X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
  };

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unknown value type!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32: {
     unsigned MaxNumIntRegs = (isFastCall ? 2 : FASTCC_NUM_INT_ARGS_INREGS);
     if (NumIntRegs < MaxNumIntRegs) {
       ++NumIntRegs;
       break;
     }
     } // Fall through
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::f64:
      NumBytes += 8;
      break;
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
     if (isFastCall) {
      assert(0 && "Unknown value type!");
     } else {
       if (NumXMMRegs < 4)
         NumXMMRegs++;
       else {
         // XMM arguments have to be aligned on 16-byte boundary.
         NumBytes = ((NumBytes + 15) / 16) * 16;
         NumBytes += 16;
       }
     }
     break;
    }
  }

  // Make sure the callee pop value is 8n+4 bytes so that the start of the
  // arguments remains aligned after the return address has been pushed.
  if ((NumBytes & 7) == 0)
    NumBytes += 4;

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  NumIntRegs = 0;
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32: {
     unsigned MaxNumIntRegs = (isFastCall ? 2 : FASTCC_NUM_INT_ARGS_INREGS);
     if (NumIntRegs < MaxNumIntRegs) {
       RegsToPass.push_back(
         std::make_pair(GPRArgRegs[Arg.getValueType()-MVT::i8][NumIntRegs],
                        Arg));
       ++NumIntRegs;
       break;
     }
     } // Fall through
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
      ArgOffset += 4;
      break;
    }
    case MVT::f64: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
      ArgOffset += 8;
      break;
    }
    case MVT::v16i8:
    case MVT::v8i16:
    case MVT::v4i32:
    case MVT::v2i64:
    case MVT::v4f32:
    case MVT::v2f64:
     if (isFastCall) {
       assert(0 && "Unexpected ValueType for argument!");
     } else {
       if (NumXMMRegs < 4) {
         RegsToPass.push_back(std::make_pair(XMMArgRegs[NumXMMRegs], Arg));
         NumXMMRegs++;
       } else {
         // XMM arguments have to be aligned on 16-byte boundary.
         ArgOffset = ((ArgOffset + 15) / 16) * 16;
         SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
         PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
         MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
         ArgOffset += 16;
       }
     }
     break;
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // FIXME: Do not generate X86ISD::TAILCALL for now.
  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::v16i8:
  case MVT::v8i16:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v2f64:
   if (isFastCall) {
     assert(0 && "Unknown value type to return!");
   } else {
     Chain = DAG.getCopyFromReg(Chain, X86::XMM0, RetVT, InFlag).getValue(1);
     ResultVals.push_back(Chain.getValue(0));
     NodeTys.push_back(RetVT);
   }
   break;
  case MVT::f32:
  case MVT::f64: {
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(InFlag);
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys,
                                   &Ops[0], Ops.size());
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      Tys.clear();
      Tys.push_back(MVT::Other);
      Ops.clear();
      Ops.push_back(Chain);
      Ops.push_back(RetVal);
      Ops.push_back(StackSlot);
      Ops.push_back(DAG.getValueType(RetVT));
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
      RetVal = DAG.getLoad(RetVT, Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }

    if (RetVT == MVT::f32 && !X86ScalarSSE)
      // FIXME: we would really like to remember that this FP_ROUND
      // operation is okay to eliminate if we allow excess FP precision.
      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
    ResultVals.push_back(RetVal);
    NodeTys.push_back(RetVT);
    break;
  }
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}

//===----------------------------------------------------------------------===//
//                  StdCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//  The StdCall calling convention is the standard for many Windows API
//  routines. It differs from the C calling convention only slightly: the
//  callee, not the caller, cleans up the stack, and symbols are decorated in
//  some fancy way :) It doesn't support any vector arguments.
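//  e.g. a stdcall function 'int f(int, int)' is typically decorated as _f@8
//  and returns with 'ret 8' to pop its two 4-byte arguments.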

/// HowToPassStdCallCCArgument - Returns how a formal argument of the specified
/// type should be passed. Returns the size of the stack slot.
static void
HowToPassStdCallCCArgument(MVT::ValueType ObjectVT, unsigned &ObjSize) {
  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:  ObjSize = 1; break;
  case MVT::i16: ObjSize = 2; break;
  case MVT::i32: ObjSize = 4; break;
  case MVT::i64: ObjSize = 8; break;
  case MVT::f32: ObjSize = 4; break;
  case MVT::f64: ObjSize = 8; break;
  }
}

SDOperand X86TargetLowering::LowerStdCallCCArguments(SDOperand Op,
                                                     SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues() - 1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function on the X86,
  // the stack frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first argument (leftmost lexically)
  // [ESP + 8] -- second argument, if first argument is <= 4 bytes in size
  //    ...
  //
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot
  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    HowToPassStdCallCCArgument(ObjectVT, ObjSize);
    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    SDOperand ArgValue;
    // Create the frame index object for this incoming parameter...
    int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
    ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
    ArgValues.push_back(ArgValue);
    ArgOffset += ArgIncrement;   // Move on to the next argument...
  }

  ArgValues.push_back(Root);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (isVarArg) {
    BytesToPopOnReturn = 0;         // Callee pops nothing.
    BytesCallerReserves = ArgOffset;
    VarArgsFrameIndex = MFI->CreateFixedObject(1, ArgOffset);
  } else {
    BytesToPopOnReturn = ArgOffset; // Callee pops everything.
    BytesCallerReserves = 0;
  }
  RegSaveFrameIndex = 0xAAAAAAA;    // X86-64 only.
  ReturnAddrIndex = 0;              // No return address slot generated yet.

  MF.getInfo<X86FunctionInfo>()->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size());
}

SDOperand X86TargetLowering::LowerStdCallCCCallTo(SDOperand Op,
                                                  SelectionDAG &DAG) {
  SDOperand Chain     = Op.getOperand(0);
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
  SDOperand Callee    = Op.getOperand(4);
  MVT::ValueType RetVT= Op.Val->getValueType(0);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;

  // Count how many bytes are to be pushed on the stack.
  unsigned NumBytes = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::f32:
      NumBytes += 4;
      break;
    case MVT::i64:
    case MVT::f64:
      NumBytes += 8;
      break;
    }
  }

  Chain = DAG.getCALLSEQ_START(Chain,DAG.getConstant(NumBytes, getPointerTy()));

  // Arguments go on the stack in reverse order, as specified by the ABI.
  unsigned ArgOffset = 0;
  std::vector<SDOperand> MemOpChains;
  SDOperand StackPtr = DAG.getRegister(X86StackPtr, getPointerTy());
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16: {
      // Promote the integer to 32 bits.  If the input type is signed use a
      // sign extend, otherwise use a zero extend.
      unsigned ExtOp =
        cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue() ?
        ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, MVT::i32, Arg);
    }
    // Fallthrough

    case MVT::i32:
    case MVT::f32: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
      ArgOffset += 4;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
      MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
      ArgOffset += 8;
      break;
    }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // We should use extra load for direct calls to dllimported functions
    if (!Subtarget->GVRequiresExtraLoad(G->getGlobal(), true))
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  Chain = DAG.getNode(isTailCall ? X86ISD::TAILCALL : X86ISD::CALL,
                      NodeTys, &Ops[0], Ops.size());
  SDOperand InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  unsigned NumBytesForCalleeToPush;

  if (isVarArg) {
    NumBytesForCalleeToPush = 0;
  } else {
    NumBytesForCalleeToPush = NumBytes;
  }

  NodeTys.clear();
  NodeTys.push_back(MVT::Other);   // Returns a chain
  if (RetVT != MVT::Other)
    NodeTys.push_back(MVT::Flag);  // Returns a flag for retval copy to use.
  Ops.clear();
  Ops.push_back(Chain);
  Ops.push_back(DAG.getConstant(NumBytes, getPointerTy()));
  Ops.push_back(DAG.getConstant(NumBytesForCalleeToPush, getPointerTy()));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(ISD::CALLSEQ_END, NodeTys, &Ops[0], Ops.size());
  if (RetVT != MVT::Other)
    InFlag = Chain.getValue(1);

  std::vector<SDOperand> ResultVals;
  NodeTys.clear();
  switch (RetVT) {
  default: assert(0 && "Unknown value type to return!");
  case MVT::Other: break;
  case MVT::i8:
    Chain = DAG.getCopyFromReg(Chain, X86::AL, MVT::i8, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i8);
    break;
  case MVT::i16:
    Chain = DAG.getCopyFromReg(Chain, X86::AX, MVT::i16, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    NodeTys.push_back(MVT::i16);
    break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, X86::EDX, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, X86::EAX, MVT::i32, InFlag).getValue(1);
      ResultVals.push_back(Chain.getValue(0));
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::f32:
  case MVT::f64: {
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(InFlag);
    SDOperand RetVal = DAG.getNode(X86ISD::FP_GET_RESULT, Tys,
                                   &Ops[0], Ops.size());
    Chain  = RetVal.getValue(1);
    InFlag = RetVal.getValue(2);
    if (X86ScalarSSE) {
      // FIXME: Currently the FST is flagged to the FP_GET_RESULT. This
      // shouldn't be necessary except that RFP cannot be live across
      // multiple blocks. When stackifier is fixed, they can be uncoupled.
      MachineFunction &MF = DAG.getMachineFunction();
      int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
      SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
      Tys.clear();
      Tys.push_back(MVT::Other);
      Ops.clear();
      Ops.push_back(Chain);
      Ops.push_back(RetVal);
      Ops.push_back(StackSlot);
      Ops.push_back(DAG.getValueType(RetVT));
      Ops.push_back(InFlag);
      Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
      RetVal = DAG.getLoad(RetVT, Chain, StackSlot, NULL, 0);
      Chain = RetVal.getValue(1);
    }

    if (RetVT == MVT::f32 && !X86ScalarSSE)
      // FIXME: we would really like to remember that this FP_ROUND
      // operation is okay to eliminate if we allow excess FP precision.
      RetVal = DAG.getNode(ISD::FP_ROUND, MVT::f32, RetVal);
    ResultVals.push_back(RetVal);
    NodeTys.push_back(RetVT);
    break;
  }
  }

  // If the function returns void, just return the chain.
  if (ResultVals.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  NodeTys.push_back(MVT::Other);
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}

//===----------------------------------------------------------------------===//
//                  FastCall Calling Convention implementation
//===----------------------------------------------------------------------===//
//
// The X86 'fastcall' calling convention passes up to two integer arguments in
// registers (an appropriate portion of ECX/EDX), passes arguments in C order,
// requires that the callee pop its arguments off the stack (allowing proper
// tail calls), and has the same return value conventions as the C calling
// convention.
//
// This calling convention always arranges for the callee pop value to be 8n+4
// bytes, which is needed for tail recursion elimination and stack alignment
// reasons.
//
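// For example, fastcall 'int f(int a, int b, int c)' receives a in ECX,
// b in EDX, and c at [ESP+4], and the callee returns with 'ret 4'.
//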

/// HowToPassFastCallCCArgument - Returns how a formal argument of the
/// specified type should be passed. If it is passed on the stack, returns the
/// size of the stack slot; if it is passed in integer registers, returns the
/// number of integer registers needed.
static void
HowToPassFastCallCCArgument(MVT::ValueType ObjectVT,
                            unsigned NumIntRegs,
                            unsigned &ObjSize,
                            unsigned &ObjIntRegs)
{
  ObjSize = 0;
  ObjIntRegs = 0;

  switch (ObjectVT) {
  default: assert(0 && "Unhandled argument type!");
  case MVT::i8:
   if (NumIntRegs < 2)
     ObjIntRegs = 1;
   else
     ObjSize = 1;
   break;
  case MVT::i16:
   if (NumIntRegs < 2)
     ObjIntRegs = 1;
   else
     ObjSize = 2;
   break;
  case MVT::i32:
   if (NumIntRegs < 2)
     ObjIntRegs = 1;
   else
     ObjSize = 4;
   break;
  case MVT::i64:
   if (NumIntRegs+2 <= 2) {
     ObjIntRegs = 2;
   } else if (NumIntRegs+1 <= 2) {
     ObjIntRegs = 1;
     ObjSize = 4;
   } else
     ObjSize = 8;
   break;
  case MVT::f32:
    ObjSize = 4;
    break;
  case MVT::f64:
    ObjSize = 8;
    break;
  }
}

SDOperand
X86TargetLowering::LowerFastCallCCArguments(SDOperand Op, SelectionDAG &DAG) {
  unsigned NumArgs = Op.Val->getNumValues()-1;
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SDOperand Root = Op.getOperand(0);
  std::vector<SDOperand> ArgValues;

  // Add DAG nodes to load the arguments...  On entry to a function the stack
  // frame looks like this:
  //
  // [ESP] -- return address
  // [ESP + 4] -- first nonreg argument (leftmost lexically)
  // [ESP + 8] -- second nonreg argument, if 1st argument is <= 4 bytes in size
  //    ...
  unsigned ArgOffset = 0;   // Frame mechanisms handle retaddr slot

  // Keep track of the number of integer regs passed so far.  This can be
  // either 0 (neither ECX nor EDX used), 1 (ECX is used) or 2 (ECX and EDX are
  // both used).
  unsigned NumIntRegs = 0;

  for (unsigned i = 0; i < NumArgs; ++i) {
    MVT::ValueType ObjectVT = Op.getValue(i).getValueType();
    unsigned ArgIncrement = 4;
    unsigned ObjSize = 0;
    unsigned ObjIntRegs = 0;

    HowToPassFastCallCCArgument(ObjectVT, NumIntRegs, ObjSize, ObjIntRegs);
    if (ObjSize > 4)
      ArgIncrement = ObjSize;

    unsigned Reg = 0;
    SDOperand ArgValue;
    if (ObjIntRegs) {
      switch (ObjectVT) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i8:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::DL : X86::CL,
                        X86::GR8RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i8);
        break;
      case MVT::i16:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::DX : X86::CX,
                        X86::GR16RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i16);
        break;
      case MVT::i32:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::ECX,
                        X86::GR32RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
        break;
      case MVT::i64:
        Reg = AddLiveIn(MF, NumIntRegs ? X86::EDX : X86::ECX,
                        X86::GR32RegisterClass);
        ArgValue = DAG.getCopyFromReg(Root, Reg, MVT::i32);
        if (ObjIntRegs == 2) {
          Reg = AddLiveIn(MF, X86::EDX, X86::GR32RegisterClass);
          SDOperand ArgValue2 = DAG.getCopyFromReg(Root, Reg, MVT::i32);
          ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
        }
        break;
      }

      NumIntRegs += ObjIntRegs;
    }

    if (ObjSize) {
      // Create the SelectionDAG nodes corresponding to a load from this
      // parameter.
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, getPointerTy());
      if (ObjectVT == MVT::i64 && ObjIntRegs) {
        SDOperand ArgValue2 = DAG.getLoad(Op.Val->getValueType(i), Root, FIN,
                                          NULL, 0);
        ArgValue = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, ArgValue, ArgValue2);
      } else
        ArgValue = DAG.getLoad(Op.Val->getValueType(i), Root, FIN, NULL, 0);
      ArgOffset += ArgIncrement;   // Move on to the next argument.
    }

    ArgValues.push_back(ArgValue);
  }

  ArgValues.push_back(Root);

  // Make sure the callee pop value is 8n+4 bytes so that the start of the
  // arguments remains aligned after the return address has been pushed.
  if ((ArgOffset & 7) == 0)
    ArgOffset += 4;

  VarArgsFrameIndex = 0xAAAAAAA;   // fastcc functions can't have varargs.
  RegSaveFrameIndex = 0xAAAAAAA;   // X86-64 only.
  ReturnAddrIndex = 0;             // No return address slot generated yet.
  BytesToPopOnReturn = ArgOffset;  // Callee pops all stack arguments.
  BytesCallerReserves = 0;

  MF.getInfo<X86FunctionInfo>()->setBytesToPopOnReturn(BytesToPopOnReturn);

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(MF.getFunction()->getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(X86::ECX);
    break;
  case MVT::i64:
    MF.addLiveOut(X86::ECX);
    MF.addLiveOut(X86::EDX);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(X86::ST0);
    break;
  }

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVTs(Op.Val->value_begin(),
                                     Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVTs, &ArgValues[0],ArgValues.size());
}

SDOperand X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address.
    MachineFunction &MF = DAG.getMachineFunction();
    if (Subtarget->is64Bit())
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(8, -8);
    else
      ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(4, -4);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
}

std::pair<SDOperand, SDOperand> X86TargetLowering::
LowerFrameReturnAddress(bool isFrameAddress, SDOperand Chain, unsigned Depth,
                        SelectionDAG &DAG) {
  SDOperand Result;
  if (Depth)        // Depths > 0 not supported yet!
    Result = DAG.getConstant(0, getPointerTy());
  else {
    SDOperand RetAddrFI = getReturnAddressFrameIndex(DAG);
    if (!isFrameAddress)
      // Just load the return address
      Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI,
                           NULL, 0);
    else
      Result = DAG.getNode(ISD::SUB, getPointerTy(), RetAddrFI,
                           DAG.getConstant(4, getPointerTy()));
  }
  return std::make_pair(Result, Chain);
}

/// translateX86CC - Do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code. It returns false if it cannot do a direct
/// translation. X86CC is the translated CondCode.  LHS/RHS are modified as
/// needed.
static bool translateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                           unsigned &X86CC, SDOperand &LHS, SDOperand &RHS,
                           SelectionDAG &DAG) {
  X86CC = X86::COND_INVALID;
  if (!isFP) {
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
        // X > -1  -> X >= 0  -> the sign bit is clear; jump on !sign.
        RHS = DAG.getConstant(0, RHS.getValueType());
        X86CC = X86::COND_NS;
        return true;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
        // X < 0  -> the sign bit is set; jump on sign.
        X86CC = X86::COND_S;
        return true;
      }
    }

    switch (SetCCOpcode) {
    default: break;
    case ISD::SETEQ:  X86CC = X86::COND_E;  break;
    case ISD::SETGT:  X86CC = X86::COND_G;  break;
    case ISD::SETGE:  X86CC = X86::COND_GE; break;
    case ISD::SETLT:  X86CC = X86::COND_L;  break;
    case ISD::SETLE:  X86CC = X86::COND_LE; break;
    case ISD::SETNE:  X86CC = X86::COND_NE; break;
    case ISD::SETULT: X86CC = X86::COND_B;  break;
    case ISD::SETUGT: X86CC = X86::COND_A;  break;
    case ISD::SETULE: X86CC = X86::COND_BE; break;
    case ISD::SETUGE: X86CC = X86::COND_AE; break;
    }
  } else {
    // On a floating point condition, the flags are set as follows:
    // ZF  PF  CF   op
    //  0 | 0 | 0 | X > Y
    //  0 | 0 | 1 | X < Y
    //  1 | 0 | 0 | X == Y
    //  1 | 1 | 1 | unordered
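    // e.g. SETOGT therefore maps to COND_A (CF==0 and ZF==0), which is true
    // exactly for X > Y and false for the unordered case (all flags set).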
    bool Flip = false;
    switch (SetCCOpcode) {
    default: break;
    case ISD::SETUEQ:
    case ISD::SETEQ: X86CC = X86::COND_E;  break;
    case ISD::SETOLT: Flip = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT: X86CC = X86::COND_A;  break;
    case ISD::SETOLE: Flip = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE: X86CC = X86::COND_AE; break;
    case ISD::SETUGT: Flip = true; // Fallthrough
    case ISD::SETULT:
    case ISD::SETLT: X86CC = X86::COND_B;  break;
    case ISD::SETUGE: Flip = true; // Fallthrough
    case ISD::SETULE:
    case ISD::SETLE: X86CC = X86::COND_BE; break;
    case ISD::SETONE:
    case ISD::SETNE: X86CC = X86::COND_NE; break;
    case ISD::SETUO: X86CC = X86::COND_P;  break;
    case ISD::SETO:  X86CC = X86::COND_NP; break;
    }
    if (Flip)
      std::swap(LHS, RHS);
  }

  return X86CC != X86::COND_INVALID;
}

/// hasFPCMov - Is there a floating point cmov for the specific X86 condition
/// code? The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86::COND_B:
  case X86::COND_BE:
  case X86::COND_E:
  case X86::COND_P:
  case X86::COND_A:
  case X86::COND_AE:
  case X86::COND_NE:
  case X86::COND_NP:
    return true;
  }
}

/// isUndefOrInRange - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if its value falls within the range [Low, Hi).
static bool isUndefOrInRange(SDOperand Op, unsigned Low, unsigned Hi) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;

  unsigned Val = cast<ConstantSDNode>(Op)->getValue();
  return (Val >= Low && Val < Hi);
}

/// isUndefOrEqual - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if its value equals the specified value.
static bool isUndefOrEqual(SDOperand Op, unsigned Val) {
  if (Op.getOpcode() == ISD::UNDEF)
    return true;
  return cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isPSHUFDMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFD.
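/// e.g. the 4-element mask <2, 1, 0, 3> is a valid PSHUFD mask, while
/// <0, 4, 1, 5> is not, since it references elements of the second vector.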
bool X86::isPSHUFDMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Check that the mask doesn't reference the second vector.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() >= 4)
      return false;
  }

  return true;
}

/// isPSHUFHWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFHW.
bool X86::isPSHUFHWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Lower quadword copied in order.
  for (unsigned i = 0; i != 4; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    if (cast<ConstantSDNode>(Arg)->getValue() != i)
      return false;
  }

  // Upper quadword shuffled.
  for (unsigned i = 4; i != 8; ++i) {
    SDOperand Arg = N->getOperand(i);
    if (Arg.getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val < 4 || Val > 7)
      return false;
  }

  return true;
}

/// isPSHUFLWMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to PSHUFLW.
bool X86::isPSHUFLWMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 8)
    return false;

  // Upper quadword copied in order.
  for (unsigned i = 4; i != 8; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  // Lower quadword shuffled.
  for (unsigned i = 0; i != 4; ++i)
    if (!isUndefOrInRange(N->getOperand(i), 0, 4))
      return false;

  return true;
}

/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
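/// e.g. for 4 elements, <0, 2, 5, 7> is a valid SHUFP mask: the low half
/// selects from the first vector (0-3), the high half from the second (4-7).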
static bool isSHUFPMask(std::vector<SDOperand> &N) {
  unsigned NumElems = N.size();
  if (NumElems != 2 && NumElems != 4) return false;

  unsigned Half = NumElems / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(N[i], 0, NumElems))
      return false;
  for (unsigned i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(N[i], NumElems, NumElems*2))
      return false;

  return true;
}

bool X86::isSHUFPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
  return ::isSHUFPMask(Ops);
}

/// isCommutedSHUFP - Returns true if the shuffle mask is exactly the reverse
/// of what x86 shuffles want. x86 shuffles require the lower half elements to
/// come from vector 1 (which would equal the destination) and the upper half
/// to come from vector 2.
static bool isCommutedSHUFP(std::vector<SDOperand> &Ops) {
  unsigned NumElems = Ops.size();
  if (NumElems != 2 && NumElems != 4) return false;

  unsigned Half = NumElems / 2;
  for (unsigned i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Ops[i], NumElems, NumElems*2))
      return false;
  for (unsigned i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(Ops[i], 0, NumElems))
      return false;
  return true;
}

static bool isCommutedSHUFP(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
  return isCommutedSHUFP(Ops);
}

/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect bit0 == 6, bit1 == 7, bit2 == 2, bit3 == 3
  return isUndefOrEqual(N->getOperand(0), 6) &&
         isUndefOrEqual(N->getOperand(1), 7) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  if (N->getNumOperands() != 4)
    return false;

  // Expect bit0 == 2, bit1 == 3, bit2 == 2, bit3 == 3
  return isUndefOrEqual(N->getOperand(0), 2) &&
         isUndefOrEqual(N->getOperand(1), 3) &&
         isUndefOrEqual(N->getOperand(2), 2) &&
         isUndefOrEqual(N->getOperand(3), 3);
}

/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
bool X86::isMOVLPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i + NumElems))
      return false;

  for (unsigned i = NumElems/2; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  return true;
}

/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
/// and MOVLHPS.
bool X86::isMOVHPMask(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);

  unsigned NumElems = N->getNumOperands();
  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getOperand(i), i))
      return false;

  for (unsigned i = 0; i < NumElems/2; ++i) {
    SDOperand Arg = N->getOperand(i + NumElems/2);
    if (!isUndefOrEqual(Arg, i + NumElems))
      return false;
  }

  return true;
}

/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
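/// e.g. for 4 elements the canonical UNPCKL mask is <0, 4, 1, 5>,
/// interleaving the low halves of the two input vectors.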
2635bool static isUNPCKLMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
2636  unsigned NumElems = N.size();
2637  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
2638    return false;
2639
2640  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
2641    SDOperand BitI  = N[i];
2642    SDOperand BitI1 = N[i+1];
2643    if (!isUndefOrEqual(BitI, j))
2644      return false;
2645    if (V2IsSplat) {
      if (!isUndefOrEqual(BitI1, NumElems))
2647        return false;
2648    } else {
2649      if (!isUndefOrEqual(BitI1, j + NumElems))
2650        return false;
2651    }
2652  }
2653
2654  return true;
2655}
2656
2657bool X86::isUNPCKLMask(SDNode *N, bool V2IsSplat) {
2658  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2659  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
2660  return ::isUNPCKLMask(Ops, V2IsSplat);
2661}
2662
2663/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
2664/// specifies a shuffle of elements that is suitable for input to UNPCKH.
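/// E.g. for 4 elements the expected mask is <2, 6, 3, 7>, interleaving the
/// high halves of V1 and V2.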
static bool isUNPCKHMask(std::vector<SDOperand> &N, bool V2IsSplat = false) {
2666  unsigned NumElems = N.size();
2667  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
2668    return false;
2669
2670  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
2671    SDOperand BitI  = N[i];
2672    SDOperand BitI1 = N[i+1];
2673    if (!isUndefOrEqual(BitI, j + NumElems/2))
2674      return false;
2675    if (V2IsSplat) {
      if (!isUndefOrEqual(BitI1, NumElems))
2677        return false;
2678    } else {
2679      if (!isUndefOrEqual(BitI1, j + NumElems/2 + NumElems))
2680        return false;
2681    }
2682  }
2683
2684  return true;
2685}
2686
2687bool X86::isUNPCKHMask(SDNode *N, bool V2IsSplat) {
2688  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2689  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
2690  return ::isUNPCKHMask(Ops, V2IsSplat);
2691}
2692
2693/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
2694/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
2695/// <0, 0, 1, 1>
2696bool X86::isUNPCKL_v_undef_Mask(SDNode *N) {
2697  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2698
2699  unsigned NumElems = N->getNumOperands();
2700  if (NumElems != 4 && NumElems != 8 && NumElems != 16)
2701    return false;
2702
2703  for (unsigned i = 0, j = 0; i != NumElems; i += 2, ++j) {
2704    SDOperand BitI  = N->getOperand(i);
2705    SDOperand BitI1 = N->getOperand(i+1);
2706
2707    if (!isUndefOrEqual(BitI, j))
2708      return false;
2709    if (!isUndefOrEqual(BitI1, j))
2710      return false;
2711  }
2712
2713  return true;
2714}
2715
2716/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
2717/// specifies a shuffle of elements that is suitable for input to MOVSS,
2718/// MOVSD, and MOVD, i.e. setting the lowest element.
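/// E.g. the 4-element MOVSS mask is <4, 1, 2, 3>: the lowest element comes
/// from V2 and the remaining elements come from V1 in order.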
2719static bool isMOVLMask(std::vector<SDOperand> &N) {
2720  unsigned NumElems = N.size();
2721  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
2722    return false;
2723
2724  if (!isUndefOrEqual(N[0], NumElems))
2725    return false;
2726
2727  for (unsigned i = 1; i < NumElems; ++i) {
2728    SDOperand Arg = N[i];
2729    if (!isUndefOrEqual(Arg, i))
2730      return false;
2731  }
2732
2733  return true;
2734}
2735
2736bool X86::isMOVLMask(SDNode *N) {
2737  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2738  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
2739  return ::isMOVLMask(Ops);
2740}
2741
/// isCommutedMOVL - Returns true if the shuffle mask is the reverse of what
/// x86 MOVS{S|D} wants. X86 MOVS{S|D} requires the lowest element to be the
/// lowest element of vector 2 and the other elements to come from vector 1
/// in order.
2745static bool isCommutedMOVL(std::vector<SDOperand> &Ops, bool V2IsSplat = false,
2746                           bool V2IsUndef = false) {
2747  unsigned NumElems = Ops.size();
2748  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
2749    return false;
2750
2751  if (!isUndefOrEqual(Ops[0], 0))
2752    return false;
2753
2754  for (unsigned i = 1; i < NumElems; ++i) {
2755    SDOperand Arg = Ops[i];
2756    if (!(isUndefOrEqual(Arg, i+NumElems) ||
2757          (V2IsUndef && isUndefOrInRange(Arg, NumElems, NumElems*2)) ||
2758          (V2IsSplat && isUndefOrEqual(Arg, NumElems))))
2759      return false;
2760  }
2761
2762  return true;
2763}
2764
2765static bool isCommutedMOVL(SDNode *N, bool V2IsSplat = false,
2766                           bool V2IsUndef = false) {
2767  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2768  std::vector<SDOperand> Ops(N->op_begin(), N->op_end());
2769  return isCommutedMOVL(Ops, V2IsSplat, V2IsUndef);
2770}
2771
2772/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
2773/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
2774bool X86::isMOVSHDUPMask(SDNode *N) {
2775  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2776
2777  if (N->getNumOperands() != 4)
2778    return false;
2779
2780  // Expect 1, 1, 3, 3
2781  for (unsigned i = 0; i < 2; ++i) {
2782    SDOperand Arg = N->getOperand(i);
2783    if (Arg.getOpcode() == ISD::UNDEF) continue;
2784    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2785    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2786    if (Val != 1) return false;
2787  }
2788
2789  bool HasHi = false;
2790  for (unsigned i = 2; i < 4; ++i) {
2791    SDOperand Arg = N->getOperand(i);
2792    if (Arg.getOpcode() == ISD::UNDEF) continue;
2793    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2794    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2795    if (Val != 3) return false;
2796    HasHi = true;
2797  }
2798
2799  // Don't use movshdup if it can be done with a shufps.
2800  return HasHi;
2801}
2802
2803/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
2804/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
2805bool X86::isMOVSLDUPMask(SDNode *N) {
2806  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2807
2808  if (N->getNumOperands() != 4)
2809    return false;
2810
2811  // Expect 0, 0, 2, 2
2812  for (unsigned i = 0; i < 2; ++i) {
2813    SDOperand Arg = N->getOperand(i);
2814    if (Arg.getOpcode() == ISD::UNDEF) continue;
2815    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2816    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2817    if (Val != 0) return false;
2818  }
2819
2820  bool HasHi = false;
2821  for (unsigned i = 2; i < 4; ++i) {
2822    SDOperand Arg = N->getOperand(i);
2823    if (Arg.getOpcode() == ISD::UNDEF) continue;
2824    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2825    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2826    if (Val != 2) return false;
2827    HasHi = true;
2828  }
2829
  // Don't use movsldup if it can be done with a shufps.
2831  return HasHi;
2832}
2833
2834/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2835/// a splat of a single element.
2836static bool isSplatMask(SDNode *N) {
2837  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2838
2839  // This is a splat operation if each element of the permute is the same, and
2840  // if the value doesn't reference the second vector.
2841  unsigned NumElems = N->getNumOperands();
2842  SDOperand ElementBase;
2843  unsigned i = 0;
2844  for (; i != NumElems; ++i) {
2845    SDOperand Elt = N->getOperand(i);
2846    if (isa<ConstantSDNode>(Elt)) {
2847      ElementBase = Elt;
2848      break;
2849    }
2850  }
2851
2852  if (!ElementBase.Val)
2853    return false;
2854
2855  for (; i != NumElems; ++i) {
2856    SDOperand Arg = N->getOperand(i);
2857    if (Arg.getOpcode() == ISD::UNDEF) continue;
2858    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2859    if (Arg != ElementBase) return false;
2860  }
2861
2862  // Make sure it is a splat of the first vector operand.
2863  return cast<ConstantSDNode>(ElementBase)->getValue() < NumElems;
2864}
2865
2866/// isSplatMask - Return true if the specified VECTOR_SHUFFLE operand specifies
2867/// a splat of a single element and it's a 2 or 4 element mask.
2868bool X86::isSplatMask(SDNode *N) {
2869  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2870
  // We can only splat 64-bit and 32-bit quantities with a single instruction.
2872  if (N->getNumOperands() != 4 && N->getNumOperands() != 2)
2873    return false;
2874  return ::isSplatMask(N);
2875}
2876
2877/// isSplatLoMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of element zero.
2879bool X86::isSplatLoMask(SDNode *N) {
2880  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2881
2882  for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
2883    if (!isUndefOrEqual(N->getOperand(i), 0))
2884      return false;
2885  return true;
2886}
2887
2888/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
2889/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUF* and SHUFP*
2890/// instructions.
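/// For example, the 4-element mask <3, 1, 2, 0> is encoded by scanning the
/// operands in reverse, two bits per index, yielding 0b00100111 (0x27);
/// indices that refer to the second vector are first reduced modulo
/// NumOperands, since SHUFP* encodes only per-operand lane numbers.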
2891unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
2892  unsigned NumOperands = N->getNumOperands();
2893  unsigned Shift = (NumOperands == 4) ? 2 : 1;
2894  unsigned Mask = 0;
2895  for (unsigned i = 0; i < NumOperands; ++i) {
2896    unsigned Val = 0;
2897    SDOperand Arg = N->getOperand(NumOperands-i-1);
2898    if (Arg.getOpcode() != ISD::UNDEF)
2899      Val = cast<ConstantSDNode>(Arg)->getValue();
2900    if (Val >= NumOperands) Val -= NumOperands;
2901    Mask |= Val;
2902    if (i != NumOperands - 1)
2903      Mask <<= Shift;
2904  }
2905
2906  return Mask;
2907}
2908
2909/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
2910/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFHW
2911/// instructions.
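/// E.g. a mask whose high quadword is <7, 6, 5, 4> encodes to 0x1B; the
/// low four mask elements are ignored here.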
2912unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
2913  unsigned Mask = 0;
2914  // 8 nodes, but we only care about the last 4.
2915  for (unsigned i = 7; i >= 4; --i) {
2916    unsigned Val = 0;
2917    SDOperand Arg = N->getOperand(i);
2918    if (Arg.getOpcode() != ISD::UNDEF)
2919      Val = cast<ConstantSDNode>(Arg)->getValue();
2920    Mask |= (Val - 4);
2921    if (i != 4)
2922      Mask <<= 2;
2923  }
2924
2925  return Mask;
2926}
2927
2928/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
2929/// the specified isShuffleMask VECTOR_SHUFFLE mask with PSHUFLW
2930/// instructions.
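/// E.g. a mask whose low quadword is <3, 2, 1, 0> likewise encodes to 0x1B;
/// the high four mask elements are ignored here.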
2931unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
2932  unsigned Mask = 0;
2933  // 8 nodes, but we only care about the first 4.
2934  for (int i = 3; i >= 0; --i) {
2935    unsigned Val = 0;
2936    SDOperand Arg = N->getOperand(i);
2937    if (Arg.getOpcode() != ISD::UNDEF)
2938      Val = cast<ConstantSDNode>(Arg)->getValue();
2939    Mask |= Val;
2940    if (i != 0)
2941      Mask <<= 2;
2942  }
2943
2944  return Mask;
2945}
2946
2947/// isPSHUFHW_PSHUFLWMask - true if the specified VECTOR_SHUFFLE operand
/// specifies an 8 element shuffle that can be broken into a pair of
2949/// PSHUFHW and PSHUFLW.
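/// E.g. <1, 0, 3, 2, 5, 4, 7, 6> qualifies: each result element draws only
/// from the same quadword of the source.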
2950static bool isPSHUFHW_PSHUFLWMask(SDNode *N) {
2951  assert(N->getOpcode() == ISD::BUILD_VECTOR);
2952
2953  if (N->getNumOperands() != 8)
2954    return false;
2955
2956  // Lower quadword shuffled.
2957  for (unsigned i = 0; i != 4; ++i) {
2958    SDOperand Arg = N->getOperand(i);
2959    if (Arg.getOpcode() == ISD::UNDEF) continue;
2960    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2961    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
    if (Val >= 4)
2963      return false;
2964  }
2965
2966  // Upper quadword shuffled.
2967  for (unsigned i = 4; i != 8; ++i) {
2968    SDOperand Arg = N->getOperand(i);
2969    if (Arg.getOpcode() == ISD::UNDEF) continue;
2970    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2971    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2972    if (Val < 4 || Val > 7)
2973      return false;
2974  }
2975
2976  return true;
2977}
2978
/// CommuteVectorShuffle - Swap vector_shuffle operands as well as the
/// values in their permute mask.
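/// E.g. vector_shuffle V1, V2, <0, 1, 4, 5> becomes
/// vector_shuffle V2, V1, <4, 5, 0, 1>: indices into V1 gain NumElems,
/// indices into V2 lose it, and the result is unchanged.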
2981static SDOperand CommuteVectorShuffle(SDOperand Op, SDOperand &V1,
2982                                      SDOperand &V2, SDOperand &Mask,
2983                                      SelectionDAG &DAG) {
2984  MVT::ValueType VT = Op.getValueType();
2985  MVT::ValueType MaskVT = Mask.getValueType();
2986  MVT::ValueType EltVT = MVT::getVectorBaseType(MaskVT);
2987  unsigned NumElems = Mask.getNumOperands();
2988  std::vector<SDOperand> MaskVec;
2989
2990  for (unsigned i = 0; i != NumElems; ++i) {
2991    SDOperand Arg = Mask.getOperand(i);
2992    if (Arg.getOpcode() == ISD::UNDEF) {
2993      MaskVec.push_back(DAG.getNode(ISD::UNDEF, EltVT));
2994      continue;
2995    }
2996    assert(isa<ConstantSDNode>(Arg) && "Invalid VECTOR_SHUFFLE mask!");
2997    unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
2998    if (Val < NumElems)
2999      MaskVec.push_back(DAG.getConstant(Val + NumElems, EltVT));
3000    else
3001      MaskVec.push_back(DAG.getConstant(Val - NumElems, EltVT));
3002  }
3003
3004  std::swap(V1, V2);
3005  Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
3006  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
3007}
3008
3009/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
3010/// match movhlps. The lower half elements should come from upper half of
3011/// V1 (and in order), and the upper half elements should come from the upper
3012/// half of V2 (and in order).
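/// That is, the mask should be <2, 3, 6, 7> (modulo undefs); commuting the
/// operands then yields the <6, 7, 2, 3> MOVHLPS pattern.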
3013static bool ShouldXformToMOVHLPS(SDNode *Mask) {
3014  unsigned NumElems = Mask->getNumOperands();
3015  if (NumElems != 4)
3016    return false;
3017  for (unsigned i = 0, e = 2; i != e; ++i)
3018    if (!isUndefOrEqual(Mask->getOperand(i), i+2))
3019      return false;
3020  for (unsigned i = 2; i != 4; ++i)
3021    if (!isUndefOrEqual(Mask->getOperand(i), i+4))
3022      return false;
3023  return true;
3024}
3025
3026/// isScalarLoadToVector - Returns true if the node is a scalar load that
3027/// is promoted to a vector.
3028static inline bool isScalarLoadToVector(SDNode *N) {
3029  if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
3030    N = N->getOperand(0).Val;
3031    return ISD::isNON_EXTLoad(N);
3032  }
3033  return false;
3034}
3035
3036/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
3037/// match movlp{s|d}. The lower half elements should come from lower half of
3038/// V1 (and in order), and the upper half elements should come from the upper
3039/// half of V2 (and in order). And since V1 will become the source of the
3040/// MOVLP, it must be either a vector load or a scalar load to vector.
3041static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, SDNode *Mask) {
3042  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
3043    return false;
  // If V2 is a vector load, don't do this transformation. We will try to use
  // a load-folding shufps op instead.
3046  if (ISD::isNON_EXTLoad(V2))
3047    return false;
3048
3049  unsigned NumElems = Mask->getNumOperands();
3050  if (NumElems != 2 && NumElems != 4)
3051    return false;
3052  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
3053    if (!isUndefOrEqual(Mask->getOperand(i), i))
3054      return false;
3055  for (unsigned i = NumElems/2; i != NumElems; ++i)
3056    if (!isUndefOrEqual(Mask->getOperand(i), i+NumElems))
3057      return false;
3058  return true;
3059}
3060
3061/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
3062/// all the same.
3063static bool isSplatVector(SDNode *N) {
3064  if (N->getOpcode() != ISD::BUILD_VECTOR)
3065    return false;
3066
3067  SDOperand SplatValue = N->getOperand(0);
3068  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
3069    if (N->getOperand(i) != SplatValue)
3070      return false;
3071  return true;
3072}
3073
3074/// isUndefShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
3075/// to an undef.
3076static bool isUndefShuffle(SDNode *N) {
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
3078    return false;
3079
3080  SDOperand V1 = N->getOperand(0);
3081  SDOperand V2 = N->getOperand(1);
3082  SDOperand Mask = N->getOperand(2);
3083  unsigned NumElems = Mask.getNumOperands();
3084  for (unsigned i = 0; i != NumElems; ++i) {
3085    SDOperand Arg = Mask.getOperand(i);
3086    if (Arg.getOpcode() != ISD::UNDEF) {
3087      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
3088      if (Val < NumElems && V1.getOpcode() != ISD::UNDEF)
3089        return false;
3090      else if (Val >= NumElems && V2.getOpcode() != ISD::UNDEF)
3091        return false;
3092    }
3093  }
3094  return true;
3095}
3096
3097/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 point to its first element.
3099static SDOperand NormalizeMask(SDOperand Mask, SelectionDAG &DAG) {
3100  assert(Mask.getOpcode() == ISD::BUILD_VECTOR);
3101
3102  bool Changed = false;
3103  std::vector<SDOperand> MaskVec;
3104  unsigned NumElems = Mask.getNumOperands();
3105  for (unsigned i = 0; i != NumElems; ++i) {
3106    SDOperand Arg = Mask.getOperand(i);
3107    if (Arg.getOpcode() != ISD::UNDEF) {
3108      unsigned Val = cast<ConstantSDNode>(Arg)->getValue();
3109      if (Val > NumElems) {
3110        Arg = DAG.getConstant(NumElems, Arg.getValueType());
3111        Changed = true;
3112      }
3113    }
3114    MaskVec.push_back(Arg);
3115  }
3116
3117  if (Changed)
3118    Mask = DAG.getNode(ISD::BUILD_VECTOR, Mask.getValueType(),
3119                       &MaskVec[0], MaskVec.size());
3120  return Mask;
3121}
3122
/// getMOVLMask - Returns a vector_shuffle mask for a movs{s|d}, movd
3124/// operation of specified width.
3125static SDOperand getMOVLMask(unsigned NumElems, SelectionDAG &DAG) {
3126  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3127  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
3128
3129  std::vector<SDOperand> MaskVec;
3130  MaskVec.push_back(DAG.getConstant(NumElems, BaseVT));
3131  for (unsigned i = 1; i != NumElems; ++i)
3132    MaskVec.push_back(DAG.getConstant(i, BaseVT));
3133  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
3134}
3135
3136/// getUnpacklMask - Returns a vector_shuffle mask for an unpackl operation
3137/// of specified width.
3138static SDOperand getUnpacklMask(unsigned NumElems, SelectionDAG &DAG) {
3139  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3140  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
3141  std::vector<SDOperand> MaskVec;
3142  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
3143    MaskVec.push_back(DAG.getConstant(i,            BaseVT));
3144    MaskVec.push_back(DAG.getConstant(i + NumElems, BaseVT));
3145  }
3146  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
3147}
3148
3149/// getUnpackhMask - Returns a vector_shuffle mask for an unpackh operation
3150/// of specified width.
3151static SDOperand getUnpackhMask(unsigned NumElems, SelectionDAG &DAG) {
3152  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3153  MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
3154  unsigned Half = NumElems/2;
3155  std::vector<SDOperand> MaskVec;
3156  for (unsigned i = 0; i != Half; ++i) {
3157    MaskVec.push_back(DAG.getConstant(i + Half,            BaseVT));
3158    MaskVec.push_back(DAG.getConstant(i + NumElems + Half, BaseVT));
3159  }
3160  return DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0], MaskVec.size());
3161}
3162
3163/// getZeroVector - Returns a vector of specified type with all zero elements.
3164///
3165static SDOperand getZeroVector(MVT::ValueType VT, SelectionDAG &DAG) {
3166  assert(MVT::isVector(VT) && "Expected a vector type");
3167  unsigned NumElems = getVectorNumElements(VT);
3168  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
3169  bool isFP = MVT::isFloatingPoint(EVT);
3170  SDOperand Zero = isFP ? DAG.getConstantFP(0.0, EVT) : DAG.getConstant(0, EVT);
3171  std::vector<SDOperand> ZeroVec(NumElems, Zero);
3172  return DAG.getNode(ISD::BUILD_VECTOR, VT, &ZeroVec[0], ZeroVec.size());
3173}
3174
3175/// PromoteSplat - Promote a splat of v8i16 or v16i8 to v4i32.
3176///
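/// The splat is widened by repeatedly unpacking the vector with itself
/// (twice for v16i8, once for v8i16), then the result is bitcast to v4i32
/// and broadcast with an all-zeroes shuffle mask.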
3177static SDOperand PromoteSplat(SDOperand Op, SelectionDAG &DAG) {
3178  SDOperand V1 = Op.getOperand(0);
3179  SDOperand Mask = Op.getOperand(2);
3180  MVT::ValueType VT = Op.getValueType();
3181  unsigned NumElems = Mask.getNumOperands();
3182  Mask = getUnpacklMask(NumElems, DAG);
3183  while (NumElems != 4) {
3184    V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1, Mask);
3185    NumElems >>= 1;
3186  }
3187  V1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, V1);
3188
3189  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3190  Mask = getZeroVector(MaskVT, DAG);
3191  SDOperand Shuffle = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v4i32, V1,
3192                                  DAG.getNode(ISD::UNDEF, MVT::v4i32), Mask);
3193  return DAG.getNode(ISD::BIT_CONVERT, VT, Shuffle);
3194}
3195
3196/// isZeroNode - Returns true if Elt is a constant zero or a floating point
3197/// constant +0.0.
3198static inline bool isZeroNode(SDOperand Elt) {
3199  return ((isa<ConstantSDNode>(Elt) &&
3200           cast<ConstantSDNode>(Elt)->getValue() == 0) ||
3201          (isa<ConstantFPSDNode>(Elt) &&
3202           cast<ConstantFPSDNode>(Elt)->isExactlyValue(0.0)));
3203}
3204
3205/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
3206/// vector and zero or undef vector.
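/// E.g. NumElems = 4, Idx = 0 builds the mask <4, 0, 0, 0>: element 0 of
/// the result comes from V2 and the rest from the zero or undef vector.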
3207static SDOperand getShuffleVectorZeroOrUndef(SDOperand V2, MVT::ValueType VT,
3208                                             unsigned NumElems, unsigned Idx,
3209                                             bool isZero, SelectionDAG &DAG) {
3210  SDOperand V1 = isZero ? getZeroVector(VT, DAG) : DAG.getNode(ISD::UNDEF, VT);
3211  MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3212  MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
3213  SDOperand Zero = DAG.getConstant(0, EVT);
3214  std::vector<SDOperand> MaskVec(NumElems, Zero);
3215  MaskVec[Idx] = DAG.getConstant(NumElems, EVT);
3216  SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3217                               &MaskVec[0], MaskVec.size());
3218  return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
3219}
3220
3221/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
3222///
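/// There is no byte-sized insert, so adjacent byte pairs are zero-extended
/// to i16, merged with SHL/OR, inserted with PINSRW into a v8i16, and the
/// result is bitcast back to v16i8.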
3223static SDOperand LowerBuildVectorv16i8(SDOperand Op, unsigned NonZeros,
3224                                       unsigned NumNonZero, unsigned NumZero,
3225                                       SelectionDAG &DAG, TargetLowering &TLI) {
3226  if (NumNonZero > 8)
3227    return SDOperand();
3228
3229  SDOperand V(0, 0);
3230  bool First = true;
3231  for (unsigned i = 0; i < 16; ++i) {
3232    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
3233    if (ThisIsNonZero && First) {
3234      if (NumZero)
3235        V = getZeroVector(MVT::v8i16, DAG);
3236      else
3237        V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
3238      First = false;
3239    }
3240
3241    if ((i & 1) != 0) {
3242      SDOperand ThisElt(0, 0), LastElt(0, 0);
3243      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
3244      if (LastIsNonZero) {
3245        LastElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i-1));
3246      }
3247      if (ThisIsNonZero) {
3248        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, Op.getOperand(i));
3249        ThisElt = DAG.getNode(ISD::SHL, MVT::i16,
3250                              ThisElt, DAG.getConstant(8, MVT::i8));
3251        if (LastIsNonZero)
3252          ThisElt = DAG.getNode(ISD::OR, MVT::i16, ThisElt, LastElt);
3253      } else
3254        ThisElt = LastElt;
3255
3256      if (ThisElt.Val)
3257        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, ThisElt,
3258                        DAG.getConstant(i/2, TLI.getPointerTy()));
3259    }
3260  }
3261
3262  return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, V);
3263}
3264
/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
3266///
3267static SDOperand LowerBuildVectorv8i16(SDOperand Op, unsigned NonZeros,
3268                                       unsigned NumNonZero, unsigned NumZero,
3269                                       SelectionDAG &DAG, TargetLowering &TLI) {
3270  if (NumNonZero > 4)
3271    return SDOperand();
3272
3273  SDOperand V(0, 0);
3274  bool First = true;
3275  for (unsigned i = 0; i < 8; ++i) {
3276    bool isNonZero = (NonZeros & (1 << i)) != 0;
3277    if (isNonZero) {
3278      if (First) {
3279        if (NumZero)
3280          V = getZeroVector(MVT::v8i16, DAG);
3281        else
3282          V = DAG.getNode(ISD::UNDEF, MVT::v8i16);
3283        First = false;
3284      }
3285      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, MVT::v8i16, V, Op.getOperand(i),
3286                      DAG.getConstant(i, TLI.getPointerTy()));
3287    }
3288  }
3289
3290  return V;
3291}
3292
3293SDOperand
3294X86TargetLowering::LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  // All zeros are handled with pxor.
3296  if (ISD::isBuildVectorAllZeros(Op.Val))
3297    return Op;
3298
  // All ones are handled with pcmpeqd.
3300  if (ISD::isBuildVectorAllOnes(Op.Val))
3301    return Op;
3302
3303  MVT::ValueType VT = Op.getValueType();
3304  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
3305  unsigned EVTBits = MVT::getSizeInBits(EVT);
3306
3307  unsigned NumElems = Op.getNumOperands();
3308  unsigned NumZero  = 0;
3309  unsigned NumNonZero = 0;
3310  unsigned NonZeros = 0;
3311  std::set<SDOperand> Values;
3312  for (unsigned i = 0; i < NumElems; ++i) {
3313    SDOperand Elt = Op.getOperand(i);
3314    if (Elt.getOpcode() != ISD::UNDEF) {
3315      Values.insert(Elt);
3316      if (isZeroNode(Elt))
3317        NumZero++;
3318      else {
3319        NonZeros |= (1 << i);
3320        NumNonZero++;
3321      }
3322    }
3323  }
3324
3325  if (NumNonZero == 0)
3326    // Must be a mix of zero and undef. Return a zero vector.
3327    return getZeroVector(VT, DAG);
3328
  // A splat is obviously ok. Let the legalizer expand it to a shuffle.
3330  if (Values.size() == 1)
3331    return SDOperand();
3332
3333  // Special case for single non-zero element.
3334  if (NumNonZero == 1) {
3335    unsigned Idx = CountTrailingZeros_32(NonZeros);
3336    SDOperand Item = Op.getOperand(Idx);
3337    Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Item);
3338    if (Idx == 0)
3339      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
3340      return getShuffleVectorZeroOrUndef(Item, VT, NumElems, Idx,
3341                                         NumZero > 0, DAG);
3342
3343    if (EVTBits == 32) {
3344      // Turn it into a shuffle of zero and zero-extended scalar to vector.
3345      Item = getShuffleVectorZeroOrUndef(Item, VT, NumElems, 0, NumZero > 0,
3346                                         DAG);
3347      MVT::ValueType MaskVT  = MVT::getIntVectorWithNumElements(NumElems);
3348      MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
3349      std::vector<SDOperand> MaskVec;
3350      for (unsigned i = 0; i < NumElems; i++)
3351        MaskVec.push_back(DAG.getConstant((i == Idx) ? 0 : 1, MaskEVT));
3352      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3353                                   &MaskVec[0], MaskVec.size());
3354      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, Item,
3355                         DAG.getNode(ISD::UNDEF, VT), Mask);
3356    }
3357  }
3358
  // Let the legalizer expand 2-wide build_vectors.
3360  if (EVTBits == 64)
3361    return SDOperand();
3362
3363  // If element VT is < 32 bits, convert it to inserts into a zero vector.
3364  if (EVTBits == 8) {
3365    SDOperand V = LowerBuildVectorv16i8(Op, NonZeros,NumNonZero,NumZero, DAG,
3366                                        *this);
3367    if (V.Val) return V;
3368  }
3369
3370  if (EVTBits == 16) {
3371    SDOperand V = LowerBuildVectorv8i16(Op, NonZeros,NumNonZero,NumZero, DAG,
3372                                        *this);
3373    if (V.Val) return V;
3374  }
3375
3376  // If element VT is == 32 bits, turn it into a number of shuffles.
3377  std::vector<SDOperand> V(NumElems);
3378  if (NumElems == 4 && NumZero > 0) {
3379    for (unsigned i = 0; i < 4; ++i) {
3380      bool isZero = !(NonZeros & (1 << i));
3381      if (isZero)
3382        V[i] = getZeroVector(VT, DAG);
3383      else
3384        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
3385    }
3386
3387    for (unsigned i = 0; i < 2; ++i) {
3388      switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
3389        default: break;
3390        case 0:
3391          V[i] = V[i*2];  // Must be a zero vector.
3392          break;
3393        case 1:
3394          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2+1], V[i*2],
3395                             getMOVLMask(NumElems, DAG));
3396          break;
3397        case 2:
3398          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
3399                             getMOVLMask(NumElems, DAG));
3400          break;
3401        case 3:
3402          V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i*2], V[i*2+1],
3403                             getUnpacklMask(NumElems, DAG));
3404          break;
3405      }
3406    }
3407
    // Take advantage of the fact that a GR32 to VR128 scalar_to_vector
    // (i.e. movd) clears the upper bits.
    // FIXME: we can do the same for the v4f32 case when we know both parts
    // of the lower half come from scalar_to_vector (loadf32). We should do
    // that in a post-legalizer dag combiner with target-specific hooks.
3413    if (MVT::isInteger(EVT) && (NonZeros & (0x3 << 2)) == 0)
3414      return V[0];
3415    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3416    MVT::ValueType EVT = MVT::getVectorBaseType(MaskVT);
3417    std::vector<SDOperand> MaskVec;
3418    bool Reverse = (NonZeros & 0x3) == 2;
3419    for (unsigned i = 0; i < 2; ++i)
3420      if (Reverse)
3421        MaskVec.push_back(DAG.getConstant(1-i, EVT));
3422      else
3423        MaskVec.push_back(DAG.getConstant(i, EVT));
3424    Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
3425    for (unsigned i = 0; i < 2; ++i)
3426      if (Reverse)
3427        MaskVec.push_back(DAG.getConstant(1-i+NumElems, EVT));
3428      else
3429        MaskVec.push_back(DAG.getConstant(i+NumElems, EVT));
3430    SDOperand ShufMask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3431                                     &MaskVec[0], MaskVec.size());
3432    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[0], V[1], ShufMask);
3433  }
3434
3435  if (Values.size() > 2) {
3436    // Expand into a number of unpckl*.
3437    // e.g. for v4f32
3438    //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
3439    //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
3440    //   Step 2: unpcklps X, Y ==>    <3, 2, 1, 0>
3441    SDOperand UnpckMask = getUnpacklMask(NumElems, DAG);
3442    for (unsigned i = 0; i < NumElems; ++i)
3443      V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, Op.getOperand(i));
3444    NumElems >>= 1;
3445    while (NumElems != 0) {
3446      for (unsigned i = 0; i < NumElems; ++i)
3447        V[i] = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V[i], V[i + NumElems],
3448                           UnpckMask);
3449      NumElems >>= 1;
3450    }
3451    return V[0];
3452  }
3453
3454  return SDOperand();
3455}
3456
3457SDOperand
3458X86TargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
3459  SDOperand V1 = Op.getOperand(0);
3460  SDOperand V2 = Op.getOperand(1);
3461  SDOperand PermMask = Op.getOperand(2);
3462  MVT::ValueType VT = Op.getValueType();
3463  unsigned NumElems = PermMask.getNumOperands();
3464  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
3465  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
3466  bool V1IsSplat = false;
3467  bool V2IsSplat = false;
3468
3469  if (isUndefShuffle(Op.Val))
3470    return DAG.getNode(ISD::UNDEF, VT);
3471
3472  if (isSplatMask(PermMask.Val)) {
3473    if (NumElems <= 4) return Op;
3474    // Promote it to a v4i32 splat.
3475    return PromoteSplat(Op, DAG);
3476  }
3477
3478  if (X86::isMOVLMask(PermMask.Val))
3479    return (V1IsUndef) ? V2 : Op;
3480
3481  if (X86::isMOVSHDUPMask(PermMask.Val) ||
3482      X86::isMOVSLDUPMask(PermMask.Val) ||
3483      X86::isMOVHLPSMask(PermMask.Val) ||
3484      X86::isMOVHPMask(PermMask.Val) ||
3485      X86::isMOVLPMask(PermMask.Val))
3486    return Op;
3487
3488  if (ShouldXformToMOVHLPS(PermMask.Val) ||
3489      ShouldXformToMOVLP(V1.Val, V2.Val, PermMask.Val))
3490    return CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3491
3492  bool Commuted = false;
3493  V1IsSplat = isSplatVector(V1.Val);
3494  V2IsSplat = isSplatVector(V2.Val);
3495  if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
3496    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3497    std::swap(V1IsSplat, V2IsSplat);
3498    std::swap(V1IsUndef, V2IsUndef);
3499    Commuted = true;
3500  }
3501
3502  if (isCommutedMOVL(PermMask.Val, V2IsSplat, V2IsUndef)) {
3503    if (V2IsUndef) return V1;
3504    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3505    if (V2IsSplat) {
      // V2 is a splat, so the mask may be malformed. That is, it may point
      // to any V2 element. The instruction selector won't like this. Get
      // a corrected mask and commute it to form a proper MOVS{S|D}.
3509      SDOperand NewMask = getMOVLMask(NumElems, DAG);
3510      if (NewMask.Val != PermMask.Val)
3511        Op = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3512    }
3513    return Op;
3514  }
3515
3516  if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
3517      X86::isUNPCKLMask(PermMask.Val) ||
3518      X86::isUNPCKHMask(PermMask.Val))
3519    return Op;
3520
3521  if (V2IsSplat) {
    // Normalize the mask so all entries that point to V2 point to its first
    // element, then try to match unpck{h|l} again. If a match is found,
    // return a new vector_shuffle with the corrected mask.
3525    SDOperand NewMask = NormalizeMask(PermMask, DAG);
3526    if (NewMask.Val != PermMask.Val) {
3527      if (X86::isUNPCKLMask(PermMask.Val, true)) {
3528        SDOperand NewMask = getUnpacklMask(NumElems, DAG);
3529        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3530      } else if (X86::isUNPCKHMask(PermMask.Val, true)) {
3531        SDOperand NewMask = getUnpackhMask(NumElems, DAG);
3532        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, NewMask);
3533      }
3534    }
3535  }
3536
3537  // Normalize the node to match x86 shuffle ops if needed
3538  if (V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(PermMask.Val))
3539      Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3540
3541  if (Commuted) {
    // Commute it back and try unpck* again.
3543    Op = CommuteVectorShuffle(Op, V1, V2, PermMask, DAG);
3544    if (X86::isUNPCKL_v_undef_Mask(PermMask.Val) ||
3545        X86::isUNPCKLMask(PermMask.Val) ||
3546        X86::isUNPCKHMask(PermMask.Val))
3547      return Op;
3548  }
3549
3550  // If VT is integer, try PSHUF* first, then SHUFP*.
3551  if (MVT::isInteger(VT)) {
3552    if (X86::isPSHUFDMask(PermMask.Val) ||
3553        X86::isPSHUFHWMask(PermMask.Val) ||
3554        X86::isPSHUFLWMask(PermMask.Val)) {
3555      if (V2.getOpcode() != ISD::UNDEF)
3556        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3557                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
3558      return Op;
3559    }
3560
3561    if (X86::isSHUFPMask(PermMask.Val))
3562      return Op;
3563
    // Handle a v8i16 shuffle with a shuffle-high / shuffle-low node pair.
3565    if (VT == MVT::v8i16 && isPSHUFHW_PSHUFLWMask(PermMask.Val)) {
3566      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(NumElems);
3567      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
3568      std::vector<SDOperand> MaskVec;
3569      for (unsigned i = 0; i != 4; ++i)
3570        MaskVec.push_back(PermMask.getOperand(i));
3571      for (unsigned i = 4; i != 8; ++i)
3572        MaskVec.push_back(DAG.getConstant(i, BaseVT));
3573      SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3574                                   &MaskVec[0], MaskVec.size());
3575      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
3576      MaskVec.clear();
3577      for (unsigned i = 0; i != 4; ++i)
3578        MaskVec.push_back(DAG.getConstant(i, BaseVT));
3579      for (unsigned i = 4; i != 8; ++i)
3580        MaskVec.push_back(PermMask.getOperand(i));
3581      Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT, &MaskVec[0],MaskVec.size());
3582      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2, Mask);
3583    }
3584  } else {
3585    // Floating point cases in the other order.
3586    if (X86::isSHUFPMask(PermMask.Val))
3587      return Op;
3588    if (X86::isPSHUFDMask(PermMask.Val) ||
3589        X86::isPSHUFHWMask(PermMask.Val) ||
3590        X86::isPSHUFLWMask(PermMask.Val)) {
3591      if (V2.getOpcode() != ISD::UNDEF)
3592        return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1,
3593                           DAG.getNode(ISD::UNDEF, V1.getValueType()),PermMask);
3594      return Op;
3595    }
3596  }
3597
3598  if (NumElems == 4) {
3599    MVT::ValueType MaskVT = PermMask.getValueType();
3600    MVT::ValueType MaskEVT = MVT::getVectorBaseType(MaskVT);
3601    std::vector<std::pair<int, int> > Locs;
    Locs.resize(NumElems);
3603    std::vector<SDOperand> Mask1(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3604    std::vector<SDOperand> Mask2(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3605    unsigned NumHi = 0;
3606    unsigned NumLo = 0;
    // If no more than two elements come from either vector, this can be
    // implemented with two shuffles. The first shuffle gathers the elements;
    // the second shuffle, which takes the first shuffle as both of its
    // vector operands, puts the elements into the right order.
3611    for (unsigned i = 0; i != NumElems; ++i) {
3612      SDOperand Elt = PermMask.getOperand(i);
3613      if (Elt.getOpcode() == ISD::UNDEF) {
3614        Locs[i] = std::make_pair(-1, -1);
3615      } else {
3616        unsigned Val = cast<ConstantSDNode>(Elt)->getValue();
3617        if (Val < NumElems) {
3618          Locs[i] = std::make_pair(0, NumLo);
3619          Mask1[NumLo] = Elt;
3620          NumLo++;
3621        } else {
3622          Locs[i] = std::make_pair(1, NumHi);
3623          if (2+NumHi < NumElems)
3624            Mask1[2+NumHi] = Elt;
3625          NumHi++;
3626        }
3627      }
3628    }
3629    if (NumLo <= 2 && NumHi <= 2) {
3630      V1 = DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3631                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3632                                   &Mask1[0], Mask1.size()));
3633      for (unsigned i = 0; i != NumElems; ++i) {
3634        if (Locs[i].first == -1)
3635          continue;
3636        else {
3637          unsigned Idx = (i < NumElems/2) ? 0 : NumElems;
3638          Idx += Locs[i].first * (NumElems/2) + Locs[i].second;
3639          Mask2[i] = DAG.getConstant(Idx, MaskEVT);
3640        }
3641      }
3642
3643      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V1,
3644                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3645                                     &Mask2[0], Mask2.size()));
3646    }
3647
3648    // Break it into (shuffle shuffle_hi, shuffle_lo).
    Locs.assign(NumElems, std::make_pair(-1, -1));
3650    std::vector<SDOperand> LoMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3651    std::vector<SDOperand> HiMask(NumElems, DAG.getNode(ISD::UNDEF, MaskEVT));
3652    std::vector<SDOperand> *MaskPtr = &LoMask;
3653    unsigned MaskIdx = 0;
3654    unsigned LoIdx = 0;
3655    unsigned HiIdx = NumElems/2;
3656    for (unsigned i = 0; i != NumElems; ++i) {
3657      if (i == NumElems/2) {
3658        MaskPtr = &HiMask;
3659        MaskIdx = 1;
3660        LoIdx = 0;
3661        HiIdx = NumElems/2;
3662      }
3663      SDOperand Elt = PermMask.getOperand(i);
3664      if (Elt.getOpcode() == ISD::UNDEF) {
3665        Locs[i] = std::make_pair(-1, -1);
3666      } else if (cast<ConstantSDNode>(Elt)->getValue() < NumElems) {
3667        Locs[i] = std::make_pair(MaskIdx, LoIdx);
3668        (*MaskPtr)[LoIdx] = Elt;
3669        LoIdx++;
3670      } else {
3671        Locs[i] = std::make_pair(MaskIdx, HiIdx);
3672        (*MaskPtr)[HiIdx] = Elt;
3673        HiIdx++;
3674      }
3675    }
3676
3677    SDOperand LoShuffle =
3678      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3679                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3680                              &LoMask[0], LoMask.size()));
3681    SDOperand HiShuffle =
3682      DAG.getNode(ISD::VECTOR_SHUFFLE, VT, V1, V2,
3683                  DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3684                              &HiMask[0], HiMask.size()));
3685    std::vector<SDOperand> MaskOps;
3686    for (unsigned i = 0; i != NumElems; ++i) {
3687      if (Locs[i].first == -1) {
3688        MaskOps.push_back(DAG.getNode(ISD::UNDEF, MaskEVT));
3689      } else {
3690        unsigned Idx = Locs[i].first * NumElems + Locs[i].second;
3691        MaskOps.push_back(DAG.getConstant(Idx, MaskEVT));
3692      }
3693    }
3694    return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, LoShuffle, HiShuffle,
3695                       DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3696                                   &MaskOps[0], MaskOps.size()));
3697  }
3698
3699  return SDOperand();
3700}
3701
3702SDOperand
3703X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
3704  if (!isa<ConstantSDNode>(Op.getOperand(1)))
3705    return SDOperand();
3706
3707  MVT::ValueType VT = Op.getValueType();
3708  // TODO: handle v16i8.
3709  if (MVT::getSizeInBits(VT) == 16) {
    // Transform it so it matches pextrw, which produces a 32-bit result.
    MVT::ValueType EVT = (MVT::ValueType)(VT+1); // i16 -> i32
3712    SDOperand Extract = DAG.getNode(X86ISD::PEXTRW, EVT,
3713                                    Op.getOperand(0), Op.getOperand(1));
3714    SDOperand Assert  = DAG.getNode(ISD::AssertZext, EVT, Extract,
3715                                    DAG.getValueType(VT));
3716    return DAG.getNode(ISD::TRUNCATE, VT, Assert);
3717  } else if (MVT::getSizeInBits(VT) == 32) {
3718    SDOperand Vec = Op.getOperand(0);
3719    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3720    if (Idx == 0)
3721      return Op;
3722    // SHUFPS the element to the lowest double word, then movss.
3723    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3724    std::vector<SDOperand> IdxVec;
3725    IdxVec.push_back(DAG.getConstant(Idx, MVT::getVectorBaseType(MaskVT)));
3726    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3727    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3728    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3729    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3730                                 &IdxVec[0], IdxVec.size());
3731    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3732                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3733    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3734                       DAG.getConstant(0, getPointerTy()));
3735  } else if (MVT::getSizeInBits(VT) == 64) {
3736    SDOperand Vec = Op.getOperand(0);
3737    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
3738    if (Idx == 0)
3739      return Op;
3740
3741    // UNPCKHPD the element to the lowest double word, then movsd.
    // Note if the lower 64 bits of the result of the UNPCKHPD are then stored
    // to a f64mem, the whole operation is folded into a single MOVHPDmr.
    MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(2);
3745    std::vector<SDOperand> IdxVec;
3746    IdxVec.push_back(DAG.getConstant(1, MVT::getVectorBaseType(MaskVT)));
3747    IdxVec.push_back(DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(MaskVT)));
3748    SDOperand Mask = DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3749                                 &IdxVec[0], IdxVec.size());
3750    Vec = DAG.getNode(ISD::VECTOR_SHUFFLE, Vec.getValueType(),
3751                      Vec, DAG.getNode(ISD::UNDEF, Vec.getValueType()), Mask);
3752    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, VT, Vec,
3753                       DAG.getConstant(0, getPointerTy()));
3754  }
3755
3756  return SDOperand();
3757}
3758
3759SDOperand
3760X86TargetLowering::LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
  // Transform it so it matches pinsrw, which expects a 16-bit value in a GR32
  // as its second argument.
3763  MVT::ValueType VT = Op.getValueType();
3764  MVT::ValueType BaseVT = MVT::getVectorBaseType(VT);
3765  SDOperand N0 = Op.getOperand(0);
3766  SDOperand N1 = Op.getOperand(1);
3767  SDOperand N2 = Op.getOperand(2);
3768  if (MVT::getSizeInBits(BaseVT) == 16) {
3769    if (N1.getValueType() != MVT::i32)
3770      N1 = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, N1);
3771    if (N2.getValueType() != MVT::i32)
3772      N2 = DAG.getConstant(cast<ConstantSDNode>(N2)->getValue(), MVT::i32);
3773    return DAG.getNode(X86ISD::PINSRW, VT, N0, N1, N2);
3774  } else if (MVT::getSizeInBits(BaseVT) == 32) {
3775    unsigned Idx = cast<ConstantSDNode>(N2)->getValue();
3776    if (Idx == 0) {
3777      // Use a movss.
3778      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, N1);
3779      MVT::ValueType MaskVT = MVT::getIntVectorWithNumElements(4);
3780      MVT::ValueType BaseVT = MVT::getVectorBaseType(MaskVT);
3781      std::vector<SDOperand> MaskVec;
3782      MaskVec.push_back(DAG.getConstant(4, BaseVT));
3783      for (unsigned i = 1; i <= 3; ++i)
3784        MaskVec.push_back(DAG.getConstant(i, BaseVT));
3785      return DAG.getNode(ISD::VECTOR_SHUFFLE, VT, N0, N1,
3786                         DAG.getNode(ISD::BUILD_VECTOR, MaskVT,
3787                                     &MaskVec[0], MaskVec.size()));
3788    } else {
3789      // Use two pinsrw instructions to insert a 32 bit value.
3790      Idx <<= 1;
3791      if (MVT::isFloatingPoint(N1.getValueType())) {
3792        if (ISD::isNON_EXTLoad(N1.Val)) {
3793          // Just load directly from f32mem to GR32.
3794          LoadSDNode *LD = cast<LoadSDNode>(N1);
3795          N1 = DAG.getLoad(MVT::i32, LD->getChain(), LD->getBasePtr(),
3796                           LD->getSrcValue(), LD->getSrcValueOffset());
3797        } else {
3798          N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, N1);
3799          N1 = DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, N1);
3800          N1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, N1,
3801                           DAG.getConstant(0, getPointerTy()));
3802        }
3803      }
3804      N0 = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, N0);
3805      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3806                       DAG.getConstant(Idx, getPointerTy()));
3807      N1 = DAG.getNode(ISD::SRL, MVT::i32, N1, DAG.getConstant(16, MVT::i8));
3808      N0 = DAG.getNode(X86ISD::PINSRW, MVT::v8i16, N0, N1,
3809                       DAG.getConstant(Idx+1, getPointerTy()));
3810      return DAG.getNode(ISD::BIT_CONVERT, VT, N0);
3811    }
3812  }
3813
3814  return SDOperand();
3815}
3816
3817SDOperand
3818X86TargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
3819  SDOperand AnyExt = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, Op.getOperand(0));
3820  return DAG.getNode(X86ISD::S2VEC, Op.getValueType(), AnyExt);
3821}
3822
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterparts wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form an addressing mode. These wrapped nodes will be selected
// into MOV32ri.
3829SDOperand
3830X86TargetLowering::LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
3831  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3832  SDOperand Result = DAG.getTargetConstantPool(CP->getConstVal(),
3833                                               getPointerTy(),
3834                                               CP->getAlignment());
3835  if (Subtarget->isTargetDarwin()) {
3836    Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3837    // With PIC, the address is actually $g + Offset.
3838    if (!Subtarget->is64Bit() &&
3839        getTargetMachine().getRelocationModel() == Reloc::PIC_)
3840      Result = DAG.getNode(ISD::ADD, getPointerTy(),
3841                    DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()), Result);
3842  }
3843
3844  return Result;
3845}
3846
3847SDOperand
3848X86TargetLowering::LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
3849  GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3850  SDOperand Result = DAG.getTargetGlobalAddress(GV, getPointerTy());
3851  if (Subtarget->isTargetDarwin()) {
3852    Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3853    // With PIC, the address is actually $g + Offset.
3854    if (!Subtarget->is64Bit() &&
3855        getTargetMachine().getRelocationModel() == Reloc::PIC_)
3856      Result = DAG.getNode(ISD::ADD, getPointerTy(),
3857                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3858                           Result);
3859
3860    // For Darwin, external and weak symbols are indirect, so we want to load
3861    // the value at address GV, not the value of GV itself. This means that
3862    // the GlobalAddress must be in the base or index register of the address,
3863    // not the GV offset field.
3864    if (getTargetMachine().getRelocationModel() != Reloc::Static &&
3865        Subtarget->GVRequiresExtraLoad(GV, false))
3866      Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
3867  } else if (Subtarget->GVRequiresExtraLoad(GV, false)) {
3868    Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3869    Result = DAG.getLoad(getPointerTy(), DAG.getEntryNode(), Result, NULL, 0);
3870  }
3871
3872  return Result;
3873}
3874
3875SDOperand
3876X86TargetLowering::LowerExternalSymbol(SDOperand Op, SelectionDAG &DAG) {
3877  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
3878  SDOperand Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
3879  if (Subtarget->isTargetDarwin()) {
3880    Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
3881    // With PIC, the address is actually $g + Offset.
3882    if (!Subtarget->is64Bit() &&
3883        getTargetMachine().getRelocationModel() == Reloc::PIC_)
3884      Result = DAG.getNode(ISD::ADD, getPointerTy(),
3885                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
3886                           Result);
3887  }
3888
3889  return Result;
3890}
3891
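// LowerShift - Lower {SHL|SRL|SRA}_PARTS, i.e. a 64-bit shift performed on
// two i32 halves. As a sketch of the logic: for (hi = 0, lo = 1) << 33 the
// shift amount has bit 5 set, so the CMOVs select the shifted-past-32
// results, hi = lo << 33 (hardware masks the count to 1, giving 2) and
// lo = 0, i.e. the 64-bit value 0x200000000.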
3892SDOperand X86TargetLowering::LowerShift(SDOperand Op, SelectionDAG &DAG) {
3893    assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 &&
3894           "Not an i64 shift!");
3895    bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
3896    SDOperand ShOpLo = Op.getOperand(0);
3897    SDOperand ShOpHi = Op.getOperand(1);
3898    SDOperand ShAmt  = Op.getOperand(2);
3899    SDOperand Tmp1 = isSRA ?
3900      DAG.getNode(ISD::SRA, MVT::i32, ShOpHi, DAG.getConstant(31, MVT::i8)) :
3901      DAG.getConstant(0, MVT::i32);
3902
3903    SDOperand Tmp2, Tmp3;
3904    if (Op.getOpcode() == ISD::SHL_PARTS) {
3905      Tmp2 = DAG.getNode(X86ISD::SHLD, MVT::i32, ShOpHi, ShOpLo, ShAmt);
3906      Tmp3 = DAG.getNode(ISD::SHL, MVT::i32, ShOpLo, ShAmt);
3907    } else {
3908      Tmp2 = DAG.getNode(X86ISD::SHRD, MVT::i32, ShOpLo, ShOpHi, ShAmt);
3909      Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, MVT::i32, ShOpHi, ShAmt);
3910    }
3911
3912    const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
3913    SDOperand AndNode = DAG.getNode(ISD::AND, MVT::i8, ShAmt,
3914                                    DAG.getConstant(32, MVT::i8));
3915    SDOperand COps[]={DAG.getEntryNode(), AndNode, DAG.getConstant(0, MVT::i8)};
3916    SDOperand InFlag = DAG.getNode(X86ISD::CMP, VTs, 2, COps, 3).getValue(1);
3917
3918    SDOperand Hi, Lo;
3919    SDOperand CC = DAG.getConstant(X86::COND_NE, MVT::i8);
3920
3921    VTs = DAG.getNodeValueTypes(MVT::i32, MVT::Flag);
3922    SmallVector<SDOperand, 4> Ops;
3923    if (Op.getOpcode() == ISD::SHL_PARTS) {
3924      Ops.push_back(Tmp2);
3925      Ops.push_back(Tmp3);
3926      Ops.push_back(CC);
3927      Ops.push_back(InFlag);
3928      Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3929      InFlag = Hi.getValue(1);
3930
3931      Ops.clear();
3932      Ops.push_back(Tmp3);
3933      Ops.push_back(Tmp1);
3934      Ops.push_back(CC);
3935      Ops.push_back(InFlag);
3936      Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3937    } else {
3938      Ops.push_back(Tmp2);
3939      Ops.push_back(Tmp3);
3940      Ops.push_back(CC);
3941      Ops.push_back(InFlag);
3942      Lo = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3943      InFlag = Lo.getValue(1);
3944
3945      Ops.clear();
3946      Ops.push_back(Tmp3);
3947      Ops.push_back(Tmp1);
3948      Ops.push_back(CC);
3949      Ops.push_back(InFlag);
3950      Hi = DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
3951    }
3952
3953    VTs = DAG.getNodeValueTypes(MVT::i32, MVT::i32);
3954    Ops.clear();
3955    Ops.push_back(Lo);
3956    Ops.push_back(Hi);
3957    return DAG.getNode(ISD::MERGE_VALUES, VTs, 2, &Ops[0], Ops.size());
3958}
3959
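// LowerSINT_TO_FP - Store the integer to a stack slot and reload it with
// FILD, since the x87 unit converts directly from memory. When SSE scalar
// FP is enabled, the x87 result is additionally stored and reloaded so it
// ends up in an SSE register.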
3960SDOperand X86TargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
3961  assert(Op.getOperand(0).getValueType() <= MVT::i64 &&
3962         Op.getOperand(0).getValueType() >= MVT::i16 &&
3963         "Unknown SINT_TO_FP to lower!");
3964
3965  SDOperand Result;
3966  MVT::ValueType SrcVT = Op.getOperand(0).getValueType();
3967  unsigned Size = MVT::getSizeInBits(SrcVT)/8;
3968  MachineFunction &MF = DAG.getMachineFunction();
3969  int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
3970  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3971  SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Op.getOperand(0),
3972                                 StackSlot, NULL, 0);
3973
3974  // Build the FILD
3975  std::vector<MVT::ValueType> Tys;
3976  Tys.push_back(MVT::f64);
3977  Tys.push_back(MVT::Other);
3978  if (X86ScalarSSE) Tys.push_back(MVT::Flag);
3979  std::vector<SDOperand> Ops;
3980  Ops.push_back(Chain);
3981  Ops.push_back(StackSlot);
3982  Ops.push_back(DAG.getValueType(SrcVT));
3983  Result = DAG.getNode(X86ScalarSSE ? X86ISD::FILD_FLAG :X86ISD::FILD,
3984                       Tys, &Ops[0], Ops.size());
3985
3986  if (X86ScalarSSE) {
3987    Chain = Result.getValue(1);
3988    SDOperand InFlag = Result.getValue(2);
3989
3990    // FIXME: Currently the FST is flagged to the FILD_FLAG. This
3991    // shouldn't be necessary except that RFP cannot be live across
    // multiple blocks. When the stackifier is fixed, they can be uncoupled.
3993    MachineFunction &MF = DAG.getMachineFunction();
3994    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
3995    SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
3996    std::vector<MVT::ValueType> Tys;
3997    Tys.push_back(MVT::Other);
3998    std::vector<SDOperand> Ops;
3999    Ops.push_back(Chain);
4000    Ops.push_back(Result);
4001    Ops.push_back(StackSlot);
4002    Ops.push_back(DAG.getValueType(Op.getValueType()));
4003    Ops.push_back(InFlag);
4004    Chain = DAG.getNode(X86ISD::FST, Tys, &Ops[0], Ops.size());
4005    Result = DAG.getLoad(Op.getValueType(), Chain, StackSlot, NULL, 0);
4006  }
4007
4008  return Result;
4009}
4010
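// LowerFP_TO_SINT - The reverse of the above: FIST* only stores to memory,
// so the value is written to a stack slot with FP_TO_INT*_IN_MEM and then
// loaded back. When SSE scalar FP is enabled, the f64 source is first
// spilled and reloaded onto the x87 stack with FLD.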
4011SDOperand X86TargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
4012  assert(Op.getValueType() <= MVT::i64 && Op.getValueType() >= MVT::i16 &&
4013         "Unknown FP_TO_SINT to lower!");
  // We lower FP->sint64 into FISTP64 to a temporary stack slot, followed by
  // a load from that slot.
4016  MachineFunction &MF = DAG.getMachineFunction();
4017  unsigned MemSize = MVT::getSizeInBits(Op.getValueType())/8;
4018  int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
4019  SDOperand StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
4020
4021  unsigned Opc;
4022  switch (Op.getValueType()) {
4023    default: assert(0 && "Invalid FP_TO_SINT to lower!");
4024    case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
4025    case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
4026    case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
4027  }
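  // Note that the rounding-mode fixup (switching the FPU control word to
  // round-towards-zero around the store) is not emitted here; it happens
  // when these pseudo instructions are expanded in InsertAtEndOfBasicBlock.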

  SDOperand Chain = DAG.getEntryNode();
  SDOperand Value = Op.getOperand(0);
  if (X86ScalarSSE) {
    assert(Op.getValueType() == MVT::i64 && "Invalid FP_TO_SINT to lower!");
    Chain = DAG.getStore(Chain, Value, StackSlot, NULL, 0);
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::f64);
    Tys.push_back(MVT::Other);
    std::vector<SDOperand> Ops;
    Ops.push_back(Chain);
    Ops.push_back(StackSlot);
    Ops.push_back(DAG.getValueType(Op.getOperand(0).getValueType()));
    Value = DAG.getNode(X86ISD::FLD, Tys, &Ops[0], Ops.size());
    Chain = Value.getValue(1);
    SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
    StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
  }

  // Build the FP_TO_INT*_IN_MEM
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Value);
  Ops.push_back(StackSlot);
  SDOperand FIST = DAG.getNode(Opc, MVT::Other, &Ops[0], Ops.size());

  // Load the result.
  return DAG.getLoad(Op.getValueType(), FIST, StackSlot, NULL, 0);
}

SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
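  // fabs is lowered as a bitwise AND that clears the sign bit. The mask is
  // loaded from the constant pool: 0x7FFFFFFFFFFFFFFF for f64, 0x7FFFFFFF
  // for f32 (the remaining vector lanes are padded with zeros).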
  MVT::ValueType VT = Op.getValueType();
  const Type *OpNTy = MVT::getTypeForValueType(VT);
  std::vector<Constant*> CV;
  if (VT == MVT::f64) {
    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(~(1ULL << 63))));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
  } else {
    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(~(1U << 31))));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
  }
  Constant *CS = ConstantStruct::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
  std::vector<MVT::ValueType> Tys;
  Tys.push_back(VT);
  Tys.push_back(MVT::Other);
  SmallVector<SDOperand, 3> Ops;
  Ops.push_back(DAG.getEntryNode());
  Ops.push_back(CPIdx);
  Ops.push_back(DAG.getSrcValue(NULL));
  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
  return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
}

SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
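  // fneg is lowered as a bitwise XOR that flips the sign bit, using a
  // constant-pool mask: 0x8000000000000000 for f64, 0x80000000 for f32.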
  MVT::ValueType VT = Op.getValueType();
  const Type *OpNTy = MVT::getTypeForValueType(VT);
  std::vector<Constant*> CV;
  if (VT == MVT::f64) {
    CV.push_back(ConstantFP::get(OpNTy, BitsToDouble(1ULL << 63)));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
  } else {
    CV.push_back(ConstantFP::get(OpNTy, BitsToFloat(1U << 31)));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
    CV.push_back(ConstantFP::get(OpNTy, 0.0));
  }
  Constant *CS = ConstantStruct::get(CV);
  SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
  std::vector<MVT::ValueType> Tys;
  Tys.push_back(VT);
  Tys.push_back(MVT::Other);
  SmallVector<SDOperand, 3> Ops;
  Ops.push_back(DAG.getEntryNode());
  Ops.push_back(CPIdx);
  Ops.push_back(DAG.getSrcValue(NULL));
  SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
  return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
}

SDOperand X86TargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG,
                                        SDOperand Chain) {
  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
  SDOperand Cond;
  SDOperand Op0 = Op.getOperand(0);
  SDOperand Op1 = Op.getOperand(1);
  SDOperand CC = Op.getOperand(2);
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  const MVT::ValueType *VTs1 = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
  const MVT::ValueType *VTs2 = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
  bool isFP = MVT::isFloatingPoint(Op1.getValueType());
  unsigned X86CC;

  if (translateX86CC(SetCCOpcode, isFP, X86CC,
                     Op0, Op1, DAG)) {
    SDOperand Ops1[] = { Chain, Op0, Op1 };
    Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, Ops1, 3).getValue(1);
    SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
    return DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
  }

  assert(isFP && "Illegal integer SetCC!");

  SDOperand COps[] = { Chain, Op0, Op1 };
  Cond = DAG.getNode(X86ISD::CMP, VTs1, 2, COps, 3).getValue(1);

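  // SETOEQ and SETUNE have no single x86 condition code, so each is built
  // from two flag tests combined with AND/OR; e.g. SETOEQ is roughly
  // "setnp %al; sete %cl; andb %cl, %al" (illustrative register choices).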
  switch (SetCCOpcode) {
  default: assert(false && "Illegal floating point SetCC!");
  case ISD::SETOEQ: {  // !PF & ZF
    SDOperand Ops1[] = { DAG.getConstant(X86::COND_NP, MVT::i8), Cond };
    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
    SDOperand Ops2[] = { DAG.getConstant(X86::COND_E, MVT::i8),
                         Tmp1.getValue(1) };
    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
    return DAG.getNode(ISD::AND, MVT::i8, Tmp1, Tmp2);
  }
  case ISD::SETUNE: {  // PF | !ZF
    SDOperand Ops1[] = { DAG.getConstant(X86::COND_P, MVT::i8), Cond };
    SDOperand Tmp1 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops1, 2);
    SDOperand Ops2[] = { DAG.getConstant(X86::COND_NE, MVT::i8),
                         Tmp1.getValue(1) };
    SDOperand Tmp2 = DAG.getNode(X86ISD::SETCC, VTs2, 2, Ops2, 2);
    return DAG.getNode(ISD::OR, MVT::i8, Tmp1, Tmp2);
  }
  }
}

SDOperand X86TargetLowering::LowerSELECT(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDOperand Chain = DAG.getEntryNode();
  SDOperand Cond  = Op.getOperand(0);
  SDOperand CC;
  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);

  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC(Cond, DAG, Chain);

  if (Cond.getOpcode() == X86ISD::SETCC) {
    CC = Cond.getOperand(0);

    // If the condition flag is set by an X86ISD::CMP, then make a copy of it
    // (since the flag operand cannot be shared). Use it as the condition-
    // setting operand in place of the X86ISD::SETCC.
    // If the X86ISD::SETCC has more than one use, then perhaps it's better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reasons)?
    SDOperand Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    bool IllegalFPCMov = !X86ScalarSSE &&
      MVT::isFloatingPoint(Op.getValueType()) &&
      !hasFPCMov(cast<ConstantSDNode>(CC)->getSignExtended());
    if ((Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) &&
        !IllegalFPCMov) {
      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
      addTest = false;
    }
  }

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
  }
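  // At this point the condition either reuses the original compare or has
  // been materialized as a boolean and compared against zero, so the CC
  // built below selects between the two operands via X86ISD::CMOV.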

  VTs = DAG.getNodeValueTypes(Op.getValueType(), MVT::Flag);
  SmallVector<SDOperand, 4> Ops;
  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
  // the condition is true.
  Ops.push_back(Op.getOperand(2));
  Ops.push_back(Op.getOperand(1));
  Ops.push_back(CC);
  Ops.push_back(Cond.getValue(1));
  return DAG.getNode(X86ISD::CMOV, VTs, 2, &Ops[0], Ops.size());
}

SDOperand X86TargetLowering::LowerBRCOND(SDOperand Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDOperand Chain = Op.getOperand(0);
  SDOperand Cond  = Op.getOperand(1);
  SDOperand Dest  = Op.getOperand(2);
  SDOperand CC;
  const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);

  if (Cond.getOpcode() == ISD::SETCC)
    Cond = LowerSETCC(Cond, DAG, Chain);

  if (Cond.getOpcode() == X86ISD::SETCC) {
    CC = Cond.getOperand(0);

    // If the condition flag is set by an X86ISD::CMP, then make a copy of it
    // (since the flag operand cannot be shared). Use it as the condition-
    // setting operand in place of the X86ISD::SETCC.
    // If the X86ISD::SETCC has more than one use, then perhaps it's better
    // to use a test instead of duplicating the X86ISD::CMP (for register
    // pressure reasons)?
    SDOperand Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI) {
      SDOperand Ops[] = { Chain, Cmp.getOperand(1), Cmp.getOperand(2) };
      Cond = DAG.getNode(Opc, VTs, 2, Ops, 3);
      addTest = false;
    }
  }

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    SDOperand Ops[] = { Chain, Cond, DAG.getConstant(0, MVT::i8) };
    Cond = DAG.getNode(X86ISD::CMP, VTs, 2, Ops, 3);
  }
  return DAG.getNode(X86ISD::BRCOND, Op.getValueType(),
                     Cond, Dest, CC, Cond.getValue(1));
}

SDOperand X86TargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
  if (Subtarget->isTargetDarwin()) {
    Result = DAG.getNode(X86ISD::Wrapper, getPointerTy(), Result);
    // With PIC, the address is actually $g + Offset.
    if (!Subtarget->is64Bit() &&
        getTargetMachine().getRelocationModel() == Reloc::PIC_)
      Result = DAG.getNode(ISD::ADD, getPointerTy(),
                           DAG.getNode(X86ISD::GlobalBaseReg, getPointerTy()),
                           Result);
  }

  return Result;
}

SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
  unsigned CallingConv = cast<ConstantSDNode>(Op.getOperand(1))->getValue();

  if (Subtarget->is64Bit())
    return LowerX86_64CCCCallTo(Op, DAG);
  else
    switch (CallingConv) {
    default:
      assert(0 && "Unsupported calling convention");
    case CallingConv::Fast:
      if (EnableFastCC) {
        return LowerFastCCCallTo(Op, DAG, false);
      }
      // Falls through
    case CallingConv::C:
    case CallingConv::CSRet:
      return LowerCCCCallTo(Op, DAG);
    case CallingConv::X86_StdCall:
      return LowerStdCallCCCallTo(Op, DAG);
    case CallingConv::X86_FastCall:
      return LowerFastCCCallTo(Op, DAG, true);
    }
}

SDOperand X86TargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Copy;

  switch (Op.getNumOperands()) {
    default:
      assert(0 && "Do not know how to return this many arguments!");
      abort();
    case 1:    // ret void.
      return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0),
                         DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
    case 3: {
      MVT::ValueType ArgVT = Op.getOperand(1).getValueType();

      if (MVT::isVector(ArgVT) ||
          (Subtarget->is64Bit() && MVT::isFloatingPoint(ArgVT))) {
        // Integer or FP vector result -> XMM0.
        if (DAG.getMachineFunction().liveout_empty())
          DAG.getMachineFunction().addLiveOut(X86::XMM0);
        Copy = DAG.getCopyToReg(Op.getOperand(0), X86::XMM0, Op.getOperand(1),
                                SDOperand());
      } else if (MVT::isInteger(ArgVT)) {
        // Integer result -> EAX / RAX.
        // The C calling convention guarantees the return value has been
        // promoted to at least MVT::i32. The X86-64 ABI doesn't require the
        // value to be promoted to MVT::i64, so we don't have to extend it to
        // 64-bit. Return the value in EAX, but mark RAX as liveout.
        unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
        if (DAG.getMachineFunction().liveout_empty())
          DAG.getMachineFunction().addLiveOut(Reg);

        Reg = (ArgVT == MVT::i64) ? X86::RAX : X86::EAX;
        Copy = DAG.getCopyToReg(Op.getOperand(0), Reg, Op.getOperand(1),
                                SDOperand());
      } else if (!X86ScalarSSE) {
        // FP return with fp-stack value.
        if (DAG.getMachineFunction().liveout_empty())
          DAG.getMachineFunction().addLiveOut(X86::ST0);

        std::vector<MVT::ValueType> Tys;
        Tys.push_back(MVT::Other);
        Tys.push_back(MVT::Flag);
        std::vector<SDOperand> Ops;
        Ops.push_back(Op.getOperand(0));
        Ops.push_back(Op.getOperand(1));
        Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, &Ops[0], Ops.size());
      } else {
        // FP return with ScalarSSE (return on fp-stack).
        if (DAG.getMachineFunction().liveout_empty())
          DAG.getMachineFunction().addLiveOut(X86::ST0);

        SDOperand MemLoc;
        SDOperand Chain = Op.getOperand(0);
        SDOperand Value = Op.getOperand(1);

        if (ISD::isNON_EXTLoad(Value.Val) &&
            (Chain == Value.getValue(1) || Chain == Value.getOperand(0))) {
          Chain  = Value.getOperand(0);
          MemLoc = Value.getOperand(1);
        } else {
          // Spill the value to memory and reload it into top of stack.
          unsigned Size = MVT::getSizeInBits(ArgVT)/8;
          MachineFunction &MF = DAG.getMachineFunction();
          int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
          MemLoc = DAG.getFrameIndex(SSFI, getPointerTy());
          Chain = DAG.getStore(Op.getOperand(0), Value, MemLoc, NULL, 0);
        }
        std::vector<MVT::ValueType> Tys;
        Tys.push_back(MVT::f64);
        Tys.push_back(MVT::Other);
        std::vector<SDOperand> Ops;
        Ops.push_back(Chain);
        Ops.push_back(MemLoc);
        Ops.push_back(DAG.getValueType(ArgVT));
        Copy = DAG.getNode(X86ISD::FLD, Tys, &Ops[0], Ops.size());
        Tys.clear();
        Tys.push_back(MVT::Other);
        Tys.push_back(MVT::Flag);
        Ops.clear();
        Ops.push_back(Copy.getValue(1));
        Ops.push_back(Copy);
        Copy = DAG.getNode(X86ISD::FP_SET_RESULT, Tys, &Ops[0], Ops.size());
      }
      break;
    }
    case 5: {
      unsigned Reg1 = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
      unsigned Reg2 = Subtarget->is64Bit() ? X86::RDX : X86::EDX;
      if (DAG.getMachineFunction().liveout_empty()) {
        DAG.getMachineFunction().addLiveOut(Reg1);
        DAG.getMachineFunction().addLiveOut(Reg2);
      }

      Copy = DAG.getCopyToReg(Op.getOperand(0), Reg2, Op.getOperand(3),
                              SDOperand());
      Copy = DAG.getCopyToReg(Copy, Reg1, Op.getOperand(1), Copy.getValue(1));
      break;
    }
  }
  return DAG.getNode(X86ISD::RET_FLAG, MVT::Other,
                     Copy, DAG.getConstant(getBytesToPopOnReturn(), MVT::i16),
                     Copy.getValue(1));
}

SDOperand
X86TargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  const Function* Fn = MF.getFunction();
  if (Fn->hasExternalLinkage() &&
      Subtarget->isTargetCygwin() &&
      Fn->getName() == "main")
    MF.getInfo<X86FunctionInfo>()->setForceFramePointer(true);

  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  if (Subtarget->is64Bit())
    return LowerX86_64CCCArguments(Op, DAG);
  else
    switch (CC) {
    default:
      assert(0 && "Unsupported calling convention");
    case CallingConv::Fast:
      if (EnableFastCC) {
        return LowerFastCCArguments(Op, DAG);
      }
      // Falls through
    case CallingConv::C:
    case CallingConv::CSRet:
      return LowerCCCArguments(Op, DAG);
    case CallingConv::X86_StdCall:
      MF.getInfo<X86FunctionInfo>()->setDecorationStyle(StdCall);
      return LowerStdCallCCArguments(Op, DAG);
    case CallingConv::X86_FastCall:
      MF.getInfo<X86FunctionInfo>()->setDecorationStyle(FastCall);
      return LowerFastCallCCArguments(Op, DAG);
    }
}

SDOperand X86TargetLowering::LowerMEMSET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand InFlag(0, 0);
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;

  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If not DWORD aligned, call memset if size is less than the threshold.
  // It knows how to align to the right boundary first.
  if ((Align & 3) != 0 ||
      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
    MVT::ValueType IntPtr = getPointerTy();
    const Type *IntPtrTy = getTargetData()->getIntPtrType();
    std::vector<std::pair<SDOperand, const Type*> > Args;
    Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
    // Extend the ubyte argument to be an int value for the call.
    SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
    Args.push_back(std::make_pair(Val, IntPtrTy));
    Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
    std::pair<SDOperand,SDOperand> CallResult =
      LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
                  DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
    return CallResult.second;
  }

  MVT::ValueType AVT;
  SDOperand Count;
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  unsigned BytesLeft = 0;
  bool TwoRepStos = false;
  if (ValC) {
    unsigned ValReg;
    uint64_t Val = ValC->getValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
    switch (Align & 3) {
      case 2:   // WORD aligned
        AVT = MVT::i16;
        ValReg = X86::AX;
        Val = (Val << 8) | Val;
        break;
      case 0:  // DWORD aligned
        AVT = MVT::i32;
        ValReg = X86::EAX;
        Val = (Val << 8)  | Val;
        Val = (Val << 16) | Val;
        if (Subtarget->is64Bit() && ((Align & 0xF) == 0)) {  // QWORD aligned
          AVT = MVT::i64;
          ValReg = X86::RAX;
          Val = (Val << 32) | Val;
        }
        break;
      default:  // Byte aligned
        AVT = MVT::i8;
        ValReg = X86::AL;
        Count = Op.getOperand(3);
        break;
    }
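    // For example, a memset value of 0xAB is replicated to 0xABAB for
    // rep;stosw, 0xABABABAB for rep;stosd, and 0xABABABABABABABAB for
    // rep;stosq.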

    if (AVT > MVT::i8) {
      if (I) {
        unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
        Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
        BytesLeft = I->getValue() % UBytes;
      } else {
        assert(AVT >= MVT::i32 &&
               "Do not use rep;stos if not at least DWORD aligned");
        Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
                            Op.getOperand(3), DAG.getConstant(2, MVT::i8));
        TwoRepStos = true;
      }
    }
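    // With a non-constant count, a second rep;stosb (TwoRepStos) will handle
    // the remaining count%4 (or count%8) bytes; with a constant count the
    // leftover 1-7 bytes are emitted as explicit stores further below.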

    Chain  = DAG.getCopyToReg(Chain, ValReg, DAG.getConstant(Val, AVT),
                              InFlag);
    InFlag = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count  = Op.getOperand(3);
    Chain  = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
    InFlag = Chain.getValue(1);
  }

  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
                            Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
                            Op.getOperand(1), InFlag);
  InFlag = Chain.getValue(1);

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::Other);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getValueType(AVT));
  Ops.push_back(InFlag);
  Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());

  if (TwoRepStos) {
    InFlag = Chain.getValue(1);
    Count = Op.getOperand(3);
    MVT::ValueType CVT = Count.getValueType();
    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
                              Left, InFlag);
    InFlag = Chain.getValue(1);
    Tys.clear();
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(MVT::i8));
    Ops.push_back(InFlag);
    Chain  = DAG.getNode(X86ISD::REP_STOS, Tys, &Ops[0], Ops.size());
  } else if (BytesLeft) {
    // Issue stores for the last 1 - 7 bytes.
    SDOperand Value;
    unsigned Val = ValC->getValue() & 255;
    unsigned Offset = I->getValue() - BytesLeft;
    SDOperand DstAddr = Op.getOperand(1);
    MVT::ValueType AddrVT = DstAddr.getValueType();
    if (BytesLeft >= 4) {
      Val = (Val << 8)  | Val;
      Val = (Val << 16) | Val;
      Value = DAG.getConstant(Val, MVT::i32);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                       DAG.getConstant(Offset, AddrVT)),
                           NULL, 0);
      BytesLeft -= 4;
      Offset += 4;
    }
    if (BytesLeft >= 2) {
      Value = DAG.getConstant((Val << 8) | Val, MVT::i16);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                       DAG.getConstant(Offset, AddrVT)),
                           NULL, 0);
      BytesLeft -= 2;
      Offset += 2;
    }
    if (BytesLeft == 1) {
      Value = DAG.getConstant(Val, MVT::i8);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, AddrVT, DstAddr,
                                       DAG.getConstant(Offset, AddrVT)),
                           NULL, 0);
    }
  }

  return Chain;
}

SDOperand X86TargetLowering::LowerMEMCPY(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Chain = Op.getOperand(0);
  unsigned Align =
    (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
  if (Align == 0) Align = 1;

  ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
  // If not DWORD aligned, call memcpy if size is less than the threshold.
  // It knows how to align to the right boundary first.
  if ((Align & 3) != 0 ||
      (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
    MVT::ValueType IntPtr = getPointerTy();
    const Type *IntPtrTy = getTargetData()->getIntPtrType();
    std::vector<std::pair<SDOperand, const Type*> > Args;
    Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
    Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy));
    Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
    std::pair<SDOperand,SDOperand> CallResult =
      LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
                  DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
    return CallResult.second;
  }

  MVT::ValueType AVT;
  SDOperand Count;
  unsigned BytesLeft = 0;
  bool TwoRepMovs = false;
  switch (Align & 3) {
    case 2:   // WORD aligned
      AVT = MVT::i16;
      break;
    case 0:  // DWORD aligned
      AVT = MVT::i32;
      if (Subtarget->is64Bit() && ((Align & 0xF) == 0))  // QWORD aligned
        AVT = MVT::i64;
      break;
    default:  // Byte aligned
      AVT = MVT::i8;
      Count = Op.getOperand(3);
      break;
  }

  if (AVT > MVT::i8) {
    if (I) {
      unsigned UBytes = MVT::getSizeInBits(AVT) / 8;
      Count = DAG.getConstant(I->getValue() / UBytes, getPointerTy());
      BytesLeft = I->getValue() % UBytes;
    } else {
      assert(AVT >= MVT::i32 &&
             "Do not use rep;movs if not at least DWORD aligned");
      Count = DAG.getNode(ISD::SRL, Op.getOperand(3).getValueType(),
                          Op.getOperand(3), DAG.getConstant(2, MVT::i8));
      TwoRepMovs = true;
    }
  }
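  // As in LowerMEMSET: with a non-constant count a second rep;movsb copies
  // the remaining bytes; with a constant count the last 1-7 bytes become
  // explicit load/store pairs below.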

  SDOperand InFlag(0, 0);
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
                            Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RDI : X86::EDI,
                            Op.getOperand(1), InFlag);
  InFlag = Chain.getValue(1);
  Chain  = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RSI : X86::ESI,
                            Op.getOperand(2), InFlag);
  InFlag = Chain.getValue(1);

  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::Other);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(DAG.getValueType(AVT));
  Ops.push_back(InFlag);
  Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());

  if (TwoRepMovs) {
    InFlag = Chain.getValue(1);
    Count = Op.getOperand(3);
    MVT::ValueType CVT = Count.getValueType();
    SDOperand Left = DAG.getNode(ISD::AND, CVT, Count,
                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
    Chain  = DAG.getCopyToReg(Chain, (CVT == MVT::i64) ? X86::RCX : X86::ECX,
                              Left, InFlag);
    InFlag = Chain.getValue(1);
    Tys.clear();
    Tys.push_back(MVT::Other);
    Tys.push_back(MVT::Flag);
    Ops.clear();
    Ops.push_back(Chain);
    Ops.push_back(DAG.getValueType(MVT::i8));
    Ops.push_back(InFlag);
    Chain = DAG.getNode(X86ISD::REP_MOVS, Tys, &Ops[0], Ops.size());
  } else if (BytesLeft) {
    // Issue loads and stores for the last 1 - 7 bytes.
    unsigned Offset = I->getValue() - BytesLeft;
    SDOperand DstAddr = Op.getOperand(1);
    MVT::ValueType DstVT = DstAddr.getValueType();
    SDOperand SrcAddr = Op.getOperand(2);
    MVT::ValueType SrcVT = SrcAddr.getValueType();
    SDOperand Value;
    if (BytesLeft >= 4) {
      Value = DAG.getLoad(MVT::i32, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          NULL, 0);
      Chain = Value.getValue(1);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                       DAG.getConstant(Offset, DstVT)),
                           NULL, 0);
      BytesLeft -= 4;
      Offset += 4;
    }
    if (BytesLeft >= 2) {
      Value = DAG.getLoad(MVT::i16, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          NULL, 0);
      Chain = Value.getValue(1);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                       DAG.getConstant(Offset, DstVT)),
                           NULL, 0);
      BytesLeft -= 2;
      Offset += 2;
    }

    if (BytesLeft == 1) {
      Value = DAG.getLoad(MVT::i8, Chain,
                          DAG.getNode(ISD::ADD, SrcVT, SrcAddr,
                                      DAG.getConstant(Offset, SrcVT)),
                          NULL, 0);
      Chain = Value.getValue(1);
      Chain = DAG.getStore(Chain, Value,
                           DAG.getNode(ISD::ADD, DstVT, DstAddr,
                                       DAG.getConstant(Offset, DstVT)),
                           NULL, 0);
    }
  }

  return Chain;
}

SDOperand
X86TargetLowering::LowerREADCYCLECOUNTER(SDOperand Op, SelectionDAG &DAG) {
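  // RDTSC leaves the 64-bit timestamp in EDX:EAX. On x86-64 the two 32-bit
  // halves are recombined into a single i64 ((hi << 32) | lo); on x86-32
  // the EAX/EDX pair is returned as two i32 values.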
  std::vector<MVT::ValueType> Tys;
  Tys.push_back(MVT::Other);
  Tys.push_back(MVT::Flag);
  std::vector<SDOperand> Ops;
  Ops.push_back(Op.getOperand(0));
  SDOperand rd = DAG.getNode(X86ISD::RDTSC_DAG, Tys, &Ops[0], Ops.size());
  Ops.clear();
  if (Subtarget->is64Bit()) {
    SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::RAX, MVT::i64, rd.getValue(1));
    SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::RDX,
                                         MVT::i64, Copy1.getValue(2));
    SDOperand Tmp = DAG.getNode(ISD::SHL, MVT::i64, Copy2,
                                DAG.getConstant(32, MVT::i8));
    Ops.push_back(DAG.getNode(ISD::OR, MVT::i64, Copy1, Tmp));
    Ops.push_back(Copy2.getValue(1));
    Tys[0] = MVT::i64;
    Tys[1] = MVT::Other;
  } else {
    SDOperand Copy1 = DAG.getCopyFromReg(rd, X86::EAX, MVT::i32, rd.getValue(1));
    SDOperand Copy2 = DAG.getCopyFromReg(Copy1.getValue(1), X86::EDX,
                                         MVT::i32, Copy1.getValue(2));
    Ops.push_back(Copy1);
    Ops.push_back(Copy2);
    Ops.push_back(Copy2.getValue(1));
    Tys[0] = Tys[1] = MVT::i32;
    Tys.push_back(MVT::Other);
  }
  return DAG.getNode(ISD::MERGE_VALUES, Tys, &Ops[0], Ops.size());
}

SDOperand X86TargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG) {
  SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2));

  if (!Subtarget->is64Bit()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
    return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV->getValue(),
                        SV->getOffset());
  }

  // __va_list_tag:
  //   gp_offset         (0 - 6 * 8)
  //   fp_offset         (48 - 48 + 8 * 16)
  //   overflow_arg_area (point to parameters coming in memory).
  //   reg_save_area
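  // This mirrors the x86-64 ABI's va_list layout, roughly:
  //   struct __va_list_tag {
  //     unsigned int gp_offset, fp_offset;
  //     void *overflow_arg_area, *reg_save_area;
  //   };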
  std::vector<SDOperand> MemOps;
  SDOperand FIN = Op.getOperand(1);
  // Store gp_offset
  SDOperand Store = DAG.getStore(Op.getOperand(0),
                                 DAG.getConstant(VarArgsGPOffset, MVT::i32),
                                 FIN, SV->getValue(), SV->getOffset());
  MemOps.push_back(Store);

  // Store fp_offset
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(4, getPointerTy()));
  Store = DAG.getStore(Op.getOperand(0),
                       DAG.getConstant(VarArgsFPOffset, MVT::i32),
                       FIN, SV->getValue(), SV->getOffset());
  MemOps.push_back(Store);

  // Store ptr to overflow_arg_area
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(4, getPointerTy()));
  SDOperand OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
  Store = DAG.getStore(Op.getOperand(0), OVFIN, FIN, SV->getValue(),
                       SV->getOffset());
  MemOps.push_back(Store);

  // Store ptr to reg_save_area.
  FIN = DAG.getNode(ISD::ADD, getPointerTy(), FIN,
                    DAG.getConstant(8, getPointerTy()));
  SDOperand RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
  Store = DAG.getStore(Op.getOperand(0), RSFIN, FIN, SV->getValue(),
                       SV->getOffset());
  MemOps.push_back(Store);
  return DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size());
}

SDOperand
X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue();
  switch (IntNo) {
  default: return SDOperand();    // Don't custom lower most intrinsics.
    // Comparison intrinsics.
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomineq_sd: {
    unsigned Opc = 0;
    ISD::CondCode CC = ISD::SETCC_INVALID;
    switch (IntNo) {
    default: break;
    case Intrinsic::x86_sse_comieq_ss:
    case Intrinsic::x86_sse2_comieq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_comilt_ss:
    case Intrinsic::x86_sse2_comilt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_comile_ss:
    case Intrinsic::x86_sse2_comile_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_comigt_ss:
    case Intrinsic::x86_sse2_comigt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_comige_ss:
    case Intrinsic::x86_sse2_comige_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_comineq_ss:
    case Intrinsic::x86_sse2_comineq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETNE;
      break;
    case Intrinsic::x86_sse_ucomieq_ss:
    case Intrinsic::x86_sse2_ucomieq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_ucomilt_ss:
    case Intrinsic::x86_sse2_ucomilt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_ucomile_ss:
    case Intrinsic::x86_sse2_ucomile_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_ucomigt_ss:
    case Intrinsic::x86_sse2_ucomigt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_ucomige_ss:
    case Intrinsic::x86_sse2_ucomige_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_ucomineq_ss:
    case Intrinsic::x86_sse2_ucomineq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETNE;
      break;
    }

    unsigned X86CC;
    SDOperand LHS = Op.getOperand(1);
    SDOperand RHS = Op.getOperand(2);
    translateX86CC(CC, true, X86CC, LHS, RHS, DAG);

    const MVT::ValueType *VTs = DAG.getNodeValueTypes(MVT::Other, MVT::Flag);
    SDOperand Ops1[] = { DAG.getEntryNode(), LHS, RHS };
    SDOperand Cond = DAG.getNode(Opc, VTs, 2, Ops1, 3);
    VTs = DAG.getNodeValueTypes(MVT::i8, MVT::Flag);
    SDOperand Ops2[] = { DAG.getConstant(X86CC, MVT::i8), Cond };
    SDOperand SetCC = DAG.getNode(X86ISD::SETCC, VTs, 2, Ops2, 2);
    return DAG.getNode(ISD::ANY_EXTEND, MVT::i32, SetCC);
  }
  }
}

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getOpcode()) {
  default: assert(0 && "Should not custom lower this!");
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
  case ISD::SHL_PARTS:
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
  case ISD::FABS:               return LowerFABS(Op, DAG);
  case ISD::FNEG:               return LowerFNEG(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG, DAG.getEntryNode());
  case ISD::SELECT:             return LowerSELECT(Op, DAG);
  case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::CALL:               return LowerCALL(Op, DAG);
  case ISD::RET:                return LowerRET(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:   return LowerFORMAL_ARGUMENTS(Op, DAG);
  case ISD::MEMSET:             return LowerMEMSET(Op, DAG);
  case ISD::MEMCPY:             return LowerMEMCPY(Op, DAG);
  case ISD::READCYCLECOUNTER:   return LowerREADCYCLECOUNTER(Op, DAG);
  case ISD::VASTART:            return LowerVASTART(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  }
}

const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return NULL;
  case X86ISD::SHLD:               return "X86ISD::SHLD";
  case X86ISD::SHRD:               return "X86ISD::SHRD";
  case X86ISD::FAND:               return "X86ISD::FAND";
  case X86ISD::FXOR:               return "X86ISD::FXOR";
  case X86ISD::FILD:               return "X86ISD::FILD";
  case X86ISD::FILD_FLAG:          return "X86ISD::FILD_FLAG";
  case X86ISD::FP_TO_INT16_IN_MEM: return "X86ISD::FP_TO_INT16_IN_MEM";
  case X86ISD::FP_TO_INT32_IN_MEM: return "X86ISD::FP_TO_INT32_IN_MEM";
  case X86ISD::FP_TO_INT64_IN_MEM: return "X86ISD::FP_TO_INT64_IN_MEM";
  case X86ISD::FLD:                return "X86ISD::FLD";
  case X86ISD::FST:                return "X86ISD::FST";
  case X86ISD::FP_GET_RESULT:      return "X86ISD::FP_GET_RESULT";
  case X86ISD::FP_SET_RESULT:      return "X86ISD::FP_SET_RESULT";
  case X86ISD::CALL:               return "X86ISD::CALL";
  case X86ISD::TAILCALL:           return "X86ISD::TAILCALL";
  case X86ISD::RDTSC_DAG:          return "X86ISD::RDTSC_DAG";
  case X86ISD::CMP:                return "X86ISD::CMP";
  case X86ISD::COMI:               return "X86ISD::COMI";
  case X86ISD::UCOMI:              return "X86ISD::UCOMI";
  case X86ISD::SETCC:              return "X86ISD::SETCC";
  case X86ISD::CMOV:               return "X86ISD::CMOV";
  case X86ISD::BRCOND:             return "X86ISD::BRCOND";
  case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
  case X86ISD::REP_STOS:           return "X86ISD::REP_STOS";
  case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
  case X86ISD::LOAD_PACK:          return "X86ISD::LOAD_PACK";
  case X86ISD::LOAD_UA:            return "X86ISD::LOAD_UA";
  case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
  case X86ISD::Wrapper:            return "X86ISD::Wrapper";
  case X86ISD::S2VEC:              return "X86ISD::S2VEC";
  case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
  case X86ISD::PINSRW:             return "X86ISD::PINSRW";
  case X86ISD::FMAX:               return "X86ISD::FMAX";
  case X86ISD::FMIN:               return "X86ISD::FMIN";
  }
}

/// isLegalAddressImmediate - Return true if the integer value or
/// GlobalValue can be used as the offset of the target addressing mode.
bool X86TargetLowering::isLegalAddressImmediate(int64_t V) const {
  // X86 allows a sign-extended 32-bit immediate field.
  return (V >= -(1LL << 31) && V <= (1LL << 31)-1);
}

bool X86TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const {
  // GV is 64-bit but displacement field is 32-bit unless we are in small code
  // model. Mac OS X happens to support only small PIC code model.
  // FIXME: better support for other OS's.
  if (Subtarget->is64Bit() && !Subtarget->isTargetDarwin())
    return false;
  if (Subtarget->isTargetDarwin()) {
    Reloc::Model RModel = getTargetMachine().getRelocationModel();
    if (RModel == Reloc::Static)
      return true;
    else if (RModel == Reloc::DynamicNoPIC)
      return !(Subtarget->GVRequiresExtraLoad(GV, false));
    else
      return false;
  } else
    return true;
}

/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(SDOperand Mask, MVT::ValueType VT) const {
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(VT) == 64) return false;
  return (Mask.Val->getNumOperands() <= 4 ||
          isSplatMask(Mask.Val) ||
          isPSHUFHW_PSHUFLWMask(Mask.Val) ||
          X86::isUNPCKLMask(Mask.Val) ||
          X86::isUNPCKL_v_undef_Mask(Mask.Val) ||
          X86::isUNPCKHMask(Mask.Val));
}

bool X86TargetLowering::isVectorClearMaskLegal(std::vector<SDOperand> &BVOps,
                                               MVT::ValueType EVT,
                                               SelectionDAG &DAG) const {
  unsigned NumElts = BVOps.size();
  // Only do shuffles on 128-bit vector types for now.
  if (MVT::getSizeInBits(EVT) * NumElts == 64) return false;
  if (NumElts == 2) return true;
  if (NumElts == 4) {
    return (isMOVLMask(BVOps)  || isCommutedMOVL(BVOps, true) ||
            isSHUFPMask(BVOps) || isCommutedSHUFP(BVOps));
  }
  return false;
}

//===----------------------------------------------------------------------===//
//                           X86 Scheduler Hooks
//===----------------------------------------------------------------------===//

MachineBasicBlock *
X86TargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64: {
    // To "insert" a SELECT_CC instruction, we actually have to insert the
    // diamond control-flow pattern.  The incoming instruction knows the
    // destination vreg to set, the condition code register to branch on, the
    // true/false values to select between, and a branch opcode to use.
    const BasicBlock *LLVM_BB = BB->getBasicBlock();
    ilist<MachineBasicBlock>::iterator It = BB;
    ++It;

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
    unsigned Opc =
      X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
    BuildMI(BB, TII->get(Opc)).addMBB(sinkMBB);
    MachineFunction *F = BB->getParent();
    F->getBasicBlockList().insert(It, copy0MBB);
    F->getBasicBlockList().insert(It, sinkMBB);
    // Update machine-CFG edges by first adding all successors of the current
    // block to the new block which will contain the Phi node for the select.
    for (MachineBasicBlock::succ_iterator i = BB->succ_begin(),
         e = BB->succ_end(); i != e; ++i)
      sinkMBB->addSuccessor(*i);
    // Next, remove all successors of the current block, and add the true
    // and fallthrough blocks as its successors.
    while (!BB->succ_empty())
      BB->removeSuccessor(BB->succ_begin());
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, TII->get(X86::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }

  case X86::FP_TO_INT16_IN_MEM:
  case X86::FP_TO_INT32_IN_MEM:
  case X86::FP_TO_INT64_IN_MEM: {
    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
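    // The emitted sequence is roughly (illustrative):
    //   fnstcw [slot]             ; save the current control word
    //   mov    ax, [slot]         ; remember it
    //   mov    word [slot], 0xC7F ; force round-towards-zero
    //   fldcw  [slot]
    //   mov    [slot], ax         ; restore the memory image
    //   fistp  [dest]             ; truncating integer store
    //   fldcw  [slot]             ; reload the original control word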
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
    addFrameReference(BuildMI(BB, TII->get(X86::FNSTCW16m)), CWFrameIdx);

    // Load the old value of the control word...
    unsigned OldCW =
      F->getSSARegMap()->createVirtualRegister(X86::GR16RegisterClass);
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16rm), OldCW), CWFrameIdx);

    // Set the control word to round-towards-zero...
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mi)), CWFrameIdx)
      .addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    // Restore the memory image of the control word to the original value.
    addFrameReference(BuildMI(BB, TII->get(X86::MOV16mr)), CWFrameIdx)
      .addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: assert(0 && "illegal opcode!");
    case X86::FP_TO_INT16_IN_MEM: Opc = X86::FpIST16m; break;
    case X86::FP_TO_INT32_IN_MEM: Opc = X86::FpIST32m; break;
    case X86::FP_TO_INT64_IN_MEM: Opc = X86::FpIST64m; break;
    }

    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isRegister()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getFrameIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImmediate())
      AM.Scale = Op.getImm();
    Op = MI->getOperand(2);
    if (Op.isImmediate())
      AM.IndexReg = Op.getImm();
    Op = MI->getOperand(3);
    if (Op.isGlobalAddress()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImm();
    }
    addFullAddress(BuildMI(BB, TII->get(Opc)), AM)
                      .addReg(MI->getOperand(4).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, TII->get(X86::FLDCW16m)), CWFrameIdx);

    delete MI;   // The pseudo instruction is gone now.
    return BB;
  }
  }
}

//===----------------------------------------------------------------------===//
//                           X86 Optimization Hooks
//===----------------------------------------------------------------------===//

void X86TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       uint64_t Mask,
                                                       uint64_t &KnownZero,
                                                       uint64_t &KnownOne,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  KnownZero = KnownOne = 0;   // Don't know anything.
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
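    // X86ISD::SETCC produces 0 or 1 in its result, so every bit above bit 0
    // is known to be zero.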
    KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL);
    break;
  }
}

/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
static SDOperand getShuffleScalarElt(SDNode *N, unsigned i, SelectionDAG &DAG) {
  MVT::ValueType VT = N->getValueType(0);
  SDOperand PermMask = N->getOperand(2);
  unsigned NumElems = PermMask.getNumOperands();
  SDOperand V = (i < NumElems) ? N->getOperand(0) : N->getOperand(1);
  i %= NumElems;
  if (V.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    return (i == 0)
      ? V.getOperand(0) : DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
  } else if (V.getOpcode() == ISD::VECTOR_SHUFFLE) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF)
      return DAG.getNode(ISD::UNDEF, MVT::getVectorBaseType(VT));
    return getShuffleScalarElt(V.Val, cast<ConstantSDNode>(Idx)->getValue(), DAG);
  }
  return SDOperand();
}

/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + an offset.
static bool isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) {
  if (N->getOpcode() == X86ISD::Wrapper) {
    if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
      return true;
    }
  } else if (N->getOpcode() == ISD::ADD) {
    SDOperand N1 = N->getOperand(0);
    SDOperand N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    }
  }
  return false;
}

/// isConsecutiveLoad - Returns true if N is loading from an address of Base
/// + Dist * Size.
static bool isConsecutiveLoad(SDNode *N, SDNode *Base, int Dist, int Size,
                              MachineFrameInfo *MFI) {
  if (N->getOperand(0).Val != Base->getOperand(0).Val)
    return false;

  SDOperand Loc = N->getOperand(1);
  SDOperand BaseLoc = Base->getOperand(1);
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI->getObjectSize(FI);
    int BFS = MFI->getObjectSize(BFI);
    if (FS != BFS || FS != Size) return false;
    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Size);
  } else {
    GlobalValue *GV1 = NULL;
    GlobalValue *GV2 = NULL;
    int64_t Offset1 = 0;
    int64_t Offset2 = 0;
    bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
    bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
    if (isGA1 && isGA2 && GV1 == GV2)
      return Offset1 == (Offset2 + Dist*Size);
  }

  return false;
}

static bool isBaseAlignment16(SDNode *Base, MachineFrameInfo *MFI,
                              const X86Subtarget *Subtarget) {
  GlobalValue *GV;
  int64_t Offset = 0;  // isGAPlusOffset only adds to this, so start at zero.
  if (isGAPlusOffset(Base, GV, Offset))
    return (GV->getAlignment() >= 16 && (Offset % 16) == 0);
  else {
    assert(Base->getOpcode() == ISD::FrameIndex && "Unexpected base node!");
    int BFI = cast<FrameIndexSDNode>(Base)->getIndex();
    if (BFI < 0)
      // Fixed objects do not specify alignment, however the offsets are known.
      return ((Subtarget->getStackAlignment() % 16) == 0 &&
              (MFI->getObjectOffset(BFI) % 16) == 0);
    else
      return MFI->getObjectAlignment(BFI) >= 16;
  }
  return false;
}

/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget *Subtarget) {
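  // Illustrative target pattern:
  //   (vector_shuffle (build_vector (load p), (load p+4), (load p+8),
  //                                 (load p+12)), <0,1,2,3>)
  // becomes a single (load p) when p is 16-byte aligned, and a movups
  // (X86ISD::LOAD_UA) otherwise.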
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MVT::ValueType VT = N->getValueType(0);
  MVT::ValueType EVT = MVT::getVectorBaseType(VT);
  SDOperand PermMask = N->getOperand(2);
  int NumElems = (int)PermMask.getNumOperands();
  SDNode *Base = NULL;
  for (int i = 0; i < NumElems; ++i) {
    SDOperand Idx = PermMask.getOperand(i);
    if (Idx.getOpcode() == ISD::UNDEF) {
      if (!Base) return SDOperand();
    } else {
      SDOperand Arg =
        getShuffleScalarElt(N, cast<ConstantSDNode>(Idx)->getValue(), DAG);
      if (!Arg.Val || !ISD::isNON_EXTLoad(Arg.Val))
        return SDOperand();
      if (!Base)
        Base = Arg.Val;
      else if (!isConsecutiveLoad(Arg.Val, Base,
                                  i, MVT::getSizeInBits(EVT)/8, MFI))
        return SDOperand();
    }
  }

  bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
  if (isAlign16) {
    LoadSDNode *LD = cast<LoadSDNode>(Base);
    return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
                       LD->getSrcValueOffset());
  } else {
    // Just use movups, it's shorter.
    std::vector<MVT::ValueType> Tys;
    Tys.push_back(MVT::v4f32);
    Tys.push_back(MVT::Other);
    SmallVector<SDOperand, 3> Ops;
    Ops.push_back(Base->getOperand(0));
    Ops.push_back(Base->getOperand(1));
    Ops.push_back(Base->getOperand(2));
    return DAG.getNode(ISD::BIT_CONVERT, VT,
                       DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size()));
  }
}

/// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget *Subtarget) {
  SDOperand Cond = N->getOperand(0);

  // If we have SSE[12] support, try to form min/max nodes.
5363      (N->getValueType(0) == MVT::f32 || N->getValueType(0) == MVT::f64)) {
5364    if (Cond.getOpcode() == ISD::SETCC) {
5365      // Get the LHS/RHS of the select.
5366      SDOperand LHS = N->getOperand(1);
5367      SDOperand RHS = N->getOperand(2);
5368      ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
5369
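      // Note: the SSE min/max instructions return their second operand when
      // the operands are unordered or are zeros of opposite sign, so the
      // condition codes guarded by UnsafeFPMath below are folded only when
      // NaN and signed-zero behavior may be ignored.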
      unsigned Opcode = 0;
      if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
        switch (CC) {
        default: break;
        case ISD::SETOLE: // (X <= Y) ? X : Y -> min
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:  // (X olt/lt Y) ? X : Y -> min
        case ISD::SETLT:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOGT: // (X > Y) ? X : Y -> max
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? X : Y -> max
        case ISD::SETGE:
          Opcode = X86ISD::FMAX;
          break;
        }
      } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
        switch (CC) {
        default: break;
        case ISD::SETOGT: // (X > Y) ? Y : X -> min
        case ISD::SETUGT:
        case ISD::SETGT:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETUGE:  // (X uge/ge Y) ? Y : X -> min
        case ISD::SETGE:
          Opcode = X86ISD::FMIN;
          break;

        case ISD::SETOLE:   // (X <= Y) ? Y : X -> max
        case ISD::SETULE:
        case ISD::SETLE:
          if (!UnsafeFPMath) break;
          // FALL THROUGH.
        case ISD::SETOLT:   // (X olt/lt Y) ? Y : X -> max
        case ISD::SETLT:
          Opcode = X86ISD::FMAX;
          break;
        }
      }

      if (Opcode)
        return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
    }

  }

  return SDOperand();
}


SDOperand X86TargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE:
    return PerformShuffleCombine(N, DAG, Subtarget);
  case ISD::SELECT:
    return PerformSELECTCombine(N, DAG, Subtarget);
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
//                           X86 Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
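/// For example, the 'r' in asm("bswap %0" : "+r"(X)) names a general
/// register and is classified here as C_RegisterClass.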
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(char ConstraintLetter) const {
  switch (ConstraintLetter) {
  case 'A':
  case 'r':
  case 'R':
  case 'l':
  case 'q':
  case 'Q':
  case 'x':
  case 'Y':
    return C_RegisterClass;
  default: return TargetLowering::getConstraintType(ConstraintLetter);
  }
}

/// isOperandValidForConstraint - Return the specified operand (possibly
/// modified) if the specified SDOperand is valid for the specified target
/// constraint letter, otherwise return null.
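/// For example, an 'i' constraint accepts a literal immediate and, outside
/// of PIC codegen, the address of a global variable.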
SDOperand X86TargetLowering::
isOperandValidForConstraint(SDOperand Op, char Constraint, SelectionDAG &DAG) {
  switch (Constraint) {
  default: break;
  case 'i':
    // Literal immediates are always ok.
    if (isa<ConstantSDNode>(Op)) return Op;

    // If we are in non-PIC codegen mode, we allow the address of a global to
    // be used with 'i'.
    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
      if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
        return SDOperand(0, 0);

      if (GA->getOpcode() != ISD::TargetGlobalAddress)
        Op = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
                                        GA->getOffset());
      return Op;
    }

    // Otherwise, not valid for this mode.
    return SDOperand(0, 0);
  }
  return TargetLowering::isOperandValidForConstraint(Op, Constraint, DAG);
}


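/// getRegClassForInlineAsmConstraint - Given a register class constraint
/// letter and a value type, return the set of registers that can satisfy it,
/// or an empty vector if the constraint is not handled here.  For example,
/// the 'A' constraint names the EAX/EDX pair, as in asm("rdtsc" : "=A"(T)).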
std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    // FIXME: not handling MMX registers yet ('y' constraint).
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'A':   // EAX/EDX
      if (VT == MVT::i32 || VT == MVT::i64)
        return make_vector<unsigned>(X86::EAX, X86::EDX, 0);
      break;
    case 'r':   // GENERAL_REGS
    case 'R':   // LEGACY_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
                                     X86::ESI, X86::EDI, X86::EBP, X86::ESP, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
                                     X86::SI, X86::DI, X86::BP, X86::SP, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
    case 'l':   // INDEX_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
                                     X86::ESI, X86::EDI, X86::EBP, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
                                     X86::SI, X86::DI, X86::BP, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
    case 'q':   // Q_REGS (GENERAL_REGS in 64-bit mode)
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      break;
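    // 'x' and 'Y' request XMM registers, as used by SSE inline asm such as
    // asm("addps %1, %0" : "+x"(A) : "x"(B)).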
    case 'x':   // SSE_REGS if SSE1 allowed
      if (Subtarget->hasSSE1())
        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
                                     0);
      return std::vector<unsigned>();
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (Subtarget->hasSSE2())
        return make_vector<unsigned>(X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
                                     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7,
                                     0);
      return std::vector<unsigned>();
    }
  }

  return std::vector<unsigned>();
}

std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT::ValueType VT) const {
  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found as a standard register?
  if (Res.second == 0) {
    // GCC calls "st(0)" just plain "st".
    if (StringsEqualNoCase("{st}", Constraint)) {
      Res.first = X86::ST0;
      Res.second = X86::RSTRegisterClass;
    }

    return Res;
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}; we don't want it
  // to turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
  if (Res.second != X86::GR16RegisterClass)
    return Res;

  if (VT == MVT::i8) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::AL; break;
    case X86::DX: DestReg = X86::DL; break;
    case X86::CX: DestReg = X86::CL; break;
    case X86::BX: DestReg = X86::BL; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR8RegisterClass;
    }
  } else if (VT == MVT::i32) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::EAX; break;
    case X86::DX: DestReg = X86::EDX; break;
    case X86::CX: DestReg = X86::ECX; break;
    case X86::BX: DestReg = X86::EBX; break;
    case X86::SI: DestReg = X86::ESI; break;
    case X86::DI: DestReg = X86::EDI; break;
    case X86::BP: DestReg = X86::EBP; break;
    case X86::SP: DestReg = X86::ESP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR32RegisterClass;
    }
  } else if (VT == MVT::i64) {
    unsigned DestReg = 0;
    switch (Res.first) {
    default: break;
    case X86::AX: DestReg = X86::RAX; break;
    case X86::DX: DestReg = X86::RDX; break;
    case X86::CX: DestReg = X86::RCX; break;
    case X86::BX: DestReg = X86::RBX; break;
    case X86::SI: DestReg = X86::RSI; break;
    case X86::DI: DestReg = X86::RDI; break;
    case X86::BP: DestReg = X86::RBP; break;
    case X86::SP: DestReg = X86::RSP; break;
    }
    if (DestReg) {
      Res.first = DestReg;
      Res.second = X86::GR64RegisterClass;
    }
  }

  return Res;
}
