// X86ISelLowering.cpp, revision 607a7ab3da72a2eb53553a520507cbb8068dd1d8
//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86TargetMachine.h"
#include "X86TargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace dwarf;

STATISTIC(NumTailCalls, "Number of tail calls");

static cl::opt<bool>
DisableMMX("disable-mmx", cl::Hidden, cl::desc("Disable use of MMX"));

// Disable16Bit - 16-bit operations typically have a larger encoding than
// corresponding 32-bit instructions, and 16-bit code is slow on some
// processors. This is an experimental flag to disable 16-bit operations
// (which forces them to be Legalized to 32-bit operations).
static cl::opt<bool>
Disable16Bit("disable-16bit", cl::Hidden,
             cl::desc("Disable use of 16-bit instructions"));

// Forward declarations.
static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
                       SDValue V2);

static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
  switch (TM.getSubtarget<X86Subtarget>().TargetType) {
  default: llvm_unreachable("unknown subtarget type");
  case X86Subtarget::isDarwin:
    if (TM.getSubtarget<X86Subtarget>().is64Bit())
      return new X8664_MachoTargetObjectFile();
    return new TargetLoweringObjectFileMachO();
  case X86Subtarget::isELF:
    if (TM.getSubtarget<X86Subtarget>().is64Bit())
      return new X8664_ELFTargetObjectFile(TM);
    return new X8632_ELFTargetObjectFile(TM);
  case X86Subtarget::isMingw:
  case X86Subtarget::isCygwin:
  case X86Subtarget::isWindows:
    return new TargetLoweringObjectFileCOFF();
  }
}

X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
  : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<X86Subtarget>();
  X86ScalarSSEf64 = Subtarget->hasSSE2();
  X86ScalarSSEf32 = Subtarget->hasSSE1();
  X86StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;

  RegInfo = TM.getRegisterInfo();
  TD = getTargetData();
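
  // Note (added commentary, not in the original revision): the constructor
  // below configures legalization with setOperationAction(Op, VT, Action).
  // Roughly: Legal means the node is matched directly by instruction
  // selection; Promote means it is performed in a wider type; Expand means
  // the legalizer rewrites it in terms of other nodes; Custom routes it to
  // X86TargetLowering's LowerOperation hook. For example,
  // setOperationAction(ISD::CTPOP, MVT::i32, Expand) asks the legalizer to
  // synthesize i32 ctpop from shifts and masks rather than expecting a
  // native instruction.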

  // Set up the TargetLowering object.

  // X86 is weird, it always uses i8 for shift amounts and setcc results.
  setShiftAmountType(MVT::i8);
  setBooleanContents(ZeroOrOneBooleanContent);
  setSchedulingPreference(SchedulingForRegPressure);
  setStackPointerRegisterToSaveRestore(X86StackPtr);

  if (Subtarget->isTargetDarwin()) {
    // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    setUseUnderscoreSetJmp(false);
    setUseUnderscoreLongJmp(false);
  } else if (Subtarget->isTargetMingw()) {
    // MS runtime is weird: it exports _setjmp, but longjmp!
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(false);
  } else {
    setUseUnderscoreSetJmp(true);
    setUseUnderscoreLongJmp(true);
  }

  // Set up the register classes.
  addRegisterClass(MVT::i8, X86::GR8RegisterClass);
  if (!Disable16Bit)
    addRegisterClass(MVT::i16, X86::GR16RegisterClass);
  addRegisterClass(MVT::i32, X86::GR32RegisterClass);
  if (Subtarget->is64Bit())
    addRegisterClass(MVT::i64, X86::GR64RegisterClass);

  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // We don't accept any truncstore of integer registers.
  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
  if (!Disable16Bit)
    setTruncStoreAction(MVT::i64, MVT::i16, Expand);
  setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
  if (!Disable16Bit)
    setTruncStoreAction(MVT::i32, MVT::i16, Expand);
  setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
  setTruncStoreAction(MVT::i16, MVT::i8 , Expand);

  // SETOEQ and SETUNE require checking two conditions.
  setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);

  // Promote all UINT_TO_FP to larger SINT_TO_FPs, as X86 doesn't have this
  // operation.
  setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
  setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Promote);
    setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Expand);
  } else if (!UseSoftFloat) {
    if (X86ScalarSSEf64) {
      // We have an impenetrably clever algorithm for ui64->double only.
      setOperationAction(ISD::UINT_TO_FP   , MVT::i64  , Custom);
    }
    // We have an algorithm for SSE2, and we turn this into a 64-bit
    // FILD for other targets.
    setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Custom);
  }

  // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FPs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);

  if (!UseSoftFloat) {
    // SSE has no i16 to fp conversion, only i32.
    if (X86ScalarSSEf32) {
      setOperationAction(ISD::SINT_TO_FP   , MVT::i16  , Promote);
      // f32 and f64 cases are Legal, f80 case is not.
      setOperationAction(ISD::SINT_TO_FP   , MVT::i32  , Custom);
    } else {
      setOperationAction(ISD::SINT_TO_FP   , MVT::i16  , Custom);
      setOperationAction(ISD::SINT_TO_FP   , MVT::i32  , Custom);
    }
  } else {
    setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
    setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Promote);
  }

  // In 32-bit mode these are custom lowered. In 64-bit mode f32 and f64
  // are Legal, f80 is custom lowered.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i64  , Custom);
  setOperationAction(ISD::SINT_TO_FP       , MVT::i64  , Custom);

  // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTs, as X86 doesn't have
  // this operation.
  setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);

  if (X86ScalarSSEf32) {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
    // f32 and f64 cases are Legal, f80 case is not.
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  } else {
    setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
  }

  // Handle FP_TO_UINT by promoting the destination to a larger signed
  // conversion.
  setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
  setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);
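
  // Note (added commentary): pre-AVX512 x86 has no direct fp -> u32
  // instruction, so the strategies below follow the comment above: on 64-bit
  // targets FP_TO_UINT i32 is promoted to a wider signed conversion and
  // truncated; on 32-bit targets it is either expanded to a compare/select
  // around 2^31 built from signed conversions, or custom-lowered through the
  // x87 fisttp/fistp path.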

  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Expand);
    setOperationAction(ISD::FP_TO_UINT     , MVT::i32  , Promote);
  } else if (!UseSoftFloat) {
    if (X86ScalarSSEf32 && !Subtarget->hasSSE3())
      // Expand FP_TO_UINT into a select.
      // FIXME: We would like to use a Custom expander here eventually to do
      // the optimal thing for SSE vs. the default expansion in the legalizer.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
    else
      // With SSE3 we can use fisttpll to convert to a signed i64; without
      // SSE, we're stuck with a fistpll.
      setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Custom);
  }

  // TODO: when we have SSE, these could be more efficient, by using movd/movq.
  if (!X86ScalarSSEf64) {
    setOperationAction(ISD::BIT_CONVERT    , MVT::f32  , Expand);
    setOperationAction(ISD::BIT_CONVERT    , MVT::i32  , Expand);
  }

  // Scalar integer divide and remainder are lowered to use operations that
  // produce two results, to match the available instructions. This exposes
  // the two-result form to trivial CSE, which is able to combine x/y and x%y
  // into a single instruction.
  //
  // Scalar integer multiply-high is also lowered to use two-result
  // operations, to match the available instructions. However, plain multiply
  // (low) operations are left as Legal, as there are single-result
  // instructions for this in x86. Using the two-result multiply instructions
  // when both high and low results are needed must be arranged by dagcombine.
  setOperationAction(ISD::MULHS            , MVT::i8   , Expand);
  setOperationAction(ISD::MULHU            , MVT::i8   , Expand);
  setOperationAction(ISD::SDIV             , MVT::i8   , Expand);
  setOperationAction(ISD::UDIV             , MVT::i8   , Expand);
  setOperationAction(ISD::SREM             , MVT::i8   , Expand);
  setOperationAction(ISD::UREM             , MVT::i8   , Expand);
  setOperationAction(ISD::MULHS            , MVT::i16  , Expand);
  setOperationAction(ISD::MULHU            , MVT::i16  , Expand);
  setOperationAction(ISD::SDIV             , MVT::i16  , Expand);
  setOperationAction(ISD::UDIV             , MVT::i16  , Expand);
  setOperationAction(ISD::SREM             , MVT::i16  , Expand);
  setOperationAction(ISD::UREM             , MVT::i16  , Expand);
  setOperationAction(ISD::MULHS            , MVT::i32  , Expand);
  setOperationAction(ISD::MULHU            , MVT::i32  , Expand);
  setOperationAction(ISD::SDIV             , MVT::i32  , Expand);
  setOperationAction(ISD::UDIV             , MVT::i32  , Expand);
  setOperationAction(ISD::SREM             , MVT::i32  , Expand);
  setOperationAction(ISD::UREM             , MVT::i32  , Expand);
  setOperationAction(ISD::MULHS            , MVT::i64  , Expand);
  setOperationAction(ISD::MULHU            , MVT::i64  , Expand);
  setOperationAction(ISD::SDIV             , MVT::i64  , Expand);
  setOperationAction(ISD::UDIV             , MVT::i64  , Expand);
  setOperationAction(ISD::SREM             , MVT::i64  , Expand);
  setOperationAction(ISD::UREM             , MVT::i64  , Expand);

  setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
  setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
  setOperationAction(ISD::BR_CC            , MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC        , MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8  , Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1  , Expand);
  setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32 , Expand);
  setOperationAction(ISD::FREM             , MVT::f32 , Expand);
  setOperationAction(ISD::FREM             , MVT::f64 , Expand);
  setOperationAction(ISD::FREM             , MVT::f80 , Expand);
  setOperationAction(ISD::FLT_ROUNDS_      , MVT::i32 , Custom);

  setOperationAction(ISD::CTPOP            , MVT::i8  , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i8  , Custom);
  setOperationAction(ISD::CTLZ             , MVT::i8  , Custom);
  setOperationAction(ISD::CTPOP            , MVT::i16 , Expand);
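  // Note (added commentary): CTTZ/CTLZ are Custom rather than Legal because
  // BSF/BSR leave the destination undefined when the input is zero, so the
  // lowering has to materialize the zero-input result (e.g. via a CMOV)
  // around the bit-scan instruction.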
  if (Disable16Bit) {
    setOperationAction(ISD::CTTZ           , MVT::i16 , Expand);
    setOperationAction(ISD::CTLZ           , MVT::i16 , Expand);
  } else {
    setOperationAction(ISD::CTTZ           , MVT::i16 , Custom);
    setOperationAction(ISD::CTLZ           , MVT::i16 , Custom);
  }
  setOperationAction(ISD::CTPOP            , MVT::i32 , Expand);
  setOperationAction(ISD::CTTZ             , MVT::i32 , Custom);
  setOperationAction(ISD::CTLZ             , MVT::i32 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::CTPOP          , MVT::i64 , Expand);
    setOperationAction(ISD::CTTZ           , MVT::i64 , Custom);
    setOperationAction(ISD::CTLZ           , MVT::i64 , Custom);
  }

  setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
  setOperationAction(ISD::BSWAP            , MVT::i16 , Expand);

  // These should be promoted to a larger select which is supported.
  setOperationAction(ISD::SELECT           , MVT::i1  , Promote);
  // X86 wants to expand cmov itself.
  setOperationAction(ISD::SELECT           , MVT::i8  , Custom);
  if (Disable16Bit)
    setOperationAction(ISD::SELECT         , MVT::i16 , Expand);
  else
    setOperationAction(ISD::SELECT         , MVT::i16 , Custom);
  setOperationAction(ISD::SELECT           , MVT::i32 , Custom);
  setOperationAction(ISD::SELECT           , MVT::f32 , Custom);
  setOperationAction(ISD::SELECT           , MVT::f64 , Custom);
  setOperationAction(ISD::SELECT           , MVT::f80 , Custom);
  setOperationAction(ISD::SETCC            , MVT::i8  , Custom);
  if (Disable16Bit)
    setOperationAction(ISD::SETCC          , MVT::i16 , Expand);
  else
    setOperationAction(ISD::SETCC          , MVT::i16 , Custom);
  setOperationAction(ISD::SETCC            , MVT::i32 , Custom);
  setOperationAction(ISD::SETCC            , MVT::f32 , Custom);
  setOperationAction(ISD::SETCC            , MVT::f64 , Custom);
  setOperationAction(ISD::SETCC            , MVT::f80 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SELECT         , MVT::i64 , Custom);
    setOperationAction(ISD::SETCC          , MVT::i64 , Custom);
  }
  setOperationAction(ISD::EH_RETURN        , MVT::Other, Custom);

  // Darwin ABI issue.
  setOperationAction(ISD::ConstantPool     , MVT::i32 , Custom);
  setOperationAction(ISD::JumpTable        , MVT::i32 , Custom);
  setOperationAction(ISD::GlobalAddress    , MVT::i32 , Custom);
  setOperationAction(ISD::GlobalTLSAddress , MVT::i32 , Custom);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::ExternalSymbol   , MVT::i32 , Custom);
  setOperationAction(ISD::BlockAddress     , MVT::i32 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::ConstantPool   , MVT::i64 , Custom);
    setOperationAction(ISD::JumpTable      , MVT::i64 , Custom);
    setOperationAction(ISD::GlobalAddress  , MVT::i64 , Custom);
    setOperationAction(ISD::ExternalSymbol , MVT::i64 , Custom);
    setOperationAction(ISD::BlockAddress   , MVT::i64 , Custom);
  }
  // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
  setOperationAction(ISD::SHL_PARTS        , MVT::i32 , Custom);
  setOperationAction(ISD::SRA_PARTS        , MVT::i32 , Custom);
  setOperationAction(ISD::SRL_PARTS        , MVT::i32 , Custom);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::SHL_PARTS      , MVT::i64 , Custom);
    setOperationAction(ISD::SRA_PARTS      , MVT::i64 , Custom);
    setOperationAction(ISD::SRL_PARTS      , MVT::i64 , Custom);
  }

  if (Subtarget->hasSSE1())
    setOperationAction(ISD::PREFETCH       , MVT::Other, Legal);

  if (!Subtarget->hasSSE2())
    setOperationAction(ISD::MEMBARRIER     , MVT::Other, Expand);

  // Expand certain atomics.
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);

  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);

  if (!Subtarget->is64Bit()) {
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
  }

  // FIXME - use subtarget debug flags
  if (!Subtarget->isTargetDarwin() &&
      !Subtarget->isTargetELF() &&
      !Subtarget->isTargetCygMing()) {
    setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
  }

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);
  if (Subtarget->is64Bit()) {
    setExceptionPointerRegister(X86::RAX);
    setExceptionSelectorRegister(X86::RDX);
  } else {
    setExceptionPointerRegister(X86::EAX);
    setExceptionSelectorRegister(X86::EDX);
  }
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);

  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
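
  // Note (added commentary): the Custom atomics above are lowered to
  // LOCK-prefixed x86 instructions; on 32-bit targets the i64 variants are
  // implemented as compare-exchange loops around CMPXCHG8B, since no other
  // 64-bit atomic read-modify-write instruction exists there.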

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART          , MVT::Other, Custom);
  setOperationAction(ISD::VAEND            , MVT::Other, Expand);
  if (Subtarget->is64Bit()) {
    setOperationAction(ISD::VAARG          , MVT::Other, Custom);
    setOperationAction(ISD::VACOPY         , MVT::Other, Custom);
  } else {
    setOperationAction(ISD::VAARG          , MVT::Other, Expand);
    setOperationAction(ISD::VACOPY         , MVT::Other, Expand);
  }

  setOperationAction(ISD::STACKSAVE,         MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,      MVT::Other, Expand);
  if (Subtarget->is64Bit())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
  if (Subtarget->isTargetCygMing())
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  else
    setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  if (!UseSoftFloat && X86ScalarSSEf64) {
    // f32 and f64 use SSE.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::FR64RegisterClass);

    // Use ANDPD to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f64, Custom);
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f64, Custom);
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    // Use ANDPD and ORPD to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN , MVT::f64, Expand);
    setOperationAction(ISD::FCOS , MVT::f64, Expand);
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);

    // Expand FP immediates into loads from the stack, except for the special
    // cases we handle.
    addLegalFPImmediate(APFloat(+0.0));  // xorpd
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
  } else if (!UseSoftFloat && X86ScalarSSEf32) {
    // Use SSE for f32, x87 for f64.
    // Set up the FP register classes.
    addRegisterClass(MVT::f32, X86::FR32RegisterClass);
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);

    // Use ANDPS to simulate FABS.
    setOperationAction(ISD::FABS , MVT::f32, Custom);

    // Use XORP to simulate FNEG.
    setOperationAction(ISD::FNEG , MVT::f32, Custom);

    setOperationAction(ISD::UNDEF, MVT::f64, Expand);

    // Use ANDPS and ORPS to simulate FCOPYSIGN.
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);

    // We don't support sin/cos/fmod.
    setOperationAction(ISD::FSIN , MVT::f32, Expand);
    setOperationAction(ISD::FCOS , MVT::f32, Expand);

    // Special cases we handle for FP constants.
    addLegalFPImmediate(APFloat(+0.0f)); // xorps
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }
  } else if (!UseSoftFloat) {
    // f32 and f64 in x87.
    // Set up the FP register classes.
    addRegisterClass(MVT::f64, X86::RFP64RegisterClass);
    addRegisterClass(MVT::f32, X86::RFP32RegisterClass);

    setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
    setOperationAction(ISD::UNDEF,     MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f64 , Expand);
      setOperationAction(ISD::FCOS , MVT::f64 , Expand);
    }
    addLegalFPImmediate(APFloat(+0.0));  // FLD0
    addLegalFPImmediate(APFloat(+1.0));  // FLD1
    addLegalFPImmediate(APFloat(-0.0));  // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0));  // FLD1/FCHS
    addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
  }

  // Long double always uses X87.
  if (!UseSoftFloat) {
    addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
    setOperationAction(ISD::UNDEF,     MVT::f80, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
    {
      bool ignored;
      APFloat TmpFlt(+0.0);
      TmpFlt.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
                     &ignored);
      addLegalFPImmediate(TmpFlt);  // FLD0
      TmpFlt.changeSign();
      addLegalFPImmediate(TmpFlt);  // FLD0/FCHS
      APFloat TmpFlt2(+1.0);
      TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
                      &ignored);
      addLegalFPImmediate(TmpFlt2);  // FLD1
      TmpFlt2.changeSign();
      addLegalFPImmediate(TmpFlt2);  // FLD1/FCHS
    }

    if (!UnsafeFPMath) {
      setOperationAction(ISD::FSIN , MVT::f80 , Expand);
      setOperationAction(ISD::FCOS , MVT::f80 , Expand);
    }
  }

  // Always use a library call for pow.
  setOperationAction(ISD::FPOW  , MVT::f32 , Expand);
  setOperationAction(ISD::FPOW  , MVT::f64 , Expand);
  setOperationAction(ISD::FPOW  , MVT::f80 , Expand);

  setOperationAction(ISD::FLOG,   MVT::f80, Expand);
  setOperationAction(ISD::FLOG2,  MVT::f80, Expand);
  setOperationAction(ISD::FLOG10, MVT::f80, Expand);
  setOperationAction(ISD::FEXP,   MVT::f80, Expand);
  setOperationAction(ISD::FEXP2,  MVT::f80, Expand);
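
  // Note (added commentary): the loop below first marks every operation on
  // every vector type Expand, so nothing is accidentally treated as legal;
  // the ISA-gated blocks that follow (MMX/SSE1/SSE2/SSE4.1/AVX) then
  // selectively re-enable only what the selected subtarget can actually do.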

  // First set operation action for all vector types to either promote
  // (for widening) or expand (for scalarization). Then we will selectively
  // turn on ones that can be effectively codegen'd.
  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    setOperationAction(ISD::ADD , (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SUB , (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FADD, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FNEG, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FSUB, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::MUL , (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FMUL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SDIV, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UDIV, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FDIV, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT,(MVT::SimpleValueType)VT,Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR,(MVT::SimpleValueType)VT,Expand);
    setOperationAction(ISD::INSERT_VECTOR_ELT,(MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FABS, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FSIN, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FCOS, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FPOWI, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FSQRT, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SDIVREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UDIVREM, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FPOW, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CTPOP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CTTZ, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::CTLZ, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SHL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SRA, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SRL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::ROTL, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::ROTR, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::BSWAP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::VSETCC, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FLOG, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FLOG2, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,
                       Expand);
    setOperationAction(ISD::TRUNCATE, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand);
    setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand);
    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
      setTruncStoreAction((MVT::SimpleValueType)VT,
                          (MVT::SimpleValueType)InnerVT, Expand);
    setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
  }

  // FIXME: In order to prevent SSE instructions being expanded to MMX ones
  // with -msoft-float, disable use of MMX as well.
  if (!UseSoftFloat && !DisableMMX && Subtarget->hasMMX()) {
    addRegisterClass(MVT::v8i8,  X86::VR64RegisterClass);
    addRegisterClass(MVT::v4i16, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2i32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v2f32, X86::VR64RegisterClass);
    addRegisterClass(MVT::v1i64, X86::VR64RegisterClass);

    setOperationAction(ISD::ADD,                MVT::v8i8,  Legal);
    setOperationAction(ISD::ADD,                MVT::v4i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v2i32, Legal);
    setOperationAction(ISD::ADD,                MVT::v1i64, Legal);

    setOperationAction(ISD::SUB,                MVT::v8i8,  Legal);
    setOperationAction(ISD::SUB,                MVT::v4i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v2i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v1i64, Legal);

    setOperationAction(ISD::MULHS,              MVT::v4i16, Legal);
    setOperationAction(ISD::MUL,                MVT::v4i16, Legal);

    setOperationAction(ISD::AND,                MVT::v8i8,  Promote);
    AddPromotedToType (ISD::AND,                MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::AND,                MVT::v4i16, Promote);
    AddPromotedToType (ISD::AND,                MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::AND,                MVT::v2i32, Promote);
    AddPromotedToType (ISD::AND,                MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::AND,                MVT::v1i64, Legal);

    setOperationAction(ISD::OR,                 MVT::v8i8,  Promote);
    AddPromotedToType (ISD::OR,                 MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::OR,                 MVT::v4i16, Promote);
    AddPromotedToType (ISD::OR,                 MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::OR,                 MVT::v2i32, Promote);
    AddPromotedToType (ISD::OR,                 MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::OR,                 MVT::v1i64, Legal);

    setOperationAction(ISD::XOR,                MVT::v8i8,  Promote);
    AddPromotedToType (ISD::XOR,                MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::XOR,                MVT::v4i16, Promote);
    AddPromotedToType (ISD::XOR,                MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::XOR,                MVT::v2i32, Promote);
    AddPromotedToType (ISD::XOR,                MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::XOR,                MVT::v1i64, Legal);

    setOperationAction(ISD::LOAD,               MVT::v8i8,  Promote);
    AddPromotedToType (ISD::LOAD,               MVT::v8i8,  MVT::v1i64);
    setOperationAction(ISD::LOAD,               MVT::v4i16, Promote);
    AddPromotedToType (ISD::LOAD,               MVT::v4i16, MVT::v1i64);
    setOperationAction(ISD::LOAD,               MVT::v2i32, Promote);
    AddPromotedToType (ISD::LOAD,               MVT::v2i32, MVT::v1i64);
    setOperationAction(ISD::LOAD,               MVT::v2f32, Promote);
    AddPromotedToType (ISD::LOAD,               MVT::v2f32, MVT::v1i64);
    setOperationAction(ISD::LOAD,               MVT::v1i64, Legal);
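
    // Note (added commentary): promoting the AND/OR/XOR/LOAD cases above to
    // v1i64 means one 64-bit MMX pattern (e.g. PAND) serves every MMX
    // element type, since the bitwise instructions don't care about lane
    // boundaries.
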
    setOperationAction(ISD::BUILD_VECTOR,      MVT::v8i8,  Custom);
    setOperationAction(ISD::BUILD_VECTOR,      MVT::v4i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR,      MVT::v2i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR,      MVT::v2f32, Custom);
    setOperationAction(ISD::BUILD_VECTOR,      MVT::v1i64, Custom);

    setOperationAction(ISD::VECTOR_SHUFFLE,    MVT::v8i8,  Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,    MVT::v4i16, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,    MVT::v2i32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,    MVT::v1i64, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v2f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v8i8,  Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v4i16, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,  MVT::v1i64, Custom);

    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom);

    setOperationAction(ISD::SELECT,            MVT::v8i8,  Promote);
    setOperationAction(ISD::SELECT,            MVT::v4i16, Promote);
    setOperationAction(ISD::SELECT,            MVT::v2i32, Promote);
    setOperationAction(ISD::SELECT,            MVT::v1i64, Custom);
    setOperationAction(ISD::VSETCC,            MVT::v8i8,  Custom);
    setOperationAction(ISD::VSETCC,            MVT::v4i16, Custom);
    setOperationAction(ISD::VSETCC,            MVT::v2i32, Custom);
  }

  if (!UseSoftFloat && Subtarget->hasSSE1()) {
    addRegisterClass(MVT::v4f32, X86::VR128RegisterClass);

    setOperationAction(ISD::FADD,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB,               MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL,               MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV,               MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
    setOperationAction(ISD::LOAD,               MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
    setOperationAction(ISD::VSETCC,             MVT::v4f32, Custom);
  }
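
  // Note (added commentary): with only SSE1, v4f32 is the sole legal 128-bit
  // type; the 128-bit integer types below require SSE2. FNEG being Custom
  // reflects its lowering as an XORPS with a sign-bit mask constant rather
  // than a dedicated negate instruction.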

  if (!UseSoftFloat && Subtarget->hasSSE2()) {
    addRegisterClass(MVT::v2f64, X86::VR128RegisterClass);

    // FIXME: Unfortunately -soft-float and -no-implicit-float means XMM
    // registers cannot be used even for integer operations.
    addRegisterClass(MVT::v16i8, X86::VR128RegisterClass);
    addRegisterClass(MVT::v8i16, X86::VR128RegisterClass);
    addRegisterClass(MVT::v4i32, X86::VR128RegisterClass);
    addRegisterClass(MVT::v2i64, X86::VR128RegisterClass);

    setOperationAction(ISD::ADD,                MVT::v16i8, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
    setOperationAction(ISD::ADD,                MVT::v2i64, Legal);
    setOperationAction(ISD::MUL,                MVT::v2i64, Custom);
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
    setOperationAction(ISD::SUB,                MVT::v2i64, Legal);
    setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    setOperationAction(ISD::FADD,               MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB,               MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL,               MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV,               MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG,               MVT::v2f64, Custom);

    setOperationAction(ISD::VSETCC,             MVT::v2f64, Custom);
    setOperationAction(ISD::VSETCC,             MVT::v16i8, Custom);
    setOperationAction(ISD::VSETCC,             MVT::v8i16, Custom);
    setOperationAction(ISD::VSETCC,             MVT::v4i32, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v2f64, Custom);
    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v2i64, Custom);
    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i8, Custom);
    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i16, Custom);
    setOperationAction(ISD::CONCAT_VECTORS,     MVT::v4i32, Custom);

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; ++i) {
      EVT VT = (MVT::SimpleValueType)i;
      // Do not attempt to custom lower non-power-of-2 vectors
      if (!isPowerOf2_32(VT.getVectorNumElements()))
        continue;
      // Do not attempt to custom lower non-128-bit vectors
      if (!VT.is128BitVector())
        continue;
      setOperationAction(ISD::BUILD_VECTOR,
                         VT.getSimpleVT().SimpleTy, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,
                         VT.getSimpleVT().SimpleTy, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT,
                         VT.getSimpleVT().SimpleTy, Custom);
    }

    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i64, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom);
    }

    // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) {
      MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
      EVT VT = SVT;

      // Do not attempt to promote non-128-bit vectors
      if (!VT.is128BitVector()) {
        continue;
      }

      setOperationAction(ISD::AND,    SVT, Promote);
      AddPromotedToType (ISD::AND,    SVT, MVT::v2i64);
      setOperationAction(ISD::OR,     SVT, Promote);
      AddPromotedToType (ISD::OR,     SVT, MVT::v2i64);
      setOperationAction(ISD::XOR,    SVT, Promote);
      AddPromotedToType (ISD::XOR,    SVT, MVT::v2i64);
      setOperationAction(ISD::LOAD,   SVT, Promote);
      AddPromotedToType (ISD::LOAD,   SVT, MVT::v2i64);
      setOperationAction(ISD::SELECT, SVT, Promote);
      AddPromotedToType (ISD::SELECT, SVT, MVT::v2i64);
    }

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);

    // Custom lower v2i64 and v2f64 selects.
    setOperationAction(ISD::LOAD,               MVT::v2f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v2i64, Legal);
    setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
    setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);

    setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
    if (!DisableMMX && Subtarget->hasMMX()) {
      setOperationAction(ISD::FP_TO_SINT,       MVT::v2i32, Custom);
      setOperationAction(ISD::SINT_TO_FP,       MVT::v2i32, Custom);
    }
  }
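
  // Note (added commentary): FP_TO_SINT/SINT_TO_FP on v4i32 map directly to
  // CVTTPS2DQ/CVTDQ2PS under SSE2, which is why they are Legal here while
  // their scalar unsigned counterparts above need custom handling.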

  if (Subtarget->hasSSE41()) {
    // FIXME: Do we need to handle scalar-to-vector here?
    setOperationAction(ISD::MUL,                MVT::v4i32, Legal);

    // i8 and i16 vectors are custom, because the source register and source
    // memory operand types are not the same width. f32 vectors are custom
    // since the immediate controlling the insert encodes additional
    // information.
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i8, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v2i64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
    }
  }

  if (Subtarget->hasSSE42()) {
    setOperationAction(ISD::VSETCC,             MVT::v2i64, Custom);
  }

  if (!UseSoftFloat && Subtarget->hasAVX()) {
    addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
    addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
    addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
    addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);

    setOperationAction(ISD::LOAD,               MVT::v8f32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v8i32, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4f64, Legal);
    setOperationAction(ISD::LOAD,               MVT::v4i64, Legal);
    setOperationAction(ISD::FADD,               MVT::v8f32, Legal);
    setOperationAction(ISD::FSUB,               MVT::v8f32, Legal);
    setOperationAction(ISD::FMUL,               MVT::v8f32, Legal);
    setOperationAction(ISD::FDIV,               MVT::v8f32, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v8f32, Legal);
    setOperationAction(ISD::FNEG,               MVT::v8f32, Custom);
    //setOperationAction(ISD::BUILD_VECTOR,       MVT::v8f32, Custom);
    //setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v8f32, Custom);
    //setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom);
    //setOperationAction(ISD::SELECT,             MVT::v8f32, Custom);
    //setOperationAction(ISD::VSETCC,             MVT::v8f32, Custom);

    // Operations to consider commented out - v16i16, v32i8
    //setOperationAction(ISD::ADD,                MVT::v16i16, Legal);
    setOperationAction(ISD::ADD,                MVT::v8i32, Custom);
    setOperationAction(ISD::ADD,                MVT::v4i64, Custom);
    //setOperationAction(ISD::SUB,                MVT::v32i8, Legal);
    //setOperationAction(ISD::SUB,                MVT::v16i16, Legal);
    setOperationAction(ISD::SUB,                MVT::v8i32, Custom);
    setOperationAction(ISD::SUB,                MVT::v4i64, Custom);
    //setOperationAction(ISD::MUL,                MVT::v16i16, Legal);
    setOperationAction(ISD::FADD,               MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB,               MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL,               MVT::v4f64, Legal);
    setOperationAction(ISD::FDIV,               MVT::v4f64, Legal);
    setOperationAction(ISD::FSQRT,              MVT::v4f64, Legal);
    setOperationAction(ISD::FNEG,               MVT::v4f64, Custom);

    setOperationAction(ISD::VSETCC,             MVT::v4f64, Custom);
    // setOperationAction(ISD::VSETCC,             MVT::v32i8, Custom);
    // setOperationAction(ISD::VSETCC,             MVT::v16i16, Custom);
    setOperationAction(ISD::VSETCC,             MVT::v8i32, Custom);

    // setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v32i8, Custom);
    // setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i16, Custom);
    // setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i16, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8f32, Custom);

    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f64, Custom);
    setOperationAction(ISD::BUILD_VECTOR,       MVT::v4i64, Custom);
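
    // Note (added commentary): ADD/SUB on v8i32 and v4i64 are Custom because
    // AVX (pre-AVX2) provides no 256-bit integer arithmetic; the lowering
    // splits each 256-bit integer op into two 128-bit halves, matching the
    // "#if 0" discussion below.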
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f64, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4i64, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Custom);

#if 0
    // Not sure we want to do this since there are no 256-bit integer
    // operations in AVX

    // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    // This includes 256-bit vectors
    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) {
      EVT VT = (MVT::SimpleValueType)i;

      // Do not attempt to custom lower non-power-of-2 vectors
      if (!isPowerOf2_32(VT.getVectorNumElements()))
        continue;

      setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    }

    if (Subtarget->is64Bit()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom);
    }
#endif

#if 0
    // Not sure we want to do this since there are no 256-bit integer
    // operations in AVX

    // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
    // Including 256-bit vectors
    for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) {
      EVT VT = (MVT::SimpleValueType)i;

      if (!VT.is256BitVector()) {
        continue;
      }
      setOperationAction(ISD::AND,    VT, Promote);
      AddPromotedToType (ISD::AND,    VT, MVT::v4i64);
      setOperationAction(ISD::OR,     VT, Promote);
      AddPromotedToType (ISD::OR,     VT, MVT::v4i64);
      setOperationAction(ISD::XOR,    VT, Promote);
      AddPromotedToType (ISD::XOR,    VT, MVT::v4i64);
      setOperationAction(ISD::LOAD,   VT, Promote);
      AddPromotedToType (ISD::LOAD,   VT, MVT::v4i64);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
    }

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
#endif
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Add/Sub/Mul with overflow operations are custom lowered.
  setOperationAction(ISD::SADDO, MVT::i32, Custom);
  setOperationAction(ISD::SADDO, MVT::i64, Custom);
  setOperationAction(ISD::UADDO, MVT::i32, Custom);
  setOperationAction(ISD::UADDO, MVT::i64, Custom);
  setOperationAction(ISD::SSUBO, MVT::i32, Custom);
  setOperationAction(ISD::SSUBO, MVT::i64, Custom);
  setOperationAction(ISD::USUBO, MVT::i32, Custom);
  setOperationAction(ISD::USUBO, MVT::i64, Custom);
  setOperationAction(ISD::SMULO, MVT::i32, Custom);
  setOperationAction(ISD::SMULO, MVT::i64, Custom);
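
  // Note (added commentary): the overflow ops are lowered onto the EFLAGS
  // results x86 already produces, e.g. SADDO becomes an ADD whose overflow
  // flag feeds a SETO or conditional branch, so no separate compare is
  // needed.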

  if (!Subtarget->is64Bit()) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, 0);
    setLibcallName(RTLIB::SRL_I128, 0);
    setLibcallName(RTLIB::SRA_I128, 0);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::BUILD_VECTOR);
  setTargetDAGCombine(ISD::SELECT);
  setTargetDAGCombine(ISD::SHL);
  setTargetDAGCombine(ISD::SRA);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::MEMBARRIER);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  if (Subtarget->is64Bit())
    setTargetDAGCombine(ISD::MUL);

  computeRegisterProperties();

  // FIXME: These should be based on subtarget info. Plus, the values should
  // be smaller when we are in optimizing for size mode.
  maxStoresPerMemset = 16;  // For @llvm.memset -> sequence of stores
  maxStoresPerMemcpy = 8;   // For @llvm.memcpy -> sequence of stores
  maxStoresPerMemmove = 3;  // For @llvm.memmove -> sequence of stores
  setPrefLoopAlignment(16);
  benefitFromCodePlacementOpt = true;
}


MVT::SimpleValueType X86TargetLowering::getSetCCResultType(EVT VT) const {
  return MVT::i8;
}


/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
  if (MaxAlign == 16)
    return;
  if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (VTy->getBitWidth() == 128)
      MaxAlign = 16;
  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned EltAlign = 0;
      getMaxByValAlign(STy->getElementType(i), EltAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == 16)
        break;
    }
  }
  return;
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. For X86, aggregates
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
/// are at 4-byte boundaries.
unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
  if (Subtarget->is64Bit()) {
    // Max of 8 and alignment of type.
    unsigned TyAlign = TD->getABITypeAlignment(Ty);
    if (TyAlign > 8)
      return TyAlign;
    return 8;
  }

  unsigned Align = 4;
  if (Subtarget->hasSSE1())
    getMaxByValAlign(Ty, Align);
  return Align;
}
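
// Note (added commentary): getMaxByValAlign recurses so that, for example, a
// struct containing a <4 x float> field anywhere inside (even nested in an
// array) reports 16, giving the byval copy the 16-byte slot that SSE loads
// and stores of that field require.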

/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, it means any destination alignment can
/// satisfy the constraint. Similarly, if SrcAlign is zero it means there
/// isn't a need to check it against the alignment requirement, probably
/// because the source does not need to be loaded. If 'NonScalarIntSafe' is
/// true, that means it's safe to return a non-scalar-integer type, e.g.
/// empty string source, constant, or loaded from memory. 'MemcpyStrSrc'
/// indicates whether the memcpy source is constant so it does not need to
/// be loaded. It returns EVT::Other if SelectionDAG should be responsible
/// for determining the type.
EVT
X86TargetLowering::getOptimalMemOpType(uint64_t Size,
                                       unsigned DstAlign, unsigned SrcAlign,
                                       bool NonScalarIntSafe,
                                       bool MemcpyStrSrc,
                                       SelectionDAG &DAG) const {
  // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
  // linux. This is because the stack realignment code can't handle certain
  // cases like PR2962. This should be removed when PR2962 is fixed.
  const Function *F = DAG.getMachineFunction().getFunction();
  if (NonScalarIntSafe &&
      !F->hasFnAttr(Attribute::NoImplicitFloat)) {
    if (Size >= 16 &&
        (Subtarget->isUnalignedMemAccessFast() ||
         ((DstAlign == 0 || DstAlign >= 16) &&
          (SrcAlign == 0 || SrcAlign >= 16))) &&
        Subtarget->getStackAlignment() >= 16) {
      if (Subtarget->hasSSE2())
        return MVT::v4i32;
      if (Subtarget->hasSSE1())
        return MVT::v4f32;
    } else if (!MemcpyStrSrc && Size >= 8 &&
               !Subtarget->is64Bit() &&
               Subtarget->getStackAlignment() >= 8 &&
               Subtarget->hasSSE2()) {
      // Do not use f64 to lower memcpy if source is string constant. It's
      // better to use i32 to avoid the loads.
      return MVT::f64;
    }
  }
  if (Subtarget->is64Bit() && Size >= 8)
    return MVT::i64;
  return MVT::i32;
}

/// getJumpTableEncoding - Return the entry encoding for a jump table in the
/// current function. The returned value is a member of the
/// MachineJumpTableInfo::JTEntryKind enum.
unsigned X86TargetLowering::getJumpTableEncoding() const {
  // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
  // symbol.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    return MachineJumpTableInfo::EK_Custom32;

  // Otherwise, use the normal jump table encoding heuristics.
  return TargetLowering::getJumpTableEncoding();
}

/// getPICBaseSymbol - Return the X86-32 PIC base.
MCSymbol *
X86TargetLowering::getPICBaseSymbol(const MachineFunction *MF,
                                    MCContext &Ctx) const {
  const MCAsmInfo &MAI = *getTargetMachine().getMCAsmInfo();
  return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
                               Twine(MF->getFunctionNumber())+"$pb");
}


const MCExpr *
X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                                             const MachineBasicBlock *MBB,
                                             unsigned uid, MCContext &Ctx) const {
  assert(getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
         Subtarget->isPICStyleGOT());
  // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
  // entries.
  return MCSymbolRefExpr::Create(MBB->getSymbol(),
                                 MCSymbolRefExpr::VK_GOTOFF, Ctx);
}
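
// Note (added commentary): with EK_Custom32 each jump table entry is emitted
// as a 32-bit @GOTOFF reference to the target block (e.g. a directive of the
// form ".long .LBB7_2@GOTOFF"; the label name here is illustrative), so the
// table is position independent relative to the GOT rather than holding
// absolute block addresses.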
1159 return DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), getPointerTy()); 1160 return Table; 1161} 1162 1163/// getPICJumpTableRelocBaseExpr - This returns the relocation base for the 1164/// given PIC jumptable, the same as getPICJumpTableRelocBase, but as an 1165/// MCExpr. 1166const MCExpr *X86TargetLowering:: 1167getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, 1168 MCContext &Ctx) const { 1169 // X86-64 uses RIP relative addressing based on the jump table label. 1170 if (Subtarget->isPICStyleRIPRel()) 1171 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); 1172 1173 // Otherwise, the reference is relative to the PIC base. 1174 return MCSymbolRefExpr::Create(getPICBaseSymbol(MF, Ctx), Ctx); 1175} 1176 1177/// getFunctionAlignment - Return the Log2 alignment of this function. 1178unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { 1179 return F->hasFnAttr(Attribute::OptimizeForSize) ? 0 : 4; 1180} 1181 1182//===----------------------------------------------------------------------===// 1183// Return Value Calling Convention Implementation 1184//===----------------------------------------------------------------------===// 1185 1186#include "X86GenCallingConv.inc" 1187 1188bool 1189X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, 1190 const SmallVectorImpl<EVT> &OutTys, 1191 const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, 1192 SelectionDAG &DAG) { 1193 SmallVector<CCValAssign, 16> RVLocs; 1194 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), 1195 RVLocs, *DAG.getContext()); 1196 return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_X86); 1197} 1198 1199SDValue 1200X86TargetLowering::LowerReturn(SDValue Chain, 1201 CallingConv::ID CallConv, bool isVarArg, 1202 const SmallVectorImpl<ISD::OutputArg> &Outs, 1203 DebugLoc dl, SelectionDAG &DAG) { 1204 1205 SmallVector<CCValAssign, 16> RVLocs; 1206 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), 1207 RVLocs, *DAG.getContext()); 1208 CCInfo.AnalyzeReturn(Outs, RetCC_X86); 1209 1210 // Add the regs to the liveout set for the function. 1211 MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); 1212 for (unsigned i = 0; i != RVLocs.size(); ++i) 1213 if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) 1214 MRI.addLiveOut(RVLocs[i].getLocReg()); 1215 1216 SDValue Flag; 1217 1218 SmallVector<SDValue, 6> RetOps; 1219 RetOps.push_back(Chain); // Operand #0 = Chain (updated below) 1220 // Operand #1 = Bytes To Pop 1221 RetOps.push_back(DAG.getTargetConstant(getBytesToPopOnReturn(), MVT::i16)); 1222 1223 // Copy the result values into the output registers. 1224 for (unsigned i = 0; i != RVLocs.size(); ++i) { 1225 CCValAssign &VA = RVLocs[i]; 1226 assert(VA.isRegLoc() && "Can only return in registers!"); 1227 SDValue ValToCopy = Outs[i].Val; 1228 1229 // Returns in ST0/ST1 are handled specially: these are pushed as operands to 1230 // the RET instruction and handled by the FP Stackifier. 1231 if (VA.getLocReg() == X86::ST0 || 1232 VA.getLocReg() == X86::ST1) { 1233 // If this is a copy from an xmm register to ST(0), use an FPExtend to 1234 // change the value to the FP stack register class. 1235 if (isScalarFPTypeInSSEReg(VA.getValVT())) 1236 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy); 1237 RetOps.push_back(ValToCopy); 1238 // Don't emit a copytoreg. 1239 continue; 1240 } 1241 1242 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64 1243 // which is returned in RAX / RDX. 
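    // A sketch of the node sequence built below (illustrative, e.g. for a
    // v8i8 return value):
    //   (v8i8 %x) --BIT_CONVERT--> i64 --SCALAR_TO_VECTOR--> v2i64 in XMM0/1
    // while v1i64 keeps its plain i64 form and is copied into RAX / RDX.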
1244 if (Subtarget->is64Bit()) { 1245 EVT ValVT = ValToCopy.getValueType(); 1246 if (ValVT.isVector() && ValVT.getSizeInBits() == 64) { 1247 ValToCopy = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, ValToCopy); 1248 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) 1249 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, ValToCopy); 1250 } 1251 } 1252 1253 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag); 1254 Flag = Chain.getValue(1); 1255 } 1256 1257 // The x86-64 ABI for returning structs by value requires that we copy 1258 // the sret argument into %rax for the return. We saved the argument into 1259 // a virtual register in the entry block, so now we copy the value out 1260 // and into %rax. 1261 if (Subtarget->is64Bit() && 1262 DAG.getMachineFunction().getFunction()->hasStructRetAttr()) { 1263 MachineFunction &MF = DAG.getMachineFunction(); 1264 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1265 unsigned Reg = FuncInfo->getSRetReturnReg(); 1266 if (!Reg) { 1267 Reg = MRI.createVirtualRegister(getRegClassFor(MVT::i64)); 1268 FuncInfo->setSRetReturnReg(Reg); 1269 } 1270 SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); 1271 1272 Chain = DAG.getCopyToReg(Chain, dl, X86::RAX, Val, Flag); 1273 Flag = Chain.getValue(1); 1274 1275 // RAX now acts like a return value. 1276 MRI.addLiveOut(X86::RAX); 1277 } 1278 1279 RetOps[0] = Chain; // Update chain. 1280 1281 // Add the flag if we have it. 1282 if (Flag.getNode()) 1283 RetOps.push_back(Flag); 1284 1285 return DAG.getNode(X86ISD::RET_FLAG, dl, 1286 MVT::Other, &RetOps[0], RetOps.size()); 1287} 1288 1289/// LowerCallResult - Lower the result values of a call into the 1290/// appropriate copies out of appropriate physical registers. 1291/// 1292SDValue 1293X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 1294 CallingConv::ID CallConv, bool isVarArg, 1295 const SmallVectorImpl<ISD::InputArg> &Ins, 1296 DebugLoc dl, SelectionDAG &DAG, 1297 SmallVectorImpl<SDValue> &InVals) { 1298 1299 // Assign locations to each value returned by this call. 1300 SmallVector<CCValAssign, 16> RVLocs; 1301 bool Is64Bit = Subtarget->is64Bit(); 1302 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), 1303 RVLocs, *DAG.getContext()); 1304 CCInfo.AnalyzeCallResult(Ins, RetCC_X86); 1305 1306 // Copy all of the result registers out of their specified physreg. 1307 for (unsigned i = 0; i != RVLocs.size(); ++i) { 1308 CCValAssign &VA = RVLocs[i]; 1309 EVT CopyVT = VA.getValVT(); 1310 1311 // If this is x86-64, and we disabled SSE, we can't return FP values 1312 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) && 1313 ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) { 1314 report_fatal_error("SSE register return with SSE disabled"); 1315 } 1316 1317 // If this is a call to a function that returns an fp value on the floating 1318 // point stack, but where we prefer to use the value in xmm registers, copy 1319 // it out as F80 and use a truncate to move it from fp stack reg to xmm reg. 1320 if ((VA.getLocReg() == X86::ST0 || 1321 VA.getLocReg() == X86::ST1) && 1322 isScalarFPTypeInSSEReg(VA.getValVT())) { 1323 CopyVT = MVT::f80; 1324 } 1325 1326 SDValue Val; 1327 if (Is64Bit && CopyVT.isVector() && CopyVT.getSizeInBits() == 64) { 1328 // For x86-64, MMX values are returned in XMM0 / XMM1 except for v1i64. 
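      // Roughly the inverse of the sequence used on the return side: when the
      // value lives in XMM0 / XMM1 it is copied out as v2i64 and element 0 is
      // extracted to i64; otherwise it is copied out as i64 directly. Either
      // way the i64 is then bit-converted back to the expected vector type.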
      if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
        Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
                                   MVT::v2i64, InFlag).getValue(1);
        Val = Chain.getValue(0);
        Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
                          Val, DAG.getConstant(0, MVT::i64));
      } else {
        Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
                                   MVT::i64, InFlag).getValue(1);
        Val = Chain.getValue(0);
      }
      Val = DAG.getNode(ISD::BIT_CONVERT, dl, CopyVT, Val);
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
                                 CopyVT, InFlag).getValue(1);
      Val = Chain.getValue(0);
    }
    InFlag = Chain.getValue(2);

    if (CopyVT != VA.getValVT()) {
      // Round the F80 to the right size, which also moves it to the
      // appropriate xmm register.
      Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
                        // This truncation won't change the value.
                        DAG.getIntPtrConstant(1));
    }

    InVals.push_back(Val);
  }

  return Chain;
}


//===----------------------------------------------------------------------===//
//                C & StdCall & Fast Calling Convention implementation
//===----------------------------------------------------------------------===//
//  StdCall is the standard calling convention for many Windows API routines.
//  It differs from the C calling convention just a little: the callee should
//  clean up the stack, not the caller. Symbols should also be decorated in
//  some fancy way :) It doesn't support any vector arguments.
//  For info on the fast calling convention see the Fast Calling Convention
//  (tail call) implementation LowerX86_32FastCCCallTo.

/// CallIsStructReturn - Determines whether a call uses struct return
/// semantics.
static bool CallIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs) {
  if (Outs.empty())
    return false;

  return Outs[0].Flags.isSRet();
}

/// ArgsAreStructReturn - Determines whether a function uses struct
/// return semantics.
static bool
ArgsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins) {
  if (Ins.empty())
    return false;

  return Ins[0].Flags.isSRet();
}

/// IsCalleePop - Determines whether the callee is required to pop its
/// own arguments. Callee pop is necessary to support tail calls.
bool X86TargetLowering::IsCalleePop(bool IsVarArg, CallingConv::ID CallingConv){
  if (IsVarArg)
    return false;

  switch (CallingConv) {
  default:
    return false;
  case CallingConv::X86_StdCall:
    return !Subtarget->is64Bit();
  case CallingConv::X86_FastCall:
    return !Subtarget->is64Bit();
  case CallingConv::Fast:
    return GuaranteedTailCallOpt;
  case CallingConv::GHC:
    return GuaranteedTailCallOpt;
  }
}

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention value.
1414CCAssignFn *X86TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { 1415 if (Subtarget->is64Bit()) { 1416 if (CC == CallingConv::GHC) 1417 return CC_X86_64_GHC; 1418 else if (Subtarget->isTargetWin64()) 1419 return CC_X86_Win64_C; 1420 else 1421 return CC_X86_64_C; 1422 } 1423 1424 if (CC == CallingConv::X86_FastCall) 1425 return CC_X86_32_FastCall; 1426 else if (CC == CallingConv::Fast) 1427 return CC_X86_32_FastCC; 1428 else if (CC == CallingConv::GHC) 1429 return CC_X86_32_GHC; 1430 else 1431 return CC_X86_32_C; 1432} 1433 1434/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 1435/// by "Src" to address "Dst" with size and alignment information specified by 1436/// the specific parameter attribute. The copy will be passed as a byval 1437/// function parameter. 1438static SDValue 1439CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, 1440 ISD::ArgFlagsTy Flags, SelectionDAG &DAG, 1441 DebugLoc dl) { 1442 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); 1443 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 1444 /*isVolatile*/false, /*AlwaysInline=*/true, 1445 NULL, 0, NULL, 0); 1446} 1447 1448/// IsTailCallConvention - Return true if the calling convention is one that 1449/// supports tail call optimization. 1450static bool IsTailCallConvention(CallingConv::ID CC) { 1451 return (CC == CallingConv::Fast || CC == CallingConv::GHC); 1452} 1453 1454/// FuncIsMadeTailCallSafe - Return true if the function is being made into 1455/// a tailcall target by changing its ABI. 1456static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) { 1457 return GuaranteedTailCallOpt && IsTailCallConvention(CC); 1458} 1459 1460SDValue 1461X86TargetLowering::LowerMemArgument(SDValue Chain, 1462 CallingConv::ID CallConv, 1463 const SmallVectorImpl<ISD::InputArg> &Ins, 1464 DebugLoc dl, SelectionDAG &DAG, 1465 const CCValAssign &VA, 1466 MachineFrameInfo *MFI, 1467 unsigned i) { 1468 // Create the nodes corresponding to a load from this parameter slot. 1469 ISD::ArgFlagsTy Flags = Ins[i].Flags; 1470 bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv); 1471 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); 1472 EVT ValVT; 1473 1474 // If value is passed by pointer we have address passed instead of the value 1475 // itself. 1476 if (VA.getLocInfo() == CCValAssign::Indirect) 1477 ValVT = VA.getLocVT(); 1478 else 1479 ValVT = VA.getValVT(); 1480 1481 // FIXME: For now, all byval parameter objects are marked mutable. This can be 1482 // changed with more analysis. 1483 // In case of tail call optimization mark all arguments mutable. Since they 1484 // could be overwritten by lowering of arguments in case of a tail call. 
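  // Note the asymmetry below: for byval the frame index itself is the
  // argument value (a pointer to the in-memory copy), while for everything
  // else we emit a load from the argument's fixed stack slot.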
1485 if (Flags.isByVal()) { 1486 int FI = MFI->CreateFixedObject(Flags.getByValSize(), 1487 VA.getLocMemOffset(), isImmutable, false); 1488 return DAG.getFrameIndex(FI, getPointerTy()); 1489 } else { 1490 int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, 1491 VA.getLocMemOffset(), isImmutable, false); 1492 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 1493 return DAG.getLoad(ValVT, dl, Chain, FIN, 1494 PseudoSourceValue::getFixedStack(FI), 0, 1495 false, false, 0); 1496 } 1497} 1498 1499SDValue 1500X86TargetLowering::LowerFormalArguments(SDValue Chain, 1501 CallingConv::ID CallConv, 1502 bool isVarArg, 1503 const SmallVectorImpl<ISD::InputArg> &Ins, 1504 DebugLoc dl, 1505 SelectionDAG &DAG, 1506 SmallVectorImpl<SDValue> &InVals) { 1507 MachineFunction &MF = DAG.getMachineFunction(); 1508 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1509 1510 const Function* Fn = MF.getFunction(); 1511 if (Fn->hasExternalLinkage() && 1512 Subtarget->isTargetCygMing() && 1513 Fn->getName() == "main") 1514 FuncInfo->setForceFramePointer(true); 1515 1516 MachineFrameInfo *MFI = MF.getFrameInfo(); 1517 bool Is64Bit = Subtarget->is64Bit(); 1518 bool IsWin64 = Subtarget->isTargetWin64(); 1519 1520 assert(!(isVarArg && IsTailCallConvention(CallConv)) && 1521 "Var args not supported with calling convention fastcc or ghc"); 1522 1523 // Assign locations to all of the incoming arguments. 1524 SmallVector<CCValAssign, 16> ArgLocs; 1525 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), 1526 ArgLocs, *DAG.getContext()); 1527 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv)); 1528 1529 unsigned LastVal = ~0U; 1530 SDValue ArgValue; 1531 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1532 CCValAssign &VA = ArgLocs[i]; 1533 // TODO: If an arg is passed in two places (e.g. reg and stack), skip later 1534 // places. 1535 assert(VA.getValNo() != LastVal && 1536 "Don't support value assigned to multiple locs yet"); 1537 LastVal = VA.getValNo(); 1538 1539 if (VA.isRegLoc()) { 1540 EVT RegVT = VA.getLocVT(); 1541 TargetRegisterClass *RC = NULL; 1542 if (RegVT == MVT::i32) 1543 RC = X86::GR32RegisterClass; 1544 else if (Is64Bit && RegVT == MVT::i64) 1545 RC = X86::GR64RegisterClass; 1546 else if (RegVT == MVT::f32) 1547 RC = X86::FR32RegisterClass; 1548 else if (RegVT == MVT::f64) 1549 RC = X86::FR64RegisterClass; 1550 else if (RegVT.isVector() && RegVT.getSizeInBits() == 128) 1551 RC = X86::VR128RegisterClass; 1552 else if (RegVT.isVector() && RegVT.getSizeInBits() == 64) 1553 RC = X86::VR64RegisterClass; 1554 else 1555 llvm_unreachable("Unknown argument type!"); 1556 1557 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 1558 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 1559 1560 // If this is an 8 or 16-bit value, it is really passed promoted to 32 1561 // bits. Insert an assert[sz]ext to capture this, then truncate to the 1562 // right size. 1563 if (VA.getLocInfo() == CCValAssign::SExt) 1564 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 1565 DAG.getValueType(VA.getValVT())); 1566 else if (VA.getLocInfo() == CCValAssign::ZExt) 1567 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 1568 DAG.getValueType(VA.getValVT())); 1569 else if (VA.getLocInfo() == CCValAssign::BCvt) 1570 ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); 1571 1572 if (VA.isExtInLoc()) { 1573 // Handle MMX values passed in XMM regs. 
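        // Sketch: the caller placed the 64-bit MMX payload in the low
        // quadword of an XMM register, so we extract element 0 as i64 and
        // bit-convert it back to the declared vector type; plain integers
        // are simply truncated back to their declared width.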
1574 if (RegVT.isVector()) { 1575 ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, 1576 ArgValue, DAG.getConstant(0, MVT::i64)); 1577 ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); 1578 } else 1579 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 1580 } 1581 } else { 1582 assert(VA.isMemLoc()); 1583 ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i); 1584 } 1585 1586 // If value is passed via pointer - do a load. 1587 if (VA.getLocInfo() == CCValAssign::Indirect) 1588 ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, NULL, 0, 1589 false, false, 0); 1590 1591 InVals.push_back(ArgValue); 1592 } 1593 1594 // The x86-64 ABI for returning structs by value requires that we copy 1595 // the sret argument into %rax for the return. Save the argument into 1596 // a virtual register so that we can access it from the return points. 1597 if (Is64Bit && MF.getFunction()->hasStructRetAttr()) { 1598 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 1599 unsigned Reg = FuncInfo->getSRetReturnReg(); 1600 if (!Reg) { 1601 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); 1602 FuncInfo->setSRetReturnReg(Reg); 1603 } 1604 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[0]); 1605 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); 1606 } 1607 1608 unsigned StackSize = CCInfo.getNextStackOffset(); 1609 // Align stack specially for tail calls. 1610 if (FuncIsMadeTailCallSafe(CallConv)) 1611 StackSize = GetAlignedArgumentStackSize(StackSize, DAG); 1612 1613 // If the function takes variable number of arguments, make a frame index for 1614 // the start of the first vararg value... for expansion of llvm.va_start. 
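  // A worked sketch of the 64-bit register save area built below (SysV
  // case): 6 GPRs * 8 bytes + 8 XMM regs * 16 bytes = 176 bytes, with the
  // XMM slots starting at offset 48. If, say, two GPRs and one XMM register
  // already carry named arguments, va_start will see gp_offset = 16 and
  // fp_offset = 48 + 16 = 64.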
  if (isVarArg) {
    if (Is64Bit || CallConv != CallingConv::X86_FastCall) {
      VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize, true, false);
    }
    if (Is64Bit) {
      unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;

      // FIXME: We should really autogenerate these arrays
      static const unsigned GPR64ArgRegsWin64[] = {
        X86::RCX, X86::RDX, X86::R8,  X86::R9
      };
      static const unsigned XMMArgRegsWin64[] = {
        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3
      };
      static const unsigned GPR64ArgRegs64Bit[] = {
        X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
      };
      static const unsigned XMMArgRegs64Bit[] = {
        X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
        X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
      };
      const unsigned *GPR64ArgRegs, *XMMArgRegs;

      if (IsWin64) {
        TotalNumIntRegs = 4; TotalNumXMMRegs = 4;
        GPR64ArgRegs = GPR64ArgRegsWin64;
        XMMArgRegs = XMMArgRegsWin64;
      } else {
        TotalNumIntRegs = 6; TotalNumXMMRegs = 8;
        GPR64ArgRegs = GPR64ArgRegs64Bit;
        XMMArgRegs = XMMArgRegs64Bit;
      }
      unsigned NumIntRegs = CCInfo.getFirstUnallocated(GPR64ArgRegs,
                                                       TotalNumIntRegs);
      unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
                                                       TotalNumXMMRegs);

      bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
      assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
             "SSE register cannot be used when SSE is disabled!");
      assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
             "SSE register cannot be used when SSE is disabled!");
      if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
        // Kernel mode asks for SSE to be disabled, so don't push them
        // on the stack.
        TotalNumXMMRegs = 0;

      // For X86-64, if there are vararg parameters that are passed via
      // registers, then we must store them to their spots on the stack so they
      // may be loaded by dereferencing the result of va_next.
      VarArgsGPOffset = NumIntRegs * 8;
      VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16;
      RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 +
                                                 TotalNumXMMRegs * 16, 16,
                                                 false);

      // Store the integer parameter registers.
      SmallVector<SDValue, 8> MemOps;
      SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
      unsigned Offset = VarArgsGPOffset;
      for (; NumIntRegs != TotalNumIntRegs; ++NumIntRegs) {
        SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), RSFIN,
                                  DAG.getIntPtrConstant(Offset));
        unsigned VReg = MF.addLiveIn(GPR64ArgRegs[NumIntRegs],
                                     X86::GR64RegisterClass);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
        SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN,
                       PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
                       Offset, false, false, 0);
        MemOps.push_back(Store);
        Offset += 8;
      }

      if (TotalNumXMMRegs != 0 && NumXMMRegs != TotalNumXMMRegs) {
        // Now store the XMM (fp + vector) parameter registers.
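        // These stores are bundled into a single custom VASTART_SAVE_XMM_REGS
        // node below; it also carries AL (the upper bound on SSE registers
        // used) so that its expansion can branch around the XMM spills
        // entirely when AL is zero.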
1691 SmallVector<SDValue, 11> SaveXMMOps; 1692 SaveXMMOps.push_back(Chain); 1693 1694 unsigned AL = MF.addLiveIn(X86::AL, X86::GR8RegisterClass); 1695 SDValue ALVal = DAG.getCopyFromReg(DAG.getEntryNode(), dl, AL, MVT::i8); 1696 SaveXMMOps.push_back(ALVal); 1697 1698 SaveXMMOps.push_back(DAG.getIntPtrConstant(RegSaveFrameIndex)); 1699 SaveXMMOps.push_back(DAG.getIntPtrConstant(VarArgsFPOffset)); 1700 1701 for (; NumXMMRegs != TotalNumXMMRegs; ++NumXMMRegs) { 1702 unsigned VReg = MF.addLiveIn(XMMArgRegs[NumXMMRegs], 1703 X86::VR128RegisterClass); 1704 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::v4f32); 1705 SaveXMMOps.push_back(Val); 1706 } 1707 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl, 1708 MVT::Other, 1709 &SaveXMMOps[0], SaveXMMOps.size())); 1710 } 1711 1712 if (!MemOps.empty()) 1713 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1714 &MemOps[0], MemOps.size()); 1715 } 1716 } 1717 1718 // Some CCs need callee pop. 1719 if (IsCalleePop(isVarArg, CallConv)) { 1720 BytesToPopOnReturn = StackSize; // Callee pops everything. 1721 } else { 1722 BytesToPopOnReturn = 0; // Callee pops nothing. 1723 // If this is an sret function, the return should pop the hidden pointer. 1724 if (!Is64Bit && !IsTailCallConvention(CallConv) && ArgsAreStructReturn(Ins)) 1725 BytesToPopOnReturn = 4; 1726 } 1727 1728 if (!Is64Bit) { 1729 RegSaveFrameIndex = 0xAAAAAAA; // RegSaveFrameIndex is X86-64 only. 1730 if (CallConv == CallingConv::X86_FastCall) 1731 VarArgsFrameIndex = 0xAAAAAAA; // fastcc functions can't have varargs. 1732 } 1733 1734 FuncInfo->setBytesToPopOnReturn(BytesToPopOnReturn); 1735 1736 return Chain; 1737} 1738 1739SDValue 1740X86TargetLowering::LowerMemOpCallTo(SDValue Chain, 1741 SDValue StackPtr, SDValue Arg, 1742 DebugLoc dl, SelectionDAG &DAG, 1743 const CCValAssign &VA, 1744 ISD::ArgFlagsTy Flags) { 1745 const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0); 1746 unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset(); 1747 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 1748 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 1749 if (Flags.isByVal()) { 1750 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); 1751 } 1752 return DAG.getStore(Chain, dl, Arg, PtrOff, 1753 PseudoSourceValue::getStack(), LocMemOffset, 1754 false, false, 0); 1755} 1756 1757/// EmitTailCallLoadRetAddr - Emit a load of return address if tail call 1758/// optimization is performed and it is required. 1759SDValue 1760X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, 1761 SDValue &OutRetAddr, SDValue Chain, 1762 bool IsTailCall, bool Is64Bit, 1763 int FPDiff, DebugLoc dl) { 1764 // Adjust the Return address stack slot. 1765 EVT VT = getPointerTy(); 1766 OutRetAddr = getReturnAddressFrameIndex(DAG); 1767 1768 // Load the "old" Return address. 1769 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, NULL, 0, false, false, 0); 1770 return SDValue(OutRetAddr.getNode(), 1); 1771} 1772 1773/// EmitTailCallStoreRetAddr - Emit a store of the return adress if tail call 1774/// optimization is performed and it is required (FPDiff!=0). 1775static SDValue 1776EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, 1777 SDValue Chain, SDValue RetAddrFrIdx, 1778 bool Is64Bit, int FPDiff, DebugLoc dl) { 1779 // Store the return address to the appropriate stack slot. 1780 if (!FPDiff) return Chain; 1781 // Calculate the new stack slot for the return address. 1782 int SlotSize = Is64Bit ? 
                            8 : 4;
  int NewReturnAddrFI =
    MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false, false);
  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
  Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
                       PseudoSourceValue::getFixedStack(NewReturnAddrFI), 0,
                       false, false, 0);
  return Chain;
}

SDValue
X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) {
  MachineFunction &MF = DAG.getMachineFunction();
  bool Is64Bit = Subtarget->is64Bit();
  bool IsStructRet = CallIsStructReturn(Outs);
  bool IsSibcall = false;

  if (isTailCall) {
    // Check if it's really possible to do a tail call.
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                    isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
                    Outs, Ins, DAG);

    // Sibcalls are automatically detected tailcalls which do not require
    // ABI changes.
    if (!GuaranteedTailCallOpt && isTailCall)
      IsSibcall = true;

    if (isTailCall)
      ++NumTailCalls;
  }

  assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
         "Var args not supported with calling convention fastcc or ghc");

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  if (IsSibcall)
    // This is a sibcall. The memory operands are already available in the
    // caller's incoming argument space.
    NumBytes = 0;
  else if (GuaranteedTailCallOpt && IsTailCallConvention(CallConv))
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);

  int FPDiff = 0;
  if (isTailCall && !IsSibcall) {
    // Lower arguments at fp - stackoffset + fpdiff.
    unsigned NumBytesCallerPushed =
      MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn();
    FPDiff = NumBytesCallerPushed - NumBytes;

    // Record the delta of movement of the returnaddr stackslot, keeping the
    // largest movement (the most negative FPDiff) seen so far.
    if (FPDiff < (MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta()))
      MF.getInfo<X86MachineFunctionInfo>()->setTCReturnAddrDelta(FPDiff);
  }

  if (!IsSibcall)
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue RetAddrFrIdx;
  // Load return address for tail calls.
  if (isTailCall && FPDiff)
    Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
                                    Is64Bit, FPDiff, dl);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;

  // Walk the register/memloc assignments, inserting copies/loads. In the case
  // of tail call optimization arguments are handled later.
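  // E.g. (a sketch, SysV x86-64): for a callee f(int, double, <2 x i64>) the
  // loop records copies into EDI, XMM0 and XMM1 in RegsToPass, while anything
  // assigned a stack location gets its store queued on MemOpChains instead.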
1867 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1868 CCValAssign &VA = ArgLocs[i]; 1869 EVT RegVT = VA.getLocVT(); 1870 SDValue Arg = Outs[i].Val; 1871 ISD::ArgFlagsTy Flags = Outs[i].Flags; 1872 bool isByVal = Flags.isByVal(); 1873 1874 // Promote the value if needed. 1875 switch (VA.getLocInfo()) { 1876 default: llvm_unreachable("Unknown loc info!"); 1877 case CCValAssign::Full: break; 1878 case CCValAssign::SExt: 1879 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); 1880 break; 1881 case CCValAssign::ZExt: 1882 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); 1883 break; 1884 case CCValAssign::AExt: 1885 if (RegVT.isVector() && RegVT.getSizeInBits() == 128) { 1886 // Special case: passing MMX values in XMM registers. 1887 Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i64, Arg); 1888 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); 1889 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); 1890 } else 1891 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg); 1892 break; 1893 case CCValAssign::BCvt: 1894 Arg = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, Arg); 1895 break; 1896 case CCValAssign::Indirect: { 1897 // Store the argument. 1898 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); 1899 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 1900 Chain = DAG.getStore(Chain, dl, Arg, SpillSlot, 1901 PseudoSourceValue::getFixedStack(FI), 0, 1902 false, false, 0); 1903 Arg = SpillSlot; 1904 break; 1905 } 1906 } 1907 1908 if (VA.isRegLoc()) { 1909 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1910 } else if (!IsSibcall && (!isTailCall || isByVal)) { 1911 assert(VA.isMemLoc()); 1912 if (StackPtr.getNode() == 0) 1913 StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr, getPointerTy()); 1914 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, 1915 dl, DAG, VA, Flags)); 1916 } 1917 } 1918 1919 if (!MemOpChains.empty()) 1920 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1921 &MemOpChains[0], MemOpChains.size()); 1922 1923 // Build a sequence of copy-to-reg nodes chained together with token chain 1924 // and flag operands which copy the outgoing args into registers. 1925 SDValue InFlag; 1926 // Tail call byval lowering might overwrite argument registers so in case of 1927 // tail call optimization the copies to registers are lowered later. 1928 if (!isTailCall) 1929 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1930 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1931 RegsToPass[i].second, InFlag); 1932 InFlag = Chain.getValue(1); 1933 } 1934 1935 if (Subtarget->isPICStyleGOT()) { 1936 // ELF / PIC requires GOT in the EBX register before function calls via PLT 1937 // GOT pointer. 1938 if (!isTailCall) { 1939 Chain = DAG.getCopyToReg(Chain, dl, X86::EBX, 1940 DAG.getNode(X86ISD::GlobalBaseReg, 1941 DebugLoc(), getPointerTy()), 1942 InFlag); 1943 InFlag = Chain.getValue(1); 1944 } else { 1945 // If we are tail calling and generating PIC/GOT style code load the 1946 // address of the callee into ECX. The value in ecx is used as target of 1947 // the tail jump. This is done to circumvent the ebx/callee-saved problem 1948 // for tail calls on PIC/GOT architectures. Normally we would just put the 1949 // address of GOT into ebx and then call target@PLT. But for tail calls 1950 // ebx would be restored (since ebx is callee saved) before jumping to the 1951 // target@PLT. 1952 1953 // Note: The actual moving to ECX is done further down. 
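      // Sketch: for a GOT-PIC tail call to an external function, the callee's
      // address is materialized here (e.g. via a GOT load) and the tail jump
      // emitted later goes through that register value rather than target@PLT.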
      GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
      if (G && !G->getGlobal()->hasHiddenVisibility() &&
          !G->getGlobal()->hasProtectedVisibility())
        Callee = LowerGlobalAddress(Callee, DAG);
      else if (isa<ExternalSymbolSDNode>(Callee))
        Callee = LowerExternalSymbol(Callee, DAG);
    }
  }

  if (Is64Bit && isVarArg) {
    // From AMD64 ABI document:
    // For calls that may call functions that use varargs or stdargs
    // (prototype-less calls or calls to functions containing ellipsis (...) in
    // the declaration) %al is used as a hidden argument to specify the number
    // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
    // registers used and is in the range 0 - 8 inclusive.

    // FIXME: Verify this on Win64
    // Count the number of XMM registers allocated.
    static const unsigned XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
    assert((Subtarget->hasSSE1() || !NumXMMRegs)
           && "SSE registers cannot be used when SSE is disabled");

    Chain = DAG.getCopyToReg(Chain, dl, X86::AL,
                             DAG.getConstant(NumXMMRegs, MVT::i8), InFlag);
    InFlag = Chain.getValue(1);
  }


  // For tail calls lower the arguments to the 'real' stack slot.
  if (isTailCall) {
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.
    SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);

    SmallVector<SDValue, 8> MemOpChains2;
    SDValue FIN;
    int FI = 0;
    // Do not flag preceding copytoreg stuff together with the following stuff.
    InFlag = SDValue();
    if (GuaranteedTailCallOpt) {
      for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
        CCValAssign &VA = ArgLocs[i];
        if (VA.isRegLoc())
          continue;
        assert(VA.isMemLoc());
        SDValue Arg = Outs[i].Val;
        ISD::ArgFlagsTy Flags = Outs[i].Flags;
        // Create frame index.
        int32_t Offset = VA.getLocMemOffset()+FPDiff;
        uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
        FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true, false);
        FIN = DAG.getFrameIndex(FI, getPointerTy());

        if (Flags.isByVal()) {
          // Copy relative to framepointer.
          SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset());
          if (StackPtr.getNode() == 0)
            StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
                                          getPointerTy());
          Source = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, Source);

          MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
                                                           ArgChain,
                                                           Flags, DAG, dl));
        } else {
          // Store relative to framepointer.
2030 MemOpChains2.push_back( 2031 DAG.getStore(ArgChain, dl, Arg, FIN, 2032 PseudoSourceValue::getFixedStack(FI), 0, 2033 false, false, 0)); 2034 } 2035 } 2036 } 2037 2038 if (!MemOpChains2.empty()) 2039 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 2040 &MemOpChains2[0], MemOpChains2.size()); 2041 2042 // Copy arguments to their registers. 2043 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 2044 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 2045 RegsToPass[i].second, InFlag); 2046 InFlag = Chain.getValue(1); 2047 } 2048 InFlag =SDValue(); 2049 2050 // Store the return address to the appropriate stack slot. 2051 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, Is64Bit, 2052 FPDiff, dl); 2053 } 2054 2055 bool WasGlobalOrExternal = false; 2056 if (getTargetMachine().getCodeModel() == CodeModel::Large) { 2057 assert(Is64Bit && "Large code model is only legal in 64-bit mode."); 2058 // In the 64-bit large code model, we have to make all calls 2059 // through a register, since the call instruction's 32-bit 2060 // pc-relative offset may not be large enough to hold the whole 2061 // address. 2062 } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 2063 WasGlobalOrExternal = true; 2064 // If the callee is a GlobalAddress node (quite common, every direct call 2065 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack 2066 // it. 2067 2068 // We should use extra load for direct calls to dllimported functions in 2069 // non-JIT mode. 2070 const GlobalValue *GV = G->getGlobal(); 2071 if (!GV->hasDLLImportLinkage()) { 2072 unsigned char OpFlags = 0; 2073 2074 // On ELF targets, in both X86-64 and X86-32 mode, direct calls to 2075 // external symbols most go through the PLT in PIC mode. If the symbol 2076 // has hidden or protected visibility, or if it is static or local, then 2077 // we don't need to use the PLT - we can directly call it. 2078 if (Subtarget->isTargetELF() && 2079 getTargetMachine().getRelocationModel() == Reloc::PIC_ && 2080 GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { 2081 OpFlags = X86II::MO_PLT; 2082 } else if (Subtarget->isPICStyleStubAny() && 2083 (GV->isDeclaration() || GV->isWeakForLinker()) && 2084 Subtarget->getDarwinVers() < 9) { 2085 // PC-relative references to external symbols should go through $stub, 2086 // unless we're building with the leopard linker or later, which 2087 // automatically synthesizes these stubs. 2088 OpFlags = X86II::MO_DARWIN_STUB; 2089 } 2090 2091 Callee = DAG.getTargetGlobalAddress(GV, getPointerTy(), 2092 G->getOffset(), OpFlags); 2093 } 2094 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 2095 WasGlobalOrExternal = true; 2096 unsigned char OpFlags = 0; 2097 2098 // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external 2099 // symbols should go through the PLT. 2100 if (Subtarget->isTargetELF() && 2101 getTargetMachine().getRelocationModel() == Reloc::PIC_) { 2102 OpFlags = X86II::MO_PLT; 2103 } else if (Subtarget->isPICStyleStubAny() && 2104 Subtarget->getDarwinVers() < 9) { 2105 // PC-relative references to external symbols should go through $stub, 2106 // unless we're building with the leopard linker or later, which 2107 // automatically synthesizes these stubs. 2108 OpFlags = X86II::MO_DARWIN_STUB; 2109 } 2110 2111 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), 2112 OpFlags); 2113 } 2114 2115 // Returns a chain & a flag for retval copy to use. 
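  // The call node assembled below takes its operands in a fixed order: chain,
  // callee, FPDiff (tail calls only), the argument registers, implicit uses
  // such as EBX (GOT base) or AL (vararg SSE count), and finally the optional
  // incoming flag, mirroring the pushes that follow.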
2116 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 2117 SmallVector<SDValue, 8> Ops; 2118 2119 if (!IsSibcall && isTailCall) { 2120 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 2121 DAG.getIntPtrConstant(0, true), InFlag); 2122 InFlag = Chain.getValue(1); 2123 } 2124 2125 Ops.push_back(Chain); 2126 Ops.push_back(Callee); 2127 2128 if (isTailCall) 2129 Ops.push_back(DAG.getConstant(FPDiff, MVT::i32)); 2130 2131 // Add argument registers to the end of the list so that they are known live 2132 // into the call. 2133 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 2134 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 2135 RegsToPass[i].second.getValueType())); 2136 2137 // Add an implicit use GOT pointer in EBX. 2138 if (!isTailCall && Subtarget->isPICStyleGOT()) 2139 Ops.push_back(DAG.getRegister(X86::EBX, getPointerTy())); 2140 2141 // Add an implicit use of AL for x86 vararg functions. 2142 if (Is64Bit && isVarArg) 2143 Ops.push_back(DAG.getRegister(X86::AL, MVT::i8)); 2144 2145 if (InFlag.getNode()) 2146 Ops.push_back(InFlag); 2147 2148 if (isTailCall) { 2149 // If this is the first return lowered for this function, add the regs 2150 // to the liveout set for the function. 2151 if (MF.getRegInfo().liveout_empty()) { 2152 SmallVector<CCValAssign, 16> RVLocs; 2153 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, 2154 *DAG.getContext()); 2155 CCInfo.AnalyzeCallResult(Ins, RetCC_X86); 2156 for (unsigned i = 0; i != RVLocs.size(); ++i) 2157 if (RVLocs[i].isRegLoc()) 2158 MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 2159 } 2160 return DAG.getNode(X86ISD::TC_RETURN, dl, 2161 NodeTys, &Ops[0], Ops.size()); 2162 } 2163 2164 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, &Ops[0], Ops.size()); 2165 InFlag = Chain.getValue(1); 2166 2167 // Create the CALLSEQ_END node. 2168 unsigned NumBytesForCalleeToPush; 2169 if (IsCalleePop(isVarArg, CallConv)) 2170 NumBytesForCalleeToPush = NumBytes; // Callee pops everything 2171 else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet) 2172 // If this is a call to a struct-return function, the callee 2173 // pops the hidden struct pointer, so we have to push it back. 2174 // This is common for Darwin/X86, Linux & Mingw32 targets. 2175 NumBytesForCalleeToPush = 4; 2176 else 2177 NumBytesForCalleeToPush = 0; // Callee pops nothing. 2178 2179 // Returns a flag for retval copy to use. 2180 if (!IsSibcall) { 2181 Chain = DAG.getCALLSEQ_END(Chain, 2182 DAG.getIntPtrConstant(NumBytes, true), 2183 DAG.getIntPtrConstant(NumBytesForCalleeToPush, 2184 true), 2185 InFlag); 2186 InFlag = Chain.getValue(1); 2187 } 2188 2189 // Handle result values, copying them out of physregs into vregs that we 2190 // return. 2191 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, 2192 Ins, dl, DAG, InVals); 2193} 2194 2195 2196//===----------------------------------------------------------------------===// 2197// Fast Calling Convention (tail call) implementation 2198//===----------------------------------------------------------------------===// 2199 2200// Like std call, callee cleans arguments, convention except that ECX is 2201// reserved for storing the tail called function address. Only 2 registers are 2202// free for argument passing (inreg). Tail call optimization is performed 2203// provided: 2204// * tailcallopt is enabled 2205// * caller/callee are fastcc 2206// On X86_64 architecture with GOT-style position independent code only local 2207// (within module) calls are supported at the moment. 
//  To keep the stack aligned according to the platform ABI, the function
//  GetAlignedArgumentStackSize ensures that the argument delta is always a
//  multiple of the stack alignment. (Dynamic linkers need this - darwin's
//  dyld for example.)
//  If a tail-called callee has more arguments than the caller, the caller
//  needs to make sure that there is room to move the RETADDR to. This is
//  achieved by reserving an area the size of the argument delta right after
//  the original RETADDR, but before the saved framepointer or the spilled
//  registers, e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4):
//  stack layout:
//    arg1
//    arg2
//    RETADDR
//    [ new RETADDR
//      move area ]
//    (possible EBP)
//    ESI
//    EDI
//    local1 ..

/// GetAlignedArgumentStackSize - Round the stack size up to the next value
/// that keeps the stack aligned once the return address slot is included,
/// e.g. to 16n + 12 for a 16-byte alignment requirement with 4-byte slots.
unsigned X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
                                                        SelectionDAG& DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  const TargetMachine &TM = MF.getTarget();
  const TargetFrameInfo &TFI = *TM.getFrameInfo();
  unsigned StackAlignment = TFI.getStackAlignment();
  uint64_t AlignMask = StackAlignment - 1;
  int64_t Offset = StackSize;
  uint64_t SlotSize = TD->getPointerSize();
  if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
    // The misalignment still fits below StackAlignment - SlotSize, so just
    // pad up to it.
    Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
  } else {
    // Mask out the low bits, then add one full stack alignment plus the
    // StackAlignment - SlotSize residue.
    Offset = ((~AlignMask) & Offset) + StackAlignment +
      (StackAlignment-SlotSize);
  }
  return Offset;
}

/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
                         const X86InstrInfo *TII) {
  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
  int FI = INT_MAX;
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      if (!TII->isLoadFromStackSlot(Def, FI))
        return false;
    } else {
      unsigned Opcode = Def->getOpcode();
      if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) &&
          Def->getOperand(1).isFI()) {
        FI = Def->getOperand(1).getIndex();
        Bytes = Flags.getByValSize();
      } else
        return false;
    }
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // A ByVal argument is passed in as a pointer but it's now being
      // dereferenced. e.g.
      //   define @foo(%struct.X* %A) {
      //     tail call @bar(%struct.X* byval %A)
      //   }
      return false;
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else
    return false;

  assert(FI != INT_MAX);
  if (!MFI->isFixedObjectIndex(FI))
    return false;
  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                                     bool isCalleeStructRet,
                                                     bool isCallerStructRet,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG& DAG) const {
  if (!IsTailCallConvention(CalleeCC) &&
      CalleeCC != CallingConv::C)
    return false;

  // If -tailcallopt is specified, make fastcc functions tail-callable.
  const MachineFunction &MF = DAG.getMachineFunction();
  const Function *CallerF = DAG.getMachineFunction().getFunction();
  if (GuaranteedTailCallOpt) {
    if (IsTailCallConvention(CalleeCC) &&
        CallerF->getCallingConv() == CalleeCC)
      return true;
    return false;
  }

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
  // emit a special epilogue.
  if (RegInfo->needsStackRealignment(MF))
    return false;

  // Do not sibcall optimize vararg calls unless the call site passes no
  // arguments.
  if (isVarArg && !Outs.empty())
    return false;

  // Also avoid sibcall optimization if either caller or callee uses struct
  // return semantics.
  if (isCalleeStructRet || isCallerStructRet)
    return false;

  // If the call result is in ST0 / ST1, it needs to be popped off the x87 stack.
  // Therefore if it's not used by the call it is not safe to optimize this into
  // a sibcall.
  bool Unused = false;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (!Ins[i].Used) {
      Unused = true;
      break;
    }
  }
  if (Unused) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CalleeCC, false, getTargetMachine(),
                   RVLocs, *DAG.getContext());
    CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
    for (unsigned i = 0; i != RVLocs.size(); ++i) {
      CCValAssign &VA = RVLocs[i];
      if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
        return false;
    }
  }

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    // Check if stack adjustment is needed. For now, do not do this if any
    // argument is passed on the stack.
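    // E.g. (sketch, 32-bit): caller f(int %x) tail-calling g(int %x) passes
    // %x at the same incoming stack offset that g expects, so the
    // MatchingStackOffset check below accepts it and no outgoing argument
    // stores are needed.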
2370 SmallVector<CCValAssign, 16> ArgLocs; 2371 CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(), 2372 ArgLocs, *DAG.getContext()); 2373 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC)); 2374 if (CCInfo.getNextStackOffset()) { 2375 MachineFunction &MF = DAG.getMachineFunction(); 2376 if (MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) 2377 return false; 2378 if (Subtarget->isTargetWin64()) 2379 // Win64 ABI has additional complications. 2380 return false; 2381 2382 // Check if the arguments are already laid out in the right way as 2383 // the caller's fixed stack objects. 2384 MachineFrameInfo *MFI = MF.getFrameInfo(); 2385 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 2386 const X86InstrInfo *TII = 2387 ((X86TargetMachine&)getTargetMachine()).getInstrInfo(); 2388 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2389 CCValAssign &VA = ArgLocs[i]; 2390 EVT RegVT = VA.getLocVT(); 2391 SDValue Arg = Outs[i].Val; 2392 ISD::ArgFlagsTy Flags = Outs[i].Flags; 2393 if (VA.getLocInfo() == CCValAssign::Indirect) 2394 return false; 2395 if (!VA.isRegLoc()) { 2396 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 2397 MFI, MRI, TII)) 2398 return false; 2399 } 2400 } 2401 } 2402 } 2403 2404 return true; 2405} 2406 2407FastISel * 2408X86TargetLowering::createFastISel(MachineFunction &mf, 2409 DenseMap<const Value *, unsigned> &vm, 2410 DenseMap<const BasicBlock*, MachineBasicBlock*> &bm, 2411 DenseMap<const AllocaInst *, int> &am 2412#ifndef NDEBUG 2413 , SmallSet<const Instruction *, 8> &cil 2414#endif 2415 ) { 2416 return X86::createFastISel(mf, vm, bm, am 2417#ifndef NDEBUG 2418 , cil 2419#endif 2420 ); 2421} 2422 2423 2424//===----------------------------------------------------------------------===// 2425// Other Lowering Hooks 2426//===----------------------------------------------------------------------===// 2427 2428 2429SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { 2430 MachineFunction &MF = DAG.getMachineFunction(); 2431 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); 2432 int ReturnAddrIndex = FuncInfo->getRAIndex(); 2433 2434 if (ReturnAddrIndex == 0) { 2435 // Set up a frame object for the return address. 2436 uint64_t SlotSize = TD->getPointerSize(); 2437 ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, 2438 false, false); 2439 FuncInfo->setRAIndex(ReturnAddrIndex); 2440 } 2441 2442 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy()); 2443} 2444 2445 2446bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, 2447 bool hasSymbolicDisplacement) { 2448 // Offset should fit into 32 bit immediate field. 2449 if (!isInt<32>(Offset)) 2450 return false; 2451 2452 // If we don't have a symbolic displacement - we don't have any extra 2453 // restrictions. 2454 if (!hasSymbolicDisplacement) 2455 return true; 2456 2457 // FIXME: Some tweaks might be needed for medium code model. 2458 if (M != CodeModel::Small && M != CodeModel::Kernel) 2459 return false; 2460 2461 // For small code model we assume that latest object is 16MB before end of 31 2462 // bits boundary. We may also accept pretty large negative constants knowing 2463 // that all objects are in the positive half of address space. 2464 if (M == CodeModel::Small && Offset < 16*1024*1024) 2465 return true; 2466 2467 // For kernel code model we know that all object resist in the negative half 2468 // of 32bits address space. 
  // We must not accept negative offsets, since they may land just past an
  // object, but we can accept pretty large positive ones.
  if (M == CodeModel::Kernel && Offset > 0)
    return true;

  return false;
}

/// TranslateX86CC - Do a one-to-one translation of an ISD::CondCode to the
/// X86-specific condition code, returning the condition code and the LHS/RHS
/// of the comparison to make.
static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, bool isFP,
                               SDValue &LHS, SDValue &RHS, SelectionDAG &DAG) {
  if (!isFP) {
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
        // X > -1   -> X == 0, jump !sign.
        RHS = DAG.getConstant(0, RHS.getValueType());
        return X86::COND_NS;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
        // X < 0   -> X == 0, jump on sign.
        return X86::COND_S;
      } else if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
        // X < 1   -> X <= 0
        RHS = DAG.getConstant(0, RHS.getValueType());
        return X86::COND_LE;
      }
    }

    switch (SetCCOpcode) {
    default: llvm_unreachable("Invalid integer condition!");
    case ISD::SETEQ:  return X86::COND_E;
    case ISD::SETGT:  return X86::COND_G;
    case ISD::SETGE:  return X86::COND_GE;
    case ISD::SETLT:  return X86::COND_L;
    case ISD::SETLE:  return X86::COND_LE;
    case ISD::SETNE:  return X86::COND_NE;
    case ISD::SETULT: return X86::COND_B;
    case ISD::SETUGT: return X86::COND_A;
    case ISD::SETULE: return X86::COND_BE;
    case ISD::SETUGE: return X86::COND_AE;
    }
  }

  // First determine if it is required or is profitable to flip the operands.

  // If LHS is a foldable load, but RHS is not, flip the condition.
  if ((ISD::isNON_EXTLoad(LHS.getNode()) && LHS.hasOneUse()) &&
      !(ISD::isNON_EXTLoad(RHS.getNode()) && RHS.hasOneUse())) {
    SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
    std::swap(LHS, RHS);
  }

  switch (SetCCOpcode) {
  default: break;
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETUGT:
  case ISD::SETUGE:
    std::swap(LHS, RHS);
    break;
  }

  // On a floating point condition, the flags are set as follows:
  //  ZF  PF  CF   op
  //   0 | 0 | 0 | X > Y
  //   0 | 0 | 1 | X < Y
  //   1 | 0 | 0 | X == Y
  //   1 | 1 | 1 | unordered
  switch (SetCCOpcode) {
  default: llvm_unreachable("Condcode should be pre-legalized away");
  case ISD::SETUEQ:
  case ISD::SETEQ:   return X86::COND_E;
  case ISD::SETOLT:              // flipped
  case ISD::SETOGT:
  case ISD::SETGT:   return X86::COND_A;
  case ISD::SETOLE:              // flipped
  case ISD::SETOGE:
  case ISD::SETGE:   return X86::COND_AE;
  case ISD::SETUGT:              // flipped
  case ISD::SETULT:
  case ISD::SETLT:   return X86::COND_B;
  case ISD::SETUGE:              // flipped
  case ISD::SETULE:
  case ISD::SETLE:   return X86::COND_BE;
  case ISD::SETONE:
  case ISD::SETNE:   return X86::COND_NE;
  case ISD::SETUO:   return X86::COND_P;
  case ISD::SETO:    return X86::COND_NP;
  case ISD::SETOEQ:
  case ISD::SETUNE:  return X86::COND_INVALID;
  }
}

/// hasFPCMov - Is there a floating point cmov for the specific X86 condition
/// code? The current x86 ISA includes the following FP cmov instructions:
/// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86::COND_B:
  case X86::COND_BE:
  case X86::COND_E:
  case X86::COND_P:
  case X86::COND_A:
  case X86::COND_AE:
  case X86::COND_NE:
  case X86::COND_NP:
    return true;
  }
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
    if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
      return true;
  }
  return false;
}

/// isUndefOrInRange - Return true if Val is undef or if its value falls within
/// the specified range [Low, Hi).
static bool isUndefOrInRange(int Val, int Low, int Hi) {
  return (Val < 0) || (Val >= Low && Val < Hi);
}

/// isUndefOrEqual - Val is either less than zero (undef) or equal to the
/// specified value.
static bool isUndefOrEqual(int Val, int CmpVal) {
  if (Val < 0 || Val == CmpVal)
    return true;
  return false;
}

/// isPSHUFDMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFD or PSHUFW. That is, it doesn't reference
/// the second operand.
static bool isPSHUFDMask(const SmallVectorImpl<int> &Mask, EVT VT) {
  if (VT == MVT::v4f32 || VT == MVT::v4i32 || VT == MVT::v4i16)
    return (Mask[0] < 4 && Mask[1] < 4 && Mask[2] < 4 && Mask[3] < 4);
  if (VT == MVT::v2f64 || VT == MVT::v2i64)
    return (Mask[0] < 2 && Mask[1] < 2);
  return false;
}

bool X86::isPSHUFDMask(ShuffleVectorSDNode *N) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isPSHUFDMask(M, N->getValueType(0));
}

/// isPSHUFHWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFHW.
static bool isPSHUFHWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
  if (VT != MVT::v8i16)
    return false;

  // Lower quadword copied in order or undef.
  for (int i = 0; i != 4; ++i)
    if (Mask[i] >= 0 && Mask[i] != i)
      return false;

  // Upper quadword shuffled.
  for (int i = 4; i != 8; ++i)
    if (Mask[i] >= 0 && (Mask[i] < 4 || Mask[i] > 7))
      return false;

  return true;
}

bool X86::isPSHUFHWMask(ShuffleVectorSDNode *N) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isPSHUFHWMask(M, N->getValueType(0));
}

/// isPSHUFLWMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PSHUFLW.
static bool isPSHUFLWMask(const SmallVectorImpl<int> &Mask, EVT VT) {
  if (VT != MVT::v8i16)
    return false;

  // Upper quadword copied in order.
  for (int i = 4; i != 8; ++i)
    if (Mask[i] >= 0 && Mask[i] != i)
      return false;

  // Lower quadword shuffled.
  for (int i = 0; i != 4; ++i)
    if (Mask[i] >= 4)
      return false;

  return true;
}

bool X86::isPSHUFLWMask(ShuffleVectorSDNode *N) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isPSHUFLWMask(M, N->getValueType(0));
}

/// isPALIGNRMask - Return true if the node specifies a shuffle of elements that
/// is suitable for input to PALIGNR.
/// isPALIGNRMask - Return true if the node specifies a shuffle of elements
/// that is suitable for input to PALIGNR.
static bool isPALIGNRMask(const SmallVectorImpl<int> &Mask, EVT VT,
                          bool hasSSSE3) {
  int i, e = VT.getVectorNumElements();

  // Do not handle v2i64 / v2f64 shuffles with palignr.
  if (e < 4 || !hasSSSE3)
    return false;

  for (i = 0; i != e; ++i)
    if (Mask[i] >= 0)
      break;

  // All undef, not a palignr.
  if (i == e)
    return false;

  // Determine if it's ok to perform a palignr with only the LHS, since we
  // don't have access to the actual shuffle elements to see if RHS is undef.
  bool Unary = Mask[i] < (int)e;
  bool NeedsUnary = false;

  int s = Mask[i] - i;

  // Check the rest of the elements to see if they are consecutive.
  for (++i; i != e; ++i) {
    int m = Mask[i];
    if (m < 0)
      continue;

    Unary = Unary && (m < (int)e);
    NeedsUnary = NeedsUnary || (m < s);

    if (NeedsUnary && !Unary)
      return false;
    if (Unary && m != ((s+i) & (e-1)))
      return false;
    if (!Unary && m != (s+i))
      return false;
  }
  return true;
}

bool X86::isPALIGNRMask(ShuffleVectorSDNode *N) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isPALIGNRMask(M, N->getValueType(0), true);
}

/// isSHUFPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to SHUFP*.
static bool isSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
  int NumElems = VT.getVectorNumElements();
  if (NumElems != 2 && NumElems != 4)
    return false;

  int Half = NumElems / 2;
  for (int i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Mask[i], 0, NumElems))
      return false;
  for (int i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
      return false;

  return true;
}

bool X86::isSHUFPMask(ShuffleVectorSDNode *N) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isSHUFPMask(M, N->getValueType(0));
}

/// isCommutedSHUFP - Returns true if the shuffle mask is exactly
/// the reverse of what x86 shuffles want. x86 shuffles require the lower
/// half elements to come from vector 1 (which would equal the dest.) and
/// the upper half to come from vector 2.
static bool isCommutedSHUFPMask(const SmallVectorImpl<int> &Mask, EVT VT) {
  int NumElems = VT.getVectorNumElements();

  if (NumElems != 2 && NumElems != 4)
    return false;

  int Half = NumElems / 2;
  for (int i = 0; i < Half; ++i)
    if (!isUndefOrInRange(Mask[i], NumElems, NumElems*2))
      return false;
  for (int i = Half; i < NumElems; ++i)
    if (!isUndefOrInRange(Mask[i], 0, NumElems))
      return false;
  return true;
}

static bool isCommutedSHUFP(ShuffleVectorSDNode *N) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return isCommutedSHUFPMask(M, N->getValueType(0));
}
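// Worked examples (illustrative, not from the original source):
//
//   SHUFP-compatible v4f32 mask: <2,3,4,5> - elements 0..1 select from the
//   first vector (indices 0..3), elements 2..3 from the second (4..7).
//
//   PALIGNR-compatible v8i16 mask: <1,2,3,4,5,6,7,8> - every defined element
//   equals s+i for a common shift s = 1, so the concatenation of the two
//   inputs can be shifted right by s elements.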
/// isMOVHLPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVHLPS.
bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
  if (N->getValueType(0).getVectorNumElements() != 4)
    return false;

  // Expect elt0 == 6, elt1 == 7, elt2 == 2, elt3 == 3.
  return isUndefOrEqual(N->getMaskElt(0), 6) &&
         isUndefOrEqual(N->getMaskElt(1), 7) &&
         isUndefOrEqual(N->getMaskElt(2), 2) &&
         isUndefOrEqual(N->getMaskElt(3), 3);
}

/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
/// <2, 3, 2, 3>
bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
  unsigned NumElems = N->getValueType(0).getVectorNumElements();

  if (NumElems != 4)
    return false;

  return isUndefOrEqual(N->getMaskElt(0), 2) &&
         isUndefOrEqual(N->getMaskElt(1), 3) &&
         isUndefOrEqual(N->getMaskElt(2), 2) &&
         isUndefOrEqual(N->getMaskElt(3), 3);
}

/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
  unsigned NumElems = N->getValueType(0).getVectorNumElements();

  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getMaskElt(i), i + NumElems))
      return false;

  for (unsigned i = NumElems/2; i < NumElems; ++i)
    if (!isUndefOrEqual(N->getMaskElt(i), i))
      return false;

  return true;
}

/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
  unsigned NumElems = N->getValueType(0).getVectorNumElements();

  if (NumElems != 2 && NumElems != 4)
    return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getMaskElt(i), i))
      return false;

  for (unsigned i = 0; i < NumElems/2; ++i)
    if (!isUndefOrEqual(N->getMaskElt(i + NumElems/2), i + NumElems))
      return false;

  return true;
}

/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
                         bool V2IsSplat = false) {
  int NumElts = VT.getVectorNumElements();
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
    int BitI  = Mask[i];
    int BitI1 = Mask[i+1];
    if (!isUndefOrEqual(BitI, j))
      return false;
    if (V2IsSplat) {
      if (!isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts))
        return false;
    }
  }
  return true;
}

bool X86::isUNPCKLMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isUNPCKLMask(M, N->getValueType(0), V2IsSplat);
}
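// Illustrative interleave masks (not from the original source), for v4i32
// with operands A (elements 0..3) and B (elements 4..7):
//
//   UNPCKL: <0,4,1,5>  ->  <A0,B0,A1,B1>   (low halves interleaved)
//   UNPCKH: <2,6,3,7>  ->  <A2,B2,A3,B3>   (high halves interleaved)
//
// With V2IsSplat, the odd positions only need to name B's first element,
// e.g. <0,4,1,4> still qualifies as UNPCKL.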
/// isUNPCKHMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKH.
static bool isUNPCKHMask(const SmallVectorImpl<int> &Mask, EVT VT,
                         bool V2IsSplat = false) {
  int NumElts = VT.getVectorNumElements();
  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
    return false;

  for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
    int BitI  = Mask[i];
    int BitI1 = Mask[i+1];
    if (!isUndefOrEqual(BitI, j + NumElts/2))
      return false;
    if (V2IsSplat) {
      // As in isUNPCKLMask, a splat V2 is normalized so all of its
      // references point to its first element.
      if (!isUndefOrEqual(BitI1, NumElts))
        return false;
    } else {
      if (!isUndefOrEqual(BitI1, j + NumElts/2 + NumElts))
        return false;
    }
  }
  return true;
}

bool X86::isUNPCKHMask(ShuffleVectorSDNode *N, bool V2IsSplat) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isUNPCKHMask(M, N->getValueType(0), V2IsSplat);
}

/// isUNPCKL_v_undef_Mask - Special case of isUNPCKLMask for canonical form
/// of vector_shuffle v, v, <0, 4, 1, 5>, i.e. vector_shuffle v, undef,
/// <0, 0, 1, 1>
static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
  int NumElems = VT.getVectorNumElements();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (int i = 0, j = 0; i != NumElems; i += 2, ++j) {
    int BitI  = Mask[i];
    int BitI1 = Mask[i+1];
    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }
  return true;
}

bool X86::isUNPCKL_v_undef_Mask(ShuffleVectorSDNode *N) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isUNPCKL_v_undef_Mask(M, N->getValueType(0));
}

/// isUNPCKH_v_undef_Mask - Special case of isUNPCKHMask for canonical form
/// of vector_shuffle v, v, <2, 6, 3, 7>, i.e. vector_shuffle v, undef,
/// <2, 2, 3, 3>
static bool isUNPCKH_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
  int NumElems = VT.getVectorNumElements();
  if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
    return false;

  for (int i = 0, j = NumElems / 2; i != NumElems; i += 2, ++j) {
    int BitI  = Mask[i];
    int BitI1 = Mask[i+1];
    if (!isUndefOrEqual(BitI, j))
      return false;
    if (!isUndefOrEqual(BitI1, j))
      return false;
  }
  return true;
}

bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isUNPCKH_v_undef_Mask(M, N->getValueType(0));
}

/// isMOVLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSS,
/// MOVSD, and MOVD, i.e. setting the lowest element.
static bool isMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT) {
  if (VT.getVectorElementType().getSizeInBits() < 32)
    return false;

  int NumElts = VT.getVectorNumElements();

  if (!isUndefOrEqual(Mask[0], NumElts))
    return false;

  for (int i = 1; i < NumElts; ++i)
    if (!isUndefOrEqual(Mask[i], i))
      return false;

  return true;
}

bool X86::isMOVLMask(ShuffleVectorSDNode *N) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return ::isMOVLMask(M, N->getValueType(0));
}
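// Illustrative example (not from the original source): for v4f32 with
// operands A and B, the MOVL pattern is mask <4,1,2,3>, i.e.
// <B0,A1,A2,A3> - B's low element replaces A's low element, which is
// exactly what movss produces.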
/// isCommutedMOVL - Returns true if the shuffle mask is exactly the reverse
/// of what x86 movss wants: the lowest element must be the lowest element of
/// vector 2, and the other elements must come from vector 1 in order.
static bool isCommutedMOVLMask(const SmallVectorImpl<int> &Mask, EVT VT,
                               bool V2IsSplat = false, bool V2IsUndef = false) {
  int NumOps = VT.getVectorNumElements();
  if (NumOps != 2 && NumOps != 4 && NumOps != 8 && NumOps != 16)
    return false;

  if (!isUndefOrEqual(Mask[0], 0))
    return false;

  for (int i = 1; i < NumOps; ++i)
    if (!(isUndefOrEqual(Mask[i], i+NumOps) ||
          (V2IsUndef && isUndefOrInRange(Mask[i], NumOps, NumOps*2)) ||
          (V2IsSplat && isUndefOrEqual(Mask[i], NumOps))))
      return false;

  return true;
}

static bool isCommutedMOVL(ShuffleVectorSDNode *N, bool V2IsSplat = false,
                           bool V2IsUndef = false) {
  SmallVector<int, 8> M;
  N->getMask(M);
  return isCommutedMOVLMask(M, N->getValueType(0), V2IsSplat, V2IsUndef);
}

/// isMOVSHDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSHDUP.
bool X86::isMOVSHDUPMask(ShuffleVectorSDNode *N) {
  if (N->getValueType(0).getVectorNumElements() != 4)
    return false;

  // Expect 1, 1, 3, 3
  for (unsigned i = 0; i < 2; ++i) {
    int Elt = N->getMaskElt(i);
    if (Elt >= 0 && Elt != 1)
      return false;
  }

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    int Elt = N->getMaskElt(i);
    if (Elt >= 0 && Elt != 3)
      return false;
    if (Elt == 3)
      HasHi = true;
  }
  // Don't use movshdup if it can be done with a shufps.
  // FIXME: verify that matching u, u, 3, 3 is what we want.
  return HasHi;
}

/// isMOVSLDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVSLDUP.
bool X86::isMOVSLDUPMask(ShuffleVectorSDNode *N) {
  if (N->getValueType(0).getVectorNumElements() != 4)
    return false;

  // Expect 0, 0, 2, 2
  for (unsigned i = 0; i < 2; ++i)
    if (N->getMaskElt(i) > 0)
      return false;

  bool HasHi = false;
  for (unsigned i = 2; i < 4; ++i) {
    int Elt = N->getMaskElt(i);
    if (Elt >= 0 && Elt != 2)
      return false;
    if (Elt == 2)
      HasHi = true;
  }
  // Don't use movsldup if it can be done with a shufps.
  return HasHi;
}

/// isMOVDDUPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVDDUP.
bool X86::isMOVDDUPMask(ShuffleVectorSDNode *N) {
  int e = N->getValueType(0).getVectorNumElements() / 2;

  for (int i = 0; i < e; ++i)
    if (!isUndefOrEqual(N->getMaskElt(i), i))
      return false;
  for (int i = 0; i < e; ++i)
    if (!isUndefOrEqual(N->getMaskElt(e+i), i))
      return false;
  return true;
}
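// Illustrative duplicate masks (not from the original source):
//
//   MOVSHDUP: <1,1,3,3>  v4f32 - odd elements copied into the even lanes
//   MOVSLDUP: <0,0,2,2>  v4f32 - even elements copied into the odd lanes
//   MOVDDUP:  <0,0>      v2f64 - the low 64-bit half duplicated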
/// getShuffleSHUFImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with PSHUF* and SHUFP* instructions.
unsigned X86::getShuffleSHUFImmediate(SDNode *N) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  int NumOperands = SVOp->getValueType(0).getVectorNumElements();

  unsigned Shift = (NumOperands == 4) ? 2 : 1;
  unsigned Mask = 0;
  for (int i = 0; i < NumOperands; ++i) {
    int Val = SVOp->getMaskElt(NumOperands-i-1);
    if (Val < 0) Val = 0;
    if (Val >= NumOperands) Val -= NumOperands;
    Mask |= Val;
    if (i != NumOperands - 1)
      Mask <<= Shift;
  }
  return Mask;
}

/// getShufflePSHUFHWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFHW instruction.
unsigned X86::getShufflePSHUFHWImmediate(SDNode *N) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  unsigned Mask = 0;
  // 8 nodes, but we only care about the last 4.
  for (unsigned i = 7; i >= 4; --i) {
    int Val = SVOp->getMaskElt(i);
    if (Val >= 0)
      Mask |= (Val - 4);
    if (i != 4)
      Mask <<= 2;
  }
  return Mask;
}

/// getShufflePSHUFLWImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PSHUFLW instruction.
unsigned X86::getShufflePSHUFLWImmediate(SDNode *N) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  unsigned Mask = 0;
  // 8 nodes, but we only care about the first 4.
  for (int i = 3; i >= 0; --i) {
    int Val = SVOp->getMaskElt(i);
    if (Val >= 0)
      Mask |= Val;
    if (i != 0)
      Mask <<= 2;
  }
  return Mask;
}

/// getShufflePALIGNRImmediate - Return the appropriate immediate to shuffle
/// the specified VECTOR_SHUFFLE mask with the PALIGNR instruction.
unsigned X86::getShufflePALIGNRImmediate(SDNode *N) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  EVT VVT = N->getValueType(0);
  unsigned EltSize = VVT.getVectorElementType().getSizeInBits() >> 3;
  int Val = 0;

  unsigned i, e;
  for (i = 0, e = VVT.getVectorNumElements(); i != e; ++i) {
    Val = SVOp->getMaskElt(i);
    if (Val >= 0)
      break;
  }
  return (Val - i) * EltSize;
}

/// isZeroNode - Returns true if Elt is a constant zero or a floating point
/// constant +0.0.
bool X86::isZeroNode(SDValue Elt) {
  return ((isa<ConstantSDNode>(Elt) &&
           cast<ConstantSDNode>(Elt)->getZExtValue() == 0) ||
          (isa<ConstantFPSDNode>(Elt) &&
           cast<ConstantFPSDNode>(Elt)->getValueAPF().isPosZero()));
}

/// CommuteVectorShuffle - Swap vector_shuffle operands as well as values in
/// their permute mask.
static SDValue CommuteVectorShuffle(ShuffleVectorSDNode *SVOp,
                                    SelectionDAG &DAG) {
  EVT VT = SVOp->getValueType(0);
  unsigned NumElems = VT.getVectorNumElements();
  SmallVector<int, 8> MaskVec;

  for (unsigned i = 0; i != NumElems; ++i) {
    int idx = SVOp->getMaskElt(i);
    if (idx < 0)
      MaskVec.push_back(idx);
    else if (idx < (int)NumElems)
      MaskVec.push_back(idx + NumElems);
    else
      MaskVec.push_back(idx - NumElems);
  }
  return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(1),
                              SVOp->getOperand(0), &MaskVec[0]);
}
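// Worked immediate encodings (illustrative, not from the original source):
//
//   getShuffleSHUFImmediate on the v4i32 mask <3,2,1,0> packs two bits per
//   element, highest element first:
//     imm = (0 << 6) | (1 << 4) | (2 << 2) | 3 = 0x1B     (pshufd $0x1b)
//
//   getShufflePALIGNRImmediate on the v8i16 mask <1,2,3,4,5,6,7,8> finds the
//   first defined element (Val = 1 at i = 0) and scales by the 2-byte
//   element size: (1 - 0) * 2 = 2                          (palignr $2)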
/// CommuteVectorShuffleMask - Change values in a shuffle permute mask assuming
/// the two vector operands have swapped position.
static void CommuteVectorShuffleMask(SmallVectorImpl<int> &Mask, EVT VT) {
  unsigned NumElems = VT.getVectorNumElements();
  for (unsigned i = 0; i != NumElems; ++i) {
    int idx = Mask[i];
    if (idx < 0)
      continue;
    else if (idx < (int)NumElems)
      Mask[i] = idx + NumElems;
    else
      Mask[i] = idx - NumElems;
  }
}

/// ShouldXformToMOVHLPS - Return true if the node should be transformed to
/// match movhlps. The lower half elements should come from the upper half of
/// V1 (and in order), and the upper half elements should come from the upper
/// half of V2 (and in order).
static bool ShouldXformToMOVHLPS(ShuffleVectorSDNode *Op) {
  if (Op->getValueType(0).getVectorNumElements() != 4)
    return false;
  for (unsigned i = 0, e = 2; i != e; ++i)
    if (!isUndefOrEqual(Op->getMaskElt(i), i+2))
      return false;
  for (unsigned i = 2; i != 4; ++i)
    if (!isUndefOrEqual(Op->getMaskElt(i), i+4))
      return false;
  return true;
}

/// isScalarLoadToVector - Returns true if the node is a scalar load that
/// is promoted to a vector. It also returns the LoadSDNode by reference if
/// required.
static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) {
  if (N->getOpcode() != ISD::SCALAR_TO_VECTOR)
    return false;
  N = N->getOperand(0).getNode();
  if (!ISD::isNON_EXTLoad(N))
    return false;
  if (LD)
    *LD = cast<LoadSDNode>(N);
  return true;
}

/// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to
/// match movlp{s|d}. The lower half elements should come from the lower half
/// of V1 (and in order), and the upper half elements should come from the
/// upper half of V2 (and in order). And since V1 will become the source of
/// the MOVLP, it must be either a vector load or a scalar load to vector.
static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2,
                               ShuffleVectorSDNode *Op) {
  if (!ISD::isNON_EXTLoad(V1) && !isScalarLoadToVector(V1))
    return false;
  // If V2 is a vector load, don't do this transformation; we will try to
  // use a load-folding shufps op instead.
  if (ISD::isNON_EXTLoad(V2))
    return false;

  unsigned NumElems = Op->getValueType(0).getVectorNumElements();

  if (NumElems != 2 && NumElems != 4)
    return false;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i)
    if (!isUndefOrEqual(Op->getMaskElt(i), i))
      return false;
  for (unsigned i = NumElems/2; i != NumElems; ++i)
    if (!isUndefOrEqual(Op->getMaskElt(i), i+NumElems))
      return false;
  return true;
}

/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are
/// all the same.
static bool isSplatVector(SDNode *N) {
  if (N->getOpcode() != ISD::BUILD_VECTOR)
    return false;

  SDValue SplatValue = N->getOperand(0);
  for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i)
    if (N->getOperand(i) != SplatValue)
      return false;
  return true;
}
/// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved
/// to a zero vector.
/// FIXME: move to dag combiner / method on ShuffleVectorSDNode
static bool isZeroShuffle(ShuffleVectorSDNode *N) {
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned NumElems = N->getValueType(0).getVectorNumElements();
  for (unsigned i = 0; i != NumElems; ++i) {
    int Idx = N->getMaskElt(i);
    if (Idx >= (int)NumElems) {
      unsigned Opc = V2.getOpcode();
      if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V2.getNode()))
        continue;
      if (Opc != ISD::BUILD_VECTOR ||
          !X86::isZeroNode(V2.getOperand(Idx-NumElems)))
        return false;
    } else if (Idx >= 0) {
      unsigned Opc = V1.getOpcode();
      if (Opc == ISD::UNDEF || ISD::isBuildVectorAllZeros(V1.getNode()))
        continue;
      if (Opc != ISD::BUILD_VECTOR ||
          !X86::isZeroNode(V1.getOperand(Idx)))
        return false;
    }
  }
  return true;
}

/// getZeroVector - Returns a vector of specified type with all zero elements.
///
static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
                             DebugLoc dl) {
  assert(VT.isVector() && "Expected a vector type");

  // Always build zero vectors as <4 x i32> or <2 x i32> bitcasted to their
  // dest type. This ensures they get CSE'd.
  SDValue Vec;
  if (VT.getSizeInBits() == 64) { // MMX
    SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
  } else if (HasSSE2) {  // SSE2
    SDValue Cst = DAG.getTargetConstant(0, MVT::i32);
    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
  } else { // SSE1
    SDValue Cst = DAG.getTargetConstantFP(+0.0, MVT::f32);
    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4f32, Cst, Cst, Cst, Cst);
  }
  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
}

/// getOnesVector - Returns a vector of specified type with all bits set.
///
static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
  assert(VT.isVector() && "Expected a vector type");

  // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their
  // dest type. This ensures they get CSE'd.
  SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
  SDValue Vec;
  if (VT.getSizeInBits() == 64) // MMX
    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i32, Cst, Cst);
  else // SSE
    Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
}


/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 point to its first element.
static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
  EVT VT = SVOp->getValueType(0);
  unsigned NumElems = VT.getVectorNumElements();

  bool Changed = false;
  SmallVector<int, 8> MaskVec;
  SVOp->getMask(MaskVec);

  for (unsigned i = 0; i != NumElems; ++i) {
    if (MaskVec[i] > (int)NumElems) {
      MaskVec[i] = NumElems;
      Changed = true;
    }
  }
  if (Changed)
    return DAG.getVectorShuffle(VT, SVOp->getDebugLoc(), SVOp->getOperand(0),
                                SVOp->getOperand(1), &MaskVec[0]);
  return SDValue(SVOp, 0);
}
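// Illustrative note (not from the original source): canonicalizing on
// <4 x i32> means every 128-bit zero is one CSE'd node, typically selected
// as "pxor %xmm0, %xmm0", and every all-ones vector as
// "pcmpeqd %xmm0, %xmm0", regardless of the vector type the caller asked
// for.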
/// getMOVL - Returns a vector_shuffle node for a movs{s|d}, movd
/// operation of specified width.
static SDValue getMOVL(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
                       SDValue V2) {
  unsigned NumElems = VT.getVectorNumElements();
  SmallVector<int, 8> Mask;
  Mask.push_back(NumElems);
  for (unsigned i = 1; i != NumElems; ++i)
    Mask.push_back(i);
  return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}

/// getUnpackl - Returns a vector_shuffle node for an unpackl operation.
static SDValue getUnpackl(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
                          SDValue V2) {
  unsigned NumElems = VT.getVectorNumElements();
  SmallVector<int, 8> Mask;
  for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
    Mask.push_back(i);
    Mask.push_back(i + NumElems);
  }
  return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}

/// getUnpackh - Returns a vector_shuffle node for an unpackh operation.
static SDValue getUnpackh(SelectionDAG &DAG, DebugLoc dl, EVT VT, SDValue V1,
                          SDValue V2) {
  unsigned NumElems = VT.getVectorNumElements();
  unsigned Half = NumElems/2;
  SmallVector<int, 8> Mask;
  for (unsigned i = 0; i != Half; ++i) {
    Mask.push_back(i + Half);
    Mask.push_back(i + NumElems + Half);
  }
  return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask[0]);
}

/// PromoteSplat - Promote a splat of v4i32, v8i16 or v16i8 to v4f32.
static SDValue PromoteSplat(ShuffleVectorSDNode *SV, SelectionDAG &DAG,
                            bool HasSSE2) {
  if (SV->getValueType(0).getVectorNumElements() <= 4)
    return SDValue(SV, 0);

  EVT PVT = MVT::v4f32;
  EVT VT = SV->getValueType(0);
  DebugLoc dl = SV->getDebugLoc();
  SDValue V1 = SV->getOperand(0);
  int NumElems = VT.getVectorNumElements();
  int EltNo = SV->getSplatIndex();

  // unpack elements to the correct location
  while (NumElems > 4) {
    if (EltNo < NumElems/2) {
      V1 = getUnpackl(DAG, dl, VT, V1, V1);
    } else {
      V1 = getUnpackh(DAG, dl, VT, V1, V1);
      EltNo -= NumElems/2;
    }
    NumElems >>= 1;
  }

  // Perform the splat.
  int SplatMask[4] = { EltNo, EltNo, EltNo, EltNo };
  V1 = DAG.getNode(ISD::BIT_CONVERT, dl, PVT, V1);
  V1 = DAG.getVectorShuffle(PVT, dl, V1, DAG.getUNDEF(PVT), &SplatMask[0]);
  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, V1);
}

/// getShuffleVectorZeroOrUndef - Return a vector_shuffle of the specified
/// vector and a zero or undef vector. This produces a shuffle where the low
/// element of V2 is swizzled into the zero/undef vector, landing at element
/// Idx. This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
                                           bool isZero, bool HasSSE2,
                                           SelectionDAG &DAG) {
  EVT VT = V2.getValueType();
  SDValue V1 = isZero
    ? getZeroVector(VT, HasSSE2, DAG, V2.getDebugLoc()) : DAG.getUNDEF(VT);
  unsigned NumElems = VT.getVectorNumElements();
  SmallVector<int, 16> MaskVec;
  for (unsigned i = 0; i != NumElems; ++i)
    // If this is the insertion idx, put the low elt of V2 here.
    MaskVec.push_back(i == Idx ? NumElems : i);
  return DAG.getVectorShuffle(VT, V2.getDebugLoc(), V1, V2, &MaskVec[0]);
}
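// Illustrative walk-through of PromoteSplat (not from the original source):
// splatting element 5 of a v8i16.  Since 5 >= 8/2, one getUnpackh moves the
// upper half down, leaving the value at index 1 of a logical 4-element
// vector; the final v4f32 shuffle <1,1,1,1> then replicates that 32-bit lane
// across the register before bitcasting back to v8i16.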
/// getNumOfConsecutiveZeros - Return the number of consecutive elements of a
/// shuffle result that are zero, counting from the low end if Low is true
/// and from the high end otherwise.
static
unsigned getNumOfConsecutiveZeros(ShuffleVectorSDNode *SVOp, int NumElems,
                                  bool Low, SelectionDAG &DAG) {
  unsigned NumZeros = 0;
  for (int i = 0; i < NumElems; ++i) {
    unsigned Index = Low ? i : NumElems-i-1;
    int Idx = SVOp->getMaskElt(Index);
    if (Idx < 0) {
      ++NumZeros;
      continue;
    }
    SDValue Elt = DAG.getShuffleScalarElt(SVOp, Index);
    if (Elt.getNode() && X86::isZeroNode(Elt))
      ++NumZeros;
    else
      break;
  }
  return NumZeros;
}

/// isVectorShift - Returns true if the shuffle can be implemented as a
/// logical left or right shift of a vector.
/// FIXME: split into pslldqi, psrldqi, palignr variants.
static bool isVectorShift(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG,
                          bool &isLeft, SDValue &ShVal, unsigned &ShAmt) {
  unsigned NumElems = SVOp->getValueType(0).getVectorNumElements();

  isLeft = true;
  unsigned NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, true, DAG);
  if (!NumZeros) {
    isLeft = false;
    NumZeros = getNumOfConsecutiveZeros(SVOp, NumElems, false, DAG);
    if (!NumZeros)
      return false;
  }
  bool SeenV1 = false;
  bool SeenV2 = false;
  for (unsigned i = NumZeros; i < NumElems; ++i) {
    unsigned Val = isLeft ? (i - NumZeros) : i;
    int Idx_ = SVOp->getMaskElt(isLeft ? i : (i - NumZeros));
    if (Idx_ < 0)
      continue;
    unsigned Idx = (unsigned) Idx_;
    if (Idx < NumElems)
      SeenV1 = true;
    else {
      Idx -= NumElems;
      SeenV2 = true;
    }
    if (Idx != Val)
      return false;
  }
  if (SeenV1 && SeenV2)
    return false;

  ShVal = SeenV1 ? SVOp->getOperand(0) : SVOp->getOperand(1);
  ShAmt = NumZeros;
  return true;
}


/// LowerBuildVectorv16i8 - Custom lower build_vector of v16i8.
///
static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
                                     unsigned NumNonZero, unsigned NumZero,
                                     SelectionDAG &DAG, TargetLowering &TLI) {
  if (NumNonZero > 8)
    return SDValue();

  DebugLoc dl = Op.getDebugLoc();
  SDValue V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 16; ++i) {
    bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
    if (ThisIsNonZero && First) {
      if (NumZero)
        V = getZeroVector(MVT::v8i16, true, DAG, dl);
      else
        V = DAG.getUNDEF(MVT::v8i16);
      First = false;
    }

    if ((i & 1) != 0) {
      SDValue ThisElt(0, 0), LastElt(0, 0);
      bool LastIsNonZero = (NonZeros & (1 << (i-1))) != 0;
      if (LastIsNonZero) {
        LastElt = DAG.getNode(ISD::ZERO_EXTEND, dl,
                              MVT::i16, Op.getOperand(i-1));
      }
      if (ThisIsNonZero) {
        ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
        ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16,
                              ThisElt, DAG.getConstant(8, MVT::i8));
        if (LastIsNonZero)
          ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
      } else
        ThisElt = LastElt;

      if (ThisElt.getNode())
        V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
                        DAG.getIntPtrConstant(i/2));
    }
  }

  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V);
}
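// Illustrative example (not from the original source): for a v4i32 shuffle
// of X against the zero vector with mask <4,0,1,2> (element 4 selects the
// zero operand's first element), isVectorShift reports isLeft = true,
// ShVal = X and ShAmt = 1: the whole 128-bit register is shifted left by one
// 32-bit element, which is effectively a pslldq $4.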
/// LowerBuildVectorv8i16 - Custom lower build_vector of v8i16.
///
static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
                                     unsigned NumNonZero, unsigned NumZero,
                                     SelectionDAG &DAG, TargetLowering &TLI) {
  if (NumNonZero > 4)
    return SDValue();

  DebugLoc dl = Op.getDebugLoc();
  SDValue V(0, 0);
  bool First = true;
  for (unsigned i = 0; i < 8; ++i) {
    bool isNonZero = (NonZeros & (1 << i)) != 0;
    if (isNonZero) {
      if (First) {
        if (NumZero)
          V = getZeroVector(MVT::v8i16, true, DAG, dl);
        else
          V = DAG.getUNDEF(MVT::v8i16);
        First = false;
      }
      V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
                      MVT::v8i16, V, Op.getOperand(i),
                      DAG.getIntPtrConstant(i));
    }
  }

  return V;
}

/// getVShift - Return a vector logical shift node.
///
static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
                         unsigned NumBits, SelectionDAG &DAG,
                         const TargetLowering &TLI, DebugLoc dl) {
  bool isMMX = VT.getSizeInBits() == 64;
  EVT ShVT = isMMX ? MVT::v1i64 : MVT::v2i64;
  unsigned Opc = isLeft ? X86ISD::VSHL : X86ISD::VSRL;
  SrcOp = DAG.getNode(ISD::BIT_CONVERT, dl, ShVT, SrcOp);
  return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                     DAG.getNode(Opc, dl, ShVT, SrcOp,
                                 DAG.getConstant(NumBits,
                                                 TLI.getShiftAmountTy())));
}
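// Usage note (illustrative, not from the original source): NumBits is a bit
// count over the whole register - e.g. the LowerBUILD_VECTOR caller below
// passes half the register width for the 2-element case - so the shift
// always operates on the value as a single v2i64 (or v1i64 for MMX)
// quantity rather than per element.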
SDValue
X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
                                          SelectionDAG &DAG) {

  // Check if the scalar load can be widened into a vector load. And if
  // the address is "base + cst" see if the cst can be "absorbed" into
  // the shuffle mask.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
    SDValue Ptr = LD->getBasePtr();
    if (!ISD::isNormalLoad(LD) || LD->isVolatile())
      return SDValue();
    EVT PVT = LD->getValueType(0);
    if (PVT != MVT::i32 && PVT != MVT::f32)
      return SDValue();

    int FI = -1;
    int64_t Offset = 0;
    if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
      FI = FINode->getIndex();
      Offset = 0;
    } else if (Ptr.getOpcode() == ISD::ADD &&
               isa<ConstantSDNode>(Ptr.getOperand(1)) &&
               isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
      FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
      Offset = Ptr.getConstantOperandVal(1);
      Ptr = Ptr.getOperand(0);
    } else {
      return SDValue();
    }

    SDValue Chain = LD->getChain();
    // Make sure the stack object alignment is at least 16.
    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
    if (DAG.InferPtrAlignment(Ptr) < 16) {
      if (MFI->isFixedObjectIndex(FI)) {
        // Can't change the alignment. FIXME: It's possible to compute
        // the exact stack offset and reference FI + adjust offset instead,
        // if someone *really* cares about this; that's the way to
        // implement it.
        return SDValue();
      } else {
        MFI->setObjectAlignment(FI, 16);
      }
    }

    // (Offset % 16) must be a multiple of 4. The address is then
    // Ptr + (Offset & ~15).
    if (Offset < 0)
      return SDValue();
    if ((Offset % 16) & 3)
      return SDValue();
    int64_t StartOffset = Offset & ~15;
    if (StartOffset)
      Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(),
                        Ptr, DAG.getConstant(StartOffset, Ptr.getValueType()));

    int EltNo = (Offset - StartOffset) >> 2;
    int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
    EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
    SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr, LD->getSrcValue(), 0,
                             false, false, 0);
    // Canonicalize it to a v4i32 shuffle.
    V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1);
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                       DAG.getVectorShuffle(MVT::v4i32, dl, V1,
                                            DAG.getUNDEF(MVT::v4i32),
                                            &Mask[0]));
  }

  return SDValue();
}

/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
/// vector of type 'VT', see if the elements can be replaced by a single large
/// load which has the same value as a build_vector whose operands are 'elts'.
///
/// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
///
/// FIXME: we'd also like to handle the case where the last elements are zero
/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
/// There's even a handy isZeroNode for that purpose.
static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
                                        DebugLoc &dl, SelectionDAG &DAG) {
  EVT EltVT = VT.getVectorElementType();
  unsigned NumElems = Elts.size();

  LoadSDNode *LDBase = NULL;
  unsigned LastLoadedElt = -1U;

  // For each element in the initializer, see if we've found a load or an
  // undef.  If we don't find an initial load element, or later load elements
  // are non-consecutive, bail out.
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue Elt = Elts[i];

    if (!Elt.getNode() ||
        (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
      return SDValue();
    if (!LDBase) {
      if (Elt.getNode()->getOpcode() == ISD::UNDEF)
        return SDValue();
      LDBase = cast<LoadSDNode>(Elt.getNode());
      LastLoadedElt = i;
      continue;
    }
    if (Elt.getOpcode() == ISD::UNDEF)
      continue;

    LoadSDNode *LD = cast<LoadSDNode>(Elt);
    if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
      return SDValue();
    LastLoadedElt = i;
  }

  // If we have found an entire vector of loads and undefs, then return a
  // large load of the entire vector width starting at the base pointer.  If
  // we found consecutive loads for the low half, generate a vzext_load node.
  if (LastLoadedElt == NumElems - 1) {
    if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
      return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
                         LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
                         LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
    return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
                       LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
                       LDBase->isVolatile(), LDBase->isNonTemporal(),
                       LDBase->getAlignment());
  } else if (NumElems == 4 && LastLoadedElt == 1) {
    SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
    SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
  }
  return SDValue();
}
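// Worked cases (illustrative, not from the original source), for a v4i32
// build_vector from pointer p:
//
//   <load p[0], load p[1], load p[2], load p[3]>  ->  one 16-byte vector load
//   <load p[0], load p[1], undef,     undef>      ->  X86ISD::VZEXT_LOAD,
//                                                     effectively a movq that
//                                                     zeros the upper half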
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  // All zeros are handled with pxor, all ones are handled with pcmpeqd.
  if (ISD::isBuildVectorAllZeros(Op.getNode())
      || ISD::isBuildVectorAllOnes(Op.getNode())) {
    // Canonicalize this to either <4 x i32> or <2 x i32> (SSE vs MMX) to
    // 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars
    // are eliminated on x86-32 hosts.
    if (Op.getValueType() == MVT::v4i32 || Op.getValueType() == MVT::v2i32)
      return Op;

    if (ISD::isBuildVectorAllOnes(Op.getNode()))
      return getOnesVector(Op.getValueType(), DAG, dl);
    return getZeroVector(Op.getValueType(), Subtarget->hasSSE2(), DAG, dl);
  }

  EVT VT = Op.getValueType();
  EVT ExtVT = VT.getVectorElementType();
  unsigned EVTBits = ExtVT.getSizeInBits();

  unsigned NumElems = Op.getNumOperands();
  unsigned NumZero  = 0;
  unsigned NumNonZero = 0;
  unsigned NonZeros = 0;
  bool IsAllConstants = true;
  SmallSet<SDValue, 8> Values;
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue Elt = Op.getOperand(i);
    if (Elt.getOpcode() == ISD::UNDEF)
      continue;
    Values.insert(Elt);
    if (Elt.getOpcode() != ISD::Constant &&
        Elt.getOpcode() != ISD::ConstantFP)
      IsAllConstants = false;
    if (X86::isZeroNode(Elt))
      NumZero++;
    else {
      NonZeros |= (1 << i);
      NumNonZero++;
    }
  }

  if (NumNonZero == 0) {
    // All undef vector. Return an UNDEF.  All zero vectors were handled above.
    return DAG.getUNDEF(VT);
  }

  // Special case for single non-zero, non-undef, element.
  if (NumNonZero == 1) {
    unsigned Idx = CountTrailingZeros_32(NonZeros);
    SDValue Item = Op.getOperand(Idx);

    // If this is an insertion of an i64 value on x86-32, and if the top bits
    // of the value are obviously zero, truncate the value to i32 and do the
    // insertion that way.  Only do this if the value is non-constant or if
    // the value is a constant being inserted into element 0.  It is cheaper
    // to do a constant pool load than it is to do a movd + shuffle.
    if (ExtVT == MVT::i64 && !Subtarget->is64Bit() &&
        (!IsAllConstants || Idx == 0)) {
      if (DAG.MaskedValueIsZero(Item, APInt::getBitsSet(64, 32, 64))) {
        // Handle MMX and SSE both.
        EVT VecVT = VT == MVT::v2i64 ? MVT::v4i32 : MVT::v2i32;
        unsigned VecElts = VT == MVT::v2i64 ? 4 : 2;

        // Truncate the value (which may itself be a constant) to i32, and
        // convert it to a vector with movd (S2V+shuffle to zero extend).
        Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
        Item = getShuffleVectorZeroOrUndef(Item, 0, true,
                                           Subtarget->hasSSE2(), DAG);

        // Now we have our 32-bit value zero extended in the low element of
        // a vector.  If Idx != 0, swizzle it into place.
        if (Idx != 0) {
          SmallVector<int, 4> Mask;
          Mask.push_back(Idx);
          for (unsigned i = 1; i != VecElts; ++i)
            Mask.push_back(i);
          Item = DAG.getVectorShuffle(VecVT, dl, Item,
                                      DAG.getUNDEF(Item.getValueType()),
                                      &Mask[0]);
        }
        return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Item);
      }
    }

    // If we have a constant or non-constant insertion into the low element of
    // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
    // the rest of the elements.  This will be matched as movd/movq/movss/movsd
    // depending on what the source datatype is.
    if (Idx == 0) {
      if (NumZero == 0) {
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
      } else if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
                 (ExtVT == MVT::i64 && Subtarget->is64Bit())) {
        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
        // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
        return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(),
                                           DAG);
      } else if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
        Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
        EVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
        Item = getShuffleVectorZeroOrUndef(Item, 0, true,
                                           Subtarget->hasSSE2(), DAG);
        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item);
      }
    }

    // Is it a vector logical left shift?
    if (NumElems == 2 && Idx == 1 &&
        X86::isZeroNode(Op.getOperand(0)) &&
        !X86::isZeroNode(Op.getOperand(1))) {
      unsigned NumBits = VT.getSizeInBits();
      return getVShift(true, VT,
                       DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
                                   VT, Op.getOperand(1)),
                       NumBits/2, DAG, *this, dl);
    }

    if (IsAllConstants) // Otherwise, it's better to do a constpool load.
      return SDValue();

    // Otherwise, if this is a vector with i32 or f32 elements, and the
    // element is a non-constant being inserted into an element other than
    // the low one, we can't use a constant pool load.  Instead, use
    // SCALAR_TO_VECTOR (aka movd/movss) to move this into the low element,
    // then shuffle it into place.
    if (EVTBits == 32) {
      Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);

      // Turn it into a shuffle of zero and zero-extended scalar to vector.
      Item = getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
                                         Subtarget->hasSSE2(), DAG);
      SmallVector<int, 8> MaskVec;
      for (unsigned i = 0; i < NumElems; i++)
        MaskVec.push_back(i == Idx ? 0 : 1);
      return DAG.getVectorShuffle(VT, dl, Item, DAG.getUNDEF(VT), &MaskVec[0]);
    }
  }

  // Splat is obviously ok. Let legalizer expand it to a shuffle.
  if (Values.size() == 1) {
    if (EVTBits == 32) {
      // Instead of a shuffle like this:
      //   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
      // check if it's possible to issue this instead:
      //   shuffle (vload ptr), undef, <1, 1, 1, 1>
      unsigned Idx = CountTrailingZeros_32(NonZeros);
      SDValue Item = Op.getOperand(Idx);
      if (Op.getNode()->isOnlyUserOf(Item.getNode()))
        return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
    }
    return SDValue();
  }

  // A vector full of immediates; various special cases are already
  // handled, so this is best done with a single constant-pool load.
  if (IsAllConstants)
    return SDValue();

  // Let legalizer expand 2-wide build_vectors.
  if (EVTBits == 64) {
    if (NumNonZero == 1) {
      // One half is zero or undef.
      unsigned Idx = CountTrailingZeros_32(NonZeros);
      SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
                               Op.getOperand(Idx));
      return getShuffleVectorZeroOrUndef(V2, Idx, true,
                                         Subtarget->hasSSE2(), DAG);
    }
    return SDValue();
  }

  // If element VT is < 32 bits, convert it to inserts into a zero vector.
  if (EVTBits == 8 && NumElems == 16) {
    SDValue V = LowerBuildVectorv16i8(Op, NonZeros, NumNonZero, NumZero, DAG,
                                      *this);
    if (V.getNode()) return V;
  }

  if (EVTBits == 16 && NumElems == 8) {
    SDValue V = LowerBuildVectorv8i16(Op, NonZeros, NumNonZero, NumZero, DAG,
                                      *this);
    if (V.getNode()) return V;
  }

  // If element VT is 32 bits, turn it into a number of shuffles.
  SmallVector<SDValue, 8> V;
  V.resize(NumElems);
  if (NumElems == 4 && NumZero > 0) {
    for (unsigned i = 0; i < 4; ++i) {
      bool isZero = !(NonZeros & (1 << i));
      if (isZero)
        V[i] = getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);
      else
        V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
    }

    for (unsigned i = 0; i < 2; ++i) {
      switch ((NonZeros & (0x3 << i*2)) >> (i*2)) {
        default: break;
        case 0:
          V[i] = V[i*2];  // Must be a zero vector.
          break;
        case 1:
          V[i] = getMOVL(DAG, dl, VT, V[i*2+1], V[i*2]);
          break;
        case 2:
          V[i] = getMOVL(DAG, dl, VT, V[i*2], V[i*2+1]);
          break;
        case 3:
          V[i] = getUnpackl(DAG, dl, VT, V[i*2], V[i*2+1]);
          break;
      }
    }

    SmallVector<int, 8> MaskVec;
    bool Reverse = (NonZeros & 0x3) == 2;
    for (unsigned i = 0; i < 2; ++i)
      MaskVec.push_back(Reverse ? 1-i : i);
    Reverse = ((NonZeros & (0x3 << 2)) >> 2) == 2;
    for (unsigned i = 0; i < 2; ++i)
      MaskVec.push_back(Reverse ? 1-i+NumElems : i+NumElems);
    return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
  }

  if (Values.size() > 1 && VT.getSizeInBits() == 128) {
    // Check for a build vector of consecutive loads.
    for (unsigned i = 0; i < NumElems; ++i)
      V[i] = Op.getOperand(i);

    // Check for elements which are consecutive loads.
    SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
    if (LD.getNode())
      return LD;

    // For SSE 4.1, use inserts into undef.
    if (getSubtarget()->hasSSE41()) {
      V[0] = DAG.getUNDEF(VT);
      for (unsigned i = 0; i < NumElems; ++i)
        if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
          V[0] = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V[0],
                             Op.getOperand(i), DAG.getIntPtrConstant(i));
      return V[0];
    }

    // Otherwise, expand into a number of unpckl*, e.g. for v4f32:
    //   Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
    //         : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
    //   Step 2: unpcklps X, Y ==> <3, 2, 1, 0>
    for (unsigned i = 0; i < NumElems; ++i)
      V[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
    NumElems >>= 1;
    while (NumElems != 0) {
      for (unsigned i = 0; i < NumElems; ++i)
        V[i] = getUnpackl(DAG, dl, VT, V[i], V[i + NumElems]);
      NumElems >>= 1;
    }
    return V[0];
  }
  return SDValue();
}

SDValue
X86TargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
  // We support concatenating two MMX registers and placing them in an MMX
  // register; this is better than doing a stack convert.
  DebugLoc dl = Op.getDebugLoc();
  EVT ResVT = Op.getValueType();
  assert(Op.getNumOperands() == 2);
  assert(ResVT == MVT::v2i64 || ResVT == MVT::v4i32 ||
         ResVT == MVT::v8i16 || ResVT == MVT::v16i8);
  int Mask[2];
  SDValue InVec = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v1i64,
                              Op.getOperand(0));
  SDValue VecOp = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
  InVec = Op.getOperand(1);
  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    unsigned NumElts = ResVT.getVectorNumElements();
    VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp);
    VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ResVT, VecOp,
                        InVec.getOperand(0), DAG.getIntPtrConstant(NumElts/2+1));
  } else {
    InVec = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v1i64, InVec);
    SDValue VecOp2 = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, InVec);
    Mask[0] = 0; Mask[1] = 2;
    VecOp = DAG.getVectorShuffle(MVT::v2i64, dl, VecOp, VecOp2, Mask);
  }
  return DAG.getNode(ISD::BIT_CONVERT, dl, ResVT, VecOp);
}

// v8i16 shuffles - Prefer shuffles in the following order:
//  1. [all]   pshuflw, pshufhw, optional move
//  2. [ssse3] 1 x pshufb
//  3. [ssse3] 2 x pshufb + 1 x por
//  4. [all]   mov + pshuflw + pshufhw + N x (pextrw + pinsrw)
static
SDValue LowerVECTOR_SHUFFLEv8i16(ShuffleVectorSDNode *SVOp,
                                 SelectionDAG &DAG, X86TargetLowering &TLI) {
  SDValue V1 = SVOp->getOperand(0);
  SDValue V2 = SVOp->getOperand(1);
  DebugLoc dl = SVOp->getDebugLoc();
  SmallVector<int, 8> MaskVals;

  // Determine if more than 1 of the words in each of the low and high
  // quadwords of the result come from the same quadword of one of the two
  // inputs.  Undef mask values count as coming from any quadword, for better
  // codegen.
  SmallVector<unsigned, 4> LoQuad(4);
  SmallVector<unsigned, 4> HiQuad(4);
  BitVector InputQuads(4);
  for (unsigned i = 0; i < 8; ++i) {
    SmallVectorImpl<unsigned> &Quad = i < 4 ? LoQuad : HiQuad;
    int EltIdx = SVOp->getMaskElt(i);
    MaskVals.push_back(EltIdx);
    if (EltIdx < 0) {
      ++Quad[0];
      ++Quad[1];
      ++Quad[2];
      ++Quad[3];
      continue;
    }
    ++Quad[EltIdx / 4];
    InputQuads.set(EltIdx / 4);
  }

  int BestLoQuad = -1;
  unsigned MaxQuad = 1;
  for (unsigned i = 0; i < 4; ++i) {
    if (LoQuad[i] > MaxQuad) {
      BestLoQuad = i;
      MaxQuad = LoQuad[i];
    }
  }

  int BestHiQuad = -1;
  MaxQuad = 1;
  for (unsigned i = 0; i < 4; ++i) {
    if (HiQuad[i] > MaxQuad) {
      BestHiQuad = i;
      MaxQuad = HiQuad[i];
    }
  }

  // For SSSE3, if all 8 words of the result come from only 1 quadword of each
  // of the two input vectors, shuffle them into one input vector so only a
  // single pshufb instruction is necessary.  If there are more than 2 input
  // quads, disable the next transformation since it does not help SSSE3.
  bool V1Used = InputQuads[0] || InputQuads[1];
  bool V2Used = InputQuads[2] || InputQuads[3];
  if (TLI.getSubtarget()->hasSSSE3()) {
    if (InputQuads.count() == 2 && V1Used && V2Used) {
      BestLoQuad = InputQuads.find_first();
      BestHiQuad = InputQuads.find_next(BestLoQuad);
    }
    if (InputQuads.count() > 2) {
      BestLoQuad = -1;
      BestHiQuad = -1;
    }
  }

  // If BestLoQuad or BestHiQuad are set, shuffle the quads together and
  // update the shuffle mask.
  // If a quad is scored as -1, that means that it contains words from all 4
  // input quadwords.
  SDValue NewV;
  if (BestLoQuad >= 0 || BestHiQuad >= 0) {
    SmallVector<int, 8> MaskV;
    MaskV.push_back(BestLoQuad < 0 ? 0 : BestLoQuad);
    MaskV.push_back(BestHiQuad < 0 ? 1 : BestHiQuad);
    NewV = DAG.getVectorShuffle(MVT::v2i64, dl,
                  DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V1),
                  DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64, V2),
                  &MaskV[0]);
    NewV = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, NewV);

    // Rewrite the MaskVals and assign NewV to V1 if NewV now contains all the
    // source words for the shuffle, to aid later transformations.
    bool AllWordsInNewV = true;
    bool InOrder[2] = { true, true };
    for (unsigned i = 0; i != 8; ++i) {
      int idx = MaskVals[i];
      if (idx != (int)i)
        InOrder[i/4] = false;
      if (idx < 0 || (idx/4) == BestLoQuad || (idx/4) == BestHiQuad)
        continue;
      AllWordsInNewV = false;
      break;
    }

    bool pshuflw = AllWordsInNewV, pshufhw = AllWordsInNewV;
    if (AllWordsInNewV) {
      for (int i = 0; i != 8; ++i) {
        int idx = MaskVals[i];
        if (idx < 0)
          continue;
        idx = MaskVals[i] = (idx / 4) == BestLoQuad ? (idx & 3) : (idx & 3) + 4;
        if ((idx != i) && idx < 4)
          pshufhw = false;
        if ((idx != i) && idx > 3)
          pshuflw = false;
      }
      V1 = NewV;
      V2Used = false;
      BestLoQuad = 0;
      BestHiQuad = 1;
    }

    // If we've eliminated the use of V2, and the new mask is a pshuflw or
    // pshufhw, that's as cheap as it gets.  Return the new shuffle.
    if ((pshufhw && InOrder[0]) || (pshuflw && InOrder[1])) {
      return DAG.getVectorShuffle(MVT::v8i16, dl, NewV,
                                  DAG.getUNDEF(MVT::v8i16), &MaskVals[0]);
    }
  }

  // If we have SSSE3, and all words of the result are from 1 input vector,
  // case 2 is generated, otherwise case 3 is generated.  If no SSSE3
  // is present, fall back to case 4.
  if (TLI.getSubtarget()->hasSSSE3()) {
    SmallVector<SDValue,16> pshufbMask;

    // If we have elements from both input vectors, set the high bit of the
    // shuffle mask element to zero out elements that come from V2 in the V1
    // mask, and elements that come from V1 in the V2 mask, so that the two
    // results can be OR'd together.
    bool TwoInputs = V1Used && V2Used;
    for (unsigned i = 0; i != 8; ++i) {
      int EltIdx = MaskVals[i] * 2;
      if (TwoInputs && (EltIdx >= 16)) {
        pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
        pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
        continue;
      }
      pshufbMask.push_back(DAG.getConstant(EltIdx,   MVT::i8));
      pshufbMask.push_back(DAG.getConstant(EltIdx+1, MVT::i8));
    }
    V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V1);
    V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
                     DAG.getNode(ISD::BUILD_VECTOR, dl,
                                 MVT::v16i8, &pshufbMask[0], 16));
    if (!TwoInputs)
      return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);

    // Calculate the shuffle mask for the second input, shuffle it, and
    // OR it with the first shuffled input.
    pshufbMask.clear();
    for (unsigned i = 0; i != 8; ++i) {
      int EltIdx = MaskVals[i] * 2;
      if (EltIdx < 16) {
        pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
        pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
        continue;
      }
      pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
      pshufbMask.push_back(DAG.getConstant(EltIdx - 15, MVT::i8));
    }
    V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, V2);
    V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
                     DAG.getNode(ISD::BUILD_VECTOR, dl,
                                 MVT::v16i8, &pshufbMask[0], 16));
    V1 = DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
  }

  // If BestLoQuad >= 0, generate a pshuflw to put the low elements in order,
  // and update MaskVals with new element order.
  BitVector InOrder(8);
  if (BestLoQuad >= 0) {
    SmallVector<int, 8> MaskV;
    for (int i = 0; i != 4; ++i) {
      int idx = MaskVals[i];
      if (idx < 0) {
        MaskV.push_back(-1);
        InOrder.set(i);
      } else if ((idx / 4) == BestLoQuad) {
        MaskV.push_back(idx & 3);
        InOrder.set(i);
      } else {
        MaskV.push_back(-1);
      }
    }
    for (unsigned i = 4; i != 8; ++i)
      MaskV.push_back(i);
    NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
                                &MaskV[0]);
  }

  // If BestHiQuad >= 0, generate a pshufhw to put the high elements in order,
  // and update MaskVals with the new element order.
  if (BestHiQuad >= 0) {
    SmallVector<int, 8> MaskV;
    for (unsigned i = 0; i != 4; ++i)
      MaskV.push_back(i);
    for (unsigned i = 4; i != 8; ++i) {
      int idx = MaskVals[i];
      if (idx < 0) {
        MaskV.push_back(-1);
        InOrder.set(i);
      } else if ((idx / 4) == BestHiQuad) {
        MaskV.push_back((idx & 3) + 4);
        InOrder.set(i);
      } else {
        MaskV.push_back(-1);
      }
    }
    NewV = DAG.getVectorShuffle(MVT::v8i16, dl, NewV, DAG.getUNDEF(MVT::v8i16),
                                &MaskV[0]);
  }

  // In case BestHiQuad & BestLoQuad were both -1, which means each quadword
  // has a word from each of the four input quadwords, calculate the InOrder
  // bitvector now before falling through to the insert/extract cleanup.
  if (BestLoQuad == -1 && BestHiQuad == -1) {
    NewV = V1;
    for (int i = 0; i != 8; ++i)
      if (MaskVals[i] < 0 || MaskVals[i] == i)
        InOrder.set(i);
  }

  // The other elements are put in the right place using pextrw and pinsrw.
  for (unsigned i = 0; i != 8; ++i) {
    if (InOrder[i])
      continue;
    int EltIdx = MaskVals[i];
    if (EltIdx < 0)
      continue;
    SDValue ExtOp = (EltIdx < 8)
      ? DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V1,
                    DAG.getIntPtrConstant(EltIdx))
      : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, V2,
                    DAG.getIntPtrConstant(EltIdx - 8));
    NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, ExtOp,
                       DAG.getIntPtrConstant(i));
  }
  return NewV;
}
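// Illustrative two-input pshufb lowering (not from the original source): for
// the v8i16 word mask <0,8,2,10,4,12,6,14> (words drawn from all four input
// quads, so case 3 above), the byte mask applied to V1 is
//   <0,1, 80,80, 4,5, 80,80, 8,9, 80,80, 12,13, 80,80>
// and the byte mask applied to V2 is
//   <80,80, 0,1, 80,80, 4,5, 80,80, 8,9, 80,80, 12,13>
// (80 = 0x80 zeroes the lane), so a single por merges the two results.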
// v16i8 shuffles - Prefer shuffles in the following order:
//  1. [ssse3] 1 x pshufb
//  2. [ssse3] 2 x pshufb + 1 x por
//  3. [all]   v8i16 shuffle + N x pextrw + rotate + pinsrw
static
SDValue LowerVECTOR_SHUFFLEv16i8(ShuffleVectorSDNode *SVOp,
                                 SelectionDAG &DAG, X86TargetLowering &TLI) {
  SDValue V1 = SVOp->getOperand(0);
  SDValue V2 = SVOp->getOperand(1);
  DebugLoc dl = SVOp->getDebugLoc();
  SmallVector<int, 16> MaskVals;
  SVOp->getMask(MaskVals);

  // If we have SSSE3, case 1 is generated when all result bytes come from
  // one of the inputs.  Otherwise, case 2 is generated.  If no SSSE3 is
  // present, fall back to case 3.
  // FIXME: kill V2Only once shuffles are canonicalized by getNode.
  bool V1Only = true;
  bool V2Only = true;
  for (unsigned i = 0; i < 16; ++i) {
    int EltIdx = MaskVals[i];
    if (EltIdx < 0)
      continue;
    if (EltIdx < 16)
      V2Only = false;
    else
      V1Only = false;
  }

  // If SSSE3, use 1 pshufb instruction per input vector with elements in the
  // result.
  if (TLI.getSubtarget()->hasSSSE3()) {
    SmallVector<SDValue,16> pshufbMask;

    // If all result elements are from one input vector, then only translate
    // undef mask values to 0x80 (zero out result) in the pshufb mask.
    //
    // Otherwise, we have elements from both input vectors, and must zero out
    // elements that come from V2 in the first mask, and V1 in the second mask
    // so that we can OR them together.
    bool TwoInputs = !(V1Only || V2Only);
    for (unsigned i = 0; i != 16; ++i) {
      int EltIdx = MaskVals[i];
      if (EltIdx < 0 || (TwoInputs && EltIdx >= 16)) {
        pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
        continue;
      }
      pshufbMask.push_back(DAG.getConstant(EltIdx, MVT::i8));
    }
    // If all the elements are from V2, assign it to V1 and return after
    // building the first pshufb.
    if (V2Only)
      V1 = V2;
    V1 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V1,
                     DAG.getNode(ISD::BUILD_VECTOR, dl,
                                 MVT::v16i8, &pshufbMask[0], 16));
    if (!TwoInputs)
      return V1;

    // Calculate the shuffle mask for the second input, shuffle it, and
    // OR it with the first shuffled input.
    pshufbMask.clear();
    for (unsigned i = 0; i != 16; ++i) {
      int EltIdx = MaskVals[i];
      if (EltIdx < 16) {
        pshufbMask.push_back(DAG.getConstant(0x80, MVT::i8));
        continue;
      }
      pshufbMask.push_back(DAG.getConstant(EltIdx - 16, MVT::i8));
    }
    V2 = DAG.getNode(X86ISD::PSHUFB, dl, MVT::v16i8, V2,
                     DAG.getNode(ISD::BUILD_VECTOR, dl,
                                 MVT::v16i8, &pshufbMask[0], 16));
    return DAG.getNode(ISD::OR, dl, MVT::v16i8, V1, V2);
  }

  // No SSSE3 - Calculate in place words and then fix all out of place words
  // with 0-16 extracts & inserts.  Worst case is 16 bytes out of order from
  // the 16 different words that comprise the two doublequadword input vectors.
  V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
  V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V2);
  SDValue NewV = V2Only ? V2 : V1;
  for (int i = 0; i != 8; ++i) {
    int Elt0 = MaskVals[i*2];
    int Elt1 = MaskVals[i*2+1];

    // This word of the result is all undef, skip it.
    if (Elt0 < 0 && Elt1 < 0)
      continue;

    // This word of the result is already in the correct place, skip it.
    if (V1Only && (Elt0 == i*2) && (Elt1 == i*2+1))
      continue;
    if (V2Only && (Elt0 == i*2+16) && (Elt1 == i*2+17))
      continue;

    SDValue Elt0Src = Elt0 < 16 ? V1 : V2;

  // No SSSE3 - Calculate in place words and then fix all out of place words
  // with 0-16 extracts & inserts. Worst case is 16 bytes out of order from
  // the 16 different words that comprise the two doublequadword input vectors.
  V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V1);
  V2 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, V2);
  SDValue NewV = V2Only ? V2 : V1;
  for (int i = 0; i != 8; ++i) {
    int Elt0 = MaskVals[i*2];
    int Elt1 = MaskVals[i*2+1];

    // This word of the result is all undef, skip it.
    if (Elt0 < 0 && Elt1 < 0)
      continue;

    // This word of the result is already in the correct place, skip it.
    if (V1Only && (Elt0 == i*2) && (Elt1 == i*2+1))
      continue;
    if (V2Only && (Elt0 == i*2+16) && (Elt1 == i*2+17))
      continue;

    SDValue Elt0Src = Elt0 < 16 ? V1 : V2;
    SDValue Elt1Src = Elt1 < 16 ? V1 : V2;
    SDValue InsElt;

    // If Elt0 and Elt1 are defined and consecutive, the pair can be loaded
    // together with a single extract; extract the word and insert it.
    if ((Elt0 >= 0) && ((Elt0 + 1) == Elt1) && ((Elt0 & 1) == 0)) {
      InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
                           DAG.getIntPtrConstant(Elt1 / 2));
      NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
                         DAG.getIntPtrConstant(i));
      continue;
    }

    // If Elt1 is defined, extract it from the appropriate source. If the
    // source byte is not also odd, shift the extracted word left 8 bits;
    // otherwise clear the bottom 8 bits if we need to do an or.
    if (Elt1 >= 0) {
      InsElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Elt1Src,
                           DAG.getIntPtrConstant(Elt1 / 2));
      if ((Elt1 & 1) == 0)
        InsElt = DAG.getNode(ISD::SHL, dl, MVT::i16, InsElt,
                             DAG.getConstant(8, TLI.getShiftAmountTy()));
      else if (Elt0 >= 0)
        InsElt = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt,
                             DAG.getConstant(0xFF00, MVT::i16));
    }
    // If Elt0 is defined, extract it from the appropriate source. If the
    // source byte is not also even, shift the extracted word right 8 bits. If
    // Elt1 was also defined, OR the extracted values together before
    // inserting them in the result.
    if (Elt0 >= 0) {
      SDValue InsElt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
                                    Elt0Src, DAG.getIntPtrConstant(Elt0 / 2));
      if ((Elt0 & 1) != 0)
        InsElt0 = DAG.getNode(ISD::SRL, dl, MVT::i16, InsElt0,
                              DAG.getConstant(8, TLI.getShiftAmountTy()));
      else if (Elt1 >= 0)
        InsElt0 = DAG.getNode(ISD::AND, dl, MVT::i16, InsElt0,
                              DAG.getConstant(0x00FF, MVT::i16));
      InsElt = Elt1 >= 0 ? DAG.getNode(ISD::OR, dl, MVT::i16, InsElt, InsElt0)
                         : InsElt0;
    }
    NewV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, NewV, InsElt,
                       DAG.getIntPtrConstant(i));
  }
  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, NewV);
}
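// Illustrative sketch of the word-building logic above (scalar model; the
// Bytes[] array stands for the 32 bytes of V1 followed by V2):
/*
  uint16_t BuildWord(const uint8_t Bytes[32], int Elt0, int Elt1) {
    uint16_t W = 0;
    if (Elt1 >= 0) W |= (uint16_t)Bytes[Elt1] << 8;  // high byte of the word
    if (Elt0 >= 0) W |= Bytes[Elt0];                 // low byte of the word
    return W;
  }
*/
// The DAG version works on i16 extracts instead of single bytes, which is
// why it shifts or masks each extracted word before OR-ing the halves.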

/// RewriteAsNarrowerShuffle - Try rewriting v8i16 and v16i8 shuffles as 4 wide
/// ones, or rewriting v4i32 / v4f32 as 2 wide ones if possible. This can be
/// done when every pair / quad of shuffle mask elements point to elements in
/// the right sequence. e.g.
/// vector_shuffle <>, <>, < 2, 3, | 10, 11, | 0, 1, | 14, 15>
static
SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
                                 SelectionDAG &DAG,
                                 TargetLowering &TLI, DebugLoc dl) {
  EVT VT = SVOp->getValueType(0);
  SDValue V1 = SVOp->getOperand(0);
  SDValue V2 = SVOp->getOperand(1);
  unsigned NumElems = VT.getVectorNumElements();
  unsigned NewWidth = (NumElems == 4) ? 2 : 4;
  EVT MaskVT = MVT::getIntVectorWithNumElements(NewWidth);
  EVT MaskEltVT = MaskVT.getVectorElementType();
  EVT NewVT = MaskVT;
  switch (VT.getSimpleVT().SimpleTy) {
  default: assert(false && "Unexpected!");
  case MVT::v4f32: NewVT = MVT::v2f64; break;
  case MVT::v4i32: NewVT = MVT::v2i64; break;
  case MVT::v8i16: NewVT = MVT::v4i32; break;
  case MVT::v16i8: NewVT = MVT::v4i32; break;
  }

  if (NewWidth == 2) {
    if (VT.isInteger())
      NewVT = MVT::v2i64;
    else
      NewVT = MVT::v2f64;
  }
  int Scale = NumElems / NewWidth;
  SmallVector<int, 8> MaskVec;
  for (unsigned i = 0; i < NumElems; i += Scale) {
    int StartIdx = -1;
    for (int j = 0; j < Scale; ++j) {
      int EltIdx = SVOp->getMaskElt(i+j);
      if (EltIdx < 0)
        continue;
      if (StartIdx == -1)
        StartIdx = EltIdx - (EltIdx % Scale);
      if (EltIdx != StartIdx + j)
        return SDValue();
    }
    if (StartIdx == -1)
      MaskVec.push_back(-1);
    else
      MaskVec.push_back(StartIdx / Scale);
  }

  V1 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V1);
  V2 = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, V2);
  return DAG.getVectorShuffle(NewVT, dl, V1, V2, &MaskVec[0]);
}
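// Worked example (illustrative): with the v8i16 mask above,
// < 2, 3, 10, 11, 0, 1, 14, 15 >, Scale is 2 and every pair is a consecutive,
// pair-aligned run, so the shuffle is rewritten as the v4i32 shuffle
// < 1, 5, 0, 7 > on bitcast operands. A pair such as < 3, 4 > would fail the
// StartIdx alignment check and leave the original shuffle untouched.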

/// getVZextMovL - Return a zero-extending vector move low node.
///
static SDValue getVZextMovL(EVT VT, EVT OpVT,
                            SDValue SrcOp, SelectionDAG &DAG,
                            const X86Subtarget *Subtarget, DebugLoc dl) {
  if (VT == MVT::v2f64 || VT == MVT::v4f32) {
    LoadSDNode *LD = NULL;
    if (!isScalarLoadToVector(SrcOp.getNode(), &LD))
      LD = dyn_cast<LoadSDNode>(SrcOp);
    if (!LD) {
      // movssrr and movsdrr do not clear top bits. Try to use movd, movq
      // instead.
      MVT ExtVT = (OpVT == MVT::v2f64) ? MVT::i64 : MVT::i32;
      if ((ExtVT.SimpleTy != MVT::i64 || Subtarget->is64Bit()) &&
          SrcOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
          SrcOp.getOperand(0).getOpcode() == ISD::BIT_CONVERT &&
          SrcOp.getOperand(0).getOperand(0).getValueType() == ExtVT) {
        // PR2108
        OpVT = (OpVT == MVT::v2f64) ? MVT::v2i64 : MVT::v4i32;
        return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                           DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
                                       DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
                                                   OpVT,
                                                   SrcOp.getOperand(0)
                                                     .getOperand(0))));
      }
    }
  }

  return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                     DAG.getNode(X86ISD::VZEXT_MOVL, dl, OpVT,
                                 DAG.getNode(ISD::BIT_CONVERT, dl,
                                             OpVT, SrcOp)));
}

/// LowerVECTOR_SHUFFLE_4wide - Handle all 4 wide cases with a number of
/// shuffles.
static SDValue
LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
  SDValue V1 = SVOp->getOperand(0);
  SDValue V2 = SVOp->getOperand(1);
  DebugLoc dl = SVOp->getDebugLoc();
  EVT VT = SVOp->getValueType(0);

  SmallVector<std::pair<int, int>, 8> Locs;
  Locs.resize(4);
  SmallVector<int, 8> Mask1(4U, -1);
  SmallVector<int, 8> PermMask;
  SVOp->getMask(PermMask);

  unsigned NumHi = 0;
  unsigned NumLo = 0;
  for (unsigned i = 0; i != 4; ++i) {
    int Idx = PermMask[i];
    if (Idx < 0) {
      Locs[i] = std::make_pair(-1, -1);
    } else {
      assert(Idx < 8 && "Invalid VECTOR_SHUFFLE index!");
      if (Idx < 4) {
        Locs[i] = std::make_pair(0, NumLo);
        Mask1[NumLo] = Idx;
        NumLo++;
      } else {
        Locs[i] = std::make_pair(1, NumHi);
        if (2+NumHi < 4)
          Mask1[2+NumHi] = Idx;
        NumHi++;
      }
    }
  }

  if (NumLo <= 2 && NumHi <= 2) {
    // No more than two elements come from either vector; this can be
    // implemented with two shuffles. The first shuffle gathers the elements.
    // The second shuffle, which takes the first shuffle as both of its
    // vector operands, puts the elements into the right order.
    V1 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);

    SmallVector<int, 8> Mask2(4U, -1);

    for (unsigned i = 0; i != 4; ++i) {
      if (Locs[i].first == -1)
        continue;
      else {
        unsigned Idx = (i < 2) ? 0 : 4;
        Idx += Locs[i].first * 2 + Locs[i].second;
        Mask2[i] = Idx;
      }
    }

    return DAG.getVectorShuffle(VT, dl, V1, V1, &Mask2[0]);
  } else if (NumLo == 3 || NumHi == 3) {
    // Otherwise, we must have three elements from one vector, call it X, and
    // one element from the other, call it Y. First, use a shufps to build an
    // intermediate vector with the one element from Y and the element from X
    // that will be in the same half in the final destination (the indexes
    // don't matter). Then, use a shufps to build the final vector, taking
    // the half containing the element from Y from the intermediate, and the
    // other half from X.
    if (NumHi == 3) {
      // Normalize it so the 3 elements come from V1.
      CommuteVectorShuffleMask(PermMask, VT);
      std::swap(V1, V2);
    }

    // Find the element from V2.
    unsigned HiIndex;
    for (HiIndex = 0; HiIndex < 3; ++HiIndex) {
      int Val = PermMask[HiIndex];
      if (Val < 0)
        continue;
      if (Val >= 4)
        break;
    }

    Mask1[0] = PermMask[HiIndex];
    Mask1[1] = -1;
    Mask1[2] = PermMask[HiIndex^1];
    Mask1[3] = -1;
    V2 = DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);

    if (HiIndex >= 2) {
      Mask1[0] = PermMask[0];
      Mask1[1] = PermMask[1];
      Mask1[2] = HiIndex & 1 ? 6 : 4;
      Mask1[3] = HiIndex & 1 ? 4 : 6;
      return DAG.getVectorShuffle(VT, dl, V1, V2, &Mask1[0]);
    } else {
      Mask1[0] = HiIndex & 1 ? 2 : 0;
      Mask1[1] = HiIndex & 1 ? 0 : 2;
      Mask1[2] = PermMask[2];
      Mask1[3] = PermMask[3];
      if (Mask1[2] >= 0)
        Mask1[2] += 4;
      if (Mask1[3] >= 0)
        Mask1[3] += 4;
      return DAG.getVectorShuffle(VT, dl, V2, V1, &Mask1[0]);
    }
  }
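  // Worked example for the 3+1 case above (illustrative): with PermMask
  // <0,1,2,4>, HiIndex ends up 3, so the first shufps builds the intermediate
  // T = <V2[0], undef, V1[2], undef>, and since HiIndex >= 2 the second
  // shufps uses mask <0,1,6,4>, i.e. <V1[0], V1[1], T[2], T[0]>, which is
  // exactly <V1[0], V1[1], V1[2], V2[0]>.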

  // Break it into (shuffle shuffle_hi, shuffle_lo).
  Locs.clear();
  SmallVector<int,8> LoMask(4U, -1);
  SmallVector<int,8> HiMask(4U, -1);

  SmallVector<int,8> *MaskPtr = &LoMask;
  unsigned MaskIdx = 0;
  unsigned LoIdx = 0;
  unsigned HiIdx = 2;
  for (unsigned i = 0; i != 4; ++i) {
    if (i == 2) {
      MaskPtr = &HiMask;
      MaskIdx = 1;
      LoIdx = 0;
      HiIdx = 2;
    }
    int Idx = PermMask[i];
    if (Idx < 0) {
      Locs[i] = std::make_pair(-1, -1);
    } else if (Idx < 4) {
      Locs[i] = std::make_pair(MaskIdx, LoIdx);
      (*MaskPtr)[LoIdx] = Idx;
      LoIdx++;
    } else {
      Locs[i] = std::make_pair(MaskIdx, HiIdx);
      (*MaskPtr)[HiIdx] = Idx;
      HiIdx++;
    }
  }

  SDValue LoShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &LoMask[0]);
  SDValue HiShuffle = DAG.getVectorShuffle(VT, dl, V1, V2, &HiMask[0]);
  SmallVector<int, 8> MaskOps;
  for (unsigned i = 0; i != 4; ++i) {
    if (Locs[i].first == -1) {
      MaskOps.push_back(-1);
    } else {
      unsigned Idx = Locs[i].first * 4 + Locs[i].second;
      MaskOps.push_back(Idx);
    }
  }
  return DAG.getVectorShuffle(VT, dl, LoShuffle, HiShuffle, &MaskOps[0]);
}
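// Illustrative trace of the fallback above (hypothetical mask): for PermMask
// <1,0,3,2> the loop builds LoMask = <1,0,-1,-1> and HiMask = <3,2,-1,-1>, so
//   LoShuffle = <V1[1], V1[0], u, u>,  HiShuffle = <V1[3], V1[2], u, u>,
// and the final shuffle <0,1,4,5> of (LoShuffle, HiShuffle) produces
// <V1[1], V1[0], V1[3], V1[2]>.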

SDValue
X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();
  unsigned NumElems = VT.getVectorNumElements();
  bool isMMX = VT.getSizeInBits() == 64;
  bool V1IsUndef = V1.getOpcode() == ISD::UNDEF;
  bool V2IsUndef = V2.getOpcode() == ISD::UNDEF;
  bool V1IsSplat = false;
  bool V2IsSplat = false;

  if (isZeroShuffle(SVOp))
    return getZeroVector(VT, Subtarget->hasSSE2(), DAG, dl);

  // Promote splats to v4f32.
  if (SVOp->isSplat()) {
    if (isMMX || NumElems < 4)
      return Op;
    return PromoteSplat(SVOp, DAG, Subtarget->hasSSE2());
  }

  // If the shuffle can be profitably rewritten as a narrower shuffle, then
  // do it!
  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
    SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
    if (NewOp.getNode())
      return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                         LowerVECTOR_SHUFFLE(NewOp, DAG));
  } else if ((VT == MVT::v4i32 || (VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
    // FIXME: Figure out a cleaner way to do this.
    // Try to make use of movq to zero out the top part.
    if (ISD::isBuildVectorAllZeros(V2.getNode())) {
      SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
      if (NewOp.getNode()) {
        if (isCommutedMOVL(cast<ShuffleVectorSDNode>(NewOp), true, false))
          return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(0),
                              DAG, Subtarget, dl);
      }
    } else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
      SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG, *this, dl);
      if (NewOp.getNode() && X86::isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)))
        return getVZextMovL(VT, NewOp.getValueType(), NewOp.getOperand(1),
                            DAG, Subtarget, dl);
    }
  }

  if (X86::isPSHUFDMask(SVOp))
    return Op;

  // Check if this can be converted into a logical shift.
  bool isLeft = false;
  unsigned ShAmt = 0;
  SDValue ShVal;
  bool isShift = getSubtarget()->hasSSE2() &&
    isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
  if (isShift && ShVal.hasOneUse()) {
    // If the shifted value has multiple uses, it may be cheaper to use
    // v_set0 + movlhps or movhlps, etc.
    EVT EltVT = VT.getVectorElementType();
    ShAmt *= EltVT.getSizeInBits();
    return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
  }

  if (X86::isMOVLMask(SVOp)) {
    if (V1IsUndef)
      return V2;
    if (ISD::isBuildVectorAllZeros(V1.getNode()))
      return getVZextMovL(VT, VT, V2, DAG, Subtarget, dl);
    if (!isMMX)
      return Op;
  }

  // FIXME: fold these into legal mask.
  if (!isMMX && (X86::isMOVSHDUPMask(SVOp) ||
                 X86::isMOVSLDUPMask(SVOp) ||
                 X86::isMOVHLPSMask(SVOp) ||
                 X86::isMOVLHPSMask(SVOp) ||
                 X86::isMOVLPMask(SVOp)))
    return Op;

  if (ShouldXformToMOVHLPS(SVOp) ||
      ShouldXformToMOVLP(V1.getNode(), V2.getNode(), SVOp))
    return CommuteVectorShuffle(SVOp, DAG);

  if (isShift) {
    // No better options. Use a vshl / vsrl.
    EVT EltVT = VT.getVectorElementType();
    ShAmt *= EltVT.getSizeInBits();
    return getVShift(isLeft, VT, ShVal, ShAmt, DAG, *this, dl);
  }

  bool Commuted = false;
  // FIXME: This should also accept a bitcast of a splat? Be careful, not
  // 1,1,1,1 -> v8i16 though.
  V1IsSplat = isSplatVector(V1.getNode());
  V2IsSplat = isSplatVector(V2.getNode());

  // Canonicalize the splat or undef, if present, to be on the RHS.
  if ((V1IsSplat || V1IsUndef) && !(V2IsSplat || V2IsUndef)) {
    Op = CommuteVectorShuffle(SVOp, DAG);
    SVOp = cast<ShuffleVectorSDNode>(Op);
    V1 = SVOp->getOperand(0);
    V2 = SVOp->getOperand(1);
    std::swap(V1IsSplat, V2IsSplat);
    std::swap(V1IsUndef, V2IsUndef);
    Commuted = true;
  }

  if (isCommutedMOVL(SVOp, V2IsSplat, V2IsUndef)) {
    // Shuffling low element of v1 into undef, just return v1.
    if (V2IsUndef)
      return V1;
    // If V2 is a splat, the mask may be malformed such as <4,3,3,3>, which
    // the instruction selector will not match, so get a canonical MOVL with
    // swapped operands to undo the commute.
    return getMOVL(DAG, dl, VT, V2, V1);
  }

  if (X86::isUNPCKL_v_undef_Mask(SVOp) ||
      X86::isUNPCKH_v_undef_Mask(SVOp) ||
      X86::isUNPCKLMask(SVOp) ||
      X86::isUNPCKHMask(SVOp))
    return Op;

  if (V2IsSplat) {
    // Normalize mask so all entries that point to V2 point to its first
    // element, then try to match unpck{h|l} again. If match, return a
    // new vector_shuffle with the corrected mask.
    SDValue NewMask = NormalizeMask(SVOp, DAG);
    ShuffleVectorSDNode *NSVOp = cast<ShuffleVectorSDNode>(NewMask);
    if (NSVOp != SVOp) {
      if (X86::isUNPCKLMask(NSVOp, true)) {
        return NewMask;
      } else if (X86::isUNPCKHMask(NSVOp, true)) {
        return NewMask;
      }
    }
  }

  if (Commuted) {
    // Commute it back and try unpck* again.
    // FIXME: this seems wrong.
    SDValue NewOp = CommuteVectorShuffle(SVOp, DAG);
    ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
    if (X86::isUNPCKL_v_undef_Mask(NewSVOp) ||
        X86::isUNPCKH_v_undef_Mask(NewSVOp) ||
        X86::isUNPCKLMask(NewSVOp) ||
        X86::isUNPCKHMask(NewSVOp))
      return NewOp;
  }

  // FIXME: for mmx, bitcast v2i32 to v4i16 for shuffle.

  // Normalize the node to match x86 shuffle ops if needed
  if (!isMMX && V2.getOpcode() != ISD::UNDEF && isCommutedSHUFP(SVOp))
    return CommuteVectorShuffle(SVOp, DAG);

  // If the shuffle mask is already legal for the target, return the op as-is.
  SmallVector<int, 16> PermMask;
  SVOp->getMask(PermMask);
  if (isShuffleMaskLegal(PermMask, VT))
    return Op;

  // Handle v8i16 specifically since SSE can do byte extraction and insertion.
  if (VT == MVT::v8i16) {
    SDValue NewOp = LowerVECTOR_SHUFFLEv8i16(SVOp, DAG, *this);
    if (NewOp.getNode())
      return NewOp;
  }

  if (VT == MVT::v16i8) {
    SDValue NewOp = LowerVECTOR_SHUFFLEv16i8(SVOp, DAG, *this);
    if (NewOp.getNode())
      return NewOp;
  }

  // Handle all 4 wide cases with a number of shuffles except for MMX.
  if (NumElems == 4 && !isMMX)
    return LowerVECTOR_SHUFFLE_4wide(SVOp, DAG);

  return SDValue();
}

SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op,
                                                SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();
  if (VT.getSizeInBits() == 8) {
    SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32,
                                  Op.getOperand(0), Op.getOperand(1));
    SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
                                 DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
  } else if (VT.getSizeInBits() == 16) {
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    // If Idx is 0, it's cheaper to do a move instead of a pextrw.
    if (Idx == 0)
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
                                     DAG.getNode(ISD::BIT_CONVERT, dl,
                                                 MVT::v4i32,
                                                 Op.getOperand(0)),
                                     Op.getOperand(1)));
    SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
                                  Op.getOperand(0), Op.getOperand(1));
    SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
                                 DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
  } else if (VT == MVT::f32) {
    // EXTRACTPS outputs to a GPR32 register which will require a movd to copy
    // the result back to FR32 register. It's only worth matching if the
    // result has a single use which is a store or a bitcast to i32. And in
    // the case of a store, it's not worth it if the index is a constant 0,
    // because a MOVSSmr can be used instead, which is smaller and faster.
    if (!Op.hasOneUse())
      return SDValue();
    SDNode *User = *Op.getNode()->use_begin();
    if ((User->getOpcode() != ISD::STORE ||
         (isa<ConstantSDNode>(Op.getOperand(1)) &&
          cast<ConstantSDNode>(Op.getOperand(1))->isNullValue())) &&
        (User->getOpcode() != ISD::BIT_CONVERT ||
         User->getValueType(0) != MVT::i32))
      return SDValue();
    SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
                                  DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32,
                                              Op.getOperand(0)),
                                  Op.getOperand(1));
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Extract);
  } else if (VT == MVT::i32) {
    // ExtractPS works with constant index.
    if (isa<ConstantSDNode>(Op.getOperand(1)))
      return Op;
  }
  return SDValue();
}


SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return SDValue();

  if (Subtarget->hasSSE41()) {
    SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG);
    if (Res.getNode())
      return Res;
  }

  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();
  // TODO: handle v16i8.
  if (VT.getSizeInBits() == 16) {
    SDValue Vec = Op.getOperand(0);
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    if (Idx == 0)
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
                                     DAG.getNode(ISD::BIT_CONVERT, dl,
                                                 MVT::v4i32, Vec),
                                     Op.getOperand(1)));
    // Transform it so it matches pextrw, which produces a 32-bit result.
    EVT EltVT = MVT::i32;
    SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT,
                                  Op.getOperand(0), Op.getOperand(1));
    SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract,
                                 DAG.getValueType(VT));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
  } else if (VT.getSizeInBits() == 32) {
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    if (Idx == 0)
      return Op;

    // SHUFPS the element to the lowest double word, then movss.
    int Mask[4] = { Idx, -1, -1, -1 };
    EVT VVT = Op.getOperand(0).getValueType();
    SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
                                       DAG.getUNDEF(VVT), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
                       DAG.getIntPtrConstant(0));
  } else if (VT.getSizeInBits() == 64) {
    // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b
    // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
    // to match extract_elt for f64.
    unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    if (Idx == 0)
      return Op;

    // UNPCKHPD the element to the lowest double word, then movsd.
    // Note if the lower 64 bits of the result of the UNPCKHPD is then stored
    // to a f64mem, the whole operation is folded into a single MOVHPDmr.
    int Mask[2] = { 1, -1 };
    EVT VVT = Op.getOperand(0).getValueType();
    SDValue Vec = DAG.getVectorShuffle(VVT, dl, Op.getOperand(0),
                                       DAG.getUNDEF(VVT), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
                       DAG.getIntPtrConstant(0));
  }

  return SDValue();
}
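// Illustrative sketch (SSE intrinsics model, not the DAG code itself): the
// shuffle-then-extract idiom above corresponds roughly to
/*
  float extract_elt(__m128 v, const int idx) {   // idx known at compile time
    __m128 t = _mm_shuffle_ps(v, v, idx);        // move element idx to lane 0
    return _mm_cvtss_f32(t);                     // read lane 0 (movss)
  }
*/
// Lanes 1-3 of the shuffled result are don't-cares, matching the -1 entries
// in the masks above.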

SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
  EVT VT = Op.getValueType();
  EVT EltVT = VT.getVectorElementType();
  DebugLoc dl = Op.getDebugLoc();

  SDValue N0 = Op.getOperand(0);
  SDValue N1 = Op.getOperand(1);
  SDValue N2 = Op.getOperand(2);

  if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
      isa<ConstantSDNode>(N2)) {
    unsigned Opc;
    if (VT == MVT::v8i16)
      Opc = X86ISD::PINSRW;
    else if (VT == MVT::v4i16)
      Opc = X86ISD::MMX_PINSRW;
    else if (VT == MVT::v16i8)
      Opc = X86ISD::PINSRB;
    else
      Opc = X86ISD::PINSRB;

    // Transform it so it matches pinsr{b,w}, which expects a GR32 as its
    // second argument.
    if (N1.getValueType() != MVT::i32)
      N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
    if (N2.getValueType() != MVT::i32)
      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
    return DAG.getNode(Opc, dl, VT, N0, N1, N2);
  } else if (EltVT == MVT::f32 && isa<ConstantSDNode>(N2)) {
    // Bits [7:6] of the constant are the source select. This will always be
    //  zero here. The DAG Combiner may combine an extract_elt index into
    //  these bits. For example (insert (extract, 3), 2) could be matched by
    //  putting the '3' into bits [7:6] of X86ISD::INSERTPS.
    // Bits [5:4] of the constant are the destination select. This is the
    //  value of the incoming immediate.
    // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
    //  combine either bitwise AND or insert of float 0.0 to set these bits.
    N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue() << 4);
    // Create this as a scalar to vector.
    N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
    return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2);
  } else if (EltVT == MVT::i32 && isa<ConstantSDNode>(N2)) {
    // PINSR* works with constant index.
    return Op;
  }
  return SDValue();
}
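// Worked example of the INSERTPS immediate layout described above
// (illustrative): inserting a scalar into element 2 of a v4f32 arrives here
// with N2 == 2; shifting left by 4 places it in the destination-select field:
//   imm8 = 2 << 4 = 0x20
//   (bits [7:6] src select = 0, bits [5:4] dst select = 2, bits [3:0] = 0)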

SDValue
X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  EVT EltVT = VT.getVectorElementType();

  if (Subtarget->hasSSE41())
    return LowerINSERT_VECTOR_ELT_SSE4(Op, DAG);

  if (EltVT == MVT::i8)
    return SDValue();

  DebugLoc dl = Op.getDebugLoc();
  SDValue N0 = Op.getOperand(0);
  SDValue N1 = Op.getOperand(1);
  SDValue N2 = Op.getOperand(2);

  if (EltVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
    // Transform it so it matches pinsrw, which expects a 16-bit value in a
    // GR32 as its second argument.
    if (N1.getValueType() != MVT::i32)
      N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
    if (N2.getValueType() != MVT::i32)
      N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
    return DAG.getNode(VT == MVT::v8i16 ? X86ISD::PINSRW : X86ISD::MMX_PINSRW,
                       dl, VT, N0, N1, N2);
  }
  return SDValue();
}

SDValue
X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  if (Op.getValueType() == MVT::v2f32)
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f32,
                       DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i32,
                                   DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32,
                                               Op.getOperand(0))));

  if (Op.getValueType() == MVT::v1i64 &&
      Op.getOperand(0).getValueType() == MVT::i64)
    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));

  SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
  EVT VT = MVT::v2i32;
  switch (Op.getValueType().getSimpleVT().SimpleTy) {
  default: break;
  case MVT::v16i8:
  case MVT::v8i16:
    VT = MVT::v4i32;
    break;
  }
  return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(),
                     DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, AnyExt));
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the X86ISD::Wrapper node. Suppose N is
// one of the above mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form addressing mode. These wrapped nodes will be selected
// into MOV32ri.
SDValue
X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);

  // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
  // global base reg.
  unsigned char OpFlag = 0;
  unsigned WrapperKind = X86ISD::Wrapper;
  CodeModel::Model M = getTargetMachine().getCodeModel();

  if (Subtarget->isPICStyleRIPRel() &&
      (M == CodeModel::Small || M == CodeModel::Kernel))
    WrapperKind = X86ISD::WrapperRIP;
  else if (Subtarget->isPICStyleGOT())
    OpFlag = X86II::MO_GOTOFF;
  else if (Subtarget->isPICStyleStubPIC())
    OpFlag = X86II::MO_PIC_BASE_OFFSET;

  SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
                                             CP->getAlignment(),
                                             CP->getOffset(), OpFlag);
  DebugLoc DL = CP->getDebugLoc();
  Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
  // With PIC, the address is actually $g + Offset.
  if (OpFlag) {
    Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg,
                                     DebugLoc(), getPointerTy()),
                         Result);
  }

  return Result;
}

SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
  // global base reg.
  unsigned char OpFlag = 0;
  unsigned WrapperKind = X86ISD::Wrapper;
  CodeModel::Model M = getTargetMachine().getCodeModel();

  if (Subtarget->isPICStyleRIPRel() &&
      (M == CodeModel::Small || M == CodeModel::Kernel))
    WrapperKind = X86ISD::WrapperRIP;
  else if (Subtarget->isPICStyleGOT())
    OpFlag = X86II::MO_GOTOFF;
  else if (Subtarget->isPICStyleStubPIC())
    OpFlag = X86II::MO_PIC_BASE_OFFSET;

  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
                                          OpFlag);
  DebugLoc DL = JT->getDebugLoc();
  Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);

  // With PIC, the address is actually $g + Offset.
  if (OpFlag) {
    Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg,
                                     DebugLoc(), getPointerTy()),
                         Result);
  }

  return Result;
}

SDValue
X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();

  // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
  // global base reg.
  unsigned char OpFlag = 0;
  unsigned WrapperKind = X86ISD::Wrapper;
  CodeModel::Model M = getTargetMachine().getCodeModel();

  if (Subtarget->isPICStyleRIPRel() &&
      (M == CodeModel::Small || M == CodeModel::Kernel))
    WrapperKind = X86ISD::WrapperRIP;
  else if (Subtarget->isPICStyleGOT())
    OpFlag = X86II::MO_GOTOFF;
  else if (Subtarget->isPICStyleStubPIC())
    OpFlag = X86II::MO_PIC_BASE_OFFSET;

  SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag);

  DebugLoc DL = Op.getDebugLoc();
  Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);

  // With PIC, the address is actually $g + Offset.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->is64Bit()) {
    Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg,
                                     DebugLoc(), getPointerTy()),
                         Result);
  }

  return Result;
}

SDValue
X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
  // Create the TargetBlockAddress node.
  unsigned char OpFlags =
    Subtarget->ClassifyBlockAddressReference();
  CodeModel::Model M = getTargetMachine().getCodeModel();
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  DebugLoc dl = Op.getDebugLoc();
  SDValue Result = DAG.getBlockAddress(BA, getPointerTy(),
                                       /*isTarget=*/true, OpFlags);

  if (Subtarget->isPICStyleRIPRel() &&
      (M == CodeModel::Small || M == CodeModel::Kernel))
    Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
  else
    Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);

  // With PIC, the address is actually $g + Offset.
  if (isGlobalRelativeToPICBase(OpFlags)) {
    Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
                         Result);
  }

  return Result;
}
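// Illustrative shape of the "$g + Offset" PIC computation used by the
// lowerings above (DAG form, not literal code): on 32-bit GOT-style PIC,
//   addr = ADD(GlobalBaseReg, Wrapper(TargetSymbol @GOTOFF))
// i.e. the symbol is materialized as a displacement from the PIC base
// register rather than as an absolute address.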

SDValue
X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
                                      int64_t Offset,
                                      SelectionDAG &DAG) const {
  // Create the TargetGlobalAddress node, folding in the constant
  // offset if it is legal.
  unsigned char OpFlags =
    Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
  CodeModel::Model M = getTargetMachine().getCodeModel();
  SDValue Result;
  if (OpFlags == X86II::MO_NO_FLAG &&
      X86::isOffsetSuitableForCodeModel(Offset, M)) {
    // A direct static reference to a global.
    Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
    Offset = 0;
  } else {
    Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags);
  }

  if (Subtarget->isPICStyleRIPRel() &&
      (M == CodeModel::Small || M == CodeModel::Kernel))
    Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
  else
    Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);

  // With PIC, the address is actually $g + Offset.
  if (isGlobalRelativeToPICBase(OpFlags)) {
    Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
                         DAG.getNode(X86ISD::GlobalBaseReg, dl, getPointerTy()),
                         Result);
  }

  // For globals that require a load from a stub to get the address, emit the
  // load.
  if (isGlobalStubReference(OpFlags))
    Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result,
                         PseudoSourceValue::getGOT(), 0, false, false, 0);

  // If there was a non-zero offset that we didn't fold, create an explicit
  // addition for it.
  if (Offset != 0)
    Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result,
                         DAG.getConstant(Offset, getPointerTy()));

  return Result;
}

SDValue
X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
  return LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
}

static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
           SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
           unsigned char OperandFlags) {
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  DebugLoc dl = GA->getDebugLoc();
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
                                           GA->getValueType(0),
                                           GA->getOffset(),
                                           OperandFlags);
  if (InFlag) {
    SDValue Ops[] = { Chain, TGA, *InFlag };
    Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3);
  } else {
    SDValue Ops[] = { Chain, TGA };
    Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
  }

  // TLSADDR will be codegen'ed as a call. Inform MFI that this function has
  // calls.
  MFI->setHasCalls(true);

  SDValue Flag = Chain.getValue(1);
  return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 32 bit
static SDValue
LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                const EVT PtrVT) {
  SDValue InFlag;
  DebugLoc dl = GA->getDebugLoc();  // ? function entry point might be better
  SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
                                   DAG.getNode(X86ISD::GlobalBaseReg,
                                               DebugLoc(), PtrVT), InFlag);
  InFlag = Chain.getValue(1);

  return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD);
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
static SDValue
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                const EVT PtrVT) {
  return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT,
                    X86::RAX, X86II::MO_TLSGD);
}

// Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
// "local exec" model.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                   const EVT PtrVT, TLSModel::Model model,
                                   bool is64Bit) {
  DebugLoc dl = GA->getDebugLoc();
  // Get the Thread Pointer
  SDValue Base = DAG.getNode(X86ISD::SegmentBaseAddress,
                             DebugLoc(), PtrVT,
                             DAG.getRegister(is64Bit? X86::FS : X86::GS,
                                             MVT::i32));

  SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base,
                                      NULL, 0, false, false, 0);

  unsigned char OperandFlags = 0;
  // Most TLS accesses are not RIP relative, even on x86-64. One exception is
  // initialexec.
  unsigned WrapperKind = X86ISD::Wrapper;
  if (model == TLSModel::LocalExec) {
    OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF;
  } else if (is64Bit) {
    assert(model == TLSModel::InitialExec);
    OperandFlags = X86II::MO_GOTTPOFF;
    WrapperKind = X86ISD::WrapperRIP;
  } else {
    assert(model == TLSModel::InitialExec);
    OperandFlags = X86II::MO_INDNTPOFF;
  }

  // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax"
  // (initial exec)
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
                                           GA->getOffset(), OperandFlags);
  SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);

  if (model == TLSModel::InitialExec)
    Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
                         PseudoSourceValue::getGOT(), 0, false, false, 0);

  // The address of the thread local variable is the add of the thread
  // pointer with the offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}

SDValue
X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
  // TODO: implement the "local dynamic" model
  // TODO: implement the "initial exec" model for pic executables
  assert(Subtarget->isTargetELF() &&
         "TLS not implemented for non-ELF targets");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = GA->getGlobal();

  // If GV is an alias then use the aliasee for determining
  // thread-localness.
  if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
    GV = GA->resolveAliasedGlobal(false);

  TLSModel::Model model = getTLSModel(GV,
                                      getTargetMachine().getRelocationModel());

  switch (model) {
  case TLSModel::GeneralDynamic:
  case TLSModel::LocalDynamic: // not implemented
    if (Subtarget->is64Bit())
      return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
    return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());

  case TLSModel::InitialExec:
  case TLSModel::LocalExec:
    return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
                               Subtarget->is64Bit());
  }

  llvm_unreachable("Unreachable");
  return SDValue();
}
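// Illustrative sketch of the exec-model address computation above (C-like
// pseudocode; segment_base and x_tpoff are hypothetical names):
/*
  char *tls_address_local_exec(void) {
    char *tp = *(char **)segment_base;   // thread pointer via %gs/%fs
    return tp + x_tpoff;                 // link-time constant x@tpoff/ntpoff
  }
*/
// Initial exec differs only in that the offset is loaded from the GOT
// instead of being a link-time constant.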

/// LowerShift - Lower SRA_PARTS and friends, which take a 2 x i32 value to
/// shift plus a shift amount, and return two i32 values.
SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getSizeInBits();
  DebugLoc dl = Op.getDebugLoc();
  bool isSRA = Op.getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);
  SDValue Tmp1 = isSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
                                     DAG.getConstant(VTBits - 1, MVT::i8))
                       : DAG.getConstant(0, VT);

  SDValue Tmp2, Tmp3;
  if (Op.getOpcode() == ISD::SHL_PARTS) {
    Tmp2 = DAG.getNode(X86ISD::SHLD, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
  } else {
    Tmp2 = DAG.getNode(X86ISD::SHRD, dl, VT, ShOpLo, ShOpHi, ShAmt);
    Tmp3 = DAG.getNode(isSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, ShAmt);
  }

  SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
                                DAG.getConstant(VTBits, MVT::i8));
  SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
                             AndNode, DAG.getConstant(0, MVT::i8));

  SDValue Hi, Lo;
  SDValue CC = DAG.getConstant(X86::COND_NE, MVT::i8);
  SDValue Ops0[4] = { Tmp2, Tmp3, CC, Cond };
  SDValue Ops1[4] = { Tmp3, Tmp1, CC, Cond };

  if (Op.getOpcode() == ISD::SHL_PARTS) {
    Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
    Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
  } else {
    Lo = DAG.getNode(X86ISD::CMOV, dl, VT, Ops0, 4);
    Hi = DAG.getNode(X86ISD::CMOV, dl, VT, Ops1, 4);
  }

  SDValue Ops[2] = { Lo, Hi };
  return DAG.getMergeValues(Ops, 2, dl);
}
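// Scalar model of the SHL_PARTS lowering above (illustrative): a 64-bit left
// shift built from 32-bit halves, mirroring the shld + shl + cmov sequence.
/*
  void shl_parts(uint32_t *Lo, uint32_t *Hi, uint32_t lo, uint32_t hi,
                 unsigned amt) {            // amt assumed < 64
    unsigned a = amt & 31;                  // x86 shifts mask the amount
    uint32_t t2 = (hi << a) | (a ? lo >> (32 - a) : 0);   // shld
    uint32_t t3 = lo << a;                                // shl
    if (amt & 32) { *Hi = t3; *Lo = 0; }    // amount reached the high half
    else          { *Hi = t2; *Lo = t3; }
  }
*/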

SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
  EVT SrcVT = Op.getOperand(0).getValueType();

  if (SrcVT.isVector()) {
    if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) {
      return Op;
    }
    return SDValue();
  }

  assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
         "Unknown SINT_TO_FP to lower!");

  // These are really Legal; return the operand so the caller accepts it as
  // Legal.
  if (SrcVT == MVT::i32 && isScalarFPTypeInSSEReg(Op.getValueType()))
    return Op;
  if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
      Subtarget->is64Bit()) {
    return Op;
  }

  DebugLoc dl = Op.getDebugLoc();
  unsigned Size = SrcVT.getSizeInBits()/8;
  MachineFunction &MF = DAG.getMachineFunction();
  int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
  SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
                               StackSlot,
                               PseudoSourceValue::getFixedStack(SSFI), 0,
                               false, false, 0);
  return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
}

SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
                                     SDValue StackSlot,
                                     SelectionDAG &DAG) {
  // Build the FILD
  DebugLoc dl = Op.getDebugLoc();
  SDVTList Tys;
  bool useSSE = isScalarFPTypeInSSEReg(Op.getValueType());
  if (useSSE)
    Tys = DAG.getVTList(MVT::f64, MVT::Other, MVT::Flag);
  else
    Tys = DAG.getVTList(Op.getValueType(), MVT::Other);
  SDValue Ops[] = { Chain, StackSlot, DAG.getValueType(SrcVT) };
  SDValue Result = DAG.getNode(useSSE ? X86ISD::FILD_FLAG : X86ISD::FILD, dl,
                               Tys, Ops, array_lengthof(Ops));

  if (useSSE) {
    Chain = Result.getValue(1);
    SDValue InFlag = Result.getValue(2);

    // FIXME: Currently the FST is flagged to the FILD_FLAG. This
    // shouldn't be necessary except that RFP cannot be live across
    // multiple blocks. When the stackifier is fixed, they can be uncoupled.
    MachineFunction &MF = DAG.getMachineFunction();
    int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
    SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
    Tys = DAG.getVTList(MVT::Other);
    SDValue Ops[] = {
      Chain, Result, StackSlot, DAG.getValueType(Op.getValueType()), InFlag
    };
    Chain = DAG.getNode(X86ISD::FST, dl, Tys, Ops, array_lengthof(Ops));
    Result = DAG.getLoad(Op.getValueType(), dl, Chain, StackSlot,
                         PseudoSourceValue::getFixedStack(SSFI), 0,
                         false, false, 0);
  }

  return Result;
}
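// Illustrative model of the store + FILD idiom above (scalar C; the real
// lowering keeps everything in DAG nodes):
/*
  double sint_to_fp(int32_t x) {
    int32_t slot = x;        // spill the integer to a stack slot
    double d = (double)slot; // fild loads the integer and converts it
    return d;
  }
*/
// When the result lives in an SSE register, the extra FST/load pair spills
// the x87 value so it can be re-read as an SSE f64.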

// LowerUINT_TO_FP_i64 - 64-bit unsigned integer to double expansion.
SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) {
  // This algorithm is not obvious. Here it is in C code, more or less:
  /*
    double uint64_to_double( uint32_t hi, uint32_t lo ) {
      static const __m128i exp = { 0x4330000045300000ULL, 0 };
      static const __m128d bias = { 0x1.0p84, 0x1.0p52 };

      // Copy ints to xmm registers.
      __m128i xh = _mm_cvtsi32_si128( hi );
      __m128i xl = _mm_cvtsi32_si128( lo );

      // Combine into low half of a single xmm register.
      __m128i x = _mm_unpacklo_epi32( xh, xl );
      __m128d d;
      double sd;

      // Merge in appropriate exponents to give the integer bits the right
      // magnitude.
      x = _mm_unpacklo_epi32( x, exp );

      // Subtract away the biases to deal with the IEEE-754 double precision
      // implicit 1.
      d = _mm_sub_pd( (__m128d) x, bias );

      // All conversions up to here are exact. The correctly rounded result is
      // calculated using the current rounding mode using the following
      // horizontal add.
      d = _mm_add_sd( d, _mm_unpackhi_pd( d, d ) );
      _mm_store_sd( &sd, d );   // Because we are returning doubles in XMM, this
                                // store doesn't really need to be here (except
                                // maybe to zero the other double)
      return sd;
    }
  */

  DebugLoc dl = Op.getDebugLoc();
  LLVMContext *Context = DAG.getContext();

  // Build some magic constants.
  std::vector<Constant*> CV0;
  CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x45300000)));
  CV0.push_back(ConstantInt::get(*Context, APInt(32, 0x43300000)));
  CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
  CV0.push_back(ConstantInt::get(*Context, APInt(32, 0)));
  Constant *C0 = ConstantVector::get(CV0);
  SDValue CPIdx0 = DAG.getConstantPool(C0, getPointerTy(), 16);

  std::vector<Constant*> CV1;
  CV1.push_back(
    ConstantFP::get(*Context, APFloat(APInt(64, 0x4530000000000000ULL))));
  CV1.push_back(
    ConstantFP::get(*Context, APFloat(APInt(64, 0x4330000000000000ULL))));
  Constant *C1 = ConstantVector::get(CV1);
  SDValue CPIdx1 = DAG.getConstantPool(C1, getPointerTy(), 16);

  SDValue XR1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
                            DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
                                        Op.getOperand(0),
                                        DAG.getIntPtrConstant(1)));
  SDValue XR2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
                            DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
                                        Op.getOperand(0),
                                        DAG.getIntPtrConstant(0)));
  SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2);
  SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
                              PseudoSourceValue::getConstantPool(), 0,
                              false, false, 16);
  SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0);
  SDValue XR2F = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Unpck2);
  SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
                              PseudoSourceValue::getConstantPool(), 0,
                              false, false, 16);
  SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);

  // Add the halves; easiest way is to swap them into another reg first.
  int ShufMask[2] = { 1, -1 };
  SDValue Shuf = DAG.getVectorShuffle(MVT::v2f64, dl, Sub,
                                      DAG.getUNDEF(MVT::v2f64), ShufMask);
  SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuf, Sub);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Add,
                     DAG.getIntPtrConstant(0));
}

// LowerUINT_TO_FP_i32 - 32-bit unsigned integer to float expansion.
SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  // FP constant to bias correct the final result.
  SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL),
                                   MVT::f64);

  // Load the 32-bit value into an XMM register.
  SDValue Load = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32,
                             DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
                                         Op.getOperand(0),
                                         DAG.getIntPtrConstant(0)));

  Load = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
                     DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Load),
                     DAG.getIntPtrConstant(0));

  // Or the load with the bias.
  SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64,
                           DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
                                       DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
                                                   MVT::v2f64, Load)),
                           DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
                                       DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
                                                   MVT::v2f64, Bias)));
  Or = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
                   DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Or),
                   DAG.getIntPtrConstant(0));

  // Subtract the bias.
  SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);

  // Handle final rounding.
  EVT DestVT = Op.getValueType();

  if (DestVT.bitsLT(MVT::f64)) {
    return DAG.getNode(ISD::FP_ROUND, dl, DestVT, Sub,
                       DAG.getIntPtrConstant(0));
  } else if (DestVT.bitsGT(MVT::f64)) {
    return DAG.getNode(ISD::FP_EXTEND, dl, DestVT, Sub);
  }

  // The destination is already f64; no extra rounding is needed.
  return Sub;
}
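// The i32 expansion above is the classic exponent-bias trick; in scalar C it
// is, more or less (bit_cast via memcpy shown for clarity):
/*
  double uint32_to_double(uint32_t x) {
    // 0x4330000000000000 is 2^52; OR-ing x into the mantissa of 2^52 yields
    // the exact value 2^52 + x, so subtracting 2^52 recovers (double)x.
    uint64_t bits = 0x4330000000000000ULL | x;
    double d;
    memcpy(&d, &bits, sizeof d);
    return d - 0x1.0p52;
  }
*/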

SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
  SDValue N0 = Op.getOperand(0);
  DebugLoc dl = Op.getDebugLoc();

  // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
  // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
  // the optimization here.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);

  EVT SrcVT = N0.getValueType();
  if (SrcVT == MVT::i64) {
    // We only handle SSE2 f64 target here; caller can expand the rest.
    if (Op.getValueType() != MVT::f64 || !X86ScalarSSEf64)
      return SDValue();

    return LowerUINT_TO_FP_i64(Op, DAG);
  } else if (SrcVT == MVT::i32 && X86ScalarSSEf64) {
    return LowerUINT_TO_FP_i32(Op, DAG);
  }

  assert(SrcVT == MVT::i32 && "Unknown UINT_TO_FP to lower!");

  // Make a 64-bit buffer, and use it to build an FILD.
  SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64);
  SDValue WordOff = DAG.getConstant(4, getPointerTy());
  SDValue OffsetSlot = DAG.getNode(ISD::ADD, dl,
                                   getPointerTy(), StackSlot, WordOff);
  SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
                                StackSlot, NULL, 0, false, false, 0);
  SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, MVT::i32),
                                OffsetSlot, NULL, 0, false, false, 0);
  return BuildFILD(Op, MVT::i64, Store2, StackSlot, DAG);
}

std::pair<SDValue,SDValue> X86TargetLowering::
FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
  DebugLoc dl = Op.getDebugLoc();

  EVT DstTy = Op.getValueType();

  if (!IsSigned) {
    assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
    DstTy = MVT::i64;
  }

  assert(DstTy.getSimpleVT() <= MVT::i64 &&
         DstTy.getSimpleVT() >= MVT::i16 &&
         "Unknown FP_TO_SINT to lower!");

  // These are really Legal.
  if (DstTy == MVT::i32 &&
      isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
    return std::make_pair(SDValue(), SDValue());
  if (Subtarget->is64Bit() &&
      DstTy == MVT::i64 &&
      isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType()))
    return std::make_pair(SDValue(), SDValue());

  // We lower FP->sint64 into FISTP64, followed by a load, all to a temporary
  // stack slot.
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned MemSize = DstTy.getSizeInBits()/8;
  int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());

  unsigned Opc;
  switch (DstTy.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("Invalid FP_TO_SINT to lower!");
  case MVT::i16: Opc = X86ISD::FP_TO_INT16_IN_MEM; break;
  case MVT::i32: Opc = X86ISD::FP_TO_INT32_IN_MEM; break;
  case MVT::i64: Opc = X86ISD::FP_TO_INT64_IN_MEM; break;
  }

  SDValue Chain = DAG.getEntryNode();
  SDValue Value = Op.getOperand(0);
  if (isScalarFPTypeInSSEReg(Op.getOperand(0).getValueType())) {
    assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
    Chain = DAG.getStore(Chain, dl, Value, StackSlot,
                         PseudoSourceValue::getFixedStack(SSFI), 0,
                         false, false, 0);
    SDVTList Tys = DAG.getVTList(Op.getOperand(0).getValueType(), MVT::Other);
    SDValue Ops[] = {
      Chain, StackSlot, DAG.getValueType(Op.getOperand(0).getValueType())
    };
    Value = DAG.getNode(X86ISD::FLD, dl, Tys, Ops, 3);
    Chain = Value.getValue(1);
    SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
    StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
  }

  // Build the FP_TO_INT*_IN_MEM
  SDValue Ops[] = { Chain, Value, StackSlot };
  SDValue FIST = DAG.getNode(Opc, dl, MVT::Other, Ops, 3);

  return std::make_pair(FIST, StackSlot);
}

SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
  if (Op.getValueType().isVector()) {
    if (Op.getValueType() == MVT::v2i32 &&
        Op.getOperand(0).getValueType() == MVT::v2f64) {
      return Op;
    }
    return SDValue();
  }

  std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
  SDValue FIST = Vals.first, StackSlot = Vals.second;
  // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
  if (FIST.getNode() == 0) return Op;

  // Load the result.
  return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
                     FIST, StackSlot, NULL, 0, false, false, 0);
}
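// Illustrative model of the helper's unsigned path (scalar C): FP_TO_UINT of
// an i32 is lowered as a signed conversion to i64 followed by a truncate,
// which is exact for every representable uint32_t value.
/*
  uint32_t fp_to_uint32(double d) {
    int64_t wide = (int64_t)d;   // FISTP64 into a stack slot
    return (uint32_t)wide;       // load just the low 32 bits
  }
*/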

SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) {
  std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, false);
  SDValue FIST = Vals.first, StackSlot = Vals.second;
  assert(FIST.getNode() && "Unexpected failure");

  // Load the result.
  return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(),
                     FIST, StackSlot, NULL, 0, false, false, 0);
}

SDValue X86TargetLowering::LowerFABS(SDValue Op, SelectionDAG &DAG) {
  LLVMContext *Context = DAG.getContext();
  DebugLoc dl = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  EVT EltVT = VT;
  if (VT.isVector())
    EltVT = VT.getVectorElementType();
  std::vector<Constant*> CV;
  if (EltVT == MVT::f64) {
    Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63))));
    CV.push_back(C);
    CV.push_back(C);
  } else {
    Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31))));
    CV.push_back(C);
    CV.push_back(C);
    CV.push_back(C);
    CV.push_back(C);
  }
  Constant *C = ConstantVector::get(CV);
  SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
  SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                             PseudoSourceValue::getConstantPool(), 0,
                             false, false, 16);
  return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask);
}

SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) {
  LLVMContext *Context = DAG.getContext();
  DebugLoc dl = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  EVT EltVT = VT;
  if (VT.isVector())
    EltVT = VT.getVectorElementType();
  std::vector<Constant*> CV;
  if (EltVT == MVT::f64) {
    Constant *C = ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63)));
    CV.push_back(C);
    CV.push_back(C);
  } else {
    Constant *C = ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31)));
    CV.push_back(C);
    CV.push_back(C);
    CV.push_back(C);
    CV.push_back(C);
  }
  Constant *C = ConstantVector::get(CV);
  SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
  SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                             PseudoSourceValue::getConstantPool(), 0,
                             false, false, 16);
  if (VT.isVector()) {
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                       DAG.getNode(ISD::XOR, dl, MVT::v2i64,
                                   DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
                                               Op.getOperand(0)),
                                   DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2i64,
                                               Mask)));
  } else {
    return DAG.getNode(X86ISD::FXOR, dl, VT, Op.getOperand(0), Mask);
  }
}
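// Both lowerings above are plain sign-bit arithmetic; the scalar equivalent
// (illustrative, assuming IEEE-754 doubles; `bits` is the bit_cast of the
// input) is:
/*
  uint64_t abs_bits = bits & ~(1ULL << 63);   // FABS: clear the sign bit
  uint64_t neg_bits = bits ^  (1ULL << 63);   // FNEG: flip the sign bit
*/
// The constants are splatted across the vector so the same FAND/FXOR works
// for both scalar and packed types.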

SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
  LLVMContext *Context = DAG.getContext();
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  EVT SrcVT = Op1.getValueType();

  // If second operand is smaller, extend it first.
  if (SrcVT.bitsLT(VT)) {
    Op1 = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op1);
    SrcVT = VT;
  }
  // And if it is bigger, shrink it first.
  if (SrcVT.bitsGT(VT)) {
    Op1 = DAG.getNode(ISD::FP_ROUND, dl, VT, Op1, DAG.getIntPtrConstant(1));
    SrcVT = VT;
  }

  // At this point the operands and the result should have the same
  // type, and that won't be f80 since that is not custom lowered.

  // First get the sign bit of second operand.
  std::vector<Constant*> CV;
  if (SrcVT == MVT::f64) {
    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 1ULL << 63))));
    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
  } else {
    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 1U << 31))));
    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
  }
  Constant *C = ConstantVector::get(CV);
  SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
  SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx,
                              PseudoSourceValue::getConstantPool(), 0,
                              false, false, 16);
  SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1);

  // Shift sign bit right or left if the two operands have different types.
  if (SrcVT.bitsGT(VT)) {
    // Op0 is MVT::f32, Op1 is MVT::f64.
    SignBit = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, SignBit);
    SignBit = DAG.getNode(X86ISD::FSRL, dl, MVT::v2f64, SignBit,
                          DAG.getConstant(32, MVT::i32));
    SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, SignBit);
    SignBit = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, SignBit,
                          DAG.getIntPtrConstant(0));
  }

  // Clear first operand sign bit.
  CV.clear();
  if (VT == MVT::f64) {
    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, ~(1ULL << 63)))));
    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(64, 0))));
  } else {
    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, ~(1U << 31)))));
    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
    CV.push_back(ConstantFP::get(*Context, APFloat(APInt(32, 0))));
  }
  C = ConstantVector::get(CV);
  CPIdx = DAG.getConstantPool(C, getPointerTy(), 16);
  SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
                              PseudoSourceValue::getConstantPool(), 0,
                              false, false, 16);
  SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2);

  // Or the value with the sign bit.
  return DAG.getNode(X86ISD::FOR, dl, VT, Val, SignBit);
}
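// The whole lowering is the standard bit-level copysign; an illustrative
// same-width f64 model (bits()/from_bits() stand for bit_casts):
/*
  double copysign_f64(double mag, double sgn) {
    uint64_t m = bits(mag) & ~(1ULL << 63);   // FAND with Mask2: drop sign
    uint64_t s = bits(sgn) &  (1ULL << 63);   // FAND with Mask1: keep sign
    return from_bits(m | s);                  // FOR merges the two
  }
*/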
      // node in a match is a store, isel doesn't know how to remap non-chain
      // non-flag uses of other nodes in the match, such as the ADD in this
      // case. This leads to the ADD being left around and reselected, with
      // the result being two adds in the output.
      for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
           UE = Op.getNode()->use_end(); UI != UE; ++UI)
        if (UI->getOpcode() == ISD::STORE)
          goto default_case;
      if (ConstantSDNode *C =
            dyn_cast<ConstantSDNode>(Op.getNode()->getOperand(1))) {
        // An add of one will be selected as an INC.
        if (C->getAPIntValue() == 1) {
          Opcode = X86ISD::INC;
          NumOperands = 1;
          break;
        }
        // An add of negative one (subtract of one) will be selected as a DEC.
        if (C->getAPIntValue().isAllOnesValue()) {
          Opcode = X86ISD::DEC;
          NumOperands = 1;
          break;
        }
      }
      // Otherwise use a regular EFLAGS-setting add.
      Opcode = X86ISD::ADD;
      NumOperands = 2;
      break;
    case ISD::AND: {
      // If the primary 'and' result isn't used, don't bother using
      // X86ISD::AND, because a TEST instruction will be better.
      bool NonFlagUse = false;
      for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
           UE = Op.getNode()->use_end(); UI != UE; ++UI) {
        SDNode *User = *UI;
        unsigned UOpNo = UI.getOperandNo();
        if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
          // Look past the truncate.
          UOpNo = User->use_begin().getOperandNo();
          User = *User->use_begin();
        }
        if (User->getOpcode() != ISD::BRCOND &&
            User->getOpcode() != ISD::SETCC &&
            (User->getOpcode() != ISD::SELECT || UOpNo != 0)) {
          NonFlagUse = true;
          break;
        }
      }
      if (!NonFlagUse)
        break;
    }
    // FALL THROUGH
    case ISD::SUB:
    case ISD::OR:
    case ISD::XOR:
      // Due to the ISEL shortcoming noted above, be conservative if this op is
      // likely to be selected as part of a load-modify-store instruction.
      for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
           UE = Op.getNode()->use_end(); UI != UE; ++UI)
        if (UI->getOpcode() == ISD::STORE)
          goto default_case;
      // Otherwise use a regular EFLAGS-setting instruction.
      switch (Op.getNode()->getOpcode()) {
      case ISD::SUB: Opcode = X86ISD::SUB; break;
      case ISD::OR:  Opcode = X86ISD::OR;  break;
      case ISD::XOR: Opcode = X86ISD::XOR; break;
      case ISD::AND: Opcode = X86ISD::AND; break;
      default: llvm_unreachable("unexpected operator!");
      }
      NumOperands = 2;
      break;
    case X86ISD::ADD:
    case X86ISD::SUB:
    case X86ISD::INC:
    case X86ISD::DEC:
    case X86ISD::OR:
    case X86ISD::XOR:
    case X86ISD::AND:
      return SDValue(Op.getNode(), 1);
    default:
    default_case:
      break;
    }
    if (Opcode != 0) {
      SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
      SmallVector<SDValue, 4> Ops;
      for (unsigned i = 0; i != NumOperands; ++i)
        Ops.push_back(Op.getOperand(i));
      SDValue New = DAG.getNode(Opcode, dl, VTs, &Ops[0], NumOperands);
      DAG.ReplaceAllUsesWith(Op, New);
      return SDValue(New.getNode(), 1);
    }
  }

  // Otherwise just emit a CMP with 0, which is the TEST pattern.
  return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                     DAG.getConstant(0, Op.getValueType()));
}
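// Illustrative sketch of the effect above: for IR along the lines of
//   %a = add i32 %x, 1
//   %c = icmp eq i32 %a, 0
// the add is rewritten to X86ISD::INC and the compare consumes the EFLAGS
// result directly, so no separate TEST instruction should be needed.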
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
/// equivalent.
SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
                                   SelectionDAG &DAG) {
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op1))
    if (C->getAPIntValue() == 0)
      return EmitTest(Op0, X86CC, DAG);

  DebugLoc dl = Op0.getDebugLoc();
  return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}

/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT
/// node if it's possible.
static SDValue LowerToBT(SDValue And, ISD::CondCode CC,
                         DebugLoc dl, SelectionDAG &DAG) {
  SDValue Op0 = And.getOperand(0);
  SDValue Op1 = And.getOperand(1);
  if (Op0.getOpcode() == ISD::TRUNCATE)
    Op0 = Op0.getOperand(0);
  if (Op1.getOpcode() == ISD::TRUNCATE)
    Op1 = Op1.getOperand(0);

  SDValue LHS, RHS;
  if (Op1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *And10C = dyn_cast<ConstantSDNode>(Op1.getOperand(0)))
      if (And10C->getZExtValue() == 1) {
        LHS = Op0;
        RHS = Op1.getOperand(1);
      }
  } else if (Op0.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0)))
      if (And00C->getZExtValue() == 1) {
        LHS = Op1;
        RHS = Op0.getOperand(1);
      }
  } else if (Op1.getOpcode() == ISD::Constant) {
    ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
    SDValue AndLHS = Op0;
    if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
      LHS = AndLHS.getOperand(0);
      RHS = AndLHS.getOperand(1);
    }
  }

  if (LHS.getNode()) {
    // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT
    // instruction. Since the shift amount is in-range-or-undefined, we know
    // that doing a bittest on the i16 value is ok. We extend to i32 because
    // the encoding for the i16 version is larger than the i32 version.
    if (LHS.getValueType() == MVT::i8)
      LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);

    // If the operand types disagree, extend the shift amount to match. Since
    // BT ignores high bits (like shifts) we can use anyextend.
    if (LHS.getValueType() != RHS.getValueType())
      RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);

    SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
    unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
    return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
                       DAG.getConstant(Cond, MVT::i8), BT);
  }

  return SDValue();
}

SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i8 && "SetCC type must be 8-bit integer");
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  // Optimize to BT if possible.
  // Lower (X & (1 << N)) == 0 to BT(X, N).
  // Lower ((X >>u N) & 1) != 0 to BT(X, N).
  // Lower ((X >>s N) & 1) != 0 to BT(X, N).
  if (Op0.getOpcode() == ISD::AND &&
      Op0.hasOneUse() &&
      Op1.getOpcode() == ISD::Constant &&
      cast<ConstantSDNode>(Op1)->getZExtValue() == 0 &&
      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG);
    if (NewSetCC.getNode())
      return NewSetCC;
  }
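  // Illustrative example of the BT lowering above: (X & (1 << N)) == 0
  // becomes (X86ISD::SETCC COND_AE, (X86ISD::BT X, N)), i.e. a "bt"
  // followed by "setae" instead of a shift/and/test/sete sequence.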
  // Look for "(setcc) == / != 1" to avoid unnecessary setcc.
  if (Op0.getOpcode() == X86ISD::SETCC &&
      Op1.getOpcode() == ISD::Constant &&
      (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
       cast<ConstantSDNode>(Op1)->isNullValue()) &&
      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
    bool Invert = (CC == ISD::SETNE) ^
      cast<ConstantSDNode>(Op1)->isNullValue();
    if (Invert)
      CCode = X86::GetOppositeBranchCondition(CCode);
    return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
                       DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
  }

  bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
  unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
  if (X86CC == X86::COND_INVALID)
    return SDValue();

  SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);

  // Use sbb x, x to materialize carry bit into a GPR.
  if (X86CC == X86::COND_B)
    return DAG.getNode(ISD::AND, dl, MVT::i8,
                       DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8,
                                   DAG.getConstant(X86CC, MVT::i8), Cond),
                       DAG.getConstant(1, MVT::i8));

  return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
                     DAG.getConstant(X86CC, MVT::i8), Cond);
}

SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
  SDValue Cond;
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  EVT VT = Op.getValueType();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
  DebugLoc dl = Op.getDebugLoc();

  if (isFP) {
    unsigned SSECC = 8;
    EVT VT0 = Op0.getValueType();
    assert(VT0 == MVT::v4f32 || VT0 == MVT::v2f64);
    unsigned Opc = VT0 == MVT::v4f32 ? X86ISD::CMPPS : X86ISD::CMPPD;
    bool Swap = false;

    switch (SetCCOpcode) {
    default: break;
    case ISD::SETOEQ:
    case ISD::SETEQ:  SSECC = 0; break;
    case ISD::SETOGT:
    case ISD::SETGT:  Swap = true; // Fallthrough
    case ISD::SETLT:
    case ISD::SETOLT: SSECC = 1; break;
    case ISD::SETOGE:
    case ISD::SETGE:  Swap = true; // Fallthrough
    case ISD::SETLE:
    case ISD::SETOLE: SSECC = 2; break;
    case ISD::SETUO:  SSECC = 3; break;
    case ISD::SETUNE:
    case ISD::SETNE:  SSECC = 4; break;
    case ISD::SETULE: Swap = true;
    case ISD::SETUGE: SSECC = 5; break;
    case ISD::SETULT: Swap = true;
    case ISD::SETUGT: SSECC = 6; break;
    case ISD::SETO:   SSECC = 7; break;
    }
    if (Swap)
      std::swap(Op0, Op1);

    // In the two special cases we can't handle, emit two comparisons.
    if (SSECC == 8) {
      if (SetCCOpcode == ISD::SETUEQ) {
        SDValue UNORD, EQ;
        UNORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(3, MVT::i8));
        EQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(0, MVT::i8));
        return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ);
      } else if (SetCCOpcode == ISD::SETONE) {
        SDValue ORD, NEQ;
        ORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(7, MVT::i8));
        NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8));
        return DAG.getNode(ISD::AND, dl, VT, ORD, NEQ);
      }
      llvm_unreachable("Illegal FP comparison");
    }
    // Handle all other FP comparisons here.
    return DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(SSECC, MVT::i8));
  }
  // We are handling one of the integer comparisons here. Since SSE only has
  // GT and EQ comparisons for integer, swapping operands and multiple
  // operations may be required for some comparisons.
  unsigned Opc = 0, EQOpc = 0, GTOpc = 0;
  bool Swap = false, Invert = false, FlipSigns = false;

  switch (VT.getSimpleVT().SimpleTy) {
  default: break;
  case MVT::v8i8:
  case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break;
  case MVT::v4i16:
  case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break;
  case MVT::v2i32:
  case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break;
  case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break;
  }

  switch (SetCCOpcode) {
  default: break;
  case ISD::SETNE:  Invert = true;
  case ISD::SETEQ:  Opc = EQOpc; break;
  case ISD::SETLT:  Swap = true;
  case ISD::SETGT:  Opc = GTOpc; break;
  case ISD::SETGE:  Swap = true;
  case ISD::SETLE:  Opc = GTOpc; Invert = true; break;
  case ISD::SETULT: Swap = true;
  case ISD::SETUGT: Opc = GTOpc; FlipSigns = true; break;
  case ISD::SETUGE: Swap = true;
  case ISD::SETULE: Opc = GTOpc; FlipSigns = true; Invert = true; break;
  }
  if (Swap)
    std::swap(Op0, Op1);

  // Since SSE has no unsigned integer comparisons, we need to flip the sign
  // bits of the inputs before performing those operations.
  if (FlipSigns) {
    EVT EltVT = VT.getVectorElementType();
    SDValue SignBit = DAG.getConstant(APInt::getSignBit(EltVT.getSizeInBits()),
                                      EltVT);
    std::vector<SDValue> SignBits(VT.getVectorNumElements(), SignBit);
    SDValue SignVec = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &SignBits[0],
                                  SignBits.size());
    Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SignVec);
    Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SignVec);
  }

  SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);

  // If the logical-not of the result is required, perform that now.
  if (Invert)
    Result = DAG.getNOT(dl, Result, VT);

  return Result;
}
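// Illustrative example of the unsigned path above: a v4i32 "setult a, b"
// swaps the operands, XORs both inputs with a 0x80000000 splat (FlipSigns),
// and emits PCMPGTD, since SSE provides signed GT/EQ comparisons only.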
// isX86LogicalCmp - Return true if opcode is a X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
  unsigned Opc = Op.getNode()->getOpcode();
  if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI)
    return true;
  if (Op.getResNo() == 1 &&
      (Opc == X86ISD::ADD ||
       Opc == X86ISD::SUB ||
       Opc == X86ISD::SMUL ||
       Opc == X86ISD::UMUL ||
       Opc == X86ISD::INC ||
       Opc == X86ISD::DEC ||
       Opc == X86ISD::OR ||
       Opc == X86ISD::XOR ||
       Opc == X86ISD::AND))
    return true;

  return false;
}

SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDValue Cond = Op.getOperand(0);
  DebugLoc dl = Op.getDebugLoc();
  SDValue CC;

  if (Cond.getOpcode() == ISD::SETCC) {
    SDValue NewCond = LowerSETCC(Cond, DAG);
    if (NewCond.getNode())
      Cond = NewCond;
  }

  // (select (x == 0), -1, 0) -> (sign_bit (x - 1))
  SDValue Op1 = Op.getOperand(1);
  SDValue Op2 = Op.getOperand(2);
  if (Cond.getOpcode() == X86ISD::SETCC &&
      cast<ConstantSDNode>(Cond.getOperand(0))->getZExtValue() == X86::COND_E) {
    SDValue Cmp = Cond.getOperand(1);
    if (Cmp.getOpcode() == X86ISD::CMP) {
      ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op1);
      ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(Op2);
      ConstantSDNode *RHSC =
        dyn_cast<ConstantSDNode>(Cmp.getOperand(1).getNode());
      if (N1C && N1C->isAllOnesValue() &&
          N2C && N2C->isNullValue() &&
          RHSC && RHSC->isNullValue()) {
        SDValue CmpOp0 = Cmp.getOperand(0);
        Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
                          CmpOp0, DAG.getConstant(1, CmpOp0.getValueType()));
        return DAG.getNode(X86ISD::SETCC_CARRY, dl, Op.getValueType(),
                           DAG.getConstant(X86::COND_B, MVT::i8), Cmp);
      }
    }
  }

  // Look past (and (setcc_carry (cmp ...)), 1).
  if (Cond.getOpcode() == ISD::AND &&
      Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
    if (C && C->getAPIntValue() == 1)
      Cond = Cond.getOperand(0);
  }

  // If condition flag is set by a X86ISD::CMP, then use it as the condition
  // setting operand in place of the X86ISD::SETCC.
  if (Cond.getOpcode() == X86ISD::SETCC ||
      Cond.getOpcode() == X86ISD::SETCC_CARRY) {
    CC = Cond.getOperand(0);

    SDValue Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    EVT VT = Op.getValueType();

    bool IllegalFPCMov = false;
    if (VT.isFloatingPoint() && !VT.isVector() &&
        !isScalarFPTypeInSSEReg(VT))  // FPStack?
      IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue());

    if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
        Opc == X86ISD::BT) { // FIXME
      Cond = Cmp;
      addTest = false;
    }
  }

  if (addTest) {
    // Look past the truncate.
    if (Cond.getOpcode() == ISD::TRUNCATE)
      Cond = Cond.getOperand(0);

    // We know the result of AND is compared against zero. Try to match
    // it to BT.
    if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
      SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
      if (NewSetCC.getNode()) {
        CC = NewSetCC.getOperand(0);
        Cond = NewSetCC.getOperand(1);
        addTest = false;
      }
    }
  }

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    Cond = EmitTest(Cond, X86::COND_NE, DAG);
  }

  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
  // condition is true.
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
  SDValue Ops[] = { Op2, Op1, CC, Cond };
  return DAG.getNode(X86ISD::CMOV, dl, VTs, Ops, array_lengthof(Ops));
}

// isAndOrOfSetCCs - Return true if node is an ISD::AND or
// ISD::OR of two X86ISD::SETCC nodes each of which has no other use apart
// from the AND / OR.
static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
  Opc = Op.getOpcode();
  if (Opc != ISD::OR && Opc != ISD::AND)
    return false;
  return (Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
          Op.getOperand(0).hasOneUse() &&
          Op.getOperand(1).getOpcode() == X86ISD::SETCC &&
          Op.getOperand(1).hasOneUse());
}

// isXor1OfSetCC - Return true if node is an ISD::XOR of a X86ISD::SETCC and
// 1 and that the SETCC node has a single use.
static bool isXor1OfSetCC(SDValue Op) {
  if (Op.getOpcode() != ISD::XOR)
    return false;
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (N1C && N1C->getAPIntValue() == 1) {
    return Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
           Op.getOperand(0).hasOneUse();
  }
  return false;
}

SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
  bool addTest = true;
  SDValue Chain = Op.getOperand(0);
  SDValue Cond  = Op.getOperand(1);
  SDValue Dest  = Op.getOperand(2);
  DebugLoc dl = Op.getDebugLoc();
  SDValue CC;

  if (Cond.getOpcode() == ISD::SETCC) {
    SDValue NewCond = LowerSETCC(Cond, DAG);
    if (NewCond.getNode())
      Cond = NewCond;
  }
#if 0
  // FIXME: LowerXALUO doesn't handle these!!
  else if (Cond.getOpcode() == X86ISD::ADD ||
           Cond.getOpcode() == X86ISD::SUB ||
           Cond.getOpcode() == X86ISD::SMUL ||
           Cond.getOpcode() == X86ISD::UMUL)
    Cond = LowerXALUO(Cond, DAG);
#endif

  // Look past (and (setcc_carry (cmp ...)), 1).
  if (Cond.getOpcode() == ISD::AND &&
      Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
    if (C && C->getAPIntValue() == 1)
      Cond = Cond.getOperand(0);
  }

  // If condition flag is set by a X86ISD::CMP, then use it as the condition
  // setting operand in place of the X86ISD::SETCC.
  if (Cond.getOpcode() == X86ISD::SETCC ||
      Cond.getOpcode() == X86ISD::SETCC_CARRY) {
    CC = Cond.getOperand(0);

    SDValue Cmp = Cond.getOperand(1);
    unsigned Opc = Cmp.getOpcode();
    // FIXME: WHY THE SPECIAL CASING OF LogicalCmp??
    if (isX86LogicalCmp(Cmp) || Opc == X86ISD::BT) {
      Cond = Cmp;
      addTest = false;
    } else {
      switch (cast<ConstantSDNode>(CC)->getZExtValue()) {
      default: break;
      case X86::COND_O:
      case X86::COND_B:
        // These can only come from an arithmetic instruction with overflow,
        // e.g. SADDO, UADDO.
        Cond = Cond.getNode()->getOperand(1);
        addTest = false;
        break;
      }
    }
  } else {
    unsigned CondOpc;
    if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) {
      SDValue Cmp = Cond.getOperand(0).getOperand(1);
      if (CondOpc == ISD::OR) {
        // Also, recognize the pattern generated by an FCMP_UNE. We can emit
        // two branches instead of an explicit OR instruction with a
        // separate test.
        if (Cmp == Cond.getOperand(1).getOperand(1) &&
            isX86LogicalCmp(Cmp)) {
          CC = Cond.getOperand(0).getOperand(0);
          Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
                              Chain, Dest, CC, Cmp);
          CC = Cond.getOperand(1).getOperand(0);
          Cond = Cmp;
          addTest = false;
        }
      } else { // ISD::AND
        // Also, recognize the pattern generated by an FCMP_OEQ. We can emit
        // two branches instead of an explicit AND instruction with a
        // separate test. However, we only do this if this block doesn't
        // have a fall-through edge, because this requires an explicit
        // jmp when the condition is false.
        if (Cmp == Cond.getOperand(1).getOperand(1) &&
            isX86LogicalCmp(Cmp) &&
            Op.getNode()->hasOneUse()) {
          X86::CondCode CCode =
            (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
          CCode = X86::GetOppositeBranchCondition(CCode);
          CC = DAG.getConstant(CCode, MVT::i8);
          SDValue User = SDValue(*Op.getNode()->use_begin(), 0);
          // Look for an unconditional branch following this conditional branch.
          // We need this because we need to reverse the successors in order
          // to implement FCMP_OEQ.
          if (User.getOpcode() == ISD::BR) {
            SDValue FalseBB = User.getOperand(1);
            SDValue NewBR =
              DAG.UpdateNodeOperands(User, User.getOperand(0), Dest);
            assert(NewBR == User);
            Dest = FalseBB;

            Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
                                Chain, Dest, CC, Cmp);
            X86::CondCode CCode =
              (X86::CondCode)Cond.getOperand(1).getConstantOperandVal(0);
            CCode = X86::GetOppositeBranchCondition(CCode);
            CC = DAG.getConstant(CCode, MVT::i8);
            Cond = Cmp;
            addTest = false;
          }
        }
      }
    } else if (Cond.hasOneUse() && isXor1OfSetCC(Cond)) {
      // Recognize the xorb (setcc), 1 pattern. The xor inverts the condition.
      // It should be transformed during dag combiner except when the condition
      // is set by an arithmetic-with-overflow node.
      X86::CondCode CCode =
        (X86::CondCode)Cond.getOperand(0).getConstantOperandVal(0);
      CCode = X86::GetOppositeBranchCondition(CCode);
      CC = DAG.getConstant(CCode, MVT::i8);
      Cond = Cond.getOperand(0).getOperand(1);
      addTest = false;
    }
  }

  if (addTest) {
    // Look past the truncate.
    if (Cond.getOpcode() == ISD::TRUNCATE)
      Cond = Cond.getOperand(0);

    // We know the result of AND is compared against zero. Try to match
    // it to BT.
    if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
      SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG);
      if (NewSetCC.getNode()) {
        CC = NewSetCC.getOperand(0);
        Cond = NewSetCC.getOperand(1);
        addTest = false;
      }
    }
  }

  if (addTest) {
    CC = DAG.getConstant(X86::COND_NE, MVT::i8);
    Cond = EmitTest(Cond, X86::COND_NE, DAG);
  }
  return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(),
                     Chain, Dest, CC, Cond);
}


// Lower dynamic stack allocation to _alloca call for Cygwin/Mingw targets.
// Calls to _alloca are needed to probe the stack when allocating more than 4k
// bytes in one go. Touching the stack at 4K increments is necessary to ensure
// that the guard pages used by the OS virtual memory manager are allocated in
// correct sequence.
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                           SelectionDAG &DAG) {
  assert(Subtarget->isTargetCygMing() &&
         "This should be used only on Cygwin/Mingw targets");
  DebugLoc dl = Op.getDebugLoc();

  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue Size  = Op.getOperand(1);
  // FIXME: Ensure alignment here

  SDValue Flag;

  EVT IntPtr = getPointerTy();
  EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;

  Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
  Flag = Chain.getValue(1);

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);

  Chain = DAG.getNode(X86ISD::MINGW_ALLOCA, dl, NodeTys, Chain, Flag);
  Flag = Chain.getValue(1);

  Chain = DAG.getCopyFromReg(Chain, dl, X86StackPtr, SPTy).getValue(1);

  SDValue Ops1[2] = { Chain.getValue(0), Chain };
  return DAG.getMergeValues(Ops1, 2, dl);
}

SDValue
X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
                                           SDValue Chain,
                                           SDValue Dst, SDValue Src,
                                           SDValue Size, unsigned Align,
                                           bool isVolatile,
                                           const Value *DstSV,
                                           uint64_t DstSVOff) {
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);

  // If not DWORD aligned or size is more than the threshold, call the library.
  // The libc version is likely to be faster for these cases. It can use the
  // address value and run time information about the CPU.
  if ((Align & 3) != 0 ||
      !ConstantSize ||
      ConstantSize->getZExtValue() >
        getSubtarget()->getMaxInlineSizeThreshold()) {
    SDValue InFlag(0, 0);

    // Check to see if there is a specialized entry-point for memory zeroing.
    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src);

    if (const char *bzeroEntry =  V &&
        V->isNullValue() ? Subtarget->getBZeroEntry() : 0) {
      EVT IntPtr = getPointerTy();
      const Type *IntPtrTy = TD->getIntPtrType(*DAG.getContext());
      TargetLowering::ArgListTy Args;
      TargetLowering::ArgListEntry Entry;
      Entry.Node = Dst;
      Entry.Ty = IntPtrTy;
      Args.push_back(Entry);
      Entry.Node = Size;
      Args.push_back(Entry);
      std::pair<SDValue,SDValue> CallResult =
        LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
                    false, false, false, false,
                    0, CallingConv::C, false, /*isReturnValueUsed=*/false,
                    DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
      return CallResult.second;
    }

    // Otherwise have the target-independent code call memset.
    return SDValue();
  }

  uint64_t SizeVal = ConstantSize->getZExtValue();
  SDValue InFlag(0, 0);
  EVT AVT;
  SDValue Count;
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Src);
  unsigned BytesLeft = 0;
  bool TwoRepStos = false;
  if (ValC) {
    unsigned ValReg;
    uint64_t Val = ValC->getZExtValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
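    // (Illustration: a constant byte 0x41 is replicated below to 0x4141 for
    // WORD, 0x41414141 for DWORD, and 0x4141414141414141 for QWORD stores.)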
    switch (Align & 3) {
    case 2:   // WORD aligned
      AVT = MVT::i16;
      ValReg = X86::AX;
      Val = (Val << 8) | Val;
      break;
    case 0:   // DWORD aligned
      AVT = MVT::i32;
      ValReg = X86::EAX;
      Val = (Val << 8)  | Val;
      Val = (Val << 16) | Val;
      if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) {  // QWORD aligned
        AVT = MVT::i64;
        ValReg = X86::RAX;
        Val = (Val << 32) | Val;
      }
      break;
    default:  // Byte aligned
      AVT = MVT::i8;
      ValReg = X86::AL;
      Count = DAG.getIntPtrConstant(SizeVal);
      break;
    }

    if (AVT.bitsGT(MVT::i8)) {
      unsigned UBytes = AVT.getSizeInBits() / 8;
      Count = DAG.getIntPtrConstant(SizeVal / UBytes);
      BytesLeft = SizeVal % UBytes;
    }

    Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, AVT),
                             InFlag);
    InFlag = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count = DAG.getIntPtrConstant(SizeVal);
    Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Src, InFlag);
    InFlag = Chain.getValue(1);
  }

  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
                                                             X86::ECX,
                           Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
                                                             X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));

  if (TwoRepStos) {
    InFlag = Chain.getValue(1);
    Count  = Size;
    EVT CVT = Count.getValueType();
    SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count,
                               DAG.getConstant((AVT == MVT::i64) ? 7 : 3, CVT));
    Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX :
                                                            X86::ECX,
                             Left, InFlag);
    InFlag = Chain.getValue(1);
    Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag };
    Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops, array_lengthof(Ops));
  } else if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT AddrVT = Dst.getValueType();
    EVT SizeVT = Size.getValueType();

    Chain = DAG.getMemset(Chain, dl,
                          DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
                                      DAG.getConstant(Offset, AddrVT)),
                          Src,
                          DAG.getConstant(BytesLeft, SizeVT),
                          Align, isVolatile, DstSV, DstSVOff + Offset);
  }

  // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
  return Chain;
}

SDValue
X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                           SDValue Chain, SDValue Dst,
                                           SDValue Src, SDValue Size,
                                           unsigned Align, bool isVolatile,
                                           bool AlwaysInline,
                                           const Value *DstSV,
                                           uint64_t DstSVOff,
                                           const Value *SrcSV,
                                           uint64_t SrcSVOff) {
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();
  uint64_t SizeVal = ConstantSize->getZExtValue();
  if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
    return SDValue();

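  // (Illustration: on x86-64 with 8-byte alignment, a 100-byte copy becomes
  // "rep movsq" with a count of 12 qwords, and the remaining 4 bytes are
  // handled by the trailing memcpy emitted below.)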
  /// If not DWORD aligned, call the library.
  if ((Align & 3) != 0)
    return SDValue();

  // DWORD aligned
  EVT AVT = MVT::i32;
  if (Subtarget->is64Bit() && ((Align & 0x7) == 0))  // QWORD aligned
    AVT = MVT::i64;

  unsigned UBytes = AVT.getSizeInBits() / 8;
  unsigned CountVal = SizeVal / UBytes;
  SDValue Count = DAG.getIntPtrConstant(CountVal);
  unsigned BytesLeft = SizeVal % UBytes;

  SDValue InFlag(0, 0);
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX :
                                                             X86::ECX,
                           Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI :
                                                             X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI :
                                                             X86::ESI,
                           Src, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops,
                                array_lengthof(Ops));

  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT DstVT = Dst.getValueType();
    EVT SrcVT = Src.getValueType();
    EVT SizeVT = Size.getValueType();
    Results.push_back(DAG.getMemcpy(Chain, dl,
                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
                                                DAG.getConstant(Offset, DstVT)),
                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                DAG.getConstant(Offset, SrcVT)),
                                    DAG.getConstant(BytesLeft, SizeVT),
                                    Align, isVolatile, AlwaysInline,
                                    DstSV, DstSVOff + Offset,
                                    SrcSV, SrcSVOff + Offset));
  }

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                     &Results[0], Results.size());
}

SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) {
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  DebugLoc dl = Op.getDebugLoc();

  if (!Subtarget->is64Bit()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0,
                        false, false, 0);
  }

  // __va_list_tag:
  //   gp_offset         (0 - 6 * 8)
  //   fp_offset         (48 - 48 + 8 * 16)
  //   overflow_arg_area (point to parameters coming in memory).
  //   reg_save_area
  SmallVector<SDValue, 8> MemOps;
  SDValue FIN = Op.getOperand(1);
  // Store gp_offset
  SDValue Store = DAG.getStore(Op.getOperand(0), dl,
                               DAG.getConstant(VarArgsGPOffset, MVT::i32),
                               FIN, SV, 0, false, false, 0);
  MemOps.push_back(Store);

  // Store fp_offset
  FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
                    FIN, DAG.getIntPtrConstant(4));
  Store = DAG.getStore(Op.getOperand(0), dl,
                       DAG.getConstant(VarArgsFPOffset, MVT::i32),
                       FIN, SV, 0, false, false, 0);
  MemOps.push_back(Store);

  // Store ptr to overflow_arg_area
  FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
                    FIN, DAG.getIntPtrConstant(4));
  SDValue OVFIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
  Store = DAG.getStore(Op.getOperand(0), dl, OVFIN, FIN, SV, 0,
                       false, false, 0);
  MemOps.push_back(Store);

  // Store ptr to reg_save_area.
  FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(),
                    FIN, DAG.getIntPtrConstant(8));
  SDValue RSFIN = DAG.getFrameIndex(RegSaveFrameIndex, getPointerTy());
  Store = DAG.getStore(Op.getOperand(0), dl, RSFIN, FIN, SV, 0,
                       false, false, 0);
  MemOps.push_back(Store);
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                     &MemOps[0], MemOps.size());
}

SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) {
  // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
  assert(Subtarget->is64Bit() && "This code only handles 64-bit va_arg!");
  SDValue Chain = Op.getOperand(0);
  SDValue SrcPtr = Op.getOperand(1);
  SDValue SrcSV = Op.getOperand(2);

  report_fatal_error("VAArgInst is not yet implemented for x86-64!");
  return SDValue();
}

SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) {
  // X86-64 va_list is a struct { i32, i32, i8*, i8* }.
  assert(Subtarget->is64Bit() && "This code only handles 64-bit va_copy!");
  SDValue Chain = Op.getOperand(0);
  SDValue DstPtr = Op.getOperand(1);
  SDValue SrcPtr = Op.getOperand(2);
  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  DebugLoc dl = Op.getDebugLoc();

  return DAG.getMemcpy(Chain, dl, DstPtr, SrcPtr,
                       DAG.getIntPtrConstant(24), 8, /*isVolatile*/false,
                       false, DstSV, 0, SrcSV, 0);
}

SDValue
X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  switch (IntNo) {
  default: return SDValue();    // Don't custom lower most intrinsics.
  // Comparison intrinsics.
  case Intrinsic::x86_sse_comieq_ss:
  case Intrinsic::x86_sse_comilt_ss:
  case Intrinsic::x86_sse_comile_ss:
  case Intrinsic::x86_sse_comigt_ss:
  case Intrinsic::x86_sse_comige_ss:
  case Intrinsic::x86_sse_comineq_ss:
  case Intrinsic::x86_sse_ucomieq_ss:
  case Intrinsic::x86_sse_ucomilt_ss:
  case Intrinsic::x86_sse_ucomile_ss:
  case Intrinsic::x86_sse_ucomigt_ss:
  case Intrinsic::x86_sse_ucomige_ss:
  case Intrinsic::x86_sse_ucomineq_ss:
  case Intrinsic::x86_sse2_comieq_sd:
  case Intrinsic::x86_sse2_comilt_sd:
  case Intrinsic::x86_sse2_comile_sd:
  case Intrinsic::x86_sse2_comigt_sd:
  case Intrinsic::x86_sse2_comige_sd:
  case Intrinsic::x86_sse2_comineq_sd:
  case Intrinsic::x86_sse2_ucomieq_sd:
  case Intrinsic::x86_sse2_ucomilt_sd:
  case Intrinsic::x86_sse2_ucomile_sd:
  case Intrinsic::x86_sse2_ucomigt_sd:
  case Intrinsic::x86_sse2_ucomige_sd:
  case Intrinsic::x86_sse2_ucomineq_sd: {
    unsigned Opc = 0;
    ISD::CondCode CC = ISD::SETCC_INVALID;
    switch (IntNo) {
    default: break;
    case Intrinsic::x86_sse_comieq_ss:
    case Intrinsic::x86_sse2_comieq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_comilt_ss:
    case Intrinsic::x86_sse2_comilt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_comile_ss:
    case Intrinsic::x86_sse2_comile_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_comigt_ss:
    case Intrinsic::x86_sse2_comigt_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_comige_ss:
    case Intrinsic::x86_sse2_comige_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_comineq_ss:
    case Intrinsic::x86_sse2_comineq_sd:
      Opc = X86ISD::COMI;
      CC = ISD::SETNE;
      break;
    case Intrinsic::x86_sse_ucomieq_ss:
    case Intrinsic::x86_sse2_ucomieq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETEQ;
      break;
    case Intrinsic::x86_sse_ucomilt_ss:
    case Intrinsic::x86_sse2_ucomilt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLT;
      break;
    case Intrinsic::x86_sse_ucomile_ss:
    case Intrinsic::x86_sse2_ucomile_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETLE;
      break;
    case Intrinsic::x86_sse_ucomigt_ss:
    case Intrinsic::x86_sse2_ucomigt_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGT;
      break;
    case Intrinsic::x86_sse_ucomige_ss:
    case Intrinsic::x86_sse2_ucomige_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETGE;
      break;
    case Intrinsic::x86_sse_ucomineq_ss:
    case Intrinsic::x86_sse2_ucomineq_sd:
      Opc = X86ISD::UCOMI;
      CC = ISD::SETNE;
      break;
    }

    SDValue LHS = Op.getOperand(1);
    SDValue RHS = Op.getOperand(2);
    unsigned X86CC = TranslateX86CC(CC, true, LHS, RHS, DAG);
    assert(X86CC != X86::COND_INVALID && "Unexpected illegal condition!");
    SDValue Cond = DAG.getNode(Opc, dl, MVT::i32, LHS, RHS);
    SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
                                DAG.getConstant(X86CC, MVT::i8), Cond);
    return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
  }
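  // (Illustrative effect: x86_sse_comieq_ss, for instance, becomes an
  // X86ISD::COMI node feeding a SETCC of COND_E, zero-extended to the i32
  // the intrinsic returns.)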
  // ptest intrinsics. The intrinsics these come from are designed to return
  // an integer value, not just an instruction, so lower it to the ptest
  // pattern and a setcc for the result.
  case Intrinsic::x86_sse41_ptestz:
  case Intrinsic::x86_sse41_ptestc:
  case Intrinsic::x86_sse41_ptestnzc: {
    unsigned X86CC = 0;
    switch (IntNo) {
    default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
    case Intrinsic::x86_sse41_ptestz:
      // ZF = 1
      X86CC = X86::COND_E;
      break;
    case Intrinsic::x86_sse41_ptestc:
      // CF = 1
      X86CC = X86::COND_B;
      break;
    case Intrinsic::x86_sse41_ptestnzc:
      // ZF and CF = 0
      X86CC = X86::COND_A;
      break;
    }

    SDValue LHS = Op.getOperand(1);
    SDValue RHS = Op.getOperand(2);
    SDValue Test = DAG.getNode(X86ISD::PTEST, dl, MVT::i32, LHS, RHS);
    SDValue CC = DAG.getConstant(X86CC, MVT::i8);
    SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
    return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
  }

  // Fix vector shift instructions where the last operand is a non-immediate
  // i32 value.
  case Intrinsic::x86_sse2_pslli_w:
  case Intrinsic::x86_sse2_pslli_d:
  case Intrinsic::x86_sse2_pslli_q:
  case Intrinsic::x86_sse2_psrli_w:
  case Intrinsic::x86_sse2_psrli_d:
  case Intrinsic::x86_sse2_psrli_q:
  case Intrinsic::x86_sse2_psrai_w:
  case Intrinsic::x86_sse2_psrai_d:
  case Intrinsic::x86_mmx_pslli_w:
  case Intrinsic::x86_mmx_pslli_d:
  case Intrinsic::x86_mmx_pslli_q:
  case Intrinsic::x86_mmx_psrli_w:
  case Intrinsic::x86_mmx_psrli_d:
  case Intrinsic::x86_mmx_psrli_q:
  case Intrinsic::x86_mmx_psrai_w:
  case Intrinsic::x86_mmx_psrai_d: {
    SDValue ShAmt = Op.getOperand(2);
    if (isa<ConstantSDNode>(ShAmt))
      return SDValue();

    unsigned NewIntNo = 0;
    EVT ShAmtVT = MVT::v4i32;
    switch (IntNo) {
    case Intrinsic::x86_sse2_pslli_w:
      NewIntNo = Intrinsic::x86_sse2_psll_w;
      break;
    case Intrinsic::x86_sse2_pslli_d:
      NewIntNo = Intrinsic::x86_sse2_psll_d;
      break;
    case Intrinsic::x86_sse2_pslli_q:
      NewIntNo = Intrinsic::x86_sse2_psll_q;
      break;
    case Intrinsic::x86_sse2_psrli_w:
      NewIntNo = Intrinsic::x86_sse2_psrl_w;
      break;
    case Intrinsic::x86_sse2_psrli_d:
      NewIntNo = Intrinsic::x86_sse2_psrl_d;
      break;
    case Intrinsic::x86_sse2_psrli_q:
      NewIntNo = Intrinsic::x86_sse2_psrl_q;
      break;
    case Intrinsic::x86_sse2_psrai_w:
      NewIntNo = Intrinsic::x86_sse2_psra_w;
      break;
    case Intrinsic::x86_sse2_psrai_d:
      NewIntNo = Intrinsic::x86_sse2_psra_d;
      break;
    default: {
      ShAmtVT = MVT::v2i32;
      switch (IntNo) {
      case Intrinsic::x86_mmx_pslli_w:
        NewIntNo = Intrinsic::x86_mmx_psll_w;
        break;
      case Intrinsic::x86_mmx_pslli_d:
        NewIntNo = Intrinsic::x86_mmx_psll_d;
        break;
      case Intrinsic::x86_mmx_pslli_q:
        NewIntNo = Intrinsic::x86_mmx_psll_q;
        break;
      case Intrinsic::x86_mmx_psrli_w:
        NewIntNo = Intrinsic::x86_mmx_psrl_w;
        break;
      case Intrinsic::x86_mmx_psrli_d:
        NewIntNo = Intrinsic::x86_mmx_psrl_d;
        break;
      case Intrinsic::x86_mmx_psrli_q:
        NewIntNo = Intrinsic::x86_mmx_psrl_q;
        break;
      case Intrinsic::x86_mmx_psrai_w:
        NewIntNo = Intrinsic::x86_mmx_psra_w;
        break;
      case Intrinsic::x86_mmx_psrai_d:
        NewIntNo = Intrinsic::x86_mmx_psra_d;
        break;
      default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
      }
      break;
    }
    }

    // The vector shift intrinsics with scalars use 32-bit shift amounts but
    // the sse2/mmx shift instructions read 64 bits. Set the upper 32 bits
    // to be zero.
    SDValue ShOps[4];
    ShOps[0] = ShAmt;
    ShOps[1] = DAG.getConstant(0, MVT::i32);
    if (ShAmtVT == MVT::v4i32) {
      ShOps[2] = DAG.getUNDEF(MVT::i32);
      ShOps[3] = DAG.getUNDEF(MVT::i32);
      ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 4);
    } else {
      ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
    }

    EVT VT = Op.getValueType();
    ShAmt = DAG.getNode(ISD::BIT_CONVERT, dl, VT, ShAmt);
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                       DAG.getConstant(NewIntNo, MVT::i32),
                       Op.getOperand(1), ShAmt);
  }
  }
}

SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  DebugLoc dl = Op.getDebugLoc();

  if (Depth > 0) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
      DAG.getConstant(TD->getPointerSize(),
                      Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
    return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                   FrameAddr, Offset),
                       NULL, 0, false, false, 0);
  }

  // Just load the return address.
  SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                     RetAddrFI, NULL, 0, false, false, 0);
}

SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  MFI->setFrameAddressIsTaken(true);
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  unsigned FrameReg = Subtarget->is64Bit() ? X86::RBP : X86::EBP;
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
  while (Depth--)
    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0,
                            false, false, 0);
  return FrameAddr;
}

SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op,
                                                     SelectionDAG &DAG) {
  return DAG.getIntPtrConstant(2*TD->getPointerSize());
}

SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG)
{
  MachineFunction &MF = DAG.getMachineFunction();
  SDValue Chain   = Op.getOperand(0);
  SDValue Offset  = Op.getOperand(1);
  SDValue Handler = Op.getOperand(2);
  DebugLoc dl     = Op.getDebugLoc();

  SDValue Frame = DAG.getRegister(Subtarget->is64Bit() ? X86::RBP : X86::EBP,
                                  getPointerTy());
  unsigned StoreAddrReg = (Subtarget->is64Bit() ?
                           X86::RCX : X86::ECX);

  SDValue StoreAddr = DAG.getNode(ISD::SUB, dl, getPointerTy(), Frame,
                                  DAG.getIntPtrConstant(-TD->getPointerSize()));
  StoreAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StoreAddr, Offset);
  Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, NULL, 0, false, false, 0);
  Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);
  MF.getRegInfo().addLiveOut(StoreAddrReg);

  return DAG.getNode(X86ISD::EH_RETURN, dl,
                     MVT::Other,
                     Chain, DAG.getRegister(StoreAddrReg, getPointerTy()));
}

SDValue X86TargetLowering::LowerTRAMPOLINE(SDValue Op,
                                           SelectionDAG &DAG) {
  SDValue Root = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  DebugLoc dl  = Op.getDebugLoc();

  const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();

  if (Subtarget->is64Bit()) {
    SDValue OutChains[6];

    // Large code-model.
    const unsigned char JMP64r  = 0xFF; // 64-bit jmp through register opcode.
    const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode.

    const unsigned char N86R10 = RegInfo->getX86RegNum(X86::R10);
    const unsigned char N86R11 = RegInfo->getX86RegNum(X86::R11);

    const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix

    // Load the pointer to the nested function into R11.
    unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11
    SDValue Addr = Trmp;
    OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
                                Addr, TrmpAddr, 0, false, false, 0);

    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                       DAG.getConstant(2, MVT::i64));
    OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, TrmpAddr, 2,
                                false, false, 2);

    // Load the 'nest' parameter value into R10.
    // R10 is specified in X86CallingConv.td
    OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; // movabsq r10
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                       DAG.getConstant(10, MVT::i64));
    OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
                                Addr, TrmpAddr, 10, false, false, 0);

    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                       DAG.getConstant(12, MVT::i64));
    OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 12,
                                false, false, 2);

    // Jump to the nested function.
    OpCode = (JMP64r << 8) | REX_WB; // jmpq *...
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                       DAG.getConstant(20, MVT::i64));
    OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, MVT::i16),
                                Addr, TrmpAddr, 20, false, false, 0);

    unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ...r11
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                       DAG.getConstant(22, MVT::i64));
    OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, MVT::i8), Addr,
                                TrmpAddr, 22, false, false, 0);

    SDValue Ops[] =
      { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 6) };
    return DAG.getMergeValues(Ops, 2, dl);
  } else {
    const Function *Func =
      cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
    CallingConv::ID CC = Func->getCallingConv();
    unsigned NestReg;

    switch (CC) {
    default:
      llvm_unreachable("Unsupported calling convention");
    case CallingConv::C:
    case CallingConv::X86_StdCall: {
      // Pass 'nest' parameter in ECX.
      // Must be kept in sync with X86CallingConv.td
      NestReg = X86::ECX;

      // Check that ECX wasn't needed by an 'inreg' parameter.
      const FunctionType *FTy = Func->getFunctionType();
      const AttrListPtr &Attrs = Func->getAttributes();

      if (!Attrs.isEmpty() && !Func->isVarArg()) {
        unsigned InRegCount = 0;
        unsigned Idx = 1;

        for (FunctionType::param_iterator I = FTy->param_begin(),
             E = FTy->param_end(); I != E; ++I, ++Idx)
          if (Attrs.paramHasAttr(Idx, Attribute::InReg))
            // FIXME: should only count parameters that are lowered to integers.
            InRegCount += (TD->getTypeSizeInBits(*I) + 31) / 32;

        if (InRegCount > 2) {
          report_fatal_error("Nest register in use - reduce number of inreg parameters!");
        }
      }
      break;
    }
    case CallingConv::X86_FastCall:
    case CallingConv::Fast:
      // Pass 'nest' parameter in EAX.
      // Must be kept in sync with X86CallingConv.td
      NestReg = X86::EAX;
      break;
    }

    SDValue OutChains[4];
    SDValue Addr, Disp;

    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                       DAG.getConstant(10, MVT::i32));
    Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr);

    // This is storing the opcode for MOV32ri.
    const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte.
    const unsigned char N86Reg = RegInfo->getX86RegNum(NestReg);
    OutChains[0] = DAG.getStore(Root, dl,
                                DAG.getConstant(MOV32ri|N86Reg, MVT::i8),
                                Trmp, TrmpAddr, 0, false, false, 0);

    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                       DAG.getConstant(1, MVT::i32));
    OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, TrmpAddr, 1,
                                false, false, 1);

    const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode.
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                       DAG.getConstant(5, MVT::i32));
    OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(JMP, MVT::i8), Addr,
                                TrmpAddr, 5, false, false, 1);

    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                       DAG.getConstant(6, MVT::i32));
    OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, TrmpAddr, 6,
                                false, false, 1);

    SDValue Ops[] =
      { Trmp, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains, 4) };
    return DAG.getMergeValues(Ops, 2, dl);
  }
}

SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
  /*
   The rounding mode is in bits 11:10 of FPSR, and has the following
   settings:
     00 Round to nearest
     01 Round to -inf
     10 Round to +inf
     11 Round to 0

   FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

   To perform the conversion, we do:
     (((((FPSR & 0x800) >> 11) | ((FPSR & 0x400) >> 9)) + 1) & 3)
  */

  MachineFunction &MF = DAG.getMachineFunction();
  const TargetMachine &TM = MF.getTarget();
  const TargetFrameInfo &TFI = *TM.getFrameInfo();
  unsigned StackAlignment = TFI.getStackAlignment();
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();

  // Save FP Control Word to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());

  SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, dl, MVT::Other,
                              DAG.getEntryNode(), StackSlot);

  // Load FP Control Word from stack slot
  SDValue CWD = DAG.getLoad(MVT::i16, dl, Chain, StackSlot, NULL, 0,
                            false, false, 0);

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::SRL, dl, MVT::i16,
                DAG.getNode(ISD::AND, dl, MVT::i16,
                            CWD, DAG.getConstant(0x800, MVT::i16)),
                DAG.getConstant(11, MVT::i8));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i16,
                DAG.getNode(ISD::AND, dl, MVT::i16,
                            CWD, DAG.getConstant(0x400, MVT::i16)),
                DAG.getConstant(9, MVT::i8));

  SDValue RetVal =
    DAG.getNode(ISD::AND, dl, MVT::i16,
                DAG.getNode(ISD::ADD, dl, MVT::i16,
                            DAG.getNode(ISD::OR, dl, MVT::i16, CWD1, CWD2),
                            DAG.getConstant(1, MVT::i16)),
                DAG.getConstant(3, MVT::i16));

  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}

SDValue X86TargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  EVT OpVT = VT;
  unsigned NumBits = VT.getSizeInBits();
  DebugLoc dl = Op.getDebugLoc();

  Op = Op.getOperand(0);
  if (VT == MVT::i8) {
    // Zero extend to i32 since there is not an i8 bsr.
    OpVT = MVT::i32;
    Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
  }

  // Issue a bsr (scan bits in reverse) which also sets EFLAGS.
  SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
  Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);

  // If src is zero (i.e. bsr sets ZF), returns NumBits.
  SDValue Ops[] = {
    Op,
    DAG.getConstant(NumBits+NumBits-1, OpVT),
    DAG.getConstant(X86::COND_E, MVT::i8),
    Op.getValue(1)
  };
  Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops, array_lengthof(Ops));

  // Finally xor with NumBits-1.
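  // (Worked example: for i32 x = 0x10, bsr yields bit index 4 and
  // 4 ^ 31 == 27, the correct ctlz; for x == 0 the CMOV above substitutes
  // 63, which the xor maps to 32 == NumBits.)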
  Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, DAG.getConstant(NumBits-1, OpVT));

  if (VT == MVT::i8)
    Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
  return Op;
}

SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  EVT OpVT = VT;
  unsigned NumBits = VT.getSizeInBits();
  DebugLoc dl = Op.getDebugLoc();

  Op = Op.getOperand(0);
  if (VT == MVT::i8) {
    OpVT = MVT::i32;
    Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
  }

  // Issue a bsf (scan bits forward) which also sets EFLAGS.
  SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
  Op = DAG.getNode(X86ISD::BSF, dl, VTs, Op);

  // If src is zero (i.e. bsf sets ZF), returns NumBits.
  SDValue Ops[] = {
    Op,
    DAG.getConstant(NumBits, OpVT),
    DAG.getConstant(X86::COND_E, MVT::i8),
    Op.getValue(1)
  };
  Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops, array_lengthof(Ops));

  if (VT == MVT::i8)
    Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
  return Op;
}

SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
  DebugLoc dl = Op.getDebugLoc();

  //  ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32);
  //  ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32);
  //  ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b );
  //  ulong2 AloBhi = __builtin_ia32_pmuludq128( a, Bhi );
  //  ulong2 AhiBlo = __builtin_ia32_pmuludq128( Ahi, b );
  //
  //  AloBhi = __builtin_ia32_psllqi128( AloBhi, 32 );
  //  AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 );
  //  return AloBlo + AloBhi + AhiBlo;

  SDValue A = Op.getOperand(0);
  SDValue B = Op.getOperand(1);

  SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                       DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
                       A, DAG.getConstant(32, MVT::i32));
  SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                       DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
                       B, DAG.getConstant(32, MVT::i32));
  SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                       DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
                       A, B);
  SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                       DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
                       A, Bhi);
  SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                       DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
                       Ahi, B);
  AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                       DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
                       AloBhi, DAG.getConstant(32, MVT::i32));
  AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                       DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
                       AhiBlo, DAG.getConstant(32, MVT::i32));
  SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
  Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
  return Res;
}


SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
  // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
  // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
  // looks for this combo and may remove the "setcc" instruction if the "setcc"
  // has only one use.
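  // (Illustration: llvm.sadd.with.overflow.i32 becomes an X86ISD::ADD
  // producing {i32, i32} plus a SETCC of COND_O on the second result, so
  // the overflow bit is read straight out of EFLAGS.)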
  SDNode *N = Op.getNode();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  unsigned BaseOp = 0;
  unsigned Cond = 0;
  DebugLoc dl = Op.getDebugLoc();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown ovf instruction!");
  case ISD::SADDO:
    // An add of one will be selected as an INC. Note that INC doesn't
    // set CF, so we can't do this for UADDO.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
      if (C->getAPIntValue() == 1) {
        BaseOp = X86ISD::INC;
        Cond = X86::COND_O;
        break;
      }
    BaseOp = X86ISD::ADD;
    Cond = X86::COND_O;
    break;
  case ISD::UADDO:
    BaseOp = X86ISD::ADD;
    Cond = X86::COND_B;
    break;
  case ISD::SSUBO:
    // A subtract of one will be selected as a DEC. Note that DEC doesn't
    // set CF, so we can't do this for USUBO.
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
      if (C->getAPIntValue() == 1) {
        BaseOp = X86ISD::DEC;
        Cond = X86::COND_O;
        break;
      }
    BaseOp = X86ISD::SUB;
    Cond = X86::COND_O;
    break;
  case ISD::USUBO:
    BaseOp = X86ISD::SUB;
    Cond = X86::COND_B;
    break;
  case ISD::SMULO:
    BaseOp = X86ISD::SMUL;
    Cond = X86::COND_O;
    break;
  case ISD::UMULO:
    BaseOp = X86ISD::UMUL;
    Cond = X86::COND_B;
    break;
  }

  // Also sets EFLAGS.
  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
  SDValue Sum = DAG.getNode(BaseOp, dl, VTs, LHS, RHS);

  SDValue SetCC =
    DAG.getNode(X86ISD::SETCC, dl, N->getValueType(1),
                DAG.getConstant(Cond, MVT::i32), SDValue(Sum.getNode(), 1));

  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), SetCC);
  return Sum;
}

SDValue X86TargetLowering::LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) {
  EVT T = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();
  unsigned Reg = 0;
  unsigned size = 0;
  switch(T.getSimpleVT().SimpleTy) {
  default:
    assert(false && "Invalid value type!");
  case MVT::i8:  Reg = X86::AL;  size = 1; break;
  case MVT::i16: Reg = X86::AX;  size = 2; break;
  case MVT::i32: Reg = X86::EAX; size = 4; break;
  case MVT::i64:
    assert(Subtarget->is64Bit() && "Node not type legal!");
    Reg = X86::RAX; size = 8;
    break;
  }
  SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), dl, Reg,
                                  Op.getOperand(2), SDValue());
  SDValue Ops[] = { cpIn.getValue(0),
                    Op.getOperand(1),
                    Op.getOperand(3),
                    DAG.getTargetConstant(size, MVT::i8),
                    cpIn.getValue(1) };
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDValue Result = DAG.getNode(X86ISD::LCMPXCHG_DAG, dl, Tys, Ops, 5);
  SDValue cpOut =
    DAG.getCopyFromReg(Result.getValue(0), dl, Reg, T, Result.getValue(1));
  return cpOut;
}

SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
                                                 SelectionDAG &DAG) {
  assert(Subtarget->is64Bit() && "Result not type legalized?");
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDValue TheChain = Op.getOperand(0);
  DebugLoc dl = Op.getDebugLoc();
  SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
  SDValue rax = DAG.getCopyFromReg(rd, dl, X86::RAX, MVT::i64, rd.getValue(1));
  SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), dl, X86::RDX, MVT::i64,
                                   rax.getValue(2));
  SDValue Tmp = DAG.getNode(ISD::SHL, dl, MVT::i64, rdx,
                            DAG.getConstant(32, MVT::i8));
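  // RDTSC returns the timestamp split across EDX:EAX; combine the halves
  // below as (RDX << 32) | RAX to form the full 64-bit counter.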
SDValue X86TargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
                                                 SelectionDAG &DAG) {
  assert(Subtarget->is64Bit() && "Result not type legalized?");
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
  SDValue TheChain = Op.getOperand(0);
  DebugLoc dl = Op.getDebugLoc();
  SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
  SDValue rax = DAG.getCopyFromReg(rd, dl, X86::RAX, MVT::i64, rd.getValue(1));
  SDValue rdx = DAG.getCopyFromReg(rax.getValue(1), dl, X86::RDX, MVT::i64,
                                   rax.getValue(2));
  SDValue Tmp = DAG.getNode(ISD::SHL, dl, MVT::i64, rdx,
                            DAG.getConstant(32, MVT::i8));
  SDValue Ops[] = {
    DAG.getNode(ISD::OR, dl, MVT::i64, rax, Tmp),
    rdx.getValue(1)
  };
  return DAG.getMergeValues(Ops, 2, dl);
}

SDValue X86TargetLowering::LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) {
  SDNode *Node = Op.getNode();
  DebugLoc dl = Node->getDebugLoc();
  EVT T = Node->getValueType(0);
  SDValue negOp = DAG.getNode(ISD::SUB, dl, T,
                              DAG.getConstant(0, T), Node->getOperand(2));
  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl,
                       cast<AtomicSDNode>(Node)->getMemoryVT(),
                       Node->getOperand(0),
                       Node->getOperand(1), negOp,
                       cast<AtomicSDNode>(Node)->getSrcValue(),
                       cast<AtomicSDNode>(Node)->getAlignment());
}
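// x86 can return the old memory value for an atomic add (via XADD), but
// there is no subtract flavor of that instruction; LowerLOAD_SUB therefore
// negates the operand and reuses the ATOMIC_LOAD_ADD path. Conceptually:
//
//   atomic { old = *p; *p -= v }   ==>   atomic { old = *p; *p += (0 - v) }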
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Should not custom lower this!");
  case ISD::ATOMIC_CMP_SWAP:    return LowerCMP_SWAP(Op,DAG);
  case ISD::ATOMIC_LOAD_SUB:    return LowerLOAD_SUB(Op,DAG);
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::CONCAT_VECTORS:     return LowerCONCAT_VECTORS(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::ExternalSymbol:     return LowerExternalSymbol(Op, DAG);
  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
  case ISD::SHL_PARTS:
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS:          return LowerShift(Op, DAG);
  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
  case ISD::UINT_TO_FP:         return LowerUINT_TO_FP(Op, DAG);
  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
  case ISD::FP_TO_UINT:         return LowerFP_TO_UINT(Op, DAG);
  case ISD::FABS:               return LowerFABS(Op, DAG);
  case ISD::FNEG:               return LowerFNEG(Op, DAG);
  case ISD::FCOPYSIGN:          return LowerFCOPYSIGN(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::VSETCC:             return LowerVSETCC(Op, DAG);
  case ISD::SELECT:             return LowerSELECT(Op, DAG);
  case ISD::BRCOND:             return LowerBRCOND(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::VASTART:            return LowerVASTART(Op, DAG);
  case ISD::VAARG:              return LowerVAARG(Op, DAG);
  case ISD::VACOPY:             return LowerVACOPY(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
  case ISD::FRAME_TO_ARGS_OFFSET:
                                return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::EH_RETURN:          return LowerEH_RETURN(Op, DAG);
  case ISD::TRAMPOLINE:         return LowerTRAMPOLINE(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
  case ISD::CTLZ:               return LowerCTLZ(Op, DAG);
  case ISD::CTTZ:               return LowerCTTZ(Op, DAG);
  case ISD::MUL:                return LowerMUL_V2I64(Op, DAG);
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SSUBO:
  case ISD::USUBO:
  case ISD::SMULO:
  case ISD::UMULO:              return LowerXALUO(Op, DAG);
  case ISD::READCYCLECOUNTER:   return LowerREADCYCLECOUNTER(Op, DAG);
  }
}

void X86TargetLowering::
ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results,
                        SelectionDAG &DAG, unsigned NewOp) {
  EVT T = Node->getValueType(0);
  DebugLoc dl = Node->getDebugLoc();
  assert(T == MVT::i64 && "Only know how to expand i64 atomics");

  SDValue Chain = Node->getOperand(0);
  SDValue In1 = Node->getOperand(1);
  SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
                             Node->getOperand(2), DAG.getIntPtrConstant(0));
  SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
                             Node->getOperand(2), DAG.getIntPtrConstant(1));
  SDValue Ops[] = { Chain, In1, In2L, In2H };
  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
  SDValue Result =
    DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, 4, MVT::i64,
                            cast<MemSDNode>(Node)->getMemOperand());
  SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)};
  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
  Results.push_back(Result.getValue(2));
}
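// A rough sketch of the i64 expansion above on a 32-bit host (node names
// schematic, not literal DAG syntax): the operand is split into 32-bit
// halves, the target node produces both result halves, and BUILD_PAIR
// reassembles the illegal i64 result:
//
//   in2L, in2H = extract_element In2, 0 / 1
//   resL, resH = ATOMxxx64_DAG chain, ptr, in2L, in2H
//   result     = build_pair resL, resH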
/// ReplaceNodeResults - Replace a node with an illegal result type
/// with a new node built out of custom code.
void X86TargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) {
  DebugLoc dl = N->getDebugLoc();
  switch (N->getOpcode()) {
  default:
    assert(false && "Do not know how to custom type legalize this operation!");
    return;
  case ISD::FP_TO_SINT: {
    std::pair<SDValue,SDValue> Vals =
        FP_TO_INTHelper(SDValue(N, 0), DAG, true);
    SDValue FIST = Vals.first, StackSlot = Vals.second;
    if (FIST.getNode() != 0) {
      EVT VT = N->getValueType(0);
      // Return a load from the stack slot.
      Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, NULL, 0,
                                    false, false, 0));
    }
    return;
  }
  case ISD::READCYCLECOUNTER: {
    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDValue TheChain = N->getOperand(0);
    SDValue rd = DAG.getNode(X86ISD::RDTSC_DAG, dl, Tys, &TheChain, 1);
    SDValue eax = DAG.getCopyFromReg(rd, dl, X86::EAX, MVT::i32,
                                     rd.getValue(1));
    SDValue edx = DAG.getCopyFromReg(eax.getValue(1), dl, X86::EDX, MVT::i32,
                                     eax.getValue(2));
    // Use a buildpair to merge the two 32-bit values into a 64-bit one.
    SDValue Ops[] = { eax, edx };
    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Ops, 2));
    Results.push_back(edx.getValue(1));
    return;
  }
  case ISD::ATOMIC_CMP_SWAP: {
    EVT T = N->getValueType(0);
    assert(T == MVT::i64 && "Only know how to expand i64 Cmp and Swap");
    SDValue cpInL, cpInH;
    cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
                        DAG.getConstant(0, MVT::i32));
    cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(2),
                        DAG.getConstant(1, MVT::i32));
    cpInL = DAG.getCopyToReg(N->getOperand(0), dl, X86::EAX, cpInL, SDValue());
    cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, X86::EDX, cpInH,
                             cpInL.getValue(1));
    SDValue swapInL, swapInH;
    swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3),
                          DAG.getConstant(0, MVT::i32));
    swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(3),
                          DAG.getConstant(1, MVT::i32));
    swapInL = DAG.getCopyToReg(cpInH.getValue(0), dl, X86::EBX, swapInL,
                               cpInH.getValue(1));
    swapInH = DAG.getCopyToReg(swapInL.getValue(0), dl, X86::ECX, swapInH,
                               swapInL.getValue(1));
    SDValue Ops[] = { swapInH.getValue(0),
                      N->getOperand(1),
                      swapInH.getValue(1) };
    SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Flag);
    SDValue Result = DAG.getNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, 3);
    SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, X86::EAX,
                                        MVT::i32, Result.getValue(1));
    SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, X86::EDX,
                                        MVT::i32, cpOutL.getValue(2));
    SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)};
    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
    Results.push_back(cpOutH.getValue(1));
    return;
  }
  case ISD::ATOMIC_LOAD_ADD:
    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMADD64_DAG);
    return;
  case ISD::ATOMIC_LOAD_AND:
    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMAND64_DAG);
    return;
  case ISD::ATOMIC_LOAD_NAND:
    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMNAND64_DAG);
    return;
  case ISD::ATOMIC_LOAD_OR:
    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMOR64_DAG);
    return;
  case ISD::ATOMIC_LOAD_SUB:
    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSUB64_DAG);
    return;
  case ISD::ATOMIC_LOAD_XOR:
    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMXOR64_DAG);
    return;
  case ISD::ATOMIC_SWAP:
    ReplaceATOMIC_BINARY_64(N, Results, DAG, X86ISD::ATOMSWAP64_DAG);
    return;
  }
}
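// The register choreography in the i64 ATOMIC_CMP_SWAP case follows the
// fixed CMPXCHG8B convention:
//
//   EDX:EAX = expected (compare) value, high:low
//   ECX:EBX = replacement value, high:low
//   lock cmpxchg8b [ptr]   ; on failure EDX:EAX is reloaded with the
//                          ; current memory contents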
"X86ISD::FLD"; 7786 case X86ISD::FST: return "X86ISD::FST"; 7787 case X86ISD::CALL: return "X86ISD::CALL"; 7788 case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; 7789 case X86ISD::BT: return "X86ISD::BT"; 7790 case X86ISD::CMP: return "X86ISD::CMP"; 7791 case X86ISD::COMI: return "X86ISD::COMI"; 7792 case X86ISD::UCOMI: return "X86ISD::UCOMI"; 7793 case X86ISD::SETCC: return "X86ISD::SETCC"; 7794 case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY"; 7795 case X86ISD::CMOV: return "X86ISD::CMOV"; 7796 case X86ISD::BRCOND: return "X86ISD::BRCOND"; 7797 case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG"; 7798 case X86ISD::REP_STOS: return "X86ISD::REP_STOS"; 7799 case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS"; 7800 case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg"; 7801 case X86ISD::Wrapper: return "X86ISD::Wrapper"; 7802 case X86ISD::WrapperRIP: return "X86ISD::WrapperRIP"; 7803 case X86ISD::PEXTRB: return "X86ISD::PEXTRB"; 7804 case X86ISD::PEXTRW: return "X86ISD::PEXTRW"; 7805 case X86ISD::INSERTPS: return "X86ISD::INSERTPS"; 7806 case X86ISD::PINSRB: return "X86ISD::PINSRB"; 7807 case X86ISD::PINSRW: return "X86ISD::PINSRW"; 7808 case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW"; 7809 case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; 7810 case X86ISD::FMAX: return "X86ISD::FMAX"; 7811 case X86ISD::FMIN: return "X86ISD::FMIN"; 7812 case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; 7813 case X86ISD::FRCP: return "X86ISD::FRCP"; 7814 case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; 7815 case X86ISD::SegmentBaseAddress: return "X86ISD::SegmentBaseAddress"; 7816 case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; 7817 case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN"; 7818 case X86ISD::FNSTCW16m: return "X86ISD::FNSTCW16m"; 7819 case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG"; 7820 case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG"; 7821 case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG"; 7822 case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG"; 7823 case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG"; 7824 case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG"; 7825 case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG"; 7826 case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG"; 7827 case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; 7828 case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; 7829 case X86ISD::VSHL: return "X86ISD::VSHL"; 7830 case X86ISD::VSRL: return "X86ISD::VSRL"; 7831 case X86ISD::CMPPD: return "X86ISD::CMPPD"; 7832 case X86ISD::CMPPS: return "X86ISD::CMPPS"; 7833 case X86ISD::PCMPEQB: return "X86ISD::PCMPEQB"; 7834 case X86ISD::PCMPEQW: return "X86ISD::PCMPEQW"; 7835 case X86ISD::PCMPEQD: return "X86ISD::PCMPEQD"; 7836 case X86ISD::PCMPEQQ: return "X86ISD::PCMPEQQ"; 7837 case X86ISD::PCMPGTB: return "X86ISD::PCMPGTB"; 7838 case X86ISD::PCMPGTW: return "X86ISD::PCMPGTW"; 7839 case X86ISD::PCMPGTD: return "X86ISD::PCMPGTD"; 7840 case X86ISD::PCMPGTQ: return "X86ISD::PCMPGTQ"; 7841 case X86ISD::ADD: return "X86ISD::ADD"; 7842 case X86ISD::SUB: return "X86ISD::SUB"; 7843 case X86ISD::SMUL: return "X86ISD::SMUL"; 7844 case X86ISD::UMUL: return "X86ISD::UMUL"; 7845 case X86ISD::INC: return "X86ISD::INC"; 7846 case X86ISD::DEC: return "X86ISD::DEC"; 7847 case X86ISD::OR: return "X86ISD::OR"; 7848 case X86ISD::XOR: return "X86ISD::XOR"; 7849 case X86ISD::AND: return "X86ISD::AND"; 7850 case X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; 7851 case X86ISD::PTEST: return "X86ISD::PTEST"; 7852 case 
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              const Type *Ty) const {
  // X86 supports extremely general addressing modes.
  CodeModel::Model M = getTargetMachine().getCodeModel();

  // X86 allows a sign-extended 32-bit immediate field as a displacement.
  if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != NULL))
    return false;

  if (AM.BaseGV) {
    unsigned GVFlags =
      Subtarget->ClassifyGlobalReference(AM.BaseGV, getTargetMachine());

    // If a reference to this global requires an extra load, we can't fold it.
    if (isGlobalStubReference(GVFlags))
      return false;

    // If BaseGV requires a register for the PIC base, we cannot also have a
    // BaseReg specified.
    if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags))
      return false;

    // If lower 4G is not available, then we must use rip-relative addressing.
    if (Subtarget->is64Bit() && (AM.BaseOffs || AM.Scale > 1))
      return false;
  }

  switch (AM.Scale) {
  case 0:
  case 1:
  case 2:
  case 4:
  case 8:
    // These scales always work.
    break;
  case 3:
  case 5:
  case 9:
    // These scales are formed with basereg+scalereg.  Only accept if there is
    // no basereg yet.
    if (AM.HasBaseReg)
      return false;
    break;
  default:  // Other stuff never works.
    return false;
  }

  return true;
}


bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  if (NumBits1 <= NumBits2)
    return false;
  return true;
}

bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  if (NumBits1 <= NumBits2)
    return false;
  return true;
}

bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
  // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
  return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget->is64Bit();
}

bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
  // x86-64 implicitly zero-extends 32-bit results in 64-bit registers.
  return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
}

bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
  // i16 instructions are longer (0x66 prefix) and potentially slower.
  return !(VT1 == MVT::i32 && VT2 == MVT::i16);
}
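// The scales of 3, 5, and 9 accepted above are only reachable by reusing
// the base register slot as a second copy of the index, e.g. roughly:
//
//   lea (%rax,%rax,2), %rcx    ; rcx = 3*rax -- consumes base and index
//
// (register names illustrative), which is why they are rejected once
// AM.HasBaseReg is already set.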
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
/// are assumed to be legal.
bool
X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
                                      EVT VT) const {
  // Very little shuffling can be done for 64-bit vectors right now.
  if (VT.getSizeInBits() == 64)
    return isPALIGNRMask(M, VT, Subtarget->hasSSSE3());

  // FIXME: pshufb, blends, shifts.
  return (VT.getVectorNumElements() == 2 ||
          ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
          isMOVLMask(M, VT) ||
          isSHUFPMask(M, VT) ||
          isPSHUFDMask(M, VT) ||
          isPSHUFHWMask(M, VT) ||
          isPSHUFLWMask(M, VT) ||
          isPALIGNRMask(M, VT, Subtarget->hasSSSE3()) ||
          isUNPCKLMask(M, VT) ||
          isUNPCKHMask(M, VT) ||
          isUNPCKL_v_undef_Mask(M, VT) ||
          isUNPCKH_v_undef_Mask(M, VT));
}

bool
X86TargetLowering::isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
                                          EVT VT) const {
  unsigned NumElts = VT.getVectorNumElements();
  // FIXME: This collection of masks seems suspect.
  if (NumElts == 2)
    return true;
  if (NumElts == 4 && VT.getSizeInBits() == 128) {
    return (isMOVLMask(Mask, VT) ||
            isCommutedMOVLMask(Mask, VT, true) ||
            isSHUFPMask(Mask, VT) ||
            isCommutedSHUFPMask(Mask, VT));
  }
  return false;
}
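// A mask is "legal" here when one of the matchers above recognizes it as a
// single SSE instruction. Two illustrative v4i32 examples (sketch only):
//
//   <2,3,0,1>  -- a permutation of one input; isPSHUFDMask accepts it and
//                 it can be selected as pshufd $0x4E
//   <4,1,2,3>  -- element 0 taken from the second vector, rest from the
//                 first; isMOVLMask accepts it (movss-style move)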
//===----------------------------------------------------------------------===//
//                           X86 Scheduler Hooks
//===----------------------------------------------------------------------===//

// private utility function
MachineBasicBlock *
X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
                                                       MachineBasicBlock *MBB,
                                                       unsigned regOpc,
                                                       unsigned immOpc,
                                                       unsigned LoadOpc,
                                                       unsigned CXchgOpc,
                                                       unsigned copyOpc,
                                                       unsigned notOpc,
                                                       unsigned EAXreg,
                                                       TargetRegisterClass *RC,
                                                       bool invSrc) const {
  // For the atomic bitwise operator, we generate
  //   thisMBB:
  //   newMBB:
  //     ld  t1 = [bitinstr.addr]
  //     op  t2 = t1, [bitinstr.val]
  //     mov EAX = t1
  //     lcs dest = [bitinstr.addr], t2  [EAX is implicit]
  //     bz  newMBB
  //     fallthrough -->nextMBB
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;

  /// First build the CFG
  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, newMBB);
  F->insert(MBBIter, nextMBB);

  // Move all successors of thisMBB to nextMBB
  nextMBB->transferSuccessors(thisMBB);

  // Update thisMBB to fall through to newMBB
  thisMBB->addSuccessor(newMBB);

  // newMBB jumps to itself and falls through to nextMBB
  newMBB->addSuccessor(nextMBB);
  newMBB->addSuccessor(newMBB);

  // Insert instructions into newMBB based on incoming instruction
  assert(bInstr->getNumOperands() < X86AddrNumOperands + 4 &&
         "unexpected number of operands");
  DebugLoc dl = bInstr->getDebugLoc();
  MachineOperand& destOper = bInstr->getOperand(0);
  MachineOperand* argOpers[2 + X86AddrNumOperands];
  int numArgs = bInstr->getNumOperands() - 1;
  for (int i=0; i < numArgs; ++i)
    argOpers[i] = &bInstr->getOperand(i+1);

  // x86 address has 5 operands: base, index, scale, displacement, and segment.
  int lastAddrIndx = X86AddrNumOperands - 1; // [0,4]
  int valArgIndx = lastAddrIndx + 1;

  unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
  MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(LoadOpc), t1);
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);

  // If requested (e.g. for NAND), complement the loaded value first.
  unsigned tt;
  if (invSrc) {
    tt = F->getRegInfo().createVirtualRegister(RC);
    MIB = BuildMI(newMBB, dl, TII->get(notOpc), tt).addReg(t1);
  } else
    tt = t1;

  unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
  assert((argOpers[valArgIndx]->isReg() ||
          argOpers[valArgIndx]->isImm()) &&
         "invalid operand");
  if (argOpers[valArgIndx]->isReg())
    MIB = BuildMI(newMBB, dl, TII->get(regOpc), t2);
  else
    MIB = BuildMI(newMBB, dl, TII->get(immOpc), t2);
  MIB.addReg(tt);
  (*MIB).addOperand(*argOpers[valArgIndx]);

  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), EAXreg);
  MIB.addReg(t1);

  MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);
  MIB.addReg(t2);
  assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
  (*MIB).setMemRefs(bInstr->memoperands_begin(),
                    bInstr->memoperands_end());

  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), destOper.getReg());
  MIB.addReg(EAXreg);

  // insert branch
  BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);

  F->DeleteMachineInstr(bInstr);   // The pseudo instruction is gone now.
  return nextMBB;
}
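// Rough sketch of the loop this builds for, e.g., ATOMAND32 (virtual
// register names illustrative):
//
//   newMBB:
//     t1  = mov32rm [addr]
//     t2  = and32   t1, val
//     eax = t1
//     lock cmpxchg32 [addr], t2   ; succeeds only if [addr] still equals eax
//     jne newMBB                  ; another thread wrote in between -- retry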
// private utility function: 64 bit atomics on 32 bit host.
MachineBasicBlock *
X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
                                                       MachineBasicBlock *MBB,
                                                       unsigned regOpcL,
                                                       unsigned regOpcH,
                                                       unsigned immOpcL,
                                                       unsigned immOpcH,
                                                       bool invSrc) const {
  // For the atomic bitwise operator, we generate
  //   thisMBB (instructions are in pairs, except cmpxchg8b)
  //     ld t1,t2 = [bitinstr.addr]
  //   newMBB:
  //     out1, out2 = phi (thisMBB, t1/t2) (newMBB, t3/t4)
  //     op  t5, t6 <- out1, out2, [bitinstr.val]
  //      (for SWAP, substitute:  mov t5, t6 <- [bitinstr.val])
  //     mov ECX, EBX <- t5, t6
  //     mov EAX, EDX <- t1, t2
  //     cmpxchg8b [bitinstr.addr]  [EAX, EDX, EBX, ECX implicit]
  //     mov t3, t4 <- EAX, EDX
  //     bz  newMBB
  //     result in out1, out2
  //     fallthrough -->nextMBB

  const TargetRegisterClass *RC = X86::GR32RegisterClass;
  const unsigned LoadOpc = X86::MOV32rm;
  const unsigned copyOpc = X86::MOV32rr;
  const unsigned NotOpc = X86::NOT32r;
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;

  /// First build the CFG
  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, newMBB);
  F->insert(MBBIter, nextMBB);

  // Move all successors of thisMBB to nextMBB
  nextMBB->transferSuccessors(thisMBB);

  // Update thisMBB to fall through to newMBB
  thisMBB->addSuccessor(newMBB);

  // newMBB jumps to itself and falls through to nextMBB
  newMBB->addSuccessor(nextMBB);
  newMBB->addSuccessor(newMBB);

  DebugLoc dl = bInstr->getDebugLoc();
  // Insert instructions into newMBB based on incoming instruction.
  // There are 9 "real" operands plus 9 implicit def/uses, ignored here.
  assert(bInstr->getNumOperands() < X86AddrNumOperands + 14 &&
         "unexpected number of operands");
  MachineOperand& dest1Oper = bInstr->getOperand(0);
  MachineOperand& dest2Oper = bInstr->getOperand(1);
  MachineOperand* argOpers[2 + X86AddrNumOperands];
  for (int i=0; i < 2 + X86AddrNumOperands; ++i)
    argOpers[i] = &bInstr->getOperand(i+2);

  // x86 address has 5 operands: base, index, scale, displacement, and segment.
  int lastAddrIndx = X86AddrNumOperands - 1; // [0,4]

  unsigned t1 = F->getRegInfo().createVirtualRegister(RC);
  MachineInstrBuilder MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t1);
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);
  unsigned t2 = F->getRegInfo().createVirtualRegister(RC);
  MIB = BuildMI(thisMBB, dl, TII->get(LoadOpc), t2);
  // add 4 to displacement.
  for (int i=0; i <= lastAddrIndx-2; ++i)
    (*MIB).addOperand(*argOpers[i]);
  MachineOperand newOp3 = *(argOpers[3]);
  if (newOp3.isImm())
    newOp3.setImm(newOp3.getImm()+4);
  else
    newOp3.setOffset(newOp3.getOffset()+4);
  (*MIB).addOperand(newOp3);
  (*MIB).addOperand(*argOpers[lastAddrIndx]);

  // t3/4 are defined later, at the bottom of the loop
  unsigned t3 = F->getRegInfo().createVirtualRegister(RC);
  unsigned t4 = F->getRegInfo().createVirtualRegister(RC);
  BuildMI(newMBB, dl, TII->get(X86::PHI), dest1Oper.getReg())
    .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(newMBB);
  BuildMI(newMBB, dl, TII->get(X86::PHI), dest2Oper.getReg())
    .addReg(t2).addMBB(thisMBB).addReg(t4).addMBB(newMBB);

  // The subsequent operations should be using the destination registers of
  // the PHI instructions.
  if (invSrc) {
    t1 = F->getRegInfo().createVirtualRegister(RC);
    t2 = F->getRegInfo().createVirtualRegister(RC);
    MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t1).addReg(dest1Oper.getReg());
    MIB = BuildMI(newMBB, dl, TII->get(NotOpc), t2).addReg(dest2Oper.getReg());
  } else {
    t1 = dest1Oper.getReg();
    t2 = dest2Oper.getReg();
  }

  int valArgIndx = lastAddrIndx + 1;
  assert((argOpers[valArgIndx]->isReg() ||
          argOpers[valArgIndx]->isImm()) &&
         "invalid operand");
  unsigned t5 = F->getRegInfo().createVirtualRegister(RC);
  unsigned t6 = F->getRegInfo().createVirtualRegister(RC);
  if (argOpers[valArgIndx]->isReg())
    MIB = BuildMI(newMBB, dl, TII->get(regOpcL), t5);
  else
    MIB = BuildMI(newMBB, dl, TII->get(immOpcL), t5);
  if (regOpcL != X86::MOV32rr)
    MIB.addReg(t1);
  (*MIB).addOperand(*argOpers[valArgIndx]);
  assert(argOpers[valArgIndx + 1]->isReg() ==
         argOpers[valArgIndx]->isReg());
  assert(argOpers[valArgIndx + 1]->isImm() ==
         argOpers[valArgIndx]->isImm());
  if (argOpers[valArgIndx + 1]->isReg())
    MIB = BuildMI(newMBB, dl, TII->get(regOpcH), t6);
  else
    MIB = BuildMI(newMBB, dl, TII->get(immOpcH), t6);
  if (regOpcH != X86::MOV32rr)
    MIB.addReg(t2);
  (*MIB).addOperand(*argOpers[valArgIndx + 1]);

  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX);
  MIB.addReg(t1);
  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EDX);
  MIB.addReg(t2);

  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EBX);
  MIB.addReg(t5);
  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::ECX);
  MIB.addReg(t6);

  MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);

  assert(bInstr->hasOneMemOperand() && "Unexpected number of memoperand");
  (*MIB).setMemRefs(bInstr->memoperands_begin(),
                    bInstr->memoperands_end());

  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t3);
  MIB.addReg(X86::EAX);
  MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t4);
  MIB.addReg(X86::EDX);

  // insert branch
  BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);

  F->DeleteMachineInstr(bInstr);   // The pseudo instruction is gone now.
  return nextMBB;
}

// private utility function
MachineBasicBlock *
X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
                                                      MachineBasicBlock *MBB,
                                                      unsigned cmovOpc) const {
  // For the atomic min/max operator, we generate
  //   thisMBB:
  //   newMBB:
  //     ld t1 = [min/max.addr]
  //     mov t2 = [min/max.val]
  //     cmp  t1, t2
  //     cmov[cond] t2 = t1
  //     mov EAX = t1
  //     lcs dest = [bitinstr.addr], t2  [EAX is implicit]
  //     bz   newMBB
  //     fallthrough -->nextMBB
  //
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;

  /// First build the CFG
  MachineFunction *F = MBB->getParent();
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *newMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *nextMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, newMBB);
  F->insert(MBBIter, nextMBB);

  // Move all successors of thisMBB to nextMBB
  nextMBB->transferSuccessors(thisMBB);

  // Update thisMBB to fall through to newMBB
  thisMBB->addSuccessor(newMBB);

  // newMBB jumps to itself and falls through to nextMBB
  newMBB->addSuccessor(nextMBB);
  newMBB->addSuccessor(newMBB);

  DebugLoc dl = mInstr->getDebugLoc();
  // Insert instructions into newMBB based on incoming instruction
  assert(mInstr->getNumOperands() < X86AddrNumOperands + 4 &&
         "unexpected number of operands");
  MachineOperand& destOper = mInstr->getOperand(0);
  MachineOperand* argOpers[2 + X86AddrNumOperands];
  int numArgs = mInstr->getNumOperands() - 1;
  for (int i=0; i < numArgs; ++i)
    argOpers[i] = &mInstr->getOperand(i+1);

  // x86 address has 5 operands: base, index, scale, displacement, and segment.
  int lastAddrIndx = X86AddrNumOperands - 1; // [0,4]
  int valArgIndx = lastAddrIndx + 1;

  unsigned t1 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  MachineInstrBuilder MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rm), t1);
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);

  // We only support register and immediate values
  assert((argOpers[valArgIndx]->isReg() ||
          argOpers[valArgIndx]->isImm()) &&
         "invalid operand");

  unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  if (argOpers[valArgIndx]->isReg())
    MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
  else
    MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32ri), t2);
  (*MIB).addOperand(*argOpers[valArgIndx]);

  MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), X86::EAX);
  MIB.addReg(t1);

  MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
  MIB.addReg(t1);
  MIB.addReg(t2);

  // Generate movc
  unsigned t3 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
  MIB = BuildMI(newMBB, dl, TII->get(cmovOpc), t3);
  MIB.addReg(t2);
  MIB.addReg(t1);

  // Compare and exchange if no one has modified the memory location.
  MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG32));
  for (int i=0; i <= lastAddrIndx; ++i)
    (*MIB).addOperand(*argOpers[i]);
  MIB.addReg(t3);
  assert(mInstr->hasOneMemOperand() && "Unexpected number of memoperand");
  (*MIB).setMemRefs(mInstr->memoperands_begin(),
                    mInstr->memoperands_end());

  MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), destOper.getReg());
  MIB.addReg(X86::EAX);

  // insert branch
  BuildMI(newMBB, dl, TII->get(X86::JNE_4)).addMBB(newMBB);

  F->DeleteMachineInstr(mInstr);   // The pseudo instruction is gone now.
  return nextMBB;
}
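// Rough sketch for ATOMMIN32, where cmovOpc is CMOVL32rr (names
// illustrative):
//
//   newMBB:
//     t1  = load [addr]
//     t2  = val
//     cmp t1, t2
//     t3  = cmovl t2, t1      ; t3 = (t1 < t2) ? t1 : t2
//     eax = t1
//     lock cmpxchg32 [addr], t3
//     jne newMBB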
// FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8
// all of this code can be replaced with that in the .td file.
MachineBasicBlock *
X86TargetLowering::EmitPCMP(MachineInstr *MI, MachineBasicBlock *BB,
                            unsigned numArgs, bool memArg) const {

  MachineFunction *F = BB->getParent();
  DebugLoc dl = MI->getDebugLoc();
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  unsigned Opc;
  if (memArg)
    Opc = numArgs == 3 ? X86::PCMPISTRM128rm : X86::PCMPESTRM128rm;
  else
    Opc = numArgs == 3 ? X86::PCMPISTRM128rr : X86::PCMPESTRM128rr;

  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(Opc));

  for (unsigned i = 0; i < numArgs; ++i) {
    MachineOperand &Op = MI->getOperand(i+1);

    if (!(Op.isReg() && Op.isImplicit()))
      MIB.addOperand(Op);
  }

  BuildMI(BB, dl, TII->get(X86::MOVAPSrr), MI->getOperand(0).getReg())
    .addReg(X86::XMM0);

  F->DeleteMachineInstr(MI);

  return BB;
}
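// Both PCMPISTRM and PCMPESTRM write their result to XMM0 by definition,
// which is why the inserter above copies XMM0 into the virtual destination
// with MOVAPS instead of letting the allocator choose a register. The
// explicit-length form (PCMPESTRM) additionally reads its string lengths
// from EAX/EDX; those arrive on the pseudo as implicit register operands
// and are deliberately skipped when the real instruction is built.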
MachineBasicBlock *
X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter(
                                                 MachineInstr *MI,
                                                 MachineBasicBlock *MBB) const {
  // Emit code to save XMM registers to the stack. The ABI says that the
  // number of registers to save is given in %al, so it's theoretically
  // possible to do an indirect jump trick to avoid saving all of them,
  // however this code takes a simpler approach and just executes all
  // of the stores if %al is non-zero. It's less code, and it's probably
  // easier on the hardware branch predictor, and stores aren't all that
  // expensive anyway.

  // Create the new basic blocks. One block contains all the XMM stores,
  // and one block is the final destination regardless of whether any
  // stores were performed.
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction *F = MBB->getParent();
  MachineFunction::iterator MBBIter = MBB;
  ++MBBIter;
  MachineBasicBlock *XMMSaveMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *EndMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(MBBIter, XMMSaveMBB);
  F->insert(MBBIter, EndMBB);

  // Set up the CFG.
  // Move any original successors of MBB to the end block.
  EndMBB->transferSuccessors(MBB);
  // The original block will now fall through to the XMM save block.
  MBB->addSuccessor(XMMSaveMBB);
  // The XMMSaveMBB will fall through to the end block.
  XMMSaveMBB->addSuccessor(EndMBB);

  // Now add the instructions.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  unsigned CountReg = MI->getOperand(0).getReg();
  int64_t RegSaveFrameIndex = MI->getOperand(1).getImm();
  int64_t VarArgsFPOffset = MI->getOperand(2).getImm();

  if (!Subtarget->isTargetWin64()) {
    // If %al is 0, branch around the XMM save block.
    BuildMI(MBB, DL, TII->get(X86::TEST8rr)).addReg(CountReg).addReg(CountReg);
    BuildMI(MBB, DL, TII->get(X86::JE_4)).addMBB(EndMBB);
    MBB->addSuccessor(EndMBB);
  }

  // In the XMM save block, save all the XMM argument registers.
  for (int i = 3, e = MI->getNumOperands(); i != e; ++i) {
    int64_t Offset = (i - 3) * 16 + VarArgsFPOffset;
    MachineMemOperand *MMO =
      F->getMachineMemOperand(
        PseudoSourceValue::getFixedStack(RegSaveFrameIndex),
        MachineMemOperand::MOStore, Offset,
        /*Size=*/16, /*Align=*/16);
    BuildMI(XMMSaveMBB, DL, TII->get(X86::MOVAPSmr))
      .addFrameIndex(RegSaveFrameIndex)
      .addImm(/*Scale=*/1)
      .addReg(/*IndexReg=*/0)
      .addImm(/*Disp=*/Offset)
      .addReg(/*Segment=*/0)
      .addReg(MI->getOperand(i).getReg())
      .addMemOperand(MMO);
  }

  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.

  return EndMBB;
}

MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
                                     MachineBasicBlock *BB,
                   DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();

  // To "insert" a SELECT_CC instruction, we actually have to insert the
  // diamond control-flow pattern.  The incoming instruction knows the
  // destination vreg to set, the condition code register to branch on, the
  // true/false values to select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = BB;
  ++It;

  //  thisMBB:
  //  ...
  //   TrueVal = ...
  //   cmpTY ccX, r1, r2
  //   bCC sinkMBB
  //   fallthrough --> copy0MBB
  MachineBasicBlock *thisMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
  unsigned Opc =
    X86::GetCondBranchFromCond((X86::CondCode)MI->getOperand(3).getImm());
  BuildMI(BB, DL, TII->get(Opc)).addMBB(sinkMBB);
  F->insert(It, copy0MBB);
  F->insert(It, sinkMBB);
  // Update machine-CFG edges by first adding all successors of the current
  // block to the new block which will contain the Phi node for the select.
  // Also inform sdisel of the edge changes.
  for (MachineBasicBlock::succ_iterator I = BB->succ_begin(),
         E = BB->succ_end(); I != E; ++I) {
    EM->insert(std::make_pair(*I, sinkMBB));
    sinkMBB->addSuccessor(*I);
  }
  // Next, remove all successors of the current block, and add the true
  // and fallthrough blocks as its successors.
  while (!BB->succ_empty())
    BB->removeSuccessor(BB->succ_begin());
  // Add the true and fallthrough blocks as its successors.
  BB->addSuccessor(copy0MBB);
  BB->addSuccessor(sinkMBB);

  //  copy0MBB:
  //   %FalseValue = ...
  //   # fallthrough to sinkMBB
  BB = copy0MBB;

  // Update machine-CFG edges
  BB->addSuccessor(sinkMBB);

  //  sinkMBB:
  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
  //  ...
  BB = sinkMBB;
  BuildMI(BB, DL, TII->get(X86::PHI), MI->getOperand(0).getReg())
    .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
  return BB;
}
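// EmitLoweredSelect handles CMOV pseudos for types with no native cmov
// (x87 values, vectors, GR8). A rough sketch of the resulting diamond:
//
//   thisMBB:   jCC sinkMBB            ; condition true -> keep TrueVal
//   copy0MBB:  (fallthrough)          ; FalseValue path
//   sinkMBB:   Result = phi [FalseValue, copy0MBB], [TrueVal, thisMBB]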
MachineBasicBlock *
X86TargetLowering::EmitLoweredMingwAlloca(MachineInstr *MI,
                                          MachineBasicBlock *BB,
                   DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  DebugLoc DL = MI->getDebugLoc();
  MachineFunction *F = BB->getParent();

  // The lowering is pretty easy: we're just emitting the call to _alloca. The
  // non-trivial part is the implicit def/use of ESP.
  // FIXME: The code should be tweaked as soon as we'll try to do codegen for
  // mingw-w64.

  BuildMI(BB, DL, TII->get(X86::CALLpcrel32))
    .addExternalSymbol("_alloca")
    .addReg(X86::EAX, RegState::Implicit)
    .addReg(X86::ESP, RegState::Implicit)
    .addReg(X86::EAX, RegState::Define | RegState::Implicit)
    .addReg(X86::ESP, RegState::Define | RegState::Implicit);

  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
  return BB;
}

MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB,
                   DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
  switch (MI->getOpcode()) {
  default: assert(false && "Unexpected instr type to insert");
  case X86::MINGW_ALLOCA:
    return EmitLoweredMingwAlloca(MI, BB, EM);
  case X86::CMOV_GR8:
  case X86::CMOV_V1I64:
  case X86::CMOV_FR32:
  case X86::CMOV_FR64:
  case X86::CMOV_V4F32:
  case X86::CMOV_V2F64:
  case X86::CMOV_V2I64:
  case X86::CMOV_GR16:
  case X86::CMOV_GR32:
  case X86::CMOV_RFP32:
  case X86::CMOV_RFP64:
  case X86::CMOV_RFP80:
    return EmitLoweredSelect(MI, BB, EM);

  case X86::FP32_TO_INT16_IN_MEM:
  case X86::FP32_TO_INT32_IN_MEM:
  case X86::FP32_TO_INT64_IN_MEM:
  case X86::FP64_TO_INT16_IN_MEM:
  case X86::FP64_TO_INT32_IN_MEM:
  case X86::FP64_TO_INT64_IN_MEM:
  case X86::FP80_TO_INT16_IN_MEM:
  case X86::FP80_TO_INT32_IN_MEM:
  case X86::FP80_TO_INT64_IN_MEM: {
    const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
    DebugLoc DL = MI->getDebugLoc();

    // Change the floating point control register to use "round towards zero"
    // mode when truncating to an integer value.
    MachineFunction *F = BB->getParent();
    int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
    addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx);

    // Load the old value of the control word...
    unsigned OldCW =
      F->getRegInfo().createVirtualRegister(X86::GR16RegisterClass);
    addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16rm), OldCW),
                      CWFrameIdx);

    // Set the high part to be round to zero...
    addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mi)), CWFrameIdx)
      .addImm(0xC7F);

    // Reload the modified control word now...
    addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx);

    // Restore the memory image of control word to original value
    addFrameReference(BuildMI(BB, DL, TII->get(X86::MOV16mr)), CWFrameIdx)
      .addReg(OldCW);

    // Get the X86 opcode to use.
    unsigned Opc;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("illegal opcode!");
    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
    case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
    case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
    case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
    }

    X86AddressMode AM;
    MachineOperand &Op = MI->getOperand(0);
    if (Op.isReg()) {
      AM.BaseType = X86AddressMode::RegBase;
      AM.Base.Reg = Op.getReg();
    } else {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = Op.getIndex();
    }
    Op = MI->getOperand(1);
    if (Op.isImm())
      AM.Scale = Op.getImm();
    Op = MI->getOperand(2);
    if (Op.isImm())
      AM.IndexReg = Op.getImm();
    Op = MI->getOperand(3);
    if (Op.isGlobal()) {
      AM.GV = Op.getGlobal();
    } else {
      AM.Disp = Op.getImm();
    }
    addFullAddress(BuildMI(BB, DL, TII->get(Opc)), AM)
      .addReg(MI->getOperand(X86AddrNumOperands).getReg());

    // Reload the original control word now.
    addFrameReference(BuildMI(BB, DL, TII->get(X86::FLDCW16m)), CWFrameIdx);

    F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
    return BB;
  }
    // DBG_VALUE.  Only the frame index case is done here.
  case X86::DBG_VALUE: {
    const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
    DebugLoc DL = MI->getDebugLoc();
    X86AddressMode AM;
    MachineFunction *F = BB->getParent();
    AM.BaseType = X86AddressMode::FrameIndexBase;
    AM.Base.FrameIndex = MI->getOperand(0).getImm();
    addFullAddress(BuildMI(BB, DL, TII->get(X86::DBG_VALUE)), AM).
      addImm(MI->getOperand(1).getImm()).
      addMetadata(MI->getOperand(2).getMetadata());
    F->DeleteMachineInstr(MI);      // Remove pseudo.
    return BB;
  }

    // String/text processing lowering.
  case X86::PCMPISTRM128REG:
    return EmitPCMP(MI, BB, 3, /*memArg=*/false);
  case X86::PCMPISTRM128MEM:
    return EmitPCMP(MI, BB, 3, /*memArg=*/true);
  case X86::PCMPESTRM128REG:
    return EmitPCMP(MI, BB, 5, /*memArg=*/false);
  case X86::PCMPESTRM128MEM:
    return EmitPCMP(MI, BB, 5, /*memArg=*/true);

    // Atomic Lowering.
  case X86::ATOMAND32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
                                               X86::AND32ri, X86::MOV32rm,
                                               X86::LCMPXCHG32, X86::MOV32rr,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass);
  case X86::ATOMOR32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
                                               X86::OR32ri, X86::MOV32rm,
                                               X86::LCMPXCHG32, X86::MOV32rr,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass);
  case X86::ATOMXOR32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
                                               X86::XOR32ri, X86::MOV32rm,
                                               X86::LCMPXCHG32, X86::MOV32rr,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass);
  case X86::ATOMNAND32:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
                                               X86::AND32ri, X86::MOV32rm,
                                               X86::LCMPXCHG32, X86::MOV32rr,
                                               X86::NOT32r, X86::EAX,
                                               X86::GR32RegisterClass, true);
  case X86::ATOMMIN32:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL32rr);
  case X86::ATOMMAX32:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG32rr);
  case X86::ATOMUMIN32:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB32rr);
  case X86::ATOMUMAX32:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA32rr);

  case X86::ATOMAND16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
                                               X86::AND16ri, X86::MOV16rm,
                                               X86::LCMPXCHG16, X86::MOV16rr,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass);
  case X86::ATOMOR16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
                                               X86::OR16ri, X86::MOV16rm,
                                               X86::LCMPXCHG16, X86::MOV16rr,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass);
  case X86::ATOMXOR16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
                                               X86::XOR16ri, X86::MOV16rm,
                                               X86::LCMPXCHG16, X86::MOV16rr,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass);
  case X86::ATOMNAND16:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
                                               X86::AND16ri, X86::MOV16rm,
                                               X86::LCMPXCHG16, X86::MOV16rr,
                                               X86::NOT16r, X86::AX,
                                               X86::GR16RegisterClass, true);
  case X86::ATOMMIN16:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL16rr);
  case X86::ATOMMAX16:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG16rr);
  case X86::ATOMUMIN16:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB16rr);
  case X86::ATOMUMAX16:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA16rr);

  case X86::ATOMAND8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
                                               X86::AND8ri, X86::MOV8rm,
                                               X86::LCMPXCHG8, X86::MOV8rr,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass);
  case X86::ATOMOR8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
                                               X86::OR8ri, X86::MOV8rm,
                                               X86::LCMPXCHG8, X86::MOV8rr,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass);
  case X86::ATOMXOR8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
                                               X86::XOR8ri, X86::MOV8rm,
                                               X86::LCMPXCHG8, X86::MOV8rr,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass);
  case X86::ATOMNAND8:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
                                               X86::AND8ri, X86::MOV8rm,
                                               X86::LCMPXCHG8, X86::MOV8rr,
                                               X86::NOT8r, X86::AL,
                                               X86::GR8RegisterClass, true);
  // FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
  // This group is for 64-bit host.
  case X86::ATOMAND64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
                                               X86::AND64ri32, X86::MOV64rm,
                                               X86::LCMPXCHG64, X86::MOV64rr,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass);
  case X86::ATOMOR64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
                                               X86::OR64ri32, X86::MOV64rm,
                                               X86::LCMPXCHG64, X86::MOV64rr,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass);
  case X86::ATOMXOR64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
                                               X86::XOR64ri32, X86::MOV64rm,
                                               X86::LCMPXCHG64, X86::MOV64rr,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass);
  case X86::ATOMNAND64:
    return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
                                               X86::AND64ri32, X86::MOV64rm,
                                               X86::LCMPXCHG64, X86::MOV64rr,
                                               X86::NOT64r, X86::RAX,
                                               X86::GR64RegisterClass, true);
  case X86::ATOMMIN64:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVL64rr);
  case X86::ATOMMAX64:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVG64rr);
  case X86::ATOMUMIN64:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVB64rr);
  case X86::ATOMUMAX64:
    return EmitAtomicMinMaxWithCustomInserter(MI, BB, X86::CMOVA64rr);

  // This group does 64-bit operations on a 32-bit host.
  case X86::ATOMAND6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::AND32rr, X86::AND32rr,
                                               X86::AND32ri, X86::AND32ri,
                                               false);
  case X86::ATOMOR6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::OR32rr, X86::OR32rr,
                                               X86::OR32ri, X86::OR32ri,
                                               false);
  case X86::ATOMXOR6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::XOR32rr, X86::XOR32rr,
                                               X86::XOR32ri, X86::XOR32ri,
                                               false);
  case X86::ATOMNAND6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::AND32rr, X86::AND32rr,
                                               X86::AND32ri, X86::AND32ri,
                                               true);
  case X86::ATOMADD6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::ADD32rr, X86::ADC32rr,
                                               X86::ADD32ri, X86::ADC32ri,
                                               false);
  case X86::ATOMSUB6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::SUB32rr, X86::SBB32rr,
                                               X86::SUB32ri, X86::SBB32ri,
                                               false);
  case X86::ATOMSWAP6432:
    return EmitAtomicBit6432WithCustomInserter(MI, BB,
                                               X86::MOV32rr, X86::MOV32rr,
                                               X86::MOV32ri, X86::MOV32ri,
                                               false);
  case X86::VASTART_SAVE_XMM_REGS:
    return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB);
  }
}
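// A note on the 6432 cases above: each passes a low-half opcode together
// with its carry-aware high-half counterpart, so for ATOMADD6432 the low
// word is computed with ADD32rr/ADD32ri and the high word with
// ADC32rr/ADC32ri, which folds in the carry; SUB pairs with SBB the same
// way, and SWAP simply uses MOV for both halves.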
//===----------------------------------------------------------------------===//
//                           X86 Optimization Hooks
//===----------------------------------------------------------------------===//

void X86TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                       const APInt &Mask,
                                                       APInt &KnownZero,
                                                       APInt &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  unsigned Opc = Op.getOpcode();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);   // Don't know anything.
  switch (Opc) {
  default: break;
  case X86ISD::ADD:
  case X86ISD::SUB:
  case X86ISD::SMUL:
  case X86ISD::UMUL:
  case X86ISD::INC:
  case X86ISD::DEC:
  case X86ISD::OR:
  case X86ISD::XOR:
  case X86ISD::AND:
    // These nodes' second result is a boolean.
    if (Op.getResNo() == 0)
      break;
    // Fallthrough
  case X86ISD::SETCC:
    KnownZero |= APInt::getHighBitsSet(Mask.getBitWidth(),
                                       Mask.getBitWidth() - 1);
    break;
  }
}

/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
bool X86TargetLowering::isGAPlusOffset(SDNode *N,
                                       const GlobalValue* &GA,
                                       int64_t &Offset) const {
  if (N->getOpcode() == X86ISD::Wrapper) {
    if (isa<GlobalAddressSDNode>(N->getOperand(0))) {
      GA = cast<GlobalAddressSDNode>(N->getOperand(0))->getGlobal();
      Offset = cast<GlobalAddressSDNode>(N->getOperand(0))->getOffset();
      return true;
    }
  }
  return TargetLowering::isGAPlusOffset(N, GA, Offset);
}

/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
/// order.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
                                     const TargetLowering &TLI) {
  DebugLoc dl = N->getDebugLoc();
  EVT VT = N->getValueType(0);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  if (VT.getSizeInBits() != 128)
    return SDValue();

  SmallVector<SDValue, 16> Elts;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
    Elts.push_back(DAG.getShuffleScalarElt(SVN, i));

  return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
}
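// The combine above targets DAGs of roughly this shape (schematic, not
// literal DAG syntax):
//
//   shuffle (build_vector (load p), (load p+4), (load p+8), (load p+12)),
//           <0,1,2,3>
//
// which EltsFromConsecutiveLoads can collapse into one 128-bit load when
// the addresses are consecutive and non-overlapping.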
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
/// generation and convert it from being a bunch of shuffles and extracts
/// to a simple store and scalar loads to extract the elements.
static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
                                                const TargetLowering &TLI) {
  SDValue InputVector = N->getOperand(0);

  // Only operate on vectors of 4 elements, where the alternative shuffling
  // gets to be more expensive.
  if (InputVector.getValueType() != MVT::v4i32)
    return SDValue();

  // Check whether every use of InputVector is an EXTRACT_VECTOR_ELT with a
  // single use which is a sign-extend or zero-extend, and all elements are
  // used.
  SmallVector<SDNode *, 4> Uses;
  unsigned ExtractedElements = 0;
  for (SDNode::use_iterator UI = InputVector.getNode()->use_begin(),
       UE = InputVector.getNode()->use_end(); UI != UE; ++UI) {
    if (UI.getUse().getResNo() != InputVector.getResNo())
      return SDValue();

    SDNode *Extract = *UI;
    if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();

    if (Extract->getValueType(0) != MVT::i32)
      return SDValue();
    if (!Extract->hasOneUse())
      return SDValue();
    if (Extract->use_begin()->getOpcode() != ISD::SIGN_EXTEND &&
        Extract->use_begin()->getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();
    if (!isa<ConstantSDNode>(Extract->getOperand(1)))
      return SDValue();

    // Record which element was extracted.
    ExtractedElements |=
      1 << cast<ConstantSDNode>(Extract->getOperand(1))->getZExtValue();

    Uses.push_back(Extract);
  }

  // If not all the elements were used, this may not be worthwhile.
  if (ExtractedElements != 15)
    return SDValue();

  // Ok, we've now decided to do the transformation.
  DebugLoc dl = InputVector.getDebugLoc();

  // Store the value to a temporary stack slot.
  SDValue StackPtr = DAG.CreateStackTemporary(InputVector.getValueType());
  SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InputVector, StackPtr,
                            NULL, 0, false, false, 0);

  // Replace each use (extract) with a load of the appropriate element.
  for (SmallVectorImpl<SDNode *>::iterator UI = Uses.begin(),
       UE = Uses.end(); UI != UE; ++UI) {
    SDNode *Extract = *UI;

    // Compute the element's address.
    SDValue Idx = Extract->getOperand(1);
    unsigned EltSize =
        InputVector.getValueType().getVectorElementType().getSizeInBits()/8;
    uint64_t Offset = EltSize * cast<ConstantSDNode>(Idx)->getZExtValue();
    SDValue OffsetVal = DAG.getConstant(Offset, TLI.getPointerTy());

    SDValue ScalarAddr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(),
                                     OffsetVal, StackPtr);

    // Load the scalar.
    SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch,
                                     ScalarAddr, NULL, 0, false, false, 0);

    // Replace the extract with the load.
    DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar);
  }

  // The replacement was made in place; don't return anything.
  return SDValue();
}
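// Once all four i32 lanes of a v4i32 are extracted and widened, the combine
// above prefers one vector store plus four scalar loads over four
// shuffle/extract chains. Schematically:
//
//   e0..e3 = extract_elt v, 0..3   ==>   store v, slot
//                                        ei = load slot + 4*i   (i = 0..3)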
        if (!UnsafeFPMath &&
            !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
          break;
        Opcode = X86ISD::FMIN;
        break;
      case ISD::SETULE:
        // Converting this to a min would handle both negative zeros and NaNs
        // incorrectly, but we can swap the operands to fix both.
        std::swap(LHS, RHS);
      case ISD::SETOLT:
      case ISD::SETLT:
      case ISD::SETLE:
        Opcode = X86ISD::FMIN;
        break;

      case ISD::SETOGE:
        // Converting this to a max would handle comparisons between positive
        // and negative zero incorrectly.
        if (!UnsafeFPMath &&
            !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
          break;
        Opcode = X86ISD::FMAX;
        break;
      case ISD::SETUGT:
        // Converting this to a max would handle NaNs incorrectly, and swapping
        // the operands would cause it to handle comparisons between positive
        // and negative zero incorrectly.
        if (!FiniteOnlyFPMath() &&
            (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
          if (!UnsafeFPMath &&
              !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
            break;
          std::swap(LHS, RHS);
        }
        Opcode = X86ISD::FMAX;
        break;
      case ISD::SETUGE:
        // Converting this to a max would handle both negative zeros and NaNs
        // incorrectly, but we can swap the operands to fix both.
        std::swap(LHS, RHS);
      case ISD::SETOGT:
      case ISD::SETGT:
      case ISD::SETGE:
        Opcode = X86ISD::FMAX;
        break;
      }
    // Check for x CC y ? y : x -- a min/max with reversed arms.
    } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
               DAG.isEqualTo(RHS, Cond.getOperand(0))) {
      switch (CC) {
      default: break;
      case ISD::SETOGE:
        // Converting this to a min would handle comparisons between positive
        // and negative zero incorrectly, and swapping the operands would
        // cause it to handle NaNs incorrectly.
        if (!UnsafeFPMath &&
            !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
          if (!FiniteOnlyFPMath() &&
              (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
            break;
          std::swap(LHS, RHS);
        }
        Opcode = X86ISD::FMIN;
        break;
      case ISD::SETUGT:
        // Converting this to a min would handle NaNs incorrectly.
        if (!UnsafeFPMath &&
            (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
          break;
        Opcode = X86ISD::FMIN;
        break;
      case ISD::SETUGE:
        // Converting this to a min would handle both negative zeros and NaNs
        // incorrectly, but we can swap the operands to fix both.
        std::swap(LHS, RHS);
      case ISD::SETOGT:
      case ISD::SETGT:
      case ISD::SETGE:
        Opcode = X86ISD::FMIN;
        break;

      case ISD::SETULT:
        // Converting this to a max would handle NaNs incorrectly.
        if (!FiniteOnlyFPMath() &&
            (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
          break;
        Opcode = X86ISD::FMAX;
        break;
      case ISD::SETOLE:
        // Converting this to a max would handle comparisons between positive
        // and negative zero incorrectly, and swapping the operands would
        // cause it to handle NaNs incorrectly.
        if (!UnsafeFPMath &&
            !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
          if (!FiniteOnlyFPMath() &&
              (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
            break;
          std::swap(LHS, RHS);
        }
        Opcode = X86ISD::FMAX;
        break;
      case ISD::SETULE:
        // Converting this to a max would handle both negative zeros and NaNs
        // incorrectly, but we can swap the operands to fix both.
        std::swap(LHS, RHS);
      case ISD::SETOLT:
      case ISD::SETLT:
      case ISD::SETLE:
        Opcode = X86ISD::FMAX;
        break;
      }
    }

    if (Opcode)
      return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
  }

  // If this is a select between two integer constants, try to do some
  // optimizations.
  if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
    if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(RHS))
      // Don't do this for crazy integer types.
      if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) {
        // If this is efficiently invertible, canonicalize the LHSC/RHSC values
        // so that TrueC (the true value) is larger than FalseC.
        bool NeedsCondInvert = false;

        if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
            // Efficiently invertible.
            (Cond.getOpcode() == ISD::SETCC ||  // setcc -> invertible.
             (Cond.getOpcode() == ISD::XOR &&   // xor(X, C) -> invertible.
              isa<ConstantSDNode>(Cond.getOperand(1))))) {
          NeedsCondInvert = true;
          std::swap(TrueC, FalseC);
        }

        // Optimize C ? 8 : 0 -> zext(C) << 3.  Likewise for any pow2/0.
        if (FalseC->getAPIntValue() == 0 &&
            TrueC->getAPIntValue().isPowerOf2()) {
          if (NeedsCondInvert) // Invert the condition if needed.
            Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
                               DAG.getConstant(1, Cond.getValueType()));

          // Zero extend the condition if needed.
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);

          unsigned ShAmt = TrueC->getAPIntValue().logBase2();
          return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
                             DAG.getConstant(ShAmt, MVT::i8));
        }

        // Optimize Cond ? cst+1 : cst -> zext(setcc(C)) + cst.
        if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
          if (NeedsCondInvert) // Invert the condition if needed.
            Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
                               DAG.getConstant(1, Cond.getValueType()));

          // Zero extend the condition if needed.
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
                             FalseC->getValueType(0), Cond);
          return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
                             SDValue(FalseC, 0));
        }

        // Optimize cases that will turn into an LEA instruction.  This
        // requires an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5,
        // 8, 9).
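        // For illustration: (Cond ? 13 : 5) has Diff == 8, one of the scales
        // LEA supports, so it can be emitted branch-free as roughly
        //   leal 5(,%cond,8), %eax
        // i.e. base FalseC plus the zero-extended condition scaled by Diff.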
        if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
          uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
          if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;

          bool isFastMultiplier = false;
          if (Diff < 10) {
            switch ((unsigned char)Diff) {
            default: break;
            case 1:  // result = add base, cond
            case 2:  // result = lea base(    , cond*2)
            case 3:  // result = lea base(cond, cond*2)
            case 4:  // result = lea base(    , cond*4)
            case 5:  // result = lea base(cond, cond*4)
            case 8:  // result = lea base(    , cond*8)
            case 9:  // result = lea base(cond, cond*8)
              isFastMultiplier = true;
              break;
            }
          }

          if (isFastMultiplier) {
            APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
            if (NeedsCondInvert) // Invert the condition if needed.
              Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
                                 DAG.getConstant(1, Cond.getValueType()));

            // Zero extend the condition if needed.
            Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
                               Cond);
            // Scale the condition by the difference.
            if (Diff != 1)
              Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
                                 DAG.getConstant(Diff, Cond.getValueType()));

            // Add the base if non-zero.
            if (FalseC->getAPIntValue() != 0)
              Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
                                 SDValue(FalseC, 0));
            return Cond;
          }
        }
      }
  }

  return SDValue();
}

/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI) {
  DebugLoc DL = N->getDebugLoc();

  // If the flag operand isn't dead, don't touch this CMOV.
  if (N->getNumValues() == 2 && !SDValue(N, 1).use_empty())
    return SDValue();

  // If this is a select between two integer constants, try to do some
  // optimizations.  Note that the operands are ordered the opposite of SELECT
  // operands.
  if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
    if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      // Canonicalize the TrueC/FalseC values so that TrueC (the true value) is
      // larger than FalseC (the false value).
      X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2);

      if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) {
        CC = X86::GetOppositeBranchCondition(CC);
        std::swap(TrueC, FalseC);
      }

      // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3.  Likewise for any pow2/0.
      // This is efficient for any integer data type (including i8/i16) and
      // shift amount.
      if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
        SDValue Cond = N->getOperand(3);
        Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
                           DAG.getConstant(CC, MVT::i8), Cond);

        // Zero extend the condition if needed.
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);

        unsigned ShAmt = TrueC->getAPIntValue().logBase2();
        Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond,
                           DAG.getConstant(ShAmt, MVT::i8));
        if (N->getNumValues() == 2)  // Dead flag value?
          return DCI.CombineTo(N, Cond, SDValue());
        return Cond;
      }

      // Optimize Cond ? cst+1 : cst -> zext(setcc(C)) + cst.  This is
      // efficient for any integer data type, including i8/i16.
      if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
        SDValue Cond = N->getOperand(3);
        Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
                           DAG.getConstant(CC, MVT::i8), Cond);

        // Zero extend the condition if needed.
        Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
                           FalseC->getValueType(0), Cond);
        Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
                           SDValue(FalseC, 0));

        if (N->getNumValues() == 2)  // Dead flag value?
          return DCI.CombineTo(N, Cond, SDValue());
        return Cond;
      }

      // Optimize cases that will turn into an LEA instruction.  This requires
      // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
      if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
        uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
        if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;

        bool isFastMultiplier = false;
        if (Diff < 10) {
          switch ((unsigned char)Diff) {
          default: break;
          case 1:  // result = add base, cond
          case 2:  // result = lea base(    , cond*2)
          case 3:  // result = lea base(cond, cond*2)
          case 4:  // result = lea base(    , cond*4)
          case 5:  // result = lea base(cond, cond*4)
          case 8:  // result = lea base(    , cond*8)
          case 9:  // result = lea base(cond, cond*8)
            isFastMultiplier = true;
            break;
          }
        }

        if (isFastMultiplier) {
          APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
          SDValue Cond = N->getOperand(3);
          Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
                             DAG.getConstant(CC, MVT::i8), Cond);
          // Zero extend the condition if needed.
          Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
                             Cond);
          // Scale the condition by the difference.
          if (Diff != 1)
            Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
                               DAG.getConstant(Diff, Cond.getValueType()));

          // Add the base if non-zero.
          if (FalseC->getAPIntValue() != 0)
            Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
                               SDValue(FalseC, 0));
          if (N->getNumValues() == 2)  // Dead flag value?
            return DCI.CombineTo(N, Cond, SDValue());
          return Cond;
        }
      }
    }
  }
  return SDValue();
}


/// PerformMulCombine - Optimize a single multiply by a constant into two
/// multiplies in order to implement it with two cheaper instructions, e.g.
/// LEA + SHL, LEA + LEA.
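/// For illustration: 45 == 9 * 5, so "mul x, 45" can be emitted as two LEAs
/// (lea (x,x,8) then lea (r,r,4)), and 24 == 3 << 3 maps to LEA + SHL.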
static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI) {
  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64)
    return SDValue();

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!C)
    return SDValue();
  uint64_t MulAmt = C->getZExtValue();
  if (isPowerOf2_64(MulAmt) || MulAmt == 3 || MulAmt == 5 || MulAmt == 9)
    return SDValue();

  uint64_t MulAmt1 = 0;
  uint64_t MulAmt2 = 0;
  if ((MulAmt % 9) == 0) {
    MulAmt1 = 9;
    MulAmt2 = MulAmt / 9;
  } else if ((MulAmt % 5) == 0) {
    MulAmt1 = 5;
    MulAmt2 = MulAmt / 5;
  } else if ((MulAmt % 3) == 0) {
    MulAmt1 = 3;
    MulAmt2 = MulAmt / 3;
  }
  if (MulAmt2 &&
      (isPowerOf2_64(MulAmt2) || MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)) {
    DebugLoc DL = N->getDebugLoc();

    if (isPowerOf2_64(MulAmt2) &&
        !(N->hasOneUse() && N->use_begin()->getOpcode() == ISD::ADD))
      // If the second multiplier is a power of 2, issue it first. We want the
      // multiply by 3, 5, or 9 to be folded into the addressing mode unless
      // the lone use is an add.
      std::swap(MulAmt1, MulAmt2);

    SDValue NewMul;
    if (isPowerOf2_64(MulAmt1))
      NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                           DAG.getConstant(Log2_64(MulAmt1), MVT::i8));
    else
      NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
                           DAG.getConstant(MulAmt1, VT));

    if (isPowerOf2_64(MulAmt2))
      NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
                           DAG.getConstant(Log2_64(MulAmt2), MVT::i8));
    else
      NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
                           DAG.getConstant(MulAmt2, VT));

    // Do not add new nodes to DAG combiner worklist.
    DCI.CombineTo(N, NewMul, false);
  }
  return SDValue();
}

static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
  // since the result of setcc_c is all zeros or all ones.
  if (N1C && N0.getOpcode() == ISD::AND &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
        ((N00.getOpcode() == ISD::ANY_EXTEND ||
          N00.getOpcode() == ISD::ZERO_EXTEND) &&
         N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) {
      APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
      APInt ShAmt = N1C->getAPIntValue();
      Mask = Mask.shl(ShAmt);
      if (Mask != 0)
        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                           N00, DAG.getConstant(Mask, VT));
    }
  }

  return SDValue();
}

/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
/// when possible.
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
                                   const X86Subtarget *Subtarget) {
  EVT VT = N->getValueType(0);
  if (!VT.isVector() && VT.isInteger() &&
      N->getOpcode() == ISD::SHL)
    return PerformSHLCombine(N, DAG);

  // On X86 with SSE2 support, we can transform this to a vector shift if
  // all elements are shifted by the same amount.  We can't do this in legalize
  // because a constant vector is typically transformed to a constant pool
  // so we have no knowledge of the shift amount.
  if (!Subtarget->hasSSE2())
    return SDValue();

  if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
    return SDValue();

  SDValue ShAmtOp = N->getOperand(1);
  EVT EltVT = VT.getVectorElementType();
  DebugLoc DL = N->getDebugLoc();
  SDValue BaseShAmt = SDValue();
  if (ShAmtOp.getOpcode() == ISD::BUILD_VECTOR) {
    unsigned NumElts = VT.getVectorNumElements();
    unsigned i = 0;
    for (; i != NumElts; ++i) {
      SDValue Arg = ShAmtOp.getOperand(i);
      if (Arg.getOpcode() == ISD::UNDEF) continue;
      BaseShAmt = Arg;
      break;
    }
    for (; i != NumElts; ++i) {
      SDValue Arg = ShAmtOp.getOperand(i);
      if (Arg.getOpcode() == ISD::UNDEF) continue;
      if (Arg != BaseShAmt)
        return SDValue();
    }
  } else if (ShAmtOp.getOpcode() == ISD::VECTOR_SHUFFLE &&
             cast<ShuffleVectorSDNode>(ShAmtOp)->isSplat()) {
    SDValue InVec = ShAmtOp.getOperand(0);
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      unsigned NumElts = InVec.getValueType().getVectorNumElements();
      unsigned i = 0;
      for (; i != NumElts; ++i) {
        SDValue Arg = InVec.getOperand(i);
        if (Arg.getOpcode() == ISD::UNDEF) continue;
        BaseShAmt = Arg;
        break;
      }
    } else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
        unsigned SplatIdx = cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
        if (C->getZExtValue() == SplatIdx)
          BaseShAmt = InVec.getOperand(1);
      }
    }
    if (BaseShAmt.getNode() == 0)
      BaseShAmt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, ShAmtOp,
                              DAG.getIntPtrConstant(0));
  } else
    return SDValue();

  // The shift amount is an i32.
  if (EltVT.bitsGT(MVT::i32))
    BaseShAmt = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, BaseShAmt);
  else if (EltVT.bitsLT(MVT::i32))
    BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, BaseShAmt);

  // The shift amount is identical so we can do a vector shift.
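  // For example, (shl v4i32 x, <5,5,5,5>) becomes the pslli_d intrinsic with
  // the single scalar count 5; the switch below picks the matching SSE2
  // immediate-shift intrinsic per element type and opcode.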
  SDValue ValOp = N->getOperand(0);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unknown shift opcode!");
    break;
  case ISD::SHL:
    if (VT == MVT::v2i64)
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                         DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
                         ValOp, BaseShAmt);
    if (VT == MVT::v4i32)
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                         DAG.getConstant(Intrinsic::x86_sse2_pslli_d, MVT::i32),
                         ValOp, BaseShAmt);
    if (VT == MVT::v8i16)
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                         DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32),
                         ValOp, BaseShAmt);
    break;
  case ISD::SRA:
    if (VT == MVT::v4i32)
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                         DAG.getConstant(Intrinsic::x86_sse2_psrai_d, MVT::i32),
                         ValOp, BaseShAmt);
    if (VT == MVT::v8i16)
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                         DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32),
                         ValOp, BaseShAmt);
    break;
  case ISD::SRL:
    if (VT == MVT::v2i64)
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                         DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
                         ValOp, BaseShAmt);
    if (VT == MVT::v4i32)
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                         DAG.getConstant(Intrinsic::x86_sse2_psrli_d, MVT::i32),
                         ValOp, BaseShAmt);
    if (VT == MVT::v8i16)
      return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
                         DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32),
                         ValOp, BaseShAmt);
    break;
  }
  return SDValue();
}

static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
                                const X86Subtarget *Subtarget) {
  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 || !Subtarget->is64Bit())
    return SDValue();

  // fold (or (x << c) | (y >> (64 - c))) ==> (shld64 x, y, c)
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();

  SDValue ShAmt0 = N0.getOperand(1);
  if (ShAmt0.getValueType() != MVT::i8)
    return SDValue();
  SDValue ShAmt1 = N1.getOperand(1);
  if (ShAmt1.getValueType() != MVT::i8)
    return SDValue();
  if (ShAmt0.getOpcode() == ISD::TRUNCATE)
    ShAmt0 = ShAmt0.getOperand(0);
  if (ShAmt1.getOpcode() == ISD::TRUNCATE)
    ShAmt1 = ShAmt1.getOperand(0);

  DebugLoc DL = N->getDebugLoc();
  unsigned Opc = X86ISD::SHLD;
  SDValue Op0 = N0.getOperand(0);
  SDValue Op1 = N1.getOperand(0);
  if (ShAmt0.getOpcode() == ISD::SUB) {
    Opc = X86ISD::SHRD;
    std::swap(Op0, Op1);
    std::swap(ShAmt0, ShAmt1);
  }

  if (ShAmt1.getOpcode() == ISD::SUB) {
    SDValue Sum = ShAmt1.getOperand(0);
    if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
      if (SumC->getSExtValue() == 64 &&
          ShAmt1.getOperand(1) == ShAmt0)
        return DAG.getNode(Opc, DL, VT,
                           Op0, Op1,
                           DAG.getNode(ISD::TRUNCATE, DL,
                                       MVT::i8, ShAmt0));
    }
  } else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
    ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
    if (ShAmt0C &&
        ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == 64)
      return DAG.getNode(Opc, DL, VT,
                         N0.getOperand(0), N1.getOperand(0),
                         DAG.getNode(ISD::TRUNCATE, DL,
                                     MVT::i8, ShAmt0));
  }

  return SDValue();
}

/// PerformSTORECombine - Do target-specific dag combines on STORE nodes.
static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
                                   const X86Subtarget *Subtarget) {
  // Turn load->store of MMX types into GPR load/stores.  This avoids
  // clobbering the FP state in cases where an emms may be missing.
  // A preferable solution to the general problem is to figure out the right
  // places to insert EMMS.  This qualifies as a quick hack.

  // Similarly, turn load->store of i64 into double load/stores in 32-bit mode.
  StoreSDNode *St = cast<StoreSDNode>(N);
  EVT VT = St->getValue().getValueType();
  if (VT.getSizeInBits() != 64)
    return SDValue();

  const Function *F = DAG.getMachineFunction().getFunction();
  bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
  bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
                    && Subtarget->hasSSE2();
  if ((VT.isVector() ||
       (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
      isa<LoadSDNode>(St->getValue()) &&
      !cast<LoadSDNode>(St->getValue())->isVolatile() &&
      St->getChain().hasOneUse() && !St->isVolatile()) {
    SDNode* LdVal = St->getValue().getNode();
    LoadSDNode *Ld = 0;
    int TokenFactorIndex = -1;
    SmallVector<SDValue, 8> Ops;
    SDNode* ChainVal = St->getChain().getNode();
    // Must be a store of a load.  We currently handle two cases:  the load
    // is a direct child, or it's under an intervening TokenFactor.  It is
    // possible to dig deeper under nested TokenFactors.
    if (ChainVal == LdVal)
      Ld = cast<LoadSDNode>(St->getChain());
    else if (St->getValue().hasOneUse() &&
             ChainVal->getOpcode() == ISD::TokenFactor) {
      for (unsigned i = 0, e = ChainVal->getNumOperands(); i != e; ++i) {
        if (ChainVal->getOperand(i).getNode() == LdVal) {
          TokenFactorIndex = i;
          Ld = cast<LoadSDNode>(St->getValue());
        } else
          Ops.push_back(ChainVal->getOperand(i));
      }
    }

    if (!Ld || !ISD::isNormalLoad(Ld))
      return SDValue();

    // If this is not the MMX case, i.e. we are just turning i64 load/store
    // into f64 load/store, avoid the transformation if there are multiple
    // uses of the loaded value.
    if (!VT.isVector() && !Ld->hasNUsesOfValue(1, 0))
      return SDValue();

    DebugLoc LdDL = Ld->getDebugLoc();
    DebugLoc StDL = N->getDebugLoc();
    // If we are a 64-bit capable x86, lower to a single movq load/store pair.
    // Otherwise, if it's legal to use f64 SSE instructions, use f64 load/store
    // pair instead.
    if (Subtarget->is64Bit() || F64IsLegal) {
      EVT LdVT = Subtarget->is64Bit() ? MVT::i64 : MVT::f64;
      SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(),
                                  Ld->getBasePtr(), Ld->getSrcValue(),
                                  Ld->getSrcValueOffset(), Ld->isVolatile(),
                                  Ld->isNonTemporal(), Ld->getAlignment());
      SDValue NewChain = NewLd.getValue(1);
      if (TokenFactorIndex != -1) {
        Ops.push_back(NewChain);
        NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
                               Ops.size());
      }
      return DAG.getStore(NewChain, StDL, NewLd, St->getBasePtr(),
                          St->getSrcValue(), St->getSrcValueOffset(),
                          St->isVolatile(), St->isNonTemporal(),
                          St->getAlignment());
    }

    // Otherwise, lower to two pairs of 32-bit loads / stores.
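    // i.e. the i64 load from [P] becomes i32 loads from [P] and [P+4], and
    // the store likewise splits into [Q] and [Q+4]; note the high halves
    // only get MinAlign(original alignment, 4).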
    SDValue LoAddr = Ld->getBasePtr();
    SDValue HiAddr = DAG.getNode(ISD::ADD, LdDL, MVT::i32, LoAddr,
                                 DAG.getConstant(4, MVT::i32));

    SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr,
                               Ld->getSrcValue(), Ld->getSrcValueOffset(),
                               Ld->isVolatile(), Ld->isNonTemporal(),
                               Ld->getAlignment());
    SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr,
                               Ld->getSrcValue(), Ld->getSrcValueOffset()+4,
                               Ld->isVolatile(), Ld->isNonTemporal(),
                               MinAlign(Ld->getAlignment(), 4));

    SDValue NewChain = LoLd.getValue(1);
    if (TokenFactorIndex != -1) {
      Ops.push_back(LoLd);
      Ops.push_back(HiLd);
      NewChain = DAG.getNode(ISD::TokenFactor, LdDL, MVT::Other, &Ops[0],
                             Ops.size());
    }

    LoAddr = St->getBasePtr();
    HiAddr = DAG.getNode(ISD::ADD, StDL, MVT::i32, LoAddr,
                         DAG.getConstant(4, MVT::i32));

    SDValue LoSt = DAG.getStore(NewChain, StDL, LoLd, LoAddr,
                                St->getSrcValue(), St->getSrcValueOffset(),
                                St->isVolatile(), St->isNonTemporal(),
                                St->getAlignment());
    SDValue HiSt = DAG.getStore(NewChain, StDL, HiLd, HiAddr,
                                St->getSrcValue(),
                                St->getSrcValueOffset() + 4,
                                St->isVolatile(),
                                St->isNonTemporal(),
                                MinAlign(St->getAlignment(), 4));
    return DAG.getNode(ISD::TokenFactor, StDL, MVT::Other, LoSt, HiSt);
  }
  return SDValue();
}

/// PerformFORCombine - Do target-specific dag combines on X86ISD::FOR and
/// X86ISD::FXOR nodes.
static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
  // F[X]OR(0.0, x) -> x
  // F[X]OR(x, 0.0) -> x
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(1);
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(0);
  return SDValue();
}

/// PerformFANDCombine - Do target-specific dag combines on X86ISD::FAND nodes.
static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {
  // FAND(0.0, x) -> 0.0
  // FAND(x, 0.0) -> 0.0
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(0);
  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
    if (C->getValueAPF().isPosZero())
      return N->getOperand(1);
  return SDValue();
}

static SDValue PerformBTCombine(SDNode *N,
                                SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI) {
  // BT ignores high bits in the bit index operand.
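  // For a 32-bit BT only index bits [4:0] matter (Log2_32(32) == 5), so we
  // can mark everything above them as not demanded and let the constant
  // shrink or the index computation simplify.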
  SDValue Op1 = N->getOperand(1);
  if (Op1.hasOneUse()) {
    unsigned BitWidth = Op1.getValueSizeInBits();
    APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
    APInt KnownZero, KnownOne;
    TargetLowering::TargetLoweringOpt TLO(DAG);
    TargetLowering &TLI = DAG.getTargetLoweringInfo();
    if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) ||
        TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO))
      DCI.CommitTargetLoweringOpt(TLO);
  }
  return SDValue();
}

static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
  SDValue Op = N->getOperand(0);
  if (Op.getOpcode() == ISD::BIT_CONVERT)
    Op = Op.getOperand(0);
  EVT VT = N->getValueType(0), OpVT = Op.getValueType();
  if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
      VT.getVectorElementType().getSizeInBits() ==
      OpVT.getVectorElementType().getSizeInBits()) {
    return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
  }
  return SDValue();
}

// On X86 and X86-64, atomic operations are lowered to locked instructions.
// Locked instructions, in turn, have implicit fence semantics (all memory
// operations are flushed before issuing the locked instruction, and they
// are not buffered), so we can fold away the common pattern of
// fence-atomic-fence.
static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) {
  SDValue atomic = N->getOperand(0);
  switch (atomic.getOpcode()) {
  case ISD::ATOMIC_CMP_SWAP:
  case ISD::ATOMIC_SWAP:
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_SUB:
  case ISD::ATOMIC_LOAD_AND:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_LOAD_NAND:
  case ISD::ATOMIC_LOAD_MIN:
  case ISD::ATOMIC_LOAD_MAX:
  case ISD::ATOMIC_LOAD_UMIN:
  case ISD::ATOMIC_LOAD_UMAX:
    break;
  default:
    return SDValue();
  }

  SDValue fence = atomic.getOperand(0);
  if (fence.getOpcode() != ISD::MEMBARRIER)
    return SDValue();

  switch (atomic.getOpcode()) {
  case ISD::ATOMIC_CMP_SWAP:
    return DAG.UpdateNodeOperands(atomic, fence.getOperand(0),
                                  atomic.getOperand(1), atomic.getOperand(2),
                                  atomic.getOperand(3));
  case ISD::ATOMIC_SWAP:
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_SUB:
  case ISD::ATOMIC_LOAD_AND:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_LOAD_NAND:
  case ISD::ATOMIC_LOAD_MIN:
  case ISD::ATOMIC_LOAD_MAX:
  case ISD::ATOMIC_LOAD_UMIN:
  case ISD::ATOMIC_LOAD_UMAX:
    return DAG.UpdateNodeOperands(atomic, fence.getOperand(0),
                                  atomic.getOperand(1), atomic.getOperand(2));
  default:
    return SDValue();
  }
}

static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG) {
  // (i32 zext (and (i8 x86isd::setcc_carry), 1)) ->
  //   (and (i32 x86isd::setcc_carry), 1)
  // This eliminates the zext.  This transformation is necessary because
  // ISD::SETCC is always legalized to i8.
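  // In other words, rebuild SETCC_CARRY directly at the wider type so the
  // explicit extension (e.g. a movzbl) disappears; SETCC_CARRY materializes
  // all-zeros or all-ones at whatever width it is given.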
  DebugLoc dl = N->getDebugLoc();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  if (N0.getOpcode() == ISD::AND &&
      N0.hasOneUse() &&
      N0.getOperand(0).hasOneUse()) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getOpcode() != X86ISD::SETCC_CARRY)
      return SDValue();
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!C || C->getZExtValue() != 1)
      return SDValue();
    return DAG.getNode(ISD::AND, dl, VT,
                       DAG.getNode(X86ISD::SETCC_CARRY, dl, VT,
                                   N00.getOperand(0), N00.getOperand(1)),
                       DAG.getConstant(1, VT));
  }

  return SDValue();
}

SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
  case ISD::EXTRACT_VECTOR_ELT:
                        return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
  case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
  case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI);
  case ISD::MUL:            return PerformMulCombine(N, DAG, DCI);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:            return PerformShiftCombine(N, DAG, Subtarget);
  case ISD::OR:             return PerformOrCombine(N, DAG, Subtarget);
  case ISD::STORE:          return PerformSTORECombine(N, DAG, Subtarget);
  case X86ISD::FXOR:
  case X86ISD::FOR:         return PerformFORCombine(N, DAG);
  case X86ISD::FAND:        return PerformFANDCombine(N, DAG);
  case X86ISD::BT:          return PerformBTCombine(N, DAG, DCI);
  case X86ISD::VZEXT_MOVL:  return PerformVZEXT_MOVLCombine(N, DAG);
  case ISD::MEMBARRIER:     return PerformMEMBARRIERCombine(N, DAG);
  case ISD::ZERO_EXTEND:    return PerformZExtCombine(N, DAG);
  }

  return SDValue();
}

//===----------------------------------------------------------------------===//
//                           X86 Inline Assembly Support
//===----------------------------------------------------------------------===//

static bool LowerToBSwap(CallInst *CI) {
  // FIXME: this should verify that we are targeting a 486 or better.  If not,
  // we will turn this bswap into something that will be lowered to logical ops
  // instead of emitting the bswap asm.  For now, we don't support 486 or lower
  // so don't worry about this.

  // Verify this is a simple bswap.
  if (CI->getNumOperands() != 2 ||
      CI->getType() != CI->getOperand(1)->getType() ||
      !CI->getType()->isIntegerTy())
    return false;

  const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
  if (!Ty || Ty->getBitWidth() % 16 != 0)
    return false;

  // Okay, we can do this xform, do so now.
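  // That is, replace the inline-asm call with an equivalent call to the
  // llvm.bswap.iN intrinsic on the same operand, RAUW the original call's
  // uses, and erase it; isel then picks the best lowering for the intrinsic.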
  const Type *Tys[] = { Ty };
  Module *M = CI->getParent()->getParent()->getParent();
  Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);

  Value *Op = CI->getOperand(1);
  Op = CallInst::Create(Int, Op, CI->getName(), CI);

  CI->replaceAllUsesWith(Op);
  CI->eraseFromParent();
  return true;
}

bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
  InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
  std::vector<InlineAsm::ConstraintInfo> Constraints = IA->ParseConstraints();

  std::string AsmStr = IA->getAsmString();

  // TODO: should remove alternatives from the asmstring: "foo {a|b}" -> "foo a"
  SmallVector<StringRef, 4> AsmPieces;
  SplitString(AsmStr, AsmPieces, "\n");  // ; as separator?

  switch (AsmPieces.size()) {
  default: return false;
  case 1:
    AsmStr = AsmPieces[0];
    AsmPieces.clear();
    SplitString(AsmStr, AsmPieces, " \t");  // Split with whitespace.

    // bswap $0
    if (AsmPieces.size() == 2 &&
        (AsmPieces[0] == "bswap" ||
         AsmPieces[0] == "bswapq" ||
         AsmPieces[0] == "bswapl") &&
        (AsmPieces[1] == "$0" ||
         AsmPieces[1] == "${0:q}")) {
      // No need to check constraints, nothing other than the equivalent of
      // "=r,0" would be valid here.
      return LowerToBSwap(CI);
    }
    // rorw $$8, ${0:w}  -->  llvm.bswap.i16
    if (CI->getType()->isIntegerTy(16) &&
        AsmPieces.size() == 3 &&
        (AsmPieces[0] == "rorw" || AsmPieces[0] == "rolw") &&
        AsmPieces[1] == "$$8," &&
        AsmPieces[2] == "${0:w}" &&
        IA->getConstraintString().compare(0, 5, "=r,0,") == 0) {
      AsmPieces.clear();
      const std::string &Constraints = IA->getConstraintString();
      SplitString(StringRef(Constraints).substr(5), AsmPieces, ",");
      std::sort(AsmPieces.begin(), AsmPieces.end());
      if (AsmPieces.size() == 4 &&
          AsmPieces[0] == "~{cc}" &&
          AsmPieces[1] == "~{dirflag}" &&
          AsmPieces[2] == "~{flags}" &&
          AsmPieces[3] == "~{fpsr}") {
        return LowerToBSwap(CI);
      }
    }
    break;
  case 3:
    if (CI->getType()->isIntegerTy(64) &&
        Constraints.size() >= 2 &&
        Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
        Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
      // bswap %eax / bswap %edx / xchgl %eax, %edx  -> llvm.bswap.i64
      SmallVector<StringRef, 4> Words;
      SplitString(AsmPieces[0], Words, " \t");
      if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%eax") {
        Words.clear();
        SplitString(AsmPieces[1], Words, " \t");
        if (Words.size() == 2 && Words[0] == "bswap" && Words[1] == "%edx") {
          Words.clear();
          SplitString(AsmPieces[2], Words, " \t,");
          if (Words.size() == 3 && Words[0] == "xchgl" && Words[1] == "%eax" &&
              Words[2] == "%edx") {
            return LowerToBSwap(CI);
          }
        }
      }
    }
    break;
  }
  return false;
}


/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
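/// For example, 'r' and 'x' name register classes (C_RegisterClass), 'A'
/// names a specific register pair (C_Register), and 'e'/'Z' are
/// immediate-range constraints (C_Other).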
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'A':
      return C_Register;
    case 'f':
    case 'r':
    case 'R':
    case 'l':
    case 'q':
    case 'Q':
    case 'x':
    case 'y':
    case 'Y':
      return C_RegisterClass;
    case 'e':
    case 'Z':
      return C_Other;
    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// LowerXConstraint - try to replace an X constraint, which matches anything,
/// with another that has more specific requirements based on the type of the
/// corresponding operand.
const char *X86TargetLowering::
LowerXConstraint(EVT ConstraintVT) const {
  // FP X constraints get lowered to SSE1/2 registers if available, otherwise
  // 'f' like normal targets.
  if (ConstraintVT.isFloatingPoint()) {
    if (Subtarget->hasSSE2())
      return "Y";
    if (Subtarget->hasSSE1())
      return "x";
  }

  return TargetLowering::LowerXConstraint(ConstraintVT);
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     char Constraint,
                                                     bool hasMemory,
                                                     std::vector<SDValue>&Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result(0, 0);

  switch (Constraint) {
  default: break;
  case 'I':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 31) {
        Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'J':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 63) {
        Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'K':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if ((int8_t)C->getSExtValue() == C->getSExtValue()) {
        Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'N':
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 255) {
        Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
        break;
      }
    }
    return;
  case 'e': {
    // 32-bit signed value
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      const ConstantInt *CI = C->getConstantIntValue();
      if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
                                  C->getSExtValue())) {
        // Widen to 64 bits here to get it sign extended.
        Result = DAG.getTargetConstant(C->getSExtValue(), MVT::i64);
        break;
      }
      // FIXME gcc accepts some relocatable values here too, but only in
      // certain memory models; it's complicated.
    }
    return;
  }
  case 'Z': {
    // 32-bit unsigned value
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      const ConstantInt *CI = C->getConstantIntValue();
      if (CI->isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
                                  C->getZExtValue())) {
        Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
        break;
      }
    }
    // FIXME gcc accepts some relocatable values here too, but only in
    // certain memory models; it's complicated.
    return;
  }
  case 'i': {
    // Literal immediates are always ok.
    if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
      // Widen to 64 bits here to get it sign extended.
      Result = DAG.getTargetConstant(CST->getSExtValue(), MVT::i64);
      break;
    }

    // If we are in non-pic codegen mode, we allow the address of a global
    // (with an optional displacement) to be used with 'i'.
    GlobalAddressSDNode *GA = 0;
    int64_t Offset = 0;

    // Match either (GA), (GA+C), (GA+C1+C2), etc.
    while (1) {
      if ((GA = dyn_cast<GlobalAddressSDNode>(Op))) {
        Offset += GA->getOffset();
        break;
      } else if (Op.getOpcode() == ISD::ADD) {
        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
          Offset += C->getZExtValue();
          Op = Op.getOperand(0);
          continue;
        }
      } else if (Op.getOpcode() == ISD::SUB) {
        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
          Offset += -C->getZExtValue();
          Op = Op.getOperand(0);
          continue;
        }
      }

      // Otherwise, this isn't something we can handle, reject it.
      return;
    }

    const GlobalValue *GV = GA->getGlobal();
    // If we require an extra load to get this address, as in PIC mode, we
    // can't accept it.
    if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV,
                                                        getTargetMachine())))
      return;

    if (hasMemory)
      Op = LowerGlobalAddress(GV, Op.getDebugLoc(), Offset, DAG);
    else
      Op = DAG.getTargetGlobalAddress(GV, GA->getValueType(0), Offset);
    Result = Op;
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, hasMemory,
                                                      Ops, DAG);
}

std::vector<unsigned> X86TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  EVT VT) const {
  if (Constraint.size() == 1) {
    // FIXME: not handling fp-stack yet!
    switch (Constraint[0]) {      // GCC X86 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'q':   // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
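      // In 64-bit mode every GPR has a byte subregister, so 'q' can hand
      // back the full 16-register set below; in 32-bit mode it falls through
      // to the four-register Q_REGS class handled by 'Q'.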
      if (Subtarget->is64Bit()) {
        if (VT == MVT::i32)
          return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
                                       X86::ESI, X86::EDI, X86::R8D, X86::R9D,
                                       X86::R10D,X86::R11D,X86::R12D,
                                       X86::R13D,X86::R14D,X86::R15D,
                                       X86::EBP, X86::ESP, 0);
        else if (VT == MVT::i16)
          return make_vector<unsigned>(X86::AX,  X86::DX,  X86::CX, X86::BX,
                                       X86::SI,  X86::DI,  X86::R8W,X86::R9W,
                                       X86::R10W,X86::R11W,X86::R12W,
                                       X86::R13W,X86::R14W,X86::R15W,
                                       X86::BP,  X86::SP, 0);
        else if (VT == MVT::i8)
          return make_vector<unsigned>(X86::AL,  X86::DL,  X86::CL, X86::BL,
                                       X86::SIL, X86::DIL, X86::R8B,X86::R9B,
                                       X86::R10B,X86::R11B,X86::R12B,
                                       X86::R13B,X86::R14B,X86::R15B,
                                       X86::BPL, X86::SPL, 0);

        else if (VT == MVT::i64)
          return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX,
                                       X86::RSI, X86::RDI, X86::R8,  X86::R9,
                                       X86::R10, X86::R11, X86::R12,
                                       X86::R13, X86::R14, X86::R15,
                                       X86::RBP, X86::RSP, 0);

        break;
      }
      // 32-bit fallthrough
    case 'Q':   // Q_REGS
      if (VT == MVT::i32)
        return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
      else if (VT == MVT::i16)
        return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
      else if (VT == MVT::i8)
        return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
      else if (VT == MVT::i64)
        return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
      break;
    }
  }

  return std::vector<unsigned>();
}

std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                EVT VT) const {
  // First, see if this is a constraint that directly corresponds to an LLVM
  // register class.
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'r':   // GENERAL_REGS
    case 'l':   // INDEX_REGS
      if (VT == MVT::i8)
        return std::make_pair(0U, X86::GR8RegisterClass);
      if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16RegisterClass);
      if (VT == MVT::i32 || !Subtarget->is64Bit())
        return std::make_pair(0U, X86::GR32RegisterClass);
      return std::make_pair(0U, X86::GR64RegisterClass);
    case 'R':   // LEGACY_REGS
      if (VT == MVT::i8)
        return std::make_pair(0U, X86::GR8_NOREXRegisterClass);
      if (VT == MVT::i16)
        return std::make_pair(0U, X86::GR16_NOREXRegisterClass);
      if (VT == MVT::i32 || !Subtarget->is64Bit())
        return std::make_pair(0U, X86::GR32_NOREXRegisterClass);
      return std::make_pair(0U, X86::GR64_NOREXRegisterClass);
    case 'f':   // FP Stack registers.
      // If SSE is enabled for this VT, use f80 to ensure the isel moves the
      // value to the correct fpstack register class.
      if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT))
        return std::make_pair(0U, X86::RFP32RegisterClass);
      if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT))
        return std::make_pair(0U, X86::RFP64RegisterClass);
      return std::make_pair(0U, X86::RFP80RegisterClass);
    case 'y':   // MMX_REGS if MMX allowed.
      if (!Subtarget->hasMMX()) break;
      return std::make_pair(0U, X86::VR64RegisterClass);
    case 'Y':   // SSE_REGS if SSE2 allowed
      if (!Subtarget->hasSSE2()) break;
      // FALL THROUGH.
    case 'x':   // SSE_REGS if SSE1 allowed
      if (!Subtarget->hasSSE1()) break;

      switch (VT.getSimpleVT().SimpleTy) {
      default: break;
      // Scalar SSE types.
      case MVT::f32:
      case MVT::i32:
        return std::make_pair(0U, X86::FR32RegisterClass);
      case MVT::f64:
      case MVT::i64:
        return std::make_pair(0U, X86::FR64RegisterClass);
      // Vector types.
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        return std::make_pair(0U, X86::VR128RegisterClass);
      }
      break;
    }
  }

  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<unsigned, const TargetRegisterClass*> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);

  // Not found as a standard register?
  if (Res.second == 0) {
    // Map st(0) .. st(7) to ST0 .. ST7.
    if (Constraint.size() == 7 && Constraint[0] == '{' &&
        tolower(Constraint[1]) == 's' &&
        tolower(Constraint[2]) == 't' &&
        Constraint[3] == '(' &&
        (Constraint[4] >= '0' && Constraint[4] <= '7') &&
        Constraint[5] == ')' &&
        Constraint[6] == '}') {

      Res.first = X86::ST0+Constraint[4]-'0';
      Res.second = X86::RFP80RegisterClass;
      return Res;
    }

    // GCC allows "st(0)" to be called just plain "st".
    if (StringRef("{st}").equals_lower(Constraint)) {
      Res.first = X86::ST0;
      Res.second = X86::RFP80RegisterClass;
      return Res;
    }

    // flags -> EFLAGS
    if (StringRef("{flags}").equals_lower(Constraint)) {
      Res.first = X86::EFLAGS;
      Res.second = X86::CCRRegisterClass;
      return Res;
    }

    // 'A' means EAX + EDX.
    if (Constraint == "A") {
      Res.first = X86::EAX;
      Res.second = X86::GR32_ADRegisterClass;
      return Res;
    }
    return Res;
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type.  For example, we want to map "{ax},i32" -> {eax}, we don't want it
  // to turn into {ax},{dx}.
  if (Res.second->hasType(VT))
    return Res;   // Correct type already, nothing to do.

  // All of the single-register GCC register classes map their values onto
  // 16-bit register pieces "ax","dx","cx","bx","si","di","bp","sp".  If we
  // really want an 8-bit or 32-bit register, map to the appropriate register
  // class and return the appropriate register.
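  // For example, the constraint "{ax}" with an i32 operand initially resolves
  // to AX in GR16; the code below remaps that to EAX in GR32 (or RAX in GR64
  // for i64) so the operand gets the register width it actually needs.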
  if (Res.second == X86::GR16RegisterClass) {
    if (VT == MVT::i8) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::AL; break;
      case X86::DX: DestReg = X86::DL; break;
      case X86::CX: DestReg = X86::CL; break;
      case X86::BX: DestReg = X86::BL; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = X86::GR8RegisterClass;
      }
    } else if (VT == MVT::i32) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::EAX; break;
      case X86::DX: DestReg = X86::EDX; break;
      case X86::CX: DestReg = X86::ECX; break;
      case X86::BX: DestReg = X86::EBX; break;
      case X86::SI: DestReg = X86::ESI; break;
      case X86::DI: DestReg = X86::EDI; break;
      case X86::BP: DestReg = X86::EBP; break;
      case X86::SP: DestReg = X86::ESP; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = X86::GR32RegisterClass;
      }
    } else if (VT == MVT::i64) {
      unsigned DestReg = 0;
      switch (Res.first) {
      default: break;
      case X86::AX: DestReg = X86::RAX; break;
      case X86::DX: DestReg = X86::RDX; break;
      case X86::CX: DestReg = X86::RCX; break;
      case X86::BX: DestReg = X86::RBX; break;
      case X86::SI: DestReg = X86::RSI; break;
      case X86::DI: DestReg = X86::RDI; break;
      case X86::BP: DestReg = X86::RBP; break;
      case X86::SP: DestReg = X86::RSP; break;
      }
      if (DestReg) {
        Res.first = DestReg;
        Res.second = X86::GR64RegisterClass;
      }
    }
  } else if (Res.second == X86::FR32RegisterClass ||
             Res.second == X86::FR64RegisterClass ||
             Res.second == X86::VR128RegisterClass) {
    // Handle references to XMM physical registers that got mapped into the
    // wrong class.  This can happen with constraints like {xmm0} where the
    // target independent register mapper will just pick the first match it can
    // find, ignoring the required type.
    if (VT == MVT::f32)
      Res.second = X86::FR32RegisterClass;
    else if (VT == MVT::f64)
      Res.second = X86::FR64RegisterClass;
    else if (X86::VR128RegisterClass->hasType(VT))
      Res.second = X86::VR128RegisterClass;
  }

  return Res;
}