SPUISelLowering.cpp revision d258c49589f3befd161a5ab27fd635b1dbdafc10
//
//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "SPUMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! EVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    EVT valtype;
    int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(EVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      report_fatal_error("getValueTypeMapEntry returns NULL for " +
                         Twine(VT.getEVTString()));
    }
#endif

    return retval;
  }

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */

  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      EVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy =
      Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
    std::pair<SDValue, SDValue> CallInfo =
      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                      0, TLI.getLibcallCallingConv(LC), false,
                      /*isReturnValueUsed=*/true,
                      Callee, Args, DAG, Op.getDebugLoc());

    return CallInfo.first;
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8,  Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM,    MVT::i8,   Expand);
  setOperationAction(ISD::UREM,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::SREM,    MVT::i16,  Expand);
  setOperationAction(ISD::UREM,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::SREM,    MVT::i32,  Expand);
  setOperationAction(ISD::UREM,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::SREM,    MVT::i64,  Expand);
  setOperationAction(ISD::UREM,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::SREM,    MVT::i128, Expand);
  setOperationAction(ISD::UREM,    MVT::i128, Expand);
  setOperationAction(ISD::SDIV,    MVT::i128, Expand);
  setOperationAction(ISD::UDIV,    MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32!)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        .td files.
  setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32,    Legal);
  setOperationAction(ISD::ROTL, MVT::i16,    Legal);
  setOperationAction(ISD::ROTL, MVT::i8,     Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRA,  MVT::i8,     Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL,  MVT::i64,    Legal);
  setOperationAction(ISD::SRL,  MVT::i64,    Legal);
  setOperationAction(ISD::SRA,  MVT::i64,    Legal);

  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL,  MVT::i8,     Custom);
  setOperationAction(ISD::MUL,  MVT::i32,    Legal);
  setOperationAction(ISD::MUL,  MVT::i64,    Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
  setOperationAction(ISD::MULHS,     MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD,  MVT::i8,     Custom);
  setOperationAction(ISD::ADD,  MVT::i64,    Legal);
  setOperationAction(ISD::SUB,  MVT::i8,     Custom);
  setOperationAction(ISD::SUB,  MVT::i64,    Legal);

  // SPU does not have BSWAP. It does have CTLZ support for i32;
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
  setOperationAction(ISD::BSWAP, MVT::i64,   Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i128,  Expand);

  setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
  setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i128,  Expand);

  setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
  setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
  setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
  setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
  setOperationAction(ISD::CTLZ , MVT::i128,  Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,   Legal);
  setOperationAction(ISD::SELECT, MVT::i16,  Legal);
  setOperationAction(ISD::SELECT, MVT::i32,  Legal);
  setOperationAction(ISD::SELECT, MVT::i64,  Legal);

  setOperationAction(ISD::SETCC, MVT::i8,    Legal);
  setOperationAction(ISD::SETCC, MVT::i16,   Legal);
  setOperationAction(ISD::SETCC, MVT::i32,   Legal);
  setOperationAction(ISD::SETCC, MVT::i64,   Legal);
  setOperationAction(ISD::SETCC, MVT::f64,   Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // Custom lower i32/i64 -> i128 sign extend
  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall

  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD,     VT, Legal);
    setOperationAction(ISD::SUB,     VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL,     VT, Legal);

    setOperationAction(ISD::AND,     VT, Legal);
    setOperationAction(ISD::OR,      VT, Legal);
    setOperationAction(ISD::XOR,     VT, Legal);
    setOperationAction(ISD::LOAD,    VT, Legal);
    setOperationAction(ISD::SELECT,  VT, Legal);
    setOperationAction(ISD::STORE,   VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV,    VT, Expand);
    setOperationAction(ISD::SREM,    VT, Expand);
    setOperationAction(ISD::UDIV,    VT, Expand);
    setOperationAction(ISD::UREM,    VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
    setOperationAction(ISD::ConstantPool,       VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(Sched::RegPressure);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
  return 3;
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
           VT.getSimpleVT().SimpleTy :
           MVT::i32);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32 = vec2prefslot %3
%5  f64 = fp_extend %4
\endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  EVT InVT = LN->getMemoryVT();
  EVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), LN->isNonTemporal(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
                         "than UNINDEXED\n" +
                         Twine((unsigned)LN->getAddressingMode()));
      /*NOTREACHED*/
    }
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  EVT VT = Value.getValueType();
  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 VT, (128 / VT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), SN->isNonTemporal(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      errs() << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      errs() << "\n";
    }
#endif

    SDValue insertEltOp =
            DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
            DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->isNonTemporal(),
                          LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      errs() << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      errs() << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      report_fatal_error("LowerSTORE: Got a StoreSDNode with an addr mode "
                         "other than UNINDEXED\n" +
                         Twine((unsigned)SN->getAddressingMode()));
      /*NOTREACHED*/
    }
  }

  return SDValue();
}

//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
                                          PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    report_fatal_error("LowerGlobalAddress: Relocation model other than "
                       "static not supported.");
    /*NOTREACHED*/
  }

  return SDValue();
}
//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}

SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv,
                                        bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);

  // Add DAG nodes to load the arguments or copy them out of registers.
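  // (Illustrative summary, not part of the original comments: CCC_SPU assigns
  //  the leading arguments to registers starting at R3; any argument it
  //  assigns to memory is loaded below from a fixed stack object, with
  //  ArgOffset advancing by one StackSlotSize-byte slot per such argument.)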
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;
    CCValAssign &VA = ArgLocs[ArgNo];

    if (VA.isRegLoc()) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default:
        report_fatal_error("LowerFormalArguments Unhandled argument type: " +
                           Twine(ObjectVT.getEVTString()));
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(VA.getLocReg(), VReg);
      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
      ArgOffset += StackSlotSize;
    }

    InVals.push_back(ArgVal);
    // Update the chain
    Chain = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // FIXME: we should be able to query the argument registers from
    // tablegen generated code.
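    // (Note: ArgRegs lists the SPU argument registers R3..R79; the hard-coded
    //  NumArgRegs below must match the number of entries in this table.)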
    static const unsigned ArgRegs[] = {
      SPU::R3,  SPU::R4,  SPU::R5,  SPU::R6,  SPU::R7,  SPU::R8,  SPU::R9,
      SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
      SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
      SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
      SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
      SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
      SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
      SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
      SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
      SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
      SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
    };
    // size of ArgRegs array
    unsigned NumArgRegs = 77;

    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      FuncInfo->setVarArgsFrameIndex(
        MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
      SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
      SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
                                   false, false, 0);
      Chain = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  }

  return Chain;
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as an LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}

SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  // CellSPU target does not yet support tail call optimization.
  isTailCall = false;

  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  unsigned NumOps = Outs.size();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);

  const unsigned NumArgRegs = ArgLocs.size();


  // Handy pointer type
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
    SDValue Arg = OutVals[ArgRegIdx];
    CCValAssign &VA = ArgLocs[ArgRegIdx];

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
    case MVT::f32:
    case MVT::f64:
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
                                           false, false, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();

  // Insert a call sequence start
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls; otherwise, external symbols become BRASL calls. This
      // assumes that declared/defined symbols are in the same compilation
      // unit and can be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with X-form
      // address pairs:
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // If the function returns void, just return the chain.
  if (Ins.empty())
    return Chain;

  // If the call has results, copy the values out of the ret val registers.
  switch (Ins[0].VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                 MVT::i32, InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      InVals.push_back(Chain.getValue(0));
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
    }
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i64:
  case MVT::i128:
  case MVT::f32:
  case MVT::f64:
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  }

  return Chain;
}

SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               DebugLoc dl, SelectionDAG &DAG) const {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             OutVals[i], Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}


//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
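  // (Illustrative example: a build_vector of 42, 42, undef, 42 yields the
  //  ConstantSDNode for 42, while a build_vector with two different non-undef
  //  constants, or with non-constant elements, yields 0.)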
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isInt<10>(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
1504SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG, 1505 EVT ValueType) { 1506 if (ConstantSDNode *CN = getVecImm(N)) { 1507 int Value = (int) CN->getZExtValue(); 1508 if (ValueType == MVT::i16 1509 && Value <= 0xffff /* truncated from uint64_t */ 1510 && ((short) Value >> 8) == ((short) Value & 0xff)) 1511 return DAG.getTargetConstant(Value & 0xff, ValueType); 1512 else if (ValueType == MVT::i8 1513 && (Value & 0xff) == Value) 1514 return DAG.getTargetConstant(Value, ValueType); 1515 } 1516 1517 return SDValue(); 1518} 1519 1520/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value 1521/// and the value fits into a signed 16-bit constant, and if so, return the 1522/// constant 1523SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, 1524 EVT ValueType) { 1525 if (ConstantSDNode *CN = getVecImm(N)) { 1526 uint64_t Value = CN->getZExtValue(); 1527 if ((ValueType == MVT::i32 1528 && ((unsigned) Value & 0xffff0000) == (unsigned) Value) 1529 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value)) 1530 return DAG.getTargetConstant(Value >> 16, ValueType); 1531 } 1532 1533 return SDValue(); 1534} 1535 1536/// get_v4i32_imm - Catch-all for general 32-bit constant vectors 1537SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) { 1538 if (ConstantSDNode *CN = getVecImm(N)) { 1539 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32); 1540 } 1541 1542 return SDValue(); 1543} 1544 1545/// get_v4i32_imm - Catch-all for general 64-bit constant vectors 1546SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { 1547 if (ConstantSDNode *CN = getVecImm(N)) { 1548 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64); 1549 } 1550 1551 return SDValue(); 1552} 1553 1554//! Lower a BUILD_VECTOR instruction creatively: 1555static SDValue 1556LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { 1557 EVT VT = Op.getValueType(); 1558 EVT EltVT = VT.getVectorElementType(); 1559 DebugLoc dl = Op.getDebugLoc(); 1560 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 1561 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR"); 1562 unsigned minSplatBits = EltVT.getSizeInBits(); 1563 1564 if (minSplatBits < 16) 1565 minSplatBits = 16; 1566 1567 APInt APSplatBits, APSplatUndef; 1568 unsigned SplatBitSize; 1569 bool HasAnyUndefs; 1570 1571 if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 1572 HasAnyUndefs, minSplatBits) 1573 || minSplatBits < SplatBitSize) 1574 return SDValue(); // Wasn't a constant vector or splat exceeded min 1575 1576 uint64_t SplatBits = APSplatBits.getZExtValue(); 1577 1578 switch (VT.getSimpleVT().SimpleTy) { 1579 default: 1580 report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " + 1581 Twine(VT.getEVTString())); 1582 /*NOTREACHED*/ 1583 case MVT::v4f32: { 1584 uint32_t Value32 = uint32_t(SplatBits); 1585 assert(SplatBitSize == 32 1586 && "LowerBUILD_VECTOR: Unexpected floating point vector element."); 1587 // NOTE: pretend the constant is an integer. LLVM won't load FP constants 1588 SDValue T = DAG.getConstant(Value32, MVT::i32); 1589 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, 1590 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T)); 1591 break; 1592 } 1593 case MVT::v2f64: { 1594 uint64_t f64val = uint64_t(SplatBits); 1595 assert(SplatBitSize == 64 1596 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes."); 1597 // NOTE: pretend the constant is an integer. 
LLVM won't load FP constants 1598 SDValue T = DAG.getConstant(f64val, MVT::i64); 1599 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, 1600 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T)); 1601 break; 1602 } 1603 case MVT::v16i8: { 1604 // 8-bit constants have to be expanded to 16-bits 1605 unsigned short Value16 = SplatBits /* | (SplatBits << 8) */; 1606 SmallVector<SDValue, 8> Ops; 1607 1608 Ops.assign(8, DAG.getConstant(Value16, MVT::i16)); 1609 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, 1610 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size())); 1611 } 1612 case MVT::v8i16: { 1613 unsigned short Value16 = SplatBits; 1614 SDValue T = DAG.getConstant(Value16, EltVT); 1615 SmallVector<SDValue, 8> Ops; 1616 1617 Ops.assign(8, T); 1618 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); 1619 } 1620 case MVT::v4i32: { 1621 SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); 1622 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T); 1623 } 1624 case MVT::v2i32: { 1625 SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); 1626 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T); 1627 } 1628 case MVT::v2i64: { 1629 return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); 1630 } 1631 } 1632 1633 return SDValue(); 1634} 1635 1636/*! 1637 */ 1638SDValue 1639SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, 1640 DebugLoc dl) { 1641 uint32_t upper = uint32_t(SplatVal >> 32); 1642 uint32_t lower = uint32_t(SplatVal); 1643 1644 if (upper == lower) { 1645 // Magic constant that can be matched by IL, ILA, et. al. 1646 SDValue Val = DAG.getTargetConstant(upper, MVT::i32); 1647 return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, 1648 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 1649 Val, Val, Val, Val)); 1650 } else { 1651 bool upper_special, lower_special; 1652 1653 // NOTE: This code creates common-case shuffle masks that can be easily 1654 // detected as common expressions. It is not attempting to create highly 1655 // specialized masks to replace any and all 0's, 0xff's and 0x80's. 
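    // For reference (SPU shufb semantics): a control byte of the form
    // 10xxxxxx selects 0x00, 110xxxxx selects 0xff, and 111xxxxx selects 0x80
    // for the corresponding result byte. The mask built below leans on this
    // to synthesize the special halves (all-zero, all-ones, sign-bit-only)
    // without materializing them in a register.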
1656 1657 // Detect if the upper or lower half is a special shuffle mask pattern: 1658 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); 1659 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); 1660 1661 // Both upper and lower are special, lower to a constant pool load: 1662 if (lower_special && upper_special) { 1663 SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64); 1664 return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, 1665 SplatValCN, SplatValCN); 1666 } 1667 1668 SDValue LO32; 1669 SDValue HI32; 1670 SmallVector<SDValue, 16> ShufBytes; 1671 SDValue Result; 1672 1673 // Create lower vector if not a special pattern 1674 if (!lower_special) { 1675 SDValue LO32C = DAG.getConstant(lower, MVT::i32); 1676 LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, 1677 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 1678 LO32C, LO32C, LO32C, LO32C)); 1679 } 1680 1681 // Create upper vector if not a special pattern 1682 if (!upper_special) { 1683 SDValue HI32C = DAG.getConstant(upper, MVT::i32); 1684 HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, 1685 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 1686 HI32C, HI32C, HI32C, HI32C)); 1687 } 1688 1689 // If either upper or lower are special, then the two input operands are 1690 // the same (basically, one of them is a "don't care") 1691 if (lower_special) 1692 LO32 = HI32; 1693 if (upper_special) 1694 HI32 = LO32; 1695 1696 for (int i = 0; i < 4; ++i) { 1697 uint64_t val = 0; 1698 for (int j = 0; j < 4; ++j) { 1699 SDValue V; 1700 bool process_upper, process_lower; 1701 val <<= 8; 1702 process_upper = (upper_special && (i & 1) == 0); 1703 process_lower = (lower_special && (i & 1) == 1); 1704 1705 if (process_upper || process_lower) { 1706 if ((process_upper && upper == 0) 1707 || (process_lower && lower == 0)) 1708 val |= 0x80; 1709 else if ((process_upper && upper == 0xffffffff) 1710 || (process_lower && lower == 0xffffffff)) 1711 val |= 0xc0; 1712 else if ((process_upper && upper == 0x80000000) 1713 || (process_lower && lower == 0x80000000)) 1714 val |= (j == 0 ? 0xe0 : 0x80); 1715 } else 1716 val |= i * 4 + j + ((i & 1) * 16); 1717 } 1718 1719 ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); 1720 } 1721 1722 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32, 1723 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 1724 &ShufBytes[0], ShufBytes.size())); 1725 } 1726} 1727 1728/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on 1729/// which the Cell can operate. The code inspects V3 to ascertain whether the 1730/// permutation vector, V3, is monotonically increasing with one "exception" 1731/// element, e.g., (0, 1, _, 3). If this is the case, then generate a 1732/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool. 1733/// In either case, the net result is going to eventually invoke SHUFB to 1734/// permute/shuffle the bytes from V1 and V2. 1735/// \note 1736/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate 1737/// control word for byte/halfword/word insertion. This takes care of a single 1738/// element move from V2 into V1. 1739/// \note 1740/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions. 
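/// \par Example
/// For a v4i32 shuffle with mask <0, 5, 2, 3>, only slot 1 comes from V2 and
/// every other slot matches its own index, so the monotonic path emits a
/// SHUFFLE_MASK (c?d) control word plus a single SHUFB. A mask such as
/// <1, 2, 3, 0> is instead recognized as a rotation and lowered to
/// ROTBYTES_LEFT.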
1741static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 1742 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); 1743 SDValue V1 = Op.getOperand(0); 1744 SDValue V2 = Op.getOperand(1); 1745 DebugLoc dl = Op.getDebugLoc(); 1746 1747 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 1748 1749 // If we have a single element being moved from V1 to V2, this can be handled 1750 // using the C*[DX] compute mask instructions, but the vector elements have 1751 // to be monotonically increasing with one exception element. 1752 EVT VecVT = V1.getValueType(); 1753 EVT EltVT = VecVT.getVectorElementType(); 1754 unsigned EltsFromV2 = 0; 1755 unsigned V2Elt = 0; 1756 unsigned V2EltIdx0 = 0; 1757 unsigned CurrElt = 0; 1758 unsigned MaxElts = VecVT.getVectorNumElements(); 1759 unsigned PrevElt = 0; 1760 unsigned V0Elt = 0; 1761 bool monotonic = true; 1762 bool rotate = true; 1763 EVT maskVT; // which of the c?d instructions to use 1764 1765 if (EltVT == MVT::i8) { 1766 V2EltIdx0 = 16; 1767 maskVT = MVT::v16i8; 1768 } else if (EltVT == MVT::i16) { 1769 V2EltIdx0 = 8; 1770 maskVT = MVT::v8i16; 1771 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) { 1772 V2EltIdx0 = 4; 1773 maskVT = MVT::v4i32; 1774 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) { 1775 V2EltIdx0 = 2; 1776 maskVT = MVT::v2i64; 1777 } else 1778 llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE"); 1779 1780 for (unsigned i = 0; i != MaxElts; ++i) { 1781 if (SVN->getMaskElt(i) < 0) 1782 continue; 1783 1784 unsigned SrcElt = SVN->getMaskElt(i); 1785 1786 if (monotonic) { 1787 if (SrcElt >= V2EltIdx0) { 1788 if (1 >= (++EltsFromV2)) { 1789 V2Elt = (V2EltIdx0 - SrcElt) << 2; 1790 } 1791 } else if (CurrElt != SrcElt) { 1792 monotonic = false; 1793 } 1794 1795 ++CurrElt; 1796 } 1797 1798 if (rotate) { 1799 if (PrevElt > 0 && SrcElt < MaxElts) { 1800 if ((PrevElt == SrcElt - 1) 1801 || (PrevElt == MaxElts - 1 && SrcElt == 0)) { 1802 PrevElt = SrcElt; 1803 if (SrcElt == 0) 1804 V0Elt = i; 1805 } else { 1806 rotate = false; 1807 } 1808 } else if (i == 0) { 1809 // First time through, need to keep track of previous element 1810 PrevElt = SrcElt; 1811 } else { 1812 // This isn't a rotation, takes elements from vector 2 1813 rotate = false; 1814 } 1815 } 1816 } 1817 1818 if (EltsFromV2 == 1 && monotonic) { 1819 // Compute mask and shuffle 1820 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1821 1822 // As SHUFFLE_MASK becomes a c?d instruction, feed it an address 1823 // R1 ($sp) is used here only as it is guaranteed to have last bits zero 1824 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, 1825 DAG.getRegister(SPU::R1, PtrVT), 1826 DAG.getConstant(V2Elt, MVT::i32)); 1827 SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, 1828 maskVT, Pointer); 1829 1830 // Use shuffle mask in SHUFB synthetic instruction: 1831 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1, 1832 ShufMaskOp); 1833 } else if (rotate) { 1834 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8; 1835 1836 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(), 1837 V1, DAG.getConstant(rotamt, MVT::i16)); 1838 } else { 1839 // Convert the SHUFFLE_VECTOR mask's input element units to the 1840 // actual bytes. 1841 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 1842 1843 SmallVector<SDValue, 16> ResultMask; 1844 for (unsigned i = 0, e = MaxElts; i != e; ++i) { 1845 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 
0 : SVN->getMaskElt(i);

      for (unsigned j = 0; j < BytesPerElement; ++j)
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
    }

    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
                                    &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
  }
}

static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
  DebugLoc dl = Op.getDebugLoc();

  if (Op0.getNode()->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
    SmallVector<SDValue, 16> ConstVecValues;
    EVT VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected constant value type in "
                              "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
    }

    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
    }
  }

  return SDValue();
}

static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  SDValue N = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  SDValue retval;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
    // Constant argument:
    int EltNo = (int) C->getZExtValue();

    // sanity checks:
    if (VT == MVT::i8 && EltNo >= 16)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
    else if (VT == MVT::i16 && EltNo >= 8)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
    else if (VT == MVT::i32 && EltNo >= 4)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
    else if (VT == MVT::i64 && EltNo >= 2)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

    if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
      // i32 and i64: Element 0 is the preferred slot
      return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
    }

    // Need to generate shuffle mask and extract:
    int prefslot_begin = -1, prefslot_end = -1;
    int elt_byte = EltNo * VT.getSizeInBits() / 8;

    switch (VT.getSimpleVT().SimpleTy) {
    default:
      assert(false && "Invalid value type!");
    case MVT::i8: {
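      // The preferred slot for i8 is byte 3 of the quadword, so the window
      // collapses to that single byte.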
      prefslot_begin = prefslot_end = 3;
      break;
    }
    case MVT::i16: {
      prefslot_begin = 2; prefslot_end = 3;
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      prefslot_begin = 0; prefslot_end = 3;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      prefslot_begin = 0; prefslot_end = 7;
      break;
    }
    }

    assert(prefslot_begin != -1 && prefslot_end != -1 &&
           "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");

    unsigned int ShufBytes[16] = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };
    for (int i = 0; i < 16; ++i) {
      // zero fill upper part of preferred slot, don't care about the
      // other slots:
      unsigned int mask_val;
      if (i <= prefslot_end) {
        mask_val =
          ((i < prefslot_begin)
           ? 0x80
           : elt_byte + (i - prefslot_begin));

        ShufBytes[i] = mask_val;
      } else
        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
    }

    SDValue ShufMask[4];
    for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
      unsigned bidx = i * 4;
      unsigned int bits = ((ShufBytes[bidx] << 24) |
                           (ShufBytes[bidx+1] << 16) |
                           (ShufBytes[bidx+2] << 8) |
                           ShufBytes[bidx+3]);
      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
    }

    SDValue ShufMaskVec =
      DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                  &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
                                     N, N, ShufMaskVec));
  } else {
    // Variable index: Rotate the requested element into slot 0, then replicate
    // slot 0 across the vector
    EVT VecVT = N.getValueType();
    if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
      report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit "
                         "vector type!");
    }

    // Make life easier by making sure the index is zero-extended to i32
    if (Elt.getValueType() != MVT::i32)
      Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);

    // Scale the index to a bit/byte shift quantity
    APInt scaleFactor =
      APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
    unsigned scaleShift = scaleFactor.logBase2();
    SDValue vecShift;

    if (scaleShift > 0) {
      // Scale the shift factor:
      Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
                        DAG.getConstant(scaleShift, MVT::i32));
    }

    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);

    // Replicate the bytes starting at byte 0 across the entire vector (for
    // consistency with the notion of a unified register set)
    SDValue replicate;

    switch (VT.getSimpleVT().SimpleTy) {
    default:
      report_fatal_error("LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector "
                         "type");
      /*NOTREACHED*/
    case MVT::i8: {
      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i16: {
      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
2049 break; 2050 } 2051 case MVT::i64: 2052 case MVT::f64: { 2053 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32); 2054 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32); 2055 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2056 loFactor, hiFactor, loFactor, hiFactor); 2057 break; 2058 } 2059 } 2060 2061 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, 2062 DAG.getNode(SPUISD::SHUFB, dl, VecVT, 2063 vecShift, vecShift, replicate)); 2064 } 2065 2066 return retval; 2067} 2068 2069static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 2070 SDValue VecOp = Op.getOperand(0); 2071 SDValue ValOp = Op.getOperand(1); 2072 SDValue IdxOp = Op.getOperand(2); 2073 DebugLoc dl = Op.getDebugLoc(); 2074 EVT VT = Op.getValueType(); 2075 2076 // use 0 when the lane to insert to is 'undef' 2077 int64_t Idx=0; 2078 if (IdxOp.getOpcode() != ISD::UNDEF) { 2079 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp); 2080 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); 2081 Idx = (CN->getSExtValue()); 2082 } 2083 2084 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2085 // Use $sp ($1) because it's always 16-byte aligned and it's available: 2086 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, 2087 DAG.getRegister(SPU::R1, PtrVT), 2088 DAG.getConstant(Idx, PtrVT)); 2089 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer); 2090 2091 SDValue result = 2092 DAG.getNode(SPUISD::SHUFB, dl, VT, 2093 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp), 2094 VecOp, 2095 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask)); 2096 2097 return result; 2098} 2099 2100static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, 2101 const TargetLowering &TLI) 2102{ 2103 SDValue N0 = Op.getOperand(0); // Everything has at least one operand 2104 DebugLoc dl = Op.getDebugLoc(); 2105 EVT ShiftVT = TLI.getShiftAmountTy(); 2106 2107 assert(Op.getValueType() == MVT::i8); 2108 switch (Opc) { 2109 default: 2110 llvm_unreachable("Unhandled i8 math operator"); 2111 /*NOTREACHED*/ 2112 break; 2113 case ISD::ADD: { 2114 // 8-bit addition: Promote the arguments up to 16-bits and truncate 2115 // the result: 2116 SDValue N1 = Op.getOperand(1); 2117 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); 2118 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); 2119 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2120 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2121 2122 } 2123 2124 case ISD::SUB: { 2125 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate 2126 // the result: 2127 SDValue N1 = Op.getOperand(1); 2128 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); 2129 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); 2130 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2131 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2132 } 2133 case ISD::ROTR: 2134 case ISD::ROTL: { 2135 SDValue N1 = Op.getOperand(1); 2136 EVT N1VT = N1.getValueType(); 2137 2138 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); 2139 if (!N1VT.bitsEq(ShiftVT)) { 2140 unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT) 2141 ? 
ISD::ZERO_EXTEND 2142 : ISD::TRUNCATE; 2143 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); 2144 } 2145 2146 // Replicate lower 8-bits into upper 8: 2147 SDValue ExpandArg = 2148 DAG.getNode(ISD::OR, dl, MVT::i16, N0, 2149 DAG.getNode(ISD::SHL, dl, MVT::i16, 2150 N0, DAG.getConstant(8, MVT::i32))); 2151 2152 // Truncate back down to i8 2153 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2154 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1)); 2155 } 2156 case ISD::SRL: 2157 case ISD::SHL: { 2158 SDValue N1 = Op.getOperand(1); 2159 EVT N1VT = N1.getValueType(); 2160 2161 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); 2162 if (!N1VT.bitsEq(ShiftVT)) { 2163 unsigned N1Opc = ISD::ZERO_EXTEND; 2164 2165 if (N1.getValueType().bitsGT(ShiftVT)) 2166 N1Opc = ISD::TRUNCATE; 2167 2168 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); 2169 } 2170 2171 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2172 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2173 } 2174 case ISD::SRA: { 2175 SDValue N1 = Op.getOperand(1); 2176 EVT N1VT = N1.getValueType(); 2177 2178 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); 2179 if (!N1VT.bitsEq(ShiftVT)) { 2180 unsigned N1Opc = ISD::SIGN_EXTEND; 2181 2182 if (N1VT.bitsGT(ShiftVT)) 2183 N1Opc = ISD::TRUNCATE; 2184 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); 2185 } 2186 2187 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2188 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2189 } 2190 case ISD::MUL: { 2191 SDValue N1 = Op.getOperand(1); 2192 2193 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); 2194 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); 2195 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2196 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2197 break; 2198 } 2199 } 2200 2201 return SDValue(); 2202} 2203 2204//! Lower byte immediate operations for v16i8 vectors: 2205static SDValue 2206LowerByteImmed(SDValue Op, SelectionDAG &DAG) { 2207 SDValue ConstVec; 2208 SDValue Arg; 2209 EVT VT = Op.getValueType(); 2210 DebugLoc dl = Op.getDebugLoc(); 2211 2212 ConstVec = Op.getOperand(0); 2213 Arg = Op.getOperand(1); 2214 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) { 2215 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) { 2216 ConstVec = ConstVec.getOperand(0); 2217 } else { 2218 ConstVec = Op.getOperand(1); 2219 Arg = Op.getOperand(0); 2220 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) { 2221 ConstVec = ConstVec.getOperand(0); 2222 } 2223 } 2224 } 2225 2226 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) { 2227 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode()); 2228 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed"); 2229 2230 APInt APSplatBits, APSplatUndef; 2231 unsigned SplatBitSize; 2232 bool HasAnyUndefs; 2233 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits(); 2234 2235 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 2236 HasAnyUndefs, minSplatBits) 2237 && minSplatBits <= SplatBitSize) { 2238 uint64_t SplatBits = APSplatBits.getZExtValue(); 2239 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8); 2240 2241 SmallVector<SDValue, 16> tcVec; 2242 tcVec.assign(16, tc); 2243 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg, 2244 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size())); 2245 } 2246 } 2247 2248 // These operations (AND, OR, XOR) are legal, they just couldn't be custom 2249 // lowered. Return the operation, rather than a null SDValue. 2250 return Op; 2251} 2252 2253//! Custom lowering for CTPOP (count population) 2254/*! 
2255 Custom lowering code that counts the number ones in the input 2256 operand. SPU has such an instruction, but it counts the number of 2257 ones per byte, which then have to be accumulated. 2258*/ 2259static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { 2260 EVT VT = Op.getValueType(); 2261 EVT vecVT = EVT::getVectorVT(*DAG.getContext(), 2262 VT, (128 / VT.getSizeInBits())); 2263 DebugLoc dl = Op.getDebugLoc(); 2264 2265 switch (VT.getSimpleVT().SimpleTy) { 2266 default: 2267 assert(false && "Invalid value type!"); 2268 case MVT::i8: { 2269 SDValue N = Op.getOperand(0); 2270 SDValue Elt0 = DAG.getConstant(0, MVT::i32); 2271 2272 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); 2273 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); 2274 2275 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0); 2276 } 2277 2278 case MVT::i16: { 2279 MachineFunction &MF = DAG.getMachineFunction(); 2280 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2281 2282 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass); 2283 2284 SDValue N = Op.getOperand(0); 2285 SDValue Elt0 = DAG.getConstant(0, MVT::i16); 2286 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16); 2287 SDValue Shift1 = DAG.getConstant(8, MVT::i32); 2288 2289 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); 2290 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); 2291 2292 // CNTB_result becomes the chain to which all of the virtual registers 2293 // CNTB_reg, SUM1_reg become associated: 2294 SDValue CNTB_result = 2295 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0); 2296 2297 SDValue CNTB_rescopy = 2298 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); 2299 2300 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16); 2301 2302 return DAG.getNode(ISD::AND, dl, MVT::i16, 2303 DAG.getNode(ISD::ADD, dl, MVT::i16, 2304 DAG.getNode(ISD::SRL, dl, MVT::i16, 2305 Tmp1, Shift1), 2306 Tmp1), 2307 Mask0); 2308 } 2309 2310 case MVT::i32: { 2311 MachineFunction &MF = DAG.getMachineFunction(); 2312 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2313 2314 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); 2315 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); 2316 2317 SDValue N = Op.getOperand(0); 2318 SDValue Elt0 = DAG.getConstant(0, MVT::i32); 2319 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32); 2320 SDValue Shift1 = DAG.getConstant(16, MVT::i32); 2321 SDValue Shift2 = DAG.getConstant(8, MVT::i32); 2322 2323 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); 2324 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); 2325 2326 // CNTB_result becomes the chain to which all of the virtual registers 2327 // CNTB_reg, SUM1_reg become associated: 2328 SDValue CNTB_result = 2329 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0); 2330 2331 SDValue CNTB_rescopy = 2332 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); 2333 2334 SDValue Comp1 = 2335 DAG.getNode(ISD::SRL, dl, MVT::i32, 2336 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32), 2337 Shift1); 2338 2339 SDValue Sum1 = 2340 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1, 2341 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32)); 2342 2343 SDValue Sum1_rescopy = 2344 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1); 2345 2346 SDValue Comp2 = 2347 DAG.getNode(ISD::SRL, dl, MVT::i32, 2348 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32), 2349 Shift2); 2350 SDValue Sum2 = 
        DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
                    DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDValue();
}

//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
/*!
 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
 All conversions to i64 are expanded to a libcall.
 */
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                              const SPUTargetLowering &TLI) {
  EVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  EVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
      || OpVT == MVT::i64) {
    // Convert f32 / f64 to i32 / i64 via libcall.
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::FP_TO_SINT)
       ? RTLIB::getFPTOSINT(Op0VT, OpVT)
       : RTLIB::getFPTOUINT(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}

//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
/*!
 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
 All conversions from i64 are expanded to a libcall.
 */
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
                              const SPUTargetLowering &TLI) {
  EVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  EVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
      || Op0VT == MVT::i64) {
    // Convert i32, i64 to f64 via libcall:
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::SINT_TO_FP)
       ? RTLIB::getSINTTOFP(Op0VT, OpVT)
       : RTLIB::getUINTTOFP(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}

//! Lower ISD::SETCC
/*!
2418 This handles MVT::f64 (double floating point) condition lowering 2419 */ 2420static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, 2421 const TargetLowering &TLI) { 2422 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2)); 2423 DebugLoc dl = Op.getDebugLoc(); 2424 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n"); 2425 2426 SDValue lhs = Op.getOperand(0); 2427 SDValue rhs = Op.getOperand(1); 2428 EVT lhsVT = lhs.getValueType(); 2429 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n"); 2430 2431 EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType()); 2432 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); 2433 EVT IntVT(MVT::i64); 2434 2435 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently 2436 // selected to a NOP: 2437 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs); 2438 SDValue lhsHi32 = 2439 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, 2440 DAG.getNode(ISD::SRL, dl, IntVT, 2441 i64lhs, DAG.getConstant(32, MVT::i32))); 2442 SDValue lhsHi32abs = 2443 DAG.getNode(ISD::AND, dl, MVT::i32, 2444 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32)); 2445 SDValue lhsLo32 = 2446 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs); 2447 2448 // SETO and SETUO only use the lhs operand: 2449 if (CC->get() == ISD::SETO) { 2450 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of 2451 // SETUO 2452 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); 2453 return DAG.getNode(ISD::XOR, dl, ccResultVT, 2454 DAG.getSetCC(dl, ccResultVT, 2455 lhs, DAG.getConstantFP(0.0, lhsVT), 2456 ISD::SETUO), 2457 DAG.getConstant(ccResultAllOnes, ccResultVT)); 2458 } else if (CC->get() == ISD::SETUO) { 2459 // Evaluates to true if Op0 is [SQ]NaN 2460 return DAG.getNode(ISD::AND, dl, ccResultVT, 2461 DAG.getSetCC(dl, ccResultVT, 2462 lhsHi32abs, 2463 DAG.getConstant(0x7ff00000, MVT::i32), 2464 ISD::SETGE), 2465 DAG.getSetCC(dl, ccResultVT, 2466 lhsLo32, 2467 DAG.getConstant(0, MVT::i32), 2468 ISD::SETGT)); 2469 } 2470 2471 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs); 2472 SDValue rhsHi32 = 2473 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, 2474 DAG.getNode(ISD::SRL, dl, IntVT, 2475 i64rhs, DAG.getConstant(32, MVT::i32))); 2476 2477 // If a value is negative, subtract from the sign magnitude constant: 2478 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT); 2479 2480 // Convert the sign-magnitude representation into 2's complement: 2481 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, 2482 lhsHi32, DAG.getConstant(31, MVT::i32)); 2483 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs); 2484 SDValue lhsSelect = 2485 DAG.getNode(ISD::SELECT, dl, IntVT, 2486 lhsSelectMask, lhsSignMag2TC, i64lhs); 2487 2488 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, 2489 rhsHi32, DAG.getConstant(31, MVT::i32)); 2490 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs); 2491 SDValue rhsSelect = 2492 DAG.getNode(ISD::SELECT, dl, IntVT, 2493 rhsSelectMask, rhsSignMag2TC, i64rhs); 2494 2495 unsigned compareOp; 2496 2497 switch (CC->get()) { 2498 case ISD::SETOEQ: 2499 case ISD::SETUEQ: 2500 compareOp = ISD::SETEQ; break; 2501 case ISD::SETOGT: 2502 case ISD::SETUGT: 2503 compareOp = ISD::SETGT; break; 2504 case ISD::SETOGE: 2505 case ISD::SETUGE: 2506 compareOp = ISD::SETGE; break; 2507 case ISD::SETOLT: 2508 case ISD::SETULT: 2509 compareOp = ISD::SETLT; break; 2510 case 
ISD::SETOLE:
  case ISD::SETULE:
    compareOp = ISD::SETLE; break;
  case ISD::SETUNE:
  case ISD::SETONE:
    compareOp = ISD::SETNE; break;
  default:
    report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
  }

  SDValue result =
    DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
                 (ISD::CondCode) compareOp);

  if ((CC->get() & 0x8) == 0) {
    // Ordered comparison:
    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);

    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
  }

  return result;
}

//! Lower ISD::SELECT_CC
/*!
  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
  SELB instruction.

  \note Need to revisit this in the future: if the code path through the true
  and false value computations is longer than the latency of a branch (6
  cycles), then it would be more advantageous to branch and insert a new basic
  block and branch on the condition. However, this code does not make that
  assumption, given the simplistic uses so far.
 */

static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
                              const TargetLowering &TLI) {
  EVT VT = Op.getValueType();
  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  SDValue trueval = Op.getOperand(2);
  SDValue falseval = Op.getOperand(3);
  SDValue condition = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  // NOTE: SELB's arguments: $rA, $rB, $mask
  //
  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
  // where bits in $mask are 1. CCond will be inverted, having 1s where the
  // condition was true and 0s where the condition was false. Hence, the
  // arguments to SELB get reversed.

  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
  // with another "cannot select select_cc" assert:

  SDValue compare = DAG.getNode(ISD::SETCC, dl,
                                TLI.getSetCCResultType(Op.getValueType()),
                                lhs, rhs, condition);
  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
}

//!
Custom lower ISD::TRUNCATE 2580static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) 2581{ 2582 // Type to truncate to 2583 EVT VT = Op.getValueType(); 2584 MVT simpleVT = VT.getSimpleVT(); 2585 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), 2586 VT, (128 / VT.getSizeInBits())); 2587 DebugLoc dl = Op.getDebugLoc(); 2588 2589 // Type to truncate from 2590 SDValue Op0 = Op.getOperand(0); 2591 EVT Op0VT = Op0.getValueType(); 2592 2593 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) { 2594 // Create shuffle mask, least significant doubleword of quadword 2595 unsigned maskHigh = 0x08090a0b; 2596 unsigned maskLow = 0x0c0d0e0f; 2597 // Use a shuffle to perform the truncation 2598 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2599 DAG.getConstant(maskHigh, MVT::i32), 2600 DAG.getConstant(maskLow, MVT::i32), 2601 DAG.getConstant(maskHigh, MVT::i32), 2602 DAG.getConstant(maskLow, MVT::i32)); 2603 2604 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT, 2605 Op0, Op0, shufMask); 2606 2607 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle); 2608 } 2609 2610 return SDValue(); // Leave the truncate unmolested 2611} 2612 2613/*! 2614 * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic 2615 * algorithm is to duplicate the sign bit using rotmai to generate at 2616 * least one byte full of sign bits. Then propagate the "sign-byte" into 2617 * the leftmost words and the i64/i32 into the rightmost words using shufb. 2618 * 2619 * @param Op The sext operand 2620 * @param DAG The current DAG 2621 * @return The SDValue with the entire instruction sequence 2622 */ 2623static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) 2624{ 2625 DebugLoc dl = Op.getDebugLoc(); 2626 2627 // Type to extend to 2628 MVT OpVT = Op.getValueType().getSimpleVT(); 2629 2630 // Type to extend from 2631 SDValue Op0 = Op.getOperand(0); 2632 MVT Op0VT = Op0.getValueType().getSimpleVT(); 2633 2634 // The type to extend to needs to be a i128 and 2635 // the type to extend from needs to be i64 or i32. 2636 assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) && 2637 "LowerSIGN_EXTEND: input and/or output operand have wrong size"); 2638 2639 // Create shuffle mask 2640 unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7 2641 unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11 2642 unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15 2643 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2644 DAG.getConstant(mask1, MVT::i32), 2645 DAG.getConstant(mask1, MVT::i32), 2646 DAG.getConstant(mask2, MVT::i32), 2647 DAG.getConstant(mask3, MVT::i32)); 2648 2649 // Word wise arithmetic right shift to generate at least one byte 2650 // that contains sign bits. 2651 MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32; 2652 SDValue sraVal = DAG.getNode(ISD::SRA, 2653 dl, 2654 mvt, 2655 DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0), 2656 DAG.getConstant(31, MVT::i32)); 2657 2658 // Shuffle bytes - Copy the sign bits into the upper 64 bits 2659 // and the input value into the lower 64 bits. 2660 SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt, 2661 DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask); 2662 2663 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle); 2664} 2665 2666//! Custom (target-specific) lowering entry point 2667/*! 2668 This is where LLVM's DAG selection process calls to do target-specific 2669 lowering of nodes. 
2670 */ 2671SDValue 2672SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const 2673{ 2674 unsigned Opc = (unsigned) Op.getOpcode(); 2675 EVT VT = Op.getValueType(); 2676 2677 switch (Opc) { 2678 default: { 2679#ifndef NDEBUG 2680 errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n"; 2681 errs() << "Op.getOpcode() = " << Opc << "\n"; 2682 errs() << "*Op.getNode():\n"; 2683 Op.getNode()->dump(); 2684#endif 2685 llvm_unreachable(0); 2686 } 2687 case ISD::LOAD: 2688 case ISD::EXTLOAD: 2689 case ISD::SEXTLOAD: 2690 case ISD::ZEXTLOAD: 2691 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl()); 2692 case ISD::STORE: 2693 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl()); 2694 case ISD::ConstantPool: 2695 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl()); 2696 case ISD::GlobalAddress: 2697 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl()); 2698 case ISD::JumpTable: 2699 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl()); 2700 case ISD::ConstantFP: 2701 return LowerConstantFP(Op, DAG); 2702 2703 // i8, i64 math ops: 2704 case ISD::ADD: 2705 case ISD::SUB: 2706 case ISD::ROTR: 2707 case ISD::ROTL: 2708 case ISD::SRL: 2709 case ISD::SHL: 2710 case ISD::SRA: { 2711 if (VT == MVT::i8) 2712 return LowerI8Math(Op, DAG, Opc, *this); 2713 break; 2714 } 2715 2716 case ISD::FP_TO_SINT: 2717 case ISD::FP_TO_UINT: 2718 return LowerFP_TO_INT(Op, DAG, *this); 2719 2720 case ISD::SINT_TO_FP: 2721 case ISD::UINT_TO_FP: 2722 return LowerINT_TO_FP(Op, DAG, *this); 2723 2724 // Vector-related lowering. 2725 case ISD::BUILD_VECTOR: 2726 return LowerBUILD_VECTOR(Op, DAG); 2727 case ISD::SCALAR_TO_VECTOR: 2728 return LowerSCALAR_TO_VECTOR(Op, DAG); 2729 case ISD::VECTOR_SHUFFLE: 2730 return LowerVECTOR_SHUFFLE(Op, DAG); 2731 case ISD::EXTRACT_VECTOR_ELT: 2732 return LowerEXTRACT_VECTOR_ELT(Op, DAG); 2733 case ISD::INSERT_VECTOR_ELT: 2734 return LowerINSERT_VECTOR_ELT(Op, DAG); 2735 2736 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately: 2737 case ISD::AND: 2738 case ISD::OR: 2739 case ISD::XOR: 2740 return LowerByteImmed(Op, DAG); 2741 2742 // Vector and i8 multiply: 2743 case ISD::MUL: 2744 if (VT == MVT::i8) 2745 return LowerI8Math(Op, DAG, Opc, *this); 2746 2747 case ISD::CTPOP: 2748 return LowerCTPOP(Op, DAG); 2749 2750 case ISD::SELECT_CC: 2751 return LowerSELECT_CC(Op, DAG, *this); 2752 2753 case ISD::SETCC: 2754 return LowerSETCC(Op, DAG, *this); 2755 2756 case ISD::TRUNCATE: 2757 return LowerTRUNCATE(Op, DAG); 2758 2759 case ISD::SIGN_EXTEND: 2760 return LowerSIGN_EXTEND(Op, DAG); 2761 } 2762 2763 return SDValue(); 2764} 2765 2766void SPUTargetLowering::ReplaceNodeResults(SDNode *N, 2767 SmallVectorImpl<SDValue>&Results, 2768 SelectionDAG &DAG) const 2769{ 2770#if 0 2771 unsigned Opc = (unsigned) N->getOpcode(); 2772 EVT OpVT = N->getValueType(0); 2773 2774 switch (Opc) { 2775 default: { 2776 errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n"; 2777 errs() << "Op.getOpcode() = " << Opc << "\n"; 2778 errs() << "*Op.getNode():\n"; 2779 N->dump(); 2780 abort(); 2781 /*NOTREACHED*/ 2782 } 2783 } 2784#endif 2785 2786 /* Otherwise, return unchanged */ 2787} 2788 2789//===----------------------------------------------------------------------===// 2790// Target Optimization Hooks 2791//===----------------------------------------------------------------------===// 2792 2793SDValue 2794SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const 2795{ 2796#if 0 2797 TargetMachine &TM = 
getTargetMachine(); 2798#endif 2799 const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); 2800 SelectionDAG &DAG = DCI.DAG; 2801 SDValue Op0 = N->getOperand(0); // everything has at least one operand 2802 EVT NodeVT = N->getValueType(0); // The node's value type 2803 EVT Op0VT = Op0.getValueType(); // The first operand's result 2804 SDValue Result; // Initially, empty result 2805 DebugLoc dl = N->getDebugLoc(); 2806 2807 switch (N->getOpcode()) { 2808 default: break; 2809 case ISD::ADD: { 2810 SDValue Op1 = N->getOperand(1); 2811 2812 if (Op0.getOpcode() == SPUISD::IndirectAddr 2813 || Op1.getOpcode() == SPUISD::IndirectAddr) { 2814 // Normalize the operands to reduce repeated code 2815 SDValue IndirectArg = Op0, AddArg = Op1; 2816 2817 if (Op1.getOpcode() == SPUISD::IndirectAddr) { 2818 IndirectArg = Op1; 2819 AddArg = Op0; 2820 } 2821 2822 if (isa<ConstantSDNode>(AddArg)) { 2823 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg); 2824 SDValue IndOp1 = IndirectArg.getOperand(1); 2825 2826 if (CN0->isNullValue()) { 2827 // (add (SPUindirect <arg>, <arg>), 0) -> 2828 // (SPUindirect <arg>, <arg>) 2829 2830#if !defined(NDEBUG) 2831 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 2832 errs() << "\n" 2833 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n" 2834 << "With: (SPUindirect <arg>, <arg>)\n"; 2835 } 2836#endif 2837 2838 return IndirectArg; 2839 } else if (isa<ConstantSDNode>(IndOp1)) { 2840 // (add (SPUindirect <arg>, <const>), <const>) -> 2841 // (SPUindirect <arg>, <const + const>) 2842 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1); 2843 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue(); 2844 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT); 2845 2846#if !defined(NDEBUG) 2847 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 2848 errs() << "\n" 2849 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue() 2850 << "), " << CN0->getSExtValue() << ")\n" 2851 << "With: (SPUindirect <arg>, " 2852 << combinedConst << ")\n"; 2853 } 2854#endif 2855 2856 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, 2857 IndirectArg, combinedValue); 2858 } 2859 } 2860 } 2861 break; 2862 } 2863 case ISD::SIGN_EXTEND: 2864 case ISD::ZERO_EXTEND: 2865 case ISD::ANY_EXTEND: { 2866 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) { 2867 // (any_extend (SPUextract_elt0 <arg>)) -> 2868 // (SPUextract_elt0 <arg>) 2869 // Types must match, however... 
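      // e.g. (zero_extend:i32 (SPUvec2prefslot:i32 <arg>)) folds to
      // (SPUvec2prefslot:i32 <arg>); an extend to a wider type than the
      // extracted value is left alone.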
2870#if !defined(NDEBUG) 2871 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 2872 errs() << "\nReplace: "; 2873 N->dump(&DAG); 2874 errs() << "\nWith: "; 2875 Op0.getNode()->dump(&DAG); 2876 errs() << "\n"; 2877 } 2878#endif 2879 2880 return Op0; 2881 } 2882 break; 2883 } 2884 case SPUISD::IndirectAddr: { 2885 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) { 2886 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)); 2887 if (CN != 0 && CN->isNullValue()) { 2888 // (SPUindirect (SPUaform <addr>, 0), 0) -> 2889 // (SPUaform <addr>, 0) 2890 2891 DEBUG(errs() << "Replace: "); 2892 DEBUG(N->dump(&DAG)); 2893 DEBUG(errs() << "\nWith: "); 2894 DEBUG(Op0.getNode()->dump(&DAG)); 2895 DEBUG(errs() << "\n"); 2896 2897 return Op0; 2898 } 2899 } else if (Op0.getOpcode() == ISD::ADD) { 2900 SDValue Op1 = N->getOperand(1); 2901 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) { 2902 // (SPUindirect (add <arg>, <arg>), 0) -> 2903 // (SPUindirect <arg>, <arg>) 2904 if (CN1->isNullValue()) { 2905 2906#if !defined(NDEBUG) 2907 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 2908 errs() << "\n" 2909 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n" 2910 << "With: (SPUindirect <arg>, <arg>)\n"; 2911 } 2912#endif 2913 2914 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, 2915 Op0.getOperand(0), Op0.getOperand(1)); 2916 } 2917 } 2918 } 2919 break; 2920 } 2921 case SPUISD::SHLQUAD_L_BITS: 2922 case SPUISD::SHLQUAD_L_BYTES: 2923 case SPUISD::ROTBYTES_LEFT: { 2924 SDValue Op1 = N->getOperand(1); 2925 2926 // Kill degenerate vector shifts: 2927 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) { 2928 if (CN->isNullValue()) { 2929 Result = Op0; 2930 } 2931 } 2932 break; 2933 } 2934 case SPUISD::PREFSLOT2VEC: { 2935 switch (Op0.getOpcode()) { 2936 default: 2937 break; 2938 case ISD::ANY_EXTEND: 2939 case ISD::ZERO_EXTEND: 2940 case ISD::SIGN_EXTEND: { 2941 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) -> 2942 // <arg> 2943 // but only if the SPUprefslot2vec and <arg> types match. 2944 SDValue Op00 = Op0.getOperand(0); 2945 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) { 2946 SDValue Op000 = Op00.getOperand(0); 2947 if (Op000.getValueType() == NodeVT) { 2948 Result = Op000; 2949 } 2950 } 2951 break; 2952 } 2953 case SPUISD::VEC2PREFSLOT: { 2954 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) -> 2955 // <arg> 2956 Result = Op0.getOperand(0); 2957 break; 2958 } 2959 } 2960 break; 2961 } 2962 } 2963 2964 // Otherwise, return unchanged. 2965#ifndef NDEBUG 2966 if (Result.getNode()) { 2967 DEBUG(errs() << "\nReplace.SPU: "); 2968 DEBUG(N->dump(&DAG)); 2969 DEBUG(errs() << "\nWith: "); 2970 DEBUG(Result.getNode()->dump(&DAG)); 2971 DEBUG(errs() << "\n"); 2972 } 2973#endif 2974 2975 return Result; 2976} 2977 2978//===----------------------------------------------------------------------===// 2979// Inline Assembly Support 2980//===----------------------------------------------------------------------===// 2981 2982/// getConstraintType - Given a constraint letter, return the type of 2983/// constraint it is for this target. 
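/// All of the single-letter constraints recognized here ('b', 'r', 'f', 'v',
/// 'y') are register-class constraints; getRegForInlineAsmConstraint below
/// then picks the concrete register class from the operand's value type
/// (for example, an i32 operand constrained with "r" lands in R32C).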
2984SPUTargetLowering::ConstraintType 2985SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const { 2986 if (ConstraintLetter.size() == 1) { 2987 switch (ConstraintLetter[0]) { 2988 default: break; 2989 case 'b': 2990 case 'r': 2991 case 'f': 2992 case 'v': 2993 case 'y': 2994 return C_RegisterClass; 2995 } 2996 } 2997 return TargetLowering::getConstraintType(ConstraintLetter); 2998} 2999 3000std::pair<unsigned, const TargetRegisterClass*> 3001SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 3002 EVT VT) const 3003{ 3004 if (Constraint.size() == 1) { 3005 // GCC RS6000 Constraint Letters 3006 switch (Constraint[0]) { 3007 case 'b': // R1-R31 3008 case 'r': // R0-R31 3009 if (VT == MVT::i64) 3010 return std::make_pair(0U, SPU::R64CRegisterClass); 3011 return std::make_pair(0U, SPU::R32CRegisterClass); 3012 case 'f': 3013 if (VT == MVT::f32) 3014 return std::make_pair(0U, SPU::R32FPRegisterClass); 3015 else if (VT == MVT::f64) 3016 return std::make_pair(0U, SPU::R64FPRegisterClass); 3017 break; 3018 case 'v': 3019 return std::make_pair(0U, SPU::GPRCRegisterClass); 3020 } 3021 } 3022 3023 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 3024} 3025 3026//! Compute used/known bits for a SPU operand 3027void 3028SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 3029 const APInt &Mask, 3030 APInt &KnownZero, 3031 APInt &KnownOne, 3032 const SelectionDAG &DAG, 3033 unsigned Depth ) const { 3034#if 0 3035 const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT; 3036 3037 switch (Op.getOpcode()) { 3038 default: 3039 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); 3040 break; 3041 case CALL: 3042 case SHUFB: 3043 case SHUFFLE_MASK: 3044 case CNTB: 3045 case SPUISD::PREFSLOT2VEC: 3046 case SPUISD::LDRESULT: 3047 case SPUISD::VEC2PREFSLOT: 3048 case SPUISD::SHLQUAD_L_BITS: 3049 case SPUISD::SHLQUAD_L_BYTES: 3050 case SPUISD::VEC_ROTL: 3051 case SPUISD::VEC_ROTR: 3052 case SPUISD::ROTBYTES_LEFT: 3053 case SPUISD::SELECT_MASK: 3054 case SPUISD::SELB: 3055 } 3056#endif 3057} 3058 3059unsigned 3060SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, 3061 unsigned Depth) const { 3062 switch (Op.getOpcode()) { 3063 default: 3064 return 1; 3065 3066 case ISD::SETCC: { 3067 EVT VT = Op.getValueType(); 3068 3069 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) { 3070 VT = MVT::i32; 3071 } 3072 return VT.getSizeInBits(); 3073 } 3074 } 3075} 3076 3077// LowerAsmOperandForConstraint 3078void 3079SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 3080 char ConstraintLetter, 3081 std::vector<SDValue> &Ops, 3082 SelectionDAG &DAG) const { 3083 // Default, for the time being, to the base class handler 3084 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG); 3085} 3086 3087/// isLegalAddressImmediate - Return true if the integer value can be used 3088/// as the offset of the target addressing mode. 3089bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, 3090 const Type *Ty) const { 3091 // SPU's addresses are 256K: 3092 return (V > -(1 << 18) && V < (1 << 18) - 1); 3093} 3094 3095bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { 3096 return false; 3097} 3098 3099bool 3100SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 3101 // The SPU target isn't yet aware of offsets. 3102 return false; 3103} 3104