SPUISelLowering.cpp revision 5427d71be8e907f1ef06f97b55b2431e703ef266
//
//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT valtype;
    const int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "getValueTypeMapEntry returns NULL for "
          << VT.getMVTString();
      llvm_report_error(Msg.str());
    }
#endif

    return retval;
  }

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */

  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      MVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForMVT(*DAG.getContext());
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
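    // TargetLowering::LowerCallTo (below) returns a (result value, output
    // chain) pair; only the result value is handed back to the caller here,
    // and the Hi reference parameter is left untouched on this path.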
116 const Type *RetTy = 117 Op.getNode()->getValueType(0).getTypeForMVT(*DAG.getContext()); 118 std::pair<SDValue, SDValue> CallInfo = 119 TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 120 0, CallingConv::C, false, Callee, Args, DAG, 121 Op.getDebugLoc()); 122 123 return CallInfo.first; 124 } 125} 126 127SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) 128 : TargetLowering(TM), 129 SPUTM(TM) 130{ 131 // Fold away setcc operations if possible. 132 setPow2DivIsCheap(); 133 134 // Use _setjmp/_longjmp instead of setjmp/longjmp. 135 setUseUnderscoreSetJmp(true); 136 setUseUnderscoreLongJmp(true); 137 138 // Set RTLIB libcall names as used by SPU: 139 setLibcallName(RTLIB::DIV_F64, "__fast_divdf3"); 140 141 // Set up the SPU's register classes: 142 addRegisterClass(MVT::i8, SPU::R8CRegisterClass); 143 addRegisterClass(MVT::i16, SPU::R16CRegisterClass); 144 addRegisterClass(MVT::i32, SPU::R32CRegisterClass); 145 addRegisterClass(MVT::i64, SPU::R64CRegisterClass); 146 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass); 147 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass); 148 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass); 149 150 // SPU has no sign or zero extended loads for i1, i8, i16: 151 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); 152 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); 153 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); 154 155 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); 156 setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); 157 158 setTruncStoreAction(MVT::i128, MVT::i64, Expand); 159 setTruncStoreAction(MVT::i128, MVT::i32, Expand); 160 setTruncStoreAction(MVT::i128, MVT::i16, Expand); 161 setTruncStoreAction(MVT::i128, MVT::i8, Expand); 162 163 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 164 165 // SPU constant load actions are custom lowered: 166 setOperationAction(ISD::ConstantFP, MVT::f32, Legal); 167 setOperationAction(ISD::ConstantFP, MVT::f64, Custom); 168 169 // SPU's loads and stores have to be custom lowered: 170 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128; 171 ++sctype) { 172 MVT VT = (MVT::SimpleValueType)sctype; 173 174 setOperationAction(ISD::LOAD, VT, Custom); 175 setOperationAction(ISD::STORE, VT, Custom); 176 setLoadExtAction(ISD::EXTLOAD, VT, Custom); 177 setLoadExtAction(ISD::ZEXTLOAD, VT, Custom); 178 setLoadExtAction(ISD::SEXTLOAD, VT, Custom); 179 180 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) { 181 MVT StoreVT = (MVT::SimpleValueType) stype; 182 setTruncStoreAction(VT, StoreVT, Expand); 183 } 184 } 185 186 for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64; 187 ++sctype) { 188 MVT VT = (MVT::SimpleValueType) sctype; 189 190 setOperationAction(ISD::LOAD, VT, Custom); 191 setOperationAction(ISD::STORE, VT, Custom); 192 193 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) { 194 MVT StoreVT = (MVT::SimpleValueType) stype; 195 setTruncStoreAction(VT, StoreVT, Expand); 196 } 197 } 198 199 // Expand the jumptable branches 200 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 201 setOperationAction(ISD::BR_CC, MVT::Other, Expand); 202 203 // Custom lower SELECT_CC for most cases, but expand by default 204 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); 205 setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); 206 setOperationAction(ISD::SELECT_CC, MVT::i16, Custom); 207 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 208 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); 209 210 // 
SPU has no intrinsics for these particular operations: 211 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); 212 213 // SPU has no division/remainder instructions 214 setOperationAction(ISD::SREM, MVT::i8, Expand); 215 setOperationAction(ISD::UREM, MVT::i8, Expand); 216 setOperationAction(ISD::SDIV, MVT::i8, Expand); 217 setOperationAction(ISD::UDIV, MVT::i8, Expand); 218 setOperationAction(ISD::SDIVREM, MVT::i8, Expand); 219 setOperationAction(ISD::UDIVREM, MVT::i8, Expand); 220 setOperationAction(ISD::SREM, MVT::i16, Expand); 221 setOperationAction(ISD::UREM, MVT::i16, Expand); 222 setOperationAction(ISD::SDIV, MVT::i16, Expand); 223 setOperationAction(ISD::UDIV, MVT::i16, Expand); 224 setOperationAction(ISD::SDIVREM, MVT::i16, Expand); 225 setOperationAction(ISD::UDIVREM, MVT::i16, Expand); 226 setOperationAction(ISD::SREM, MVT::i32, Expand); 227 setOperationAction(ISD::UREM, MVT::i32, Expand); 228 setOperationAction(ISD::SDIV, MVT::i32, Expand); 229 setOperationAction(ISD::UDIV, MVT::i32, Expand); 230 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 231 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 232 setOperationAction(ISD::SREM, MVT::i64, Expand); 233 setOperationAction(ISD::UREM, MVT::i64, Expand); 234 setOperationAction(ISD::SDIV, MVT::i64, Expand); 235 setOperationAction(ISD::UDIV, MVT::i64, Expand); 236 setOperationAction(ISD::SDIVREM, MVT::i64, Expand); 237 setOperationAction(ISD::UDIVREM, MVT::i64, Expand); 238 setOperationAction(ISD::SREM, MVT::i128, Expand); 239 setOperationAction(ISD::UREM, MVT::i128, Expand); 240 setOperationAction(ISD::SDIV, MVT::i128, Expand); 241 setOperationAction(ISD::UDIV, MVT::i128, Expand); 242 setOperationAction(ISD::SDIVREM, MVT::i128, Expand); 243 setOperationAction(ISD::UDIVREM, MVT::i128, Expand); 244 245 // We don't support sin/cos/sqrt/fmod 246 setOperationAction(ISD::FSIN , MVT::f64, Expand); 247 setOperationAction(ISD::FCOS , MVT::f64, Expand); 248 setOperationAction(ISD::FREM , MVT::f64, Expand); 249 setOperationAction(ISD::FSIN , MVT::f32, Expand); 250 setOperationAction(ISD::FCOS , MVT::f32, Expand); 251 setOperationAction(ISD::FREM , MVT::f32, Expand); 252 253 // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt 254 // for f32!) 255 setOperationAction(ISD::FSQRT, MVT::f64, Expand); 256 setOperationAction(ISD::FSQRT, MVT::f32, Expand); 257 258 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 259 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 260 261 // SPU can do rotate right and left, so legalize it... but customize for i8 262 // because instructions don't exist. 263 264 // FIXME: Change from "expand" to appropriate type once ROTR is supported in 265 // .td files. 
266 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/); 267 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/); 268 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/); 269 270 setOperationAction(ISD::ROTL, MVT::i32, Legal); 271 setOperationAction(ISD::ROTL, MVT::i16, Legal); 272 setOperationAction(ISD::ROTL, MVT::i8, Custom); 273 274 // SPU has no native version of shift left/right for i8 275 setOperationAction(ISD::SHL, MVT::i8, Custom); 276 setOperationAction(ISD::SRL, MVT::i8, Custom); 277 setOperationAction(ISD::SRA, MVT::i8, Custom); 278 279 // Make these operations legal and handle them during instruction selection: 280 setOperationAction(ISD::SHL, MVT::i64, Legal); 281 setOperationAction(ISD::SRL, MVT::i64, Legal); 282 setOperationAction(ISD::SRA, MVT::i64, Legal); 283 284 // Custom lower i8, i32 and i64 multiplications 285 setOperationAction(ISD::MUL, MVT::i8, Custom); 286 setOperationAction(ISD::MUL, MVT::i32, Legal); 287 setOperationAction(ISD::MUL, MVT::i64, Legal); 288 289 // Expand double-width multiplication 290 // FIXME: It would probably be reasonable to support some of these operations 291 setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); 292 setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); 293 setOperationAction(ISD::MULHU, MVT::i8, Expand); 294 setOperationAction(ISD::MULHS, MVT::i8, Expand); 295 setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); 296 setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); 297 setOperationAction(ISD::MULHU, MVT::i16, Expand); 298 setOperationAction(ISD::MULHS, MVT::i16, Expand); 299 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 300 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 301 setOperationAction(ISD::MULHU, MVT::i32, Expand); 302 setOperationAction(ISD::MULHS, MVT::i32, Expand); 303 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); 304 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); 305 setOperationAction(ISD::MULHU, MVT::i64, Expand); 306 setOperationAction(ISD::MULHS, MVT::i64, Expand); 307 308 // Need to custom handle (some) common i8, i64 math ops 309 setOperationAction(ISD::ADD, MVT::i8, Custom); 310 setOperationAction(ISD::ADD, MVT::i64, Legal); 311 setOperationAction(ISD::SUB, MVT::i8, Custom); 312 setOperationAction(ISD::SUB, MVT::i64, Legal); 313 314 // SPU does not have BSWAP. It does have i32 support CTLZ. 315 // CTPOP has to be custom lowered. 
316 setOperationAction(ISD::BSWAP, MVT::i32, Expand); 317 setOperationAction(ISD::BSWAP, MVT::i64, Expand); 318 319 setOperationAction(ISD::CTPOP, MVT::i8, Custom); 320 setOperationAction(ISD::CTPOP, MVT::i16, Custom); 321 setOperationAction(ISD::CTPOP, MVT::i32, Custom); 322 setOperationAction(ISD::CTPOP, MVT::i64, Custom); 323 setOperationAction(ISD::CTPOP, MVT::i128, Expand); 324 325 setOperationAction(ISD::CTTZ , MVT::i8, Expand); 326 setOperationAction(ISD::CTTZ , MVT::i16, Expand); 327 setOperationAction(ISD::CTTZ , MVT::i32, Expand); 328 setOperationAction(ISD::CTTZ , MVT::i64, Expand); 329 setOperationAction(ISD::CTTZ , MVT::i128, Expand); 330 331 setOperationAction(ISD::CTLZ , MVT::i8, Promote); 332 setOperationAction(ISD::CTLZ , MVT::i16, Promote); 333 setOperationAction(ISD::CTLZ , MVT::i32, Legal); 334 setOperationAction(ISD::CTLZ , MVT::i64, Expand); 335 setOperationAction(ISD::CTLZ , MVT::i128, Expand); 336 337 // SPU has a version of select that implements (a&~c)|(b&c), just like 338 // select ought to work: 339 setOperationAction(ISD::SELECT, MVT::i8, Legal); 340 setOperationAction(ISD::SELECT, MVT::i16, Legal); 341 setOperationAction(ISD::SELECT, MVT::i32, Legal); 342 setOperationAction(ISD::SELECT, MVT::i64, Legal); 343 344 setOperationAction(ISD::SETCC, MVT::i8, Legal); 345 setOperationAction(ISD::SETCC, MVT::i16, Legal); 346 setOperationAction(ISD::SETCC, MVT::i32, Legal); 347 setOperationAction(ISD::SETCC, MVT::i64, Legal); 348 setOperationAction(ISD::SETCC, MVT::f64, Custom); 349 350 // Custom lower i128 -> i64 truncates 351 setOperationAction(ISD::TRUNCATE, MVT::i64, Custom); 352 353 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); 354 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); 355 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); 356 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); 357 // SPU has a legal FP -> signed INT instruction for f32, but for f64, need 358 // to expand to a libcall, hence the custom lowering: 359 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 360 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 361 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); 362 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); 363 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand); 364 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand); 365 366 // FDIV on SPU requires custom lowering 367 setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall 368 369 // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64: 370 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 371 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); 372 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); 373 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); 374 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); 375 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); 376 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 377 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); 378 379 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal); 380 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal); 381 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal); 382 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal); 383 384 // We cannot sextinreg(i1). Expand to shifts. 385 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 386 387 // Support label based line numbers. 
388 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); 389 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); 390 391 // We want to legalize GlobalAddress and ConstantPool nodes into the 392 // appropriate instructions to materialize the address. 393 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128; 394 ++sctype) { 395 MVT VT = (MVT::SimpleValueType)sctype; 396 397 setOperationAction(ISD::GlobalAddress, VT, Custom); 398 setOperationAction(ISD::ConstantPool, VT, Custom); 399 setOperationAction(ISD::JumpTable, VT, Custom); 400 } 401 402 // RET must be custom lowered, to meet ABI requirements 403 setOperationAction(ISD::RET, MVT::Other, Custom); 404 405 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 406 setOperationAction(ISD::VASTART , MVT::Other, Custom); 407 408 // Use the default implementation. 409 setOperationAction(ISD::VAARG , MVT::Other, Expand); 410 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 411 setOperationAction(ISD::VAEND , MVT::Other, Expand); 412 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); 413 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); 414 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); 415 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand); 416 417 // Cell SPU has instructions for converting between i64 and fp. 418 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 419 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 420 421 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT 422 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); 423 424 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or 425 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); 426 427 // First set operation action for all vector types to expand. Then we 428 // will selectively turn on ones that can be effectively codegen'd. 429 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass); 430 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass); 431 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass); 432 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass); 433 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass); 434 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass); 435 436 // "Odd size" vector classes that we're willing to support: 437 addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass); 438 439 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 440 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { 441 MVT VT = (MVT::SimpleValueType)i; 442 443 // add/sub are legal for all supported vector VT's. 444 setOperationAction(ISD::ADD, VT, Legal); 445 setOperationAction(ISD::SUB, VT, Legal); 446 // mul has to be custom lowered. 
447 setOperationAction(ISD::MUL, VT, Legal); 448 449 setOperationAction(ISD::AND, VT, Legal); 450 setOperationAction(ISD::OR, VT, Legal); 451 setOperationAction(ISD::XOR, VT, Legal); 452 setOperationAction(ISD::LOAD, VT, Legal); 453 setOperationAction(ISD::SELECT, VT, Legal); 454 setOperationAction(ISD::STORE, VT, Legal); 455 456 // These operations need to be expanded: 457 setOperationAction(ISD::SDIV, VT, Expand); 458 setOperationAction(ISD::SREM, VT, Expand); 459 setOperationAction(ISD::UDIV, VT, Expand); 460 setOperationAction(ISD::UREM, VT, Expand); 461 462 // Custom lower build_vector, constant pool spills, insert and 463 // extract vector elements: 464 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 465 setOperationAction(ISD::ConstantPool, VT, Custom); 466 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); 467 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 468 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 469 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 470 } 471 472 setOperationAction(ISD::AND, MVT::v16i8, Custom); 473 setOperationAction(ISD::OR, MVT::v16i8, Custom); 474 setOperationAction(ISD::XOR, MVT::v16i8, Custom); 475 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); 476 477 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 478 479 setShiftAmountType(MVT::i32); 480 setBooleanContents(ZeroOrNegativeOneBooleanContent); 481 482 setStackPointerRegisterToSaveRestore(SPU::R1); 483 484 // We have target-specific dag combine patterns for the following nodes: 485 setTargetDAGCombine(ISD::ADD); 486 setTargetDAGCombine(ISD::ZERO_EXTEND); 487 setTargetDAGCombine(ISD::SIGN_EXTEND); 488 setTargetDAGCombine(ISD::ANY_EXTEND); 489 490 computeRegisterProperties(); 491 492 // Set pre-RA register scheduler default to BURR, which produces slightly 493 // better code than the default (could also be TDRR, but TargetLowering.h 494 // needs a mod to support that model): 495 setSchedulingPreference(SchedulingForRegPressure); 496} 497 498const char * 499SPUTargetLowering::getTargetNodeName(unsigned Opcode) const 500{ 501 if (node_names.empty()) { 502 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG"; 503 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi"; 504 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo"; 505 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr"; 506 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr"; 507 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr"; 508 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT"; 509 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL"; 510 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB"; 511 node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK"; 512 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; 513 node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC"; 514 node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; 515 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS"; 516 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES"; 517 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL"; 518 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL"; 519 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA"; 520 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; 521 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; 522 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; 523 node_names[(unsigned) 
      SPUISD::ROTBYTES_LEFT_BITS] = "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
  return 3;
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
%5  f64      = fp_extend %4
\endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  MVT InVT = LN->getMemoryVT();
  MVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
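          // For example, (add %reg1, %reg2) becomes
          // (SPUindirect %reg1, %reg2), which instruction selection can then
          // match as an x-form (register + register) quadword load.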
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
             "UNINDEXED\n";
      Msg << (unsigned) LN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
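    // For example, a plain i32 store uses a v4i32 chunk here, while a
    // truncating i32 -> i16 store uses v8i16 for stVecVT.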
    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
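    // Roughly, the insertion sequence assembled below looks like this
    // (types shown for illustration only):
    //
    //   mask   v16i8 = SPUISD::SHUFFLE_MASK insertEltOffs
    //   elt    vecVT = scalar_to_vector theValue
    //   merged vecVT = SPUISD::SHUFB elt, alignLoadVec, bitconvert(mask)
    //   chain        = store merged, basePtr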
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      cerr << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      cerr << "\n";
    }
#endif

    SDValue insertEltOp =
            DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
            DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      cerr << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      cerr << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
             "UNINDEXED\n";
      Msg << (unsigned) SN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}

//! Generate the address of a constant pool entry.
SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

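// The same small-memory vs. large-memory split recurs in LowerJumpTable and
// LowerGlobalAddress below: without large memory, the target address is
// wrapped in a single SPUISD::AFormAddr node (an absolute a-form address);
// with large memory, it is built as an SPUISD::Hi / SPUISD::Lo pair that is
// recombined through SPUISD::IndirectAddr.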
//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    llvm_report_error("LowerGlobalAddress: Relocation model other than static"
                      " not supported.");
    /*NOTREACHED*/
  }

  return SDValue();
}

//!
Custom lower double precision floating point constants 992static SDValue 993LowerConstantFP(SDValue Op, SelectionDAG &DAG) { 994 MVT VT = Op.getValueType(); 995 // FIXME there is no actual debug info here 996 DebugLoc dl = Op.getDebugLoc(); 997 998 if (VT == MVT::f64) { 999 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode()); 1000 1001 assert((FP != 0) && 1002 "LowerConstantFP: Node is not ConstantFPSDNode"); 1003 1004 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble()); 1005 SDValue T = DAG.getConstant(dbits, MVT::i64); 1006 SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T); 1007 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, 1008 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec)); 1009 } 1010 1011 return SDValue(); 1012} 1013 1014static SDValue 1015LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) 1016{ 1017 MachineFunction &MF = DAG.getMachineFunction(); 1018 MachineFrameInfo *MFI = MF.getFrameInfo(); 1019 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1020 SmallVector<SDValue, 48> ArgValues; 1021 SDValue Root = Op.getOperand(0); 1022 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0; 1023 DebugLoc dl = Op.getDebugLoc(); 1024 1025 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); 1026 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); 1027 1028 unsigned ArgOffset = SPUFrameInfo::minStackSize(); 1029 unsigned ArgRegIdx = 0; 1030 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); 1031 1032 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1033 1034 // Add DAG nodes to load the arguments or copy them out of registers. 1035 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1; 1036 ArgNo != e; ++ArgNo) { 1037 MVT ObjectVT = Op.getValue(ArgNo).getValueType(); 1038 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 1039 SDValue ArgVal; 1040 1041 if (ArgRegIdx < NumArgRegs) { 1042 const TargetRegisterClass *ArgRegClass; 1043 1044 switch (ObjectVT.getSimpleVT()) { 1045 default: { 1046 std::string msg; 1047 raw_string_ostream Msg(msg); 1048 Msg << "LowerFORMAL_ARGUMENTS Unhandled argument type: " 1049 << ObjectVT.getMVTString(); 1050 llvm_report_error(Msg.str()); 1051 } 1052 case MVT::i8: 1053 ArgRegClass = &SPU::R8CRegClass; 1054 break; 1055 case MVT::i16: 1056 ArgRegClass = &SPU::R16CRegClass; 1057 break; 1058 case MVT::i32: 1059 ArgRegClass = &SPU::R32CRegClass; 1060 break; 1061 case MVT::i64: 1062 ArgRegClass = &SPU::R64CRegClass; 1063 break; 1064 case MVT::i128: 1065 ArgRegClass = &SPU::GPRCRegClass; 1066 break; 1067 case MVT::f32: 1068 ArgRegClass = &SPU::R32FPRegClass; 1069 break; 1070 case MVT::f64: 1071 ArgRegClass = &SPU::R64FPRegClass; 1072 break; 1073 case MVT::v2f64: 1074 case MVT::v4f32: 1075 case MVT::v2i64: 1076 case MVT::v4i32: 1077 case MVT::v8i16: 1078 case MVT::v16i8: 1079 ArgRegClass = &SPU::VECREGRegClass; 1080 break; 1081 } 1082 1083 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass); 1084 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg); 1085 ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT); 1086 ++ArgRegIdx; 1087 } else { 1088 // We need to load the argument to a virtual register if we determined 1089 // above that we ran out of physical registers of the appropriate type 1090 // or we're forced to do vararg 1091 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 1092 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 1093 ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0); 1094 ArgOffset += StackSlotSize; 1095 } 1096 1097 
ArgValues.push_back(ArgVal); 1098 // Update the chain 1099 Root = ArgVal.getOperand(0); 1100 } 1101 1102 // vararg handling: 1103 if (isVarArg) { 1104 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8; 1105 // We will spill (79-3)+1 registers to the stack 1106 SmallVector<SDValue, 79-3+1> MemOps; 1107 1108 // Create the frame slot 1109 1110 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { 1111 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset); 1112 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); 1113 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8); 1114 SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0); 1115 Root = Store.getOperand(0); 1116 MemOps.push_back(Store); 1117 1118 // Increment address by stack slot size for the next stored argument 1119 ArgOffset += StackSlotSize; 1120 } 1121 if (!MemOps.empty()) 1122 Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1123 &MemOps[0], MemOps.size()); 1124 } 1125 1126 ArgValues.push_back(Root); 1127 1128 // Return the new list of results. 1129 return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(), 1130 &ArgValues[0], ArgValues.size()); 1131} 1132 1133/// isLSAAddress - Return the immediate to use if the specified 1134/// value is representable as a LSA address. 1135static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) { 1136 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 1137 if (!C) return 0; 1138 1139 int Addr = C->getZExtValue(); 1140 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 1141 (Addr << 14 >> 14) != Addr) 1142 return 0; // Top 14 bits have to be sext of immediate. 1143 1144 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode(); 1145} 1146 1147static SDValue 1148LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { 1149 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode()); 1150 SDValue Chain = TheCall->getChain(); 1151 SDValue Callee = TheCall->getCallee(); 1152 unsigned NumOps = TheCall->getNumArgs(); 1153 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); 1154 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); 1155 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); 1156 DebugLoc dl = TheCall->getDebugLoc(); 1157 1158 // Handy pointer type 1159 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1160 1161 // Accumulate how many bytes are to be pushed on the stack, including the 1162 // linkage area, and parameter passing area. According to the SPU ABI, 1163 // we minimally need space for [LR] and [SP] 1164 unsigned NumStackBytes = SPUFrameInfo::minStackSize(); 1165 1166 // Set up a copy of the stack pointer for use loading and storing any 1167 // arguments that may not fit in the registers available for argument 1168 // passing. 1169 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32); 1170 1171 // Figure out which arguments are going to go in registers, and which in 1172 // memory. 1173 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR] 1174 unsigned ArgRegIdx = 0; 1175 1176 // Keep track of registers passing arguments 1177 std::vector<std::pair<unsigned, SDValue> > RegsToPass; 1178 // And the arguments passed on the stack 1179 SmallVector<SDValue, 8> MemOpChains; 1180 1181 for (unsigned i = 0; i != NumOps; ++i) { 1182 SDValue Arg = TheCall->getArg(i); 1183 1184 // PtrOff will be used to store the current argument to the stack if a 1185 // register cannot be found for it. 
1186 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 1187 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 1188 1189 switch (Arg.getValueType().getSimpleVT()) { 1190 default: llvm_unreachable("Unexpected ValueType for argument!"); 1191 case MVT::i8: 1192 case MVT::i16: 1193 case MVT::i32: 1194 case MVT::i64: 1195 case MVT::i128: 1196 if (ArgRegIdx != NumArgRegs) { 1197 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); 1198 } else { 1199 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); 1200 ArgOffset += StackSlotSize; 1201 } 1202 break; 1203 case MVT::f32: 1204 case MVT::f64: 1205 if (ArgRegIdx != NumArgRegs) { 1206 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); 1207 } else { 1208 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); 1209 ArgOffset += StackSlotSize; 1210 } 1211 break; 1212 case MVT::v2i64: 1213 case MVT::v2f64: 1214 case MVT::v4f32: 1215 case MVT::v4i32: 1216 case MVT::v8i16: 1217 case MVT::v16i8: 1218 if (ArgRegIdx != NumArgRegs) { 1219 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); 1220 } else { 1221 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0)); 1222 ArgOffset += StackSlotSize; 1223 } 1224 break; 1225 } 1226 } 1227 1228 // Update number of stack bytes actually used, insert a call sequence start 1229 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize()); 1230 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes, 1231 true)); 1232 1233 if (!MemOpChains.empty()) { 1234 // Adjust the stack pointer for the stack arguments. 1235 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1236 &MemOpChains[0], MemOpChains.size()); 1237 } 1238 1239 // Build a sequence of copy-to-reg nodes chained together with token chain 1240 // and flag operands which copy the outgoing args into the appropriate regs. 1241 SDValue InFlag; 1242 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1243 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1244 RegsToPass[i].second, InFlag); 1245 InFlag = Chain.getValue(1); 1246 } 1247 1248 SmallVector<SDValue, 8> Ops; 1249 unsigned CallOpc = SPUISD::CALL; 1250 1251 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every 1252 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol 1253 // node so that legalize doesn't hack it. 1254 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1255 GlobalValue *GV = G->getGlobal(); 1256 MVT CalleeVT = Callee.getValueType(); 1257 SDValue Zero = DAG.getConstant(0, PtrVT); 1258 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT); 1259 1260 if (!ST->usingLargeMem()) { 1261 // Turn calls to targets that are defined (i.e., have bodies) into BRSL 1262 // style calls, otherwise, external symbols are BRASL calls. This assumes 1263 // that declared/defined symbols are in the same compilation unit and can 1264 // be reached through PC-relative jumps. 1265 // 1266 // NOTE: 1267 // This may be an unsafe assumption for JIT and really large compilation 1268 // units. 
1269 if (GV->isDeclaration()) { 1270 Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero); 1271 } else { 1272 Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero); 1273 } 1274 } else { 1275 // "Large memory" mode: Turn all calls into indirect calls with a X-form 1276 // address pairs: 1277 Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero); 1278 } 1279 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1280 MVT CalleeVT = Callee.getValueType(); 1281 SDValue Zero = DAG.getConstant(0, PtrVT); 1282 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(), 1283 Callee.getValueType()); 1284 1285 if (!ST->usingLargeMem()) { 1286 Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero); 1287 } else { 1288 Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero); 1289 } 1290 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) { 1291 // If this is an absolute destination address that appears to be a legal 1292 // local store address, use the munged value. 1293 Callee = SDValue(Dest, 0); 1294 } 1295 1296 Ops.push_back(Chain); 1297 Ops.push_back(Callee); 1298 1299 // Add argument registers to the end of the list so that they are known live 1300 // into the call. 1301 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1302 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1303 RegsToPass[i].second.getValueType())); 1304 1305 if (InFlag.getNode()) 1306 Ops.push_back(InFlag); 1307 // Returns a chain and a flag for retval copy to use. 1308 Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag), 1309 &Ops[0], Ops.size()); 1310 InFlag = Chain.getValue(1); 1311 1312 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true), 1313 DAG.getIntPtrConstant(0, true), InFlag); 1314 if (TheCall->getValueType(0) != MVT::Other) 1315 InFlag = Chain.getValue(1); 1316 1317 SDValue ResultVals[3]; 1318 unsigned NumResults = 0; 1319 1320 // If the call has results, copy the values out of the ret val registers. 
1321 switch (TheCall->getValueType(0).getSimpleVT()) { 1322 default: llvm_unreachable("Unexpected ret value!"); 1323 case MVT::Other: break; 1324 case MVT::i32: 1325 if (TheCall->getValueType(1) == MVT::i32) { 1326 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4, 1327 MVT::i32, InFlag).getValue(1); 1328 ResultVals[0] = Chain.getValue(0); 1329 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32, 1330 Chain.getValue(2)).getValue(1); 1331 ResultVals[1] = Chain.getValue(0); 1332 NumResults = 2; 1333 } else { 1334 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32, 1335 InFlag).getValue(1); 1336 ResultVals[0] = Chain.getValue(0); 1337 NumResults = 1; 1338 } 1339 break; 1340 case MVT::i64: 1341 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64, 1342 InFlag).getValue(1); 1343 ResultVals[0] = Chain.getValue(0); 1344 NumResults = 1; 1345 break; 1346 case MVT::i128: 1347 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128, 1348 InFlag).getValue(1); 1349 ResultVals[0] = Chain.getValue(0); 1350 NumResults = 1; 1351 break; 1352 case MVT::f32: 1353 case MVT::f64: 1354 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0), 1355 InFlag).getValue(1); 1356 ResultVals[0] = Chain.getValue(0); 1357 NumResults = 1; 1358 break; 1359 case MVT::v2f64: 1360 case MVT::v2i64: 1361 case MVT::v4f32: 1362 case MVT::v4i32: 1363 case MVT::v8i16: 1364 case MVT::v16i8: 1365 Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0), 1366 InFlag).getValue(1); 1367 ResultVals[0] = Chain.getValue(0); 1368 NumResults = 1; 1369 break; 1370 } 1371 1372 // If the function returns void, just return the chain. 1373 if (NumResults == 0) 1374 return Chain; 1375 1376 // Otherwise, merge everything together with a MERGE_VALUES node. 1377 ResultVals[NumResults++] = Chain; 1378 SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl); 1379 return Res.getValue(Op.getResNo()); 1380} 1381 1382static SDValue 1383LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) { 1384 SmallVector<CCValAssign, 16> RVLocs; 1385 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); 1386 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); 1387 DebugLoc dl = Op.getDebugLoc(); 1388 CCState CCInfo(CC, isVarArg, TM, RVLocs, DAG.getContext()); 1389 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU); 1390 1391 // If this is the first return lowered for this function, add the regs to the 1392 // liveout set for the function. 1393 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 1394 for (unsigned i = 0; i != RVLocs.size(); ++i) 1395 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 1396 } 1397 1398 SDValue Chain = Op.getOperand(0); 1399 SDValue Flag; 1400 1401 // Copy the result values into the output registers. 
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}


//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
same value 1511/// and the value fits into a signed 8-bit constant, and if so, return the 1512/// constant. 1513/// 1514/// @note: The incoming vector is v16i8 because that's the only way we can load 1515/// constant vectors. Thus, we test to see if the upper and lower bytes are the 1516/// same value. 1517SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG, 1518 MVT ValueType) { 1519 if (ConstantSDNode *CN = getVecImm(N)) { 1520 int Value = (int) CN->getZExtValue(); 1521 if (ValueType == MVT::i16 1522 && Value <= 0xffff /* truncated from uint64_t */ 1523 && ((short) Value >> 8) == ((short) Value & 0xff)) 1524 return DAG.getTargetConstant(Value & 0xff, ValueType); 1525 else if (ValueType == MVT::i8 1526 && (Value & 0xff) == Value) 1527 return DAG.getTargetConstant(Value, ValueType); 1528 } 1529 1530 return SDValue(); 1531} 1532 1533/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value 1534/// and the value fits into a signed 16-bit constant, and if so, return the 1535/// constant 1536SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, 1537 MVT ValueType) { 1538 if (ConstantSDNode *CN = getVecImm(N)) { 1539 uint64_t Value = CN->getZExtValue(); 1540 if ((ValueType == MVT::i32 1541 && ((unsigned) Value & 0xffff0000) == (unsigned) Value) 1542 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value)) 1543 return DAG.getTargetConstant(Value >> 16, ValueType); 1544 } 1545 1546 return SDValue(); 1547} 1548 1549/// get_v4i32_imm - Catch-all for general 32-bit constant vectors 1550SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) { 1551 if (ConstantSDNode *CN = getVecImm(N)) { 1552 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32); 1553 } 1554 1555 return SDValue(); 1556} 1557 1558/// get_v4i32_imm - Catch-all for general 64-bit constant vectors 1559SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { 1560 if (ConstantSDNode *CN = getVecImm(N)) { 1561 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64); 1562 } 1563 1564 return SDValue(); 1565} 1566 1567//! Lower a BUILD_VECTOR instruction creatively: 1568SDValue 1569LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { 1570 MVT VT = Op.getValueType(); 1571 MVT EltVT = VT.getVectorElementType(); 1572 DebugLoc dl = Op.getDebugLoc(); 1573 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 1574 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR"); 1575 unsigned minSplatBits = EltVT.getSizeInBits(); 1576 1577 if (minSplatBits < 16) 1578 minSplatBits = 16; 1579 1580 APInt APSplatBits, APSplatUndef; 1581 unsigned SplatBitSize; 1582 bool HasAnyUndefs; 1583 1584 if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 1585 HasAnyUndefs, minSplatBits) 1586 || minSplatBits < SplatBitSize) 1587 return SDValue(); // Wasn't a constant vector or splat exceeded min 1588 1589 uint64_t SplatBits = APSplatBits.getZExtValue(); 1590 1591 switch (VT.getSimpleVT()) { 1592 default: { 1593 std::string msg; 1594 raw_string_ostream Msg(msg); 1595 Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " 1596 << VT.getMVTString(); 1597 llvm_report_error(Msg.str()); 1598 /*NOTREACHED*/ 1599 } 1600 case MVT::v4f32: { 1601 uint32_t Value32 = uint32_t(SplatBits); 1602 assert(SplatBitSize == 32 1603 && "LowerBUILD_VECTOR: Unexpected floating point vector element."); 1604 // NOTE: pretend the constant is an integer. 
LLVM won't load FP constants
1605      SDValue T = DAG.getConstant(Value32, MVT::i32);
1606      return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1607                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1608      break;
1609    }
1610    case MVT::v2f64: {
1611      uint64_t f64val = uint64_t(SplatBits);
1612      assert(SplatBitSize == 64
1613             && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1614      // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1615      SDValue T = DAG.getConstant(f64val, MVT::i64);
1616      return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1617                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1618      break;
1619    }
1620    case MVT::v16i8: {
1621      // 8-bit constants have to be expanded to 16-bits
1622      unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1623      SmallVector<SDValue, 8> Ops;
1624
1625      Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1626      return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1627                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1628    }
1629    case MVT::v8i16: {
1630      unsigned short Value16 = SplatBits;
1631      SDValue T = DAG.getConstant(Value16, EltVT);
1632      SmallVector<SDValue, 8> Ops;
1633
1634      Ops.assign(8, T);
1635      return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1636    }
1637    case MVT::v4i32: {
1638      SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1639      return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1640    }
1641    case MVT::v2i32: {
1642      SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1643      return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
1644    }
1645    case MVT::v2i64: {
1646      return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1647    }
1648  }
1649
1650  return SDValue();
1651}
1652
1653/*!
  Lower a splatted v2i64 constant: when the upper and lower 32-bit words are
  equal, the splat is materialized as a v4i32 splat; when both words are
  "special" (0, 0xffffffff or 0x80000000), the BUILD_VECTOR is left to become a
  constant-pool load; otherwise the two word splats are merged with SHUFB.
1654 */
1655SDValue
1656SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1657                     DebugLoc dl) {
1658  uint32_t upper = uint32_t(SplatVal >> 32);
1659  uint32_t lower = uint32_t(SplatVal);
1660
1661  if (upper == lower) {
1662    // Magic constant that can be matched by IL, ILA, et al.
1663    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1664    return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1665                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1666                                   Val, Val, Val, Val));
1667  } else {
1668    bool upper_special, lower_special;
1669
1670    // NOTE: This code creates common-case shuffle masks that can be easily
1671    // detected as common expressions. It is not attempting to create highly
1672    // specialized masks to replace any and all 0's, 0xff's and 0x80's.
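    //
    // For reference: the special control bytes written below follow the SPU
    // shufb convention that this lowering relies on -- a mask byte of 0x80
    // yields 0x00 in the result, 0xc0 yields 0xff, and 0xe0 yields 0x80.
    // For example, a splat of 0xffffffff00000001ULL has a "special" upper word
    // (all ones) and an ordinary lower word, so only the lower word is splatted
    // into a register and the mask supplies the 0xff bytes directly.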
1673 1674 // Detect if the upper or lower half is a special shuffle mask pattern: 1675 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); 1676 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); 1677 1678 // Both upper and lower are special, lower to a constant pool load: 1679 if (lower_special && upper_special) { 1680 SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64); 1681 return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, 1682 SplatValCN, SplatValCN); 1683 } 1684 1685 SDValue LO32; 1686 SDValue HI32; 1687 SmallVector<SDValue, 16> ShufBytes; 1688 SDValue Result; 1689 1690 // Create lower vector if not a special pattern 1691 if (!lower_special) { 1692 SDValue LO32C = DAG.getConstant(lower, MVT::i32); 1693 LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, 1694 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 1695 LO32C, LO32C, LO32C, LO32C)); 1696 } 1697 1698 // Create upper vector if not a special pattern 1699 if (!upper_special) { 1700 SDValue HI32C = DAG.getConstant(upper, MVT::i32); 1701 HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, 1702 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 1703 HI32C, HI32C, HI32C, HI32C)); 1704 } 1705 1706 // If either upper or lower are special, then the two input operands are 1707 // the same (basically, one of them is a "don't care") 1708 if (lower_special) 1709 LO32 = HI32; 1710 if (upper_special) 1711 HI32 = LO32; 1712 1713 for (int i = 0; i < 4; ++i) { 1714 uint64_t val = 0; 1715 for (int j = 0; j < 4; ++j) { 1716 SDValue V; 1717 bool process_upper, process_lower; 1718 val <<= 8; 1719 process_upper = (upper_special && (i & 1) == 0); 1720 process_lower = (lower_special && (i & 1) == 1); 1721 1722 if (process_upper || process_lower) { 1723 if ((process_upper && upper == 0) 1724 || (process_lower && lower == 0)) 1725 val |= 0x80; 1726 else if ((process_upper && upper == 0xffffffff) 1727 || (process_lower && lower == 0xffffffff)) 1728 val |= 0xc0; 1729 else if ((process_upper && upper == 0x80000000) 1730 || (process_lower && lower == 0x80000000)) 1731 val |= (j == 0 ? 0xe0 : 0x80); 1732 } else 1733 val |= i * 4 + j + ((i & 1) * 16); 1734 } 1735 1736 ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); 1737 } 1738 1739 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32, 1740 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 1741 &ShufBytes[0], ShufBytes.size())); 1742 } 1743} 1744 1745/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on 1746/// which the Cell can operate. The code inspects V3 to ascertain whether the 1747/// permutation vector, V3, is monotonically increasing with one "exception" 1748/// element, e.g., (0, 1, _, 3). If this is the case, then generate a 1749/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool. 1750/// In either case, the net result is going to eventually invoke SHUFB to 1751/// permute/shuffle the bytes from V1 and V2. 1752/// \note 1753/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate 1754/// control word for byte/halfword/word insertion. This takes care of a single 1755/// element move from V2 into V1. 1756/// \note 1757/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions. 
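/// \note
/// As an illustration of the three paths below (assuming v4i32 operands): a
/// mask such as <0, 5, 2, 3> is monotonic with a single element taken from V2,
/// so it is lowered via SHUFFLE_MASK + SHUFB; a mask such as <1, 2, 3, 0> is
/// recognized as a rotation and becomes a 4-byte ROTBYTES_LEFT; any other mask
/// falls through to a full 16-byte permutation vector fed to SHUFB.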
1758static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 1759 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); 1760 SDValue V1 = Op.getOperand(0); 1761 SDValue V2 = Op.getOperand(1); 1762 DebugLoc dl = Op.getDebugLoc(); 1763 1764 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 1765 1766 // If we have a single element being moved from V1 to V2, this can be handled 1767 // using the C*[DX] compute mask instructions, but the vector elements have 1768 // to be monotonically increasing with one exception element. 1769 MVT VecVT = V1.getValueType(); 1770 MVT EltVT = VecVT.getVectorElementType(); 1771 unsigned EltsFromV2 = 0; 1772 unsigned V2Elt = 0; 1773 unsigned V2EltIdx0 = 0; 1774 unsigned CurrElt = 0; 1775 unsigned MaxElts = VecVT.getVectorNumElements(); 1776 unsigned PrevElt = 0; 1777 unsigned V0Elt = 0; 1778 bool monotonic = true; 1779 bool rotate = true; 1780 1781 if (EltVT == MVT::i8) { 1782 V2EltIdx0 = 16; 1783 } else if (EltVT == MVT::i16) { 1784 V2EltIdx0 = 8; 1785 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) { 1786 V2EltIdx0 = 4; 1787 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) { 1788 V2EltIdx0 = 2; 1789 } else 1790 llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE"); 1791 1792 for (unsigned i = 0; i != MaxElts; ++i) { 1793 if (SVN->getMaskElt(i) < 0) 1794 continue; 1795 1796 unsigned SrcElt = SVN->getMaskElt(i); 1797 1798 if (monotonic) { 1799 if (SrcElt >= V2EltIdx0) { 1800 if (1 >= (++EltsFromV2)) { 1801 V2Elt = (V2EltIdx0 - SrcElt) << 2; 1802 } 1803 } else if (CurrElt != SrcElt) { 1804 monotonic = false; 1805 } 1806 1807 ++CurrElt; 1808 } 1809 1810 if (rotate) { 1811 if (PrevElt > 0 && SrcElt < MaxElts) { 1812 if ((PrevElt == SrcElt - 1) 1813 || (PrevElt == MaxElts - 1 && SrcElt == 0)) { 1814 PrevElt = SrcElt; 1815 if (SrcElt == 0) 1816 V0Elt = i; 1817 } else { 1818 rotate = false; 1819 } 1820 } else if (PrevElt == 0) { 1821 // First time through, need to keep track of previous element 1822 PrevElt = SrcElt; 1823 } else { 1824 // This isn't a rotation, takes elements from vector 2 1825 rotate = false; 1826 } 1827 } 1828 } 1829 1830 if (EltsFromV2 == 1 && monotonic) { 1831 // Compute mask and shuffle 1832 MachineFunction &MF = DAG.getMachineFunction(); 1833 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1834 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); 1835 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1836 // Initialize temporary register to 0 1837 SDValue InitTempReg = 1838 DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT)); 1839 // Copy register's contents as index in SHUFFLE_MASK: 1840 SDValue ShufMaskOp = 1841 DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32, 1842 DAG.getTargetConstant(V2Elt, MVT::i32), 1843 DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT)); 1844 // Use shuffle mask in SHUFB synthetic instruction: 1845 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1, 1846 ShufMaskOp); 1847 } else if (rotate) { 1848 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8; 1849 1850 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(), 1851 V1, DAG.getConstant(rotamt, MVT::i16)); 1852 } else { 1853 // Convert the SHUFFLE_VECTOR mask's input element units to the 1854 // actual bytes. 1855 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 1856 1857 SmallVector<SDValue, 16> ResultMask; 1858 for (unsigned i = 0, e = MaxElts; i != e; ++i) { 1859 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 
0 : SVN->getMaskElt(i); 1860 1861 for (unsigned j = 0; j < BytesPerElement; ++j) 1862 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8)); 1863 } 1864 1865 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, 1866 &ResultMask[0], ResultMask.size()); 1867 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask); 1868 } 1869} 1870 1871static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { 1872 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar 1873 DebugLoc dl = Op.getDebugLoc(); 1874 1875 if (Op0.getNode()->getOpcode() == ISD::Constant) { 1876 // For a constant, build the appropriate constant vector, which will 1877 // eventually simplify to a vector register load. 1878 1879 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode()); 1880 SmallVector<SDValue, 16> ConstVecValues; 1881 MVT VT; 1882 size_t n_copies; 1883 1884 // Create a constant vector: 1885 switch (Op.getValueType().getSimpleVT()) { 1886 default: llvm_unreachable("Unexpected constant value type in " 1887 "LowerSCALAR_TO_VECTOR"); 1888 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break; 1889 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break; 1890 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break; 1891 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break; 1892 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break; 1893 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break; 1894 } 1895 1896 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT); 1897 for (size_t j = 0; j < n_copies; ++j) 1898 ConstVecValues.push_back(CValue); 1899 1900 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(), 1901 &ConstVecValues[0], ConstVecValues.size()); 1902 } else { 1903 // Otherwise, copy the value from one register to another: 1904 switch (Op0.getValueType().getSimpleVT()) { 1905 default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR"); 1906 case MVT::i8: 1907 case MVT::i16: 1908 case MVT::i32: 1909 case MVT::i64: 1910 case MVT::f32: 1911 case MVT::f64: 1912 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0); 1913 } 1914 } 1915 1916 return SDValue(); 1917} 1918 1919static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 1920 MVT VT = Op.getValueType(); 1921 SDValue N = Op.getOperand(0); 1922 SDValue Elt = Op.getOperand(1); 1923 DebugLoc dl = Op.getDebugLoc(); 1924 SDValue retval; 1925 1926 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 1927 // Constant argument: 1928 int EltNo = (int) C->getZExtValue(); 1929 1930 // sanity checks: 1931 if (VT == MVT::i8 && EltNo >= 16) 1932 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15"); 1933 else if (VT == MVT::i16 && EltNo >= 8) 1934 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7"); 1935 else if (VT == MVT::i32 && EltNo >= 4) 1936 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4"); 1937 else if (VT == MVT::i64 && EltNo >= 2) 1938 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2"); 1939 1940 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) { 1941 // i32 and i64: Element 0 is the preferred slot 1942 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N); 1943 } 1944 1945 // Need to generate shuffle mask and extract: 1946 int prefslot_begin = -1, prefslot_end = -1; 1947 int elt_byte = EltNo * VT.getSizeInBits() / 8; 1948 1949 switch (VT.getSimpleVT()) { 1950 default: 1951 assert(false && "Invalid value type!"); 1952 case MVT::i8: { 1953 prefslot_begin = prefslot_end = 3; 
1954      break;
1955    }
1956    case MVT::i16: {
1957      prefslot_begin = 2; prefslot_end = 3;
1958      break;
1959    }
1960    case MVT::i32:
1961    case MVT::f32: {
1962      prefslot_begin = 0; prefslot_end = 3;
1963      break;
1964    }
1965    case MVT::i64:
1966    case MVT::f64: {
1967      prefslot_begin = 0; prefslot_end = 7;
1968      break;
1969    }
1970    }
1971
1972    assert(prefslot_begin != -1 && prefslot_end != -1 &&
1973           "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1974
1975    unsigned int ShufBytes[16];
1976    for (int i = 0; i < 16; ++i) {
1977      // zero fill upper part of preferred slot, don't care about the
1978      // other slots:
1979      unsigned int mask_val;
1980      if (i <= prefslot_end) {
1981        mask_val =
1982          ((i < prefslot_begin)
1983           ? 0x80
1984           : elt_byte + (i - prefslot_begin));
1985
1986        ShufBytes[i] = mask_val;
1987      } else
1988        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1989    }
1990
1991    SDValue ShufMask[4];
1992    for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1993      unsigned bidx = i * 4;
1994      unsigned int bits = ((ShufBytes[bidx] << 24) |
1995                           (ShufBytes[bidx+1] << 16) |
1996                           (ShufBytes[bidx+2] << 8) |
1997                           ShufBytes[bidx+3]);
1998      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1999    }
2000
2001    SDValue ShufMaskVec =
2002      DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2003                  &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
2004
2005    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2006                         DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
2007                                     N, N, ShufMaskVec));
2008  } else {
2009    // Variable index: Rotate the requested element into slot 0, then replicate
2010    // slot 0 across the vector
2011    MVT VecVT = N.getValueType();
2012    if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2013      llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit "
2014                        "vector type!");
2015    }
2016
2017    // Make life easier by making sure the index is zero-extended to i32
2018    if (Elt.getValueType() != MVT::i32)
2019      Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2020
2021    // Scale the index to a bit/byte shift quantity
2022    APInt scaleFactor =
2023      APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2024    unsigned scaleShift = scaleFactor.logBase2();
2025    SDValue vecShift;
2026
2027    if (scaleShift > 0) {
2028      // Scale the shift factor:
2029      Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2030                        DAG.getConstant(scaleShift, MVT::i32));
2031    }
2032
2033    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2034
2035    // Replicate the bytes starting at byte 0 across the entire vector (for
2036    // consistency with the notion of a unified register set)
2037    SDValue replicate;
2038
2039    switch (VT.getSimpleVT()) {
2040    default:
2041      llvm_report_error("LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector "
2042                        "type");
2043      /*NOTREACHED*/
2044    case MVT::i8: {
2045      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2046      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2047                              factor, factor, factor, factor);
2048      break;
2049    }
2050    case MVT::i16: {
2051      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2052      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2053                              factor, factor, factor, factor);
2054      break;
2055    }
2056    case MVT::i32:
2057    case MVT::f32: {
2058      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2059      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2060                              factor, factor, factor, factor);
2061      break;
2062    }
2063    case MVT::i64:
2064    case MVT::f64: {
2065      SDValue loFactor =
DAG.getConstant(0x00010203, MVT::i32); 2066 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32); 2067 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2068 loFactor, hiFactor, loFactor, hiFactor); 2069 break; 2070 } 2071 } 2072 2073 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, 2074 DAG.getNode(SPUISD::SHUFB, dl, VecVT, 2075 vecShift, vecShift, replicate)); 2076 } 2077 2078 return retval; 2079} 2080 2081static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 2082 SDValue VecOp = Op.getOperand(0); 2083 SDValue ValOp = Op.getOperand(1); 2084 SDValue IdxOp = Op.getOperand(2); 2085 DebugLoc dl = Op.getDebugLoc(); 2086 MVT VT = Op.getValueType(); 2087 2088 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp); 2089 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); 2090 2091 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2092 // Use $sp ($1) because it's always 16-byte aligned and it's available: 2093 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, 2094 DAG.getRegister(SPU::R1, PtrVT), 2095 DAG.getConstant(CN->getSExtValue(), PtrVT)); 2096 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer); 2097 2098 SDValue result = 2099 DAG.getNode(SPUISD::SHUFB, dl, VT, 2100 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp), 2101 VecOp, 2102 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask)); 2103 2104 return result; 2105} 2106 2107static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, 2108 const TargetLowering &TLI) 2109{ 2110 SDValue N0 = Op.getOperand(0); // Everything has at least one operand 2111 DebugLoc dl = Op.getDebugLoc(); 2112 MVT ShiftVT = TLI.getShiftAmountTy(); 2113 2114 assert(Op.getValueType() == MVT::i8); 2115 switch (Opc) { 2116 default: 2117 llvm_unreachable("Unhandled i8 math operator"); 2118 /*NOTREACHED*/ 2119 break; 2120 case ISD::ADD: { 2121 // 8-bit addition: Promote the arguments up to 16-bits and truncate 2122 // the result: 2123 SDValue N1 = Op.getOperand(1); 2124 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); 2125 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); 2126 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2127 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2128 2129 } 2130 2131 case ISD::SUB: { 2132 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate 2133 // the result: 2134 SDValue N1 = Op.getOperand(1); 2135 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); 2136 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); 2137 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2138 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2139 } 2140 case ISD::ROTR: 2141 case ISD::ROTL: { 2142 SDValue N1 = Op.getOperand(1); 2143 MVT N1VT = N1.getValueType(); 2144 2145 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); 2146 if (!N1VT.bitsEq(ShiftVT)) { 2147 unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT) 2148 ? 
ISD::ZERO_EXTEND 2149 : ISD::TRUNCATE; 2150 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); 2151 } 2152 2153 // Replicate lower 8-bits into upper 8: 2154 SDValue ExpandArg = 2155 DAG.getNode(ISD::OR, dl, MVT::i16, N0, 2156 DAG.getNode(ISD::SHL, dl, MVT::i16, 2157 N0, DAG.getConstant(8, MVT::i32))); 2158 2159 // Truncate back down to i8 2160 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2161 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1)); 2162 } 2163 case ISD::SRL: 2164 case ISD::SHL: { 2165 SDValue N1 = Op.getOperand(1); 2166 MVT N1VT = N1.getValueType(); 2167 2168 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); 2169 if (!N1VT.bitsEq(ShiftVT)) { 2170 unsigned N1Opc = ISD::ZERO_EXTEND; 2171 2172 if (N1.getValueType().bitsGT(ShiftVT)) 2173 N1Opc = ISD::TRUNCATE; 2174 2175 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); 2176 } 2177 2178 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2179 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2180 } 2181 case ISD::SRA: { 2182 SDValue N1 = Op.getOperand(1); 2183 MVT N1VT = N1.getValueType(); 2184 2185 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); 2186 if (!N1VT.bitsEq(ShiftVT)) { 2187 unsigned N1Opc = ISD::SIGN_EXTEND; 2188 2189 if (N1VT.bitsGT(ShiftVT)) 2190 N1Opc = ISD::TRUNCATE; 2191 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); 2192 } 2193 2194 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2195 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2196 } 2197 case ISD::MUL: { 2198 SDValue N1 = Op.getOperand(1); 2199 2200 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); 2201 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); 2202 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, 2203 DAG.getNode(Opc, dl, MVT::i16, N0, N1)); 2204 break; 2205 } 2206 } 2207 2208 return SDValue(); 2209} 2210 2211//! Lower byte immediate operations for v16i8 vectors: 2212static SDValue 2213LowerByteImmed(SDValue Op, SelectionDAG &DAG) { 2214 SDValue ConstVec; 2215 SDValue Arg; 2216 MVT VT = Op.getValueType(); 2217 DebugLoc dl = Op.getDebugLoc(); 2218 2219 ConstVec = Op.getOperand(0); 2220 Arg = Op.getOperand(1); 2221 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) { 2222 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) { 2223 ConstVec = ConstVec.getOperand(0); 2224 } else { 2225 ConstVec = Op.getOperand(1); 2226 Arg = Op.getOperand(0); 2227 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) { 2228 ConstVec = ConstVec.getOperand(0); 2229 } 2230 } 2231 } 2232 2233 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) { 2234 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode()); 2235 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed"); 2236 2237 APInt APSplatBits, APSplatUndef; 2238 unsigned SplatBitSize; 2239 bool HasAnyUndefs; 2240 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits(); 2241 2242 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 2243 HasAnyUndefs, minSplatBits) 2244 && minSplatBits <= SplatBitSize) { 2245 uint64_t SplatBits = APSplatBits.getZExtValue(); 2246 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8); 2247 2248 SmallVector<SDValue, 16> tcVec; 2249 tcVec.assign(16, tc); 2250 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg, 2251 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size())); 2252 } 2253 } 2254 2255 // These operations (AND, OR, XOR) are legal, they just couldn't be custom 2256 // lowered. Return the operation, rather than a null SDValue. 2257 return Op; 2258} 2259 2260//! Custom lowering for CTPOP (count population) 2261/*! 
2262 Custom lowering code that counts the number ones in the input 2263 operand. SPU has such an instruction, but it counts the number of 2264 ones per byte, which then have to be accumulated. 2265*/ 2266static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { 2267 MVT VT = Op.getValueType(); 2268 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())); 2269 DebugLoc dl = Op.getDebugLoc(); 2270 2271 switch (VT.getSimpleVT()) { 2272 default: 2273 assert(false && "Invalid value type!"); 2274 case MVT::i8: { 2275 SDValue N = Op.getOperand(0); 2276 SDValue Elt0 = DAG.getConstant(0, MVT::i32); 2277 2278 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); 2279 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); 2280 2281 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0); 2282 } 2283 2284 case MVT::i16: { 2285 MachineFunction &MF = DAG.getMachineFunction(); 2286 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2287 2288 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass); 2289 2290 SDValue N = Op.getOperand(0); 2291 SDValue Elt0 = DAG.getConstant(0, MVT::i16); 2292 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16); 2293 SDValue Shift1 = DAG.getConstant(8, MVT::i32); 2294 2295 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); 2296 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); 2297 2298 // CNTB_result becomes the chain to which all of the virtual registers 2299 // CNTB_reg, SUM1_reg become associated: 2300 SDValue CNTB_result = 2301 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0); 2302 2303 SDValue CNTB_rescopy = 2304 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); 2305 2306 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16); 2307 2308 return DAG.getNode(ISD::AND, dl, MVT::i16, 2309 DAG.getNode(ISD::ADD, dl, MVT::i16, 2310 DAG.getNode(ISD::SRL, dl, MVT::i16, 2311 Tmp1, Shift1), 2312 Tmp1), 2313 Mask0); 2314 } 2315 2316 case MVT::i32: { 2317 MachineFunction &MF = DAG.getMachineFunction(); 2318 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2319 2320 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); 2321 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); 2322 2323 SDValue N = Op.getOperand(0); 2324 SDValue Elt0 = DAG.getConstant(0, MVT::i32); 2325 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32); 2326 SDValue Shift1 = DAG.getConstant(16, MVT::i32); 2327 SDValue Shift2 = DAG.getConstant(8, MVT::i32); 2328 2329 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); 2330 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); 2331 2332 // CNTB_result becomes the chain to which all of the virtual registers 2333 // CNTB_reg, SUM1_reg become associated: 2334 SDValue CNTB_result = 2335 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0); 2336 2337 SDValue CNTB_rescopy = 2338 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); 2339 2340 SDValue Comp1 = 2341 DAG.getNode(ISD::SRL, dl, MVT::i32, 2342 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32), 2343 Shift1); 2344 2345 SDValue Sum1 = 2346 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1, 2347 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32)); 2348 2349 SDValue Sum1_rescopy = 2350 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1); 2351 2352 SDValue Comp2 = 2353 DAG.getNode(ISD::SRL, dl, MVT::i32, 2354 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32), 2355 Shift2); 2356 SDValue Sum2 = 2357 DAG.getNode(ISD::ADD, dl, 
MVT::i32, Comp2,
2358                          DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2359
2360    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2361  }
2362
2363  case MVT::i64:
2364    break;
2365  }
2366
2367  return SDValue();
2368}
2369
2370//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2371/*!
2372 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2373 All conversions to i64 are expanded to a libcall.
2374 */
2375static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2376                              SPUTargetLowering &TLI) {
2377  MVT OpVT = Op.getValueType();
2378  SDValue Op0 = Op.getOperand(0);
2379  MVT Op0VT = Op0.getValueType();
2380
2381  if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2382      || OpVT == MVT::i64) {
2383    // Convert f32 / f64 to i32 / i64 via libcall.
2384    RTLIB::Libcall LC =
2385      (Op.getOpcode() == ISD::FP_TO_SINT)
2386       ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2387       : RTLIB::getFPTOUINT(Op0VT, OpVT);
2388    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
2389    SDValue Dummy;
2390    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2391  }
2392
2393  return Op;
2394}
2395
2396//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2397/*!
2398 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2399 All conversions from i64 are expanded to a libcall.
2400 */
2401static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2402                              SPUTargetLowering &TLI) {
2403  MVT OpVT = Op.getValueType();
2404  SDValue Op0 = Op.getOperand(0);
2405  MVT Op0VT = Op0.getValueType();
2406
2407  if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2408      || Op0VT == MVT::i64) {
2409    // Convert i32 / i64 to f32 / f64 via libcall:
2410    RTLIB::Libcall LC =
2411      (Op.getOpcode() == ISD::SINT_TO_FP)
2412       ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2413       : RTLIB::getUINTTOFP(Op0VT, OpVT);
2414    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
2415    SDValue Dummy;
2416    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2417  }
2418
2419  return Op;
2420}
2421
2422//! Lower ISD::SETCC
2423/*!
2424 This handles MVT::f64 (double floating point) condition lowering 2425 */ 2426static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, 2427 const TargetLowering &TLI) { 2428 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2)); 2429 DebugLoc dl = Op.getDebugLoc(); 2430 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n"); 2431 2432 SDValue lhs = Op.getOperand(0); 2433 SDValue rhs = Op.getOperand(1); 2434 MVT lhsVT = lhs.getValueType(); 2435 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n"); 2436 2437 MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType()); 2438 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); 2439 MVT IntVT(MVT::i64); 2440 2441 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently 2442 // selected to a NOP: 2443 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs); 2444 SDValue lhsHi32 = 2445 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, 2446 DAG.getNode(ISD::SRL, dl, IntVT, 2447 i64lhs, DAG.getConstant(32, MVT::i32))); 2448 SDValue lhsHi32abs = 2449 DAG.getNode(ISD::AND, dl, MVT::i32, 2450 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32)); 2451 SDValue lhsLo32 = 2452 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs); 2453 2454 // SETO and SETUO only use the lhs operand: 2455 if (CC->get() == ISD::SETO) { 2456 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of 2457 // SETUO 2458 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); 2459 return DAG.getNode(ISD::XOR, dl, ccResultVT, 2460 DAG.getSetCC(dl, ccResultVT, 2461 lhs, DAG.getConstantFP(0.0, lhsVT), 2462 ISD::SETUO), 2463 DAG.getConstant(ccResultAllOnes, ccResultVT)); 2464 } else if (CC->get() == ISD::SETUO) { 2465 // Evaluates to true if Op0 is [SQ]NaN 2466 return DAG.getNode(ISD::AND, dl, ccResultVT, 2467 DAG.getSetCC(dl, ccResultVT, 2468 lhsHi32abs, 2469 DAG.getConstant(0x7ff00000, MVT::i32), 2470 ISD::SETGE), 2471 DAG.getSetCC(dl, ccResultVT, 2472 lhsLo32, 2473 DAG.getConstant(0, MVT::i32), 2474 ISD::SETGT)); 2475 } 2476 2477 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs); 2478 SDValue rhsHi32 = 2479 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, 2480 DAG.getNode(ISD::SRL, dl, IntVT, 2481 i64rhs, DAG.getConstant(32, MVT::i32))); 2482 2483 // If a value is negative, subtract from the sign magnitude constant: 2484 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT); 2485 2486 // Convert the sign-magnitude representation into 2's complement: 2487 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, 2488 lhsHi32, DAG.getConstant(31, MVT::i32)); 2489 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs); 2490 SDValue lhsSelect = 2491 DAG.getNode(ISD::SELECT, dl, IntVT, 2492 lhsSelectMask, lhsSignMag2TC, i64lhs); 2493 2494 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, 2495 rhsHi32, DAG.getConstant(31, MVT::i32)); 2496 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs); 2497 SDValue rhsSelect = 2498 DAG.getNode(ISD::SELECT, dl, IntVT, 2499 rhsSelectMask, rhsSignMag2TC, i64rhs); 2500 2501 unsigned compareOp; 2502 2503 switch (CC->get()) { 2504 case ISD::SETOEQ: 2505 case ISD::SETUEQ: 2506 compareOp = ISD::SETEQ; break; 2507 case ISD::SETOGT: 2508 case ISD::SETUGT: 2509 compareOp = ISD::SETGT; break; 2510 case ISD::SETOGE: 2511 case ISD::SETUGE: 2512 compareOp = ISD::SETGE; break; 2513 case ISD::SETOLT: 2514 case ISD::SETULT: 2515 compareOp = ISD::SETLT; break; 2516 case 
ISD::SETOLE:
2517  case ISD::SETULE:
2518    compareOp = ISD::SETLE; break;
2519  case ISD::SETUNE:
2520  case ISD::SETONE:
2521    compareOp = ISD::SETNE; break;
2522  default:
2523    llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
2524  }
2525
2526  SDValue result =
2527    DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2528                 (ISD::CondCode) compareOp);
2529
2530  if ((CC->get() & 0x8) == 0) {
2531    // Ordered comparison:
2532    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2533                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
2534                                  ISD::SETO);
2535    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2536                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
2537                                  ISD::SETO);
2538    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2539
2540    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2541  }
2542
2543  return result;
2544}
2545
2546//! Lower ISD::SELECT_CC
2547/*!
2548  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2549  SELB instruction.
2550
2551  \note Need to revisit this in the future: if the code path through the true
2552  and false value computations is longer than the latency of a branch (6
2553  cycles), then it would be more advantageous to branch and insert a new basic
2554  block and branch on the condition. However, this code does not make that
2555  assumption, given the simplistic uses so far.
2556 */
2557
2558static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2559                              const TargetLowering &TLI) {
2560  MVT VT = Op.getValueType();
2561  SDValue lhs = Op.getOperand(0);
2562  SDValue rhs = Op.getOperand(1);
2563  SDValue trueval = Op.getOperand(2);
2564  SDValue falseval = Op.getOperand(3);
2565  SDValue condition = Op.getOperand(4);
2566  DebugLoc dl = Op.getDebugLoc();
2567
2568  // NOTE: SELB's arguments: $rA, $rB, $mask
2569  //
2570  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2571  // where bits in $mask are 1. CCond will be inverted, having 1s where the
2572  // condition was true and 0s where the condition was false. Hence, the
2573  // arguments to SELB get reversed.
2574
2575  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2576  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2577  // with another "cannot select select_cc" assert:
2578
2579  SDValue compare = DAG.getNode(ISD::SETCC, dl,
2580                                TLI.getSetCCResultType(Op.getValueType()),
2581                                lhs, rhs, condition);
2582  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2583}
2584
2585 //!
Custom lower ISD::TRUNCATE 2586static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) 2587{ 2588 // Type to truncate to 2589 MVT VT = Op.getValueType(); 2590 MVT::SimpleValueType simpleVT = VT.getSimpleVT(); 2591 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())); 2592 DebugLoc dl = Op.getDebugLoc(); 2593 2594 // Type to truncate from 2595 SDValue Op0 = Op.getOperand(0); 2596 MVT Op0VT = Op0.getValueType(); 2597 2598 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) { 2599 // Create shuffle mask, least significant doubleword of quadword 2600 unsigned maskHigh = 0x08090a0b; 2601 unsigned maskLow = 0x0c0d0e0f; 2602 // Use a shuffle to perform the truncation 2603 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, 2604 DAG.getConstant(maskHigh, MVT::i32), 2605 DAG.getConstant(maskLow, MVT::i32), 2606 DAG.getConstant(maskHigh, MVT::i32), 2607 DAG.getConstant(maskLow, MVT::i32)); 2608 2609 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT, 2610 Op0, Op0, shufMask); 2611 2612 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle); 2613 } 2614 2615 return SDValue(); // Leave the truncate unmolested 2616} 2617 2618//! Custom (target-specific) lowering entry point 2619/*! 2620 This is where LLVM's DAG selection process calls to do target-specific 2621 lowering of nodes. 2622 */ 2623SDValue 2624SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) 2625{ 2626 unsigned Opc = (unsigned) Op.getOpcode(); 2627 MVT VT = Op.getValueType(); 2628 2629 switch (Opc) { 2630 default: { 2631#ifndef NDEBUG 2632 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n"; 2633 cerr << "Op.getOpcode() = " << Opc << "\n"; 2634 cerr << "*Op.getNode():\n"; 2635 Op.getNode()->dump(); 2636#endif 2637 llvm_unreachable(0); 2638 } 2639 case ISD::LOAD: 2640 case ISD::EXTLOAD: 2641 case ISD::SEXTLOAD: 2642 case ISD::ZEXTLOAD: 2643 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl()); 2644 case ISD::STORE: 2645 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl()); 2646 case ISD::ConstantPool: 2647 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl()); 2648 case ISD::GlobalAddress: 2649 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl()); 2650 case ISD::JumpTable: 2651 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl()); 2652 case ISD::ConstantFP: 2653 return LowerConstantFP(Op, DAG); 2654 case ISD::FORMAL_ARGUMENTS: 2655 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex); 2656 case ISD::CALL: 2657 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl()); 2658 case ISD::RET: 2659 return LowerRET(Op, DAG, getTargetMachine()); 2660 2661 // i8, i64 math ops: 2662 case ISD::ADD: 2663 case ISD::SUB: 2664 case ISD::ROTR: 2665 case ISD::ROTL: 2666 case ISD::SRL: 2667 case ISD::SHL: 2668 case ISD::SRA: { 2669 if (VT == MVT::i8) 2670 return LowerI8Math(Op, DAG, Opc, *this); 2671 break; 2672 } 2673 2674 case ISD::FP_TO_SINT: 2675 case ISD::FP_TO_UINT: 2676 return LowerFP_TO_INT(Op, DAG, *this); 2677 2678 case ISD::SINT_TO_FP: 2679 case ISD::UINT_TO_FP: 2680 return LowerINT_TO_FP(Op, DAG, *this); 2681 2682 // Vector-related lowering. 
2683 case ISD::BUILD_VECTOR: 2684 return LowerBUILD_VECTOR(Op, DAG); 2685 case ISD::SCALAR_TO_VECTOR: 2686 return LowerSCALAR_TO_VECTOR(Op, DAG); 2687 case ISD::VECTOR_SHUFFLE: 2688 return LowerVECTOR_SHUFFLE(Op, DAG); 2689 case ISD::EXTRACT_VECTOR_ELT: 2690 return LowerEXTRACT_VECTOR_ELT(Op, DAG); 2691 case ISD::INSERT_VECTOR_ELT: 2692 return LowerINSERT_VECTOR_ELT(Op, DAG); 2693 2694 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately: 2695 case ISD::AND: 2696 case ISD::OR: 2697 case ISD::XOR: 2698 return LowerByteImmed(Op, DAG); 2699 2700 // Vector and i8 multiply: 2701 case ISD::MUL: 2702 if (VT == MVT::i8) 2703 return LowerI8Math(Op, DAG, Opc, *this); 2704 2705 case ISD::CTPOP: 2706 return LowerCTPOP(Op, DAG); 2707 2708 case ISD::SELECT_CC: 2709 return LowerSELECT_CC(Op, DAG, *this); 2710 2711 case ISD::SETCC: 2712 return LowerSETCC(Op, DAG, *this); 2713 2714 case ISD::TRUNCATE: 2715 return LowerTRUNCATE(Op, DAG); 2716 } 2717 2718 return SDValue(); 2719} 2720 2721void SPUTargetLowering::ReplaceNodeResults(SDNode *N, 2722 SmallVectorImpl<SDValue>&Results, 2723 SelectionDAG &DAG) 2724{ 2725#if 0 2726 unsigned Opc = (unsigned) N->getOpcode(); 2727 MVT OpVT = N->getValueType(0); 2728 2729 switch (Opc) { 2730 default: { 2731 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n"; 2732 cerr << "Op.getOpcode() = " << Opc << "\n"; 2733 cerr << "*Op.getNode():\n"; 2734 N->dump(); 2735 abort(); 2736 /*NOTREACHED*/ 2737 } 2738 } 2739#endif 2740 2741 /* Otherwise, return unchanged */ 2742} 2743 2744//===----------------------------------------------------------------------===// 2745// Target Optimization Hooks 2746//===----------------------------------------------------------------------===// 2747 2748SDValue 2749SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const 2750{ 2751#if 0 2752 TargetMachine &TM = getTargetMachine(); 2753#endif 2754 const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); 2755 SelectionDAG &DAG = DCI.DAG; 2756 SDValue Op0 = N->getOperand(0); // everything has at least one operand 2757 MVT NodeVT = N->getValueType(0); // The node's value type 2758 MVT Op0VT = Op0.getValueType(); // The first operand's result 2759 SDValue Result; // Initially, empty result 2760 DebugLoc dl = N->getDebugLoc(); 2761 2762 switch (N->getOpcode()) { 2763 default: break; 2764 case ISD::ADD: { 2765 SDValue Op1 = N->getOperand(1); 2766 2767 if (Op0.getOpcode() == SPUISD::IndirectAddr 2768 || Op1.getOpcode() == SPUISD::IndirectAddr) { 2769 // Normalize the operands to reduce repeated code 2770 SDValue IndirectArg = Op0, AddArg = Op1; 2771 2772 if (Op1.getOpcode() == SPUISD::IndirectAddr) { 2773 IndirectArg = Op1; 2774 AddArg = Op0; 2775 } 2776 2777 if (isa<ConstantSDNode>(AddArg)) { 2778 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg); 2779 SDValue IndOp1 = IndirectArg.getOperand(1); 2780 2781 if (CN0->isNullValue()) { 2782 // (add (SPUindirect <arg>, <arg>), 0) -> 2783 // (SPUindirect <arg>, <arg>) 2784 2785#if !defined(NDEBUG) 2786 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 2787 cerr << "\n" 2788 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n" 2789 << "With: (SPUindirect <arg>, <arg>)\n"; 2790 } 2791#endif 2792 2793 return IndirectArg; 2794 } else if (isa<ConstantSDNode>(IndOp1)) { 2795 // (add (SPUindirect <arg>, <const>), <const>) -> 2796 // (SPUindirect <arg>, <const + const>) 2797 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1); 2798 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue(); 2799 SDValue 
combinedValue = DAG.getConstant(combinedConst, Op0VT); 2800 2801#if !defined(NDEBUG) 2802 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 2803 cerr << "\n" 2804 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue() 2805 << "), " << CN0->getSExtValue() << ")\n" 2806 << "With: (SPUindirect <arg>, " 2807 << combinedConst << ")\n"; 2808 } 2809#endif 2810 2811 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, 2812 IndirectArg, combinedValue); 2813 } 2814 } 2815 } 2816 break; 2817 } 2818 case ISD::SIGN_EXTEND: 2819 case ISD::ZERO_EXTEND: 2820 case ISD::ANY_EXTEND: { 2821 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) { 2822 // (any_extend (SPUextract_elt0 <arg>)) -> 2823 // (SPUextract_elt0 <arg>) 2824 // Types must match, however... 2825#if !defined(NDEBUG) 2826 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 2827 cerr << "\nReplace: "; 2828 N->dump(&DAG); 2829 cerr << "\nWith: "; 2830 Op0.getNode()->dump(&DAG); 2831 cerr << "\n"; 2832 } 2833#endif 2834 2835 return Op0; 2836 } 2837 break; 2838 } 2839 case SPUISD::IndirectAddr: { 2840 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) { 2841 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)); 2842 if (CN != 0 && CN->getZExtValue() == 0) { 2843 // (SPUindirect (SPUaform <addr>, 0), 0) -> 2844 // (SPUaform <addr>, 0) 2845 2846 DEBUG(cerr << "Replace: "); 2847 DEBUG(N->dump(&DAG)); 2848 DEBUG(cerr << "\nWith: "); 2849 DEBUG(Op0.getNode()->dump(&DAG)); 2850 DEBUG(cerr << "\n"); 2851 2852 return Op0; 2853 } 2854 } else if (Op0.getOpcode() == ISD::ADD) { 2855 SDValue Op1 = N->getOperand(1); 2856 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) { 2857 // (SPUindirect (add <arg>, <arg>), 0) -> 2858 // (SPUindirect <arg>, <arg>) 2859 if (CN1->isNullValue()) { 2860 2861#if !defined(NDEBUG) 2862 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 2863 cerr << "\n" 2864 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n" 2865 << "With: (SPUindirect <arg>, <arg>)\n"; 2866 } 2867#endif 2868 2869 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, 2870 Op0.getOperand(0), Op0.getOperand(1)); 2871 } 2872 } 2873 } 2874 break; 2875 } 2876 case SPUISD::SHLQUAD_L_BITS: 2877 case SPUISD::SHLQUAD_L_BYTES: 2878 case SPUISD::VEC_SHL: 2879 case SPUISD::VEC_SRL: 2880 case SPUISD::VEC_SRA: 2881 case SPUISD::ROTBYTES_LEFT: { 2882 SDValue Op1 = N->getOperand(1); 2883 2884 // Kill degenerate vector shifts: 2885 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) { 2886 if (CN->isNullValue()) { 2887 Result = Op0; 2888 } 2889 } 2890 break; 2891 } 2892 case SPUISD::PREFSLOT2VEC: { 2893 switch (Op0.getOpcode()) { 2894 default: 2895 break; 2896 case ISD::ANY_EXTEND: 2897 case ISD::ZERO_EXTEND: 2898 case ISD::SIGN_EXTEND: { 2899 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) -> 2900 // <arg> 2901 // but only if the SPUprefslot2vec and <arg> types match. 2902 SDValue Op00 = Op0.getOperand(0); 2903 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) { 2904 SDValue Op000 = Op00.getOperand(0); 2905 if (Op000.getValueType() == NodeVT) { 2906 Result = Op000; 2907 } 2908 } 2909 break; 2910 } 2911 case SPUISD::VEC2PREFSLOT: { 2912 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) -> 2913 // <arg> 2914 Result = Op0.getOperand(0); 2915 break; 2916 } 2917 } 2918 break; 2919 } 2920 } 2921 2922 // Otherwise, return unchanged. 
2923#ifndef NDEBUG 2924 if (Result.getNode()) { 2925 DEBUG(cerr << "\nReplace.SPU: "); 2926 DEBUG(N->dump(&DAG)); 2927 DEBUG(cerr << "\nWith: "); 2928 DEBUG(Result.getNode()->dump(&DAG)); 2929 DEBUG(cerr << "\n"); 2930 } 2931#endif 2932 2933 return Result; 2934} 2935 2936//===----------------------------------------------------------------------===// 2937// Inline Assembly Support 2938//===----------------------------------------------------------------------===// 2939 2940/// getConstraintType - Given a constraint letter, return the type of 2941/// constraint it is for this target. 2942SPUTargetLowering::ConstraintType 2943SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const { 2944 if (ConstraintLetter.size() == 1) { 2945 switch (ConstraintLetter[0]) { 2946 default: break; 2947 case 'b': 2948 case 'r': 2949 case 'f': 2950 case 'v': 2951 case 'y': 2952 return C_RegisterClass; 2953 } 2954 } 2955 return TargetLowering::getConstraintType(ConstraintLetter); 2956} 2957 2958std::pair<unsigned, const TargetRegisterClass*> 2959SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 2960 MVT VT) const 2961{ 2962 if (Constraint.size() == 1) { 2963 // GCC RS6000 Constraint Letters 2964 switch (Constraint[0]) { 2965 case 'b': // R1-R31 2966 case 'r': // R0-R31 2967 if (VT == MVT::i64) 2968 return std::make_pair(0U, SPU::R64CRegisterClass); 2969 return std::make_pair(0U, SPU::R32CRegisterClass); 2970 case 'f': 2971 if (VT == MVT::f32) 2972 return std::make_pair(0U, SPU::R32FPRegisterClass); 2973 else if (VT == MVT::f64) 2974 return std::make_pair(0U, SPU::R64FPRegisterClass); 2975 break; 2976 case 'v': 2977 return std::make_pair(0U, SPU::GPRCRegisterClass); 2978 } 2979 } 2980 2981 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 2982} 2983 2984//! 
Compute used/known bits for a SPU operand 2985void 2986SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 2987 const APInt &Mask, 2988 APInt &KnownZero, 2989 APInt &KnownOne, 2990 const SelectionDAG &DAG, 2991 unsigned Depth ) const { 2992#if 0 2993 const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT; 2994 2995 switch (Op.getOpcode()) { 2996 default: 2997 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); 2998 break; 2999 case CALL: 3000 case SHUFB: 3001 case SHUFFLE_MASK: 3002 case CNTB: 3003 case SPUISD::PREFSLOT2VEC: 3004 case SPUISD::LDRESULT: 3005 case SPUISD::VEC2PREFSLOT: 3006 case SPUISD::SHLQUAD_L_BITS: 3007 case SPUISD::SHLQUAD_L_BYTES: 3008 case SPUISD::VEC_SHL: 3009 case SPUISD::VEC_SRL: 3010 case SPUISD::VEC_SRA: 3011 case SPUISD::VEC_ROTL: 3012 case SPUISD::VEC_ROTR: 3013 case SPUISD::ROTBYTES_LEFT: 3014 case SPUISD::SELECT_MASK: 3015 case SPUISD::SELB: 3016 } 3017#endif 3018} 3019 3020unsigned 3021SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, 3022 unsigned Depth) const { 3023 switch (Op.getOpcode()) { 3024 default: 3025 return 1; 3026 3027 case ISD::SETCC: { 3028 MVT VT = Op.getValueType(); 3029 3030 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) { 3031 VT = MVT::i32; 3032 } 3033 return VT.getSizeInBits(); 3034 } 3035 } 3036} 3037 3038// LowerAsmOperandForConstraint 3039void 3040SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 3041 char ConstraintLetter, 3042 bool hasMemory, 3043 std::vector<SDValue> &Ops, 3044 SelectionDAG &DAG) const { 3045 // Default, for the time being, to the base class handler 3046 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory, 3047 Ops, DAG); 3048} 3049 3050/// isLegalAddressImmediate - Return true if the integer value can be used 3051/// as the offset of the target addressing mode. 3052bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, 3053 const Type *Ty) const { 3054 // SPU's addresses are 256K: 3055 return (V > -(1 << 18) && V < (1 << 18) - 1); 3056} 3057 3058bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { 3059 return false; 3060} 3061 3062bool 3063SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 3064 // The SPU target isn't yet aware of offsets. 3065 return false; 3066} 3067