SPUISelLowering.cpp revision 1df30c4061a6b6faaf9641f72b1741a0af4aa532
1//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the SPUTargetLowering class. 11// 12//===----------------------------------------------------------------------===// 13 14#include "SPURegisterNames.h" 15#include "SPUISelLowering.h" 16#include "SPUTargetMachine.h" 17#include "SPUFrameInfo.h" 18#include "llvm/ADT/APInt.h" 19#include "llvm/ADT/VectorExtras.h" 20#include "llvm/CodeGen/CallingConvLower.h" 21#include "llvm/CodeGen/MachineFrameInfo.h" 22#include "llvm/CodeGen/MachineFunction.h" 23#include "llvm/CodeGen/MachineInstrBuilder.h" 24#include "llvm/CodeGen/MachineRegisterInfo.h" 25#include "llvm/CodeGen/SelectionDAG.h" 26#include "llvm/Constants.h" 27#include "llvm/Function.h" 28#include "llvm/Intrinsics.h" 29#include "llvm/Support/Debug.h" 30#include "llvm/Support/MathExtras.h" 31#include "llvm/Target/TargetOptions.h" 32 33#include <map> 34 35using namespace llvm; 36 37// Used in getTargetNodeName() below 38namespace { 39 std::map<unsigned, const char *> node_names; 40 41 //! MVT mapping to useful data for Cell SPU 42 struct valtype_map_s { 43 const MVT valtype; 44 const int prefslot_byte; 45 }; 46 47 const valtype_map_s valtype_map[] = { 48 { MVT::i1, 3 }, 49 { MVT::i8, 3 }, 50 { MVT::i16, 2 }, 51 { MVT::i32, 0 }, 52 { MVT::f32, 0 }, 53 { MVT::i64, 0 }, 54 { MVT::f64, 0 }, 55 { MVT::i128, 0 } 56 }; 57 58 const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]); 59 60 const valtype_map_s *getValueTypeMapEntry(MVT VT) { 61 const valtype_map_s *retval = 0; 62 63 for (size_t i = 0; i < n_valtype_map; ++i) { 64 if (valtype_map[i].valtype == VT) { 65 retval = valtype_map + i; 66 break; 67 } 68 } 69 70#ifndef NDEBUG 71 if (retval == 0) { 72 cerr << "getValueTypeMapEntry returns NULL for " 73 << VT.getMVTString() 74 << "\n"; 75 abort(); 76 } 77#endif 78 79 return retval; 80 } 81} 82 83SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM) 84 : TargetLowering(TM), 85 SPUTM(TM) 86{ 87 // Fold away setcc operations if possible. 88 setPow2DivIsCheap(); 89 90 // Use _setjmp/_longjmp instead of setjmp/longjmp. 91 setUseUnderscoreSetJmp(true); 92 setUseUnderscoreLongJmp(true); 93 94 // Set up the SPU's register classes: 95 addRegisterClass(MVT::i8, SPU::R8CRegisterClass); 96 addRegisterClass(MVT::i16, SPU::R16CRegisterClass); 97 addRegisterClass(MVT::i32, SPU::R32CRegisterClass); 98 addRegisterClass(MVT::i64, SPU::R64CRegisterClass); 99 addRegisterClass(MVT::f32, SPU::R32FPRegisterClass); 100 addRegisterClass(MVT::f64, SPU::R64FPRegisterClass); 101 addRegisterClass(MVT::i128, SPU::GPRCRegisterClass); 102 103 // SPU has no sign or zero extended loads for i1, i8, i16: 104 setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); 105 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); 106 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); 107 108 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); 109 setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); 110 111 // SPU constant load actions are custom lowered: 112 setOperationAction(ISD::Constant, MVT::i64, Custom); 113 setOperationAction(ISD::ConstantFP, MVT::f32, Legal); 114 setOperationAction(ISD::ConstantFP, MVT::f64, Custom); 115 116 // SPU's loads and stores have to be custom lowered: 117 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128; 118 ++sctype) { 119 MVT VT = (MVT::SimpleValueType)sctype; 120 121 setOperationAction(ISD::LOAD, VT, Custom); 122 setOperationAction(ISD::STORE, VT, Custom); 123 setLoadExtAction(ISD::EXTLOAD, VT, Custom); 124 setLoadExtAction(ISD::ZEXTLOAD, VT, Custom); 125 setLoadExtAction(ISD::SEXTLOAD, VT, Custom); 126 127 // SMUL_LOHI, UMUL_LOHI are not legal for Cell: 128 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 129 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 130 131 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) { 132 MVT StoreVT = (MVT::SimpleValueType) stype; 133 setTruncStoreAction(VT, StoreVT, Expand); 134 } 135 } 136 137 for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64; 138 ++sctype) { 139 MVT VT = (MVT::SimpleValueType) sctype; 140 141 setOperationAction(ISD::LOAD, VT, Custom); 142 setOperationAction(ISD::STORE, VT, Custom); 143 144 for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) { 145 MVT StoreVT = (MVT::SimpleValueType) stype; 146 setTruncStoreAction(VT, StoreVT, Expand); 147 } 148 } 149 150 // Custom lower BRCOND for i8 to "promote" the result to whatever the result 151 // operand happens to be: 152 setOperationAction(ISD::BRCOND, MVT::Other, Custom); 153 154 // Expand the jumptable branches 155 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 156 setOperationAction(ISD::BR_CC, MVT::Other, Expand); 157 158 // Custom lower SELECT_CC for most cases, but expand by default 159 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); 160 setOperationAction(ISD::SELECT_CC, MVT::i8, Custom); 161 setOperationAction(ISD::SELECT_CC, MVT::i16, Custom); 162 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 163 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); 164 165 // SPU has no intrinsics for these particular operations: 166 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); 167 168 // SPU has no SREM/UREM instructions 169 setOperationAction(ISD::SREM, MVT::i32, Expand); 170 setOperationAction(ISD::UREM, MVT::i32, Expand); 171 setOperationAction(ISD::SREM, MVT::i64, Expand); 172 setOperationAction(ISD::UREM, MVT::i64, Expand); 173 174 // We don't support sin/cos/sqrt/fmod 175 setOperationAction(ISD::FSIN , MVT::f64, Expand); 176 setOperationAction(ISD::FCOS , MVT::f64, Expand); 177 setOperationAction(ISD::FREM , MVT::f64, Expand); 178 setOperationAction(ISD::FSIN , MVT::f32, Expand); 179 setOperationAction(ISD::FCOS , MVT::f32, Expand); 180 setOperationAction(ISD::FREM , MVT::f32, Expand); 181 182 // If we're enabling GP optimizations, use hardware square root 183 setOperationAction(ISD::FSQRT, MVT::f64, Expand); 184 setOperationAction(ISD::FSQRT, MVT::f32, Expand); 185 186 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 187 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 188 189 // SPU can do rotate right and left, so legalize it... but customize for i8 190 // because instructions don't exist. 191 192 // FIXME: Change from "expand" to appropriate type once ROTR is supported in 193 // .td files. 194 setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/); 195 setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/); 196 setOperationAction(ISD::ROTR, MVT::i8, Expand /*Custom*/); 197 198 setOperationAction(ISD::ROTL, MVT::i32, Legal); 199 setOperationAction(ISD::ROTL, MVT::i16, Legal); 200 setOperationAction(ISD::ROTL, MVT::i8, Custom); 201 202 // SPU has no native version of shift left/right for i8 203 setOperationAction(ISD::SHL, MVT::i8, Custom); 204 setOperationAction(ISD::SRL, MVT::i8, Custom); 205 setOperationAction(ISD::SRA, MVT::i8, Custom); 206 207 // SPU needs custom lowering for shift left/right for i64 208 setOperationAction(ISD::SHL, MVT::i64, Custom); 209 setOperationAction(ISD::SRL, MVT::i64, Custom); 210 setOperationAction(ISD::SRA, MVT::i64, Custom); 211 212 // Custom lower i8, i32 and i64 multiplications 213 setOperationAction(ISD::MUL, MVT::i8, Custom); 214 setOperationAction(ISD::MUL, MVT::i32, Legal); 215 setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall 216 217 // Need to custom handle (some) common i8, i64 math ops 218 setOperationAction(ISD::ADD, MVT::i64, Custom); 219 setOperationAction(ISD::SUB, MVT::i8, Custom); 220 setOperationAction(ISD::SUB, MVT::i64, Custom); 221 222 // SPU does not have BSWAP. It does have i32 support CTLZ. 223 // CTPOP has to be custom lowered. 224 setOperationAction(ISD::BSWAP, MVT::i32, Expand); 225 setOperationAction(ISD::BSWAP, MVT::i64, Expand); 226 227 setOperationAction(ISD::CTPOP, MVT::i8, Custom); 228 setOperationAction(ISD::CTPOP, MVT::i16, Custom); 229 setOperationAction(ISD::CTPOP, MVT::i32, Custom); 230 setOperationAction(ISD::CTPOP, MVT::i64, Custom); 231 232 setOperationAction(ISD::CTTZ , MVT::i32, Expand); 233 setOperationAction(ISD::CTTZ , MVT::i64, Expand); 234 235 setOperationAction(ISD::CTLZ , MVT::i32, Legal); 236 237 // SPU has a version of select that implements (a&~c)|(b&c), just like 238 // select ought to work: 239 setOperationAction(ISD::SELECT, MVT::i8, Legal); 240 setOperationAction(ISD::SELECT, MVT::i16, Legal); 241 setOperationAction(ISD::SELECT, MVT::i32, Legal); 242 setOperationAction(ISD::SELECT, MVT::i64, Legal); 243 244 setOperationAction(ISD::SETCC, MVT::i8, Legal); 245 setOperationAction(ISD::SETCC, MVT::i16, Legal); 246 setOperationAction(ISD::SETCC, MVT::i32, Legal); 247 setOperationAction(ISD::SETCC, MVT::i64, Legal); 248 249 // Zero extension and sign extension for i64 have to be 250 // custom legalized 251 setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom); 252 setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom); 253 setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom); 254 255 // Custom lower i128 -> i64 truncates 256 setOperationAction(ISD::TRUNCATE, MVT::i64, Custom); 257 258 // SPU has a legal FP -> signed INT instruction 259 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); 260 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 261 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); 262 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); 263 264 // FDIV on SPU requires custom lowering 265 setOperationAction(ISD::FDIV, MVT::f32, Custom); 266 setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall 267 268 // SPU has [U|S]INT_TO_FP 269 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); 270 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); 271 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); 272 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); 273 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); 274 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); 275 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 276 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); 277 278 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal); 279 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal); 280 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal); 281 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal); 282 283 // We cannot sextinreg(i1). Expand to shifts. 284 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 285 286 // Support label based line numbers. 287 setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); 288 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); 289 290 // We want to legalize GlobalAddress and ConstantPool nodes into the 291 // appropriate instructions to materialize the address. 292 for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128; 293 ++sctype) { 294 MVT VT = (MVT::SimpleValueType)sctype; 295 296 setOperationAction(ISD::GlobalAddress, VT, Custom); 297 setOperationAction(ISD::ConstantPool, VT, Custom); 298 setOperationAction(ISD::JumpTable, VT, Custom); 299 } 300 301 // RET must be custom lowered, to meet ABI requirements 302 setOperationAction(ISD::RET, MVT::Other, Custom); 303 304 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 305 setOperationAction(ISD::VASTART , MVT::Other, Custom); 306 307 // Use the default implementation. 308 setOperationAction(ISD::VAARG , MVT::Other, Expand); 309 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 310 setOperationAction(ISD::VAEND , MVT::Other, Expand); 311 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); 312 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); 313 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); 314 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Expand); 315 316 // Cell SPU has instructions for converting between i64 and fp. 317 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 318 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 319 320 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT 321 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); 322 323 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or 324 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); 325 326 // First set operation action for all vector types to expand. Then we 327 // will selectively turn on ones that can be effectively codegen'd. 328 addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass); 329 addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass); 330 addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass); 331 addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass); 332 addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass); 333 addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass); 334 335 for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 336 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { 337 MVT VT = (MVT::SimpleValueType)i; 338 339 // add/sub are legal for all supported vector VT's. 340 setOperationAction(ISD::ADD , VT, Legal); 341 setOperationAction(ISD::SUB , VT, Legal); 342 // mul has to be custom lowered. 343 setOperationAction(ISD::MUL , VT, Custom); 344 345 setOperationAction(ISD::AND , VT, Legal); 346 setOperationAction(ISD::OR , VT, Legal); 347 setOperationAction(ISD::XOR , VT, Legal); 348 setOperationAction(ISD::LOAD , VT, Legal); 349 setOperationAction(ISD::SELECT, VT, Legal); 350 setOperationAction(ISD::STORE, VT, Legal); 351 352 // These operations need to be expanded: 353 setOperationAction(ISD::SDIV, VT, Expand); 354 setOperationAction(ISD::SREM, VT, Expand); 355 setOperationAction(ISD::UDIV, VT, Expand); 356 setOperationAction(ISD::UREM, VT, Expand); 357 setOperationAction(ISD::FDIV, VT, Custom); 358 359 // Custom lower build_vector, constant pool spills, insert and 360 // extract vector elements: 361 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 362 setOperationAction(ISD::ConstantPool, VT, Custom); 363 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); 364 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 365 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 366 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 367 } 368 369 setOperationAction(ISD::AND, MVT::v16i8, Custom); 370 setOperationAction(ISD::OR, MVT::v16i8, Custom); 371 setOperationAction(ISD::XOR, MVT::v16i8, Custom); 372 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); 373 374 // FIXME: This is only temporary until I put all vector multiplications in 375 // SPUInstrInfo.td: 376 setOperationAction(ISD::MUL, MVT::v4i32, Legal); 377 378 setShiftAmountType(MVT::i32); 379 setBooleanContents(ZeroOrNegativeOneBooleanContent); 380 381 setStackPointerRegisterToSaveRestore(SPU::R1); 382 383 // We have target-specific dag combine patterns for the following nodes: 384 setTargetDAGCombine(ISD::ADD); 385 setTargetDAGCombine(ISD::ZERO_EXTEND); 386 setTargetDAGCombine(ISD::SIGN_EXTEND); 387 setTargetDAGCombine(ISD::ANY_EXTEND); 388 389 computeRegisterProperties(); 390 391 // Set pre-RA register scheduler default to BURR, which produces slightly 392 // better code than the default (could also be TDRR, but TargetLowering.h 393 // needs a mod to support that model): 394 setSchedulingPreference(SchedulingForRegPressure); 395} 396 397const char * 398SPUTargetLowering::getTargetNodeName(unsigned Opcode) const 399{ 400 if (node_names.empty()) { 401 node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG"; 402 node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi"; 403 node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo"; 404 node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr"; 405 node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr"; 406 node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr"; 407 node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT"; 408 node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL"; 409 node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB"; 410 node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK"; 411 node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB"; 412 node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC"; 413 node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT"; 414 node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY"; 415 node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU"; 416 node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH"; 417 node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH"; 418 node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS"; 419 node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES"; 420 node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL"; 421 node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL"; 422 node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA"; 423 node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL"; 424 node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR"; 425 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] = 426 "SPUISD::ROTQUAD_RZ_BYTES"; 427 node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] = 428 "SPUISD::ROTQUAD_RZ_BITS"; 429 node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT"; 430 node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] = 431 "SPUISD::ROTBYTES_LEFT_BITS"; 432 node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK"; 433 node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB"; 434 node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED"; 435 node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE"; 436 node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED"; 437 node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE"; 438 node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp"; 439 node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst"; 440 node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64"; 441 } 442 443 std::map<unsigned, const char *>::iterator i = node_names.find(Opcode); 444 445 return ((i != node_names.end()) ? i->second : 0); 446} 447 448//===----------------------------------------------------------------------===// 449// Return the Cell SPU's SETCC result type 450//===----------------------------------------------------------------------===// 451 452MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const { 453 MVT VT = Op.getValueType(); 454 // i16 and i32 are valid SETCC result types 455 return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32); 456} 457 458//===----------------------------------------------------------------------===// 459// Calling convention code: 460//===----------------------------------------------------------------------===// 461 462#include "SPUGenCallingConv.inc" 463 464//===----------------------------------------------------------------------===// 465// LowerOperation implementation 466//===----------------------------------------------------------------------===// 467 468/// Custom lower loads for CellSPU 469/*! 470 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements 471 within a 16-byte block, we have to rotate to extract the requested element. 472 473 For extending loads, we also want to ensure that the following sequence is 474 emitted, e.g. for MVT::f32 extending load to MVT::f64: 475 476\verbatim 477%1 v16i8,ch = load 478%2 v16i8,ch = rotate %1 479%3 v4f8, ch = bitconvert %2 480%4 f32 = vec2perfslot %3 481%5 f64 = fp_extend %4 482\endverbatim 483*/ 484static SDValue 485LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { 486 LoadSDNode *LN = cast<LoadSDNode>(Op); 487 SDValue the_chain = LN->getChain(); 488 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 489 MVT InVT = LN->getMemoryVT(); 490 MVT OutVT = Op.getValueType(); 491 ISD::LoadExtType ExtType = LN->getExtensionType(); 492 unsigned alignment = LN->getAlignment(); 493 const valtype_map_s *vtm = getValueTypeMapEntry(InVT); 494 495 switch (LN->getAddressingMode()) { 496 case ISD::UNINDEXED: { 497 SDValue result; 498 SDValue basePtr = LN->getBasePtr(); 499 SDValue rotate; 500 501 if (alignment == 16) { 502 ConstantSDNode *CN; 503 504 // Special cases for a known aligned load to simplify the base pointer 505 // and the rotation amount: 506 if (basePtr.getOpcode() == ISD::ADD 507 && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) { 508 // Known offset into basePtr 509 int64_t offset = CN->getSExtValue(); 510 int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte); 511 512 if (rotamt < 0) 513 rotamt += 16; 514 515 rotate = DAG.getConstant(rotamt, MVT::i16); 516 517 // Simplify the base pointer for this case: 518 basePtr = basePtr.getOperand(0); 519 if ((offset & ~0xf) > 0) { 520 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, 521 basePtr, 522 DAG.getConstant((offset & ~0xf), PtrVT)); 523 } 524 } else if ((basePtr.getOpcode() == SPUISD::AFormAddr) 525 || (basePtr.getOpcode() == SPUISD::IndirectAddr 526 && basePtr.getOperand(0).getOpcode() == SPUISD::Hi 527 && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) { 528 // Plain aligned a-form address: rotate into preferred slot 529 // Same for (SPUindirect (SPUhi ...), (SPUlo ...)) 530 int64_t rotamt = -vtm->prefslot_byte; 531 if (rotamt < 0) 532 rotamt += 16; 533 rotate = DAG.getConstant(rotamt, MVT::i16); 534 } else { 535 // Offset the rotate amount by the basePtr and the preferred slot 536 // byte offset 537 int64_t rotamt = -vtm->prefslot_byte; 538 if (rotamt < 0) 539 rotamt += 16; 540 rotate = DAG.getNode(ISD::ADD, PtrVT, 541 basePtr, 542 DAG.getConstant(rotamt, PtrVT)); 543 } 544 } else { 545 // Unaligned load: must be more pessimistic about addressing modes: 546 if (basePtr.getOpcode() == ISD::ADD) { 547 MachineFunction &MF = DAG.getMachineFunction(); 548 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 549 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); 550 SDValue Flag; 551 552 SDValue Op0 = basePtr.getOperand(0); 553 SDValue Op1 = basePtr.getOperand(1); 554 555 if (isa<ConstantSDNode>(Op1)) { 556 // Convert the (add <ptr>, <const>) to an indirect address contained 557 // in a register. Note that this is done because we need to avoid 558 // creating a 0(reg) d-form address due to the SPU's block loads. 559 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1); 560 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag); 561 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT); 562 } else { 563 // Convert the (add <arg1>, <arg2>) to an indirect address, which 564 // will likely be lowered as a reg(reg) x-form address. 565 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1); 566 } 567 } else { 568 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, 569 basePtr, 570 DAG.getConstant(0, PtrVT)); 571 } 572 573 // Offset the rotate amount by the basePtr and the preferred slot 574 // byte offset 575 rotate = DAG.getNode(ISD::ADD, PtrVT, 576 basePtr, 577 DAG.getConstant(-vtm->prefslot_byte, PtrVT)); 578 } 579 580 // Re-emit as a v16i8 vector load 581 result = DAG.getLoad(MVT::v16i8, the_chain, basePtr, 582 LN->getSrcValue(), LN->getSrcValueOffset(), 583 LN->isVolatile(), 16); 584 585 // Update the chain 586 the_chain = result.getValue(1); 587 588 // Rotate into the preferred slot: 589 result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8, 590 result.getValue(0), rotate); 591 592 // Convert the loaded v16i8 vector to the appropriate vector type 593 // specified by the operand: 594 MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits())); 595 result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT, 596 DAG.getNode(ISD::BIT_CONVERT, vecVT, result)); 597 598 // Handle extending loads by extending the scalar result: 599 if (ExtType == ISD::SEXTLOAD) { 600 result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result); 601 } else if (ExtType == ISD::ZEXTLOAD) { 602 result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result); 603 } else if (ExtType == ISD::EXTLOAD) { 604 unsigned NewOpc = ISD::ANY_EXTEND; 605 606 if (OutVT.isFloatingPoint()) 607 NewOpc = ISD::FP_EXTEND; 608 609 result = DAG.getNode(NewOpc, OutVT, result); 610 } 611 612 SDVTList retvts = DAG.getVTList(OutVT, MVT::Other); 613 SDValue retops[2] = { 614 result, 615 the_chain 616 }; 617 618 result = DAG.getNode(SPUISD::LDRESULT, retvts, 619 retops, sizeof(retops) / sizeof(retops[0])); 620 return result; 621 } 622 case ISD::PRE_INC: 623 case ISD::PRE_DEC: 624 case ISD::POST_INC: 625 case ISD::POST_DEC: 626 case ISD::LAST_INDEXED_MODE: 627 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than " 628 "UNINDEXED\n"; 629 cerr << (unsigned) LN->getAddressingMode() << "\n"; 630 abort(); 631 /*NOTREACHED*/ 632 } 633 634 return SDValue(); 635} 636 637/// Custom lower stores for CellSPU 638/*! 639 All CellSPU stores are aligned to 16-byte boundaries, so for elements 640 within a 16-byte block, we have to generate a shuffle to insert the 641 requested element into its place, then store the resulting block. 642 */ 643static SDValue 644LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { 645 StoreSDNode *SN = cast<StoreSDNode>(Op); 646 SDValue Value = SN->getValue(); 647 MVT VT = Value.getValueType(); 648 MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT()); 649 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 650 unsigned alignment = SN->getAlignment(); 651 652 switch (SN->getAddressingMode()) { 653 case ISD::UNINDEXED: { 654 // The vector type we really want to load from the 16-byte chunk. 655 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())), 656 stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits())); 657 658 SDValue alignLoadVec; 659 SDValue basePtr = SN->getBasePtr(); 660 SDValue the_chain = SN->getChain(); 661 SDValue insertEltOffs; 662 663 if (alignment == 16) { 664 ConstantSDNode *CN; 665 666 // Special cases for a known aligned load to simplify the base pointer 667 // and insertion byte: 668 if (basePtr.getOpcode() == ISD::ADD 669 && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) { 670 // Known offset into basePtr 671 int64_t offset = CN->getSExtValue(); 672 673 // Simplify the base pointer for this case: 674 basePtr = basePtr.getOperand(0); 675 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT, 676 basePtr, 677 DAG.getConstant((offset & 0xf), PtrVT)); 678 679 if ((offset & ~0xf) > 0) { 680 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, 681 basePtr, 682 DAG.getConstant((offset & ~0xf), PtrVT)); 683 } 684 } else { 685 // Otherwise, assume it's at byte 0 of basePtr 686 insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT, 687 basePtr, 688 DAG.getConstant(0, PtrVT)); 689 } 690 } else { 691 // Unaligned load: must be more pessimistic about addressing modes: 692 if (basePtr.getOpcode() == ISD::ADD) { 693 MachineFunction &MF = DAG.getMachineFunction(); 694 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 695 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); 696 SDValue Flag; 697 698 SDValue Op0 = basePtr.getOperand(0); 699 SDValue Op1 = basePtr.getOperand(1); 700 701 if (isa<ConstantSDNode>(Op1)) { 702 // Convert the (add <ptr>, <const>) to an indirect address contained 703 // in a register. Note that this is done because we need to avoid 704 // creating a 0(reg) d-form address due to the SPU's block loads. 705 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1); 706 the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag); 707 basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT); 708 } else { 709 // Convert the (add <arg1>, <arg2>) to an indirect address, which 710 // will likely be lowered as a reg(reg) x-form address. 711 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1); 712 } 713 } else { 714 basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, 715 basePtr, 716 DAG.getConstant(0, PtrVT)); 717 } 718 719 // Insertion point is solely determined by basePtr's contents 720 insertEltOffs = DAG.getNode(ISD::ADD, PtrVT, 721 basePtr, 722 DAG.getConstant(0, PtrVT)); 723 } 724 725 // Re-emit as a v16i8 vector load 726 alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr, 727 SN->getSrcValue(), SN->getSrcValueOffset(), 728 SN->isVolatile(), 16); 729 730 // Update the chain 731 the_chain = alignLoadVec.getValue(1); 732 733 LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec); 734 SDValue theValue = SN->getValue(); 735 SDValue result; 736 737 if (StVT != VT 738 && (theValue.getOpcode() == ISD::AssertZext 739 || theValue.getOpcode() == ISD::AssertSext)) { 740 // Drill down and get the value for zero- and sign-extended 741 // quantities 742 theValue = theValue.getOperand(0); 743 } 744 745 // If the base pointer is already a D-form address, then just create 746 // a new D-form address with a slot offset and the orignal base pointer. 747 // Otherwise generate a D-form address with the slot offset relative 748 // to the stack pointer, which is always aligned. 749#if !defined(NDEBUG) 750 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 751 cerr << "CellSPU LowerSTORE: basePtr = "; 752 basePtr.getNode()->dump(&DAG); 753 cerr << "\n"; 754 } 755#endif 756 757 SDValue insertEltOp = 758 DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs); 759 SDValue vectorizeOp = 760 DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue); 761 762 result = DAG.getNode(SPUISD::SHUFB, vecVT, 763 vectorizeOp, alignLoadVec, 764 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp)); 765 766 result = DAG.getStore(the_chain, result, basePtr, 767 LN->getSrcValue(), LN->getSrcValueOffset(), 768 LN->isVolatile(), LN->getAlignment()); 769 770#if 0 && !defined(NDEBUG) 771 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 772 const SDValue ¤tRoot = DAG.getRoot(); 773 774 DAG.setRoot(result); 775 cerr << "------- CellSPU:LowerStore result:\n"; 776 DAG.dump(); 777 cerr << "-------\n"; 778 DAG.setRoot(currentRoot); 779 } 780#endif 781 782 return result; 783 /*UNREACHED*/ 784 } 785 case ISD::PRE_INC: 786 case ISD::PRE_DEC: 787 case ISD::POST_INC: 788 case ISD::POST_DEC: 789 case ISD::LAST_INDEXED_MODE: 790 cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than " 791 "UNINDEXED\n"; 792 cerr << (unsigned) SN->getAddressingMode() << "\n"; 793 abort(); 794 /*NOTREACHED*/ 795 } 796 797 return SDValue(); 798} 799 800/// Generate the address of a constant pool entry. 801static SDValue 802LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { 803 MVT PtrVT = Op.getValueType(); 804 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 805 Constant *C = CP->getConstVal(); 806 SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); 807 SDValue Zero = DAG.getConstant(0, PtrVT); 808 const TargetMachine &TM = DAG.getTarget(); 809 810 if (TM.getRelocationModel() == Reloc::Static) { 811 if (!ST->usingLargeMem()) { 812 // Just return the SDValue with the constant pool address in it. 813 return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero); 814 } else { 815 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero); 816 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero); 817 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo); 818 } 819 } 820 821 assert(0 && 822 "LowerConstantPool: Relocation model other than static" 823 " not supported."); 824 return SDValue(); 825} 826 827static SDValue 828LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { 829 MVT PtrVT = Op.getValueType(); 830 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 831 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 832 SDValue Zero = DAG.getConstant(0, PtrVT); 833 const TargetMachine &TM = DAG.getTarget(); 834 835 if (TM.getRelocationModel() == Reloc::Static) { 836 if (!ST->usingLargeMem()) { 837 return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero); 838 } else { 839 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero); 840 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero); 841 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo); 842 } 843 } 844 845 assert(0 && 846 "LowerJumpTable: Relocation model other than static not supported."); 847 return SDValue(); 848} 849 850static SDValue 851LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { 852 MVT PtrVT = Op.getValueType(); 853 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 854 GlobalValue *GV = GSDN->getGlobal(); 855 SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); 856 const TargetMachine &TM = DAG.getTarget(); 857 SDValue Zero = DAG.getConstant(0, PtrVT); 858 859 if (TM.getRelocationModel() == Reloc::Static) { 860 if (!ST->usingLargeMem()) { 861 return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero); 862 } else { 863 SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero); 864 SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero); 865 return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo); 866 } 867 } else { 868 cerr << "LowerGlobalAddress: Relocation model other than static not " 869 << "supported.\n"; 870 abort(); 871 /*NOTREACHED*/ 872 } 873 874 return SDValue(); 875} 876 877//! Custom lower i64 integer constants 878/*! 879 This code inserts all of the necessary juggling that needs to occur to load 880 a 64-bit constant into a register. 881 */ 882static SDValue 883LowerConstant(SDValue Op, SelectionDAG &DAG) { 884 MVT VT = Op.getValueType(); 885 886 if (VT == MVT::i64) { 887 ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode()); 888 SDValue T = DAG.getConstant(CN->getZExtValue(), VT); 889 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, 890 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T)); 891 } else { 892 cerr << "LowerConstant: unhandled constant type " 893 << VT.getMVTString() 894 << "\n"; 895 abort(); 896 /*NOTREACHED*/ 897 } 898 899 return SDValue(); 900} 901 902//! Custom lower double precision floating point constants 903static SDValue 904LowerConstantFP(SDValue Op, SelectionDAG &DAG) { 905 MVT VT = Op.getValueType(); 906 907 if (VT == MVT::f64) { 908 ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode()); 909 910 assert((FP != 0) && 911 "LowerConstantFP: Node is not ConstantFPSDNode"); 912 913 uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble()); 914 SDValue T = DAG.getConstant(dbits, MVT::i64); 915 SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T); 916 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, 917 DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec)); 918 } 919 920 return SDValue(); 921} 922 923static SDValue 924LowerBRCOND(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { 925 SDValue Cond = Op.getOperand(1); 926 MVT CondVT = Cond.getValueType(); 927 unsigned CondOpc; 928 929 if (CondVT == MVT::i8) { 930 SDValue CondOp0 = Cond.getOperand(0); 931 if (Cond.getOpcode() == ISD::TRUNCATE) { 932 // Use the truncate's value type and ANY_EXTEND the condition (DAGcombine 933 // will then remove the truncate) 934 CondVT = CondOp0.getValueType(); 935 CondOpc = ISD::ANY_EXTEND; 936 } else { 937 CondVT = MVT::i32; // default to something reasonable 938 CondOpc = ISD::ZERO_EXTEND; 939 } 940 941 Cond = DAG.getNode(CondOpc, CondVT, Op.getOperand(1)); 942 943 return DAG.getNode(ISD::BRCOND, Op.getValueType(), 944 Op.getOperand(0), Cond, Op.getOperand(2)); 945 } 946 947 return SDValue(); // Unchanged 948} 949 950static SDValue 951LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex) 952{ 953 MachineFunction &MF = DAG.getMachineFunction(); 954 MachineFrameInfo *MFI = MF.getFrameInfo(); 955 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 956 SmallVector<SDValue, 48> ArgValues; 957 SDValue Root = Op.getOperand(0); 958 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0; 959 960 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); 961 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); 962 963 unsigned ArgOffset = SPUFrameInfo::minStackSize(); 964 unsigned ArgRegIdx = 0; 965 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); 966 967 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 968 969 // Add DAG nodes to load the arguments or copy them out of registers. 970 for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1; 971 ArgNo != e; ++ArgNo) { 972 MVT ObjectVT = Op.getValue(ArgNo).getValueType(); 973 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 974 SDValue ArgVal; 975 976 if (ArgRegIdx < NumArgRegs) { 977 const TargetRegisterClass *ArgRegClass; 978 979 switch (ObjectVT.getSimpleVT()) { 980 default: { 981 cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: " 982 << ObjectVT.getMVTString() 983 << "\n"; 984 abort(); 985 } 986 case MVT::i8: 987 ArgRegClass = &SPU::R8CRegClass; 988 break; 989 case MVT::i16: 990 ArgRegClass = &SPU::R16CRegClass; 991 break; 992 case MVT::i32: 993 ArgRegClass = &SPU::R32CRegClass; 994 break; 995 case MVT::i64: 996 ArgRegClass = &SPU::R64CRegClass; 997 break; 998 case MVT::f32: 999 ArgRegClass = &SPU::R32FPRegClass; 1000 break; 1001 case MVT::f64: 1002 ArgRegClass = &SPU::R64FPRegClass; 1003 break; 1004 case MVT::v2f64: 1005 case MVT::v4f32: 1006 case MVT::v2i64: 1007 case MVT::v4i32: 1008 case MVT::v8i16: 1009 case MVT::v16i8: 1010 ArgRegClass = &SPU::VECREGRegClass; 1011 break; 1012 } 1013 1014 unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass); 1015 RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg); 1016 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT); 1017 ++ArgRegIdx; 1018 } else { 1019 // We need to load the argument to a virtual register if we determined 1020 // above that we ran out of physical registers of the appropriate type 1021 // or we're forced to do vararg 1022 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 1023 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 1024 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0); 1025 ArgOffset += StackSlotSize; 1026 } 1027 1028 ArgValues.push_back(ArgVal); 1029 // Update the chain 1030 Root = ArgVal.getOperand(0); 1031 } 1032 1033 // vararg handling: 1034 if (isVarArg) { 1035 // unsigned int ptr_size = PtrVT.getSizeInBits() / 8; 1036 // We will spill (79-3)+1 registers to the stack 1037 SmallVector<SDValue, 79-3+1> MemOps; 1038 1039 // Create the frame slot 1040 1041 for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { 1042 VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset); 1043 SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); 1044 SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8); 1045 SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0); 1046 Root = Store.getOperand(0); 1047 MemOps.push_back(Store); 1048 1049 // Increment address by stack slot size for the next stored argument 1050 ArgOffset += StackSlotSize; 1051 } 1052 if (!MemOps.empty()) 1053 Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size()); 1054 } 1055 1056 ArgValues.push_back(Root); 1057 1058 // Return the new list of results. 1059 return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(), 1060 &ArgValues[0], ArgValues.size()); 1061} 1062 1063/// isLSAAddress - Return the immediate to use if the specified 1064/// value is representable as a LSA address. 1065static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) { 1066 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 1067 if (!C) return 0; 1068 1069 int Addr = C->getZExtValue(); 1070 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 1071 (Addr << 14 >> 14) != Addr) 1072 return 0; // Top 14 bits have to be sext of immediate. 1073 1074 return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode(); 1075} 1076 1077static 1078SDValue 1079LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { 1080 CallSDNode *TheCall = cast<CallSDNode>(Op.getNode()); 1081 SDValue Chain = TheCall->getChain(); 1082 SDValue Callee = TheCall->getCallee(); 1083 unsigned NumOps = TheCall->getNumArgs(); 1084 unsigned StackSlotSize = SPUFrameInfo::stackSlotSize(); 1085 const unsigned *ArgRegs = SPURegisterInfo::getArgRegs(); 1086 const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs(); 1087 1088 // Handy pointer type 1089 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1090 1091 // Accumulate how many bytes are to be pushed on the stack, including the 1092 // linkage area, and parameter passing area. According to the SPU ABI, 1093 // we minimally need space for [LR] and [SP] 1094 unsigned NumStackBytes = SPUFrameInfo::minStackSize(); 1095 1096 // Set up a copy of the stack pointer for use loading and storing any 1097 // arguments that may not fit in the registers available for argument 1098 // passing. 1099 SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32); 1100 1101 // Figure out which arguments are going to go in registers, and which in 1102 // memory. 1103 unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR] 1104 unsigned ArgRegIdx = 0; 1105 1106 // Keep track of registers passing arguments 1107 std::vector<std::pair<unsigned, SDValue> > RegsToPass; 1108 // And the arguments passed on the stack 1109 SmallVector<SDValue, 8> MemOpChains; 1110 1111 for (unsigned i = 0; i != NumOps; ++i) { 1112 SDValue Arg = TheCall->getArg(i); 1113 1114 // PtrOff will be used to store the current argument to the stack if a 1115 // register cannot be found for it. 1116 SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 1117 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff); 1118 1119 switch (Arg.getValueType().getSimpleVT()) { 1120 default: assert(0 && "Unexpected ValueType for argument!"); 1121 case MVT::i32: 1122 case MVT::i64: 1123 case MVT::i128: 1124 if (ArgRegIdx != NumArgRegs) { 1125 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); 1126 } else { 1127 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); 1128 ArgOffset += StackSlotSize; 1129 } 1130 break; 1131 case MVT::f32: 1132 case MVT::f64: 1133 if (ArgRegIdx != NumArgRegs) { 1134 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); 1135 } else { 1136 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); 1137 ArgOffset += StackSlotSize; 1138 } 1139 break; 1140 case MVT::v2i64: 1141 case MVT::v2f64: 1142 case MVT::v4f32: 1143 case MVT::v4i32: 1144 case MVT::v8i16: 1145 case MVT::v16i8: 1146 if (ArgRegIdx != NumArgRegs) { 1147 RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg)); 1148 } else { 1149 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); 1150 ArgOffset += StackSlotSize; 1151 } 1152 break; 1153 } 1154 } 1155 1156 // Update number of stack bytes actually used, insert a call sequence start 1157 NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize()); 1158 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes, 1159 true)); 1160 1161 if (!MemOpChains.empty()) { 1162 // Adjust the stack pointer for the stack arguments. 1163 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 1164 &MemOpChains[0], MemOpChains.size()); 1165 } 1166 1167 // Build a sequence of copy-to-reg nodes chained together with token chain 1168 // and flag operands which copy the outgoing args into the appropriate regs. 1169 SDValue InFlag; 1170 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1171 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 1172 InFlag); 1173 InFlag = Chain.getValue(1); 1174 } 1175 1176 SmallVector<SDValue, 8> Ops; 1177 unsigned CallOpc = SPUISD::CALL; 1178 1179 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every 1180 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol 1181 // node so that legalize doesn't hack it. 1182 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1183 GlobalValue *GV = G->getGlobal(); 1184 MVT CalleeVT = Callee.getValueType(); 1185 SDValue Zero = DAG.getConstant(0, PtrVT); 1186 SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT); 1187 1188 if (!ST->usingLargeMem()) { 1189 // Turn calls to targets that are defined (i.e., have bodies) into BRSL 1190 // style calls, otherwise, external symbols are BRASL calls. This assumes 1191 // that declared/defined symbols are in the same compilation unit and can 1192 // be reached through PC-relative jumps. 1193 // 1194 // NOTE: 1195 // This may be an unsafe assumption for JIT and really large compilation 1196 // units. 1197 if (GV->isDeclaration()) { 1198 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero); 1199 } else { 1200 Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero); 1201 } 1202 } else { 1203 // "Large memory" mode: Turn all calls into indirect calls with a X-form 1204 // address pairs: 1205 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero); 1206 } 1207 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1208 MVT CalleeVT = Callee.getValueType(); 1209 SDValue Zero = DAG.getConstant(0, PtrVT); 1210 SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(), 1211 Callee.getValueType()); 1212 1213 if (!ST->usingLargeMem()) { 1214 Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero); 1215 } else { 1216 Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero); 1217 } 1218 } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) { 1219 // If this is an absolute destination address that appears to be a legal 1220 // local store address, use the munged value. 1221 Callee = SDValue(Dest, 0); 1222 } 1223 1224 Ops.push_back(Chain); 1225 Ops.push_back(Callee); 1226 1227 // Add argument registers to the end of the list so that they are known live 1228 // into the call. 1229 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1230 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1231 RegsToPass[i].second.getValueType())); 1232 1233 if (InFlag.getNode()) 1234 Ops.push_back(InFlag); 1235 // Returns a chain and a flag for retval copy to use. 1236 Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag), 1237 &Ops[0], Ops.size()); 1238 InFlag = Chain.getValue(1); 1239 1240 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true), 1241 DAG.getIntPtrConstant(0, true), InFlag); 1242 if (TheCall->getValueType(0) != MVT::Other) 1243 InFlag = Chain.getValue(1); 1244 1245 SDValue ResultVals[3]; 1246 unsigned NumResults = 0; 1247 1248 // If the call has results, copy the values out of the ret val registers. 1249 switch (TheCall->getValueType(0).getSimpleVT()) { 1250 default: assert(0 && "Unexpected ret value!"); 1251 case MVT::Other: break; 1252 case MVT::i32: 1253 if (TheCall->getValueType(1) == MVT::i32) { 1254 Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1); 1255 ResultVals[0] = Chain.getValue(0); 1256 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, 1257 Chain.getValue(2)).getValue(1); 1258 ResultVals[1] = Chain.getValue(0); 1259 NumResults = 2; 1260 } else { 1261 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1); 1262 ResultVals[0] = Chain.getValue(0); 1263 NumResults = 1; 1264 } 1265 break; 1266 case MVT::i64: 1267 Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1); 1268 ResultVals[0] = Chain.getValue(0); 1269 NumResults = 1; 1270 break; 1271 case MVT::f32: 1272 case MVT::f64: 1273 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0), 1274 InFlag).getValue(1); 1275 ResultVals[0] = Chain.getValue(0); 1276 NumResults = 1; 1277 break; 1278 case MVT::v2f64: 1279 case MVT::v2i64: 1280 case MVT::v4f32: 1281 case MVT::v4i32: 1282 case MVT::v8i16: 1283 case MVT::v16i8: 1284 Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0), 1285 InFlag).getValue(1); 1286 ResultVals[0] = Chain.getValue(0); 1287 NumResults = 1; 1288 break; 1289 } 1290 1291 // If the function returns void, just return the chain. 1292 if (NumResults == 0) 1293 return Chain; 1294 1295 // Otherwise, merge everything together with a MERGE_VALUES node. 1296 ResultVals[NumResults++] = Chain; 1297 SDValue Res = DAG.getMergeValues(ResultVals, NumResults); 1298 return Res.getValue(Op.getResNo()); 1299} 1300 1301static SDValue 1302LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) { 1303 SmallVector<CCValAssign, 16> RVLocs; 1304 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); 1305 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); 1306 CCState CCInfo(CC, isVarArg, TM, RVLocs); 1307 CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU); 1308 1309 // If this is the first return lowered for this function, add the regs to the 1310 // liveout set for the function. 1311 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 1312 for (unsigned i = 0; i != RVLocs.size(); ++i) 1313 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 1314 } 1315 1316 SDValue Chain = Op.getOperand(0); 1317 SDValue Flag; 1318 1319 // Copy the result values into the output registers. 1320 for (unsigned i = 0; i != RVLocs.size(); ++i) { 1321 CCValAssign &VA = RVLocs[i]; 1322 assert(VA.isRegLoc() && "Can only return in registers!"); 1323 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag); 1324 Flag = Chain.getValue(1); 1325 } 1326 1327 if (Flag.getNode()) 1328 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag); 1329 else 1330 return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain); 1331} 1332 1333 1334//===----------------------------------------------------------------------===// 1335// Vector related lowering: 1336//===----------------------------------------------------------------------===// 1337 1338static ConstantSDNode * 1339getVecImm(SDNode *N) { 1340 SDValue OpVal(0, 0); 1341 1342 // Check to see if this buildvec has a single non-undef value in its elements. 1343 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 1344 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 1345 if (OpVal.getNode() == 0) 1346 OpVal = N->getOperand(i); 1347 else if (OpVal != N->getOperand(i)) 1348 return 0; 1349 } 1350 1351 if (OpVal.getNode() != 0) { 1352 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 1353 return CN; 1354 } 1355 } 1356 1357 return 0; // All UNDEF: use implicit def.; not Constant node 1358} 1359 1360/// get_vec_i18imm - Test if this vector is a vector filled with the same value 1361/// and the value fits into an unsigned 18-bit constant, and if so, return the 1362/// constant 1363SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG, 1364 MVT ValueType) { 1365 if (ConstantSDNode *CN = getVecImm(N)) { 1366 uint64_t Value = CN->getZExtValue(); 1367 if (ValueType == MVT::i64) { 1368 uint64_t UValue = CN->getZExtValue(); 1369 uint32_t upper = uint32_t(UValue >> 32); 1370 uint32_t lower = uint32_t(UValue); 1371 if (upper != lower) 1372 return SDValue(); 1373 Value = Value >> 32; 1374 } 1375 if (Value <= 0x3ffff) 1376 return DAG.getTargetConstant(Value, ValueType); 1377 } 1378 1379 return SDValue(); 1380} 1381 1382/// get_vec_i16imm - Test if this vector is a vector filled with the same value 1383/// and the value fits into a signed 16-bit constant, and if so, return the 1384/// constant 1385SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG, 1386 MVT ValueType) { 1387 if (ConstantSDNode *CN = getVecImm(N)) { 1388 int64_t Value = CN->getSExtValue(); 1389 if (ValueType == MVT::i64) { 1390 uint64_t UValue = CN->getZExtValue(); 1391 uint32_t upper = uint32_t(UValue >> 32); 1392 uint32_t lower = uint32_t(UValue); 1393 if (upper != lower) 1394 return SDValue(); 1395 Value = Value >> 32; 1396 } 1397 if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) { 1398 return DAG.getTargetConstant(Value, ValueType); 1399 } 1400 } 1401 1402 return SDValue(); 1403} 1404 1405/// get_vec_i10imm - Test if this vector is a vector filled with the same value 1406/// and the value fits into a signed 10-bit constant, and if so, return the 1407/// constant 1408SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, 1409 MVT ValueType) { 1410 if (ConstantSDNode *CN = getVecImm(N)) { 1411 int64_t Value = CN->getSExtValue(); 1412 if (ValueType == MVT::i64) { 1413 uint64_t UValue = CN->getZExtValue(); 1414 uint32_t upper = uint32_t(UValue >> 32); 1415 uint32_t lower = uint32_t(UValue); 1416 if (upper != lower) 1417 return SDValue(); 1418 Value = Value >> 32; 1419 } 1420 if (isS10Constant(Value)) 1421 return DAG.getTargetConstant(Value, ValueType); 1422 } 1423 1424 return SDValue(); 1425} 1426 1427/// get_vec_i8imm - Test if this vector is a vector filled with the same value 1428/// and the value fits into a signed 8-bit constant, and if so, return the 1429/// constant. 1430/// 1431/// @note: The incoming vector is v16i8 because that's the only way we can load 1432/// constant vectors. Thus, we test to see if the upper and lower bytes are the 1433/// same value. 1434SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG, 1435 MVT ValueType) { 1436 if (ConstantSDNode *CN = getVecImm(N)) { 1437 int Value = (int) CN->getZExtValue(); 1438 if (ValueType == MVT::i16 1439 && Value <= 0xffff /* truncated from uint64_t */ 1440 && ((short) Value >> 8) == ((short) Value & 0xff)) 1441 return DAG.getTargetConstant(Value & 0xff, ValueType); 1442 else if (ValueType == MVT::i8 1443 && (Value & 0xff) == Value) 1444 return DAG.getTargetConstant(Value, ValueType); 1445 } 1446 1447 return SDValue(); 1448} 1449 1450/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value 1451/// and the value fits into a signed 16-bit constant, and if so, return the 1452/// constant 1453SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, 1454 MVT ValueType) { 1455 if (ConstantSDNode *CN = getVecImm(N)) { 1456 uint64_t Value = CN->getZExtValue(); 1457 if ((ValueType == MVT::i32 1458 && ((unsigned) Value & 0xffff0000) == (unsigned) Value) 1459 || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value)) 1460 return DAG.getTargetConstant(Value >> 16, ValueType); 1461 } 1462 1463 return SDValue(); 1464} 1465 1466/// get_v4i32_imm - Catch-all for general 32-bit constant vectors 1467SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) { 1468 if (ConstantSDNode *CN = getVecImm(N)) { 1469 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32); 1470 } 1471 1472 return SDValue(); 1473} 1474 1475/// get_v4i32_imm - Catch-all for general 64-bit constant vectors 1476SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { 1477 if (ConstantSDNode *CN = getVecImm(N)) { 1478 return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64); 1479 } 1480 1481 return SDValue(); 1482} 1483 1484// If this is a vector of constants or undefs, get the bits. A bit in 1485// UndefBits is set if the corresponding element of the vector is an 1486// ISD::UNDEF value. For undefs, the corresponding VectorBits values are 1487// zero. Return true if this is not an array of constants, false if it is. 1488// 1489static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2], 1490 uint64_t UndefBits[2]) { 1491 // Start with zero'd results. 1492 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0; 1493 1494 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits(); 1495 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { 1496 SDValue OpVal = BV->getOperand(i); 1497 1498 unsigned PartNo = i >= e/2; // In the upper 128 bits? 1499 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t. 1500 1501 uint64_t EltBits = 0; 1502 if (OpVal.getOpcode() == ISD::UNDEF) { 1503 uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize); 1504 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize); 1505 continue; 1506 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 1507 EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize)); 1508 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 1509 const APFloat &apf = CN->getValueAPF(); 1510 EltBits = (CN->getValueType(0) == MVT::f32 1511 ? FloatToBits(apf.convertToFloat()) 1512 : DoubleToBits(apf.convertToDouble())); 1513 } else { 1514 // Nonconstant element. 1515 return true; 1516 } 1517 1518 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize); 1519 } 1520 1521 //printf("%llx %llx %llx %llx\n", 1522 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]); 1523 return false; 1524} 1525 1526/// If this is a splat (repetition) of a value across the whole vector, return 1527/// the smallest size that splats it. For example, "0x01010101010101..." is a 1528/// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and 1529/// SplatSize = 1 byte. 1530static bool isConstantSplat(const uint64_t Bits128[2], 1531 const uint64_t Undef128[2], 1532 int MinSplatBits, 1533 uint64_t &SplatBits, uint64_t &SplatUndef, 1534 int &SplatSize) { 1535 // Don't let undefs prevent splats from matching. See if the top 64-bits are 1536 // the same as the lower 64-bits, ignoring undefs. 1537 uint64_t Bits64 = Bits128[0] | Bits128[1]; 1538 uint64_t Undef64 = Undef128[0] & Undef128[1]; 1539 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32); 1540 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32); 1541 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16); 1542 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16); 1543 1544 if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) { 1545 if (MinSplatBits < 64) { 1546 1547 // Check that the top 32-bits are the same as the lower 32-bits, ignoring 1548 // undefs. 1549 if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) { 1550 if (MinSplatBits < 32) { 1551 1552 // If the top 16-bits are different than the lower 16-bits, ignoring 1553 // undefs, we have an i32 splat. 1554 if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) { 1555 if (MinSplatBits < 16) { 1556 // If the top 8-bits are different than the lower 8-bits, ignoring 1557 // undefs, we have an i16 splat. 1558 if ((Bits16 & (uint16_t(~Undef16) >> 8)) 1559 == ((Bits16 >> 8) & ~Undef16)) { 1560 // Otherwise, we have an 8-bit splat. 1561 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8); 1562 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8); 1563 SplatSize = 1; 1564 return true; 1565 } 1566 } else { 1567 SplatBits = Bits16; 1568 SplatUndef = Undef16; 1569 SplatSize = 2; 1570 return true; 1571 } 1572 } 1573 } else { 1574 SplatBits = Bits32; 1575 SplatUndef = Undef32; 1576 SplatSize = 4; 1577 return true; 1578 } 1579 } 1580 } else { 1581 SplatBits = Bits128[0]; 1582 SplatUndef = Undef128[0]; 1583 SplatSize = 8; 1584 return true; 1585 } 1586 } 1587 1588 return false; // Can't be a splat if two pieces don't match. 1589} 1590 1591// If this is a case we can't handle, return null and let the default 1592// expansion code take care of it. If we CAN select this case, and if it 1593// selects to a single instruction, return Op. Otherwise, if we can codegen 1594// this case more efficiently than a constant pool load, lower it to the 1595// sequence of ops that should be used. 1596static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { 1597 MVT VT = Op.getValueType(); 1598 // If this is a vector of constants or undefs, get the bits. A bit in 1599 // UndefBits is set if the corresponding element of the vector is an 1600 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are 1601 // zero. 1602 uint64_t VectorBits[2]; 1603 uint64_t UndefBits[2]; 1604 uint64_t SplatBits, SplatUndef; 1605 int SplatSize; 1606 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits) 1607 || !isConstantSplat(VectorBits, UndefBits, 1608 VT.getVectorElementType().getSizeInBits(), 1609 SplatBits, SplatUndef, SplatSize)) 1610 return SDValue(); // Not a constant vector, not a splat. 1611 1612 switch (VT.getSimpleVT()) { 1613 default: 1614 case MVT::v4f32: { 1615 uint32_t Value32 = SplatBits; 1616 assert(SplatSize == 4 1617 && "LowerBUILD_VECTOR: Unexpected floating point vector element."); 1618 // NOTE: pretend the constant is an integer. LLVM won't load FP constants 1619 SDValue T = DAG.getConstant(Value32, MVT::i32); 1620 return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32, 1621 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T)); 1622 break; 1623 } 1624 case MVT::v2f64: { 1625 uint64_t f64val = SplatBits; 1626 assert(SplatSize == 8 1627 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes."); 1628 // NOTE: pretend the constant is an integer. LLVM won't load FP constants 1629 SDValue T = DAG.getConstant(f64val, MVT::i64); 1630 return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, 1631 DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T)); 1632 break; 1633 } 1634 case MVT::v16i8: { 1635 // 8-bit constants have to be expanded to 16-bits 1636 unsigned short Value16 = SplatBits | (SplatBits << 8); 1637 SDValue Ops[8]; 1638 for (int i = 0; i < 8; ++i) 1639 Ops[i] = DAG.getConstant(Value16, MVT::i16); 1640 return DAG.getNode(ISD::BIT_CONVERT, VT, 1641 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8)); 1642 } 1643 case MVT::v8i16: { 1644 unsigned short Value16; 1645 if (SplatSize == 2) 1646 Value16 = (unsigned short) (SplatBits & 0xffff); 1647 else 1648 Value16 = (unsigned short) (SplatBits | (SplatBits << 8)); 1649 SDValue T = DAG.getConstant(Value16, VT.getVectorElementType()); 1650 SDValue Ops[8]; 1651 for (int i = 0; i < 8; ++i) Ops[i] = T; 1652 return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8); 1653 } 1654 case MVT::v4i32: { 1655 unsigned int Value = SplatBits; 1656 SDValue T = DAG.getConstant(Value, VT.getVectorElementType()); 1657 return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T); 1658 } 1659 case MVT::v2i64: { 1660 uint64_t val = SplatBits; 1661 uint32_t upper = uint32_t(val >> 32); 1662 uint32_t lower = uint32_t(val); 1663 1664 if (upper == lower) { 1665 // Magic constant that can be matched by IL, ILA, et. al. 1666 SDValue Val = DAG.getTargetConstant(val, MVT::i64); 1667 return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val); 1668 } else { 1669 SDValue LO32; 1670 SDValue HI32; 1671 SmallVector<SDValue, 16> ShufBytes; 1672 SDValue Result; 1673 bool upper_special, lower_special; 1674 1675 // NOTE: This code creates common-case shuffle masks that can be easily 1676 // detected as common expressions. It is not attempting to create highly 1677 // specialized masks to replace any and all 0's, 0xff's and 0x80's. 1678 1679 // Detect if the upper or lower half is a special shuffle mask pattern: 1680 upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000); 1681 lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000); 1682 1683 // Create lower vector if not a special pattern 1684 if (!lower_special) { 1685 SDValue LO32C = DAG.getConstant(lower, MVT::i32); 1686 LO32 = DAG.getNode(ISD::BIT_CONVERT, VT, 1687 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, 1688 LO32C, LO32C, LO32C, LO32C)); 1689 } 1690 1691 // Create upper vector if not a special pattern 1692 if (!upper_special) { 1693 SDValue HI32C = DAG.getConstant(upper, MVT::i32); 1694 HI32 = DAG.getNode(ISD::BIT_CONVERT, VT, 1695 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, 1696 HI32C, HI32C, HI32C, HI32C)); 1697 } 1698 1699 // If either upper or lower are special, then the two input operands are 1700 // the same (basically, one of them is a "don't care") 1701 if (lower_special) 1702 LO32 = HI32; 1703 if (upper_special) 1704 HI32 = LO32; 1705 if (lower_special && upper_special) { 1706 // Unhappy situation... both upper and lower are special, so punt with 1707 // a target constant: 1708 SDValue Zero = DAG.getConstant(0, MVT::i32); 1709 HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero, 1710 Zero, Zero); 1711 } 1712 1713 for (int i = 0; i < 4; ++i) { 1714 uint64_t val = 0; 1715 for (int j = 0; j < 4; ++j) { 1716 SDValue V; 1717 bool process_upper, process_lower; 1718 val <<= 8; 1719 process_upper = (upper_special && (i & 1) == 0); 1720 process_lower = (lower_special && (i & 1) == 1); 1721 1722 if (process_upper || process_lower) { 1723 if ((process_upper && upper == 0) 1724 || (process_lower && lower == 0)) 1725 val |= 0x80; 1726 else if ((process_upper && upper == 0xffffffff) 1727 || (process_lower && lower == 0xffffffff)) 1728 val |= 0xc0; 1729 else if ((process_upper && upper == 0x80000000) 1730 || (process_lower && lower == 0x80000000)) 1731 val |= (j == 0 ? 0xe0 : 0x80); 1732 } else 1733 val |= i * 4 + j + ((i & 1) * 16); 1734 } 1735 1736 ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); 1737 } 1738 1739 return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32, 1740 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, 1741 &ShufBytes[0], ShufBytes.size())); 1742 } 1743 } 1744 } 1745 1746 return SDValue(); 1747} 1748 1749/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on 1750/// which the Cell can operate. The code inspects V3 to ascertain whether the 1751/// permutation vector, V3, is monotonically increasing with one "exception" 1752/// element, e.g., (0, 1, _, 3). If this is the case, then generate a 1753/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool. 1754/// In either case, the net result is going to eventually invoke SHUFB to 1755/// permute/shuffle the bytes from V1 and V2. 1756/// \note 1757/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate 1758/// control word for byte/halfword/word insertion. This takes care of a single 1759/// element move from V2 into V1. 1760/// \note 1761/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions. 1762static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 1763 SDValue V1 = Op.getOperand(0); 1764 SDValue V2 = Op.getOperand(1); 1765 SDValue PermMask = Op.getOperand(2); 1766 1767 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 1768 1769 // If we have a single element being moved from V1 to V2, this can be handled 1770 // using the C*[DX] compute mask instructions, but the vector elements have 1771 // to be monotonically increasing with one exception element. 1772 MVT VecVT = V1.getValueType(); 1773 MVT EltVT = VecVT.getVectorElementType(); 1774 unsigned EltsFromV2 = 0; 1775 unsigned V2Elt = 0; 1776 unsigned V2EltIdx0 = 0; 1777 unsigned CurrElt = 0; 1778 unsigned MaxElts = VecVT.getVectorNumElements(); 1779 unsigned PrevElt = 0; 1780 unsigned V0Elt = 0; 1781 bool monotonic = true; 1782 bool rotate = true; 1783 1784 if (EltVT == MVT::i8) { 1785 V2EltIdx0 = 16; 1786 } else if (EltVT == MVT::i16) { 1787 V2EltIdx0 = 8; 1788 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) { 1789 V2EltIdx0 = 4; 1790 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) { 1791 V2EltIdx0 = 2; 1792 } else 1793 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE"); 1794 1795 for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) { 1796 if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) { 1797 unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue(); 1798 1799 if (monotonic) { 1800 if (SrcElt >= V2EltIdx0) { 1801 if (1 >= (++EltsFromV2)) { 1802 V2Elt = (V2EltIdx0 - SrcElt) << 2; 1803 } 1804 } else if (CurrElt != SrcElt) { 1805 monotonic = false; 1806 } 1807 1808 ++CurrElt; 1809 } 1810 1811 if (rotate) { 1812 if (PrevElt > 0 && SrcElt < MaxElts) { 1813 if ((PrevElt == SrcElt - 1) 1814 || (PrevElt == MaxElts - 1 && SrcElt == 0)) { 1815 PrevElt = SrcElt; 1816 if (SrcElt == 0) 1817 V0Elt = i; 1818 } else { 1819 rotate = false; 1820 } 1821 } else if (PrevElt == 0) { 1822 // First time through, need to keep track of previous element 1823 PrevElt = SrcElt; 1824 } else { 1825 // This isn't a rotation, takes elements from vector 2 1826 rotate = false; 1827 } 1828 } 1829 } 1830 } 1831 1832 if (EltsFromV2 == 1 && monotonic) { 1833 // Compute mask and shuffle 1834 MachineFunction &MF = DAG.getMachineFunction(); 1835 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1836 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); 1837 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1838 // Initialize temporary register to 0 1839 SDValue InitTempReg = 1840 DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT)); 1841 // Copy register's contents as index in SHUFFLE_MASK: 1842 SDValue ShufMaskOp = 1843 DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32, 1844 DAG.getTargetConstant(V2Elt, MVT::i32), 1845 DAG.getCopyFromReg(InitTempReg, VReg, PtrVT)); 1846 // Use shuffle mask in SHUFB synthetic instruction: 1847 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp); 1848 } else if (rotate) { 1849 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8; 1850 1851 return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(), 1852 V1, DAG.getConstant(rotamt, MVT::i16)); 1853 } else { 1854 // Convert the SHUFFLE_VECTOR mask's input element units to the 1855 // actual bytes. 1856 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 1857 1858 SmallVector<SDValue, 16> ResultMask; 1859 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { 1860 unsigned SrcElt; 1861 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) 1862 SrcElt = 0; 1863 else 1864 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue(); 1865 1866 for (unsigned j = 0; j < BytesPerElement; ++j) { 1867 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, 1868 MVT::i8)); 1869 } 1870 } 1871 1872 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, 1873 &ResultMask[0], ResultMask.size()); 1874 return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask); 1875 } 1876} 1877 1878static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { 1879 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar 1880 1881 if (Op0.getNode()->getOpcode() == ISD::Constant) { 1882 // For a constant, build the appropriate constant vector, which will 1883 // eventually simplify to a vector register load. 1884 1885 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode()); 1886 SmallVector<SDValue, 16> ConstVecValues; 1887 MVT VT; 1888 size_t n_copies; 1889 1890 // Create a constant vector: 1891 switch (Op.getValueType().getSimpleVT()) { 1892 default: assert(0 && "Unexpected constant value type in " 1893 "LowerSCALAR_TO_VECTOR"); 1894 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break; 1895 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break; 1896 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break; 1897 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break; 1898 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break; 1899 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break; 1900 } 1901 1902 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT); 1903 for (size_t j = 0; j < n_copies; ++j) 1904 ConstVecValues.push_back(CValue); 1905 1906 return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(), 1907 &ConstVecValues[0], ConstVecValues.size()); 1908 } else { 1909 // Otherwise, copy the value from one register to another: 1910 switch (Op0.getValueType().getSimpleVT()) { 1911 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR"); 1912 case MVT::i8: 1913 case MVT::i16: 1914 case MVT::i32: 1915 case MVT::i64: 1916 case MVT::f32: 1917 case MVT::f64: 1918 return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0); 1919 } 1920 } 1921 1922 return SDValue(); 1923} 1924 1925static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) { 1926 switch (Op.getValueType().getSimpleVT()) { 1927 default: 1928 cerr << "CellSPU: Unknown vector multiplication, got " 1929 << Op.getValueType().getMVTString() 1930 << "\n"; 1931 abort(); 1932 /*NOTREACHED*/ 1933 1934 case MVT::v4i32: 1935 break; 1936 1937 // Multiply two v8i16 vectors (pipeline friendly version): 1938 // a) multiply lower halves, mask off upper 16-bit of 32-bit product 1939 // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes) 1940 // c) Use SELB to select upper and lower halves from the intermediate results 1941 // 1942 // NOTE: We really want to move the SELECT_MASK to earlier to actually get the 1943 // dual-issue. This code does manage to do this, even if it's a little on 1944 // the wacky side 1945 case MVT::v8i16: { 1946 MachineFunction &MF = DAG.getMachineFunction(); 1947 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1948 SDValue Chain = Op.getOperand(0); 1949 SDValue rA = Op.getOperand(0); 1950 SDValue rB = Op.getOperand(1); 1951 unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); 1952 unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); 1953 1954 SDValue FSMBOp = 1955 DAG.getCopyToReg(Chain, FSMBIreg, 1956 DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16, 1957 DAG.getConstant(0xcccc, MVT::i16))); 1958 1959 SDValue HHProd = 1960 DAG.getCopyToReg(FSMBOp, HiProdReg, 1961 DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB)); 1962 1963 SDValue HHProd_v4i32 = 1964 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, 1965 DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32)); 1966 1967 return DAG.getNode(SPUISD::SELB, MVT::v8i16, 1968 DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB), 1969 DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), 1970 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, 1971 HHProd_v4i32, 1972 DAG.getConstant(16, MVT::i16))), 1973 DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32)); 1974 } 1975 1976 // This M00sE is N@stI! (apologies to Monty Python) 1977 // 1978 // SPU doesn't know how to do any 8-bit multiplication, so the solution 1979 // is to break it all apart, sign extend, and reassemble the various 1980 // intermediate products. 1981 case MVT::v16i8: { 1982 SDValue rA = Op.getOperand(0); 1983 SDValue rB = Op.getOperand(1); 1984 SDValue c8 = DAG.getConstant(8, MVT::i32); 1985 SDValue c16 = DAG.getConstant(16, MVT::i32); 1986 1987 SDValue LLProd = 1988 DAG.getNode(SPUISD::MPY, MVT::v8i16, 1989 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA), 1990 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB)); 1991 1992 SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8); 1993 1994 SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8); 1995 1996 SDValue LHProd = 1997 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, 1998 DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8); 1999 2000 SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16, 2001 DAG.getConstant(0x2222, MVT::i16)); 2002 2003 SDValue LoProdParts = 2004 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, 2005 DAG.getNode(SPUISD::SELB, MVT::v8i16, 2006 LLProd, LHProd, FSMBmask)); 2007 2008 SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32); 2009 2010 SDValue LoProd = 2011 DAG.getNode(ISD::AND, MVT::v4i32, 2012 LoProdParts, 2013 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, 2014 LoProdMask, LoProdMask, 2015 LoProdMask, LoProdMask)); 2016 2017 SDValue rAH = 2018 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, 2019 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16); 2020 2021 SDValue rBH = 2022 DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, 2023 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16); 2024 2025 SDValue HLProd = 2026 DAG.getNode(SPUISD::MPY, MVT::v8i16, 2027 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH), 2028 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH)); 2029 2030 SDValue HHProd_1 = 2031 DAG.getNode(SPUISD::MPY, MVT::v8i16, 2032 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, 2033 DAG.getNode(SPUISD::VEC_SRA, 2034 MVT::v4i32, rAH, c8)), 2035 DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, 2036 DAG.getNode(SPUISD::VEC_SRA, 2037 MVT::v4i32, rBH, c8))); 2038 2039 SDValue HHProd = 2040 DAG.getNode(SPUISD::SELB, MVT::v8i16, 2041 HLProd, 2042 DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8), 2043 FSMBmask); 2044 2045 SDValue HiProd = 2046 DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16); 2047 2048 return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, 2049 DAG.getNode(ISD::OR, MVT::v4i32, 2050 LoProd, HiProd)); 2051 } 2052 } 2053 2054 return SDValue(); 2055} 2056 2057static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) { 2058 MachineFunction &MF = DAG.getMachineFunction(); 2059 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2060 2061 SDValue A = Op.getOperand(0); 2062 SDValue B = Op.getOperand(1); 2063 MVT VT = Op.getValueType(); 2064 2065 unsigned VRegBR, VRegC; 2066 2067 if (VT == MVT::f32) { 2068 VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass); 2069 VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass); 2070 } else { 2071 VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); 2072 VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass); 2073 } 2074 // TODO: make sure we're feeding FPInterp the right arguments 2075 // Right now: fi B, frest(B) 2076 2077 // Computes BRcpl = 2078 // (Floating Interpolate (FP Reciprocal Estimate B)) 2079 SDValue BRcpl = 2080 DAG.getCopyToReg(DAG.getEntryNode(), VRegBR, 2081 DAG.getNode(SPUISD::FPInterp, VT, B, 2082 DAG.getNode(SPUISD::FPRecipEst, VT, B))); 2083 2084 // Computes A * BRcpl and stores in a temporary register 2085 SDValue AxBRcpl = 2086 DAG.getCopyToReg(BRcpl, VRegC, 2087 DAG.getNode(ISD::FMUL, VT, A, 2088 DAG.getCopyFromReg(BRcpl, VRegBR, VT))); 2089 // What's the Chain variable do? It's magic! 2090 // TODO: set Chain = Op(0).getEntryNode() 2091 2092 return DAG.getNode(ISD::FADD, VT, 2093 DAG.getCopyFromReg(AxBRcpl, VRegC, VT), 2094 DAG.getNode(ISD::FMUL, VT, 2095 DAG.getCopyFromReg(AxBRcpl, VRegBR, VT), 2096 DAG.getNode(ISD::FSUB, VT, A, 2097 DAG.getNode(ISD::FMUL, VT, B, 2098 DAG.getCopyFromReg(AxBRcpl, VRegC, VT))))); 2099} 2100 2101static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 2102 MVT VT = Op.getValueType(); 2103 SDValue N = Op.getOperand(0); 2104 SDValue Elt = Op.getOperand(1); 2105 SDValue retval; 2106 2107 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 2108 // Constant argument: 2109 int EltNo = (int) C->getZExtValue(); 2110 2111 // sanity checks: 2112 if (VT == MVT::i8 && EltNo >= 16) 2113 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15"); 2114 else if (VT == MVT::i16 && EltNo >= 8) 2115 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7"); 2116 else if (VT == MVT::i32 && EltNo >= 4) 2117 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4"); 2118 else if (VT == MVT::i64 && EltNo >= 2) 2119 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2"); 2120 2121 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) { 2122 // i32 and i64: Element 0 is the preferred slot 2123 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N); 2124 } 2125 2126 // Need to generate shuffle mask and extract: 2127 int prefslot_begin = -1, prefslot_end = -1; 2128 int elt_byte = EltNo * VT.getSizeInBits() / 8; 2129 2130 switch (VT.getSimpleVT()) { 2131 default: 2132 assert(false && "Invalid value type!"); 2133 case MVT::i8: { 2134 prefslot_begin = prefslot_end = 3; 2135 break; 2136 } 2137 case MVT::i16: { 2138 prefslot_begin = 2; prefslot_end = 3; 2139 break; 2140 } 2141 case MVT::i32: 2142 case MVT::f32: { 2143 prefslot_begin = 0; prefslot_end = 3; 2144 break; 2145 } 2146 case MVT::i64: 2147 case MVT::f64: { 2148 prefslot_begin = 0; prefslot_end = 7; 2149 break; 2150 } 2151 } 2152 2153 assert(prefslot_begin != -1 && prefslot_end != -1 && 2154 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized"); 2155 2156 unsigned int ShufBytes[16]; 2157 for (int i = 0; i < 16; ++i) { 2158 // zero fill uppper part of preferred slot, don't care about the 2159 // other slots: 2160 unsigned int mask_val; 2161 if (i <= prefslot_end) { 2162 mask_val = 2163 ((i < prefslot_begin) 2164 ? 0x80 2165 : elt_byte + (i - prefslot_begin)); 2166 2167 ShufBytes[i] = mask_val; 2168 } else 2169 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)]; 2170 } 2171 2172 SDValue ShufMask[4]; 2173 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) { 2174 unsigned bidx = i * 4; 2175 unsigned int bits = ((ShufBytes[bidx] << 24) | 2176 (ShufBytes[bidx+1] << 16) | 2177 (ShufBytes[bidx+2] << 8) | 2178 ShufBytes[bidx+3]); 2179 ShufMask[i] = DAG.getConstant(bits, MVT::i32); 2180 } 2181 2182 SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, 2183 &ShufMask[0], 2184 sizeof(ShufMask) / sizeof(ShufMask[0])); 2185 2186 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT, 2187 DAG.getNode(SPUISD::SHUFB, N.getValueType(), 2188 N, N, ShufMaskVec)); 2189 } else { 2190 // Variable index: Rotate the requested element into slot 0, then replicate 2191 // slot 0 across the vector 2192 MVT VecVT = N.getValueType(); 2193 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) { 2194 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n"; 2195 abort(); 2196 } 2197 2198 // Make life easier by making sure the index is zero-extended to i32 2199 if (Elt.getValueType() != MVT::i32) 2200 Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt); 2201 2202 // Scale the index to a bit/byte shift quantity 2203 APInt scaleFactor = 2204 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false); 2205 unsigned scaleShift = scaleFactor.logBase2(); 2206 SDValue vecShift; 2207 2208 if (scaleShift > 0) { 2209 // Scale the shift factor: 2210 Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt, 2211 DAG.getConstant(scaleShift, MVT::i32)); 2212 } 2213 2214 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt); 2215 2216 // Replicate the bytes starting at byte 0 across the entire vector (for 2217 // consistency with the notion of a unified register set) 2218 SDValue replicate; 2219 2220 switch (VT.getSimpleVT()) { 2221 default: 2222 cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n"; 2223 abort(); 2224 /*NOTREACHED*/ 2225 case MVT::i8: { 2226 SDValue factor = DAG.getConstant(0x00000000, MVT::i32); 2227 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor, 2228 factor, factor); 2229 break; 2230 } 2231 case MVT::i16: { 2232 SDValue factor = DAG.getConstant(0x00010001, MVT::i32); 2233 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor, 2234 factor, factor); 2235 break; 2236 } 2237 case MVT::i32: 2238 case MVT::f32: { 2239 SDValue factor = DAG.getConstant(0x00010203, MVT::i32); 2240 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor, 2241 factor, factor); 2242 break; 2243 } 2244 case MVT::i64: 2245 case MVT::f64: { 2246 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32); 2247 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32); 2248 replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor, 2249 loFactor, hiFactor); 2250 break; 2251 } 2252 } 2253 2254 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT, 2255 DAG.getNode(SPUISD::SHUFB, VecVT, 2256 vecShift, vecShift, replicate)); 2257 } 2258 2259 return retval; 2260} 2261 2262static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 2263 SDValue VecOp = Op.getOperand(0); 2264 SDValue ValOp = Op.getOperand(1); 2265 SDValue IdxOp = Op.getOperand(2); 2266 MVT VT = Op.getValueType(); 2267 2268 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp); 2269 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); 2270 2271 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2272 // Use $sp ($1) because it's always 16-byte aligned and it's available: 2273 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT, 2274 DAG.getRegister(SPU::R1, PtrVT), 2275 DAG.getConstant(CN->getSExtValue(), PtrVT)); 2276 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer); 2277 2278 SDValue result = 2279 DAG.getNode(SPUISD::SHUFB, VT, 2280 DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp), 2281 VecOp, 2282 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask)); 2283 2284 return result; 2285} 2286 2287static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, 2288 const TargetLowering &TLI) 2289{ 2290 SDValue N0 = Op.getOperand(0); // Everything has at least one operand 2291 MVT ShiftVT = TLI.getShiftAmountTy(); 2292 2293 assert(Op.getValueType() == MVT::i8); 2294 switch (Opc) { 2295 default: 2296 assert(0 && "Unhandled i8 math operator"); 2297 /*NOTREACHED*/ 2298 break; 2299 case ISD::SUB: { 2300 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate 2301 // the result: 2302 SDValue N1 = Op.getOperand(1); 2303 N0 = (N0.getOpcode() != ISD::Constant 2304 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) 2305 : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(), 2306 MVT::i16)); 2307 N1 = (N1.getOpcode() != ISD::Constant 2308 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1) 2309 : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(), 2310 MVT::i16)); 2311 return DAG.getNode(ISD::TRUNCATE, MVT::i8, 2312 DAG.getNode(Opc, MVT::i16, N0, N1)); 2313 } 2314 case ISD::ROTR: 2315 case ISD::ROTL: { 2316 SDValue N1 = Op.getOperand(1); 2317 unsigned N1Opc; 2318 N0 = (N0.getOpcode() != ISD::Constant 2319 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0) 2320 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(), 2321 MVT::i16)); 2322 N1Opc = N1.getValueType().bitsLT(ShiftVT) 2323 ? ISD::ZERO_EXTEND 2324 : ISD::TRUNCATE; 2325 N1 = (N1.getOpcode() != ISD::Constant 2326 ? DAG.getNode(N1Opc, ShiftVT, N1) 2327 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), 2328 TLI.getShiftAmountTy())); 2329 SDValue ExpandArg = 2330 DAG.getNode(ISD::OR, MVT::i16, N0, 2331 DAG.getNode(ISD::SHL, MVT::i16, 2332 N0, DAG.getConstant(8, MVT::i32))); 2333 return DAG.getNode(ISD::TRUNCATE, MVT::i8, 2334 DAG.getNode(Opc, MVT::i16, ExpandArg, N1)); 2335 } 2336 case ISD::SRL: 2337 case ISD::SHL: { 2338 SDValue N1 = Op.getOperand(1); 2339 unsigned N1Opc; 2340 N0 = (N0.getOpcode() != ISD::Constant 2341 ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0) 2342 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(), 2343 MVT::i32)); 2344 N1Opc = N1.getValueType().bitsLT(ShiftVT) 2345 ? ISD::ZERO_EXTEND 2346 : ISD::TRUNCATE; 2347 N1 = (N1.getOpcode() != ISD::Constant 2348 ? DAG.getNode(N1Opc, ShiftVT, N1) 2349 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT)); 2350 return DAG.getNode(ISD::TRUNCATE, MVT::i8, 2351 DAG.getNode(Opc, MVT::i16, N0, N1)); 2352 } 2353 case ISD::SRA: { 2354 SDValue N1 = Op.getOperand(1); 2355 unsigned N1Opc; 2356 N0 = (N0.getOpcode() != ISD::Constant 2357 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) 2358 : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(), 2359 MVT::i16)); 2360 N1Opc = N1.getValueType().bitsLT(ShiftVT) 2361 ? ISD::SIGN_EXTEND 2362 : ISD::TRUNCATE; 2363 N1 = (N1.getOpcode() != ISD::Constant 2364 ? DAG.getNode(N1Opc, ShiftVT, N1) 2365 : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), 2366 ShiftVT)); 2367 return DAG.getNode(ISD::TRUNCATE, MVT::i8, 2368 DAG.getNode(Opc, MVT::i16, N0, N1)); 2369 } 2370 case ISD::MUL: { 2371 SDValue N1 = Op.getOperand(1); 2372 unsigned N1Opc; 2373 N0 = (N0.getOpcode() != ISD::Constant 2374 ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0) 2375 : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(), 2376 MVT::i16)); 2377 N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE; 2378 N1 = (N1.getOpcode() != ISD::Constant 2379 ? DAG.getNode(N1Opc, MVT::i16, N1) 2380 : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(), 2381 MVT::i16)); 2382 return DAG.getNode(ISD::TRUNCATE, MVT::i8, 2383 DAG.getNode(Opc, MVT::i16, N0, N1)); 2384 break; 2385 } 2386 } 2387 2388 return SDValue(); 2389} 2390 2391static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc) 2392{ 2393 MVT VT = Op.getValueType(); 2394 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())); 2395 2396 SDValue Op0 = Op.getOperand(0); 2397 2398 switch (Opc) { 2399 case ISD::ZERO_EXTEND: 2400 case ISD::SIGN_EXTEND: 2401 case ISD::ANY_EXTEND: { 2402 MVT Op0VT = Op0.getValueType(); 2403 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits())); 2404 2405 assert(Op0VT == MVT::i32 2406 && "CellSPU: Zero/sign extending something other than i32"); 2407 2408 DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n"); 2409 2410 SDValue PromoteScalar = 2411 DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0); 2412 2413 if (Opc != ISD::SIGN_EXTEND) { 2414 // Use a shuffle to zero extend the i32 to i64 directly: 2415 SDValue shufMask = 2416 DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT, 2417 DAG.getConstant(0x80808080, MVT::i32), 2418 DAG.getConstant(0x00010203, MVT::i32), 2419 DAG.getConstant(0x80808080, MVT::i32), 2420 DAG.getConstant(0x08090a0b, MVT::i32)); 2421 SDValue zextShuffle = 2422 DAG.getNode(SPUISD::SHUFB, Op0VecVT, 2423 PromoteScalar, PromoteScalar, shufMask); 2424 2425 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, 2426 DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle)); 2427 } else { 2428 // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift 2429 // right and propagate the sign bit) instruction. 2430 SDValue RotQuad = 2431 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT, 2432 PromoteScalar, DAG.getConstant(4, MVT::i32)); 2433 SDValue SignQuad = 2434 DAG.getNode(SPUISD::VEC_SRA, Op0VecVT, 2435 PromoteScalar, DAG.getConstant(32, MVT::i32)); 2436 SDValue SelMask = 2437 DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT, 2438 DAG.getConstant(0xf0f0, MVT::i16)); 2439 SDValue CombineQuad = 2440 DAG.getNode(SPUISD::SELB, Op0VecVT, 2441 SignQuad, RotQuad, SelMask); 2442 2443 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, 2444 DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad)); 2445 } 2446 } 2447 2448 case ISD::ADD: { 2449 // Turn operands into vectors to satisfy type checking (shufb works on 2450 // vectors) 2451 SDValue Op0 = 2452 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0)); 2453 SDValue Op1 = 2454 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1)); 2455 SmallVector<SDValue, 16> ShufBytes; 2456 2457 // Create the shuffle mask for "rotating" the borrow up one register slot 2458 // once the borrow is generated. 2459 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); 2460 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); 2461 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); 2462 ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32)); 2463 2464 SDValue CarryGen = 2465 DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1); 2466 SDValue ShiftedCarry = 2467 DAG.getNode(SPUISD::SHUFB, MVT::v2i64, 2468 CarryGen, CarryGen, 2469 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, 2470 &ShufBytes[0], ShufBytes.size())); 2471 2472 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64, 2473 DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64, 2474 Op0, Op1, ShiftedCarry)); 2475 } 2476 2477 case ISD::SUB: { 2478 // Turn operands into vectors to satisfy type checking (shufb works on 2479 // vectors) 2480 SDValue Op0 = 2481 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0)); 2482 SDValue Op1 = 2483 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1)); 2484 SmallVector<SDValue, 16> ShufBytes; 2485 2486 // Create the shuffle mask for "rotating" the borrow up one register slot 2487 // once the borrow is generated. 2488 ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32)); 2489 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); 2490 ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32)); 2491 ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32)); 2492 2493 SDValue BorrowGen = 2494 DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1); 2495 SDValue ShiftedBorrow = 2496 DAG.getNode(SPUISD::SHUFB, MVT::v2i64, 2497 BorrowGen, BorrowGen, 2498 DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, 2499 &ShufBytes[0], ShufBytes.size())); 2500 2501 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64, 2502 DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64, 2503 Op0, Op1, ShiftedBorrow)); 2504 } 2505 2506 case ISD::SHL: { 2507 SDValue ShiftAmt = Op.getOperand(1); 2508 MVT ShiftAmtVT = ShiftAmt.getValueType(); 2509 SDValue Op0Vec = DAG.getNode(SPUISD::PREFSLOT2VEC, VecVT, Op0); 2510 SDValue MaskLower = 2511 DAG.getNode(SPUISD::SELB, VecVT, 2512 Op0Vec, 2513 DAG.getConstant(0, VecVT), 2514 DAG.getNode(SPUISD::SELECT_MASK, VecVT, 2515 DAG.getConstant(0xff00ULL, MVT::i16))); 2516 SDValue ShiftAmtBytes = 2517 DAG.getNode(ISD::SRL, ShiftAmtVT, 2518 ShiftAmt, 2519 DAG.getConstant(3, ShiftAmtVT)); 2520 SDValue ShiftAmtBits = 2521 DAG.getNode(ISD::AND, ShiftAmtVT, 2522 ShiftAmt, 2523 DAG.getConstant(7, ShiftAmtVT)); 2524 2525 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, 2526 DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT, 2527 DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, 2528 MaskLower, ShiftAmtBytes), 2529 ShiftAmtBits)); 2530 } 2531 2532 case ISD::SRL: { 2533 MVT VT = Op.getValueType(); 2534 SDValue ShiftAmt = Op.getOperand(1); 2535 MVT ShiftAmtVT = ShiftAmt.getValueType(); 2536 SDValue ShiftAmtBytes = 2537 DAG.getNode(ISD::SRL, ShiftAmtVT, 2538 ShiftAmt, 2539 DAG.getConstant(3, ShiftAmtVT)); 2540 SDValue ShiftAmtBits = 2541 DAG.getNode(ISD::AND, ShiftAmtVT, 2542 ShiftAmt, 2543 DAG.getConstant(7, ShiftAmtVT)); 2544 2545 return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT, 2546 DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT, 2547 Op0, ShiftAmtBytes), 2548 ShiftAmtBits); 2549 } 2550 2551 case ISD::SRA: { 2552 // Promote Op0 to vector 2553 SDValue Op0 = 2554 DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0)); 2555 SDValue ShiftAmt = Op.getOperand(1); 2556 MVT ShiftVT = ShiftAmt.getValueType(); 2557 2558 // Negate variable shift amounts 2559 if (!isa<ConstantSDNode>(ShiftAmt)) { 2560 ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT, 2561 DAG.getConstant(0, ShiftVT), ShiftAmt); 2562 } 2563 2564 SDValue UpperHalfSign = 2565 DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32, 2566 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, 2567 DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64, 2568 Op0, DAG.getConstant(31, MVT::i32)))); 2569 SDValue UpperHalfSignMask = 2570 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign); 2571 SDValue UpperLowerMask = 2572 DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, 2573 DAG.getConstant(0xff00, MVT::i16)); 2574 SDValue UpperLowerSelect = 2575 DAG.getNode(SPUISD::SELB, MVT::v2i64, 2576 UpperHalfSignMask, Op0, UpperLowerMask); 2577 SDValue RotateLeftBytes = 2578 DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64, 2579 UpperLowerSelect, ShiftAmt); 2580 SDValue RotateLeftBits = 2581 DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64, 2582 RotateLeftBytes, ShiftAmt); 2583 2584 return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64, 2585 RotateLeftBits); 2586 } 2587 } 2588 2589 return SDValue(); 2590} 2591 2592//! Lower byte immediate operations for v16i8 vectors: 2593static SDValue 2594LowerByteImmed(SDValue Op, SelectionDAG &DAG) { 2595 SDValue ConstVec; 2596 SDValue Arg; 2597 MVT VT = Op.getValueType(); 2598 2599 ConstVec = Op.getOperand(0); 2600 Arg = Op.getOperand(1); 2601 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) { 2602 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) { 2603 ConstVec = ConstVec.getOperand(0); 2604 } else { 2605 ConstVec = Op.getOperand(1); 2606 Arg = Op.getOperand(0); 2607 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) { 2608 ConstVec = ConstVec.getOperand(0); 2609 } 2610 } 2611 } 2612 2613 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) { 2614 uint64_t VectorBits[2]; 2615 uint64_t UndefBits[2]; 2616 uint64_t SplatBits, SplatUndef; 2617 int SplatSize; 2618 2619 if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits) 2620 && isConstantSplat(VectorBits, UndefBits, 2621 VT.getVectorElementType().getSizeInBits(), 2622 SplatBits, SplatUndef, SplatSize)) { 2623 SDValue tcVec[16]; 2624 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8); 2625 const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]); 2626 2627 // Turn the BUILD_VECTOR into a set of target constants: 2628 for (size_t i = 0; i < tcVecSize; ++i) 2629 tcVec[i] = tc; 2630 2631 return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg, 2632 DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize)); 2633 } 2634 } 2635 // These operations (AND, OR, XOR) are legal, they just couldn't be custom 2636 // lowered. Return the operation, rather than a null SDValue. 2637 return Op; 2638} 2639 2640//! Custom lowering for CTPOP (count population) 2641/*! 2642 Custom lowering code that counts the number ones in the input 2643 operand. SPU has such an instruction, but it counts the number of 2644 ones per byte, which then have to be accumulated. 2645*/ 2646static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { 2647 MVT VT = Op.getValueType(); 2648 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())); 2649 2650 switch (VT.getSimpleVT()) { 2651 default: 2652 assert(false && "Invalid value type!"); 2653 case MVT::i8: { 2654 SDValue N = Op.getOperand(0); 2655 SDValue Elt0 = DAG.getConstant(0, MVT::i32); 2656 2657 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N); 2658 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); 2659 2660 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0); 2661 } 2662 2663 case MVT::i16: { 2664 MachineFunction &MF = DAG.getMachineFunction(); 2665 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2666 2667 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass); 2668 2669 SDValue N = Op.getOperand(0); 2670 SDValue Elt0 = DAG.getConstant(0, MVT::i16); 2671 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16); 2672 SDValue Shift1 = DAG.getConstant(8, MVT::i32); 2673 2674 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N); 2675 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); 2676 2677 // CNTB_result becomes the chain to which all of the virtual registers 2678 // CNTB_reg, SUM1_reg become associated: 2679 SDValue CNTB_result = 2680 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0); 2681 2682 SDValue CNTB_rescopy = 2683 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result); 2684 2685 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16); 2686 2687 return DAG.getNode(ISD::AND, MVT::i16, 2688 DAG.getNode(ISD::ADD, MVT::i16, 2689 DAG.getNode(ISD::SRL, MVT::i16, 2690 Tmp1, Shift1), 2691 Tmp1), 2692 Mask0); 2693 } 2694 2695 case MVT::i32: { 2696 MachineFunction &MF = DAG.getMachineFunction(); 2697 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2698 2699 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); 2700 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); 2701 2702 SDValue N = Op.getOperand(0); 2703 SDValue Elt0 = DAG.getConstant(0, MVT::i32); 2704 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32); 2705 SDValue Shift1 = DAG.getConstant(16, MVT::i32); 2706 SDValue Shift2 = DAG.getConstant(8, MVT::i32); 2707 2708 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N); 2709 SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote); 2710 2711 // CNTB_result becomes the chain to which all of the virtual registers 2712 // CNTB_reg, SUM1_reg become associated: 2713 SDValue CNTB_result = 2714 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0); 2715 2716 SDValue CNTB_rescopy = 2717 DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result); 2718 2719 SDValue Comp1 = 2720 DAG.getNode(ISD::SRL, MVT::i32, 2721 DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1); 2722 2723 SDValue Sum1 = 2724 DAG.getNode(ISD::ADD, MVT::i32, 2725 Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32)); 2726 2727 SDValue Sum1_rescopy = 2728 DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1); 2729 2730 SDValue Comp2 = 2731 DAG.getNode(ISD::SRL, MVT::i32, 2732 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32), 2733 Shift2); 2734 SDValue Sum2 = 2735 DAG.getNode(ISD::ADD, MVT::i32, Comp2, 2736 DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32)); 2737 2738 return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0); 2739 } 2740 2741 case MVT::i64: 2742 break; 2743 } 2744 2745 return SDValue(); 2746} 2747 2748//! Lower ISD::SETCC 2749/*! 2750 Lower i64 condition code handling. 2751 */ 2752 2753static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) { 2754 MVT VT = Op.getValueType(); 2755 SDValue lhs = Op.getOperand(0); 2756 SDValue rhs = Op.getOperand(1); 2757 SDValue condition = Op.getOperand(2); 2758 2759 if (VT == MVT::i32 && lhs.getValueType() == MVT::i64) { 2760 // Expand the i64 comparisons to what Cell can actually support, 2761 // which is eq, ugt and sgt: 2762#if 0 2763 CondCodeSDNode *ccvalue = dyn_cast<CondCodeSDValue>(condition); 2764 2765 switch (ccvalue->get()) { 2766 case 2767 } 2768#endif 2769 } 2770 2771 return SDValue(); 2772} 2773 2774//! Lower ISD::SELECT_CC 2775/*! 2776 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the 2777 SELB instruction. 2778 2779 \note Need to revisit this in the future: if the code path through the true 2780 and false value computations is longer than the latency of a branch (6 2781 cycles), then it would be more advantageous to branch and insert a new basic 2782 block and branch on the condition. However, this code does not make that 2783 assumption, given the simplisitc uses so far. 2784 */ 2785 2786static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, 2787 const TargetLowering &TLI) { 2788 MVT VT = Op.getValueType(); 2789 SDValue lhs = Op.getOperand(0); 2790 SDValue rhs = Op.getOperand(1); 2791 SDValue trueval = Op.getOperand(2); 2792 SDValue falseval = Op.getOperand(3); 2793 SDValue condition = Op.getOperand(4); 2794 2795 // NOTE: SELB's arguments: $rA, $rB, $mask 2796 // 2797 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB 2798 // where bits in $mask are 1. CCond will be inverted, having 1s where the 2799 // condition was true and 0s where the condition was false. Hence, the 2800 // arguments to SELB get reversed. 2801 2802 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's 2803 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up 2804 // with another "cannot select select_cc" assert: 2805 2806 SDValue compare = DAG.getNode(ISD::SETCC, TLI.getSetCCResultType(Op), 2807 lhs, rhs, condition); 2808 return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare); 2809} 2810 2811//! Custom lower ISD::TRUNCATE 2812static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) 2813{ 2814 MVT VT = Op.getValueType(); 2815 MVT::SimpleValueType simpleVT = VT.getSimpleVT(); 2816 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())); 2817 2818 SDValue Op0 = Op.getOperand(0); 2819 MVT Op0VT = Op0.getValueType(); 2820 MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits())); 2821 2822 // Create shuffle mask 2823 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) { 2824 // least significant doubleword of quadword 2825 unsigned maskHigh = 0x08090a0b; 2826 unsigned maskLow = 0x0c0d0e0f; 2827 // Use a shuffle to perform the truncation 2828 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, 2829 DAG.getConstant(maskHigh, MVT::i32), 2830 DAG.getConstant(maskLow, MVT::i32), 2831 DAG.getConstant(maskHigh, MVT::i32), 2832 DAG.getConstant(maskLow, MVT::i32)); 2833 2834 2835 SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0); 2836 2837 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT, 2838 PromoteScalar, PromoteScalar, shufMask); 2839 2840 return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, 2841 DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle)); 2842 } 2843 2844 return SDValue(); // Leave the truncate unmolested 2845} 2846 2847//! Custom (target-specific) lowering entry point 2848/*! 2849 This is where LLVM's DAG selection process calls to do target-specific 2850 lowering of nodes. 2851 */ 2852SDValue 2853SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) 2854{ 2855 unsigned Opc = (unsigned) Op.getOpcode(); 2856 MVT VT = Op.getValueType(); 2857 2858 switch (Opc) { 2859 default: { 2860 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n"; 2861 cerr << "Op.getOpcode() = " << Opc << "\n"; 2862 cerr << "*Op.getNode():\n"; 2863 Op.getNode()->dump(); 2864 abort(); 2865 } 2866 case ISD::LOAD: 2867 case ISD::EXTLOAD: 2868 case ISD::SEXTLOAD: 2869 case ISD::ZEXTLOAD: 2870 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl()); 2871 case ISD::STORE: 2872 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl()); 2873 case ISD::ConstantPool: 2874 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl()); 2875 case ISD::GlobalAddress: 2876 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl()); 2877 case ISD::JumpTable: 2878 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl()); 2879 case ISD::Constant: 2880 return LowerConstant(Op, DAG); 2881 case ISD::ConstantFP: 2882 return LowerConstantFP(Op, DAG); 2883 case ISD::BRCOND: 2884 return LowerBRCOND(Op, DAG, *this); 2885 case ISD::FORMAL_ARGUMENTS: 2886 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex); 2887 case ISD::CALL: 2888 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl()); 2889 case ISD::RET: 2890 return LowerRET(Op, DAG, getTargetMachine()); 2891 2892 2893 // i8, i64 math ops: 2894 case ISD::ZERO_EXTEND: 2895 case ISD::SIGN_EXTEND: 2896 case ISD::ANY_EXTEND: 2897 case ISD::ADD: 2898 case ISD::SUB: 2899 case ISD::ROTR: 2900 case ISD::ROTL: 2901 case ISD::SRL: 2902 case ISD::SHL: 2903 case ISD::SRA: { 2904 if (VT == MVT::i8) 2905 return LowerI8Math(Op, DAG, Opc, *this); 2906 else if (VT == MVT::i64) 2907 return LowerI64Math(Op, DAG, Opc); 2908 break; 2909 } 2910 2911 // Vector-related lowering. 2912 case ISD::BUILD_VECTOR: 2913 return LowerBUILD_VECTOR(Op, DAG); 2914 case ISD::SCALAR_TO_VECTOR: 2915 return LowerSCALAR_TO_VECTOR(Op, DAG); 2916 case ISD::VECTOR_SHUFFLE: 2917 return LowerVECTOR_SHUFFLE(Op, DAG); 2918 case ISD::EXTRACT_VECTOR_ELT: 2919 return LowerEXTRACT_VECTOR_ELT(Op, DAG); 2920 case ISD::INSERT_VECTOR_ELT: 2921 return LowerINSERT_VECTOR_ELT(Op, DAG); 2922 2923 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately: 2924 case ISD::AND: 2925 case ISD::OR: 2926 case ISD::XOR: 2927 return LowerByteImmed(Op, DAG); 2928 2929 // Vector and i8 multiply: 2930 case ISD::MUL: 2931 if (VT.isVector()) 2932 return LowerVectorMUL(Op, DAG); 2933 else if (VT == MVT::i8) 2934 return LowerI8Math(Op, DAG, Opc, *this); 2935 2936 case ISD::FDIV: 2937 if (VT == MVT::f32 || VT == MVT::v4f32) 2938 return LowerFDIVf32(Op, DAG); 2939#if 0 2940 // This is probably a libcall 2941 else if (Op.getValueType() == MVT::f64) 2942 return LowerFDIVf64(Op, DAG); 2943#endif 2944 else 2945 assert(0 && "Calling FDIV on unsupported MVT"); 2946 2947 case ISD::CTPOP: 2948 return LowerCTPOP(Op, DAG); 2949 2950 case ISD::SELECT_CC: 2951 return LowerSELECT_CC(Op, DAG, *this); 2952 2953 case ISD::TRUNCATE: 2954 return LowerTRUNCATE(Op, DAG); 2955 2956 case ISD::SETCC: 2957 return LowerSETCC(Op, DAG); 2958 } 2959 2960 return SDValue(); 2961} 2962 2963void SPUTargetLowering::ReplaceNodeResults(SDNode *N, 2964 SmallVectorImpl<SDValue>&Results, 2965 SelectionDAG &DAG) 2966{ 2967#if 0 2968 unsigned Opc = (unsigned) N->getOpcode(); 2969 MVT OpVT = N->getValueType(0); 2970 2971 switch (Opc) { 2972 default: { 2973 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n"; 2974 cerr << "Op.getOpcode() = " << Opc << "\n"; 2975 cerr << "*Op.getNode():\n"; 2976 N->dump(); 2977 abort(); 2978 /*NOTREACHED*/ 2979 } 2980 } 2981#endif 2982 2983 /* Otherwise, return unchanged */ 2984} 2985 2986//===----------------------------------------------------------------------===// 2987// Target Optimization Hooks 2988//===----------------------------------------------------------------------===// 2989 2990SDValue 2991SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const 2992{ 2993#if 0 2994 TargetMachine &TM = getTargetMachine(); 2995#endif 2996 const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); 2997 SelectionDAG &DAG = DCI.DAG; 2998 SDValue Op0 = N->getOperand(0); // everything has at least one operand 2999 MVT NodeVT = N->getValueType(0); // The node's value type 3000 MVT Op0VT = Op0.getValueType(); // The first operand's result 3001 SDValue Result; // Initially, empty result 3002 3003 switch (N->getOpcode()) { 3004 default: break; 3005 case ISD::ADD: { 3006 SDValue Op1 = N->getOperand(1); 3007 3008 if (Op0.getOpcode() == SPUISD::IndirectAddr 3009 || Op1.getOpcode() == SPUISD::IndirectAddr) { 3010 // Normalize the operands to reduce repeated code 3011 SDValue IndirectArg = Op0, AddArg = Op1; 3012 3013 if (Op1.getOpcode() == SPUISD::IndirectAddr) { 3014 IndirectArg = Op1; 3015 AddArg = Op0; 3016 } 3017 3018 if (isa<ConstantSDNode>(AddArg)) { 3019 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg); 3020 SDValue IndOp1 = IndirectArg.getOperand(1); 3021 3022 if (CN0->isNullValue()) { 3023 // (add (SPUindirect <arg>, <arg>), 0) -> 3024 // (SPUindirect <arg>, <arg>) 3025 3026#if !defined(NDEBUG) 3027 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 3028 cerr << "\n" 3029 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n" 3030 << "With: (SPUindirect <arg>, <arg>)\n"; 3031 } 3032#endif 3033 3034 return IndirectArg; 3035 } else if (isa<ConstantSDNode>(IndOp1)) { 3036 // (add (SPUindirect <arg>, <const>), <const>) -> 3037 // (SPUindirect <arg>, <const + const>) 3038 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1); 3039 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue(); 3040 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT); 3041 3042#if !defined(NDEBUG) 3043 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 3044 cerr << "\n" 3045 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue() 3046 << "), " << CN0->getSExtValue() << ")\n" 3047 << "With: (SPUindirect <arg>, " 3048 << combinedConst << ")\n"; 3049 } 3050#endif 3051 3052 return DAG.getNode(SPUISD::IndirectAddr, Op0VT, 3053 IndirectArg, combinedValue); 3054 } 3055 } 3056 } 3057 break; 3058 } 3059 case ISD::SIGN_EXTEND: 3060 case ISD::ZERO_EXTEND: 3061 case ISD::ANY_EXTEND: { 3062 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) { 3063 // (any_extend (SPUextract_elt0 <arg>)) -> 3064 // (SPUextract_elt0 <arg>) 3065 // Types must match, however... 3066#if !defined(NDEBUG) 3067 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 3068 cerr << "\nReplace: "; 3069 N->dump(&DAG); 3070 cerr << "\nWith: "; 3071 Op0.getNode()->dump(&DAG); 3072 cerr << "\n"; 3073 } 3074#endif 3075 3076 return Op0; 3077 } 3078 break; 3079 } 3080 case SPUISD::IndirectAddr: { 3081 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) { 3082 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1)); 3083 if (CN->getZExtValue() == 0) { 3084 // (SPUindirect (SPUaform <addr>, 0), 0) -> 3085 // (SPUaform <addr>, 0) 3086 3087 DEBUG(cerr << "Replace: "); 3088 DEBUG(N->dump(&DAG)); 3089 DEBUG(cerr << "\nWith: "); 3090 DEBUG(Op0.getNode()->dump(&DAG)); 3091 DEBUG(cerr << "\n"); 3092 3093 return Op0; 3094 } 3095 } else if (Op0.getOpcode() == ISD::ADD) { 3096 SDValue Op1 = N->getOperand(1); 3097 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) { 3098 // (SPUindirect (add <arg>, <arg>), 0) -> 3099 // (SPUindirect <arg>, <arg>) 3100 if (CN1->isNullValue()) { 3101 3102#if !defined(NDEBUG) 3103 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { 3104 cerr << "\n" 3105 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n" 3106 << "With: (SPUindirect <arg>, <arg>)\n"; 3107 } 3108#endif 3109 3110 return DAG.getNode(SPUISD::IndirectAddr, Op0VT, 3111 Op0.getOperand(0), Op0.getOperand(1)); 3112 } 3113 } 3114 } 3115 break; 3116 } 3117 case SPUISD::SHLQUAD_L_BITS: 3118 case SPUISD::SHLQUAD_L_BYTES: 3119 case SPUISD::VEC_SHL: 3120 case SPUISD::VEC_SRL: 3121 case SPUISD::VEC_SRA: 3122 case SPUISD::ROTQUAD_RZ_BYTES: 3123 case SPUISD::ROTQUAD_RZ_BITS: 3124 case SPUISD::ROTBYTES_LEFT: { 3125 SDValue Op1 = N->getOperand(1); 3126 3127 // Kill degenerate vector shifts: 3128 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) { 3129 if (CN->isNullValue()) { 3130 Result = Op0; 3131 } 3132 } 3133 break; 3134 } 3135 case SPUISD::PREFSLOT2VEC: { 3136 switch (Op0.getOpcode()) { 3137 default: 3138 break; 3139 case ISD::ANY_EXTEND: 3140 case ISD::ZERO_EXTEND: 3141 case ISD::SIGN_EXTEND: { 3142 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) -> 3143 // <arg> 3144 // but only if the SPUprefslot2vec and <arg> types match. 3145 SDValue Op00 = Op0.getOperand(0); 3146 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) { 3147 SDValue Op000 = Op00.getOperand(0); 3148 if (Op000.getValueType() == NodeVT) { 3149 Result = Op000; 3150 } 3151 } 3152 break; 3153 } 3154 case SPUISD::VEC2PREFSLOT: { 3155 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) -> 3156 // <arg> 3157 Result = Op0.getOperand(0); 3158 break; 3159 } 3160 } 3161 break; 3162 } 3163 } 3164 // Otherwise, return unchanged. 3165#ifndef NDEBUG 3166 if (Result.getNode()) { 3167 DEBUG(cerr << "\nReplace.SPU: "); 3168 DEBUG(N->dump(&DAG)); 3169 DEBUG(cerr << "\nWith: "); 3170 DEBUG(Result.getNode()->dump(&DAG)); 3171 DEBUG(cerr << "\n"); 3172 } 3173#endif 3174 3175 return Result; 3176} 3177 3178//===----------------------------------------------------------------------===// 3179// Inline Assembly Support 3180//===----------------------------------------------------------------------===// 3181 3182/// getConstraintType - Given a constraint letter, return the type of 3183/// constraint it is for this target. 3184SPUTargetLowering::ConstraintType 3185SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const { 3186 if (ConstraintLetter.size() == 1) { 3187 switch (ConstraintLetter[0]) { 3188 default: break; 3189 case 'b': 3190 case 'r': 3191 case 'f': 3192 case 'v': 3193 case 'y': 3194 return C_RegisterClass; 3195 } 3196 } 3197 return TargetLowering::getConstraintType(ConstraintLetter); 3198} 3199 3200std::pair<unsigned, const TargetRegisterClass*> 3201SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 3202 MVT VT) const 3203{ 3204 if (Constraint.size() == 1) { 3205 // GCC RS6000 Constraint Letters 3206 switch (Constraint[0]) { 3207 case 'b': // R1-R31 3208 case 'r': // R0-R31 3209 if (VT == MVT::i64) 3210 return std::make_pair(0U, SPU::R64CRegisterClass); 3211 return std::make_pair(0U, SPU::R32CRegisterClass); 3212 case 'f': 3213 if (VT == MVT::f32) 3214 return std::make_pair(0U, SPU::R32FPRegisterClass); 3215 else if (VT == MVT::f64) 3216 return std::make_pair(0U, SPU::R64FPRegisterClass); 3217 break; 3218 case 'v': 3219 return std::make_pair(0U, SPU::GPRCRegisterClass); 3220 } 3221 } 3222 3223 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 3224} 3225 3226//! Compute used/known bits for a SPU operand 3227void 3228SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 3229 const APInt &Mask, 3230 APInt &KnownZero, 3231 APInt &KnownOne, 3232 const SelectionDAG &DAG, 3233 unsigned Depth ) const { 3234#if 0 3235 const uint64_t uint64_sizebits = sizeof(uint64_t) * 8; 3236#endif 3237 3238 switch (Op.getOpcode()) { 3239 default: 3240 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); 3241 break; 3242 3243#if 0 3244 case CALL: 3245 case SHUFB: 3246 case SHUFFLE_MASK: 3247 case CNTB: 3248#endif 3249 3250 case SPUISD::PREFSLOT2VEC: { 3251 SDValue Op0 = Op.getOperand(0); 3252 MVT Op0VT = Op0.getValueType(); 3253 unsigned Op0VTBits = Op0VT.getSizeInBits(); 3254 uint64_t InMask = Op0VT.getIntegerVTBitMask(); 3255 KnownZero |= APInt(Op0VTBits, ~InMask, false); 3256 KnownOne |= APInt(Op0VTBits, InMask, false); 3257 break; 3258 } 3259 3260 case SPUISD::LDRESULT: 3261 case SPUISD::VEC2PREFSLOT: { 3262 MVT OpVT = Op.getValueType(); 3263 unsigned OpVTBits = OpVT.getSizeInBits(); 3264 uint64_t InMask = OpVT.getIntegerVTBitMask(); 3265 KnownZero |= APInt(OpVTBits, ~InMask, false); 3266 KnownOne |= APInt(OpVTBits, InMask, false); 3267 break; 3268 } 3269 3270#if 0 3271 case MPY: 3272 case MPYU: 3273 case MPYH: 3274 case MPYHH: 3275 case SPUISD::SHLQUAD_L_BITS: 3276 case SPUISD::SHLQUAD_L_BYTES: 3277 case SPUISD::VEC_SHL: 3278 case SPUISD::VEC_SRL: 3279 case SPUISD::VEC_SRA: 3280 case SPUISD::VEC_ROTL: 3281 case SPUISD::VEC_ROTR: 3282 case SPUISD::ROTQUAD_RZ_BYTES: 3283 case SPUISD::ROTQUAD_RZ_BITS: 3284 case SPUISD::ROTBYTES_LEFT: 3285 case SPUISD::SELECT_MASK: 3286 case SPUISD::SELB: 3287 case SPUISD::FPInterp: 3288 case SPUISD::FPRecipEst: 3289 case SPUISD::SEXT32TO64: 3290#endif 3291 } 3292} 3293 3294unsigned 3295SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, 3296 unsigned Depth) const { 3297 switch (Op.getOpcode()) { 3298 default: 3299 return 1; 3300 3301 case ISD::SETCC: { 3302 MVT VT = Op.getValueType(); 3303 3304 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) { 3305 VT = MVT::i32; 3306 } 3307 return VT.getSizeInBits(); 3308 } 3309 } 3310} 3311 3312// LowerAsmOperandForConstraint 3313void 3314SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 3315 char ConstraintLetter, 3316 bool hasMemory, 3317 std::vector<SDValue> &Ops, 3318 SelectionDAG &DAG) const { 3319 // Default, for the time being, to the base class handler 3320 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory, 3321 Ops, DAG); 3322} 3323 3324/// isLegalAddressImmediate - Return true if the integer value can be used 3325/// as the offset of the target addressing mode. 3326bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, 3327 const Type *Ty) const { 3328 // SPU's addresses are 256K: 3329 return (V > -(1 << 18) && V < (1 << 18) - 1); 3330} 3331 3332bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { 3333 return false; 3334} 3335 3336bool 3337SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 3338 // The SPU target isn't yet aware of offsets. 3339 return false; 3340} 3341