SPUISelLowering.cpp revision 23b9b19b1a5a00faa9fce0788155c7dbfd00bfb1
//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! EVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const EVT valtype;
    const int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(EVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "getValueTypeMapEntry returns NULL for "
          << VT.getEVTString();
      llvm_report_error(Msg.str());
    }
#endif

    return retval;
  }

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */

  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      EVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy =
      Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
    std::pair<SDValue, SDValue> CallInfo =
      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                      0, CallingConv::C, false,
                      /*isReturnValueUsed=*/true,
                      Callee, Args, DAG,
                      Op.getDebugLoc());

    return CallInfo.first;
  }
}
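
// Illustrative note (not from the original file): a caller would typically
// drive ExpandLibCall as sketched below, e.g. when turning an f64->i32
// conversion into its RTLIB routine. 'Op' and 'Dummy' are hypothetical.
//
//   SDValue Dummy;
//   SDValue Result = ExpandLibCall(RTLIB::FPTOSINT_F64_I32, Op, DAG,
//                                  /*isSigned=*/true, Dummy, *this);
//
// The Hi out-parameter is never written by this helper; it merely mirrors
// the SelectionDAGLegalize routine this code was copied from, so callers
// pass a dummy SDValue.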

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8,  Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM,    MVT::i8,   Expand);
  setOperationAction(ISD::UREM,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::SREM,    MVT::i16,  Expand);
  setOperationAction(ISD::UREM,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::SREM,    MVT::i32,  Expand);
  setOperationAction(ISD::UREM,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::SREM,    MVT::i64,  Expand);
  setOperationAction(ISD::UREM,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::SREM,    MVT::i128, Expand);
  setOperationAction(ISD::UREM,    MVT::i128, Expand);
  setOperationAction(ISD::SDIV,    MVT::i128, Expand);
  setOperationAction(ISD::UDIV,    MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);
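
  // Illustrative note (not from the original file): with the Expand actions
  // above, the legalizer rewrites each integer division/remainder into a call
  // to the corresponding libgcc/compiler-rt routine, since the SPU ISA has no
  // hardware divider. For example, a 32-bit signed SDIV becomes a call to
  // __divsi3, and an i64 UREM becomes __umoddi3:
  //
  //   %q = sdiv i32 %a, %b   -->   %q = call i32 @__divsi3(i32 %a, i32 %b)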

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32!)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8 multiplications; i32 and i64 multiplies are legal:
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
  setOperationAction(ISD::MULHS,     MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8,  Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);
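
  // Illustrative note (not from the original file): the i8 Custom markings
  // above exist because the SPU ALU operates on 16- and 32-bit elements of a
  // 16-byte register; there are no byte-granularity shift/rotate/multiply
  // instructions. The usual lowering strategy (roughly what the i8 math
  // helpers later in this file do) is to widen the i8 operands to i16,
  // perform the operation there, and truncate the result back to i8.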

  // SPU does not have BSWAP. It does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32,  Expand);
  setOperationAction(ISD::BSWAP, MVT::i64,  Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i128, Expand);

  setOperationAction(ISD::CTTZ , MVT::i8,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i16,  Expand);
  setOperationAction(ISD::CTTZ , MVT::i32,  Expand);
  setOperationAction(ISD::CTTZ , MVT::i64,  Expand);
  setOperationAction(ISD::CTTZ , MVT::i128, Expand);

  setOperationAction(ISD::CTLZ , MVT::i8,   Promote);
  setOperationAction(ISD::CTLZ , MVT::i16,  Promote);
  setOperationAction(ISD::CTLZ , MVT::i32,  Legal);
  setOperationAction(ISD::CTLZ , MVT::i64,  Expand);
  setOperationAction(ISD::CTLZ , MVT::i128, Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8,   Legal);
  setOperationAction(ISD::SETCC, MVT::i16,  Legal);
  setOperationAction(ISD::SETCC, MVT::i32,  Legal);
  setOperationAction(ISD::SETCC, MVT::i64,  Legal);
  setOperationAction(ISD::SETCC, MVT::f64,  Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // FDIV on SPU has to be expanded for f64:
  setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall

  // SPU has [U|S]INT_TO_FP for i32->f32, but not for i32->f64 or i64->f64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
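
  // Illustrative note (not from the original file): CTPOP is Custom above
  // because the SPU "cntb" instruction (surfaced as SPUISD::CNTB below)
  // counts the set bits within each byte; the custom lowering then sums the
  // relevant byte counts. A sketch of the idea for i32, names illustrative:
  //
  //   v16i8 %bytes  = prefslot2vec %x       ; scalar into the vector slot
  //   v16i8 %counts = SPUISD::CNTB %bytes   ; per-byte population counts
  //   i32   %pop    = sum of the 4 byte counts in the preferred word slot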

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul is also marked legal (matched by patterns during selection).
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND,    VT, Legal);
    setOperationAction(ISD::OR,     VT, Legal);
    setOperationAction(ISD::XOR,    VT, Legal);
    setOperationAction(ISD::LOAD,   VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
    setOperationAction(ISD::ConstantPool,       VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}
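
// Illustrative note (not from the original file): ZeroOrNegativeOneBooleanContent
// above records that SPU comparisons produce all-zeros/all-ones masks rather
// than 0/1 values. That is exactly the operand shape the "selb" select-bits
// instruction (SPUISD::SELB) consumes, which is why SELECT is Legal for the
// integer types: (select c, a, b) maps directly onto selb's (a&~c)|(b&c)
// with the setcc mask, no extra mask materialization needed.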

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
  return 3;
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
           VT.getSimpleVT().SimpleTy :
           MVT::i32);
}
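
// Illustrative note (not from the original file): two concrete consequences
// of getSetCCResultType, for reference:
//
//   setcc f64 %a, %b, setolt   --> result type i32 (f64 not in the valid set)
//   setcc i16 %a, %b, seteq    --> result type i16 (returned unchanged)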

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for
 elements within a 16-byte block, we have to rotate to extract the requested
 element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32 = vec2prefslot %3
%5  f64 = fp_extend %4
\endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  EVT InVT = LN->getMemoryVT();
  EVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
             "UNINDEXED\n";
      Msg << (unsigned) LN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}
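
// Illustrative note (not from the original file): a worked example of the
// rotation math above. For an aligned i16 load from offset 6 within a
// 16-byte line, the i16 preferred slot starts at byte 2 (see valtype_map), so
//
//   rotamt = (6 & 0xf) - 2 = 4
//
// and rotating the quadword left by 4 bytes moves bytes 6..7 into bytes 2..3,
// i.e. into the preferred slot that VEC2PREFSLOT then reads.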

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  EVT VT = Value.getValueType();
  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 VT, (128 / VT.getSizeInBits())),
        stVecVT = EVT::getVectorVT(*DAG.getContext(),
                                   StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      cerr << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      cerr << "\n";
    }
#endif

    SDValue insertEltOp =
            DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
            DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      cerr << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      cerr << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
             "UNINDEXED\n";
      Msg << (unsigned) SN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}
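
// Illustrative note (not from the original file): the store path above is a
// read-modify-write sequence, summarized:
//   1. load the containing 16-byte quadword (alignLoadVec)
//   2. build an insertion control word with SPUISD::SHUFFLE_MASK (selected
//      as one of the cbd/chd/cwd "generate controls for insertion" forms)
//   3. SPUISD::SHUFB merges the scalar into the loaded quadword
//   4. store the whole quadword back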

//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    llvm_report_error("LowerGlobalAddress: Relocation model other than static"
                      " not supported.");
    /*NOTREACHED*/
  }

  return SDValue();
}
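
// Illustrative note (not from the original file): the two addressing
// strategies above, for reference. In small-memory mode an address fits the
// 18-bit immediate of the "ila" (immediate load address) instruction, hence
// SPUISD::AFormAddr. In large-memory mode the 32-bit address is materialized
// in two halves, SPUISD::Hi/SPUISD::Lo, corresponding to an ilhu (immediate
// load halfword upper) / iohl (immediate or halfword lower) pair.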

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}
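
// Illustrative note (not from the original file): a worked example for
// LowerConstantFP. For the f64 constant 1.0, DoubleToBits gives
// 0x3FF0000000000000; that i64 is splatted into both lanes of a v2i64
// BUILD_VECTOR, bitcast to v2f64, and VEC2PREFSLOT extracts lane 0 as the
// scalar f64 result.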

SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
                                        unsigned CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) {

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default: {
        std::string msg;
        raw_string_ostream Msg(msg);
        Msg << "LowerFormalArguments Unhandled argument type: "
            << ObjectVT.getEVTString();
        llvm_report_error(Msg.str());
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    InVals.push_back(ArgVal);
    // Update the chain
    Chain = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot

    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
      Chain = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  }

  return Chain;
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
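
// Illustrative note (not from the original file): isLSAAddress in numbers.
// A constant callee address qualifies when it is word aligned (low 2 bits
// zero) and fits in 18 signed bits ((Addr << 14 >> 14) == Addr on a 32-bit
// int). The value returned is Addr >> 2, i.e. the word index an absolute
// branch can carry as its 16-bit immediate.
// Example: Addr = 0x12340  -->  immediate 0x48D0.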

SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             unsigned CallConv, bool isVarArg,
                             bool isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) {

  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  unsigned NumOps = Outs.size();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = Outs[i].Val;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with X-form
      // address pairs:
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // If the function returns void, just return the chain.
  if (Ins.empty())
    return Chain;

  // If the call has results, copy the values out of the ret val registers.
  switch (Ins[0].VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                 MVT::i32, InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      InVals.push_back(Chain.getValue(0));
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::i128:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  }

  return Chain;
}

SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
                               unsigned CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               DebugLoc dl, SelectionDAG &DAG) {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             Outs[i].Val, Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}
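
// Illustrative note (not from the original file): RetCC_SPU above is emitted
// by TableGen into SPUGenCallingConv.inc. Together with the copy-from-reg
// switch in LowerCall it reflects the convention of returning values in the
// register file starting at r3 (with r4 used for a second i32 result).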


//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its
  // elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
/// value and the value fits into a signed 16-bit constant, and if so, return
/// the constant
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}
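
// Illustrative note (not from the original file): what get_ILHUvec_imm
// matches, by example. A v4i32 splat of 0x00050000 has all low halfwords
// zero, so the helper returns 0x0005 and the splat can be emitted as a
// single
//   ilhu $rt, 5     ; load 0x0005 into the upper halfword of each word
// with no follow-up iohl needed for the (zero) lower halfword.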
  switch (VT.getSimpleVT().SimpleTy) {
  default: {
    std::string msg;
    raw_string_ostream Msg(msg);
    Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
        << VT.getEVTString();
    llvm_report_error(Msg.str());
    /*NOTREACHED*/
  }
  case MVT::v4f32: {
    uint32_t Value32 = uint32_t(SplatBits);
    assert(SplatBitSize == 32
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
    break;
  }
  case MVT::v2f64: {
    uint64_t f64val = uint64_t(SplatBits);
    assert(SplatBitSize == 64
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
    break;
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16,
                                   &Ops[0], Ops.size()));
  }
  case MVT::v8i16: {
    unsigned short Value16 = SplatBits;
    SDValue T = DAG.getConstant(Value16, EltVT);
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, T);
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
  }
  case MVT::v4i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
  }
  case MVT::v2i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
  }
  case MVT::v2i64: {
    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
  }
  }

  return SDValue();
}

/*!
 Lower a splatted v2i64 constant, choosing the cheapest materialization the
 SPU offers for the given bit pattern.
 */
SDValue
SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
                     DebugLoc dl) {
  uint32_t upper = uint32_t(SplatVal >> 32);
  uint32_t lower = uint32_t(SplatVal);

  if (upper == lower) {
    // Magic constant that can be matched by IL, ILA, et al.:
    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   Val, Val, Val, Val));
  } else {
    bool upper_special, lower_special;

    // NOTE: This code creates common-case shuffle masks that can be easily
    // detected as common expressions. It is not attempting to create highly
    // specialized masks to replace any and all 0's, 0xff's and 0x80's.
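
    // Added exposition (based on the SPU ISA's shufb semantics, not stated
    // in the original comments): a shufb control byte of the form 10xxxxxx
    // yields the constant 0x00, 110xxxxx yields 0xFF, and 111xxxxx yields
    // 0x80. That is why the loop below emits 0x80 for zero words, 0xc0 for
    // all-ones words, and 0xe0 for the leading byte of 0x80000000 words,
    // instead of fetching those bytes from a source register.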

    // Detect if the upper or lower half is a special shuffle mask pattern:
    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

    // Both upper and lower are special, lower to a constant pool load:
    if (lower_special && upper_special) {
      SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
                         SplatValCN, SplatValCN);
    }

    SDValue LO32;
    SDValue HI32;
    SmallVector<SDValue, 16> ShufBytes;
    SDValue Result;

    // Create lower vector if not a special pattern
    if (!lower_special) {
      SDValue LO32C = DAG.getConstant(lower, MVT::i32);
      LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                     LO32C, LO32C, LO32C, LO32C));
    }

    // Create upper vector if not a special pattern
    if (!upper_special) {
      SDValue HI32C = DAG.getConstant(upper, MVT::i32);
      HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                     HI32C, HI32C, HI32C, HI32C));
    }

    // If either upper or lower are special, then the two input operands are
    // the same (basically, one of them is a "don't care")
    if (lower_special)
      LO32 = HI32;
    if (upper_special)
      HI32 = LO32;

    for (int i = 0; i < 4; ++i) {
      uint64_t val = 0;
      for (int j = 0; j < 4; ++j) {
        SDValue V;
        bool process_upper, process_lower;
        val <<= 8;
        process_upper = (upper_special && (i & 1) == 0);
        process_lower = (lower_special && (i & 1) == 1);

        if (process_upper || process_lower) {
          if ((process_upper && upper == 0)
              || (process_lower && lower == 0))
            val |= 0x80;
          else if ((process_upper && upper == 0xffffffff)
                   || (process_lower && lower == 0xffffffff))
            val |= 0xc0;
          else if ((process_upper && upper == 0x80000000)
                   || (process_lower && lower == 0x80000000))
            val |= (j == 0 ? 0xe0 : 0x80);
        } else
          val |= i * 4 + j + ((i & 1) * 16);
      }

      ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
    }

    return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   &ShufBytes[0], ShufBytes.size()));
  }
}

/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant
/// pool. In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
/// \note
/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
/// generate the control word for byte/halfword/word insertion. This takes
/// care of a single element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
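///
/// Illustrative examples (added commentary): for a v4i32 shuffle, the mask
/// (0, 1, 6, 3) is monotonic with a single element taken from V2 (index
/// 6 == 4 + 2), so it maps onto the SHUFFLE_MASK/insertion path below. The
/// mask (1, 2, 3, 0) takes no elements from V2 and is recognized as a
/// rotation instead.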
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  EVT VecVT = V1.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  unsigned MaxElts = VecVT.getVectorNumElements();
  unsigned PrevElt = 0;
  unsigned V0Elt = 0;
  bool monotonic = true;
  bool rotate = true;

  if (EltVT == MVT::i8) {
    V2EltIdx0 = 16;
  } else if (EltVT == MVT::i16) {
    V2EltIdx0 = 8;
  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
    V2EltIdx0 = 4;
  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
    V2EltIdx0 = 2;
  } else
    llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0; i != MaxElts; ++i) {
    if (SVN->getMaskElt(i) < 0)
      continue;

    unsigned SrcElt = SVN->getMaskElt(i);

    if (monotonic) {
      if (SrcElt >= V2EltIdx0) {
        if (1 >= (++EltsFromV2)) {
          V2Elt = (V2EltIdx0 - SrcElt) << 2;
        }
      } else if (CurrElt != SrcElt) {
        monotonic = false;
      }

      ++CurrElt;
    }

    if (rotate) {
      if (PrevElt > 0 && SrcElt < MaxElts) {
        if ((PrevElt == SrcElt - 1)
            || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
          PrevElt = SrcElt;
          if (SrcElt == 0)
            V0Elt = i;
        } else {
          rotate = false;
        }
      } else if (PrevElt == 0) {
        // First time through, need to keep track of previous element
        PrevElt = SrcElt;
      } else {
        // This isn't a rotation, takes elements from vector 2
        rotate = false;
      }
    }
  }
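
  // Worked example (added for exposition): for a v4i32 shuffle mask of
  // (1, 2, 3, 0), the loop above leaves rotate == true with V0Elt == 3, so
  // the rotate branch below computes rotamt = (4 - 3) * 4 == 4 and emits a
  // single ROTBYTES_LEFT of the quadword by four bytes.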

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDValue InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in SHUFFLE_MASK:
    SDValue ShufMaskOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
                       ShufMaskOp);
  } else if (rotate) {
    int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;

    return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
                       V1, DAG.getConstant(rotamt, MVT::i16));
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the
    // actual bytes.
    unsigned BytesPerElement = EltVT.getSizeInBits()/8;

    SmallVector<SDValue, 16> ResultMask;
    for (unsigned i = 0, e = MaxElts; i != e; ++i) {
      unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);

      for (unsigned j = 0; j < BytesPerElement; ++j)
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
    }

    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
                                    &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
  }
}

static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
  DebugLoc dl = Op.getDebugLoc();

  if (Op0.getNode()->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
    SmallVector<SDValue, 16> ConstVecValues;
    EVT VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected constant value type in "
                              "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
    }

    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
    }
  }

  return SDValue();
}

static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  SDValue N = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  SDValue retval;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
    // Constant argument:
    int EltNo = (int) C->getZExtValue();

    // sanity checks:
    if (VT == MVT::i8 && EltNo >= 16)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
    else if (VT == MVT::i16 && EltNo >= 8)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
    else if (VT == MVT::i32 && EltNo >= 4)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
    else if (VT == MVT::i64 && EltNo >= 2)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

    if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
      // i32 and i64: Element 0 is the preferred slot
      return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
    }

    // Need to generate shuffle mask and extract:
    int prefslot_begin = -1, prefslot_end = -1;
    int elt_byte = EltNo * VT.getSizeInBits() / 8;

    switch (VT.getSimpleVT().SimpleTy) {
    default:
      assert(false && "Invalid value type!");
    case MVT::i8: {
      prefslot_begin = prefslot_end = 3;
      break;
    }
    case MVT::i16: {
      prefslot_begin = 2; prefslot_end = 3;
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      prefslot_begin = 0; prefslot_end = 3;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      prefslot_begin = 0; prefslot_end = 7;
      break;
    }
    }

    assert(prefslot_begin != -1 && prefslot_end != -1 &&
           "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");

    unsigned int ShufBytes[16];
    for (int i = 0; i < 16; ++i) {
      // zero fill upper part of preferred slot, don't care about the
      // other slots:
      unsigned int mask_val;
      if (i <= prefslot_end) {
        mask_val =
          ((i < prefslot_begin)
           ? 0x80
           : elt_byte + (i - prefslot_begin));

        ShufBytes[i] = mask_val;
      } else
        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
    }

    SDValue ShufMask[4];
    for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
      unsigned bidx = i * 4;
      unsigned int bits = ((ShufBytes[bidx] << 24) |
                           (ShufBytes[bidx+1] << 16) |
                           (ShufBytes[bidx+2] << 8) |
                           ShufBytes[bidx+3]);
      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
    }

    SDValue ShufMaskVec =
      DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                  &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
                                     N, N, ShufMaskVec));
  } else {
    // Variable index: Rotate the requested element into slot 0, then replicate
    // slot 0 across the vector
    EVT VecVT = N.getValueType();
    if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
      llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit "
                        "vector type!");
    }

    // Make life easier by making sure the index is zero-extended to i32
    if (Elt.getValueType() != MVT::i32)
      Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);

    // Scale the index to a bit/byte shift quantity
    APInt scaleFactor =
      APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
    unsigned scaleShift = scaleFactor.logBase2();
    SDValue vecShift;

    if (scaleShift > 0) {
      // Scale the shift factor:
      Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
                        DAG.getConstant(scaleShift, MVT::i32));
    }

    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);

    // Replicate the bytes starting at byte 0 across the entire vector (for
    // consistency with the notion of a unified register set)
    SDValue replicate;

    switch (VT.getSimpleVT().SimpleTy) {
    default:
      llvm_report_error("LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector "
                        "type");
      /*NOTREACHED*/
    case MVT::i8: {
      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i16: {
      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i64:
    case MVT::f64: {
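      // Added exposition: the two control words below name source bytes
      // 0x00..0x07, so the shuffle replicates the first eight bytes (the
      // preferred doubleword slot after the rotate above) across the whole
      // quadword.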
      SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
      SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              loFactor, hiFactor, loFactor, hiFactor);
      break;
    }
    }

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                     vecShift, vecShift, replicate));
  }

  return retval;
}

static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  SDValue VecOp = Op.getOperand(0);
  SDValue ValOp = Op.getOperand(1);
  SDValue IdxOp = Op.getOperand(2);
  DebugLoc dl = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $sp ($1) because it's always 16-byte aligned and it's available:
  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                DAG.getRegister(SPU::R1, PtrVT),
                                DAG.getConstant(CN->getSExtValue(), PtrVT));
  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);

  SDValue result =
    DAG.getNode(SPUISD::SHUFB, dl, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
                VecOp,
                DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));

  return result;
}
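
// Added exposition: SHUFFLE_MASK is ultimately selected to one of the SPU's
// generate-controls instructions (cbd/chd/cwd/cdd), which build an insertion
// control word from the low four bits of an address. Since $sp is always
// 16-byte aligned, $sp + <index> carries exactly <index> in those bits, so
// the resulting mask inserts the scalar at the requested element position.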

static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
                           const TargetLowering &TLI)
{
  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
  DebugLoc dl = Op.getDebugLoc();
  EVT ShiftVT = TLI.getShiftAmountTy();

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    llvm_unreachable("Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::ADD: {
    // 8-bit addition: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));

  }

  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
                       ? ISD::ZERO_EXTEND
                       : ISD::TRUNCATE;
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    // Replicate lower 8-bits into upper 8:
    SDValue ExpandArg =
      DAG.getNode(ISD::OR, dl, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, dl, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i32)));

    // Truncate back down to i8
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::ZERO_EXTEND;

      if (N1.getValueType().bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;

      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::SIGN_EXTEND;

      if (N1VT.bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDValue N1 = Op.getOperand(1);

    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
    break;
  }
  }

  return SDValue();
}

//! Lower byte immediate operations for v16i8 vectors:
static SDValue
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
  SDValue ConstVec;
  SDValue Arg;
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
    BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
    assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");

    APInt APSplatBits, APSplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();

    if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, minSplatBits)
        && minSplatBits <= SplatBitSize) {
      uint64_t SplatBits = APSplatBits.getZExtValue();
      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);

      SmallVector<SDValue, 16> tcVec;
      tcVec.assign(16, tc);
      return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
                                     &tcVec[0], tcVec.size()));
    }
  }

  // These operations (AND, OR, XOR) are legal, they just couldn't be custom
  // lowered. Return the operation, rather than a null SDValue.
  return Op;
}
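
// Illustrative example (added commentary): an (or v16i8 %x, <0x0f x 16>)
// reaches LowerByteImmed with a byte splat, so the constant operand is
// rebuilt as a splat of the target constant 0x0f and the OR can later be
// selected as a single ORBI (or byte immediate) instruction.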

//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                               VT, (128 / VT.getSizeInBits()));
  DebugLoc dl = Op.getDebugLoc();

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    assert(false && "Invalid value type!");
  case MVT::i8: {
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
    SDValue Shift1 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    // CNTB_result becomes the chain to which the virtual register
    // CNTB_reg becomes associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);

    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, dl, MVT::i16,
                       DAG.getNode(ISD::ADD, dl, MVT::i16,
                                   DAG.getNode(ISD::SRL, dl, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }
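
  // Worked example (added for exposition): for an i32 input 0x01010101,
  // CNTB yields the per-byte counts 0x01010101 in the preferred word. The
  // case below folds those four byte counts pairwise: shift right 16 and
  // add (0x00000101 + 0x01010101), then shift right 8 and add again; the
  // final AND with 0xff extracts the total population count, 4.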

  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
    SDValue Shift2 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);

    SDValue Comp1 =
      DAG.getNode(ISD::SRL, dl, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
                  Shift1);

    SDValue Sum1 =
      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));

    SDValue Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);

    SDValue Comp2 =
      DAG.getNode(ISD::SRL, dl, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
                  Shift2);
    SDValue Sum2 =
      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDValue();
}

//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
/*!
 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
 All conversions to i64 are expanded to a libcall.
 */
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                              SPUTargetLowering &TLI) {
  EVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  EVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
      || OpVT == MVT::i64) {
    // Convert f32 / f64 to i32 / i64 via libcall.
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::FP_TO_SINT)
       ? RTLIB::getFPTOSINT(Op0VT, OpVT)
       : RTLIB::getFPTOUINT(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}

//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
/*!
 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
 All conversions from i64 are expanded to a libcall.
 */
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
                              SPUTargetLowering &TLI) {
  EVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  EVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
      || Op0VT == MVT::i64) {
    // Convert i32, i64 to f64 via libcall:
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::SINT_TO_FP)
       ? RTLIB::getSINTTOFP(Op0VT, OpVT)
       : RTLIB::getUINTTOFP(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}
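
// Illustrative note (standard compiler-rt/libgcc naming, not defined in this
// file): RTLIB::getFPTOSINT(MVT::f64, MVT::i32) resolves to "__fixdfsi" and
// RTLIB::getSINTTOFP(MVT::i64, MVT::f64) to "__floatdidf"; ExpandLibCall
// then emits an ordinary call to that symbol.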

//! Lower ISD::SETCC
/*!
 This handles MVT::f64 (double floating point) condition lowering
 */
static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
  DebugLoc dl = Op.getDebugLoc();
  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");

  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  EVT lhsVT = lhs.getValueType();
  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");

  EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
  APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
  EVT IntVT(MVT::i64);

  // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
  // selected to a NOP:
  SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
  SDValue lhsHi32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                DAG.getNode(ISD::SRL, dl, IntVT,
                            i64lhs, DAG.getConstant(32, MVT::i32)));
  SDValue lhsHi32abs =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
  SDValue lhsLo32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);

  // SETO and SETUO only use the lhs operand:
  if (CC->get() == ISD::SETO) {
    // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
    // SETUO
    APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
    return DAG.getNode(ISD::XOR, dl, ccResultVT,
                       DAG.getSetCC(dl, ccResultVT,
                                    lhs, DAG.getConstantFP(0.0, lhsVT),
                                    ISD::SETUO),
                       DAG.getConstant(ccResultAllOnes, ccResultVT));
  } else if (CC->get() == ISD::SETUO) {
    // Evaluates to true if Op0 is [SQ]NaN
    return DAG.getNode(ISD::AND, dl, ccResultVT,
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsHi32abs,
                                    DAG.getConstant(0x7ff00000, MVT::i32),
                                    ISD::SETGE),
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsLo32,
                                    DAG.getConstant(0, MVT::i32),
                                    ISD::SETGT));
  }

  SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
  SDValue rhsHi32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                DAG.getNode(ISD::SRL, dl, IntVT,
                            i64rhs, DAG.getConstant(32, MVT::i32)));

  // If a value is negative, subtract from the sign magnitude constant:
  SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);

  // Convert the sign-magnitude representation into 2's complement:
  SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
                                      lhsHi32, DAG.getConstant(31, MVT::i32));
  SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
  SDValue lhsSelect =
    DAG.getNode(ISD::SELECT, dl, IntVT,
                lhsSelectMask, lhsSignMag2TC, i64lhs);

  SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
                                      rhsHi32, DAG.getConstant(31, MVT::i32));
  SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
  SDValue rhsSelect =
    DAG.getNode(ISD::SELECT, dl, IntVT,
                rhsSelectMask, rhsSignMag2TC, i64rhs);

  unsigned compareOp;

  switch (CC->get()) {
  case ISD::SETOEQ:
  case ISD::SETUEQ:
    compareOp = ISD::SETEQ; break;
  case ISD::SETOGT:
  case ISD::SETUGT:
    compareOp = ISD::SETGT; break;
  case ISD::SETOGE:
  case ISD::SETUGE:
    compareOp = ISD::SETGE; break;
  case ISD::SETOLT:
  case ISD::SETULT:
    compareOp = ISD::SETLT; break;
  case ISD::SETOLE:
  case ISD::SETULE:
    compareOp = ISD::SETLE; break;
  case ISD::SETUNE:
  case ISD::SETONE:
    compareOp = ISD::SETNE; break;
  default:
    llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
  }

  SDValue result =
    DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
                 (ISD::CondCode) compareOp);

  if ((CC->get() & 0x8) == 0) {
    // Ordered comparison:
    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);

    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
  }

  return result;
}

//! Lower ISD::SELECT_CC
/*!
  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
  SELB instruction.

  \note Need to revisit this in the future: if the code path through the true
  and false value computations is longer than the latency of a branch (6
  cycles), then it would be more advantageous to branch and insert a new basic
  block and branch on the condition. However, this code does not make that
  assumption, given the simplistic uses so far.
 */

static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
                              const TargetLowering &TLI) {
  EVT VT = Op.getValueType();
  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  SDValue trueval = Op.getOperand(2);
  SDValue falseval = Op.getOperand(3);
  SDValue condition = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  // NOTE: SELB's arguments: $rA, $rB, $mask
  //
  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
  // where bits in $mask are 1. CCond will be inverted, having 1s where the
  // condition was true and 0s where the condition was false. Hence, the
  // arguments to SELB get reversed.

  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
  // with another "cannot select select_cc" assert:

  SDValue compare = DAG.getNode(ISD::SETCC, dl,
                                TLI.getSetCCResultType(Op.getValueType()),
                                lhs, rhs, condition);
  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
}
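
// Added exposition (following the SELB description above): the bit-select
// identity being relied upon is
//
//   selb($rA, $rB, $mask) == ($rA & ~$mask) | ($rB & $mask)
//
// With falseval passed as $rA and trueval as $rB, an all-ones compare result
// selects trueval, which matches ISD::SELECT_CC semantics.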

//! Custom lower ISD::TRUNCATE
static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
{
  // Type to truncate to
  EVT VT = Op.getValueType();
  MVT simpleVT = VT.getSimpleVT();
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
                               VT, (128 / VT.getSizeInBits()));
  DebugLoc dl = Op.getDebugLoc();

  // Type to truncate from
  SDValue Op0 = Op.getOperand(0);
  EVT Op0VT = Op0.getValueType();

  if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
    // Create shuffle mask, least significant doubleword of quadword
    unsigned maskHigh = 0x08090a0b;
    unsigned maskLow = 0x0c0d0e0f;
    // Use a shuffle to perform the truncation
    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32),
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32));

    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                       Op0, Op0, shufMask);

    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
  }

  return SDValue();             // Leave the truncate unmolested
}
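
// Added exposition: the control words 0x08090a0b and 0x0c0d0e0f name source
// bytes 8..15, i.e. the least significant (big-endian) doubleword of the
// i128 quadword, so the SHUFB above slides that doubleword into the i64
// preferred slot before VEC2PREFSLOT extracts it.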

//! Custom (target-specific) lowering entry point
/*!
  This is where LLVM's DAG selection process calls to do target-specific
  lowering of nodes.
 */
SDValue
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
{
  unsigned Opc = (unsigned) Op.getOpcode();
  EVT VT = Op.getValueType();

  switch (Opc) {
  default: {
#ifndef NDEBUG
    errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    errs() << "Op.getOpcode() = " << Opc << "\n";
    errs() << "*Op.getNode():\n";
    Op.getNode()->dump();
#endif
    llvm_unreachable(0);
  }
  case ISD::LOAD:
  case ISD::EXTLOAD:
  case ISD::SEXTLOAD:
  case ISD::ZEXTLOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);

  // i8, i64 math ops:
  case ISD::ADD:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA: {
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc, *this);
    break;
  }

  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    return LowerFP_TO_INT(Op, DAG, *this);

  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return LowerINT_TO_FP(Op, DAG, *this);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc, *this);
    break;

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);

  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG, *this);

  case ISD::SETCC:
    return LowerSETCC(Op, DAG, *this);

  case ISD::TRUNCATE:
    return LowerTRUNCATE(Op, DAG);
  }

  return SDValue();
}

void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG)
{
#if 0
  unsigned Opc = (unsigned) N->getOpcode();
  EVT OpVT = N->getValueType(0);

  switch (Opc) {
  default: {
    errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
    errs() << "Op.getOpcode() = " << Opc << "\n";
    errs() << "*Op.getNode():\n";
    N->dump();
    abort();
    /*NOTREACHED*/
  }
  }
#endif

  /* Otherwise, return unchanged */
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDValue
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
#endif
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);       // everything has at least one operand
  EVT NodeVT = N->getValueType(0);      // The node's value type
  EVT Op0VT = Op0.getValueType();       // The first operand's result
  SDValue Result;                       // Initially, empty result
  DebugLoc dl = N->getDebugLoc();

  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD: {
    SDValue Op1 = N->getOperand(1);

    if (Op0.getOpcode() == SPUISD::IndirectAddr
        || Op1.getOpcode() == SPUISD::IndirectAddr) {
      // Normalize the operands to reduce repeated code
      SDValue IndirectArg = Op0, AddArg = Op1;

      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
        IndirectArg = Op1;
        AddArg = Op0;
      }

      if (isa<ConstantSDNode>(AddArg)) {
        ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
        SDValue IndOp1 = IndirectArg.getOperand(1);

        if (CN0->isNullValue()) {
          // (add (SPUindirect <arg>, <arg>), 0) ->
          // (SPUindirect <arg>, <arg>)

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                   << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
                   << "With: (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return IndirectArg;
        } else if (isa<ConstantSDNode>(IndOp1)) {
          // (add (SPUindirect <arg>, <const>), <const>) ->
          // (SPUindirect <arg>, <const + const>)
          ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                   << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
                   << "), " << CN0->getSExtValue() << ")\n"
                   << "With: (SPUindirect <arg>, "
                   << combinedConst << ")\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             IndirectArg, combinedValue);
        }
      }
    }
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        errs() << "\nReplace: ";
        N->dump(&DAG);
        errs() << "\nWith: ";
        Op0.getNode()->dump(&DAG);
        errs() << "\n";
      }
#endif

      return Op0;
    }
    break;
  }
  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
      if (CN != 0 && CN->getZExtValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)

        DEBUG(errs() << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(errs() << "\nWith: ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(errs() << "\n");

        return Op0;
      }
    } else if (Op0.getOpcode() == ISD::ADD) {
      SDValue Op1 = N->getOperand(1);
      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
        // (SPUindirect (add <arg>, <arg>), 0) ->
        // (SPUindirect <arg>, <arg>)
        if (CN1->isNullValue()) {

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                   << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
                   << "With: (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));
        }
      }
    }
    break;
  }
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::ROTBYTES_LEFT: {
    SDValue Op1 = N->getOperand(1);

    // Kill degenerate vector shifts:
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {
        Result = Op0;
      }
    }
    break;
  }
  case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // <arg>
      // but only if the SPUprefslot2vec and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {
          Result = Op000;
        }
      }
      break;
    }
    case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
  }

  // Otherwise, return unchanged.
#ifndef NDEBUG
  if (Result.getNode()) {
    DEBUG(errs() << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(errs() << "\nWith: ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(errs() << "\n");
  }
#endif

  return Result;
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                EVT VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
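
// Illustrative usage (added commentary, hypothetical user code): given
//
//   int out, in;
//   asm("ai %0, %1, 1" : "=r"(out) : "r"(in));
//
// the 'r' constraint resolves through the table above to R32CRegisterClass,
// so both operands are allocated in 32-bit fixed-point registers.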

//! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
#if 0
  const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;

  switch (Op.getOpcode()) {
  default:
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;
  case CALL:
  case SHUFB:
  case SHUFFLE_MASK:
  case CNTB:
  case SPUISD::PREFSLOT2VEC:
  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
  case SPUISD::SELB:
  }
#endif
}

unsigned
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                   unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    return 1;

  case ISD::SETCC: {
    EVT VT = Op.getValueType();

    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
      VT = MVT::i32;
    }
    return VT.getSizeInBits();
  }
  }
}

// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                bool hasMemory,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
                                               Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // The SPU's local store is 256K, so legal address immediates fit in
  // 18 bits (the check below accepts -262143 .. 262142):
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}

bool
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.
  return false;
}