AMDILISelLowering.cpp revision 49fb99bd131a4ed89e6f55cf360f67618acafec4
1//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//==-----------------------------------------------------------------------===// 9// 10// This file implements the interfaces that AMDIL uses to lower LLVM code into a 11// selection DAG. 12// 13//===----------------------------------------------------------------------===// 14 15#include "AMDILISelLowering.h" 16#include "AMDILDevices.h" 17#include "AMDILIntrinsicInfo.h" 18#include "AMDILRegisterInfo.h" 19#include "AMDILSubtarget.h" 20#include "AMDILUtilityFunctions.h" 21#include "llvm/CallingConv.h" 22#include "llvm/CodeGen/MachineFrameInfo.h" 23#include "llvm/CodeGen/MachineRegisterInfo.h" 24#include "llvm/CodeGen/PseudoSourceValue.h" 25#include "llvm/CodeGen/SelectionDAG.h" 26#include "llvm/CodeGen/SelectionDAGNodes.h" 27#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 28#include "llvm/DerivedTypes.h" 29#include "llvm/Instructions.h" 30#include "llvm/Intrinsics.h" 31#include "llvm/Support/raw_ostream.h" 32#include "llvm/Target/TargetInstrInfo.h" 33#include "llvm/Target/TargetOptions.h" 34 35using namespace llvm; 36#define ISDBITCAST ISD::BITCAST 37#define MVTGLUE MVT::Glue 38//===----------------------------------------------------------------------===// 39// Calling Convention Implementation 40//===----------------------------------------------------------------------===// 41#include "AMDILGenCallingConv.inc" 42 43//===----------------------------------------------------------------------===// 44// TargetLowering Implementation Help Functions Begin 45//===----------------------------------------------------------------------===// 46 static SDValue 47getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType) 48{ 49 DebugLoc DL = Src.getDebugLoc(); 50 EVT svt = 
Src.getValueType().getScalarType(); 51 EVT dvt = Dst.getValueType().getScalarType(); 52 if (svt.isFloatingPoint() && dvt.isFloatingPoint()) { 53 if (dvt.bitsGT(svt)) { 54 Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src); 55 } else if (svt.bitsLT(svt)) { 56 Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src, 57 DAG.getConstant(1, MVT::i32)); 58 } 59 } else if (svt.isInteger() && dvt.isInteger()) { 60 if (!svt.bitsEq(dvt)) { 61 Src = DAG.getSExtOrTrunc(Src, DL, dvt); 62 } else { 63 Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src); 64 } 65 } else if (svt.isInteger()) { 66 unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP; 67 if (!svt.bitsEq(dvt)) { 68 if (dvt.getSimpleVT().SimpleTy == MVT::f32) { 69 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32); 70 } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) { 71 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64); 72 } else { 73 assert(0 && "We only support 32 and 64bit fp types"); 74 } 75 } 76 Src = DAG.getNode(opcode, DL, dvt, Src); 77 } else if (dvt.isInteger()) { 78 unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT; 79 if (svt.getSimpleVT().SimpleTy == MVT::f32) { 80 Src = DAG.getNode(opcode, DL, MVT::i32, Src); 81 } else if (svt.getSimpleVT().SimpleTy == MVT::f64) { 82 Src = DAG.getNode(opcode, DL, MVT::i64, Src); 83 } else { 84 assert(0 && "We only support 32 and 64bit fp types"); 85 } 86 Src = DAG.getSExtOrTrunc(Src, DL, dvt); 87 } 88 return Src; 89} 90// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC 91// condition. 
92 static AMDILCC::CondCodes 93CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type) 94{ 95 switch (CC) { 96 default: 97 { 98 errs()<<"Condition Code: "<< (unsigned int)CC<<"\n"; 99 assert(0 && "Unknown condition code!"); 100 } 101 case ISD::SETO: 102 switch(type) { 103 case MVT::f32: 104 return AMDILCC::IL_CC_F_O; 105 case MVT::f64: 106 return AMDILCC::IL_CC_D_O; 107 default: 108 assert(0 && "Opcode combination not generated correctly!"); 109 return AMDILCC::COND_ERROR; 110 }; 111 case ISD::SETUO: 112 switch(type) { 113 case MVT::f32: 114 return AMDILCC::IL_CC_F_UO; 115 case MVT::f64: 116 return AMDILCC::IL_CC_D_UO; 117 default: 118 assert(0 && "Opcode combination not generated correctly!"); 119 return AMDILCC::COND_ERROR; 120 }; 121 case ISD::SETGT: 122 switch (type) { 123 case MVT::i1: 124 case MVT::i8: 125 case MVT::i16: 126 case MVT::i32: 127 return AMDILCC::IL_CC_I_GT; 128 case MVT::f32: 129 return AMDILCC::IL_CC_F_GT; 130 case MVT::f64: 131 return AMDILCC::IL_CC_D_GT; 132 case MVT::i64: 133 return AMDILCC::IL_CC_L_GT; 134 default: 135 assert(0 && "Opcode combination not generated correctly!"); 136 return AMDILCC::COND_ERROR; 137 }; 138 case ISD::SETGE: 139 switch (type) { 140 case MVT::i1: 141 case MVT::i8: 142 case MVT::i16: 143 case MVT::i32: 144 return AMDILCC::IL_CC_I_GE; 145 case MVT::f32: 146 return AMDILCC::IL_CC_F_GE; 147 case MVT::f64: 148 return AMDILCC::IL_CC_D_GE; 149 case MVT::i64: 150 return AMDILCC::IL_CC_L_GE; 151 default: 152 assert(0 && "Opcode combination not generated correctly!"); 153 return AMDILCC::COND_ERROR; 154 }; 155 case ISD::SETLT: 156 switch (type) { 157 case MVT::i1: 158 case MVT::i8: 159 case MVT::i16: 160 case MVT::i32: 161 return AMDILCC::IL_CC_I_LT; 162 case MVT::f32: 163 return AMDILCC::IL_CC_F_LT; 164 case MVT::f64: 165 return AMDILCC::IL_CC_D_LT; 166 case MVT::i64: 167 return AMDILCC::IL_CC_L_LT; 168 default: 169 assert(0 && "Opcode combination not generated correctly!"); 170 return AMDILCC::COND_ERROR; 171 
}; 172 case ISD::SETLE: 173 switch (type) { 174 case MVT::i1: 175 case MVT::i8: 176 case MVT::i16: 177 case MVT::i32: 178 return AMDILCC::IL_CC_I_LE; 179 case MVT::f32: 180 return AMDILCC::IL_CC_F_LE; 181 case MVT::f64: 182 return AMDILCC::IL_CC_D_LE; 183 case MVT::i64: 184 return AMDILCC::IL_CC_L_LE; 185 default: 186 assert(0 && "Opcode combination not generated correctly!"); 187 return AMDILCC::COND_ERROR; 188 }; 189 case ISD::SETNE: 190 switch (type) { 191 case MVT::i1: 192 case MVT::i8: 193 case MVT::i16: 194 case MVT::i32: 195 return AMDILCC::IL_CC_I_NE; 196 case MVT::f32: 197 return AMDILCC::IL_CC_F_NE; 198 case MVT::f64: 199 return AMDILCC::IL_CC_D_NE; 200 case MVT::i64: 201 return AMDILCC::IL_CC_L_NE; 202 default: 203 assert(0 && "Opcode combination not generated correctly!"); 204 return AMDILCC::COND_ERROR; 205 }; 206 case ISD::SETEQ: 207 switch (type) { 208 case MVT::i1: 209 case MVT::i8: 210 case MVT::i16: 211 case MVT::i32: 212 return AMDILCC::IL_CC_I_EQ; 213 case MVT::f32: 214 return AMDILCC::IL_CC_F_EQ; 215 case MVT::f64: 216 return AMDILCC::IL_CC_D_EQ; 217 case MVT::i64: 218 return AMDILCC::IL_CC_L_EQ; 219 default: 220 assert(0 && "Opcode combination not generated correctly!"); 221 return AMDILCC::COND_ERROR; 222 }; 223 case ISD::SETUGT: 224 switch (type) { 225 case MVT::i1: 226 case MVT::i8: 227 case MVT::i16: 228 case MVT::i32: 229 return AMDILCC::IL_CC_U_GT; 230 case MVT::f32: 231 return AMDILCC::IL_CC_F_UGT; 232 case MVT::f64: 233 return AMDILCC::IL_CC_D_UGT; 234 case MVT::i64: 235 return AMDILCC::IL_CC_UL_GT; 236 default: 237 assert(0 && "Opcode combination not generated correctly!"); 238 return AMDILCC::COND_ERROR; 239 }; 240 case ISD::SETUGE: 241 switch (type) { 242 case MVT::i1: 243 case MVT::i8: 244 case MVT::i16: 245 case MVT::i32: 246 return AMDILCC::IL_CC_U_GE; 247 case MVT::f32: 248 return AMDILCC::IL_CC_F_UGE; 249 case MVT::f64: 250 return AMDILCC::IL_CC_D_UGE; 251 case MVT::i64: 252 return AMDILCC::IL_CC_UL_GE; 253 default: 254 
assert(0 && "Opcode combination not generated correctly!"); 255 return AMDILCC::COND_ERROR; 256 }; 257 case ISD::SETULT: 258 switch (type) { 259 case MVT::i1: 260 case MVT::i8: 261 case MVT::i16: 262 case MVT::i32: 263 return AMDILCC::IL_CC_U_LT; 264 case MVT::f32: 265 return AMDILCC::IL_CC_F_ULT; 266 case MVT::f64: 267 return AMDILCC::IL_CC_D_ULT; 268 case MVT::i64: 269 return AMDILCC::IL_CC_UL_LT; 270 default: 271 assert(0 && "Opcode combination not generated correctly!"); 272 return AMDILCC::COND_ERROR; 273 }; 274 case ISD::SETULE: 275 switch (type) { 276 case MVT::i1: 277 case MVT::i8: 278 case MVT::i16: 279 case MVT::i32: 280 return AMDILCC::IL_CC_U_LE; 281 case MVT::f32: 282 return AMDILCC::IL_CC_F_ULE; 283 case MVT::f64: 284 return AMDILCC::IL_CC_D_ULE; 285 case MVT::i64: 286 return AMDILCC::IL_CC_UL_LE; 287 default: 288 assert(0 && "Opcode combination not generated correctly!"); 289 return AMDILCC::COND_ERROR; 290 }; 291 case ISD::SETUNE: 292 switch (type) { 293 case MVT::i1: 294 case MVT::i8: 295 case MVT::i16: 296 case MVT::i32: 297 return AMDILCC::IL_CC_U_NE; 298 case MVT::f32: 299 return AMDILCC::IL_CC_F_UNE; 300 case MVT::f64: 301 return AMDILCC::IL_CC_D_UNE; 302 case MVT::i64: 303 return AMDILCC::IL_CC_UL_NE; 304 default: 305 assert(0 && "Opcode combination not generated correctly!"); 306 return AMDILCC::COND_ERROR; 307 }; 308 case ISD::SETUEQ: 309 switch (type) { 310 case MVT::i1: 311 case MVT::i8: 312 case MVT::i16: 313 case MVT::i32: 314 return AMDILCC::IL_CC_U_EQ; 315 case MVT::f32: 316 return AMDILCC::IL_CC_F_UEQ; 317 case MVT::f64: 318 return AMDILCC::IL_CC_D_UEQ; 319 case MVT::i64: 320 return AMDILCC::IL_CC_UL_EQ; 321 default: 322 assert(0 && "Opcode combination not generated correctly!"); 323 return AMDILCC::COND_ERROR; 324 }; 325 case ISD::SETOGT: 326 switch (type) { 327 case MVT::f32: 328 return AMDILCC::IL_CC_F_OGT; 329 case MVT::f64: 330 return AMDILCC::IL_CC_D_OGT; 331 case MVT::i1: 332 case MVT::i8: 333 case MVT::i16: 334 case MVT::i32: 
335 case MVT::i64: 336 default: 337 assert(0 && "Opcode combination not generated correctly!"); 338 return AMDILCC::COND_ERROR; 339 }; 340 case ISD::SETOGE: 341 switch (type) { 342 case MVT::f32: 343 return AMDILCC::IL_CC_F_OGE; 344 case MVT::f64: 345 return AMDILCC::IL_CC_D_OGE; 346 case MVT::i1: 347 case MVT::i8: 348 case MVT::i16: 349 case MVT::i32: 350 case MVT::i64: 351 default: 352 assert(0 && "Opcode combination not generated correctly!"); 353 return AMDILCC::COND_ERROR; 354 }; 355 case ISD::SETOLT: 356 switch (type) { 357 case MVT::f32: 358 return AMDILCC::IL_CC_F_OLT; 359 case MVT::f64: 360 return AMDILCC::IL_CC_D_OLT; 361 case MVT::i1: 362 case MVT::i8: 363 case MVT::i16: 364 case MVT::i32: 365 case MVT::i64: 366 default: 367 assert(0 && "Opcode combination not generated correctly!"); 368 return AMDILCC::COND_ERROR; 369 }; 370 case ISD::SETOLE: 371 switch (type) { 372 case MVT::f32: 373 return AMDILCC::IL_CC_F_OLE; 374 case MVT::f64: 375 return AMDILCC::IL_CC_D_OLE; 376 case MVT::i1: 377 case MVT::i8: 378 case MVT::i16: 379 case MVT::i32: 380 case MVT::i64: 381 default: 382 assert(0 && "Opcode combination not generated correctly!"); 383 return AMDILCC::COND_ERROR; 384 }; 385 case ISD::SETONE: 386 switch (type) { 387 case MVT::f32: 388 return AMDILCC::IL_CC_F_ONE; 389 case MVT::f64: 390 return AMDILCC::IL_CC_D_ONE; 391 case MVT::i1: 392 case MVT::i8: 393 case MVT::i16: 394 case MVT::i32: 395 case MVT::i64: 396 default: 397 assert(0 && "Opcode combination not generated correctly!"); 398 return AMDILCC::COND_ERROR; 399 }; 400 case ISD::SETOEQ: 401 switch (type) { 402 case MVT::f32: 403 return AMDILCC::IL_CC_F_OEQ; 404 case MVT::f64: 405 return AMDILCC::IL_CC_D_OEQ; 406 case MVT::i1: 407 case MVT::i8: 408 case MVT::i16: 409 case MVT::i32: 410 case MVT::i64: 411 default: 412 assert(0 && "Opcode combination not generated correctly!"); 413 return AMDILCC::COND_ERROR; 414 }; 415 }; 416} 417 418/// Helper function used by LowerFormalArguments 419static const 
TargetRegisterClass* 420getRegClassFromType(unsigned int type) { 421 switch (type) { 422 default: 423 assert(0 && "Passed in type does not match any register classes."); 424 case MVT::i8: 425 return &AMDIL::GPRI8RegClass; 426 case MVT::i16: 427 return &AMDIL::GPRI16RegClass; 428 case MVT::i32: 429 return &AMDIL::GPRI32RegClass; 430 case MVT::f32: 431 return &AMDIL::GPRF32RegClass; 432 case MVT::i64: 433 return &AMDIL::GPRI64RegClass; 434 case MVT::f64: 435 return &AMDIL::GPRF64RegClass; 436 case MVT::v4f32: 437 return &AMDIL::GPRV4F32RegClass; 438 case MVT::v4i8: 439 return &AMDIL::GPRV4I8RegClass; 440 case MVT::v4i16: 441 return &AMDIL::GPRV4I16RegClass; 442 case MVT::v4i32: 443 return &AMDIL::GPRV4I32RegClass; 444 case MVT::v2f32: 445 return &AMDIL::GPRV2F32RegClass; 446 case MVT::v2i8: 447 return &AMDIL::GPRV2I8RegClass; 448 case MVT::v2i16: 449 return &AMDIL::GPRV2I16RegClass; 450 case MVT::v2i32: 451 return &AMDIL::GPRV2I32RegClass; 452 case MVT::v2f64: 453 return &AMDIL::GPRV2F64RegClass; 454 case MVT::v2i64: 455 return &AMDIL::GPRV2I64RegClass; 456 } 457} 458 459SDValue 460AMDILTargetLowering::LowerMemArgument( 461 SDValue Chain, 462 CallingConv::ID CallConv, 463 const SmallVectorImpl<ISD::InputArg> &Ins, 464 DebugLoc dl, SelectionDAG &DAG, 465 const CCValAssign &VA, 466 MachineFrameInfo *MFI, 467 unsigned i) const 468{ 469 // Create the nodes corresponding to a load from this parameter slot. 470 ISD::ArgFlagsTy Flags = Ins[i].Flags; 471 472 bool AlwaysUseMutable = (CallConv==CallingConv::Fast) && 473 getTargetMachine().Options.GuaranteedTailCallOpt; 474 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); 475 476 // FIXME: For now, all byval parameter objects are marked mutable. This can 477 // be changed with more analysis. 478 // In case of tail call optimization mark all arguments mutable. Since they 479 // could be overwritten by lowering of arguments in case of a tail call. 
480 int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, 481 VA.getLocMemOffset(), isImmutable); 482 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 483 484 if (Flags.isByVal()) 485 return FIN; 486 return DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 487 MachinePointerInfo::getFixedStack(FI), 488 false, false, false, 0); 489} 490//===----------------------------------------------------------------------===// 491// TargetLowering Implementation Help Functions End 492//===----------------------------------------------------------------------===// 493//===----------------------------------------------------------------------===// 494// Instruction generation functions 495//===----------------------------------------------------------------------===// 496MachineOperand 497AMDILTargetLowering::convertToReg(MachineOperand op) const 498{ 499 if (op.isReg()) { 500 return op; 501 } else if (op.isImm()) { 502 uint32_t loadReg 503 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass); 504 generateMachineInst(AMDIL::LOADCONST_i32, loadReg) 505 .addImm(op.getImm()); 506 op.ChangeToRegister(loadReg, false); 507 } else if (op.isFPImm()) { 508 uint32_t loadReg 509 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass); 510 generateMachineInst(AMDIL::LOADCONST_f32, loadReg) 511 .addFPImm(op.getFPImm()); 512 op.ChangeToRegister(loadReg, false); 513 } else if (op.isMBB()) { 514 op.ChangeToRegister(0, false); 515 } else if (op.isFI()) { 516 op.ChangeToRegister(0, false); 517 } else if (op.isCPI()) { 518 op.ChangeToRegister(0, false); 519 } else if (op.isJTI()) { 520 op.ChangeToRegister(0, false); 521 } else if (op.isGlobal()) { 522 op.ChangeToRegister(0, false); 523 } else if (op.isSymbol()) { 524 op.ChangeToRegister(0, false); 525 }/* else if (op.isMetadata()) { 526 op.ChangeToRegister(0, false); 527 }*/ 528 return op; 529} 530 531//===----------------------------------------------------------------------===// 532// TargetLowering Class Implementation Begins 
533//===----------------------------------------------------------------------===// 534 AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM) 535: TargetLowering(TM, new TargetLoweringObjectFileELF()) 536{ 537 int types[] = 538 { 539 (int)MVT::i8, 540 (int)MVT::i16, 541 (int)MVT::i32, 542 (int)MVT::f32, 543 (int)MVT::f64, 544 (int)MVT::i64, 545 (int)MVT::v2i8, 546 (int)MVT::v4i8, 547 (int)MVT::v2i16, 548 (int)MVT::v4i16, 549 (int)MVT::v4f32, 550 (int)MVT::v4i32, 551 (int)MVT::v2f32, 552 (int)MVT::v2i32, 553 (int)MVT::v2f64, 554 (int)MVT::v2i64 555 }; 556 557 int IntTypes[] = 558 { 559 (int)MVT::i8, 560 (int)MVT::i16, 561 (int)MVT::i32, 562 (int)MVT::i64 563 }; 564 565 int FloatTypes[] = 566 { 567 (int)MVT::f32, 568 (int)MVT::f64 569 }; 570 571 int VectorTypes[] = 572 { 573 (int)MVT::v2i8, 574 (int)MVT::v4i8, 575 (int)MVT::v2i16, 576 (int)MVT::v4i16, 577 (int)MVT::v4f32, 578 (int)MVT::v4i32, 579 (int)MVT::v2f32, 580 (int)MVT::v2i32, 581 (int)MVT::v2f64, 582 (int)MVT::v2i64 583 }; 584 size_t numTypes = sizeof(types) / sizeof(*types); 585 size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes); 586 size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes); 587 size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes); 588 589 const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>(); 590 // These are the current register classes that are 591 // supported 592 593 addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass); 594 addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass); 595 596 if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) { 597 addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass); 598 addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass); 599 } 600 if (STM.device()->isSupported(AMDILDeviceInfo::ByteOps)) { 601 addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass); 602 addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass); 603 addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass); 604 
setOperationAction(ISD::Constant , MVT::i8 , Legal); 605 } 606 if (STM.device()->isSupported(AMDILDeviceInfo::ShortOps)) { 607 addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass); 608 addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass); 609 addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass); 610 setOperationAction(ISD::Constant , MVT::i16 , Legal); 611 } 612 addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass); 613 addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass); 614 addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass); 615 addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass); 616 if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) { 617 addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass); 618 addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass); 619 } 620 621 for (unsigned int x = 0; x < numTypes; ++x) { 622 MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; 623 624 //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types 625 // We cannot sextinreg, expand to shifts 626 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); 627 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); 628 setOperationAction(ISD::FP_ROUND, VT, Expand); 629 setOperationAction(ISD::SUBE, VT, Expand); 630 setOperationAction(ISD::SUBC, VT, Expand); 631 setOperationAction(ISD::ADDE, VT, Expand); 632 setOperationAction(ISD::ADDC, VT, Expand); 633 setOperationAction(ISD::SETCC, VT, Custom); 634 setOperationAction(ISD::BRCOND, VT, Custom); 635 setOperationAction(ISD::BR_CC, VT, Custom); 636 setOperationAction(ISD::BR_JT, VT, Expand); 637 setOperationAction(ISD::BRIND, VT, Expand); 638 // TODO: Implement custom UREM/SREM routines 639 setOperationAction(ISD::UREM, VT, Expand); 640 setOperationAction(ISD::SREM, VT, Expand); 641 setOperationAction(ISD::UINT_TO_FP, VT, Custom); 642 setOperationAction(ISD::FP_TO_UINT, VT, Custom); 643 setOperationAction(ISD::GlobalAddress, VT, Custom); 644 
setOperationAction(ISD::JumpTable, VT, Custom); 645 setOperationAction(ISD::ConstantPool, VT, Custom); 646 setOperationAction(ISD::SELECT_CC, VT, Custom); 647 setOperationAction(ISD::SELECT, VT, Custom); 648 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 649 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 650 if (VT != MVT::i64 && VT != MVT::v2i64) { 651 setOperationAction(ISD::SDIV, VT, Custom); 652 } 653 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 654 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 655 } 656 for (unsigned int x = 0; x < numFloatTypes; ++x) { 657 MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x]; 658 659 // IL does not have these operations for floating point types 660 setOperationAction(ISD::FP_ROUND_INREG, VT, Expand); 661 setOperationAction(ISD::FP_ROUND, VT, Custom); 662 setOperationAction(ISD::SETOLT, VT, Expand); 663 setOperationAction(ISD::SETOGE, VT, Expand); 664 setOperationAction(ISD::SETOGT, VT, Expand); 665 setOperationAction(ISD::SETOLE, VT, Expand); 666 setOperationAction(ISD::SETULT, VT, Expand); 667 setOperationAction(ISD::SETUGE, VT, Expand); 668 setOperationAction(ISD::SETUGT, VT, Expand); 669 setOperationAction(ISD::SETULE, VT, Expand); 670 } 671 672 for (unsigned int x = 0; x < numIntTypes; ++x) { 673 MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x]; 674 675 // GPU also does not have divrem function for signed or unsigned 676 setOperationAction(ISD::SDIVREM, VT, Expand); 677 setOperationAction(ISD::UDIVREM, VT, Expand); 678 setOperationAction(ISD::FP_ROUND, VT, Expand); 679 680 // GPU does not have [S|U]MUL_LOHI functions as a single instruction 681 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 682 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 683 684 // GPU doesn't have a rotl, rotr, or byteswap instruction 685 setOperationAction(ISD::ROTR, VT, Expand); 686 setOperationAction(ISD::ROTL, VT, Expand); 687 setOperationAction(ISD::BSWAP, VT, Expand); 688 689 // GPU doesn't have any 
counting operators 690 setOperationAction(ISD::CTPOP, VT, Expand); 691 setOperationAction(ISD::CTTZ, VT, Expand); 692 setOperationAction(ISD::CTLZ, VT, Expand); 693 } 694 695 for ( unsigned int ii = 0; ii < numVectorTypes; ++ii ) 696 { 697 MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii]; 698 699 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 700 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); 701 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); 702 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); 703 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); 704 setOperationAction(ISD::FP_ROUND, VT, Expand); 705 setOperationAction(ISD::SDIVREM, VT, Expand); 706 setOperationAction(ISD::UDIVREM, VT, Expand); 707 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 708 // setOperationAction(ISD::VSETCC, VT, Expand); 709 setOperationAction(ISD::SETCC, VT, Expand); 710 setOperationAction(ISD::SELECT_CC, VT, Expand); 711 setOperationAction(ISD::SELECT, VT, Expand); 712 713 } 714 setOperationAction(ISD::FP_ROUND, MVT::Other, Expand); 715 if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) { 716 if (STM.calVersion() < CAL_VERSION_SC_139 717 || STM.device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { 718 setOperationAction(ISD::MUL, MVT::i64, Custom); 719 } 720 setOperationAction(ISD::SUB, MVT::i64, Custom); 721 setOperationAction(ISD::ADD, MVT::i64, Custom); 722 setOperationAction(ISD::MULHU, MVT::i64, Expand); 723 setOperationAction(ISD::MULHU, MVT::v2i64, Expand); 724 setOperationAction(ISD::MULHS, MVT::i64, Expand); 725 setOperationAction(ISD::MULHS, MVT::v2i64, Expand); 726 setOperationAction(ISD::MUL, MVT::v2i64, Expand); 727 setOperationAction(ISD::SUB, MVT::v2i64, Expand); 728 setOperationAction(ISD::ADD, MVT::v2i64, Expand); 729 setOperationAction(ISD::SREM, MVT::v2i64, Expand); 730 setOperationAction(ISD::Constant , MVT::i64 , Legal); 731 setOperationAction(ISD::SDIV, MVT::v2i64, Expand); 732 setOperationAction(ISD::UINT_TO_FP, 
MVT::v2i64, Expand); 733 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand); 734 setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand); 735 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand); 736 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand); 737 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand); 738 } 739 if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) { 740 // we support loading/storing v2f64 but not operations on the type 741 setOperationAction(ISD::FADD, MVT::v2f64, Expand); 742 setOperationAction(ISD::FSUB, MVT::v2f64, Expand); 743 setOperationAction(ISD::FMUL, MVT::v2f64, Expand); 744 setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand); 745 setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand); 746 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); 747 setOperationAction(ISD::ConstantFP , MVT::f64 , Legal); 748 setOperationAction(ISD::FDIV, MVT::v2f64, Expand); 749 // We want to expand vector conversions into their scalar 750 // counterparts. 751 setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand); 752 setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand); 753 setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand); 754 setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand); 755 setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand); 756 setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand); 757 setOperationAction(ISD::FABS, MVT::f64, Expand); 758 setOperationAction(ISD::FABS, MVT::v2f64, Expand); 759 } 760 // TODO: Fix the UDIV24 algorithm so it works for these 761 // types correctly. This needs vector comparisons 762 // for this to work correctly. 
763 setOperationAction(ISD::UDIV, MVT::v2i8, Expand); 764 setOperationAction(ISD::UDIV, MVT::v4i8, Expand); 765 setOperationAction(ISD::UDIV, MVT::v2i16, Expand); 766 setOperationAction(ISD::UDIV, MVT::v4i16, Expand); 767 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); 768 setOperationAction(ISD::SUBC, MVT::Other, Expand); 769 setOperationAction(ISD::ADDE, MVT::Other, Expand); 770 setOperationAction(ISD::ADDC, MVT::Other, Expand); 771 setOperationAction(ISD::BRCOND, MVT::Other, Custom); 772 setOperationAction(ISD::BR_CC, MVT::Other, Custom); 773 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 774 setOperationAction(ISD::BRIND, MVT::Other, Expand); 775 setOperationAction(ISD::SETCC, MVT::Other, Custom); 776 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); 777 setOperationAction(ISD::FDIV, MVT::f32, Custom); 778 setOperationAction(ISD::FDIV, MVT::v2f32, Custom); 779 setOperationAction(ISD::FDIV, MVT::v4f32, Custom); 780 781 setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom); 782 // Use the default implementation. 
783 setOperationAction(ISD::VAARG , MVT::Other, Expand); 784 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 785 setOperationAction(ISD::VAEND , MVT::Other, Expand); 786 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); 787 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); 788 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); 789 setOperationAction(ISD::ConstantFP , MVT::f32 , Legal); 790 setOperationAction(ISD::Constant , MVT::i32 , Legal); 791 setOperationAction(ISD::TRAP , MVT::Other , Legal); 792 793 setStackPointerRegisterToSaveRestore(AMDIL::SP); 794 setSchedulingPreference(Sched::RegPressure); 795 setPow2DivIsCheap(false); 796 setPrefLoopAlignment(16); 797 setSelectIsExpensive(true); 798 setJumpIsExpensive(true); 799 computeRegisterProperties(); 800 801 maxStoresPerMemcpy = 4096; 802 maxStoresPerMemmove = 4096; 803 maxStoresPerMemset = 4096; 804 805#undef numTypes 806#undef numIntTypes 807#undef numVectorTypes 808#undef numFloatTypes 809} 810 811const char * 812AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const 813{ 814 switch (Opcode) { 815 default: return 0; 816 case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY"; 817 case AMDILISD::DP_TO_FP: return "AMDILISD::DP_TO_FP"; 818 case AMDILISD::FP_TO_DP: return "AMDILISD::FP_TO_DP"; 819 case AMDILISD::BITCONV: return "AMDILISD::BITCONV"; 820 case AMDILISD::CMOV: return "AMDILISD::CMOV"; 821 case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG"; 822 case AMDILISD::INEGATE: return "AMDILISD::INEGATE"; 823 case AMDILISD::MAD: return "AMDILISD::MAD"; 824 case AMDILISD::UMAD: return "AMDILISD::UMAD"; 825 case AMDILISD::CALL: return "AMDILISD::CALL"; 826 case AMDILISD::RET: return "AMDILISD::RET"; 827 case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI"; 828 case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO"; 829 case AMDILISD::ADD: return "AMDILISD::ADD"; 830 case AMDILISD::UMUL: return "AMDILISD::UMUL"; 831 case AMDILISD::AND: return "AMDILISD::AND"; 832 case 
AMDILISD::OR: return "AMDILISD::OR"; 833 case AMDILISD::NOT: return "AMDILISD::NOT"; 834 case AMDILISD::XOR: return "AMDILISD::XOR"; 835 case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF"; 836 case AMDILISD::SMAX: return "AMDILISD::SMAX"; 837 case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE"; 838 case AMDILISD::MOVE: return "AMDILISD::MOVE"; 839 case AMDILISD::VBUILD: return "AMDILISD::VBUILD"; 840 case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT"; 841 case AMDILISD::VINSERT: return "AMDILISD::VINSERT"; 842 case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT"; 843 case AMDILISD::LCREATE: return "AMDILISD::LCREATE"; 844 case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI"; 845 case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO"; 846 case AMDILISD::DCREATE: return "AMDILISD::DCREATE"; 847 case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI"; 848 case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO"; 849 case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2"; 850 case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2"; 851 case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2"; 852 case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2"; 853 case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2"; 854 case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2"; 855 case AMDILISD::CMP: return "AMDILISD::CMP"; 856 case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT"; 857 case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE"; 858 case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT"; 859 case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE"; 860 case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ"; 861 case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE"; 862 case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG"; 863 case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND"; 864 case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO"; 865 case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO"; 866 case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP"; 867 case 
AMDILISD::ADDADDR: return "AMDILISD::ADDADDR"; 868 case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD"; 869 case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND"; 870 case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG"; 871 case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC"; 872 case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC"; 873 case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX"; 874 case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX"; 875 case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN"; 876 case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN"; 877 case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR"; 878 case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB"; 879 case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB"; 880 case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG"; 881 case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR"; 882 case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET"; 883 case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET"; 884 case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET"; 885 case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET"; 886 case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET"; 887 case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET"; 888 case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET"; 889 case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET"; 890 case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET"; 891 case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET"; 892 case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET"; 893 case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET"; 894 case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET"; 895 case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET"; 896 case 
AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD"; 897 case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND"; 898 case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG"; 899 case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC"; 900 case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC"; 901 case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX"; 902 case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX"; 903 case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN"; 904 case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN"; 905 case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR"; 906 case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB"; 907 case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB"; 908 case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG"; 909 case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR"; 910 case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET"; 911 case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET"; 912 case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET"; 913 case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET"; 914 case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET"; 915 case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET"; 916 case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET"; 917 case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET"; 918 case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET"; 919 case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET"; 920 case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET"; 921 case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET"; 922 case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET"; 923 case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD"; 924 case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND"; 925 case 
AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG"; 926 case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC"; 927 case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC"; 928 case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX"; 929 case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX"; 930 case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN"; 931 case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN"; 932 case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR"; 933 case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR"; 934 case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB"; 935 case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB"; 936 case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG"; 937 case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR"; 938 case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET"; 939 case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET"; 940 case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET"; 941 case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET"; 942 case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET"; 943 case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET"; 944 case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET"; 945 case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET"; 946 case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET"; 947 case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET"; 948 case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET"; 949 case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET"; 950 case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET"; 951 case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET"; 952 case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET"; 953 case AMDILISD::APPEND_ALLOC: return 
      "AMDILISD::APPEND_ALLOC";
  case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
  case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
  case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
  case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
  case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
  case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
  case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
  case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
  case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
  case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
  case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";

  };
}
// getTgtMemIntrinsic - Describe the memory behavior of the AMDIL atomic,
// append-buffer and exchange intrinsics so the DAG builder can attach the
// right memory operands. Returns false for any intrinsic outside the AMDIL
// range. NOTE: several case labels below fall through intentionally — the
// *_f32 exchange variants set bitCastToInt and then share the code of the
// matching integer variants, and each *_noret group shares its IntNo
// selection after setting isRet = false.
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  // Only AMDIL-specific intrinsics are described here.
  if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
      || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
    return false;
  }
  bool bitCastToInt = false; // set for the float-exchange (*_f32) variants
  unsigned IntNo;            // target opcode selected for this intrinsic
  bool isRet = true;         // cleared for the *_noret variants
  // Subtarget is needed to pick dec/inc vs. sub/add emulation below.
  const AMDILSubtarget *STM = &this->getTargetMachine()
    .getSubtarget<AMDILSubtarget>();
  switch (Intrinsic) {
    default: return false; // Don't custom lower most intrinsics.
    // ---- atomic add (global / local / region address spaces) ----
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
      IntNo = AMDILISD::ATOM_G_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
      IntNo = AMDILISD::ATOM_L_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
      IntNo = AMDILISD::ATOM_R_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
    // ---- atomic and ----
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
      IntNo = AMDILISD::ATOM_G_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
      IntNo = AMDILISD::ATOM_L_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
      IntNo = AMDILISD::ATOM_R_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_AND_NORET; break;
    // ---- atomic compare-and-exchange ----
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
      IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
      IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
      IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
    // ---- atomic dec: native only on newer CAL; otherwise emulate with sub ----
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_G_DEC;
      } else {
        IntNo = AMDILISD::ATOM_G_SUB;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_G_DEC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_G_SUB_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_L_DEC;
      } else {
        IntNo = AMDILISD::ATOM_L_SUB;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_L_DEC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_L_SUB_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_R_DEC;
      } else {
        IntNo = AMDILISD::ATOM_R_SUB;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_R_DEC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_R_SUB_NORET;
      }
      break;
    // ---- atomic inc: native only on newer CAL; otherwise emulate with add ----
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_G_INC;
      } else {
        IntNo = AMDILISD::ATOM_G_ADD;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_G_INC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_G_ADD_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_L_INC;
      } else {
        IntNo = AMDILISD::ATOM_L_ADD;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_L_INC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_L_ADD_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_R_INC;
      } else {
        IntNo = AMDILISD::ATOM_R_ADD;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_R_INC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_R_ADD_NORET;
      }
      break;
    // ---- atomic max (signed and unsigned are distinct opcodes) ----
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
      IntNo = AMDILISD::ATOM_G_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
      IntNo = AMDILISD::ATOM_G_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
      IntNo = AMDILISD::ATOM_L_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
      IntNo = AMDILISD::ATOM_L_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
      IntNo = AMDILISD::ATOM_R_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
      IntNo = AMDILISD::ATOM_R_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
    // ---- atomic min ----
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
      IntNo = AMDILISD::ATOM_G_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
      IntNo = AMDILISD::ATOM_G_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
      IntNo = AMDILISD::ATOM_L_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
      IntNo = AMDILISD::ATOM_L_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
      IntNo = AMDILISD::ATOM_R_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
      IntNo = AMDILISD::ATOM_R_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
    // ---- atomic or ----
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
      IntNo = AMDILISD::ATOM_G_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
      IntNo = AMDILISD::ATOM_L_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
      IntNo = AMDILISD::ATOM_R_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_OR_NORET; break;
    // ---- atomic sub ----
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
      IntNo = AMDILISD::ATOM_G_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
      IntNo = AMDILISD::ATOM_L_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
      IntNo = AMDILISD::ATOM_R_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
    // ---- atomic reverse-sub ----
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
      IntNo = AMDILISD::ATOM_G_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
      IntNo = AMDILISD::ATOM_L_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
      IntNo = AMDILISD::ATOM_R_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
    // ---- atomic exchange; the f32 variants bitcast and share the int path ----
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
      bitCastToInt = true;
      // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
      IntNo = AMDILISD::ATOM_G_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
      bitCastToInt = true;
      // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
      bitCastToInt = true;
      // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
      IntNo = AMDILISD::ATOM_L_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
      bitCastToInt = true;
      // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
      bitCastToInt = true;
      // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
      IntNo = AMDILISD::ATOM_R_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
      bitCastToInt = true;
      // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
    // ---- atomic xor ----
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
      IntNo = AMDILISD::ATOM_G_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
      IntNo = AMDILISD::ATOM_L_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
      IntNo = AMDILISD::ATOM_R_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
    // ---- append/consume buffer counters ----
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
      IntNo = AMDILISD::APPEND_ALLOC; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
      isRet = false;
      IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32:
      IntNo = AMDILISD::APPEND_CONSUME; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
      isRet = false;
      IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
  };

  Info.opc = IntNo;
  // Float exchange operates on f32 memory; every other intrinsic here is i32.
  Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
  Info.ptrVal = I.getOperand(0);
  Info.offset = 0;
  Info.align = 4;
  Info.vol = true;
  // The *_noret forms produce no value, so they are treated as write-only.
  Info.readMem = isRet;
  Info.writeMem = true;
  return true;
}
// The backend supports 32 and 64 bit floating point immediates
bool
AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return true;
  } else {
    return false;
  }
}

// Never shrink an f32/f64 constant: both widths are legal immediates on
// this target, so narrowing would lose precision for no benefit.
bool
AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return false;
  } else {
    return true;
  }
}


// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
// be zero. Op is expected to be a target specific node. Used by DAG
// combiner.
1374 1375void 1376AMDILTargetLowering::computeMaskedBitsForTargetNode( 1377 const SDValue Op, 1378 APInt &KnownZero, 1379 APInt &KnownOne, 1380 const SelectionDAG &DAG, 1381 unsigned Depth) const 1382{ 1383 APInt KnownZero2; 1384 APInt KnownOne2; 1385 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything 1386 switch (Op.getOpcode()) { 1387 default: break; 1388 case AMDILISD::SELECT_CC: 1389 DAG.ComputeMaskedBits( 1390 Op.getOperand(1), 1391 KnownZero, 1392 KnownOne, 1393 Depth + 1 1394 ); 1395 DAG.ComputeMaskedBits( 1396 Op.getOperand(0), 1397 KnownZero2, 1398 KnownOne2 1399 ); 1400 assert((KnownZero & KnownOne) == 0 1401 && "Bits known to be one AND zero?"); 1402 assert((KnownZero2 & KnownOne2) == 0 1403 && "Bits known to be one AND zero?"); 1404 // Only known if known in both the LHS and RHS 1405 KnownOne &= KnownOne2; 1406 KnownZero &= KnownZero2; 1407 break; 1408 }; 1409} 1410 1411// This is the function that determines which calling convention should 1412// be used. Currently there is only one calling convention 1413CCAssignFn* 1414AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const 1415{ 1416 //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 1417 return CC_AMDIL32; 1418} 1419 1420// LowerCallResult - Lower the result values of an ISD::CALL into the 1421// appropriate copies out of appropriate physical registers. This assumes that 1422// Chain/InFlag are the input chain/flag to use, and that TheCall is the call 1423// being lowered. The returns a SDNode with the same number of values as the 1424// ISD::CALL. 
1425SDValue 1426AMDILTargetLowering::LowerCallResult( 1427 SDValue Chain, 1428 SDValue InFlag, 1429 CallingConv::ID CallConv, 1430 bool isVarArg, 1431 const SmallVectorImpl<ISD::InputArg> &Ins, 1432 DebugLoc dl, 1433 SelectionDAG &DAG, 1434 SmallVectorImpl<SDValue> &InVals) const 1435{ 1436 // Assign locations to each value returned by this call 1437 SmallVector<CCValAssign, 16> RVLocs; 1438 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1439 getTargetMachine(), RVLocs, *DAG.getContext()); 1440 CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32); 1441 1442 // Copy all of the result registers out of their specified physreg. 1443 for (unsigned i = 0; i != RVLocs.size(); ++i) { 1444 EVT CopyVT = RVLocs[i].getValVT(); 1445 if (RVLocs[i].isRegLoc()) { 1446 Chain = DAG.getCopyFromReg( 1447 Chain, 1448 dl, 1449 RVLocs[i].getLocReg(), 1450 CopyVT, 1451 InFlag 1452 ).getValue(1); 1453 SDValue Val = Chain.getValue(0); 1454 InFlag = Chain.getValue(2); 1455 InVals.push_back(Val); 1456 } 1457 } 1458 1459 return Chain; 1460 1461} 1462 1463//===----------------------------------------------------------------------===// 1464// Other Lowering Hooks 1465//===----------------------------------------------------------------------===// 1466 1467// Recursively assign SDNodeOrdering to any unordered nodes 1468// This is necessary to maintain source ordering of instructions 1469// under -O0 to avoid odd-looking "skipping around" issues. 
1470 static const SDValue 1471Ordered( SelectionDAG &DAG, unsigned order, const SDValue New ) 1472{ 1473 if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) { 1474 DAG.AssignOrdering( New.getNode(), order ); 1475 for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i) 1476 Ordered( DAG, order, New.getOperand(i) ); 1477 } 1478 return New; 1479} 1480 1481#define LOWER(A) \ 1482 case ISD:: A: \ 1483return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) ) 1484 1485SDValue 1486AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const 1487{ 1488 switch (Op.getOpcode()) { 1489 default: 1490 Op.getNode()->dump(); 1491 assert(0 && "Custom lowering code for this" 1492 "instruction is not implemented yet!"); 1493 break; 1494 LOWER(GlobalAddress); 1495 LOWER(JumpTable); 1496 LOWER(ConstantPool); 1497 LOWER(ExternalSymbol); 1498 LOWER(FP_TO_UINT); 1499 LOWER(UINT_TO_FP); 1500 LOWER(MUL); 1501 LOWER(SUB); 1502 LOWER(FDIV); 1503 LOWER(SDIV); 1504 LOWER(SREM); 1505 LOWER(UREM); 1506 LOWER(BUILD_VECTOR); 1507 LOWER(INSERT_VECTOR_ELT); 1508 LOWER(EXTRACT_VECTOR_ELT); 1509 LOWER(EXTRACT_SUBVECTOR); 1510 LOWER(SCALAR_TO_VECTOR); 1511 LOWER(CONCAT_VECTORS); 1512 LOWER(SELECT); 1513 LOWER(SETCC); 1514 LOWER(SIGN_EXTEND_INREG); 1515 LOWER(DYNAMIC_STACKALLOC); 1516 LOWER(BRCOND); 1517 LOWER(BR_CC); 1518 LOWER(FP_ROUND); 1519 } 1520 return Op; 1521} 1522 1523int 1524AMDILTargetLowering::getVarArgsFrameOffset() const 1525{ 1526 return VarArgsFrameOffset; 1527} 1528#undef LOWER 1529 1530SDValue 1531AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const 1532{ 1533 SDValue DST = Op; 1534 const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op); 1535 const GlobalValue *G = GADN->getGlobal(); 1536 DebugLoc DL = Op.getDebugLoc(); 1537 const GlobalVariable *GV = dyn_cast<GlobalVariable>(G); 1538 if (!GV) { 1539 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 1540 } else { 1541 if (GV->hasInitializer()) { 1542 const Constant 
*C = dyn_cast<Constant>(GV->getInitializer()); 1543 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) { 1544 DST = DAG.getConstant(CI->getValue(), Op.getValueType()); 1545 } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) { 1546 DST = DAG.getConstantFP(CF->getValueAPF(), 1547 Op.getValueType()); 1548 } else if (dyn_cast<ConstantAggregateZero>(C)) { 1549 EVT VT = Op.getValueType(); 1550 if (VT.isInteger()) { 1551 DST = DAG.getConstant(0, VT); 1552 } else { 1553 DST = DAG.getConstantFP(0, VT); 1554 } 1555 } else { 1556 assert(!"lowering this type of Global Address " 1557 "not implemented yet!"); 1558 C->dump(); 1559 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 1560 } 1561 } else { 1562 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 1563 } 1564 } 1565 return DST; 1566} 1567 1568SDValue 1569AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const 1570{ 1571 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 1572 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); 1573 return Result; 1574} 1575SDValue 1576AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const 1577{ 1578 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1579 EVT PtrVT = Op.getValueType(); 1580 SDValue Result; 1581 if (CP->isMachineConstantPoolEntry()) { 1582 Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 1583 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); 1584 } else { 1585 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 1586 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); 1587 } 1588 return Result; 1589} 1590 1591SDValue 1592AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const 1593{ 1594 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 1595 SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32); 1596 return Result; 1597} 1598 1599/// LowerFORMAL_ARGUMENTS - transform physical registers into 1600/// virtual registers and 
generate load operations for 1601/// arguments places on the stack. 1602/// TODO: isVarArg, hasStructRet, isMemReg 1603 SDValue 1604AMDILTargetLowering::LowerFormalArguments(SDValue Chain, 1605 CallingConv::ID CallConv, 1606 bool isVarArg, 1607 const SmallVectorImpl<ISD::InputArg> &Ins, 1608 DebugLoc dl, 1609 SelectionDAG &DAG, 1610 SmallVectorImpl<SDValue> &InVals) 1611const 1612{ 1613 1614 MachineFunction &MF = DAG.getMachineFunction(); 1615 MachineFrameInfo *MFI = MF.getFrameInfo(); 1616 //const Function *Fn = MF.getFunction(); 1617 //MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1618 1619 SmallVector<CCValAssign, 16> ArgLocs; 1620 CallingConv::ID CC = MF.getFunction()->getCallingConv(); 1621 //bool hasStructRet = MF.getFunction()->hasStructRetAttr(); 1622 1623 CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(), 1624 getTargetMachine(), ArgLocs, *DAG.getContext()); 1625 1626 // When more calling conventions are added, they need to be chosen here 1627 CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32); 1628 SDValue StackPtr; 1629 1630 //unsigned int FirstStackArgLoc = 0; 1631 1632 for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) { 1633 CCValAssign &VA = ArgLocs[i]; 1634 if (VA.isRegLoc()) { 1635 EVT RegVT = VA.getLocVT(); 1636 const TargetRegisterClass *RC = getRegClassFromType( 1637 RegVT.getSimpleVT().SimpleTy); 1638 1639 unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC); 1640 SDValue ArgValue = DAG.getCopyFromReg( 1641 Chain, 1642 dl, 1643 Reg, 1644 RegVT); 1645 // If this is an 8 or 16-bit value, it is really passed 1646 // promoted to 32 bits. Insert an assert[sz]ext to capture 1647 // this, then truncate to the right size. 
1648 1649 if (VA.getLocInfo() == CCValAssign::SExt) { 1650 ArgValue = DAG.getNode( 1651 ISD::AssertSext, 1652 dl, 1653 RegVT, 1654 ArgValue, 1655 DAG.getValueType(VA.getValVT())); 1656 } else if (VA.getLocInfo() == CCValAssign::ZExt) { 1657 ArgValue = DAG.getNode( 1658 ISD::AssertZext, 1659 dl, 1660 RegVT, 1661 ArgValue, 1662 DAG.getValueType(VA.getValVT())); 1663 } 1664 if (VA.getLocInfo() != CCValAssign::Full) { 1665 ArgValue = DAG.getNode( 1666 ISD::TRUNCATE, 1667 dl, 1668 VA.getValVT(), 1669 ArgValue); 1670 } 1671 // Add the value to the list of arguments 1672 // to be passed in registers 1673 InVals.push_back(ArgValue); 1674 if (isVarArg) { 1675 assert(0 && "Variable arguments are not yet supported"); 1676 // See MipsISelLowering.cpp for ideas on how to implement 1677 } 1678 } else if(VA.isMemLoc()) { 1679 InVals.push_back(LowerMemArgument(Chain, CallConv, Ins, 1680 dl, DAG, VA, MFI, i)); 1681 } else { 1682 assert(0 && "found a Value Assign that is " 1683 "neither a register or a memory location"); 1684 } 1685 } 1686 /*if (hasStructRet) { 1687 assert(0 && "Has struct return is not yet implemented"); 1688 // See MipsISelLowering.cpp for ideas on how to implement 1689 }*/ 1690 1691 if (isVarArg) { 1692 assert(0 && "Variable arguments are not yet supported"); 1693 // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement 1694 } 1695 // This needs to be changed to non-zero if the return function needs 1696 // to pop bytes 1697 return Chain; 1698} 1699/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 1700/// by "Src" to address "Dst" with size and alignment information specified by 1701/// the specific parameter attribute. The copy will be passed as a byval 1702/// function parameter. 
/// Make a copy of an aggregate at address specified by "Src" to address
/// "Dst" with size and alignment information specified by the specific
/// parameter attribute. The copy will be passed as a byval function
/// parameter.
/// NOTE: this path is not functional yet -- the leading assert(0) fires
/// before the memcpy node is ever built.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  assert(0 && "MemCopy does not exist yet");
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);

  return DAG.getMemcpy(Chain,
      Src.getDebugLoc(),
      Dst, Src, SizeNode, Flags.getByValAlign(),
      /*IsVol=*/false, /*AlwaysInline=*/true,
      MachinePointerInfo(), MachinePointerInfo());
}

/// Lower one stack-assigned call operand: compute StackPtr + LocMemOffset
/// and either byval-copy the aggregate there or emit a plain store.
/// Returns the store/copy chain value.
SDValue
AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
    SDValue StackPtr, SDValue Arg,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    ISD::ArgFlagsTy Flags) const
{
  unsigned int LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD,
      dl,
      getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
  } else {
    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
        MachinePointerInfo::getStack(LocMemOffset),
        false, false, 0);
  }
  return PtrOff;
}

/// LowerCall - function arguments are copied from virtual
/// regs to (physical regs)/(stack frame); CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Tail calls are unconditionally disabled for this target.
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: DO we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the calling operands, but need to change
  // if we have more than one calling convention.
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads.
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = OutVals[i];
    // Promote the value to the location's register type if needed.
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::SIGN_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::ZERO_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
      case CCValAssign::AExt:
        Arg = DAG.getNode(ISD::ANY_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter.
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());

      // emit ISD::STORE which stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
          MachinePointerInfo::getFixedStack(FI),
          false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  // Join all the argument stores into a single token.
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  SDValue InFlag;
  if (!isTailCall) {
    // Glue the physreg copies together so they stay adjacent to the call.
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee.
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call.
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
        RegsToPass[i].first,
        RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
      InVals);
}

/// Emit a DAG sequence computing count-leading-zeros of a value that is
/// known to occupy only the low `bits` bits of a 32-bit (or v-of-i32)
/// operand, using float-normalization of the exponent field.
SDValue
AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
    uint32_t bits) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY = Op.getValueType();
  EVT FPTY;
  if (INTTY.isVector()) {
    FPTY = EVT(MVT::getVectorVT(MVT::f32,
        INTTY.getVectorNumElements()));
  } else {
    FPTY = EVT(MVT::f32);
  }
  /* static inline uint
     __clz_Nbit(uint x)
     {
       int xor = 0x3f800000U | x;
       float tp = as_float(xor);
       float t = tp + -1.0f;
       uint tint = as_uint(t);
       int cmp = (x != 0);
       uint tsrc = tint >> 23;
       uint tmask = tsrc & 0xffU;
       uint cst = (103 + N)U - tmask;
       return cmp ? cst : N;
     }
  */
  assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
      && "genCLZu16 only works on 32bit types");
  // uint x = Op
  SDValue x = Op;
  // xornode = 0x3f800000 | x
  SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY,
      DAG.getConstant(0x3f800000, INTTY), x);
  // float tp = as_float(xornode)
  SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode);
  // float t = tp + -1.0f
  SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp,
      DAG.getConstantFP(-1.0f, FPTY));
  // uint tint = as_uint(t)
  SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
  // int cmp = (x != 0)
  SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
      DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
      DAG.getConstant(0, INTTY));
  // uint tsrc = tint >> 23 (extract the biased exponent)
  SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
      DAG.getConstant(23, INTTY));
  // uint tmask = tsrc & 0xFF
  SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc,
      DAG.getConstant(0xFFU, INTTY));
  // uint cst = (103 + bits) - tmask
  SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
      DAG.getConstant((103U + bits), INTTY), tmask);
  // return cmp ?
  // cst : N  (select the computed count when x != 0, else `bits`)
  cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
      DAG.getConstant(bits, INTTY));
  return cst;
}

/// Emit a DAG sequence computing count-leading-zeros of a full 32-bit
/// value, choosing an implementation based on the subtarget generation.
SDValue
AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = SDValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY = Op.getValueType();
  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  if (STM.device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
    // Evergreen and later have a find-first-bit-high instruction:
    //__clz_32bit(uint u)
    //{
    //  int z = __amdil_ffb_hi(u) ;
    //  return z < 0 ? 32 : z;
    //}
    // uint u = op
    SDValue u = Op;
    // int z = __amdil_ffb_hi(u)
    SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
    // int cmp = z < 0
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
        z, DAG.getConstant(0, INTTY));
    // return cmp ? 32 : z
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
        DAG.getConstant(32, INTTY), z);
  } else if (STM.device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // R7xx: combine two 16-bit counts from genCLZuN:
    // static inline uint
    //__clz_32bit(uint x)
    //{
    //  uint zh = __clz_16bit(x >> 16);
    //  uint zl = __clz_16bit(x & 0xffffU);
    //  return zh == 16U ? 16U + zl : zh;
    //}
    // uint x = Op
    SDValue x = Op;
    // uint xs16 = x >> 16
    SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
        DAG.getConstant(16, INTTY));
    // uint zh = __clz_16bit(xs16)
    SDValue zh = genCLZuN(xs16, DAG, 16);
    // uint xa16 = x & 0xFFFF
    SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
        DAG.getConstant(0xFFFFU, INTTY));
    // uint zl = __clz_16bit(xa16)
    SDValue zl = genCLZuN(xa16, DAG, 16);
    // uint cmp = zh == 16U
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zh, DAG.getConstant(16U, INTTY));
    // uint zl16 = zl + 16
    SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
        DAG.getConstant(16, INTTY), zl);
    // return cmp ? zl16 : zh
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp, zl16, zh);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}

/// Emit a DAG sequence computing count-leading-zeros of a 64-bit value,
/// built from the 32-bit (Evergreen+) or 23-bit-chunk (R7xx) helpers.
SDValue
AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = SDValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY;
  EVT LONGTY = Op.getValueType();
  bool isVec = LONGTY.isVector();
  if (isVec) {
    INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
        .getVectorNumElements()));
  } else {
    INTTY = EVT(MVT::i32);
  }
  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  if (STM.device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
    // Evergreen:
    // static inline uint
    // __clz_u64(ulong x)
    // {
    //   uint zhi = __clz_32bit((uint)(x >> 32));
    //   uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
    //   return zhi == 32U ? 32U + zlo : zhi;
    //}
    //ulong x = op
    SDValue x = Op;
    // uint xlo = x & 0xFFFFFFFF
    SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xhi = x >> 32
    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
    // uint zhi = __clz_32bit(xhi)
    SDValue zhi = genCLZu32(xhi, DAG);
    // uint zlo = __clz_32bit(xlo)
    SDValue zlo = genCLZu32(xlo, DAG);
    // uint cmp = zhi == 32
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhi, DAG.getConstant(32U, INTTY));
    // uint zlop32 = 32 + zlo
    // NOTE(review): this uses AMDILISD::ADD while sibling code uses
    // ISD::ADD -- presumably equivalent here; confirm.
    SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
        DAG.getConstant(32U, INTTY), zlo);
    // return cmp ? zlop32 : zhi
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
  } else if (STM.device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // HD4XXX:
    // static inline uint
    //__clz_64bit(ulong x)
    //{
    //  uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
    //  uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
    //  uint zl = __clz_23bit((uint)x & 0x7fffffU);
    //  uint r = zh == 18U ? 18U + zm : zh;
    //  return zh + zm == 41U ? 41U + zl : r;
    //}
    //ulong x = Op
    SDValue x = Op;
    // ulong xs46 = x >> 46
    SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(46, LONGTY));
    // uint ixs46 = (uint)xs46
    SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
    // ulong xs23 = x >> 23
    SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(23, LONGTY));
    // uint ixs23 = (uint)xs23
    SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
    // uint xs23m23 = ixs23 & 0x7FFFFF
    SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint ix = (uint)x
    SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xm23 = ix & 0x7FFFFF
    SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint zh = __clz_23bit(ixs46)
    SDValue zh = genCLZuN(ixs46, DAG, 23);
    // uint zm = __clz_23bit(xs23m23)
    SDValue zm = genCLZuN(xs23m23, DAG, 23);
    // uint zl = __clz_23bit(xm23)
    SDValue zl = genCLZuN(xm23, DAG, 23);
    // uint zhm5 = zh - 5 (add of -5U == subtract 5)
    SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
        DAG.getConstant(-5U, INTTY));
    SDValue const18 = DAG.getConstant(18, INTTY);
    SDValue const41 = DAG.getConstant(41, INTTY);
    // uint cmp1 = (zhm5 == 18)
    SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5, const18);
    // uint zhm5zm = zhm5 + zm
    SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
    // uint cmp2 = zhm5zm == 41
    SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5zm, const41);
    // uint zmp18 = zm + 18
    SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
    // uint zlp41 = zl + 41
    SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
    // uint r = cmp1 ? zmp18 : zhm5
    SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp1, zmp18, zhm5);
    // return cmp2 ?
    // zlp41 : r
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}

/// Emit a DAG sequence converting an f64 (or vector of f64) to i64.
/// `includeSign` selects the signed conversion; otherwise the unsigned
/// conversion is produced. Uses native double ops on SI+ and a manual
/// mantissa/exponent decomposition on older generations.
SDValue
AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  SDValue DST;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
        .getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
        .getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // unsigned version:
    // uint uhi = (uint)(d * 0x1.0p-32);
    // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
    // return as_ulong2((uint2)(ulo, uhi));
    //
    // signed version:
    // double ad = fabs(d);
    // long l = unsigned_version(ad);
    // long nl = -l;
    // return d == ad ? l : nl;
    SDValue d = RHS;
    if (includeSign) {
      d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
    }
    // NOTE(review): 0x2f800000 / 0xcf800000 are the f32 bit patterns of
    // 0x1.0p-32 / -0x1.0p+32, but getConstantFP takes a *value*, not a
    // bit pattern -- verify these constants produce the intended doubles.
    SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
        DAG.getConstantFP(0x2f800000, RHSVT));
    SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
    SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
    ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
        DAG.getConstantFP(0xcf800000, RHSVT), d);
    SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
    SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
    if (includeSign) {
      SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
          DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
          RHS, d);
      l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
    }
    DST = l;
  } else {
    /*
       __attribute__((always_inline)) long
       cast_f64_to_i64(double d)
       {
         // Convert d in to 32-bit components
         long x = as_long(d);
         xhi = LCOMPHI(x);
         xlo = LCOMPLO(x);

         // Generate 'normalized' mantissa
         mhi = xhi | 0x00100000; // hidden bit
         mhi <<= 11;
         temp = xlo >> (32 - 11);
         mhi |= temp
         mlo = xlo << 11;

         // Compute shift right count from exponent
         e = (xhi >> (52-32)) & 0x7ff;
         sr = 1023 + 63 - e;
         srge64 = sr >= 64;
         srge32 = sr >= 32;

         // Compute result for 0 <= sr < 32
         rhi0 = mhi >> (sr &31);
         rlo0 = mlo >> (sr &31);
         temp = mhi << (32 - sr);
         temp |= rlo0;
         rlo0 = sr ? temp : rlo0;

         // Compute result for 32 <= sr
         rhi1 = 0;
         rlo1 = srge64 ? 0 : rhi0;

         // Pick between the 2 results
         rhi = srge32 ? rhi1 : rhi0;
         rlo = srge32 ? rlo1 : rlo0;

         // Optional saturate on overflow
         srlt0 = sr < 0;
         rhi = srlt0 ? MAXVALUE : rhi;
         rlo = srlt0 ? MAXVALUE : rlo;

         // Create long
         res = LCREATE( rlo, rhi );

         // Deal with sign bit (ignoring whether result is signed or
         // unsigned value)
         if (includeSign) {
           sign = ((signed int) xhi) >> 31; fill with sign bit
           sign = LCREATE( sign, sign );
           res += sign;
           res ^= sign;
         }

         return res;
       }
    */
    SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
    SDValue c32 = DAG.getConstant( 32, INTVT );

    // Convert d in to 32-bit components
    SDValue d = RHS;
    SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

    // Generate 'normalized' mantissa (OR in the hidden bit, then shift
    // the 52-bit mantissa up to the top of the 64-bit pair).
    SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
        xhi, DAG.getConstant( 0x00100000, INTVT ) );
    mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
    SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
        xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
    mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
    SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );

    // Compute shift right count from exponent
    SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
        xhi, DAG.getConstant( 52-32, INTVT ) );
    e = DAG.getNode( ISD::AND, DL, INTVT,
        e, DAG.getConstant( 0x7ff, INTVT ) );
    SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( 1023 + 63, INTVT ), e );
    SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(64, INTVT));
    SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(32, INTVT));

    // Compute result for 0 <= sr < 32
    SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
          AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
      // Two's-complement adjust: res = (res + sign) ^ sign.
      res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
      res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
    }
    DST = res;
  }
  return DST;
}

/// Emit a DAG sequence converting an f64 (or vector of f64) to i32 by
/// decomposing the double into mantissa/exponent and shifting; used on
/// pre-SI generations that lack a native conversion.
SDValue
AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
        RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
        RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  /*
     __attribute__((always_inline)) int
     cast_f64_to_[u|i]32(double d)
     {
       // Convert d in to 32-bit components
       long x = as_long(d);
       xhi = LCOMPHI(x);
       xlo = LCOMPLO(x);

       // Generate 'normalized' mantissa
       mhi = xhi | 0x00100000; // hidden bit
       mhi <<= 11;
       temp = xlo >> (32 - 11);
       mhi |= temp

       // Compute shift right count from exponent
       e = (xhi >> (52-32)) & 0x7ff;
       sr = 1023 + 31 - e;
       srge32 = sr >= 32;

       // Compute result for 0 <= sr < 32
       res = mhi >> (sr &31);
       res = srge32 ? 0 : res;

       // Optional saturate on overflow
       srlt0 = sr < 0;
       res = srlt0 ? MAXVALUE : res;

       // Deal with sign bit (ignoring whether result is signed or
       // unsigned value)
       if (includeSign) {
         sign = ((signed int) xhi) >> 31; fill with sign bit
         res += sign;
         res ^= sign;
       }

       return res;
     }
  */
  SDValue c11 = DAG.getConstant( 63 - 52, INTVT );

  // Convert d in to 32-bit components
  SDValue d = RHS;
  SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
  SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
  SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

  // Generate 'normalized' mantissa (hidden bit + top 32 mantissa bits).
  SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
      xhi, DAG.getConstant( 0x00100000, INTVT ) );
  mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
  SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
      xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
  mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );

  // Compute shift right count from exponent
  SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
      xhi, DAG.getConstant( 52-32, INTVT ) );
  e = DAG.getNode( ISD::AND, DL, INTVT,
      e, DAG.getConstant( 0x7ff, INTVT ) );
  SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
      DAG.getConstant( 1023 + 31, INTVT ), e );
  SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
      sr, DAG.getConstant(32, INTVT));

  // Compute result for 0 <= sr < 32; values shifted out entirely give 0.
  SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
  res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
      srge32, DAG.getConstant(0,INTVT), res );

  // Deal with sign bit: res = (res + sign) ^ sign.
  if (includeSign) {
    SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
        xhi, DAG.getConstant( 31, INTVT ) );
    res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
    res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
  }
  return res;
}

/// Lower FP_TO_UINT: scalarize unsupported vector f64 sources on SI+,
/// otherwise dispatch to the software f64->int helpers or pass through.
SDValue
AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST;
  SDValue RHS = Op.getOperand(0);
  EVT RHSVT = RHS.getValueType();
  MVT RST = RHSVT.getScalarType().getSimpleVT();
  EVT LHSVT = Op.getValueType();
  MVT LST = LHSVT.getScalarType().getSimpleVT();
  DebugLoc DL = Op.getDebugLoc();
  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  if (RST
== MVT::f64 && RHSVT.isVector() 2430 && STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 2431 // We dont support vector 64bit floating point convertions. 2432 for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) { 2433 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 2434 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); 2435 op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op); 2436 if (!x) { 2437 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); 2438 } else { 2439 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, 2440 DST, op, DAG.getTargetConstant(x, MVT::i32)); 2441 } 2442 2443 } 2444 } else { 2445 if (RST == MVT::f64 2446 && LST == MVT::i32) { 2447 if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 2448 DST = SDValue(Op.getNode(), 0); 2449 } else { 2450 DST = genf64toi32(RHS, DAG, false); 2451 } 2452 } else if (RST == MVT::f64 2453 && LST == MVT::i64) { 2454 DST = genf64toi64(RHS, DAG, false); 2455 } else if (RST == MVT::f64 2456 && (LST == MVT::i8 || LST == MVT::i16)) { 2457 if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 2458 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0)); 2459 } else { 2460 SDValue ToInt = genf64toi32(RHS, DAG, false); 2461 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt); 2462 } 2463 2464 } else { 2465 DST = SDValue(Op.getNode(), 0); 2466 } 2467 } 2468 return DST; 2469} 2470SDValue 2471AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT, 2472 SelectionDAG &DAG) const 2473{ 2474 EVT RHSVT = RHS.getValueType(); 2475 DebugLoc DL = RHS.getDebugLoc(); 2476 EVT INTVT; 2477 EVT LONGVT; 2478 bool isVec = RHSVT.isVector(); 2479 if (isVec) { 2480 LONGVT = EVT(MVT::getVectorVT(MVT::i64, 2481 RHSVT.getVectorNumElements())); 2482 INTVT = EVT(MVT::getVectorVT(MVT::i32, 2483 RHSVT.getVectorNumElements())); 2484 } else { 2485 LONGVT = EVT(MVT::i64); 2486 INTVT = EVT(MVT::i32); 2487 } 2488 SDValue x = RHS; 2489 const AMDILSubtarget &STM = 
getTargetMachine().getSubtarget<AMDILSubtarget>(); 2490 if (STM.calVersion() >= CAL_VERSION_SC_135) { 2491 // unsigned x = RHS; 2492 // ulong xd = (ulong)(0x4330_0000 << 32) | x; 2493 // double d = as_double( xd ); 2494 // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000 2495 SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x, 2496 DAG.getConstant( 0x43300000, INTVT ) ); 2497 SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd ); 2498 SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT, 2499 DAG.getConstant( 0x4330000000000000ULL, LONGVT ) ); 2500 return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd ); 2501 } else { 2502 SDValue clz = genCLZu32(x, DAG); 2503 2504 // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2 2505 // Except for an input 0... which requires a 0 exponent 2506 SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT, 2507 DAG.getConstant( (1023+31), INTVT), clz ); 2508 exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x ); 2509 2510 // Normalize frac 2511 SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz ); 2512 2513 // Eliminate hidden bit 2514 rhi = DAG.getNode( ISD::AND, DL, INTVT, 2515 rhi, DAG.getConstant( 0x7fffffff, INTVT ) ); 2516 2517 // Pack exponent and frac 2518 SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT, 2519 rhi, DAG.getConstant( (32 - 11), INTVT ) ); 2520 rhi = DAG.getNode( ISD::SRL, DL, INTVT, 2521 rhi, DAG.getConstant( 11, INTVT ) ); 2522 exp = DAG.getNode( ISD::SHL, DL, INTVT, 2523 exp, DAG.getConstant( 20, INTVT ) ); 2524 rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp ); 2525 2526 // Convert 2 x 32 in to 1 x 64, then to double precision float type 2527 SDValue res = DAG.getNode( (isVec) ? 
        AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
  }
}

/// Emit a DAG sequence converting a u64 (or vector of u64) to f64,
/// choosing between a native mad-based form (SI+), the exponent-bias
/// trick (newer CAL), or a full manual normalization with rounding.
SDValue
AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
    SelectionDAG &DAG) const
{
  EVT RHSVT = RHS.getValueType();
  DebugLoc DL = RHS.getDebugLoc();
  EVT INTVT;
  EVT LONGVT;
  bool isVec = RHSVT.isVector();
  if (isVec) {
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
        RHSVT.getVectorNumElements()));
  } else {
    INTVT = EVT(MVT::i32);
  }
  LONGVT = RHSVT;
  SDValue x = RHS;
  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // double dhi = (double)(as_uint2(x).y);
    // double dlo = (double)(as_uint2(x).x);
    // return mad(dhi, 0x1.0p+32, dlo)
    // NOTE(review): 0x4f800000 is the f32 bit pattern of 0x1.0p+32, but
    // getConstantFP takes a *value* -- verify the intended double results.
    SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
    dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
    SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
    dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
    return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
        DAG.getConstantFP(0x4f800000, LHSVT), dlo);
  } else if (STM.calVersion() >= CAL_VERSION_SC_135) {
    // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
    // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
    // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL
    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
    SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
    SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
    SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
    SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
        DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
    hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
    return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );

  } else {
    // Manual path: normalize with CLZ, pack exponent/mantissa, round.
    SDValue clz = genCLZu64(x, DAG);
    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

    // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( (1023+63), INTVT), clz );
    SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        mash, exp, mash ); // exp = exp, or 0 if input was 0

    // Normalize frac: shift the 64-bit value left by clz, handling the
    // sub-32 and >=32 shift cases separately.
    SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
        clz, DAG.getConstant( 31, INTVT ) );
    SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( 32, INTVT ), clz31 );
    SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
    SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
    t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
    SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
    SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
    SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
    SDValue rlo2 = DAG.getConstant( 0, INTVT );
    SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
        clz, DAG.getConstant( 32, INTVT ) );
    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        clz32, rhi2, rhi1 );
    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        clz32, rlo2, rlo1 );

    // Eliminate hidden bit
    rhi = DAG.getNode( ISD::AND, DL, INTVT,
        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );

    // Save bits needed to round properly
    SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
        rlo, DAG.getConstant( 0x7ff, INTVT ) );

    // Pack exponent and frac
    rlo = DAG.getNode( ISD::SRL, DL, INTVT,
        rlo, DAG.getConstant( 11, INTVT ) );
    SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
        rhi, DAG.getConstant( (32 - 11), INTVT ) );
    rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
        rhi, DAG.getConstant( 11, INTVT ) );
    exp = DAG.getNode( ISD::SHL, DL, INTVT,
        exp, DAG.getConstant( 20, INTVT ) );
    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );

    // Compute rounding bit (round-to-nearest-even on the dropped 11 bits)
    SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
        rlo, DAG.getConstant( 1, INTVT ) );
    SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
        round, DAG.getConstant( 0x3ff, INTVT ) );
    grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
        grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
    grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
    round = DAG.getNode( ISD::SRL, DL, INTVT,
        round, DAG.getConstant( 10, INTVT ) );
    round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1

    // Add rounding bit
    SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
        round, DAG.getConstant( 0, INTVT ) );
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
    res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
  }
}

/// Lower UINT_TO_FP: scalarize unsupported vector f64 destinations on
/// SI+, otherwise dispatch to the u32/u64 -> f64 helpers or pass through.
SDValue
AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
{
  SDValue RHS = Op.getOperand(0);
  EVT RHSVT = RHS.getValueType();
  MVT RST = RHSVT.getScalarType().getSimpleVT();
  EVT LHSVT = Op.getValueType();
  MVT LST = LHSVT.getScalarType().getSimpleVT();
  DebugLoc DL = Op.getDebugLoc();
  SDValue DST;
  // NOTE(review): INTVT/LONGVT are declared but never used in this
  // function.
  EVT INTVT;
  EVT LONGVT;
  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  if (LST == MVT::f64 && LHSVT.isVector()
      && STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // We dont support vector 64bit floating point convertions.
    DST = Op;
    for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
      op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
      if (!x) {
        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
      } else {
        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
            op, DAG.getTargetConstant(x, MVT::i32));
      }

    }
  } else {

    if (RST == MVT::i32
        && LST == MVT::f64) {
      if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
        DST = SDValue(Op.getNode(), 0);
      } else {
        DST = genu32tof64(RHS, LHSVT, DAG);
      }
    } else if (RST == MVT::i64
        && LST == MVT::f64) {
      DST = genu64tof64(RHS, LHSVT, DAG);
    } else {
      DST = SDValue(Op.getNode(), 0);
    }
  }
  return DST;
}

SDValue
AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
{
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
2702 bool isVec = RHS.getValueType().isVector(); 2703 if (OVT.getScalarType() == MVT::i64) { 2704 MVT INTTY = MVT::i32; 2705 if (OVT == MVT::v2i64) { 2706 INTTY = MVT::v2i32; 2707 } 2708 SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI; 2709 // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32 2710 LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS); 2711 RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS); 2712 LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS); 2713 RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS); 2714 INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO); 2715 INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI); 2716 //TODO: need to use IBORROW on HD5XXX and later hardware 2717 SDValue cmp; 2718 if (OVT == MVT::i64) { 2719 cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2720 DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32), 2721 LHSLO, RHSLO); 2722 } else { 2723 SDValue cmplo; 2724 SDValue cmphi; 2725 SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 2726 DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32)); 2727 SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 2728 DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32)); 2729 SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 2730 DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32)); 2731 SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 2732 DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32)); 2733 cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32, 2734 DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32), 2735 LHSRLO, RHSRLO); 2736 cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32, 2737 DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32), 2738 LHSRHI, RHSRHI); 2739 cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo); 2740 cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, 
MVT::v2i32, 2741 cmp, cmphi, DAG.getTargetConstant(1, MVT::i32)); 2742 } 2743 INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp); 2744 DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT, 2745 INTLO, INTHI); 2746 } else { 2747 DST = SDValue(Op.getNode(), 0); 2748 } 2749 return DST; 2750} 2751SDValue 2752AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const 2753{ 2754 EVT OVT = Op.getValueType(); 2755 SDValue DST; 2756 if (OVT.getScalarType() == MVT::f64) { 2757 DST = LowerFDIV64(Op, DAG); 2758 } else if (OVT.getScalarType() == MVT::f32) { 2759 DST = LowerFDIV32(Op, DAG); 2760 } else { 2761 DST = SDValue(Op.getNode(), 0); 2762 } 2763 return DST; 2764} 2765 2766SDValue 2767AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const 2768{ 2769 EVT OVT = Op.getValueType(); 2770 SDValue DST; 2771 if (OVT.getScalarType() == MVT::i64) { 2772 DST = LowerSDIV64(Op, DAG); 2773 } else if (OVT.getScalarType() == MVT::i32) { 2774 DST = LowerSDIV32(Op, DAG); 2775 } else if (OVT.getScalarType() == MVT::i16 2776 || OVT.getScalarType() == MVT::i8) { 2777 DST = LowerSDIV24(Op, DAG); 2778 } else { 2779 DST = SDValue(Op.getNode(), 0); 2780 } 2781 return DST; 2782} 2783 2784SDValue 2785AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const 2786{ 2787 EVT OVT = Op.getValueType(); 2788 SDValue DST; 2789 if (OVT.getScalarType() == MVT::i64) { 2790 DST = LowerSREM64(Op, DAG); 2791 } else if (OVT.getScalarType() == MVT::i32) { 2792 DST = LowerSREM32(Op, DAG); 2793 } else if (OVT.getScalarType() == MVT::i16) { 2794 DST = LowerSREM16(Op, DAG); 2795 } else if (OVT.getScalarType() == MVT::i8) { 2796 DST = LowerSREM8(Op, DAG); 2797 } else { 2798 DST = SDValue(Op.getNode(), 0); 2799 } 2800 return DST; 2801} 2802 2803SDValue 2804AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const 2805{ 2806 EVT OVT = Op.getValueType(); 2807 SDValue DST; 2808 if (OVT.getScalarType() == MVT::i64) { 2809 DST = LowerUREM64(Op, DAG); 2810 } 
else if (OVT.getScalarType() == MVT::i32) { 2811 DST = LowerUREM32(Op, DAG); 2812 } else if (OVT.getScalarType() == MVT::i16) { 2813 DST = LowerUREM16(Op, DAG); 2814 } else if (OVT.getScalarType() == MVT::i8) { 2815 DST = LowerUREM8(Op, DAG); 2816 } else { 2817 DST = SDValue(Op.getNode(), 0); 2818 } 2819 return DST; 2820} 2821 2822SDValue 2823AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const 2824{ 2825 DebugLoc DL = Op.getDebugLoc(); 2826 EVT OVT = Op.getValueType(); 2827 SDValue DST; 2828 bool isVec = OVT.isVector(); 2829 if (OVT.getScalarType() != MVT::i64) 2830 { 2831 DST = SDValue(Op.getNode(), 0); 2832 } else { 2833 assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!"); 2834 // TODO: This needs to be turned into a tablegen pattern 2835 SDValue LHS = Op.getOperand(0); 2836 SDValue RHS = Op.getOperand(1); 2837 2838 MVT INTTY = MVT::i32; 2839 if (OVT == MVT::v2i64) { 2840 INTTY = MVT::v2i32; 2841 } 2842 // mul64(h1, l1, h0, l0) 2843 SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, 2844 DL, 2845 INTTY, LHS); 2846 SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, 2847 DL, 2848 INTTY, LHS); 2849 SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, 2850 DL, 2851 INTTY, RHS); 2852 SDValue RHSHI = DAG.getNode((isVec) ? 
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, 2853 DL, 2854 INTTY, RHS); 2855 // MULLO_UINT_1 r1, h0, l1 2856 SDValue RHILLO = DAG.getNode(AMDILISD::UMUL, 2857 DL, 2858 INTTY, RHSHI, LHSLO); 2859 // MULLO_UINT_1 r2, h1, l0 2860 SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL, 2861 DL, 2862 INTTY, RHSLO, LHSHI); 2863 // ADD_INT hr, r1, r2 2864 SDValue ADDHI = DAG.getNode(ISD::ADD, 2865 DL, 2866 INTTY, RHILLO, RLOHHI); 2867 // MULHI_UINT_1 r3, l1, l0 2868 SDValue RLOLLO = DAG.getNode(ISD::MULHU, 2869 DL, 2870 INTTY, RHSLO, LHSLO); 2871 // ADD_INT hr, hr, r3 2872 SDValue HIGH = DAG.getNode(ISD::ADD, 2873 DL, 2874 INTTY, ADDHI, RLOLLO); 2875 // MULLO_UINT_1 l3, l1, l0 2876 SDValue LOW = DAG.getNode(AMDILISD::UMUL, 2877 DL, 2878 INTTY, LHSLO, RHSLO); 2879 DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, 2880 DL, 2881 OVT, LOW, HIGH); 2882 } 2883 return DST; 2884} 2885SDValue 2886AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const 2887{ 2888 EVT VT = Op.getValueType(); 2889 SDValue Nodes1; 2890 SDValue second; 2891 SDValue third; 2892 SDValue fourth; 2893 DebugLoc DL = Op.getDebugLoc(); 2894 Nodes1 = DAG.getNode(AMDILISD::VBUILD, 2895 DL, 2896 VT, Op.getOperand(0)); 2897#if 0 2898 bool allEqual = true; 2899 for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) { 2900 if (Op.getOperand(0) != Op.getOperand(x)) { 2901 allEqual = false; 2902 break; 2903 } 2904 } 2905 if (allEqual) { 2906 return Nodes1; 2907 } 2908#endif 2909 switch(Op.getNumOperands()) { 2910 default: 2911 case 1: 2912 break; 2913 case 4: 2914 fourth = Op.getOperand(3); 2915 if (fourth.getOpcode() != ISD::UNDEF) { 2916 Nodes1 = DAG.getNode( 2917 ISD::INSERT_VECTOR_ELT, 2918 DL, 2919 Op.getValueType(), 2920 Nodes1, 2921 fourth, 2922 DAG.getConstant(7, MVT::i32)); 2923 } 2924 case 3: 2925 third = Op.getOperand(2); 2926 if (third.getOpcode() != ISD::UNDEF) { 2927 Nodes1 = DAG.getNode( 2928 ISD::INSERT_VECTOR_ELT, 2929 DL, 2930 Op.getValueType(), 2931 Nodes1, 2932 third, 
2933 DAG.getConstant(6, MVT::i32)); 2934 } 2935 case 2: 2936 second = Op.getOperand(1); 2937 if (second.getOpcode() != ISD::UNDEF) { 2938 Nodes1 = DAG.getNode( 2939 ISD::INSERT_VECTOR_ELT, 2940 DL, 2941 Op.getValueType(), 2942 Nodes1, 2943 second, 2944 DAG.getConstant(5, MVT::i32)); 2945 } 2946 break; 2947 }; 2948 return Nodes1; 2949} 2950 2951SDValue 2952AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, 2953 SelectionDAG &DAG) const 2954{ 2955 DebugLoc DL = Op.getDebugLoc(); 2956 EVT VT = Op.getValueType(); 2957 const SDValue *ptr = NULL; 2958 const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 2959 uint32_t swizzleNum = 0; 2960 SDValue DST; 2961 if (!VT.isVector()) { 2962 SDValue Res = Op.getOperand(0); 2963 return Res; 2964 } 2965 2966 if (Op.getOperand(1).getOpcode() != ISD::UNDEF) { 2967 ptr = &Op.getOperand(1); 2968 } else { 2969 ptr = &Op.getOperand(0); 2970 } 2971 if (CSDN) { 2972 swizzleNum = (uint32_t)CSDN->getZExtValue(); 2973 uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8)); 2974 uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8)); 2975 DST = DAG.getNode(AMDILISD::VINSERT, 2976 DL, 2977 VT, 2978 Op.getOperand(0), 2979 *ptr, 2980 DAG.getTargetConstant(mask2, MVT::i32), 2981 DAG.getTargetConstant(mask3, MVT::i32)); 2982 } else { 2983 uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8)); 2984 uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8)); 2985 SDValue res = DAG.getNode(AMDILISD::VINSERT, 2986 DL, VT, Op.getOperand(0), *ptr, 2987 DAG.getTargetConstant(mask2, MVT::i32), 2988 DAG.getTargetConstant(mask3, MVT::i32)); 2989 for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) { 2990 mask2 = 0x04030201 & ~(0xFF << (x * 8)); 2991 mask3 = 0x01010101 & (0xFF << (x * 8)); 2992 SDValue t = DAG.getNode(AMDILISD::VINSERT, 2993 DL, VT, Op.getOperand(0), *ptr, 2994 DAG.getTargetConstant(mask2, MVT::i32), 2995 DAG.getTargetConstant(mask3, MVT::i32)); 2996 SDValue c = DAG.getNode(AMDILISD::CMP, DL, 
ptr->getValueType(), 2997 DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32), 2998 Op.getOperand(2), DAG.getConstant(x, MVT::i32)); 2999 c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c); 3000 res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res); 3001 } 3002 DST = res; 3003 } 3004 return DST; 3005} 3006 3007SDValue 3008AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, 3009 SelectionDAG &DAG) const 3010{ 3011 EVT VT = Op.getValueType(); 3012 const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 3013 uint64_t swizzleNum = 0; 3014 DebugLoc DL = Op.getDebugLoc(); 3015 SDValue Res; 3016 if (!Op.getOperand(0).getValueType().isVector()) { 3017 Res = Op.getOperand(0); 3018 return Res; 3019 } 3020 if (CSDN) { 3021 // Static vector extraction 3022 swizzleNum = CSDN->getZExtValue() + 1; 3023 Res = DAG.getNode(AMDILISD::VEXTRACT, 3024 DL, VT, 3025 Op.getOperand(0), 3026 DAG.getTargetConstant(swizzleNum, MVT::i32)); 3027 } else { 3028 SDValue Op1 = Op.getOperand(1); 3029 uint32_t vecSize = 4; 3030 SDValue Op0 = Op.getOperand(0); 3031 SDValue res = DAG.getNode(AMDILISD::VEXTRACT, 3032 DL, VT, Op0, 3033 DAG.getTargetConstant(1, MVT::i32)); 3034 if (Op0.getValueType().isVector()) { 3035 vecSize = Op0.getValueType().getVectorNumElements(); 3036 } 3037 for (uint32_t x = 2; x <= vecSize; ++x) { 3038 SDValue t = DAG.getNode(AMDILISD::VEXTRACT, 3039 DL, VT, Op0, 3040 DAG.getTargetConstant(x, MVT::i32)); 3041 SDValue c = DAG.getNode(AMDILISD::CMP, 3042 DL, Op1.getValueType(), 3043 DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32), 3044 Op1, DAG.getConstant(x, MVT::i32)); 3045 res = DAG.getNode(AMDILISD::CMOVLOG, DL, 3046 VT, c, t, res); 3047 3048 } 3049 Res = res; 3050 } 3051 return Res; 3052} 3053 3054SDValue 3055AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, 3056 SelectionDAG &DAG) const 3057{ 3058 uint32_t vecSize = Op.getValueType().getVectorNumElements(); 3059 SDValue src = Op.getOperand(0); 3060 const ConstantSDNode *CSDN = 
dyn_cast<ConstantSDNode>(Op.getOperand(1)); 3061 uint64_t offset = 0; 3062 EVT vecType = Op.getValueType().getVectorElementType(); 3063 DebugLoc DL = Op.getDebugLoc(); 3064 SDValue Result; 3065 if (CSDN) { 3066 offset = CSDN->getZExtValue(); 3067 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 3068 DL,vecType, src, DAG.getConstant(offset, MVT::i32)); 3069 Result = DAG.getNode(AMDILISD::VBUILD, DL, 3070 Op.getValueType(), Result); 3071 for (uint32_t x = 1; x < vecSize; ++x) { 3072 SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType, 3073 src, DAG.getConstant(offset + x, MVT::i32)); 3074 if (elt.getOpcode() != ISD::UNDEF) { 3075 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, 3076 Op.getValueType(), Result, elt, 3077 DAG.getConstant(x, MVT::i32)); 3078 } 3079 } 3080 } else { 3081 SDValue idx = Op.getOperand(1); 3082 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 3083 DL, vecType, src, idx); 3084 Result = DAG.getNode(AMDILISD::VBUILD, DL, 3085 Op.getValueType(), Result); 3086 for (uint32_t x = 1; x < vecSize; ++x) { 3087 idx = DAG.getNode(ISD::ADD, DL, vecType, 3088 idx, DAG.getConstant(1, MVT::i32)); 3089 SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType, 3090 src, idx); 3091 if (elt.getOpcode() != ISD::UNDEF) { 3092 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, 3093 Op.getValueType(), Result, elt, idx); 3094 } 3095 } 3096 } 3097 return Result; 3098} 3099SDValue 3100AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 3101 SelectionDAG &DAG) const 3102{ 3103 SDValue Res = DAG.getNode(AMDILISD::VBUILD, 3104 Op.getDebugLoc(), 3105 Op.getValueType(), 3106 Op.getOperand(0)); 3107 return Res; 3108} 3109SDValue 3110AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const 3111{ 3112 SDValue Cond = Op.getOperand(0); 3113 SDValue LHS = Op.getOperand(1); 3114 SDValue RHS = Op.getOperand(2); 3115 DebugLoc DL = Op.getDebugLoc(); 3116 Cond = getConversionNode(DAG, Cond, Op, true); 3117 Cond = DAG.getNode(AMDILISD::CMOVLOG, 3118 DL, 3119 
Op.getValueType(), Cond, LHS, RHS); 3120 return Cond; 3121} 3122SDValue 3123AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const 3124{ 3125 SDValue Cond; 3126 SDValue LHS = Op.getOperand(0); 3127 SDValue RHS = Op.getOperand(1); 3128 SDValue CC = Op.getOperand(2); 3129 DebugLoc DL = Op.getDebugLoc(); 3130 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3131 unsigned int AMDILCC = CondCCodeToCC( 3132 SetCCOpcode, 3133 LHS.getValueType().getSimpleVT().SimpleTy); 3134 assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!"); 3135 Cond = DAG.getNode( 3136 ISD::SELECT_CC, 3137 Op.getDebugLoc(), 3138 LHS.getValueType(), 3139 LHS, RHS, 3140 DAG.getConstant(-1, MVT::i32), 3141 DAG.getConstant(0, MVT::i32), 3142 CC); 3143 Cond = getConversionNode(DAG, Cond, Op, true); 3144 Cond = DAG.getNode( 3145 ISD::AND, 3146 DL, 3147 Cond.getValueType(), 3148 DAG.getConstant(1, Cond.getValueType()), 3149 Cond); 3150 return Cond; 3151} 3152 3153SDValue 3154AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const 3155{ 3156 SDValue Data = Op.getOperand(0); 3157 VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1)); 3158 DebugLoc DL = Op.getDebugLoc(); 3159 EVT DVT = Data.getValueType(); 3160 EVT BVT = BaseType->getVT(); 3161 unsigned baseBits = BVT.getScalarType().getSizeInBits(); 3162 unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1; 3163 unsigned shiftBits = srcBits - baseBits; 3164 if (srcBits < 32) { 3165 // If the op is less than 32 bits, then it needs to extend to 32bits 3166 // so it can properly keep the upper bits valid. 3167 EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1); 3168 Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data); 3169 shiftBits = 32 - baseBits; 3170 DVT = IVT; 3171 } 3172 SDValue Shift = DAG.getConstant(shiftBits, DVT); 3173 // Shift left by 'Shift' bits. 3174 Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift); 3175 // Signed shift Right by 'Shift' bits. 
3176 Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift); 3177 if (srcBits < 32) { 3178 // Once the sign extension is done, the op needs to be converted to 3179 // its original type. 3180 Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType()); 3181 } 3182 return Data; 3183} 3184EVT 3185AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const 3186{ 3187 int iSize = (size * numEle); 3188 int vEle = (iSize >> ((size == 64) ? 6 : 5)); 3189 if (!vEle) { 3190 vEle = 1; 3191 } 3192 if (size == 64) { 3193 if (vEle == 1) { 3194 return EVT(MVT::i64); 3195 } else { 3196 return EVT(MVT::getVectorVT(MVT::i64, vEle)); 3197 } 3198 } else { 3199 if (vEle == 1) { 3200 return EVT(MVT::i32); 3201 } else { 3202 return EVT(MVT::getVectorVT(MVT::i32, vEle)); 3203 } 3204 } 3205} 3206 3207SDValue 3208AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 3209 SelectionDAG &DAG) const 3210{ 3211 SDValue Chain = Op.getOperand(0); 3212 SDValue Size = Op.getOperand(1); 3213 unsigned int SPReg = AMDIL::SP; 3214 DebugLoc DL = Op.getDebugLoc(); 3215 SDValue SP = DAG.getCopyFromReg(Chain, 3216 DL, 3217 SPReg, MVT::i32); 3218 SDValue NewSP = DAG.getNode(ISD::ADD, 3219 DL, 3220 MVT::i32, SP, Size); 3221 Chain = DAG.getCopyToReg(SP.getValue(1), 3222 DL, 3223 SPReg, NewSP); 3224 SDValue Ops[2] = {NewSP, Chain}; 3225 Chain = DAG.getMergeValues(Ops, 2 ,DL); 3226 return Chain; 3227} 3228SDValue 3229AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const 3230{ 3231 SDValue Chain = Op.getOperand(0); 3232 SDValue Cond = Op.getOperand(1); 3233 SDValue Jump = Op.getOperand(2); 3234 SDValue Result; 3235 Result = DAG.getNode( 3236 AMDILISD::BRANCH_COND, 3237 Op.getDebugLoc(), 3238 Op.getValueType(), 3239 Chain, Jump, Cond); 3240 return Result; 3241} 3242 3243SDValue 3244AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const 3245{ 3246 SDValue Chain = Op.getOperand(0); 3247 SDValue CC = Op.getOperand(1); 3248 SDValue LHS = Op.getOperand(2); 3249 SDValue 
RHS = Op.getOperand(3); 3250 SDValue JumpT = Op.getOperand(4); 3251 SDValue CmpValue; 3252 SDValue Result; 3253 CmpValue = DAG.getNode( 3254 ISD::SELECT_CC, 3255 Op.getDebugLoc(), 3256 LHS.getValueType(), 3257 LHS, RHS, 3258 DAG.getConstant(-1, MVT::i32), 3259 DAG.getConstant(0, MVT::i32), 3260 CC); 3261 Result = DAG.getNode( 3262 AMDILISD::BRANCH_COND, 3263 CmpValue.getDebugLoc(), 3264 MVT::Other, Chain, 3265 JumpT, CmpValue); 3266 return Result; 3267} 3268 3269SDValue 3270AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const 3271{ 3272 SDValue Result = DAG.getNode( 3273 AMDILISD::DP_TO_FP, 3274 Op.getDebugLoc(), 3275 Op.getValueType(), 3276 Op.getOperand(0), 3277 Op.getOperand(1)); 3278 return Result; 3279} 3280 3281SDValue 3282AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const 3283{ 3284 SDValue Result = DAG.getNode( 3285 AMDILISD::VCONCAT, 3286 Op.getDebugLoc(), 3287 Op.getValueType(), 3288 Op.getOperand(0), 3289 Op.getOperand(1)); 3290 return Result; 3291} 3292// LowerRET - Lower an ISD::RET node. 
3293SDValue 3294AMDILTargetLowering::LowerReturn(SDValue Chain, 3295 CallingConv::ID CallConv, bool isVarArg, 3296 const SmallVectorImpl<ISD::OutputArg> &Outs, 3297 const SmallVectorImpl<SDValue> &OutVals, 3298 DebugLoc dl, SelectionDAG &DAG) 3299const 3300{ 3301 //MachineFunction& MF = DAG.getMachineFunction(); 3302 // CCValAssign - represent the assignment of the return value 3303 // to a location 3304 SmallVector<CCValAssign, 16> RVLocs; 3305 3306 // CCState - Info about the registers and stack slot 3307 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3308 getTargetMachine(), RVLocs, *DAG.getContext()); 3309 3310 // Analyze return values of ISD::RET 3311 CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32); 3312 // If this is the first return lowered for this function, add 3313 // the regs to the liveout set for the function 3314 MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); 3315 for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) { 3316 if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) { 3317 MRI.addLiveOut(RVLocs[i].getLocReg()); 3318 } 3319 } 3320 // FIXME: implement this when tail call is implemented 3321 // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL); 3322 // both x86 and ppc implement this in ISelLowering 3323 3324 // Regular return here 3325 SDValue Flag; 3326 SmallVector<SDValue, 6> RetOps; 3327 RetOps.push_back(Chain); 3328 RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32)); 3329 for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) { 3330 CCValAssign &VA = RVLocs[i]; 3331 SDValue ValToCopy = OutVals[i]; 3332 assert(VA.isRegLoc() && "Can only return in registers!"); 3333 // ISD::Ret => ret chain, (regnum1, val1), ... 
3334 // So i * 2 + 1 index only the regnums 3335 Chain = DAG.getCopyToReg(Chain, 3336 dl, 3337 VA.getLocReg(), 3338 ValToCopy, 3339 Flag); 3340 // guarantee that all emitted copies are stuck together 3341 // avoiding something bad 3342 Flag = Chain.getValue(1); 3343 } 3344 /*if (MF.getFunction()->hasStructRetAttr()) { 3345 assert(0 && "Struct returns are not yet implemented!"); 3346 // Both MIPS and X86 have this 3347 }*/ 3348 RetOps[0] = Chain; 3349 if (Flag.getNode()) 3350 RetOps.push_back(Flag); 3351 3352 Flag = DAG.getNode(AMDILISD::RET_FLAG, 3353 dl, 3354 MVT::Other, &RetOps[0], RetOps.size()); 3355 return Flag; 3356} 3357 3358unsigned int 3359AMDILTargetLowering::getFunctionAlignment(const Function *) const 3360{ 3361 return 0; 3362} 3363 3364void 3365AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB, 3366 MachineBasicBlock::iterator &BBI, 3367 DebugLoc *DL, const TargetInstrInfo *TII) const 3368{ 3369 mBB = BB; 3370 mBBI = BBI; 3371 mDL = DL; 3372 mTII = TII; 3373} 3374uint32_t 3375AMDILTargetLowering::genVReg(uint32_t regType) const 3376{ 3377 return mBB->getParent()->getRegInfo().createVirtualRegister( 3378 getTargetMachine().getRegisterInfo()->getRegClass(regType)); 3379} 3380 3381MachineInstrBuilder 3382AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const 3383{ 3384 return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst); 3385} 3386 3387MachineInstrBuilder 3388AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst, 3389 uint32_t src1) const 3390{ 3391 return generateMachineInst(opcode, dst).addReg(src1); 3392} 3393 3394MachineInstrBuilder 3395AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst, 3396 uint32_t src1, uint32_t src2) const 3397{ 3398 return generateMachineInst(opcode, dst, src1).addReg(src2); 3399} 3400 3401MachineInstrBuilder 3402AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst, 3403 uint32_t src1, uint32_t src2, uint32_t src3) const 3404{ 3405 return 
generateMachineInst(opcode, dst, src1, src2).addReg(src3); 3406} 3407 3408 3409SDValue 3410AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const 3411{ 3412 DebugLoc DL = Op.getDebugLoc(); 3413 EVT OVT = Op.getValueType(); 3414 SDValue LHS = Op.getOperand(0); 3415 SDValue RHS = Op.getOperand(1); 3416 MVT INTTY; 3417 MVT FLTTY; 3418 if (!OVT.isVector()) { 3419 INTTY = MVT::i32; 3420 FLTTY = MVT::f32; 3421 } else if (OVT.getVectorNumElements() == 2) { 3422 INTTY = MVT::v2i32; 3423 FLTTY = MVT::v2f32; 3424 } else if (OVT.getVectorNumElements() == 4) { 3425 INTTY = MVT::v4i32; 3426 FLTTY = MVT::v4f32; 3427 } 3428 unsigned bitsize = OVT.getScalarType().getSizeInBits(); 3429 // char|short jq = ia ^ ib; 3430 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); 3431 3432 // jq = jq >> (bitsize - 2) 3433 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); 3434 3435 // jq = jq | 0x1 3436 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); 3437 3438 // jq = (int)jq 3439 jq = DAG.getSExtOrTrunc(jq, DL, INTTY); 3440 3441 // int ia = (int)LHS; 3442 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); 3443 3444 // int ib, (int)RHS; 3445 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); 3446 3447 // float fa = (float)ia; 3448 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); 3449 3450 // float fb = (float)ib; 3451 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); 3452 3453 // float fq = native_divide(fa, fb); 3454 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb); 3455 3456 // fq = trunc(fq); 3457 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); 3458 3459 // float fqneg = -fq; 3460 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); 3461 3462 // float fr = mad(fqneg, fb, fa); 3463 SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa); 3464 3465 // int iq = (int)fq; 3466 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); 3467 3468 // fr = fabs(fr); 3469 fr = DAG.getNode(ISD::FABS, DL, 
FLTTY, fr); 3470 3471 // fb = fabs(fb); 3472 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); 3473 3474 // int cv = fr >= fb; 3475 SDValue cv; 3476 if (INTTY == MVT::i32) { 3477 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 3478 } else { 3479 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 3480 } 3481 // jq = (cv ? jq : 0); 3482 jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq, 3483 DAG.getConstant(0, OVT)); 3484 // dst = iq + jq; 3485 iq = DAG.getSExtOrTrunc(iq, DL, OVT); 3486 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); 3487 return iq; 3488} 3489 3490SDValue 3491AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const 3492{ 3493 DebugLoc DL = Op.getDebugLoc(); 3494 EVT OVT = Op.getValueType(); 3495 SDValue LHS = Op.getOperand(0); 3496 SDValue RHS = Op.getOperand(1); 3497 // The LowerSDIV32 function generates equivalent to the following IL. 3498 // mov r0, LHS 3499 // mov r1, RHS 3500 // ilt r10, r0, 0 3501 // ilt r11, r1, 0 3502 // iadd r0, r0, r10 3503 // iadd r1, r1, r11 3504 // ixor r0, r0, r10 3505 // ixor r1, r1, r11 3506 // udiv r0, r0, r1 3507 // ixor r10, r10, r11 3508 // iadd r0, r0, r10 3509 // ixor DST, r0, r10 3510 3511 // mov r0, LHS 3512 SDValue r0 = LHS; 3513 3514 // mov r1, RHS 3515 SDValue r1 = RHS; 3516 3517 // ilt r10, r0, 0 3518 SDValue r10 = DAG.getSelectCC(DL, 3519 r0, DAG.getConstant(0, OVT), 3520 DAG.getConstant(-1, MVT::i32), 3521 DAG.getConstant(0, MVT::i32), 3522 ISD::SETLT); 3523 3524 // ilt r11, r1, 0 3525 SDValue r11 = DAG.getSelectCC(DL, 3526 r1, DAG.getConstant(0, OVT), 3527 DAG.getConstant(-1, MVT::i32), 3528 DAG.getConstant(0, MVT::i32), 3529 ISD::SETLT); 3530 3531 // iadd r0, r0, r10 3532 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 3533 3534 // iadd r1, r1, r11 3535 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 3536 3537 // ixor r0, r0, r10 3538 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 3539 3540 // ixor r1, r1, r11 3541 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 3542 3543 // udiv r0, r0, r1 3544 r0 = 
DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); 3545 3546 // ixor r10, r10, r11 3547 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11); 3548 3549 // iadd r0, r0, r10 3550 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 3551 3552 // ixor DST, r0, r10 3553 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 3554 return DST; 3555} 3556 3557SDValue 3558AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const 3559{ 3560 return SDValue(Op.getNode(), 0); 3561} 3562 3563SDValue 3564AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const 3565{ 3566 DebugLoc DL = Op.getDebugLoc(); 3567 EVT OVT = Op.getValueType(); 3568 SDValue LHS = Op.getOperand(0); 3569 SDValue RHS = Op.getOperand(1); 3570 MVT INTTY; 3571 MVT FLTTY; 3572 if (!OVT.isVector()) { 3573 INTTY = MVT::i32; 3574 FLTTY = MVT::f32; 3575 } else if (OVT.getVectorNumElements() == 2) { 3576 INTTY = MVT::v2i32; 3577 FLTTY = MVT::v2f32; 3578 } else if (OVT.getVectorNumElements() == 4) { 3579 INTTY = MVT::v4i32; 3580 FLTTY = MVT::v4f32; 3581 } 3582 3583 // The LowerUDIV24 function implements the following CL. 
3584 // int ia = (int)LHS 3585 // float fa = (float)ia 3586 // int ib = (int)RHS 3587 // float fb = (float)ib 3588 // float fq = native_divide(fa, fb) 3589 // fq = trunc(fq) 3590 // float t = mad(fq, fb, fb) 3591 // int iq = (int)fq - (t <= fa) 3592 // return (type)iq 3593 3594 // int ia = (int)LHS 3595 SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY); 3596 3597 // float fa = (float)ia 3598 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); 3599 3600 // int ib = (int)RHS 3601 SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY); 3602 3603 // float fb = (float)ib 3604 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); 3605 3606 // float fq = native_divide(fa, fb) 3607 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb); 3608 3609 // fq = trunc(fq) 3610 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); 3611 3612 // float t = mad(fq, fb, fb) 3613 SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb); 3614 3615 // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1 3616 SDValue iq; 3617 fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); 3618 if (INTTY == MVT::i32) { 3619 iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE); 3620 } else { 3621 iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE); 3622 } 3623 iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq); 3624 3625 3626 // return (type)iq 3627 iq = DAG.getZExtOrTrunc(iq, DL, OVT); 3628 return iq; 3629 3630} 3631 3632SDValue 3633AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const 3634{ 3635 DebugLoc DL = Op.getDebugLoc(); 3636 EVT OVT = Op.getValueType(); 3637 MVT INTTY = MVT::i32; 3638 if (OVT == MVT::v2i8) { 3639 INTTY = MVT::v2i32; 3640 } else if (OVT == MVT::v4i8) { 3641 INTTY = MVT::v4i32; 3642 } 3643 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 3644 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 3645 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 3646 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 3647 return LHS; 3648} 3649 
3650SDValue 3651AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const 3652{ 3653 DebugLoc DL = Op.getDebugLoc(); 3654 EVT OVT = Op.getValueType(); 3655 MVT INTTY = MVT::i32; 3656 if (OVT == MVT::v2i16) { 3657 INTTY = MVT::v2i32; 3658 } else if (OVT == MVT::v4i16) { 3659 INTTY = MVT::v4i32; 3660 } 3661 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 3662 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 3663 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 3664 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 3665 return LHS; 3666} 3667 3668SDValue 3669AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const 3670{ 3671 DebugLoc DL = Op.getDebugLoc(); 3672 EVT OVT = Op.getValueType(); 3673 SDValue LHS = Op.getOperand(0); 3674 SDValue RHS = Op.getOperand(1); 3675 // The LowerSREM32 function generates equivalent to the following IL. 3676 // mov r0, LHS 3677 // mov r1, RHS 3678 // ilt r10, r0, 0 3679 // ilt r11, r1, 0 3680 // iadd r0, r0, r10 3681 // iadd r1, r1, r11 3682 // ixor r0, r0, r10 3683 // ixor r1, r1, r11 3684 // udiv r20, r0, r1 3685 // umul r20, r20, r1 3686 // sub r0, r0, r20 3687 // iadd r0, r0, r10 3688 // ixor DST, r0, r10 3689 3690 // mov r0, LHS 3691 SDValue r0 = LHS; 3692 3693 // mov r1, RHS 3694 SDValue r1 = RHS; 3695 3696 // ilt r10, r0, 0 3697 SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT, 3698 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 3699 r0, DAG.getConstant(0, OVT)); 3700 3701 // ilt r11, r1, 0 3702 SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT, 3703 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 3704 r1, DAG.getConstant(0, OVT)); 3705 3706 // iadd r0, r0, r10 3707 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 3708 3709 // iadd r1, r1, r11 3710 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 3711 3712 // ixor r0, r0, r10 3713 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 3714 3715 // ixor r1, r1, r11 3716 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 
3717 3718 // udiv r20, r0, r1 3719 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1); 3720 3721 // umul r20, r20, r1 3722 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1); 3723 3724 // sub r0, r0, r20 3725 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); 3726 3727 // iadd r0, r0, r10 3728 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 3729 3730 // ixor DST, r0, r10 3731 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 3732 return DST; 3733} 3734 3735SDValue 3736AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const 3737{ 3738 return SDValue(Op.getNode(), 0); 3739} 3740 3741SDValue 3742AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const 3743{ 3744 DebugLoc DL = Op.getDebugLoc(); 3745 EVT OVT = Op.getValueType(); 3746 MVT INTTY = MVT::i32; 3747 if (OVT == MVT::v2i8) { 3748 INTTY = MVT::v2i32; 3749 } else if (OVT == MVT::v4i8) { 3750 INTTY = MVT::v4i32; 3751 } 3752 SDValue LHS = Op.getOperand(0); 3753 SDValue RHS = Op.getOperand(1); 3754 // The LowerUREM8 function generates equivalent to the following IL. 
3755 // mov r0, as_u32(LHS) 3756 // mov r1, as_u32(RHS) 3757 // and r10, r0, 0xFF 3758 // and r11, r1, 0xFF 3759 // cmov_logical r3, r11, r11, 0x1 3760 // udiv r3, r10, r3 3761 // cmov_logical r3, r11, r3, 0 3762 // umul r3, r3, r11 3763 // sub r3, r10, r3 3764 // and as_u8(DST), r3, 0xFF 3765 3766 // mov r0, as_u32(LHS) 3767 SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY); 3768 3769 // mov r1, as_u32(RHS) 3770 SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY); 3771 3772 // and r10, r0, 0xFF 3773 SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0, 3774 DAG.getConstant(0xFF, INTTY)); 3775 3776 // and r11, r1, 0xFF 3777 SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1, 3778 DAG.getConstant(0xFF, INTTY)); 3779 3780 // cmov_logical r3, r11, r11, 0x1 3781 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11, 3782 DAG.getConstant(0x01, INTTY)); 3783 3784 // udiv r3, r10, r3 3785 r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3); 3786 3787 // cmov_logical r3, r11, r3, 0 3788 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3, 3789 DAG.getConstant(0, INTTY)); 3790 3791 // umul r3, r3, r11 3792 r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11); 3793 3794 // sub r3, r10, r3 3795 r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3); 3796 3797 // and as_u8(DST), r3, 0xFF 3798 SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3, 3799 DAG.getConstant(0xFF, INTTY)); 3800 DST = DAG.getZExtOrTrunc(DST, DL, OVT); 3801 return DST; 3802} 3803 3804SDValue 3805AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const 3806{ 3807 DebugLoc DL = Op.getDebugLoc(); 3808 EVT OVT = Op.getValueType(); 3809 MVT INTTY = MVT::i32; 3810 if (OVT == MVT::v2i16) { 3811 INTTY = MVT::v2i32; 3812 } else if (OVT == MVT::v4i16) { 3813 INTTY = MVT::v4i32; 3814 } 3815 SDValue LHS = Op.getOperand(0); 3816 SDValue RHS = Op.getOperand(1); 3817 // The LowerUREM16 function generatest equivalent to the following IL. 
3818 // mov r0, LHS 3819 // mov r1, RHS 3820 // DIV = LowerUDIV16(LHS, RHS) 3821 // and r10, r0, 0xFFFF 3822 // and r11, r1, 0xFFFF 3823 // cmov_logical r3, r11, r11, 0x1 3824 // udiv as_u16(r3), as_u32(r10), as_u32(r3) 3825 // and r3, r3, 0xFFFF 3826 // cmov_logical r3, r11, r3, 0 3827 // umul r3, r3, r11 3828 // sub r3, r10, r3 3829 // and DST, r3, 0xFFFF 3830 3831 // mov r0, LHS 3832 SDValue r0 = LHS; 3833 3834 // mov r1, RHS 3835 SDValue r1 = RHS; 3836 3837 // and r10, r0, 0xFFFF 3838 SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0, 3839 DAG.getConstant(0xFFFF, OVT)); 3840 3841 // and r11, r1, 0xFFFF 3842 SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1, 3843 DAG.getConstant(0xFFFF, OVT)); 3844 3845 // cmov_logical r3, r11, r11, 0x1 3846 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11, 3847 DAG.getConstant(0x01, OVT)); 3848 3849 // udiv as_u16(r3), as_u32(r10), as_u32(r3) 3850 r10 = DAG.getZExtOrTrunc(r10, DL, INTTY); 3851 r3 = DAG.getZExtOrTrunc(r3, DL, INTTY); 3852 r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3); 3853 r3 = DAG.getZExtOrTrunc(r3, DL, OVT); 3854 r10 = DAG.getZExtOrTrunc(r10, DL, OVT); 3855 3856 // and r3, r3, 0xFFFF 3857 r3 = DAG.getNode(ISD::AND, DL, OVT, r3, 3858 DAG.getConstant(0xFFFF, OVT)); 3859 3860 // cmov_logical r3, r11, r3, 0 3861 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3, 3862 DAG.getConstant(0, OVT)); 3863 // umul r3, r3, r11 3864 r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11); 3865 3866 // sub r3, r10, r3 3867 r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3); 3868 3869 // and DST, r3, 0xFFFF 3870 SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3, 3871 DAG.getConstant(0xFFFF, OVT)); 3872 return DST; 3873} 3874 3875SDValue 3876AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const 3877{ 3878 DebugLoc DL = Op.getDebugLoc(); 3879 EVT OVT = Op.getValueType(); 3880 SDValue LHS = Op.getOperand(0); 3881 SDValue RHS = Op.getOperand(1); 3882 // The LowerUREM32 function generates equivalent to the 
following IL. 3883 // udiv r20, LHS, RHS 3884 // umul r20, r20, RHS 3885 // sub DST, LHS, r20 3886 3887 // udiv r20, LHS, RHS 3888 SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS); 3889 3890 // umul r20, r20, RHS 3891 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS); 3892 3893 // sub DST, LHS, r20 3894 SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20); 3895 return DST; 3896} 3897 3898SDValue 3899AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const 3900{ 3901 return SDValue(Op.getNode(), 0); 3902} 3903 3904 3905SDValue 3906AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const 3907{ 3908 DebugLoc DL = Op.getDebugLoc(); 3909 EVT OVT = Op.getValueType(); 3910 MVT INTTY = MVT::i32; 3911 if (OVT == MVT::v2f32) { 3912 INTTY = MVT::v2i32; 3913 } else if (OVT == MVT::v4f32) { 3914 INTTY = MVT::v4i32; 3915 } 3916 SDValue LHS = Op.getOperand(0); 3917 SDValue RHS = Op.getOperand(1); 3918 SDValue DST; 3919 const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>(); 3920 if (STM.device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { 3921 // TODO: This doesn't work for vector types yet 3922 // The LowerFDIV32 function generates equivalent to the following 3923 // IL: 3924 // mov r20, as_int(LHS) 3925 // mov r21, as_int(RHS) 3926 // and r30, r20, 0x7f800000 3927 // and r31, r20, 0x807FFFFF 3928 // and r32, r21, 0x7f800000 3929 // and r33, r21, 0x807FFFFF 3930 // ieq r40, r30, 0x7F800000 3931 // ieq r41, r31, 0x7F800000 3932 // ieq r42, r32, 0 3933 // ieq r43, r33, 0 3934 // and r50, r20, 0x80000000 3935 // and r51, r21, 0x80000000 3936 // ior r32, r32, 0x3f800000 3937 // ior r33, r33, 0x3f800000 3938 // cmov_logical r32, r42, r50, r32 3939 // cmov_logical r33, r43, r51, r33 3940 // cmov_logical r32, r40, r20, r32 3941 // cmov_logical r33, r41, r21, r33 3942 // ior r50, r40, r41 3943 // ior r51, r42, r43 3944 // ior r50, r50, r51 3945 // inegate r52, r31 3946 // iadd r30, r30, r52 3947 // cmov_logical r30, r50, 0, 
r30 3948 // div_zeroop(infinity) r21, 1.0, r33 3949 // mul_ieee r20, r32, r21 3950 // and r22, r20, 0x7FFFFFFF 3951 // and r23, r20, 0x80000000 3952 // ishr r60, r22, 0x00000017 3953 // ishr r61, r30, 0x00000017 3954 // iadd r20, r20, r30 3955 // iadd r21, r22, r30 3956 // iadd r60, r60, r61 3957 // ige r42, 0, R60 3958 // ior r41, r23, 0x7F800000 3959 // ige r40, r60, 0x000000FF 3960 // cmov_logical r40, r50, 0, r40 3961 // cmov_logical r20, r42, r23, r20 3962 // cmov_logical DST, r40, r41, r20 3963 // as_float(DST) 3964 3965 // mov r20, as_int(LHS) 3966 SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS); 3967 3968 // mov r21, as_int(RHS) 3969 SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS); 3970 3971 // and r30, r20, 0x7f800000 3972 SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20, 3973 DAG.getConstant(0x7F800000, INTTY)); 3974 3975 // and r31, r21, 0x7f800000 3976 SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21, 3977 DAG.getConstant(0x7f800000, INTTY)); 3978 3979 // and r32, r20, 0x807FFFFF 3980 SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20, 3981 DAG.getConstant(0x807FFFFF, INTTY)); 3982 3983 // and r33, r21, 0x807FFFFF 3984 SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21, 3985 DAG.getConstant(0x807FFFFF, INTTY)); 3986 3987 // ieq r40, r30, 0x7F800000 3988 SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 3989 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 3990 R30, DAG.getConstant(0x7F800000, INTTY)); 3991 3992 // ieq r41, r31, 0x7F800000 3993 SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 3994 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 3995 R31, DAG.getConstant(0x7F800000, INTTY)); 3996 3997 // ieq r42, r30, 0 3998 SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 3999 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 4000 R30, DAG.getConstant(0, INTTY)); 4001 4002 // ieq r43, r31, 0 4003 SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 4004 
DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 4005 R31, DAG.getConstant(0, INTTY)); 4006 4007 // and r50, r20, 0x80000000 4008 SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20, 4009 DAG.getConstant(0x80000000, INTTY)); 4010 4011 // and r51, r21, 0x80000000 4012 SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21, 4013 DAG.getConstant(0x80000000, INTTY)); 4014 4015 // ior r32, r32, 0x3f800000 4016 R32 = DAG.getNode(ISD::OR, DL, INTTY, R32, 4017 DAG.getConstant(0x3F800000, INTTY)); 4018 4019 // ior r33, r33, 0x3f800000 4020 R33 = DAG.getNode(ISD::OR, DL, INTTY, R33, 4021 DAG.getConstant(0x3F800000, INTTY)); 4022 4023 // cmov_logical r32, r42, r50, r32 4024 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32); 4025 4026 // cmov_logical r33, r43, r51, r33 4027 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33); 4028 4029 // cmov_logical r32, r40, r20, r32 4030 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32); 4031 4032 // cmov_logical r33, r41, r21, r33 4033 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33); 4034 4035 // ior r50, r40, r41 4036 R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41); 4037 4038 // ior r51, r42, r43 4039 R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43); 4040 4041 // ior r50, r50, r51 4042 R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51); 4043 4044 // inegate r52, r31 4045 SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31); 4046 4047 // iadd r30, r30, r52 4048 R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52); 4049 4050 // cmov_logical r30, r50, 0, r30 4051 R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50, 4052 DAG.getConstant(0, INTTY), R30); 4053 4054 // div_zeroop(infinity) r21, 1.0, as_float(r33) 4055 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33); 4056 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, 4057 DAG.getConstantFP(1.0f, OVT), R33); 4058 4059 // mul_ieee as_int(r20), as_float(r32), r21 4060 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32); 4061 R20 = 
DAG.getNode(ISD::FMUL, DL, OVT, R32, R21); 4062 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20); 4063 4064 // div_zeroop(infinity) r21, 1.0, as_float(r33) 4065 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33); 4066 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, 4067 DAG.getConstantFP(1.0f, OVT), R33); 4068 4069 // mul_ieee as_int(r20), as_float(r32), r21 4070 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32); 4071 R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21); 4072 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20); 4073 4074 // and r22, r20, 0x7FFFFFFF 4075 SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20, 4076 DAG.getConstant(0x7FFFFFFF, INTTY)); 4077 4078 // and r23, r20, 0x80000000 4079 SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20, 4080 DAG.getConstant(0x80000000, INTTY)); 4081 4082 // ishr r60, r22, 0x00000017 4083 SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22, 4084 DAG.getConstant(0x00000017, INTTY)); 4085 4086 // ishr r61, r30, 0x00000017 4087 SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30, 4088 DAG.getConstant(0x00000017, INTTY)); 4089 4090 // iadd r20, r20, r30 4091 R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30); 4092 4093 // iadd r21, r22, r30 4094 R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30); 4095 4096 // iadd r60, r60, r61 4097 R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61); 4098 4099 // ige r42, 0, R60 4100 R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 4101 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 4102 DAG.getConstant(0, INTTY), 4103 R60); 4104 4105 // ior r41, r23, 0x7F800000 4106 R41 = DAG.getNode(ISD::OR, DL, INTTY, R23, 4107 DAG.getConstant(0x7F800000, INTTY)); 4108 4109 // ige r40, r60, 0x000000FF 4110 R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 4111 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 4112 R60, 4113 DAG.getConstant(0x0000000FF, INTTY)); 4114 4115 // cmov_logical r40, r50, 0, r40 4116 R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50, 4117 DAG.getConstant(0, INTTY), 4118 R40); 4119 4120 // 
cmov_logical r20, r42, r23, r20 4121 R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20); 4122 4123 // cmov_logical DST, r40, r41, r20 4124 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20); 4125 4126 // as_float(DST) 4127 DST = DAG.getNode(ISDBITCAST, DL, OVT, DST); 4128 } else { 4129 // The following sequence of DAG nodes produce the following IL: 4130 // fabs r1, RHS 4131 // lt r2, 0x1.0p+96f, r1 4132 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f 4133 // mul_ieee r1, RHS, r3 4134 // div_zeroop(infinity) r0, LHS, r1 4135 // mul_ieee DST, r0, r3 4136 4137 // fabs r1, RHS 4138 SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS); 4139 // lt r2, 0x1.0p+96f, r1 4140 SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT, 4141 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32), 4142 DAG.getConstant(0x6f800000, INTTY), r1); 4143 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f 4144 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2, 4145 DAG.getConstant(0x2f800000, INTTY), 4146 DAG.getConstant(0x3f800000, INTTY)); 4147 // mul_ieee r1, RHS, r3 4148 r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3); 4149 // div_zeroop(infinity) r0, LHS, r1 4150 SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1); 4151 // mul_ieee DST, r0, r3 4152 DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3); 4153 } 4154 return DST; 4155} 4156 4157SDValue 4158AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const 4159{ 4160 return SDValue(Op.getNode(), 0); 4161} 4162