// AMDILISelLowering.cpp revision 33e7db9a1dafdcf5c7c745180831403e0485544d
1//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//==-----------------------------------------------------------------------===// 9// 10// This file implements the interfaces that AMDIL uses to lower LLVM code into a 11// selection DAG. 12// 13//===----------------------------------------------------------------------===// 14 15#include "AMDILISelLowering.h" 16#include "AMDILDevices.h" 17#include "AMDILIntrinsicInfo.h" 18#include "AMDILSubtarget.h" 19#include "AMDILTargetMachine.h" 20#include "AMDILUtilityFunctions.h" 21#include "llvm/CallingConv.h" 22#include "llvm/CodeGen/MachineFrameInfo.h" 23#include "llvm/CodeGen/MachineRegisterInfo.h" 24#include "llvm/CodeGen/PseudoSourceValue.h" 25#include "llvm/CodeGen/SelectionDAG.h" 26#include "llvm/CodeGen/SelectionDAGNodes.h" 27#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 28#include "llvm/DerivedTypes.h" 29#include "llvm/Instructions.h" 30#include "llvm/Intrinsics.h" 31#include "llvm/Support/raw_ostream.h" 32#include "llvm/Target/TargetOptions.h" 33 34using namespace llvm; 35#define ISDBITCAST ISD::BITCAST 36#define MVTGLUE MVT::Glue 37//===----------------------------------------------------------------------===// 38// Calling Convention Implementation 39//===----------------------------------------------------------------------===// 40#include "AMDILGenCallingConv.inc" 41 42//===----------------------------------------------------------------------===// 43// TargetLowering Implementation Help Functions Begin 44//===----------------------------------------------------------------------===// 45 static SDValue 46getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType) 47{ 48 DebugLoc DL = Src.getDebugLoc(); 49 EVT svt = Src.getValueType().getScalarType(); 50 EVT dvt = 
Dst.getValueType().getScalarType(); 51 if (svt.isFloatingPoint() && dvt.isFloatingPoint()) { 52 if (dvt.bitsGT(svt)) { 53 Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src); 54 } else if (svt.bitsLT(svt)) { 55 Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src, 56 DAG.getConstant(1, MVT::i32)); 57 } 58 } else if (svt.isInteger() && dvt.isInteger()) { 59 if (!svt.bitsEq(dvt)) { 60 Src = DAG.getSExtOrTrunc(Src, DL, dvt); 61 } else { 62 Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src); 63 } 64 } else if (svt.isInteger()) { 65 unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP; 66 if (!svt.bitsEq(dvt)) { 67 if (dvt.getSimpleVT().SimpleTy == MVT::f32) { 68 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32); 69 } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) { 70 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64); 71 } else { 72 assert(0 && "We only support 32 and 64bit fp types"); 73 } 74 } 75 Src = DAG.getNode(opcode, DL, dvt, Src); 76 } else if (dvt.isInteger()) { 77 unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT; 78 if (svt.getSimpleVT().SimpleTy == MVT::f32) { 79 Src = DAG.getNode(opcode, DL, MVT::i32, Src); 80 } else if (svt.getSimpleVT().SimpleTy == MVT::f64) { 81 Src = DAG.getNode(opcode, DL, MVT::i64, Src); 82 } else { 83 assert(0 && "We only support 32 and 64bit fp types"); 84 } 85 Src = DAG.getSExtOrTrunc(Src, DL, dvt); 86 } 87 return Src; 88} 89// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC 90// condition. 
91 static AMDILCC::CondCodes 92CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type) 93{ 94 switch (CC) { 95 default: 96 { 97 errs()<<"Condition Code: "<< (unsigned int)CC<<"\n"; 98 assert(0 && "Unknown condition code!"); 99 } 100 case ISD::SETO: 101 switch(type) { 102 case MVT::f32: 103 return AMDILCC::IL_CC_F_O; 104 case MVT::f64: 105 return AMDILCC::IL_CC_D_O; 106 default: 107 assert(0 && "Opcode combination not generated correctly!"); 108 return AMDILCC::COND_ERROR; 109 }; 110 case ISD::SETUO: 111 switch(type) { 112 case MVT::f32: 113 return AMDILCC::IL_CC_F_UO; 114 case MVT::f64: 115 return AMDILCC::IL_CC_D_UO; 116 default: 117 assert(0 && "Opcode combination not generated correctly!"); 118 return AMDILCC::COND_ERROR; 119 }; 120 case ISD::SETGT: 121 switch (type) { 122 case MVT::i1: 123 case MVT::i8: 124 case MVT::i16: 125 case MVT::i32: 126 return AMDILCC::IL_CC_I_GT; 127 case MVT::f32: 128 return AMDILCC::IL_CC_F_GT; 129 case MVT::f64: 130 return AMDILCC::IL_CC_D_GT; 131 case MVT::i64: 132 return AMDILCC::IL_CC_L_GT; 133 default: 134 assert(0 && "Opcode combination not generated correctly!"); 135 return AMDILCC::COND_ERROR; 136 }; 137 case ISD::SETGE: 138 switch (type) { 139 case MVT::i1: 140 case MVT::i8: 141 case MVT::i16: 142 case MVT::i32: 143 return AMDILCC::IL_CC_I_GE; 144 case MVT::f32: 145 return AMDILCC::IL_CC_F_GE; 146 case MVT::f64: 147 return AMDILCC::IL_CC_D_GE; 148 case MVT::i64: 149 return AMDILCC::IL_CC_L_GE; 150 default: 151 assert(0 && "Opcode combination not generated correctly!"); 152 return AMDILCC::COND_ERROR; 153 }; 154 case ISD::SETLT: 155 switch (type) { 156 case MVT::i1: 157 case MVT::i8: 158 case MVT::i16: 159 case MVT::i32: 160 return AMDILCC::IL_CC_I_LT; 161 case MVT::f32: 162 return AMDILCC::IL_CC_F_LT; 163 case MVT::f64: 164 return AMDILCC::IL_CC_D_LT; 165 case MVT::i64: 166 return AMDILCC::IL_CC_L_LT; 167 default: 168 assert(0 && "Opcode combination not generated correctly!"); 169 return AMDILCC::COND_ERROR; 170 }; 
171 case ISD::SETLE: 172 switch (type) { 173 case MVT::i1: 174 case MVT::i8: 175 case MVT::i16: 176 case MVT::i32: 177 return AMDILCC::IL_CC_I_LE; 178 case MVT::f32: 179 return AMDILCC::IL_CC_F_LE; 180 case MVT::f64: 181 return AMDILCC::IL_CC_D_LE; 182 case MVT::i64: 183 return AMDILCC::IL_CC_L_LE; 184 default: 185 assert(0 && "Opcode combination not generated correctly!"); 186 return AMDILCC::COND_ERROR; 187 }; 188 case ISD::SETNE: 189 switch (type) { 190 case MVT::i1: 191 case MVT::i8: 192 case MVT::i16: 193 case MVT::i32: 194 return AMDILCC::IL_CC_I_NE; 195 case MVT::f32: 196 return AMDILCC::IL_CC_F_NE; 197 case MVT::f64: 198 return AMDILCC::IL_CC_D_NE; 199 case MVT::i64: 200 return AMDILCC::IL_CC_L_NE; 201 default: 202 assert(0 && "Opcode combination not generated correctly!"); 203 return AMDILCC::COND_ERROR; 204 }; 205 case ISD::SETEQ: 206 switch (type) { 207 case MVT::i1: 208 case MVT::i8: 209 case MVT::i16: 210 case MVT::i32: 211 return AMDILCC::IL_CC_I_EQ; 212 case MVT::f32: 213 return AMDILCC::IL_CC_F_EQ; 214 case MVT::f64: 215 return AMDILCC::IL_CC_D_EQ; 216 case MVT::i64: 217 return AMDILCC::IL_CC_L_EQ; 218 default: 219 assert(0 && "Opcode combination not generated correctly!"); 220 return AMDILCC::COND_ERROR; 221 }; 222 case ISD::SETUGT: 223 switch (type) { 224 case MVT::i1: 225 case MVT::i8: 226 case MVT::i16: 227 case MVT::i32: 228 return AMDILCC::IL_CC_U_GT; 229 case MVT::f32: 230 return AMDILCC::IL_CC_F_UGT; 231 case MVT::f64: 232 return AMDILCC::IL_CC_D_UGT; 233 case MVT::i64: 234 return AMDILCC::IL_CC_UL_GT; 235 default: 236 assert(0 && "Opcode combination not generated correctly!"); 237 return AMDILCC::COND_ERROR; 238 }; 239 case ISD::SETUGE: 240 switch (type) { 241 case MVT::i1: 242 case MVT::i8: 243 case MVT::i16: 244 case MVT::i32: 245 return AMDILCC::IL_CC_U_GE; 246 case MVT::f32: 247 return AMDILCC::IL_CC_F_UGE; 248 case MVT::f64: 249 return AMDILCC::IL_CC_D_UGE; 250 case MVT::i64: 251 return AMDILCC::IL_CC_UL_GE; 252 default: 253 assert(0 
&& "Opcode combination not generated correctly!"); 254 return AMDILCC::COND_ERROR; 255 }; 256 case ISD::SETULT: 257 switch (type) { 258 case MVT::i1: 259 case MVT::i8: 260 case MVT::i16: 261 case MVT::i32: 262 return AMDILCC::IL_CC_U_LT; 263 case MVT::f32: 264 return AMDILCC::IL_CC_F_ULT; 265 case MVT::f64: 266 return AMDILCC::IL_CC_D_ULT; 267 case MVT::i64: 268 return AMDILCC::IL_CC_UL_LT; 269 default: 270 assert(0 && "Opcode combination not generated correctly!"); 271 return AMDILCC::COND_ERROR; 272 }; 273 case ISD::SETULE: 274 switch (type) { 275 case MVT::i1: 276 case MVT::i8: 277 case MVT::i16: 278 case MVT::i32: 279 return AMDILCC::IL_CC_U_LE; 280 case MVT::f32: 281 return AMDILCC::IL_CC_F_ULE; 282 case MVT::f64: 283 return AMDILCC::IL_CC_D_ULE; 284 case MVT::i64: 285 return AMDILCC::IL_CC_UL_LE; 286 default: 287 assert(0 && "Opcode combination not generated correctly!"); 288 return AMDILCC::COND_ERROR; 289 }; 290 case ISD::SETUNE: 291 switch (type) { 292 case MVT::i1: 293 case MVT::i8: 294 case MVT::i16: 295 case MVT::i32: 296 return AMDILCC::IL_CC_U_NE; 297 case MVT::f32: 298 return AMDILCC::IL_CC_F_UNE; 299 case MVT::f64: 300 return AMDILCC::IL_CC_D_UNE; 301 case MVT::i64: 302 return AMDILCC::IL_CC_UL_NE; 303 default: 304 assert(0 && "Opcode combination not generated correctly!"); 305 return AMDILCC::COND_ERROR; 306 }; 307 case ISD::SETUEQ: 308 switch (type) { 309 case MVT::i1: 310 case MVT::i8: 311 case MVT::i16: 312 case MVT::i32: 313 return AMDILCC::IL_CC_U_EQ; 314 case MVT::f32: 315 return AMDILCC::IL_CC_F_UEQ; 316 case MVT::f64: 317 return AMDILCC::IL_CC_D_UEQ; 318 case MVT::i64: 319 return AMDILCC::IL_CC_UL_EQ; 320 default: 321 assert(0 && "Opcode combination not generated correctly!"); 322 return AMDILCC::COND_ERROR; 323 }; 324 case ISD::SETOGT: 325 switch (type) { 326 case MVT::f32: 327 return AMDILCC::IL_CC_F_OGT; 328 case MVT::f64: 329 return AMDILCC::IL_CC_D_OGT; 330 case MVT::i1: 331 case MVT::i8: 332 case MVT::i16: 333 case MVT::i32: 334 case 
MVT::i64: 335 default: 336 assert(0 && "Opcode combination not generated correctly!"); 337 return AMDILCC::COND_ERROR; 338 }; 339 case ISD::SETOGE: 340 switch (type) { 341 case MVT::f32: 342 return AMDILCC::IL_CC_F_OGE; 343 case MVT::f64: 344 return AMDILCC::IL_CC_D_OGE; 345 case MVT::i1: 346 case MVT::i8: 347 case MVT::i16: 348 case MVT::i32: 349 case MVT::i64: 350 default: 351 assert(0 && "Opcode combination not generated correctly!"); 352 return AMDILCC::COND_ERROR; 353 }; 354 case ISD::SETOLT: 355 switch (type) { 356 case MVT::f32: 357 return AMDILCC::IL_CC_F_OLT; 358 case MVT::f64: 359 return AMDILCC::IL_CC_D_OLT; 360 case MVT::i1: 361 case MVT::i8: 362 case MVT::i16: 363 case MVT::i32: 364 case MVT::i64: 365 default: 366 assert(0 && "Opcode combination not generated correctly!"); 367 return AMDILCC::COND_ERROR; 368 }; 369 case ISD::SETOLE: 370 switch (type) { 371 case MVT::f32: 372 return AMDILCC::IL_CC_F_OLE; 373 case MVT::f64: 374 return AMDILCC::IL_CC_D_OLE; 375 case MVT::i1: 376 case MVT::i8: 377 case MVT::i16: 378 case MVT::i32: 379 case MVT::i64: 380 default: 381 assert(0 && "Opcode combination not generated correctly!"); 382 return AMDILCC::COND_ERROR; 383 }; 384 case ISD::SETONE: 385 switch (type) { 386 case MVT::f32: 387 return AMDILCC::IL_CC_F_ONE; 388 case MVT::f64: 389 return AMDILCC::IL_CC_D_ONE; 390 case MVT::i1: 391 case MVT::i8: 392 case MVT::i16: 393 case MVT::i32: 394 case MVT::i64: 395 default: 396 assert(0 && "Opcode combination not generated correctly!"); 397 return AMDILCC::COND_ERROR; 398 }; 399 case ISD::SETOEQ: 400 switch (type) { 401 case MVT::f32: 402 return AMDILCC::IL_CC_F_OEQ; 403 case MVT::f64: 404 return AMDILCC::IL_CC_D_OEQ; 405 case MVT::i1: 406 case MVT::i8: 407 case MVT::i16: 408 case MVT::i32: 409 case MVT::i64: 410 default: 411 assert(0 && "Opcode combination not generated correctly!"); 412 return AMDILCC::COND_ERROR; 413 }; 414 }; 415} 416 417/// Helper function used by LowerFormalArguments 418static const 
TargetRegisterClass*
getRegClassFromType(unsigned int type) {
  // Map an MVT::SimpleValueType value onto the AMDIL register class that
  // holds values of that type.
  switch (type) {
  default:
    assert(0 && "Passed in type does not match any register classes.");
    // NOTE(review): no return after the assert -- in a release (NDEBUG)
    // build an unknown type falls through to the i8 register class.
  case MVT::i8:
    return &AMDIL::GPRI8RegClass;
  case MVT::i16:
    return &AMDIL::GPRI16RegClass;
  case MVT::i32:
    return &AMDIL::GPRI32RegClass;
  case MVT::f32:
    return &AMDIL::GPRF32RegClass;
  case MVT::i64:
    return &AMDIL::GPRI64RegClass;
  case MVT::f64:
    return &AMDIL::GPRF64RegClass;
  case MVT::v4f32:
    return &AMDIL::GPRV4F32RegClass;
  case MVT::v4i8:
    return &AMDIL::GPRV4I8RegClass;
  case MVT::v4i16:
    return &AMDIL::GPRV4I16RegClass;
  case MVT::v4i32:
    return &AMDIL::GPRV4I32RegClass;
  case MVT::v2f32:
    return &AMDIL::GPRV2F32RegClass;
  case MVT::v2i8:
    return &AMDIL::GPRV2I8RegClass;
  case MVT::v2i16:
    return &AMDIL::GPRV2I16RegClass;
  case MVT::v2i32:
    return &AMDIL::GPRV2I32RegClass;
  case MVT::v2f64:
    return &AMDIL::GPRV2F64RegClass;
  case MVT::v2i64:
    return &AMDIL::GPRV2I64RegClass;
  }
}

// LowerMemArgument - Build the DAG nodes for formal argument number 'i',
// which the calling convention assigned to a stack slot (CCValAssign 'VA').
// Creates a fixed frame object at the argument's stack offset and returns a
// load from it; for byval arguments the frame address itself is returned.
SDValue
AMDILTargetLowering::LowerMemArgument(
    SDValue Chain,
    CallingConv::ID CallConv,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    MachineFrameInfo *MFI,
    unsigned i) const
{
  // Create the nodes corresponding to a load from this parameter slot.
  ISD::ArgFlagsTy Flags = Ins[i].Flags;

  // Fastcc + guaranteed tail calls force every argument slot mutable, since
  // tail-call argument lowering may overwrite it.
  bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
    getTargetMachine().Options.GuaranteedTailCallOpt;
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();

  // FIXME: For now, all byval parameter objects are marked mutable. This can
  // be changed with more analysis.
  // In case of tail call optimization mark all arguments mutable. Since they
  // could be overwritten by lowering of arguments in case of a tail call.
  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
      VA.getLocMemOffset(), isImmutable);
  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());

  // For byval arguments the caller passes the object in place, so the frame
  // index (its address) is the argument value.
  if (Flags.isByVal())
    return FIN;
  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
      MachinePointerInfo::getFixedStack(FI),
      false, false, false, 0);
}
//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Instruction generation functions
//===----------------------------------------------------------------------===//
// convertToReg - Force a MachineOperand into register form where possible.
// Immediates are materialized with a LOADCONST into a freshly generated
// virtual register; address-like operands (basic block, frame index,
// constant pool, jump table, global, symbol) are rewritten to register 0.
// The (possibly modified) operand copy is returned by value.
MachineOperand
AMDILTargetLowering::convertToReg(MachineOperand op) const
{
  if (op.isReg()) {
    return op;
  } else if (op.isImm()) {
    // The destination register class is taken from the instruction's first
    // operand description.
    uint32_t loadReg
      = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
    generateMachineInst(AMDIL::LOADCONST_i32, loadReg)
      .addImm(op.getImm());
    op.ChangeToRegister(loadReg, false);
  } else if (op.isFPImm()) {
    uint32_t loadReg
      = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
    generateMachineInst(AMDIL::LOADCONST_f32, loadReg)
      .addFPImm(op.getFPImm());
    op.ChangeToRegister(loadReg, false);
  } else if (op.isMBB()) {
    op.ChangeToRegister(0, false);
  } else if (op.isFI()) {
    op.ChangeToRegister(0, false);
  } else if (op.isCPI()) {
    op.ChangeToRegister(0, false);
  } else if (op.isJTI()) {
    op.ChangeToRegister(0, false);
  } else if (op.isGlobal()) {
    op.ChangeToRegister(0, false);
  } else if (op.isSymbol()) {
    op.ChangeToRegister(0, false);
  }/* else if (op.isMetadata()) {
      op.ChangeToRegister(0, false);
      }*/
  return op;
}

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
// AMDILTargetLowering constructor - registers the legal register classes for
// the current subtarget and configures how each (opcode, type) pair is
// handled (Legal / Custom / Expand) during DAG legalization.
 AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF())
{
  // Every value type the backend knows about.
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  // Scalar integer types.
  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  // Scalar floating-point types.
  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  // Vector types.
  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  // These are the current register classes that are
  // supported

  addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass);
  addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass);

  // Optional register classes, gated on device capability bits.
  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass);
    addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass);
  }
  if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
    addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass);
    addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass);
    addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass);
    setOperationAction(ISD::Constant          , MVT::i8   , Legal);
  }
  if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
    addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass);
    addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass);
    addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass);
    setOperationAction(ISD::Constant          , MVT::i16  , Legal);
  }
  addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass);
  addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass);
  addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass);
  addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass);
  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
    addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass);
    addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass);
  }

  // Actions applied uniformly to every supported type.
  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
    // We cannot sextinreg, expand to shifts
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
    setOperationAction(ISD::FP_ROUND, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_CC, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
    setOperationAction(ISDBITCAST, VT, Custom);
    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SELECT_CC, VT, Custom);
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    // 64-bit division is handled elsewhere; everything else gets a custom
    // SDIV lowering.
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::FP_ROUND, VT, Custom);
    // NOTE(review): ISD::SETOLT and friends are ISD::CondCode values, not
    // node opcodes; passing them to setOperationAction looks suspect --
    // confirm these were meant to be setCondCodeAction calls.
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // GPU also does not have divrem function for signed or unsigned
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    setOperationAction(ISD::FP_ROUND, VT, Expand);

    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // GPU doesn't have a rotl, rotr, or byteswap instruction
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // GPU doesn't have any counting operators
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
  {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
    setOperationAction(ISD::FP_ROUND, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);

  }
  setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
    // Older CAL versions and the HD4XXX generation need a custom i64
    // multiply; newer targets can expand/select it directly.
    if (stm->calVersion() < CAL_VERSION_SC_139
        || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
      setOperationAction(ISD::MUL, MVT::i64, Custom);
    }
    setOperationAction(ISD::SUB, MVT::i64, Custom);
    setOperationAction(ISD::ADD, MVT::i64, Custom);
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
    setOperationAction(ISD::SUB, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    // we support loading/storing v2f64 but not operations on the type
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SETCC, MVT::Other, Custom);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
  setOperationAction(ISD::FDIV, MVT::v4f32, Custom);

  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
  // Use the default implementation.
784 setOperationAction(ISD::VAARG , MVT::Other, Expand); 785 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 786 setOperationAction(ISD::VAEND , MVT::Other, Expand); 787 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); 788 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); 789 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); 790 setOperationAction(ISD::ConstantFP , MVT::f32 , Legal); 791 setOperationAction(ISD::Constant , MVT::i32 , Legal); 792 setOperationAction(ISD::TRAP , MVT::Other , Legal); 793 794 setStackPointerRegisterToSaveRestore(AMDIL::SP); 795 setSchedulingPreference(Sched::RegPressure); 796 setPow2DivIsCheap(false); 797 setPrefLoopAlignment(16); 798 setSelectIsExpensive(true); 799 setJumpIsExpensive(true); 800 computeRegisterProperties(); 801 802 maxStoresPerMemcpy = 4096; 803 maxStoresPerMemmove = 4096; 804 maxStoresPerMemset = 4096; 805 806#undef numTypes 807#undef numIntTypes 808#undef numVectorTypes 809#undef numFloatTypes 810} 811 812const char * 813AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const 814{ 815 switch (Opcode) { 816 default: return 0; 817 case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY"; 818 case AMDILISD::DP_TO_FP: return "AMDILISD::DP_TO_FP"; 819 case AMDILISD::FP_TO_DP: return "AMDILISD::FP_TO_DP"; 820 case AMDILISD::BITCONV: return "AMDILISD::BITCONV"; 821 case AMDILISD::CMOV: return "AMDILISD::CMOV"; 822 case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG"; 823 case AMDILISD::INEGATE: return "AMDILISD::INEGATE"; 824 case AMDILISD::MAD: return "AMDILISD::MAD"; 825 case AMDILISD::UMAD: return "AMDILISD::UMAD"; 826 case AMDILISD::CALL: return "AMDILISD::CALL"; 827 case AMDILISD::RET: return "AMDILISD::RET"; 828 case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI"; 829 case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO"; 830 case AMDILISD::ADD: return "AMDILISD::ADD"; 831 case AMDILISD::UMUL: return "AMDILISD::UMUL"; 832 case AMDILISD::AND: return "AMDILISD::AND"; 833 case 
AMDILISD::OR: return "AMDILISD::OR"; 834 case AMDILISD::NOT: return "AMDILISD::NOT"; 835 case AMDILISD::XOR: return "AMDILISD::XOR"; 836 case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF"; 837 case AMDILISD::SMAX: return "AMDILISD::SMAX"; 838 case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE"; 839 case AMDILISD::MOVE: return "AMDILISD::MOVE"; 840 case AMDILISD::VBUILD: return "AMDILISD::VBUILD"; 841 case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT"; 842 case AMDILISD::VINSERT: return "AMDILISD::VINSERT"; 843 case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT"; 844 case AMDILISD::LCREATE: return "AMDILISD::LCREATE"; 845 case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI"; 846 case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO"; 847 case AMDILISD::DCREATE: return "AMDILISD::DCREATE"; 848 case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI"; 849 case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO"; 850 case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2"; 851 case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2"; 852 case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2"; 853 case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2"; 854 case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2"; 855 case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2"; 856 case AMDILISD::CMP: return "AMDILISD::CMP"; 857 case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT"; 858 case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE"; 859 case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT"; 860 case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE"; 861 case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ"; 862 case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE"; 863 case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG"; 864 case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND"; 865 case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO"; 866 case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO"; 867 case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP"; 868 case 
AMDILISD::ADDADDR: return "AMDILISD::ADDADDR"; 869 case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD"; 870 case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND"; 871 case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG"; 872 case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC"; 873 case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC"; 874 case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX"; 875 case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX"; 876 case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN"; 877 case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN"; 878 case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR"; 879 case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB"; 880 case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB"; 881 case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG"; 882 case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR"; 883 case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET"; 884 case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET"; 885 case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET"; 886 case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET"; 887 case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET"; 888 case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET"; 889 case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET"; 890 case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET"; 891 case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET"; 892 case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET"; 893 case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET"; 894 case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET"; 895 case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET"; 896 case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET"; 897 case 
AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD"; 898 case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND"; 899 case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG"; 900 case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC"; 901 case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC"; 902 case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX"; 903 case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX"; 904 case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN"; 905 case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN"; 906 case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR"; 907 case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB"; 908 case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB"; 909 case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG"; 910 case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR"; 911 case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET"; 912 case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET"; 913 case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET"; 914 case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET"; 915 case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET"; 916 case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET"; 917 case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET"; 918 case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET"; 919 case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET"; 920 case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET"; 921 case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET"; 922 case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET"; 923 case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET"; 924 case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD"; 925 case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND"; 926 case 
AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG"; 927 case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC"; 928 case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC"; 929 case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX"; 930 case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX"; 931 case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN"; 932 case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN"; 933 case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR"; 934 case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR"; 935 case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB"; 936 case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB"; 937 case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG"; 938 case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR"; 939 case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET"; 940 case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET"; 941 case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET"; 942 case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET"; 943 case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET"; 944 case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET"; 945 case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET"; 946 case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET"; 947 case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET"; 948 case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET"; 949 case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET"; 950 case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET"; 951 case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET"; 952 case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET"; 953 case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET"; 954 case AMDILISD::APPEND_ALLOC: return 
"AMDILISD::APPEND_ALLOC";
  case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
  case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
  case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
  case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
  case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
  case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
  case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
  case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
  case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
  case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
  case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";

  };
}
// getTgtMemIntrinsic - Describe an AMDIL atomic/append intrinsic as a
// target memory intrinsic so the SelectionDAG builder can attach the
// correct memory operand information.
//   Info      - out-parameter filled with opcode, memory VT, pointer,
//               alignment and read/write flags.
//   I         - the call instruction being lowered.
//   Intrinsic - the intrinsic ID; only AMDIL intrinsic IDs are handled.
// Returns true when Info was populated, false for non-AMDIL intrinsics
// and for AMDIL intrinsics that are not memory intrinsics.
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  // Reject everything outside the AMDIL intrinsic ID range.
  if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
      || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
    return false;
  }
  bool bitCastToInt = false;  // set for float-exchange variants
  unsigned IntNo;             // target node opcode for this intrinsic
  bool isRet = true;          // false for the *_noret variants
  const AMDILSubtarget *STM = &this->getTargetMachine()
    .getSubtarget<AMDILSubtarget>();
  // Naming scheme: _g* = global memory, _l* = local (LDS) memory,
  // _r* = region (GDS) memory; i32/u32/f32 give the element type and
  // the _noret suffix selects the no-return-value form.
  switch (Intrinsic) {
    default: return false; // Don't custom lower most intrinsics.
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
      IntNo = AMDILISD::ATOM_G_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
      IntNo = AMDILISD::ATOM_L_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
      IntNo = AMDILISD::ATOM_R_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
      IntNo = AMDILISD::ATOM_G_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
      IntNo = AMDILISD::ATOM_L_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
      IntNo = AMDILISD::ATOM_R_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
      IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
      IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
      IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
    // dec/inc: older CAL runtimes lack native dec/inc atomics, so fall
    // back to sub/add there.
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_G_DEC;
      } else {
        IntNo = AMDILISD::ATOM_G_SUB;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_G_DEC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_G_SUB_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_L_DEC;
      } else {
        IntNo = AMDILISD::ATOM_L_SUB;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_L_DEC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_L_SUB_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_R_DEC;
      } else {
        IntNo = AMDILISD::ATOM_R_SUB;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_R_DEC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_R_SUB_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_G_INC;
      } else {
        IntNo = AMDILISD::ATOM_G_ADD;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_G_INC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_G_ADD_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_L_INC;
      } else {
        IntNo = AMDILISD::ATOM_L_ADD;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_L_INC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_L_ADD_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_R_INC;
      } else {
        IntNo = AMDILISD::ATOM_R_ADD;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_R_INC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_R_ADD_NORET;
      }
      break;
    // max/min distinguish signed (MAX/MIN) from unsigned (UMAX/UMIN).
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
      IntNo = AMDILISD::ATOM_G_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
      IntNo = AMDILISD::ATOM_G_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
      IntNo = AMDILISD::ATOM_L_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
      IntNo = AMDILISD::ATOM_L_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
      IntNo = AMDILISD::ATOM_R_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
      IntNo = AMDILISD::ATOM_R_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
      IntNo = AMDILISD::ATOM_G_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
      IntNo = AMDILISD::ATOM_G_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
      IntNo = AMDILISD::ATOM_L_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
      IntNo = AMDILISD::ATOM_L_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
      IntNo = AMDILISD::ATOM_R_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
      IntNo = AMDILISD::ATOM_R_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
      IntNo = AMDILISD::ATOM_G_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
      IntNo = AMDILISD::ATOM_L_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
      IntNo = AMDILISD::ATOM_R_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
      IntNo = AMDILISD::ATOM_G_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
      IntNo = AMDILISD::ATOM_L_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
      IntNo = AMDILISD::ATOM_R_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
      IntNo = AMDILISD::ATOM_G_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
      IntNo = AMDILISD::ATOM_L_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
      IntNo = AMDILISD::ATOM_R_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
    // Float exchange reuses the integer xchg node; the f32 cases set
    // bitCastToInt and deliberately fall through to the i32/u32 cases.
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
      bitCastToInt = true;
      // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
      IntNo = AMDILISD::ATOM_G_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
      bitCastToInt = true;
      // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
      bitCastToInt = true;
      // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
      IntNo = AMDILISD::ATOM_L_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
      bitCastToInt = true;
      // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
      bitCastToInt = true;
      // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
      IntNo = AMDILISD::ATOM_R_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
      bitCastToInt = true;
      // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
      IntNo = AMDILISD::ATOM_G_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
      IntNo = AMDILISD::ATOM_L_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
      IntNo = AMDILISD::ATOM_R_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
      IntNo = AMDILISD::APPEND_ALLOC; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
      isRet = false;
      IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32:
      IntNo = AMDILISD::APPEND_CONSUME; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
      isRet = false;
      IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
  };

  Info.opc = IntNo;
  // Float-exchange variants access memory as f32; every other AMDIL
  // atomic here operates on 32-bit integers.
  Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
  Info.ptrVal = I.getOperand(0);
  Info.offset = 0;
  Info.align = 4;
  Info.vol = true;
  // Only the value-returning forms read memory back into the program.
  Info.readMem = isRet;
  Info.writeMem = true;
  return true;
}
// The backend supports 32 and 64 bit floating point immediates
bool
AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return true;
  } else {
    return false;
  }
}

// Never shrink f32/f64 constants: the hardware handles both widths
// natively, so there is nothing to gain from narrowing.
bool
AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return false;
  } else {
    return true;
  }
}


// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
// be zero. Op is expected to be a target specific node. Used by DAG
// combiner.
1375 1376void 1377AMDILTargetLowering::computeMaskedBitsForTargetNode( 1378 const SDValue Op, 1379 APInt &KnownZero, 1380 APInt &KnownOne, 1381 const SelectionDAG &DAG, 1382 unsigned Depth) const 1383{ 1384 APInt KnownZero2; 1385 APInt KnownOne2; 1386 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything 1387 switch (Op.getOpcode()) { 1388 default: break; 1389 case AMDILISD::SELECT_CC: 1390 DAG.ComputeMaskedBits( 1391 Op.getOperand(1), 1392 KnownZero, 1393 KnownOne, 1394 Depth + 1 1395 ); 1396 DAG.ComputeMaskedBits( 1397 Op.getOperand(0), 1398 KnownZero2, 1399 KnownOne2 1400 ); 1401 assert((KnownZero & KnownOne) == 0 1402 && "Bits known to be one AND zero?"); 1403 assert((KnownZero2 & KnownOne2) == 0 1404 && "Bits known to be one AND zero?"); 1405 // Only known if known in both the LHS and RHS 1406 KnownOne &= KnownOne2; 1407 KnownZero &= KnownZero2; 1408 break; 1409 }; 1410} 1411 1412// This is the function that determines which calling convention should 1413// be used. Currently there is only one calling convention 1414CCAssignFn* 1415AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const 1416{ 1417 //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 1418 return CC_AMDIL32; 1419} 1420 1421// LowerCallResult - Lower the result values of an ISD::CALL into the 1422// appropriate copies out of appropriate physical registers. This assumes that 1423// Chain/InFlag are the input chain/flag to use, and that TheCall is the call 1424// being lowered. The returns a SDNode with the same number of values as the 1425// ISD::CALL. 
// LowerCallResult - Copy each value returned by the call out of the physical
// register the calling convention assigned it to, threading the chain and
// glue between successive CopyFromReg nodes.
//   Chain/InFlag - incoming chain and glue from the CALLSEQ_END.
//   Ins          - descriptions of the expected return values.
//   InVals       - out: one SDValue per returned value.
// Returns the updated chain.
SDValue
AMDILTargetLowering::LowerCallResult(
    SDValue Chain,
    SDValue InFlag,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  // Assign locations to each value returned by this call
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    EVT CopyVT = RVLocs[i].getValVT();
    if (RVLocs[i].isRegLoc()) {
      Chain = DAG.getCopyFromReg(
          Chain,
          dl,
          RVLocs[i].getLocReg(),
          CopyVT,
          InFlag
          ).getValue(1);
      // Value 0 of the CopyFromReg node is the copied value; value 2 is
      // the glue output that must feed the next copy in the sequence.
      SDValue Val = Chain.getValue(0);
      InFlag = Chain.getValue(2);
      InVals.push_back(Val);
    }
  }

  return Chain;

}

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

// Recursively assign SDNodeOrdering to any unordered nodes
// This is necessary to maintain source ordering of instructions
// under -O0 to avoid odd-looking "skipping around" issues.
1471 static const SDValue 1472Ordered( SelectionDAG &DAG, unsigned order, const SDValue New ) 1473{ 1474 if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) { 1475 DAG.AssignOrdering( New.getNode(), order ); 1476 for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i) 1477 Ordered( DAG, order, New.getOperand(i) ); 1478 } 1479 return New; 1480} 1481 1482#define LOWER(A) \ 1483 case ISD:: A: \ 1484return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) ) 1485 1486SDValue 1487AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const 1488{ 1489 switch (Op.getOpcode()) { 1490 default: 1491 Op.getNode()->dump(); 1492 assert(0 && "Custom lowering code for this" 1493 "instruction is not implemented yet!"); 1494 break; 1495 LOWER(GlobalAddress); 1496 LOWER(JumpTable); 1497 LOWER(ConstantPool); 1498 LOWER(ExternalSymbol); 1499 LOWER(FP_TO_UINT); 1500 LOWER(UINT_TO_FP); 1501 LOWER(MUL); 1502 LOWER(SUB); 1503 LOWER(FDIV); 1504 LOWER(SDIV); 1505 LOWER(SREM); 1506 LOWER(UREM); 1507 LOWER(BUILD_VECTOR); 1508 LOWER(INSERT_VECTOR_ELT); 1509 LOWER(EXTRACT_VECTOR_ELT); 1510 LOWER(EXTRACT_SUBVECTOR); 1511 LOWER(SCALAR_TO_VECTOR); 1512 LOWER(CONCAT_VECTORS); 1513 LOWER(SELECT); 1514 LOWER(SETCC); 1515 LOWER(SIGN_EXTEND_INREG); 1516 LOWER(BITCAST); 1517 LOWER(DYNAMIC_STACKALLOC); 1518 LOWER(BRCOND); 1519 LOWER(BR_CC); 1520 LOWER(FP_ROUND); 1521 } 1522 return Op; 1523} 1524 1525int 1526AMDILTargetLowering::getVarArgsFrameOffset() const 1527{ 1528 return VarArgsFrameOffset; 1529} 1530#undef LOWER 1531 1532SDValue 1533AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const 1534{ 1535 SDValue DST = Op; 1536 const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op); 1537 const GlobalValue *G = GADN->getGlobal(); 1538 DebugLoc DL = Op.getDebugLoc(); 1539 const GlobalVariable *GV = dyn_cast<GlobalVariable>(G); 1540 if (!GV) { 1541 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 1542 } else { 1543 if (GV->hasInitializer()) { 
1544 const Constant *C = dyn_cast<Constant>(GV->getInitializer()); 1545 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) { 1546 DST = DAG.getConstant(CI->getValue(), Op.getValueType()); 1547 } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) { 1548 DST = DAG.getConstantFP(CF->getValueAPF(), 1549 Op.getValueType()); 1550 } else if (dyn_cast<ConstantAggregateZero>(C)) { 1551 EVT VT = Op.getValueType(); 1552 if (VT.isInteger()) { 1553 DST = DAG.getConstant(0, VT); 1554 } else { 1555 DST = DAG.getConstantFP(0, VT); 1556 } 1557 } else { 1558 assert(!"lowering this type of Global Address " 1559 "not implemented yet!"); 1560 C->dump(); 1561 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 1562 } 1563 } else { 1564 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 1565 } 1566 } 1567 return DST; 1568} 1569 1570SDValue 1571AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const 1572{ 1573 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 1574 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); 1575 return Result; 1576} 1577SDValue 1578AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const 1579{ 1580 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1581 EVT PtrVT = Op.getValueType(); 1582 SDValue Result; 1583 if (CP->isMachineConstantPoolEntry()) { 1584 Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 1585 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); 1586 } else { 1587 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 1588 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); 1589 } 1590 return Result; 1591} 1592 1593SDValue 1594AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const 1595{ 1596 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 1597 SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32); 1598 return Result; 1599} 1600 1601/// LowerFORMAL_ARGUMENTS - transform physical registers into 1602/// virtual 
/// registers and generate load operations for
/// arguments places on the stack.
/// TODO: isVarArg, hasStructRet, isMemReg
 SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      // Register-assigned argument: mark the physreg live-in and copy it
      // into a fresh virtual register.
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFromType(
          RegVT.getSimpleVT().SimpleTy);

      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits. Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if(VA.isMemLoc()) {
      // Stack-assigned argument: emit the load via LowerMemArgument.
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
  // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
// Emits a memcpy describing the byval copy. NOTE: currently guarded by an
// assert — byval copies are not supported yet; the code after the assert is
// the intended release-mode behavior.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  assert(0 && "MemCopy does not exist yet");
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);

  return DAG.getMemcpy(Chain,
      Src.getDebugLoc(),
      Dst, Src, SizeNode, Flags.getByValAlign(),
      /*IsVol=*/false, /*AlwaysInline=*/true,
      MachinePointerInfo(), MachinePointerInfo());
}

// LowerMemOpCallTo - Store an outgoing call argument to its assigned stack
// slot (StackPtr + the location's memory offset); byval arguments are
// copied instead of stored. Returns the store/copy node.
SDValue
AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
    SDValue StackPtr, SDValue Arg,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    ISD::ArgFlagsTy Flags) const
{
  unsigned int LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  // Compute the address of the stack slot for this argument.
  PtrOff = DAG.getNode(ISD::ADD,
      dl,
      getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
  } else {
    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
        MachinePointerInfo::getStack(LocMemOffset),
        false, false, 0);
  }
  return PtrOff;
}
/// LowerCall - functions arguments are copied from virtual
/// regs to (physical regs)/(stack frame), CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Tail calls are never emitted by this target.
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: DO we need to handle fast calling conventions and tail call
  // optimizations??
X86/PPC ISelLowering 1758 /*bool hasStructRet = (TheCall->getNumArgs()) 1759 ? TheCall->getArgFlags(0).device()->isSRet() 1760 : false;*/ 1761 1762 MachineFrameInfo *MFI = MF.getFrameInfo(); 1763 1764 // Analyze operands of the call, assigning locations to each operand 1765 SmallVector<CCValAssign, 16> ArgLocs; 1766 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1767 getTargetMachine(), ArgLocs, *DAG.getContext()); 1768 // Analyize the calling operands, but need to change 1769 // if we have more than one calling convetion 1770 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv)); 1771 1772 unsigned int NumBytes = CCInfo.getNextStackOffset(); 1773 if (isTailCall) { 1774 assert(isTailCall && "Tail Call not handled yet!"); 1775 // See X86/PPC ISelLowering 1776 } 1777 1778 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); 1779 1780 SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass; 1781 SmallVector<SDValue, 8> MemOpChains; 1782 SDValue StackPtr; 1783 //unsigned int FirstStacArgLoc = 0; 1784 //int LastArgStackLoc = 0; 1785 1786 // Walk the register/memloc assignments, insert copies/loads 1787 for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) { 1788 CCValAssign &VA = ArgLocs[i]; 1789 //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers 1790 // Arguments start after the 5 first operands of ISD::CALL 1791 SDValue Arg = OutVals[i]; 1792 //Promote the value if needed 1793 switch(VA.getLocInfo()) { 1794 default: assert(0 && "Unknown loc info!"); 1795 case CCValAssign::Full: 1796 break; 1797 case CCValAssign::SExt: 1798 Arg = DAG.getNode(ISD::SIGN_EXTEND, 1799 dl, 1800 VA.getLocVT(), Arg); 1801 break; 1802 case CCValAssign::ZExt: 1803 Arg = DAG.getNode(ISD::ZERO_EXTEND, 1804 dl, 1805 VA.getLocVT(), Arg); 1806 break; 1807 case CCValAssign::AExt: 1808 Arg = DAG.getNode(ISD::ANY_EXTEND, 1809 dl, 1810 VA.getLocVT(), Arg); 1811 break; 1812 } 1813 1814 if (VA.isRegLoc()) { 1815 
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1816 } else if (VA.isMemLoc()) { 1817 // Create the frame index object for this incoming parameter 1818 int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, 1819 VA.getLocMemOffset(), true); 1820 SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy()); 1821 1822 // emit ISD::STORE whichs stores the 1823 // parameter value to a stack Location 1824 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 1825 MachinePointerInfo::getFixedStack(FI), 1826 false, false, 0)); 1827 } else { 1828 assert(0 && "Not a Reg/Mem Loc, major error!"); 1829 } 1830 } 1831 if (!MemOpChains.empty()) { 1832 Chain = DAG.getNode(ISD::TokenFactor, 1833 dl, 1834 MVT::Other, 1835 &MemOpChains[0], 1836 MemOpChains.size()); 1837 } 1838 SDValue InFlag; 1839 if (!isTailCall) { 1840 for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) { 1841 Chain = DAG.getCopyToReg(Chain, 1842 dl, 1843 RegsToPass[i].first, 1844 RegsToPass[i].second, 1845 InFlag); 1846 InFlag = Chain.getValue(1); 1847 } 1848 } 1849 1850 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, 1851 // every direct call is) turn it into a TargetGlobalAddress/ 1852 // TargetExternalSymbol 1853 // node so that legalize doesn't hack it. 
1854 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1855 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy()); 1856 } 1857 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1858 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); 1859 } 1860 else if (isTailCall) { 1861 assert(0 && "Tail calls are not handled yet"); 1862 // see X86 ISelLowering for ideas on implementation: 1708 1863 } 1864 1865 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE); 1866 SmallVector<SDValue, 8> Ops; 1867 1868 if (isTailCall) { 1869 assert(0 && "Tail calls are not handled yet"); 1870 // see X86 ISelLowering for ideas on implementation: 1721 1871 } 1872 // If this is a direct call, pass the chain and the callee 1873 if (Callee.getNode()) { 1874 Ops.push_back(Chain); 1875 Ops.push_back(Callee); 1876 } 1877 1878 if (isTailCall) { 1879 assert(0 && "Tail calls are not handled yet"); 1880 // see X86 ISelLowering for ideas on implementation: 1739 1881 } 1882 1883 // Add argument registers to the end of the list so that they are known 1884 // live into the call 1885 for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) { 1886 Ops.push_back(DAG.getRegister( 1887 RegsToPass[i].first, 1888 RegsToPass[i].second.getValueType())); 1889 } 1890 if (InFlag.getNode()) { 1891 Ops.push_back(InFlag); 1892 } 1893 1894 // Emit Tail Call 1895 if (isTailCall) { 1896 assert(0 && "Tail calls are not handled yet"); 1897 // see X86 ISelLowering for ideas on implementation: 1762 1898 } 1899 1900 Chain = DAG.getNode(AMDILISD::CALL, 1901 dl, 1902 NodeTys, &Ops[0], Ops.size()); 1903 InFlag = Chain.getValue(1); 1904 1905 // Create the CALLSEQ_END node 1906 Chain = DAG.getCALLSEQ_END( 1907 Chain, 1908 DAG.getIntPtrConstant(NumBytes, true), 1909 DAG.getIntPtrConstant(0, true), 1910 InFlag); 1911 InFlag = Chain.getValue(1); 1912 // Handle result values, copying them out of physregs into vregs that 1913 // we return 1914 return 
LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, 1915 InVals); 1916} 1917 1918SDValue 1919AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG, 1920 uint32_t bits) const 1921{ 1922 DebugLoc DL = Op.getDebugLoc(); 1923 EVT INTTY = Op.getValueType(); 1924 EVT FPTY; 1925 if (INTTY.isVector()) { 1926 FPTY = EVT(MVT::getVectorVT(MVT::f32, 1927 INTTY.getVectorNumElements())); 1928 } else { 1929 FPTY = EVT(MVT::f32); 1930 } 1931 /* static inline uint 1932 __clz_Nbit(uint x) 1933 { 1934 int xor = 0x3f800000U | x; 1935 float tp = as_float(xor); 1936 float t = tp + -1.0f; 1937 uint tint = as_uint(t); 1938 int cmp = (x != 0); 1939 uint tsrc = tint >> 23; 1940 uint tmask = tsrc & 0xffU; 1941 uint cst = (103 + N)U - tmask; 1942 return cmp ? cst : N; 1943 } 1944 */ 1945 assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32 1946 && "genCLZu16 only works on 32bit types"); 1947 // uint x = Op 1948 SDValue x = Op; 1949 // xornode = 0x3f800000 | x 1950 SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY, 1951 DAG.getConstant(0x3f800000, INTTY), x); 1952 // float tp = as_float(xornode) 1953 SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode); 1954 // float t = tp + -1.0f 1955 SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp, 1956 DAG.getConstantFP(-1.0f, FPTY)); 1957 // uint tint = as_uint(t) 1958 SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t); 1959 // int cmp = (x != 0) 1960 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, 1961 DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x, 1962 DAG.getConstant(0, INTTY)); 1963 // uint tsrc = tint >> 23 1964 SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint, 1965 DAG.getConstant(23, INTTY)); 1966 // uint tmask = tsrc & 0xFF 1967 SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc, 1968 DAG.getConstant(0xFFU, INTTY)); 1969 // uint cst = (103 + bits) - tmask 1970 SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY, 1971 DAG.getConstant((103U + bits), INTTY), tmask); 1972 // return cmp ? 
cst : N 1973 cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst, 1974 DAG.getConstant(bits, INTTY)); 1975 return cst; 1976} 1977 1978SDValue 1979AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const 1980{ 1981 SDValue DST = SDValue(); 1982 DebugLoc DL = Op.getDebugLoc(); 1983 EVT INTTY = Op.getValueType(); 1984 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 1985 &this->getTargetMachine())->getSubtargetImpl(); 1986 if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) { 1987 //__clz_32bit(uint u) 1988 //{ 1989 // int z = __amdil_ffb_hi(u) ; 1990 // return z < 0 ? 32 : z; 1991 // } 1992 // uint u = op 1993 SDValue u = Op; 1994 // int z = __amdil_ffb_hi(u) 1995 SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u); 1996 // int cmp = z < 0 1997 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, 1998 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 1999 z, DAG.getConstant(0, INTTY)); 2000 // return cmp ? 32 : z 2001 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, 2002 DAG.getConstant(32, INTTY), z); 2003 } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { 2004 // static inline uint 2005 //__clz_32bit(uint x) 2006 //{ 2007 // uint zh = __clz_16bit(x >> 16); 2008 // uint zl = __clz_16bit(x & 0xffffU); 2009 // return zh == 16U ? 
16U + zl : zh; 2010 //} 2011 // uint x = Op 2012 SDValue x = Op; 2013 // uint xs16 = x >> 16 2014 SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x, 2015 DAG.getConstant(16, INTTY)); 2016 // uint zh = __clz_16bit(xs16) 2017 SDValue zh = genCLZuN(xs16, DAG, 16); 2018 // uint xa16 = x & 0xFFFF 2019 SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x, 2020 DAG.getConstant(0xFFFFU, INTTY)); 2021 // uint zl = __clz_16bit(xa16) 2022 SDValue zl = genCLZuN(xa16, DAG, 16); 2023 // uint cmp = zh == 16U 2024 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2025 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 2026 zh, DAG.getConstant(16U, INTTY)); 2027 // uint zl16 = zl + 16 2028 SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY, 2029 DAG.getConstant(16, INTTY), zl); 2030 // return cmp ? zl16 : zh 2031 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, 2032 cmp, zl16, zh); 2033 } else { 2034 assert(0 && "Attempting to generate a CLZ function with an" 2035 " unknown graphics card"); 2036 } 2037 return DST; 2038} 2039SDValue 2040AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const 2041{ 2042 SDValue DST = SDValue(); 2043 DebugLoc DL = Op.getDebugLoc(); 2044 EVT INTTY; 2045 EVT LONGTY = Op.getValueType(); 2046 bool isVec = LONGTY.isVector(); 2047 if (isVec) { 2048 INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType() 2049 .getVectorNumElements())); 2050 } else { 2051 INTTY = EVT(MVT::i32); 2052 } 2053 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 2054 &this->getTargetMachine())->getSubtargetImpl(); 2055 if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) { 2056 // Evergreen: 2057 // static inline uint 2058 // __clz_u64(ulong x) 2059 // { 2060 //uint zhi = __clz_32bit((uint)(x >> 32)); 2061 //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL)); 2062 //return zhi == 32U ? 32U + zlo : zhi; 2063 //} 2064 //ulong x = op 2065 SDValue x = Op; 2066 // uint xhi = x >> 32 2067 SDValue xlo = DAG.getNode((isVec) ? 
AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x); 2068 // uint xlo = x & 0xFFFFFFFF 2069 SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x); 2070 // uint zhi = __clz_32bit(xhi) 2071 SDValue zhi = genCLZu32(xhi, DAG); 2072 // uint zlo = __clz_32bit(xlo) 2073 SDValue zlo = genCLZu32(xlo, DAG); 2074 // uint cmp = zhi == 32 2075 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2076 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 2077 zhi, DAG.getConstant(32U, INTTY)); 2078 // uint zlop32 = 32 + zlo 2079 SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY, 2080 DAG.getConstant(32U, INTTY), zlo); 2081 // return cmp ? zlop32: zhi 2082 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi); 2083 } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { 2084 // HD4XXX: 2085 // static inline uint 2086 //__clz_64bit(ulong x) 2087 //{ 2088 //uint zh = __clz_23bit((uint)(x >> 46)) - 5U; 2089 //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU); 2090 //uint zl = __clz_23bit((uint)x & 0x7fffffU); 2091 //uint r = zh == 18U ? 18U + zm : zh; 2092 //return zh + zm == 41U ? 41U + zl : r; 2093 //} 2094 //ulong x = Op 2095 SDValue x = Op; 2096 // ulong xs46 = x >> 46 2097 SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x, 2098 DAG.getConstant(46, LONGTY)); 2099 // uint ixs46 = (uint)xs46 2100 SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46); 2101 // ulong xs23 = x >> 23 2102 SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x, 2103 DAG.getConstant(23, LONGTY)); 2104 // uint ixs23 = (uint)xs23 2105 SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23); 2106 // uint xs23m23 = ixs23 & 0x7FFFFF 2107 SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23, 2108 DAG.getConstant(0x7fffffU, INTTY)); 2109 // uint ix = (uint)x 2110 SDValue ix = DAG.getNode((isVec) ? 
AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x); 2111 // uint xm23 = ix & 0x7FFFFF 2112 SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix, 2113 DAG.getConstant(0x7fffffU, INTTY)); 2114 // uint zh = __clz_23bit(ixs46) 2115 SDValue zh = genCLZuN(ixs46, DAG, 23); 2116 // uint zm = __clz_23bit(xs23m23) 2117 SDValue zm = genCLZuN(xs23m23, DAG, 23); 2118 // uint zl = __clz_23bit(xm23) 2119 SDValue zl = genCLZuN(xm23, DAG, 23); 2120 // uint zhm5 = zh - 5 2121 SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh, 2122 DAG.getConstant(-5U, INTTY)); 2123 SDValue const18 = DAG.getConstant(18, INTTY); 2124 SDValue const41 = DAG.getConstant(41, INTTY); 2125 // uint cmp1 = zh = 18 2126 SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2127 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 2128 zhm5, const18); 2129 // uint zhm5zm = zhm5 + zh 2130 SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm); 2131 // uint cmp2 = zhm5zm == 41 2132 SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2133 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 2134 zhm5zm, const41); 2135 // uint zmp18 = zhm5 + 18 2136 SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18); 2137 // uint zlp41 = zl + 41 2138 SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41); 2139 // uint r = cmp1 ? zmp18 : zh 2140 SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, 2141 cmp1, zmp18, zhm5); 2142 // return cmp2 ? 
zlp41 : r 2143 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r); 2144 } else { 2145 assert(0 && "Attempting to generate a CLZ function with an" 2146 " unknown graphics card"); 2147 } 2148 return DST; 2149} 2150SDValue 2151AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG, 2152 bool includeSign) const 2153{ 2154 EVT INTVT; 2155 EVT LONGVT; 2156 SDValue DST; 2157 DebugLoc DL = RHS.getDebugLoc(); 2158 EVT RHSVT = RHS.getValueType(); 2159 bool isVec = RHSVT.isVector(); 2160 if (isVec) { 2161 LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT 2162 .getVectorNumElements())); 2163 INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT 2164 .getVectorNumElements())); 2165 } else { 2166 LONGVT = EVT(MVT::i64); 2167 INTVT = EVT(MVT::i32); 2168 } 2169 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 2170 &this->getTargetMachine())->getSubtargetImpl(); 2171 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 2172 // unsigned version: 2173 // uint uhi = (uint)(d * 0x1.0p-32); 2174 // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d)); 2175 // return as_ulong2((uint2)(ulo, uhi)); 2176 // 2177 // signed version: 2178 // double ad = fabs(d); 2179 // long l = unsigned_version(ad); 2180 // long nl = -l; 2181 // return d == ad ? l : nl; 2182 SDValue d = RHS; 2183 if (includeSign) { 2184 d = DAG.getNode(ISD::FABS, DL, RHSVT, d); 2185 } 2186 SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d, 2187 DAG.getConstantFP(0x2f800000, RHSVT)); 2188 SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid); 2189 SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi); 2190 ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod, 2191 DAG.getConstantFP(0xcf800000, RHSVT), d); 2192 SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod); 2193 SDValue l = DAG.getNode((isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi); 2194 if (includeSign) { 2195 SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l); 2196 SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT, 2197 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32), 2198 RHS, d); 2199 l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl); 2200 } 2201 DST = l; 2202 } else { 2203 /* 2204 __attribute__((always_inline)) long 2205 cast_f64_to_i64(double d) 2206 { 2207 // Convert d in to 32-bit components 2208 long x = as_long(d); 2209 xhi = LCOMPHI(x); 2210 xlo = LCOMPLO(x); 2211 2212 // Generate 'normalized' mantissa 2213 mhi = xhi | 0x00100000; // hidden bit 2214 mhi <<= 11; 2215 temp = xlo >> (32 - 11); 2216 mhi |= temp 2217 mlo = xlo << 11; 2218 2219 // Compute shift right count from exponent 2220 e = (xhi >> (52-32)) & 0x7ff; 2221 sr = 1023 + 63 - e; 2222 srge64 = sr >= 64; 2223 srge32 = sr >= 32; 2224 2225 // Compute result for 0 <= sr < 32 2226 rhi0 = mhi >> (sr &31); 2227 rlo0 = mlo >> (sr &31); 2228 temp = mhi << (32 - sr); 2229 temp |= rlo0; 2230 rlo0 = sr ? temp : rlo0; 2231 2232 // Compute result for 32 <= sr 2233 rhi1 = 0; 2234 rlo1 = srge64 ? 0 : rhi0; 2235 2236 // Pick between the 2 results 2237 rhi = srge32 ? rhi1 : rhi0; 2238 rlo = srge32 ? rlo1 : rlo0; 2239 2240 // Optional saturate on overflow 2241 srlt0 = sr < 0; 2242 rhi = srlt0 ? MAXVALUE : rhi; 2243 rlo = srlt0 ? 
MAXVALUE : rlo; 2244 2245 // Create long 2246 res = LCREATE( rlo, rhi ); 2247 2248 // Deal with sign bit (ignoring whether result is signed or unsigned value) 2249 if (includeSign) { 2250 sign = ((signed int) xhi) >> 31; fill with sign bit 2251 sign = LCREATE( sign, sign ); 2252 res += sign; 2253 res ^= sign; 2254 } 2255 2256 return res; 2257 } 2258 */ 2259 SDValue c11 = DAG.getConstant( 63 - 52, INTVT ); 2260 SDValue c32 = DAG.getConstant( 32, INTVT ); 2261 2262 // Convert d in to 32-bit components 2263 SDValue d = RHS; 2264 SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d); 2265 SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); 2266 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); 2267 2268 // Generate 'normalized' mantissa 2269 SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT, 2270 xhi, DAG.getConstant( 0x00100000, INTVT ) ); 2271 mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 ); 2272 SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT, 2273 xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) ); 2274 mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp ); 2275 SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 ); 2276 2277 // Compute shift right count from exponent 2278 SDValue e = DAG.getNode( ISD::SRL, DL, INTVT, 2279 xhi, DAG.getConstant( 52-32, INTVT ) ); 2280 e = DAG.getNode( ISD::AND, DL, INTVT, 2281 e, DAG.getConstant( 0x7ff, INTVT ) ); 2282 SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT, 2283 DAG.getConstant( 1023 + 63, INTVT ), e ); 2284 SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT, 2285 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 2286 sr, DAG.getConstant(64, INTVT)); 2287 SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT, 2288 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 2289 sr, DAG.getConstant(32, INTVT)); 2290 2291 // Compute result for 0 <= sr < 32 2292 SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr ); 2293 
SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr ); 2294 temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr ); 2295 temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp ); 2296 temp = DAG.getNode( ISD::OR, DL, INTVT, rlo0, temp ); 2297 rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 ); 2298 2299 // Compute result for 32 <= sr 2300 SDValue rhi1 = DAG.getConstant( 0, INTVT ); 2301 SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 2302 srge64, rhi1, rhi0 ); 2303 2304 // Pick between the 2 results 2305 SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 2306 srge32, rhi1, rhi0 ); 2307 SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 2308 srge32, rlo1, rlo0 ); 2309 2310 // Create long 2311 SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi ); 2312 2313 // Deal with sign bit 2314 if (includeSign) { 2315 SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT, 2316 xhi, DAG.getConstant( 31, INTVT ) ); 2317 sign = DAG.getNode( (isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign ); 2318 res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign ); 2319 res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign ); 2320 } 2321 DST = res; 2322 } 2323 return DST; 2324} 2325SDValue 2326AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG, 2327 bool includeSign) const 2328{ 2329 EVT INTVT; 2330 EVT LONGVT; 2331 DebugLoc DL = RHS.getDebugLoc(); 2332 EVT RHSVT = RHS.getValueType(); 2333 bool isVec = RHSVT.isVector(); 2334 if (isVec) { 2335 LONGVT = EVT(MVT::getVectorVT(MVT::i64, 2336 RHSVT.getVectorNumElements())); 2337 INTVT = EVT(MVT::getVectorVT(MVT::i32, 2338 RHSVT.getVectorNumElements())); 2339 } else { 2340 LONGVT = EVT(MVT::i64); 2341 INTVT = EVT(MVT::i32); 2342 } 2343 /* 2344 __attribute__((always_inline)) int 2345 cast_f64_to_[u|i]32(double d) 2346 { 2347 // Convert d in to 32-bit components 2348 long x = as_long(d); 2349 xhi = LCOMPHI(x); 2350 xlo = LCOMPLO(x); 2351 2352 // Generate 'normalized' mantissa 2353 mhi = xhi | 0x00100000; // hidden bit 2354 mhi <<= 11; 2355 temp = xlo >> (32 - 11); 2356 mhi |= temp 2357 2358 // Compute shift right count from exponent 2359 e = (xhi >> (52-32)) & 0x7ff; 2360 sr = 1023 + 31 - e; 2361 srge32 = sr >= 32; 2362 2363 // Compute result for 0 <= sr < 32 2364 res = mhi >> (sr &31); 2365 res = srge32 ? 0 : res; 2366 2367 // Optional saturate on overflow 2368 srlt0 = sr < 0; 2369 res = srlt0 ? MAXVALUE : res; 2370 2371 // Deal with sign bit (ignoring whether result is signed or unsigned value) 2372 if (includeSign) { 2373 sign = ((signed int) xhi) >> 31; fill with sign bit 2374 res += sign; 2375 res ^= sign; 2376 } 2377 2378 return res; 2379 } 2380 */ 2381 SDValue c11 = DAG.getConstant( 63 - 52, INTVT ); 2382 2383 // Convert d in to 32-bit components 2384 SDValue d = RHS; 2385 SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d); 2386 SDValue xhi = DAG.getNode( (isVec) ? 
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); 2387 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); 2388 2389 // Generate 'normalized' mantissa 2390 SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT, 2391 xhi, DAG.getConstant( 0x00100000, INTVT ) ); 2392 mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 ); 2393 SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT, 2394 xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) ); 2395 mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp ); 2396 2397 // Compute shift right count from exponent 2398 SDValue e = DAG.getNode( ISD::SRL, DL, INTVT, 2399 xhi, DAG.getConstant( 52-32, INTVT ) ); 2400 e = DAG.getNode( ISD::AND, DL, INTVT, 2401 e, DAG.getConstant( 0x7ff, INTVT ) ); 2402 SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT, 2403 DAG.getConstant( 1023 + 31, INTVT ), e ); 2404 SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT, 2405 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 2406 sr, DAG.getConstant(32, INTVT)); 2407 2408 // Compute result for 0 <= sr < 32 2409 SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr ); 2410 res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 2411 srge32, DAG.getConstant(0,INTVT), res ); 2412 2413 // Deal with sign bit 2414 if (includeSign) { 2415 SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT, 2416 xhi, DAG.getConstant( 31, INTVT ) ); 2417 res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign ); 2418 res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign ); 2419 } 2420 return res; 2421} 2422 2423SDValue 2424AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const 2425{ 2426 SDValue DST; 2427 SDValue RHS = Op.getOperand(0); 2428 EVT RHSVT = RHS.getValueType(); 2429 MVT RST = RHSVT.getScalarType().getSimpleVT(); 2430 EVT LHSVT = Op.getValueType(); 2431 MVT LST = LHSVT.getScalarType().getSimpleVT(); 2432 DebugLoc DL = Op.getDebugLoc(); 2433 const AMDILTargetMachine* 2434 amdtm = reinterpret_cast<const AMDILTargetMachine*> 2435 
(&this->getTargetMachine()); 2436 const AMDILSubtarget* 2437 stm = static_cast<const AMDILSubtarget*>( 2438 amdtm->getSubtargetImpl()); 2439 if (RST == MVT::f64 && RHSVT.isVector() 2440 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 2441 // We dont support vector 64bit floating point convertions. 2442 for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) { 2443 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 2444 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); 2445 op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op); 2446 if (!x) { 2447 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); 2448 } else { 2449 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, 2450 DST, op, DAG.getTargetConstant(x, MVT::i32)); 2451 } 2452 2453 } 2454 } else { 2455 if (RST == MVT::f64 2456 && LST == MVT::i32) { 2457 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 2458 DST = SDValue(Op.getNode(), 0); 2459 } else { 2460 DST = genf64toi32(RHS, DAG, false); 2461 } 2462 } else if (RST == MVT::f64 2463 && LST == MVT::i64) { 2464 DST = genf64toi64(RHS, DAG, false); 2465 } else if (RST == MVT::f64 2466 && (LST == MVT::i8 || LST == MVT::i16)) { 2467 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 2468 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0)); 2469 } else { 2470 SDValue ToInt = genf64toi32(RHS, DAG, false); 2471 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt); 2472 } 2473 2474 } else { 2475 DST = SDValue(Op.getNode(), 0); 2476 } 2477 } 2478 return DST; 2479} 2480SDValue 2481AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT, 2482 SelectionDAG &DAG) const 2483{ 2484 EVT RHSVT = RHS.getValueType(); 2485 DebugLoc DL = RHS.getDebugLoc(); 2486 EVT INTVT; 2487 EVT LONGVT; 2488 bool isVec = RHSVT.isVector(); 2489 if (isVec) { 2490 LONGVT = EVT(MVT::getVectorVT(MVT::i64, 2491 RHSVT.getVectorNumElements())); 2492 INTVT = EVT(MVT::getVectorVT(MVT::i32, 2493 RHSVT.getVectorNumElements())); 2494 } else { 2495 
LONGVT = EVT(MVT::i64); 2496 INTVT = EVT(MVT::i32); 2497 } 2498 SDValue x = RHS; 2499 const AMDILTargetMachine* 2500 amdtm = reinterpret_cast<const AMDILTargetMachine*> 2501 (&this->getTargetMachine()); 2502 const AMDILSubtarget* 2503 stm = static_cast<const AMDILSubtarget*>( 2504 amdtm->getSubtargetImpl()); 2505 if (stm->calVersion() >= CAL_VERSION_SC_135) { 2506 // unsigned x = RHS; 2507 // ulong xd = (ulong)(0x4330_0000 << 32) | x; 2508 // double d = as_double( xd ); 2509 // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000 2510 SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x, 2511 DAG.getConstant( 0x43300000, INTVT ) ); 2512 SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd ); 2513 SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT, 2514 DAG.getConstant( 0x4330000000000000ULL, LONGVT ) ); 2515 return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd ); 2516 } else { 2517 SDValue clz = genCLZu32(x, DAG); 2518 2519 // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2 2520 // Except for an input 0... which requires a 0 exponent 2521 SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT, 2522 DAG.getConstant( (1023+31), INTVT), clz ); 2523 exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x ); 2524 2525 // Normalize frac 2526 SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz ); 2527 2528 // Eliminate hidden bit 2529 rhi = DAG.getNode( ISD::AND, DL, INTVT, 2530 rhi, DAG.getConstant( 0x7fffffff, INTVT ) ); 2531 2532 // Pack exponent and frac 2533 SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT, 2534 rhi, DAG.getConstant( (32 - 11), INTVT ) ); 2535 rhi = DAG.getNode( ISD::SRL, DL, INTVT, 2536 rhi, DAG.getConstant( 11, INTVT ) ); 2537 exp = DAG.getNode( ISD::SHL, DL, INTVT, 2538 exp, DAG.getConstant( 20, INTVT ) ); 2539 rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp ); 2540 2541 // Convert 2 x 32 in to 1 x 64, then to double precision float type 2542 SDValue res = DAG.getNode( (isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi ); 2543 return DAG.getNode(ISDBITCAST, DL, LHSVT, res); 2544 } 2545} 2546SDValue 2547AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT, 2548 SelectionDAG &DAG) const 2549{ 2550 EVT RHSVT = RHS.getValueType(); 2551 DebugLoc DL = RHS.getDebugLoc(); 2552 EVT INTVT; 2553 EVT LONGVT; 2554 bool isVec = RHSVT.isVector(); 2555 if (isVec) { 2556 INTVT = EVT(MVT::getVectorVT(MVT::i32, 2557 RHSVT.getVectorNumElements())); 2558 } else { 2559 INTVT = EVT(MVT::i32); 2560 } 2561 LONGVT = RHSVT; 2562 SDValue x = RHS; 2563 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 2564 &this->getTargetMachine())->getSubtargetImpl(); 2565 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 2566 // double dhi = (double)(as_uint2(x).y); 2567 // double dlo = (double)(as_uint2(x).x); 2568 // return mad(dhi, 0x1.0p+32, dlo) 2569 SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x); 2570 dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi); 2571 SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x); 2572 dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo); 2573 return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi, 2574 DAG.getConstantFP(0x4f800000, LHSVT), dlo); 2575 } else if (stm->calVersion() >= CAL_VERSION_SC_135) { 2576 // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL)); 2577 // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32)); 2578 // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo; 2579 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL 2580 SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) ); 2581 SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd ); 2582 SDValue xhi = DAG.getNode((isVec) ? 
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32 2583 SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) ); 2584 SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe ); 2585 SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT, 2586 DAG.getConstant( 0x4530000000100000ULL, LONGVT ) ); 2587 hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c ); 2588 return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo ); 2589 2590 } else { 2591 SDValue clz = genCLZu64(x, DAG); 2592 SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); 2593 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); 2594 2595 // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2 2596 SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT, 2597 DAG.getConstant( (1023+63), INTVT), clz ); 2598 SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo ); 2599 exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 2600 mash, exp, mash ); // exp = exp, or 0 if input was 0 2601 2602 // Normalize frac 2603 SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT, 2604 clz, DAG.getConstant( 31, INTVT ) ); 2605 SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT, 2606 DAG.getConstant( 32, INTVT ), clz31 ); 2607 SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 ); 2608 SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift ); 2609 t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 ); 2610 SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 ); 2611 SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 ); 2612 SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 ); 2613 SDValue rlo2 = DAG.getConstant( 0, INTVT ); 2614 SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT, 2615 clz, DAG.getConstant( 32, INTVT ) ); 2616 SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 2617 clz32, rhi2, rhi1 ); 2618 SDValue rlo = DAG.getNode( 
AMDILISD::CMOVLOG, DL, INTVT, 2619 clz32, rlo2, rlo1 ); 2620 2621 // Eliminate hidden bit 2622 rhi = DAG.getNode( ISD::AND, DL, INTVT, 2623 rhi, DAG.getConstant( 0x7fffffff, INTVT ) ); 2624 2625 // Save bits needed to round properly 2626 SDValue round = DAG.getNode( ISD::AND, DL, INTVT, 2627 rlo, DAG.getConstant( 0x7ff, INTVT ) ); 2628 2629 // Pack exponent and frac 2630 rlo = DAG.getNode( ISD::SRL, DL, INTVT, 2631 rlo, DAG.getConstant( 11, INTVT ) ); 2632 SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT, 2633 rhi, DAG.getConstant( (32 - 11), INTVT ) ); 2634 rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp ); 2635 rhi = DAG.getNode( ISD::SRL, DL, INTVT, 2636 rhi, DAG.getConstant( 11, INTVT ) ); 2637 exp = DAG.getNode( ISD::SHL, DL, INTVT, 2638 exp, DAG.getConstant( 20, INTVT ) ); 2639 rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp ); 2640 2641 // Compute rounding bit 2642 SDValue even = DAG.getNode( ISD::AND, DL, INTVT, 2643 rlo, DAG.getConstant( 1, INTVT ) ); 2644 SDValue grs = DAG.getNode( ISD::AND, DL, INTVT, 2645 round, DAG.getConstant( 0x3ff, INTVT ) ); 2646 grs = DAG.getNode( AMDILISD::CMP, DL, INTVT, 2647 DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32), 2648 grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none 2649 grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even ); 2650 round = DAG.getNode( ISD::SRL, DL, INTVT, 2651 round, DAG.getConstant( 10, INTVT ) ); 2652 round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1 2653 2654 // Add rounding bit 2655 SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, 2656 round, DAG.getConstant( 0, INTVT ) ); 2657 SDValue res = DAG.getNode( (isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi ); 2658 res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround ); 2659 return DAG.getNode(ISDBITCAST, DL, LHSVT, res); 2660 } 2661} 2662SDValue 2663AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const 2664{ 2665 SDValue RHS = Op.getOperand(0); 2666 EVT RHSVT = RHS.getValueType(); 2667 MVT RST = RHSVT.getScalarType().getSimpleVT(); 2668 EVT LHSVT = Op.getValueType(); 2669 MVT LST = LHSVT.getScalarType().getSimpleVT(); 2670 DebugLoc DL = Op.getDebugLoc(); 2671 SDValue DST; 2672 EVT INTVT; 2673 EVT LONGVT; 2674 const AMDILTargetMachine* 2675 amdtm = reinterpret_cast<const AMDILTargetMachine*> 2676 (&this->getTargetMachine()); 2677 const AMDILSubtarget* 2678 stm = static_cast<const AMDILSubtarget*>( 2679 amdtm->getSubtargetImpl()); 2680 if (LST == MVT::f64 && LHSVT.isVector() 2681 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 2682 // We dont support vector 64bit floating point convertions. 2683 DST = Op; 2684 for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) { 2685 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 2686 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); 2687 op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op); 2688 if (!x) { 2689 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); 2690 } else { 2691 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST, 2692 op, DAG.getTargetConstant(x, MVT::i32)); 2693 } 2694 2695 } 2696 } else { 2697 2698 if (RST == MVT::i32 2699 && LST == MVT::f64) { 2700 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 2701 DST = SDValue(Op.getNode(), 0); 2702 } else { 2703 DST = genu32tof64(RHS, LHSVT, DAG); 2704 } 2705 } else if (RST == MVT::i64 2706 && LST == MVT::f64) { 2707 DST = genu64tof64(RHS, LHSVT, DAG); 2708 } else { 2709 DST = SDValue(Op.getNode(), 0); 2710 } 2711 } 2712 return DST; 2713} 2714 2715SDValue 2716AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const 2717{ 2718 SDValue 
LHS = Op.getOperand(0); 2719 SDValue RHS = Op.getOperand(1); 2720 DebugLoc DL = Op.getDebugLoc(); 2721 EVT OVT = Op.getValueType(); 2722 SDValue DST; 2723 bool isVec = RHS.getValueType().isVector(); 2724 if (OVT.getScalarType() == MVT::i64) { 2725 /*const AMDILTargetMachine* 2726 amdtm = reinterpret_cast<const AMDILTargetMachine*> 2727 (&this->getTargetMachine()); 2728 const AMDILSubtarget* 2729 stm = dynamic_cast<const AMDILSubtarget*>( 2730 amdtm->getSubtargetImpl());*/ 2731 MVT INTTY = MVT::i32; 2732 if (OVT == MVT::v2i64) { 2733 INTTY = MVT::v2i32; 2734 } 2735 SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI; 2736 // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32 2737 LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS); 2738 RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS); 2739 LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS); 2740 RHSHI = DAG.getNode((isVec) ? 
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS); 2741 INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO); 2742 INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI); 2743 //TODO: need to use IBORROW on HD5XXX and later hardware 2744 SDValue cmp; 2745 if (OVT == MVT::i64) { 2746 cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2747 DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32), 2748 LHSLO, RHSLO); 2749 } else { 2750 SDValue cmplo; 2751 SDValue cmphi; 2752 SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 2753 DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32)); 2754 SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 2755 DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32)); 2756 SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 2757 DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32)); 2758 SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 2759 DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32)); 2760 cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32, 2761 DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32), 2762 LHSRLO, RHSRLO); 2763 cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32, 2764 DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32), 2765 LHSRHI, RHSRHI); 2766 cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo); 2767 cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32, 2768 cmp, cmphi, DAG.getTargetConstant(1, MVT::i32)); 2769 } 2770 INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp); 2771 DST = DAG.getNode((isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT, 2772 INTLO, INTHI); 2773 } else { 2774 DST = SDValue(Op.getNode(), 0); 2775 } 2776 return DST; 2777} 2778SDValue 2779AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const 2780{ 2781 EVT OVT = Op.getValueType(); 2782 SDValue DST; 2783 if (OVT.getScalarType() == MVT::f64) { 2784 DST = LowerFDIV64(Op, DAG); 2785 } else if (OVT.getScalarType() == MVT::f32) { 2786 DST = LowerFDIV32(Op, DAG); 2787 } else { 2788 DST = SDValue(Op.getNode(), 0); 2789 } 2790 return DST; 2791} 2792 2793SDValue 2794AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const 2795{ 2796 EVT OVT = Op.getValueType(); 2797 SDValue DST; 2798 if (OVT.getScalarType() == MVT::i64) { 2799 DST = LowerSDIV64(Op, DAG); 2800 } else if (OVT.getScalarType() == MVT::i32) { 2801 DST = LowerSDIV32(Op, DAG); 2802 } else if (OVT.getScalarType() == MVT::i16 2803 || OVT.getScalarType() == MVT::i8) { 2804 DST = LowerSDIV24(Op, DAG); 2805 } else { 2806 DST = SDValue(Op.getNode(), 0); 2807 } 2808 return DST; 2809} 2810 2811SDValue 2812AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const 2813{ 2814 EVT OVT = Op.getValueType(); 2815 SDValue DST; 2816 if (OVT.getScalarType() == MVT::i64) { 2817 DST = LowerSREM64(Op, DAG); 2818 } else if (OVT.getScalarType() == MVT::i32) { 2819 DST = LowerSREM32(Op, DAG); 2820 } else if (OVT.getScalarType() == MVT::i16) { 2821 DST = LowerSREM16(Op, DAG); 2822 } else if (OVT.getScalarType() == MVT::i8) { 2823 DST = LowerSREM8(Op, DAG); 2824 } else { 2825 DST = SDValue(Op.getNode(), 0); 2826 } 2827 return DST; 2828} 2829 2830SDValue 2831AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const 2832{ 2833 EVT OVT = Op.getValueType(); 2834 SDValue DST; 2835 if (OVT.getScalarType() == MVT::i64) { 2836 DST = LowerUREM64(Op, DAG); 2837 } else if (OVT.getScalarType() == MVT::i32) { 2838 DST = LowerUREM32(Op, DAG); 2839 } else if (OVT.getScalarType() == MVT::i16) { 2840 DST = LowerUREM16(Op, DAG); 2841 } 
else if (OVT.getScalarType() == MVT::i8) { 2842 DST = LowerUREM8(Op, DAG); 2843 } else { 2844 DST = SDValue(Op.getNode(), 0); 2845 } 2846 return DST; 2847} 2848 2849SDValue 2850AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const 2851{ 2852 DebugLoc DL = Op.getDebugLoc(); 2853 EVT OVT = Op.getValueType(); 2854 SDValue DST; 2855 bool isVec = OVT.isVector(); 2856 if (OVT.getScalarType() != MVT::i64) 2857 { 2858 DST = SDValue(Op.getNode(), 0); 2859 } else { 2860 assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!"); 2861 // TODO: This needs to be turned into a tablegen pattern 2862 SDValue LHS = Op.getOperand(0); 2863 SDValue RHS = Op.getOperand(1); 2864 2865 MVT INTTY = MVT::i32; 2866 if (OVT == MVT::v2i64) { 2867 INTTY = MVT::v2i32; 2868 } 2869 // mul64(h1, l1, h0, l0) 2870 SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, 2871 DL, 2872 INTTY, LHS); 2873 SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, 2874 DL, 2875 INTTY, LHS); 2876 SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, 2877 DL, 2878 INTTY, RHS); 2879 SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, 2880 DL, 2881 INTTY, RHS); 2882 // MULLO_UINT_1 r1, h0, l1 2883 SDValue RHILLO = DAG.getNode(AMDILISD::UMUL, 2884 DL, 2885 INTTY, RHSHI, LHSLO); 2886 // MULLO_UINT_1 r2, h1, l0 2887 SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL, 2888 DL, 2889 INTTY, RHSLO, LHSHI); 2890 // ADD_INT hr, r1, r2 2891 SDValue ADDHI = DAG.getNode(ISD::ADD, 2892 DL, 2893 INTTY, RHILLO, RLOHHI); 2894 // MULHI_UINT_1 r3, l1, l0 2895 SDValue RLOLLO = DAG.getNode(ISD::MULHU, 2896 DL, 2897 INTTY, RHSLO, LHSLO); 2898 // ADD_INT hr, hr, r3 2899 SDValue HIGH = DAG.getNode(ISD::ADD, 2900 DL, 2901 INTTY, ADDHI, RLOLLO); 2902 // MULLO_UINT_1 l3, l1, l0 2903 SDValue LOW = DAG.getNode(AMDILISD::UMUL, 2904 DL, 2905 INTTY, LHSLO, RHSLO); 2906 DST = DAG.getNode((isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, 2907 DL, 2908 OVT, LOW, HIGH); 2909 } 2910 return DST; 2911} 2912SDValue 2913AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const 2914{ 2915 EVT VT = Op.getValueType(); 2916 SDValue Nodes1; 2917 SDValue second; 2918 SDValue third; 2919 SDValue fourth; 2920 DebugLoc DL = Op.getDebugLoc(); 2921 Nodes1 = DAG.getNode(AMDILISD::VBUILD, 2922 DL, 2923 VT, Op.getOperand(0)); 2924#if 0 2925 bool allEqual = true; 2926 for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) { 2927 if (Op.getOperand(0) != Op.getOperand(x)) { 2928 allEqual = false; 2929 break; 2930 } 2931 } 2932 if (allEqual) { 2933 return Nodes1; 2934 } 2935#endif 2936 switch(Op.getNumOperands()) { 2937 default: 2938 case 1: 2939 break; 2940 case 4: 2941 fourth = Op.getOperand(3); 2942 if (fourth.getOpcode() != ISD::UNDEF) { 2943 Nodes1 = DAG.getNode( 2944 ISD::INSERT_VECTOR_ELT, 2945 DL, 2946 Op.getValueType(), 2947 Nodes1, 2948 fourth, 2949 DAG.getConstant(7, MVT::i32)); 2950 } 2951 case 3: 2952 third = Op.getOperand(2); 2953 if (third.getOpcode() != ISD::UNDEF) { 2954 Nodes1 = DAG.getNode( 2955 ISD::INSERT_VECTOR_ELT, 2956 DL, 2957 Op.getValueType(), 2958 Nodes1, 2959 third, 2960 DAG.getConstant(6, MVT::i32)); 2961 } 2962 case 2: 2963 second = Op.getOperand(1); 2964 if (second.getOpcode() != ISD::UNDEF) { 2965 Nodes1 = DAG.getNode( 2966 ISD::INSERT_VECTOR_ELT, 2967 DL, 2968 Op.getValueType(), 2969 Nodes1, 2970 second, 2971 DAG.getConstant(5, MVT::i32)); 2972 } 2973 break; 2974 }; 2975 return Nodes1; 2976} 2977 2978SDValue 2979AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, 2980 SelectionDAG &DAG) const 2981{ 2982 DebugLoc DL = Op.getDebugLoc(); 2983 EVT VT = Op.getValueType(); 2984 const SDValue *ptr = NULL; 2985 const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 2986 uint32_t swizzleNum = 0; 2987 SDValue DST; 2988 if (!VT.isVector()) { 2989 SDValue Res = Op.getOperand(0); 2990 return Res; 2991 } 2992 2993 if 
(Op.getOperand(1).getOpcode() != ISD::UNDEF) { 2994 ptr = &Op.getOperand(1); 2995 } else { 2996 ptr = &Op.getOperand(0); 2997 } 2998 if (CSDN) { 2999 swizzleNum = (uint32_t)CSDN->getZExtValue(); 3000 uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8)); 3001 uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8)); 3002 DST = DAG.getNode(AMDILISD::VINSERT, 3003 DL, 3004 VT, 3005 Op.getOperand(0), 3006 *ptr, 3007 DAG.getTargetConstant(mask2, MVT::i32), 3008 DAG.getTargetConstant(mask3, MVT::i32)); 3009 } else { 3010 uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8)); 3011 uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8)); 3012 SDValue res = DAG.getNode(AMDILISD::VINSERT, 3013 DL, VT, Op.getOperand(0), *ptr, 3014 DAG.getTargetConstant(mask2, MVT::i32), 3015 DAG.getTargetConstant(mask3, MVT::i32)); 3016 for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) { 3017 mask2 = 0x04030201 & ~(0xFF << (x * 8)); 3018 mask3 = 0x01010101 & (0xFF << (x * 8)); 3019 SDValue t = DAG.getNode(AMDILISD::VINSERT, 3020 DL, VT, Op.getOperand(0), *ptr, 3021 DAG.getTargetConstant(mask2, MVT::i32), 3022 DAG.getTargetConstant(mask3, MVT::i32)); 3023 SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(), 3024 DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32), 3025 Op.getOperand(2), DAG.getConstant(x, MVT::i32)); 3026 c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c); 3027 res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res); 3028 } 3029 DST = res; 3030 } 3031 return DST; 3032} 3033 3034SDValue 3035AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, 3036 SelectionDAG &DAG) const 3037{ 3038 EVT VT = Op.getValueType(); 3039 const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 3040 uint64_t swizzleNum = 0; 3041 DebugLoc DL = Op.getDebugLoc(); 3042 SDValue Res; 3043 if (!Op.getOperand(0).getValueType().isVector()) { 3044 Res = Op.getOperand(0); 3045 return Res; 3046 } 3047 if (CSDN) { 3048 // Static vector extraction 3049 swizzleNum 
= CSDN->getZExtValue() + 1; 3050 Res = DAG.getNode(AMDILISD::VEXTRACT, 3051 DL, VT, 3052 Op.getOperand(0), 3053 DAG.getTargetConstant(swizzleNum, MVT::i32)); 3054 } else { 3055 SDValue Op1 = Op.getOperand(1); 3056 uint32_t vecSize = 4; 3057 SDValue Op0 = Op.getOperand(0); 3058 SDValue res = DAG.getNode(AMDILISD::VEXTRACT, 3059 DL, VT, Op0, 3060 DAG.getTargetConstant(1, MVT::i32)); 3061 if (Op0.getValueType().isVector()) { 3062 vecSize = Op0.getValueType().getVectorNumElements(); 3063 } 3064 for (uint32_t x = 2; x <= vecSize; ++x) { 3065 SDValue t = DAG.getNode(AMDILISD::VEXTRACT, 3066 DL, VT, Op0, 3067 DAG.getTargetConstant(x, MVT::i32)); 3068 SDValue c = DAG.getNode(AMDILISD::CMP, 3069 DL, Op1.getValueType(), 3070 DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32), 3071 Op1, DAG.getConstant(x, MVT::i32)); 3072 res = DAG.getNode(AMDILISD::CMOVLOG, DL, 3073 VT, c, t, res); 3074 3075 } 3076 Res = res; 3077 } 3078 return Res; 3079} 3080 3081SDValue 3082AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, 3083 SelectionDAG &DAG) const 3084{ 3085 uint32_t vecSize = Op.getValueType().getVectorNumElements(); 3086 SDValue src = Op.getOperand(0); 3087 const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 3088 uint64_t offset = 0; 3089 EVT vecType = Op.getValueType().getVectorElementType(); 3090 DebugLoc DL = Op.getDebugLoc(); 3091 SDValue Result; 3092 if (CSDN) { 3093 offset = CSDN->getZExtValue(); 3094 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 3095 DL,vecType, src, DAG.getConstant(offset, MVT::i32)); 3096 Result = DAG.getNode(AMDILISD::VBUILD, DL, 3097 Op.getValueType(), Result); 3098 for (uint32_t x = 1; x < vecSize; ++x) { 3099 SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType, 3100 src, DAG.getConstant(offset + x, MVT::i32)); 3101 if (elt.getOpcode() != ISD::UNDEF) { 3102 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, 3103 Op.getValueType(), Result, elt, 3104 DAG.getConstant(x, MVT::i32)); 3105 } 3106 } 3107 } else { 3108 SDValue idx 
= Op.getOperand(1); 3109 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 3110 DL, vecType, src, idx); 3111 Result = DAG.getNode(AMDILISD::VBUILD, DL, 3112 Op.getValueType(), Result); 3113 for (uint32_t x = 1; x < vecSize; ++x) { 3114 idx = DAG.getNode(ISD::ADD, DL, vecType, 3115 idx, DAG.getConstant(1, MVT::i32)); 3116 SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType, 3117 src, idx); 3118 if (elt.getOpcode() != ISD::UNDEF) { 3119 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, 3120 Op.getValueType(), Result, elt, idx); 3121 } 3122 } 3123 } 3124 return Result; 3125} 3126SDValue 3127AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 3128 SelectionDAG &DAG) const 3129{ 3130 SDValue Res = DAG.getNode(AMDILISD::VBUILD, 3131 Op.getDebugLoc(), 3132 Op.getValueType(), 3133 Op.getOperand(0)); 3134 return Res; 3135} 3136SDValue 3137AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const 3138{ 3139 SDValue Cond = Op.getOperand(0); 3140 SDValue LHS = Op.getOperand(1); 3141 SDValue RHS = Op.getOperand(2); 3142 DebugLoc DL = Op.getDebugLoc(); 3143 Cond = getConversionNode(DAG, Cond, Op, true); 3144 Cond = DAG.getNode(AMDILISD::CMOVLOG, 3145 DL, 3146 Op.getValueType(), Cond, LHS, RHS); 3147 return Cond; 3148} 3149SDValue 3150AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const 3151{ 3152 SDValue Cond; 3153 SDValue LHS = Op.getOperand(0); 3154 SDValue RHS = Op.getOperand(1); 3155 SDValue CC = Op.getOperand(2); 3156 DebugLoc DL = Op.getDebugLoc(); 3157 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3158 unsigned int AMDILCC = CondCCodeToCC( 3159 SetCCOpcode, 3160 LHS.getValueType().getSimpleVT().SimpleTy); 3161 assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!"); 3162 Cond = DAG.getNode( 3163 ISD::SELECT_CC, 3164 Op.getDebugLoc(), 3165 LHS.getValueType(), 3166 LHS, RHS, 3167 DAG.getConstant(-1, MVT::i32), 3168 DAG.getConstant(0, MVT::i32), 3169 CC); 3170 Cond = getConversionNode(DAG, Cond, Op, true); 3171 Cond = 
DAG.getNode( 3172 ISD::AND, 3173 DL, 3174 Cond.getValueType(), 3175 DAG.getConstant(1, Cond.getValueType()), 3176 Cond); 3177 return Cond; 3178} 3179 3180SDValue 3181AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const 3182{ 3183 SDValue Data = Op.getOperand(0); 3184 VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1)); 3185 DebugLoc DL = Op.getDebugLoc(); 3186 EVT DVT = Data.getValueType(); 3187 EVT BVT = BaseType->getVT(); 3188 unsigned baseBits = BVT.getScalarType().getSizeInBits(); 3189 unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1; 3190 unsigned shiftBits = srcBits - baseBits; 3191 if (srcBits < 32) { 3192 // If the op is less than 32 bits, then it needs to extend to 32bits 3193 // so it can properly keep the upper bits valid. 3194 EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1); 3195 Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data); 3196 shiftBits = 32 - baseBits; 3197 DVT = IVT; 3198 } 3199 SDValue Shift = DAG.getConstant(shiftBits, DVT); 3200 // Shift left by 'Shift' bits. 3201 Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift); 3202 // Signed shift Right by 'Shift' bits. 3203 Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift); 3204 if (srcBits < 32) { 3205 // Once the sign extension is done, the op needs to be converted to 3206 // its original type. 3207 Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType()); 3208 } 3209 return Data; 3210} 3211EVT 3212AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const 3213{ 3214 int iSize = (size * numEle); 3215 int vEle = (iSize >> ((size == 64) ? 
6 : 5)); 3216 if (!vEle) { 3217 vEle = 1; 3218 } 3219 if (size == 64) { 3220 if (vEle == 1) { 3221 return EVT(MVT::i64); 3222 } else { 3223 return EVT(MVT::getVectorVT(MVT::i64, vEle)); 3224 } 3225 } else { 3226 if (vEle == 1) { 3227 return EVT(MVT::i32); 3228 } else { 3229 return EVT(MVT::getVectorVT(MVT::i32, vEle)); 3230 } 3231 } 3232} 3233 3234SDValue 3235AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const 3236{ 3237 SDValue Src = Op.getOperand(0); 3238 SDValue Dst = Op; 3239 SDValue Res; 3240 DebugLoc DL = Op.getDebugLoc(); 3241 EVT SrcVT = Src.getValueType(); 3242 EVT DstVT = Dst.getValueType(); 3243 // Lets bitcast the floating point types to an 3244 // equivalent integer type before converting to vectors. 3245 if (SrcVT.getScalarType().isFloatingPoint()) { 3246 Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType( 3247 SrcVT.getScalarType().getSimpleVT().getSizeInBits(), 3248 SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1), 3249 Src); 3250 SrcVT = Src.getValueType(); 3251 } 3252 uint32_t ScalarSrcSize = SrcVT.getScalarType() 3253 .getSimpleVT().getSizeInBits(); 3254 uint32_t ScalarDstSize = DstVT.getScalarType() 3255 .getSimpleVT().getSizeInBits(); 3256 uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1; 3257 uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1; 3258 bool isVec = SrcVT.isVector(); 3259 if (DstVT.getScalarType().isInteger() && 3260 (SrcVT.getScalarType().isInteger() 3261 || SrcVT.getScalarType().isFloatingPoint())) { 3262 if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16) 3263 || (ScalarSrcSize == 64 3264 && DstNumEle == 4 3265 && ScalarDstSize == 16)) { 3266 // This is the problematic case when bitcasting i64 <-> <4 x i16> 3267 // This approach is a little different as we cannot generate a 3268 // <4 x i64> vector 3269 // as that is illegal in our backend and we are already past 3270 // the DAG legalizer. 
3271 // So, in this case, we will do the following conversion. 3272 // Case 1: 3273 // %dst = <4 x i16> %src bitconvert i64 ==> 3274 // %tmp = <4 x i16> %src convert <4 x i32> 3275 // %tmp = <4 x i32> %tmp and 0xFFFF 3276 // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16> 3277 // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw 3278 // %dst = <2 x i32> %tmp bitcast i64 3279 // case 2: 3280 // %dst = i64 %src bitconvert <4 x i16> ==> 3281 // %tmp = i64 %src bitcast <2 x i32> 3282 // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy 3283 // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16> 3284 // %tmp = <4 x i32> %tmp and 0xFFFF 3285 // %dst = <4 x i16> %tmp bitcast <4 x i32> 3286 SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32, 3287 DAG.getConstant(0xFFFF, MVT::i32)); 3288 SDValue const16 = DAG.getConstant(16, MVT::i32); 3289 if (ScalarDstSize == 64) { 3290 // case 1 3291 Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32); 3292 Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask); 3293 SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, 3294 Op, DAG.getConstant(0, MVT::i32)); 3295 SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, 3296 Op, DAG.getConstant(1, MVT::i32)); 3297 y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16); 3298 SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, 3299 Op, DAG.getConstant(2, MVT::i32)); 3300 SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, 3301 Op, DAG.getConstant(3, MVT::i32)); 3302 w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16); 3303 x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y); 3304 y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w); 3305 Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y); 3306 return Res; 3307 } else { 3308 // case 2 3309 SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src); 3310 SDValue lor16 3311 = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16); 3312 SDValue hi = DAG.getNode((isVec) ? 
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src); 3313 SDValue hir16 3314 = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16); 3315 SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL, 3316 MVT::v4i32, lo); 3317 SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL, 3318 getPointerTy(), DAG.getConstant(1, MVT::i32)); 3319 resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32, 3320 resVec, lor16, idxVal); 3321 idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL, 3322 getPointerTy(), DAG.getConstant(2, MVT::i32)); 3323 resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32, 3324 resVec, hi, idxVal); 3325 idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL, 3326 getPointerTy(), DAG.getConstant(3, MVT::i32)); 3327 resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32, 3328 resVec, hir16, idxVal); 3329 resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask); 3330 Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16); 3331 return Res; 3332 } 3333 } else { 3334 // There are four cases we need to worry about for bitcasts 3335 // where the size of all 3336 // source, intermediates and result is <= 128 bits, unlike 3337 // the above case 3338 // 1) Sub32bit bitcast 32bitAlign 3339 // %dst = <4 x i8> bitcast i32 3340 // (also <[2|4] x i16> to <[2|4] x i32>) 3341 // 2) 32bitAlign bitcast Sub32bit 3342 // %dst = i32 bitcast <4 x i8> 3343 // 3) Sub32bit bitcast LargerSub32bit 3344 // %dst = <2 x i8> bitcast i16 3345 // (also <4 x i8> to <2 x i16>) 3346 // 4) Sub32bit bitcast SmallerSub32bit 3347 // %dst = i16 bitcast <2 x i8> 3348 // (also <2 x i16> to <4 x i8>) 3349 // This also only handles types that are powers of two 3350 if ((ScalarDstSize & (ScalarDstSize - 1)) 3351 || (ScalarSrcSize & (ScalarSrcSize - 1))) { 3352 } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) { 3353 // case 1: 3354 EVT IntTy = genIntType(ScalarDstSize, SrcNumEle); 3355#if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors 3356 SDValue res = DAG.getSExtOrTrunc(Src, DL, 
IntTy); 3357#else 3358 SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy, 3359 DAG.getUNDEF(IntTy.getScalarType())); 3360 for (uint32_t x = 0; x < SrcNumEle; ++x) { 3361 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL, 3362 getPointerTy(), DAG.getConstant(x, MVT::i32)); 3363 SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 3364 SrcVT.getScalarType(), Src, 3365 DAG.getConstant(x, MVT::i32)); 3366 temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType()); 3367 res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy, 3368 res, temp, idx); 3369 } 3370#endif 3371 SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy, 3372 DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32)); 3373 SDValue *newEle = new SDValue[SrcNumEle]; 3374 res = DAG.getNode(ISD::AND, DL, IntTy, res, mask); 3375 for (uint32_t x = 0; x < SrcNumEle; ++x) { 3376 newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 3377 IntTy.getScalarType(), res, 3378 DAG.getConstant(x, MVT::i32)); 3379 } 3380 uint32_t Ratio = SrcNumEle / DstNumEle; 3381 for (uint32_t x = 0; x < SrcNumEle; ++x) { 3382 if (x % Ratio) { 3383 newEle[x] = DAG.getNode(ISD::SHL, DL, 3384 IntTy.getScalarType(), newEle[x], 3385 DAG.getConstant(ScalarSrcSize * (x % Ratio), 3386 MVT::i32)); 3387 } 3388 } 3389 for (uint32_t x = 0; x < SrcNumEle; x += 2) { 3390 newEle[x] = DAG.getNode(ISD::OR, DL, 3391 IntTy.getScalarType(), newEle[x], newEle[x + 1]); 3392 } 3393 if (ScalarSrcSize == 8) { 3394 for (uint32_t x = 0; x < SrcNumEle; x += 4) { 3395 newEle[x] = DAG.getNode(ISD::OR, DL, 3396 IntTy.getScalarType(), newEle[x], newEle[x + 2]); 3397 } 3398 if (DstNumEle == 1) { 3399 Dst = newEle[0]; 3400 } else { 3401 Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT, 3402 newEle[0]); 3403 for (uint32_t x = 1; x < DstNumEle; ++x) { 3404 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL, 3405 getPointerTy(), DAG.getConstant(x, MVT::i32)); 3406 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, 3407 DstVT, Dst, newEle[x * 4], idx); 3408 } 3409 } 3410 } else { 3411 
if (DstNumEle == 1) { 3412 Dst = newEle[0]; 3413 } else { 3414 Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT, 3415 newEle[0]); 3416 for (uint32_t x = 1; x < DstNumEle; ++x) { 3417 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL, 3418 getPointerTy(), DAG.getConstant(x, MVT::i32)); 3419 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, 3420 DstVT, Dst, newEle[x * 2], idx); 3421 } 3422 } 3423 } 3424 delete [] newEle; 3425 return Dst; 3426 } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) { 3427 // case 2: 3428 EVT IntTy = genIntType(ScalarSrcSize, DstNumEle); 3429 SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy, 3430 DAG.getUNDEF(IntTy.getScalarType())); 3431 uint32_t mult = (ScalarDstSize == 8) ? 4 : 2; 3432 for (uint32_t x = 0; x < SrcNumEle; ++x) { 3433 for (uint32_t y = 0; y < mult; ++y) { 3434 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL, 3435 getPointerTy(), 3436 DAG.getConstant(x * mult + y, MVT::i32)); 3437 SDValue t; 3438 if (SrcNumEle > 1) { 3439 t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 3440 DL, SrcVT.getScalarType(), Src, 3441 DAG.getConstant(x, MVT::i32)); 3442 } else { 3443 t = Src; 3444 } 3445 if (y != 0) { 3446 t = DAG.getNode(ISD::SRL, DL, t.getValueType(), 3447 t, DAG.getConstant(y * ScalarDstSize, 3448 MVT::i32)); 3449 } 3450 vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, 3451 DL, IntTy, vec, t, idx); 3452 } 3453 } 3454 Dst = DAG.getSExtOrTrunc(vec, DL, DstVT); 3455 return Dst; 3456 } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) { 3457 // case 3: 3458 SDValue *numEle = new SDValue[SrcNumEle]; 3459 for (uint32_t x = 0; x < SrcNumEle; ++x) { 3460 numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 3461 MVT::i8, Src, DAG.getConstant(x, MVT::i32)); 3462 numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16); 3463 numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x], 3464 DAG.getConstant(0xFF, MVT::i16)); 3465 } 3466 for (uint32_t x = 1; x < SrcNumEle; x += 2) { 3467 numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x], 3468 
DAG.getConstant(8, MVT::i16)); 3469 numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16, 3470 numEle[x-1], numEle[x]); 3471 } 3472 if (DstNumEle > 1) { 3473 // If we are not a scalar i16, the only other case is a 3474 // v2i16 since we can't have v8i8 at this point, v4i16 3475 // cannot be generated 3476 Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16, 3477 numEle[0]); 3478 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL, 3479 getPointerTy(), DAG.getConstant(1, MVT::i32)); 3480 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16, 3481 Dst, numEle[2], idx); 3482 } else { 3483 Dst = numEle[0]; 3484 } 3485 delete [] numEle; 3486 return Dst; 3487 } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) { 3488 // case 4: 3489 SDValue *numEle = new SDValue[DstNumEle]; 3490 for (uint32_t x = 0; x < SrcNumEle; ++x) { 3491 numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 3492 MVT::i16, Src, DAG.getConstant(x, MVT::i32)); 3493 numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16, 3494 numEle[x * 2], DAG.getConstant(8, MVT::i16)); 3495 } 3496 MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16; 3497 Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]); 3498 for (uint32_t x = 1; x < DstNumEle; ++x) { 3499 SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL, 3500 getPointerTy(), DAG.getConstant(x, MVT::i32)); 3501 Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty, 3502 Dst, numEle[x], idx); 3503 } 3504 delete [] numEle; 3505 ty = (SrcNumEle == 1) ? 
MVT::v2i8 : MVT::v4i8; 3506 Res = DAG.getSExtOrTrunc(Dst, DL, ty); 3507 return Res; 3508 } 3509 } 3510 } 3511 Res = DAG.getNode(AMDILISD::BITCONV, 3512 Dst.getDebugLoc(), 3513 Dst.getValueType(), Src); 3514 return Res; 3515} 3516 3517SDValue 3518AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 3519 SelectionDAG &DAG) const 3520{ 3521 SDValue Chain = Op.getOperand(0); 3522 SDValue Size = Op.getOperand(1); 3523 unsigned int SPReg = AMDIL::SP; 3524 DebugLoc DL = Op.getDebugLoc(); 3525 SDValue SP = DAG.getCopyFromReg(Chain, 3526 DL, 3527 SPReg, MVT::i32); 3528 SDValue NewSP = DAG.getNode(ISD::ADD, 3529 DL, 3530 MVT::i32, SP, Size); 3531 Chain = DAG.getCopyToReg(SP.getValue(1), 3532 DL, 3533 SPReg, NewSP); 3534 SDValue Ops[2] = {NewSP, Chain}; 3535 Chain = DAG.getMergeValues(Ops, 2 ,DL); 3536 return Chain; 3537} 3538SDValue 3539AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const 3540{ 3541 SDValue Chain = Op.getOperand(0); 3542 SDValue Cond = Op.getOperand(1); 3543 SDValue Jump = Op.getOperand(2); 3544 SDValue Result; 3545 Result = DAG.getNode( 3546 AMDILISD::BRANCH_COND, 3547 Op.getDebugLoc(), 3548 Op.getValueType(), 3549 Chain, Jump, Cond); 3550 return Result; 3551} 3552 3553SDValue 3554AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const 3555{ 3556 SDValue Chain = Op.getOperand(0); 3557 SDValue CC = Op.getOperand(1); 3558 SDValue LHS = Op.getOperand(2); 3559 SDValue RHS = Op.getOperand(3); 3560 SDValue JumpT = Op.getOperand(4); 3561 SDValue CmpValue; 3562 SDValue Result; 3563 CmpValue = DAG.getNode( 3564 ISD::SELECT_CC, 3565 Op.getDebugLoc(), 3566 LHS.getValueType(), 3567 LHS, RHS, 3568 DAG.getConstant(-1, MVT::i32), 3569 DAG.getConstant(0, MVT::i32), 3570 CC); 3571 Result = DAG.getNode( 3572 AMDILISD::BRANCH_COND, 3573 CmpValue.getDebugLoc(), 3574 MVT::Other, Chain, 3575 JumpT, CmpValue); 3576 return Result; 3577} 3578 3579SDValue 3580AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const 3581{ 3582 SDValue 
Result = DAG.getNode( 3583 AMDILISD::DP_TO_FP, 3584 Op.getDebugLoc(), 3585 Op.getValueType(), 3586 Op.getOperand(0), 3587 Op.getOperand(1)); 3588 return Result; 3589} 3590 3591SDValue 3592AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const 3593{ 3594 SDValue Result = DAG.getNode( 3595 AMDILISD::VCONCAT, 3596 Op.getDebugLoc(), 3597 Op.getValueType(), 3598 Op.getOperand(0), 3599 Op.getOperand(1)); 3600 return Result; 3601} 3602// LowerRET - Lower an ISD::RET node. 3603SDValue 3604AMDILTargetLowering::LowerReturn(SDValue Chain, 3605 CallingConv::ID CallConv, bool isVarArg, 3606 const SmallVectorImpl<ISD::OutputArg> &Outs, 3607 const SmallVectorImpl<SDValue> &OutVals, 3608 DebugLoc dl, SelectionDAG &DAG) 3609const 3610{ 3611 //MachineFunction& MF = DAG.getMachineFunction(); 3612 // CCValAssign - represent the assignment of the return value 3613 // to a location 3614 SmallVector<CCValAssign, 16> RVLocs; 3615 3616 // CCState - Info about the registers and stack slot 3617 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3618 getTargetMachine(), RVLocs, *DAG.getContext()); 3619 3620 // Analyze return values of ISD::RET 3621 CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32); 3622 // If this is the first return lowered for this function, add 3623 // the regs to the liveout set for the function 3624 MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); 3625 for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) { 3626 if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) { 3627 MRI.addLiveOut(RVLocs[i].getLocReg()); 3628 } 3629 } 3630 // FIXME: implement this when tail call is implemented 3631 // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL); 3632 // both x86 and ppc implement this in ISelLowering 3633 3634 // Regular return here 3635 SDValue Flag; 3636 SmallVector<SDValue, 6> RetOps; 3637 RetOps.push_back(Chain); 3638 RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32)); 3639 for 
(unsigned int i = 0, e = RVLocs.size(); i != e; ++i) { 3640 CCValAssign &VA = RVLocs[i]; 3641 SDValue ValToCopy = OutVals[i]; 3642 assert(VA.isRegLoc() && "Can only return in registers!"); 3643 // ISD::Ret => ret chain, (regnum1, val1), ... 3644 // So i * 2 + 1 index only the regnums 3645 Chain = DAG.getCopyToReg(Chain, 3646 dl, 3647 VA.getLocReg(), 3648 ValToCopy, 3649 Flag); 3650 // guarantee that all emitted copies are stuck together 3651 // avoiding something bad 3652 Flag = Chain.getValue(1); 3653 } 3654 /*if (MF.getFunction()->hasStructRetAttr()) { 3655 assert(0 && "Struct returns are not yet implemented!"); 3656 // Both MIPS and X86 have this 3657 }*/ 3658 RetOps[0] = Chain; 3659 if (Flag.getNode()) 3660 RetOps.push_back(Flag); 3661 3662 Flag = DAG.getNode(AMDILISD::RET_FLAG, 3663 dl, 3664 MVT::Other, &RetOps[0], RetOps.size()); 3665 return Flag; 3666} 3667 3668unsigned int 3669AMDILTargetLowering::getFunctionAlignment(const Function *) const 3670{ 3671 return 0; 3672} 3673 3674void 3675AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB, 3676 MachineBasicBlock::iterator &BBI, 3677 DebugLoc *DL, const TargetInstrInfo *TII) const 3678{ 3679 mBB = BB; 3680 mBBI = BBI; 3681 mDL = DL; 3682 mTII = TII; 3683} 3684uint32_t 3685AMDILTargetLowering::genVReg(uint32_t regType) const 3686{ 3687 return mBB->getParent()->getRegInfo().createVirtualRegister( 3688 getTargetMachine().getRegisterInfo()->getRegClass(regType)); 3689} 3690 3691MachineInstrBuilder 3692AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const 3693{ 3694 return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst); 3695} 3696 3697MachineInstrBuilder 3698AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst, 3699 uint32_t src1) const 3700{ 3701 return generateMachineInst(opcode, dst).addReg(src1); 3702} 3703 3704MachineInstrBuilder 3705AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst, 3706 uint32_t src1, uint32_t src2) const 3707{ 3708 
return generateMachineInst(opcode, dst, src1).addReg(src2); 3709} 3710 3711MachineInstrBuilder 3712AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst, 3713 uint32_t src1, uint32_t src2, uint32_t src3) const 3714{ 3715 return generateMachineInst(opcode, dst, src1, src2).addReg(src3); 3716} 3717 3718 3719SDValue 3720AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const 3721{ 3722 DebugLoc DL = Op.getDebugLoc(); 3723 EVT OVT = Op.getValueType(); 3724 SDValue LHS = Op.getOperand(0); 3725 SDValue RHS = Op.getOperand(1); 3726 MVT INTTY; 3727 MVT FLTTY; 3728 if (!OVT.isVector()) { 3729 INTTY = MVT::i32; 3730 FLTTY = MVT::f32; 3731 } else if (OVT.getVectorNumElements() == 2) { 3732 INTTY = MVT::v2i32; 3733 FLTTY = MVT::v2f32; 3734 } else if (OVT.getVectorNumElements() == 4) { 3735 INTTY = MVT::v4i32; 3736 FLTTY = MVT::v4f32; 3737 } 3738 unsigned bitsize = OVT.getScalarType().getSizeInBits(); 3739 // char|short jq = ia ^ ib; 3740 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); 3741 3742 // jq = jq >> (bitsize - 2) 3743 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); 3744 3745 // jq = jq | 0x1 3746 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); 3747 3748 // jq = (int)jq 3749 jq = DAG.getSExtOrTrunc(jq, DL, INTTY); 3750 3751 // int ia = (int)LHS; 3752 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); 3753 3754 // int ib, (int)RHS; 3755 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); 3756 3757 // float fa = (float)ia; 3758 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); 3759 3760 // float fb = (float)ib; 3761 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); 3762 3763 // float fq = native_divide(fa, fb); 3764 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb); 3765 3766 // fq = trunc(fq); 3767 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); 3768 3769 // float fqneg = -fq; 3770 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); 3771 3772 // float fr = 
mad(fqneg, fb, fa); 3773 SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa); 3774 3775 // int iq = (int)fq; 3776 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); 3777 3778 // fr = fabs(fr); 3779 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr); 3780 3781 // fb = fabs(fb); 3782 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); 3783 3784 // int cv = fr >= fb; 3785 SDValue cv; 3786 if (INTTY == MVT::i32) { 3787 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 3788 } else { 3789 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 3790 } 3791 // jq = (cv ? jq : 0); 3792 jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq, 3793 DAG.getConstant(0, OVT)); 3794 // dst = iq + jq; 3795 iq = DAG.getSExtOrTrunc(iq, DL, OVT); 3796 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); 3797 return iq; 3798} 3799 3800SDValue 3801AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const 3802{ 3803 DebugLoc DL = Op.getDebugLoc(); 3804 EVT OVT = Op.getValueType(); 3805 SDValue LHS = Op.getOperand(0); 3806 SDValue RHS = Op.getOperand(1); 3807 // The LowerSDIV32 function generates equivalent to the following IL. 
3808 // mov r0, LHS 3809 // mov r1, RHS 3810 // ilt r10, r0, 0 3811 // ilt r11, r1, 0 3812 // iadd r0, r0, r10 3813 // iadd r1, r1, r11 3814 // ixor r0, r0, r10 3815 // ixor r1, r1, r11 3816 // udiv r0, r0, r1 3817 // ixor r10, r10, r11 3818 // iadd r0, r0, r10 3819 // ixor DST, r0, r10 3820 3821 // mov r0, LHS 3822 SDValue r0 = LHS; 3823 3824 // mov r1, RHS 3825 SDValue r1 = RHS; 3826 3827 // ilt r10, r0, 0 3828 SDValue r10 = DAG.getSelectCC(DL, 3829 r0, DAG.getConstant(0, OVT), 3830 DAG.getConstant(-1, MVT::i32), 3831 DAG.getConstant(0, MVT::i32), 3832 ISD::SETLT); 3833 3834 // ilt r11, r1, 0 3835 SDValue r11 = DAG.getSelectCC(DL, 3836 r1, DAG.getConstant(0, OVT), 3837 DAG.getConstant(-1, MVT::i32), 3838 DAG.getConstant(0, MVT::i32), 3839 ISD::SETLT); 3840 3841 // iadd r0, r0, r10 3842 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 3843 3844 // iadd r1, r1, r11 3845 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 3846 3847 // ixor r0, r0, r10 3848 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 3849 3850 // ixor r1, r1, r11 3851 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 3852 3853 // udiv r0, r0, r1 3854 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); 3855 3856 // ixor r10, r10, r11 3857 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11); 3858 3859 // iadd r0, r0, r10 3860 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 3861 3862 // ixor DST, r0, r10 3863 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 3864 return DST; 3865} 3866 3867SDValue 3868AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const 3869{ 3870 return SDValue(Op.getNode(), 0); 3871} 3872 3873SDValue 3874AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const 3875{ 3876 DebugLoc DL = Op.getDebugLoc(); 3877 EVT OVT = Op.getValueType(); 3878 SDValue LHS = Op.getOperand(0); 3879 SDValue RHS = Op.getOperand(1); 3880 MVT INTTY; 3881 MVT FLTTY; 3882 if (!OVT.isVector()) { 3883 INTTY = MVT::i32; 3884 FLTTY = MVT::f32; 3885 } else if (OVT.getVectorNumElements() == 2) { 3886 INTTY 
= MVT::v2i32; 3887 FLTTY = MVT::v2f32; 3888 } else if (OVT.getVectorNumElements() == 4) { 3889 INTTY = MVT::v4i32; 3890 FLTTY = MVT::v4f32; 3891 } 3892 3893 // The LowerUDIV24 function implements the following CL. 3894 // int ia = (int)LHS 3895 // float fa = (float)ia 3896 // int ib = (int)RHS 3897 // float fb = (float)ib 3898 // float fq = native_divide(fa, fb) 3899 // fq = trunc(fq) 3900 // float t = mad(fq, fb, fb) 3901 // int iq = (int)fq - (t <= fa) 3902 // return (type)iq 3903 3904 // int ia = (int)LHS 3905 SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY); 3906 3907 // float fa = (float)ia 3908 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); 3909 3910 // int ib = (int)RHS 3911 SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY); 3912 3913 // float fb = (float)ib 3914 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); 3915 3916 // float fq = native_divide(fa, fb) 3917 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb); 3918 3919 // fq = trunc(fq) 3920 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); 3921 3922 // float t = mad(fq, fb, fb) 3923 SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb); 3924 3925 // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1 3926 SDValue iq; 3927 fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); 3928 if (INTTY == MVT::i32) { 3929 iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE); 3930 } else { 3931 iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE); 3932 } 3933 iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq); 3934 3935 3936 // return (type)iq 3937 iq = DAG.getZExtOrTrunc(iq, DL, OVT); 3938 return iq; 3939 3940} 3941 3942SDValue 3943AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const 3944{ 3945 DebugLoc DL = Op.getDebugLoc(); 3946 EVT OVT = Op.getValueType(); 3947 MVT INTTY = MVT::i32; 3948 if (OVT == MVT::v2i8) { 3949 INTTY = MVT::v2i32; 3950 } else if (OVT == MVT::v4i8) { 3951 INTTY = MVT::v4i32; 3952 } 3953 SDValue LHS = 
DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 3954 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 3955 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 3956 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 3957 return LHS; 3958} 3959 3960SDValue 3961AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const 3962{ 3963 DebugLoc DL = Op.getDebugLoc(); 3964 EVT OVT = Op.getValueType(); 3965 MVT INTTY = MVT::i32; 3966 if (OVT == MVT::v2i16) { 3967 INTTY = MVT::v2i32; 3968 } else if (OVT == MVT::v4i16) { 3969 INTTY = MVT::v4i32; 3970 } 3971 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 3972 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 3973 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 3974 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 3975 return LHS; 3976} 3977 3978SDValue 3979AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const 3980{ 3981 DebugLoc DL = Op.getDebugLoc(); 3982 EVT OVT = Op.getValueType(); 3983 SDValue LHS = Op.getOperand(0); 3984 SDValue RHS = Op.getOperand(1); 3985 // The LowerSREM32 function generates equivalent to the following IL. 
3986 // mov r0, LHS 3987 // mov r1, RHS 3988 // ilt r10, r0, 0 3989 // ilt r11, r1, 0 3990 // iadd r0, r0, r10 3991 // iadd r1, r1, r11 3992 // ixor r0, r0, r10 3993 // ixor r1, r1, r11 3994 // udiv r20, r0, r1 3995 // umul r20, r20, r1 3996 // sub r0, r0, r20 3997 // iadd r0, r0, r10 3998 // ixor DST, r0, r10 3999 4000 // mov r0, LHS 4001 SDValue r0 = LHS; 4002 4003 // mov r1, RHS 4004 SDValue r1 = RHS; 4005 4006 // ilt r10, r0, 0 4007 SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT, 4008 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 4009 r0, DAG.getConstant(0, OVT)); 4010 4011 // ilt r11, r1, 0 4012 SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT, 4013 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 4014 r1, DAG.getConstant(0, OVT)); 4015 4016 // iadd r0, r0, r10 4017 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 4018 4019 // iadd r1, r1, r11 4020 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 4021 4022 // ixor r0, r0, r10 4023 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 4024 4025 // ixor r1, r1, r11 4026 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 4027 4028 // udiv r20, r0, r1 4029 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1); 4030 4031 // umul r20, r20, r1 4032 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1); 4033 4034 // sub r0, r0, r20 4035 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); 4036 4037 // iadd r0, r0, r10 4038 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 4039 4040 // ixor DST, r0, r10 4041 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 4042 return DST; 4043} 4044 4045SDValue 4046AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const 4047{ 4048 return SDValue(Op.getNode(), 0); 4049} 4050 4051SDValue 4052AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const 4053{ 4054 DebugLoc DL = Op.getDebugLoc(); 4055 EVT OVT = Op.getValueType(); 4056 MVT INTTY = MVT::i32; 4057 if (OVT == MVT::v2i8) { 4058 INTTY = MVT::v2i32; 4059 } else if (OVT == MVT::v4i8) { 4060 INTTY = 
MVT::v4i32; 4061 } 4062 SDValue LHS = Op.getOperand(0); 4063 SDValue RHS = Op.getOperand(1); 4064 // The LowerUREM8 function generates equivalent to the following IL. 4065 // mov r0, as_u32(LHS) 4066 // mov r1, as_u32(RHS) 4067 // and r10, r0, 0xFF 4068 // and r11, r1, 0xFF 4069 // cmov_logical r3, r11, r11, 0x1 4070 // udiv r3, r10, r3 4071 // cmov_logical r3, r11, r3, 0 4072 // umul r3, r3, r11 4073 // sub r3, r10, r3 4074 // and as_u8(DST), r3, 0xFF 4075 4076 // mov r0, as_u32(LHS) 4077 SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY); 4078 4079 // mov r1, as_u32(RHS) 4080 SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY); 4081 4082 // and r10, r0, 0xFF 4083 SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0, 4084 DAG.getConstant(0xFF, INTTY)); 4085 4086 // and r11, r1, 0xFF 4087 SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1, 4088 DAG.getConstant(0xFF, INTTY)); 4089 4090 // cmov_logical r3, r11, r11, 0x1 4091 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11, 4092 DAG.getConstant(0x01, INTTY)); 4093 4094 // udiv r3, r10, r3 4095 r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3); 4096 4097 // cmov_logical r3, r11, r3, 0 4098 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3, 4099 DAG.getConstant(0, INTTY)); 4100 4101 // umul r3, r3, r11 4102 r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11); 4103 4104 // sub r3, r10, r3 4105 r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3); 4106 4107 // and as_u8(DST), r3, 0xFF 4108 SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3, 4109 DAG.getConstant(0xFF, INTTY)); 4110 DST = DAG.getZExtOrTrunc(DST, DL, OVT); 4111 return DST; 4112} 4113 4114SDValue 4115AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const 4116{ 4117 DebugLoc DL = Op.getDebugLoc(); 4118 EVT OVT = Op.getValueType(); 4119 MVT INTTY = MVT::i32; 4120 if (OVT == MVT::v2i16) { 4121 INTTY = MVT::v2i32; 4122 } else if (OVT == MVT::v4i16) { 4123 INTTY = MVT::v4i32; 4124 } 4125 SDValue LHS = Op.getOperand(0); 4126 SDValue RHS = 
Op.getOperand(1); 4127 // The LowerUREM16 function generatest equivalent to the following IL. 4128 // mov r0, LHS 4129 // mov r1, RHS 4130 // DIV = LowerUDIV16(LHS, RHS) 4131 // and r10, r0, 0xFFFF 4132 // and r11, r1, 0xFFFF 4133 // cmov_logical r3, r11, r11, 0x1 4134 // udiv as_u16(r3), as_u32(r10), as_u32(r3) 4135 // and r3, r3, 0xFFFF 4136 // cmov_logical r3, r11, r3, 0 4137 // umul r3, r3, r11 4138 // sub r3, r10, r3 4139 // and DST, r3, 0xFFFF 4140 4141 // mov r0, LHS 4142 SDValue r0 = LHS; 4143 4144 // mov r1, RHS 4145 SDValue r1 = RHS; 4146 4147 // and r10, r0, 0xFFFF 4148 SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0, 4149 DAG.getConstant(0xFFFF, OVT)); 4150 4151 // and r11, r1, 0xFFFF 4152 SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1, 4153 DAG.getConstant(0xFFFF, OVT)); 4154 4155 // cmov_logical r3, r11, r11, 0x1 4156 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11, 4157 DAG.getConstant(0x01, OVT)); 4158 4159 // udiv as_u16(r3), as_u32(r10), as_u32(r3) 4160 r10 = DAG.getZExtOrTrunc(r10, DL, INTTY); 4161 r3 = DAG.getZExtOrTrunc(r3, DL, INTTY); 4162 r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3); 4163 r3 = DAG.getZExtOrTrunc(r3, DL, OVT); 4164 r10 = DAG.getZExtOrTrunc(r10, DL, OVT); 4165 4166 // and r3, r3, 0xFFFF 4167 r3 = DAG.getNode(ISD::AND, DL, OVT, r3, 4168 DAG.getConstant(0xFFFF, OVT)); 4169 4170 // cmov_logical r3, r11, r3, 0 4171 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3, 4172 DAG.getConstant(0, OVT)); 4173 // umul r3, r3, r11 4174 r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11); 4175 4176 // sub r3, r10, r3 4177 r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3); 4178 4179 // and DST, r3, 0xFFFF 4180 SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3, 4181 DAG.getConstant(0xFFFF, OVT)); 4182 return DST; 4183} 4184 4185SDValue 4186AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const 4187{ 4188 DebugLoc DL = Op.getDebugLoc(); 4189 EVT OVT = Op.getValueType(); 4190 SDValue LHS = Op.getOperand(0); 4191 SDValue 
RHS = Op.getOperand(1); 4192 // The LowerUREM32 function generates equivalent to the following IL. 4193 // udiv r20, LHS, RHS 4194 // umul r20, r20, RHS 4195 // sub DST, LHS, r20 4196 4197 // udiv r20, LHS, RHS 4198 SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS); 4199 4200 // umul r20, r20, RHS 4201 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS); 4202 4203 // sub DST, LHS, r20 4204 SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20); 4205 return DST; 4206} 4207 4208SDValue 4209AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const 4210{ 4211 return SDValue(Op.getNode(), 0); 4212} 4213 4214 4215SDValue 4216AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const 4217{ 4218 DebugLoc DL = Op.getDebugLoc(); 4219 EVT OVT = Op.getValueType(); 4220 MVT INTTY = MVT::i32; 4221 if (OVT == MVT::v2f32) { 4222 INTTY = MVT::v2i32; 4223 } else if (OVT == MVT::v4f32) { 4224 INTTY = MVT::v4i32; 4225 } 4226 SDValue LHS = Op.getOperand(0); 4227 SDValue RHS = Op.getOperand(1); 4228 SDValue DST; 4229 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 4230 &this->getTargetMachine())->getSubtargetImpl(); 4231 if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { 4232 // TODO: This doesn't work for vector types yet 4233 // The LowerFDIV32 function generates equivalent to the following 4234 // IL: 4235 // mov r20, as_int(LHS) 4236 // mov r21, as_int(RHS) 4237 // and r30, r20, 0x7f800000 4238 // and r31, r20, 0x807FFFFF 4239 // and r32, r21, 0x7f800000 4240 // and r33, r21, 0x807FFFFF 4241 // ieq r40, r30, 0x7F800000 4242 // ieq r41, r31, 0x7F800000 4243 // ieq r42, r32, 0 4244 // ieq r43, r33, 0 4245 // and r50, r20, 0x80000000 4246 // and r51, r21, 0x80000000 4247 // ior r32, r32, 0x3f800000 4248 // ior r33, r33, 0x3f800000 4249 // cmov_logical r32, r42, r50, r32 4250 // cmov_logical r33, r43, r51, r33 4251 // cmov_logical r32, r40, r20, r32 4252 // cmov_logical r33, r41, r21, r33 4253 // ior r50, r40, r41 4254 
// ior r51, r42, r43 4255 // ior r50, r50, r51 4256 // inegate r52, r31 4257 // iadd r30, r30, r52 4258 // cmov_logical r30, r50, 0, r30 4259 // div_zeroop(infinity) r21, 1.0, r33 4260 // mul_ieee r20, r32, r21 4261 // and r22, r20, 0x7FFFFFFF 4262 // and r23, r20, 0x80000000 4263 // ishr r60, r22, 0x00000017 4264 // ishr r61, r30, 0x00000017 4265 // iadd r20, r20, r30 4266 // iadd r21, r22, r30 4267 // iadd r60, r60, r61 4268 // ige r42, 0, R60 4269 // ior r41, r23, 0x7F800000 4270 // ige r40, r60, 0x000000FF 4271 // cmov_logical r40, r50, 0, r40 4272 // cmov_logical r20, r42, r23, r20 4273 // cmov_logical DST, r40, r41, r20 4274 // as_float(DST) 4275 4276 // mov r20, as_int(LHS) 4277 SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS); 4278 4279 // mov r21, as_int(RHS) 4280 SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS); 4281 4282 // and r30, r20, 0x7f800000 4283 SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20, 4284 DAG.getConstant(0x7F800000, INTTY)); 4285 4286 // and r31, r21, 0x7f800000 4287 SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21, 4288 DAG.getConstant(0x7f800000, INTTY)); 4289 4290 // and r32, r20, 0x807FFFFF 4291 SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20, 4292 DAG.getConstant(0x807FFFFF, INTTY)); 4293 4294 // and r33, r21, 0x807FFFFF 4295 SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21, 4296 DAG.getConstant(0x807FFFFF, INTTY)); 4297 4298 // ieq r40, r30, 0x7F800000 4299 SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 4300 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 4301 R30, DAG.getConstant(0x7F800000, INTTY)); 4302 4303 // ieq r41, r31, 0x7F800000 4304 SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 4305 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 4306 R31, DAG.getConstant(0x7F800000, INTTY)); 4307 4308 // ieq r42, r30, 0 4309 SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 4310 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 4311 R30, DAG.getConstant(0, 
INTTY)); 4312 4313 // ieq r43, r31, 0 4314 SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 4315 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 4316 R31, DAG.getConstant(0, INTTY)); 4317 4318 // and r50, r20, 0x80000000 4319 SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20, 4320 DAG.getConstant(0x80000000, INTTY)); 4321 4322 // and r51, r21, 0x80000000 4323 SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21, 4324 DAG.getConstant(0x80000000, INTTY)); 4325 4326 // ior r32, r32, 0x3f800000 4327 R32 = DAG.getNode(ISD::OR, DL, INTTY, R32, 4328 DAG.getConstant(0x3F800000, INTTY)); 4329 4330 // ior r33, r33, 0x3f800000 4331 R33 = DAG.getNode(ISD::OR, DL, INTTY, R33, 4332 DAG.getConstant(0x3F800000, INTTY)); 4333 4334 // cmov_logical r32, r42, r50, r32 4335 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32); 4336 4337 // cmov_logical r33, r43, r51, r33 4338 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33); 4339 4340 // cmov_logical r32, r40, r20, r32 4341 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32); 4342 4343 // cmov_logical r33, r41, r21, r33 4344 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33); 4345 4346 // ior r50, r40, r41 4347 R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41); 4348 4349 // ior r51, r42, r43 4350 R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43); 4351 4352 // ior r50, r50, r51 4353 R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51); 4354 4355 // inegate r52, r31 4356 SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31); 4357 4358 // iadd r30, r30, r52 4359 R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52); 4360 4361 // cmov_logical r30, r50, 0, r30 4362 R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50, 4363 DAG.getConstant(0, INTTY), R30); 4364 4365 // div_zeroop(infinity) r21, 1.0, as_float(r33) 4366 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33); 4367 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, 4368 DAG.getConstantFP(1.0f, OVT), R33); 4369 4370 // mul_ieee 
as_int(r20), as_float(r32), r21 4371 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32); 4372 R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21); 4373 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20); 4374 4375 // div_zeroop(infinity) r21, 1.0, as_float(r33) 4376 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33); 4377 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, 4378 DAG.getConstantFP(1.0f, OVT), R33); 4379 4380 // mul_ieee as_int(r20), as_float(r32), r21 4381 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32); 4382 R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21); 4383 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20); 4384 4385 // and r22, r20, 0x7FFFFFFF 4386 SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20, 4387 DAG.getConstant(0x7FFFFFFF, INTTY)); 4388 4389 // and r23, r20, 0x80000000 4390 SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20, 4391 DAG.getConstant(0x80000000, INTTY)); 4392 4393 // ishr r60, r22, 0x00000017 4394 SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22, 4395 DAG.getConstant(0x00000017, INTTY)); 4396 4397 // ishr r61, r30, 0x00000017 4398 SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30, 4399 DAG.getConstant(0x00000017, INTTY)); 4400 4401 // iadd r20, r20, r30 4402 R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30); 4403 4404 // iadd r21, r22, r30 4405 R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30); 4406 4407 // iadd r60, r60, r61 4408 R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61); 4409 4410 // ige r42, 0, R60 4411 R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 4412 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 4413 DAG.getConstant(0, INTTY), 4414 R60); 4415 4416 // ior r41, r23, 0x7F800000 4417 R41 = DAG.getNode(ISD::OR, DL, INTTY, R23, 4418 DAG.getConstant(0x7F800000, INTTY)); 4419 4420 // ige r40, r60, 0x000000FF 4421 R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 4422 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 4423 R60, 4424 DAG.getConstant(0x0000000FF, INTTY)); 4425 4426 // cmov_logical r40, r50, 0, r40 4427 R40 = 
DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50, 4428 DAG.getConstant(0, INTTY), 4429 R40); 4430 4431 // cmov_logical r20, r42, r23, r20 4432 R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20); 4433 4434 // cmov_logical DST, r40, r41, r20 4435 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20); 4436 4437 // as_float(DST) 4438 DST = DAG.getNode(ISDBITCAST, DL, OVT, DST); 4439 } else { 4440 // The following sequence of DAG nodes produce the following IL: 4441 // fabs r1, RHS 4442 // lt r2, 0x1.0p+96f, r1 4443 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f 4444 // mul_ieee r1, RHS, r3 4445 // div_zeroop(infinity) r0, LHS, r1 4446 // mul_ieee DST, r0, r3 4447 4448 // fabs r1, RHS 4449 SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS); 4450 // lt r2, 0x1.0p+96f, r1 4451 SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT, 4452 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32), 4453 DAG.getConstant(0x6f800000, INTTY), r1); 4454 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f 4455 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2, 4456 DAG.getConstant(0x2f800000, INTTY), 4457 DAG.getConstant(0x3f800000, INTTY)); 4458 // mul_ieee r1, RHS, r3 4459 r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3); 4460 // div_zeroop(infinity) r0, LHS, r1 4461 SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1); 4462 // mul_ieee DST, r0, r3 4463 DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3); 4464 } 4465 return DST; 4466} 4467 4468SDValue 4469AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const 4470{ 4471 return SDValue(Op.getNode(), 0); 4472} 4473