AMDILISelLowering.cpp revision 440ab9ea02690008b4d8da11494fd1e9cd86e57e
1//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//==-----------------------------------------------------------------------===// 9// 10// This file implements the interfaces that AMDIL uses to lower LLVM code into a 11// selection DAG. 12// 13//===----------------------------------------------------------------------===// 14 15#include "AMDILISelLowering.h" 16#include "AMDILDevices.h" 17#include "AMDILIntrinsicInfo.h" 18#include "AMDILRegisterInfo.h" 19#include "AMDILSubtarget.h" 20#include "AMDILUtilityFunctions.h" 21#include "llvm/CallingConv.h" 22#include "llvm/CodeGen/MachineFrameInfo.h" 23#include "llvm/CodeGen/MachineRegisterInfo.h" 24#include "llvm/CodeGen/PseudoSourceValue.h" 25#include "llvm/CodeGen/SelectionDAG.h" 26#include "llvm/CodeGen/SelectionDAGNodes.h" 27#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 28#include "llvm/DerivedTypes.h" 29#include "llvm/Instructions.h" 30#include "llvm/Intrinsics.h" 31#include "llvm/Support/raw_ostream.h" 32#include "llvm/Target/TargetInstrInfo.h" 33#include "llvm/Target/TargetOptions.h" 34 35using namespace llvm; 36#define ISDBITCAST ISD::BITCAST 37#define MVTGLUE MVT::Glue 38//===----------------------------------------------------------------------===// 39// Calling Convention Implementation 40//===----------------------------------------------------------------------===// 41#include "AMDGPUGenCallingConv.inc" 42 43//===----------------------------------------------------------------------===// 44// TargetLowering Implementation Help Functions Begin 45//===----------------------------------------------------------------------===// 46 static SDValue 47getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType) 48{ 49 DebugLoc DL = Src.getDebugLoc(); 50 EVT svt = 
Src.getValueType().getScalarType(); 51 EVT dvt = Dst.getValueType().getScalarType(); 52 if (svt.isFloatingPoint() && dvt.isFloatingPoint()) { 53 if (dvt.bitsGT(svt)) { 54 Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src); 55 } else if (svt.bitsLT(svt)) { 56 Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src, 57 DAG.getConstant(1, MVT::i32)); 58 } 59 } else if (svt.isInteger() && dvt.isInteger()) { 60 if (!svt.bitsEq(dvt)) { 61 Src = DAG.getSExtOrTrunc(Src, DL, dvt); 62 } 63 } else if (svt.isInteger()) { 64 unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP; 65 if (!svt.bitsEq(dvt)) { 66 if (dvt.getSimpleVT().SimpleTy == MVT::f32) { 67 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32); 68 } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) { 69 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64); 70 } else { 71 assert(0 && "We only support 32 and 64bit fp types"); 72 } 73 } 74 Src = DAG.getNode(opcode, DL, dvt, Src); 75 } else if (dvt.isInteger()) { 76 unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT; 77 if (svt.getSimpleVT().SimpleTy == MVT::f32) { 78 Src = DAG.getNode(opcode, DL, MVT::i32, Src); 79 } else if (svt.getSimpleVT().SimpleTy == MVT::f64) { 80 Src = DAG.getNode(opcode, DL, MVT::i64, Src); 81 } else { 82 assert(0 && "We only support 32 and 64bit fp types"); 83 } 84 Src = DAG.getSExtOrTrunc(Src, DL, dvt); 85 } 86 return Src; 87} 88// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC 89// condition. 
/// Map an ISD condition code plus the scalar value type being compared to
/// the matching AMDIL condition code.  Integer types i1/i8/i16/i32 share the
/// 32-bit integer codes (I_*/U_*), i64 uses the long codes (L_*/UL_*), and
/// f32/f64 use the float/double codes (F_*/D_*).  Combinations the IL cannot
/// express (e.g. ordered compares on integers) assert and return COND_ERROR
/// in release builds.
static AMDILCC::CondCodes
CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
{
  switch (CC) {
    default:
      {
        errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
        assert(0 && "Unknown condition code!");
        // NOTE(review): no return/break after the assert, so release builds
        // fall through into the ISD::SETO handling below.
      }
    case ISD::SETO:
      // Ordered: neither operand is NaN (floating point only).
      switch(type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_O;
        case MVT::f64:
          return AMDILCC::IL_CC_D_O;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUO:
      // Unordered: at least one operand is NaN (floating point only).
      switch(type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_UO;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UO;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETGT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_GT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_GT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_GT;
        case MVT::i64:
          return AMDILCC::IL_CC_L_GT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETGE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_GE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_GE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_GE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_GE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETLT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_LT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_LT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_LT;
        case MVT::i64:
          return AMDILCC::IL_CC_L_LT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETLE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_LE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_LE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_LE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_LE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETNE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_NE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_NE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_NE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_NE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETEQ:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_EQ;
        case MVT::f32:
          return AMDILCC::IL_CC_F_EQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_EQ;
        case MVT::i64:
          return AMDILCC::IL_CC_L_EQ;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    // Unsigned integer / unordered float variants.
    case ISD::SETUGT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_GT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UGT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UGT;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_GT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUGE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_GE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UGE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UGE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_GE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETULT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_LT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_ULT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ULT;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_LT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETULE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_LE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_ULE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ULE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_LE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUNE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_NE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UNE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UNE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_NE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUEQ:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_EQ;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UEQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UEQ;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_EQ;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    // Ordered float variants -- no integer equivalent, so the integer value
    // types deliberately fall into the assert/COND_ERROR default.
    case ISD::SETOGT:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OGT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OGT;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOGE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OGE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OGE;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOLT:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OLT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OLT;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOLE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OLE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OLE;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETONE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_ONE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ONE;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOEQ:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OEQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OEQ;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
  };
}
/// Helper function used by LowerFormalArguments
/// Map a simple value type (passed as its raw enum value) to the AMDIL
/// register class that can hold a value of that type.
static const TargetRegisterClass*
getRegClassFromType(unsigned int type) {
  switch (type) {
  default:
    assert(0 && "Passed in type does not match any register classes.");
    // NOTE(review): no return after the assert, so release builds fall
    // through to the MVT::i8 case below.
  case MVT::i8:
    return &AMDIL::GPRI8RegClass;
  case MVT::i16:
    return &AMDIL::GPRI16RegClass;
  case MVT::i32:
    return &AMDIL::GPRI32RegClass;
  case MVT::f32:
    return &AMDIL::GPRF32RegClass;
  case MVT::i64:
    return &AMDIL::GPRI64RegClass;
  case MVT::f64:
    return &AMDIL::GPRF64RegClass;
  case MVT::v4f32:
    return &AMDIL::GPRV4F32RegClass;
  case MVT::v4i8:
    return &AMDIL::GPRV4I8RegClass;
  case MVT::v4i16:
    return &AMDIL::GPRV4I16RegClass;
  case MVT::v4i32:
    return &AMDIL::GPRV4I32RegClass;
  case MVT::v2f32:
    return &AMDIL::GPRV2F32RegClass;
  case MVT::v2i8:
    return &AMDIL::GPRV2I8RegClass;
  case MVT::v2i16:
    return &AMDIL::GPRV2I16RegClass;
  case MVT::v2i32:
    return &AMDIL::GPRV2I32RegClass;
  case MVT::v2f64:
    return &AMDIL::GPRV2F64RegClass;
  case MVT::v2i64:
    return &AMDIL::GPRV2I64RegClass;
  }
}

/// Lower the i-th incoming formal argument that was assigned to a stack slot
/// by the calling convention.  Creates a fixed frame object at the assigned
/// offset and returns either the frame index itself (for byval arguments,
/// which are accessed in place) or a load of the argument value from it.
SDValue
AMDILTargetLowering::LowerMemArgument(
    SDValue Chain,
    CallingConv::ID CallConv,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    MachineFrameInfo *MFI,
    unsigned i) const
{
  // Create the nodes corresponding to a load from this parameter slot.
  ISD::ArgFlagsTy Flags = Ins[i].Flags;

  // Fastcc + GuaranteedTailCallOpt forces every argument slot mutable,
  // since tail-call lowering may overwrite the slots.
  bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
    getTargetMachine().Options.GuaranteedTailCallOpt;
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();

  // FIXME: For now, all byval parameter objects are marked mutable. This can
  // be changed with more analysis.
  // In case of tail call optimization mark all arguments mutable. Since they
  // could be overwritten by lowering of arguments in case of a tail call.
  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
      VA.getLocMemOffset(), isImmutable);
  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());

  // byval: the caller passes the object on the stack; hand back its address.
  if (Flags.isByVal())
    return FIN;
  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
      MachinePointerInfo::getFixedStack(FI),
      false, false, false, 0);
}
//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
/// Constructor: registers, for every value type the AMDIL backend handles,
/// which DAG operations are Legal / Custom-lowered / Expanded, plus a few
/// target-wide lowering preferences.
AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF())
{
  // All value types the backend supports (scalars and 2/4-wide vectors).
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  // Scalar integer types.
  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  // Scalar floating-point types.
  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  // Vector types only.
  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  // These are the current register classes that are
  // supported

  // Actions common to every supported type.
  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
    // We cannot sextinreg, expand to shifts
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_CC, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    // 64-bit division has no custom routine yet.
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    // NOTE(review): ISD::SETOLT etc. are condition codes, not opcodes;
    // passing them to setOperationAction (rather than setCondCodeAction)
    // looks suspect -- confirm against the ISD enum this LLVM version uses.
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // GPU also does not have divrem function for signed or unsigned
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // GPU doesn't have a rotl, rotr, or byteswap instruction
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // GPU doesn't have any counting operators
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
  {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);

  }
  // 64-bit integer support: expand what the hardware can't do directly.
  if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant , MVT::i64 , Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    // we support loading/storing v2f64 but not operations on the type
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SETCC, MVT::Other, Custom);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
  setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
  setOperationAction(ISD::Constant , MVT::i32 , Legal);
  setOperationAction(ISD::TRAP , MVT::Other , Legal);

  setStackPointerRegisterToSaveRestore(AMDIL::SP);
  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  // Effectively unlimited inline memcpy/memmove/memset expansion.
  maxStoresPerMemcpy = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset = 4096;

  // NOTE(review): numTypes etc. are local variables, not macros -- these
  // #undefs are no-ops kept from the original code.
#undef numTypes
#undef numIntTypes
#undef numVectorTypes
#undef numFloatTypes
}
0; 714 case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY"; 715 case AMDILISD::DP_TO_FP: return "AMDILISD::DP_TO_FP"; 716 case AMDILISD::FP_TO_DP: return "AMDILISD::FP_TO_DP"; 717 case AMDILISD::BITCONV: return "AMDILISD::BITCONV"; 718 case AMDILISD::CMOV: return "AMDILISD::CMOV"; 719 case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG"; 720 case AMDILISD::INEGATE: return "AMDILISD::INEGATE"; 721 case AMDILISD::MAD: return "AMDILISD::MAD"; 722 case AMDILISD::UMAD: return "AMDILISD::UMAD"; 723 case AMDILISD::CALL: return "AMDILISD::CALL"; 724 case AMDILISD::RET: return "AMDILISD::RET"; 725 case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI"; 726 case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO"; 727 case AMDILISD::ADD: return "AMDILISD::ADD"; 728 case AMDILISD::UMUL: return "AMDILISD::UMUL"; 729 case AMDILISD::AND: return "AMDILISD::AND"; 730 case AMDILISD::OR: return "AMDILISD::OR"; 731 case AMDILISD::NOT: return "AMDILISD::NOT"; 732 case AMDILISD::XOR: return "AMDILISD::XOR"; 733 case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF"; 734 case AMDILISD::SMAX: return "AMDILISD::SMAX"; 735 case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE"; 736 case AMDILISD::MOVE: return "AMDILISD::MOVE"; 737 case AMDILISD::VBUILD: return "AMDILISD::VBUILD"; 738 case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT"; 739 case AMDILISD::VINSERT: return "AMDILISD::VINSERT"; 740 case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT"; 741 case AMDILISD::LCREATE: return "AMDILISD::LCREATE"; 742 case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI"; 743 case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO"; 744 case AMDILISD::DCREATE: return "AMDILISD::DCREATE"; 745 case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI"; 746 case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO"; 747 case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2"; 748 case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2"; 749 case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2"; 750 case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2"; 751 
case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2"; 752 case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2"; 753 case AMDILISD::CMP: return "AMDILISD::CMP"; 754 case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT"; 755 case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE"; 756 case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT"; 757 case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE"; 758 case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ"; 759 case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE"; 760 case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG"; 761 case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND"; 762 case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO"; 763 case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO"; 764 case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP"; 765 case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR"; 766 case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD"; 767 case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND"; 768 case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG"; 769 case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC"; 770 case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC"; 771 case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX"; 772 case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX"; 773 case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN"; 774 case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN"; 775 case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR"; 776 case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB"; 777 case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB"; 778 case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG"; 779 case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR"; 780 case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET"; 781 case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET"; 782 case AMDILISD::ATOM_G_CMPXCHG_NORET: return 
"AMDILISD::ATOM_G_CMPXCHG_NORET"; 783 case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET"; 784 case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET"; 785 case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET"; 786 case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET"; 787 case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET"; 788 case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET"; 789 case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET"; 790 case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET"; 791 case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET"; 792 case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET"; 793 case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET"; 794 case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD"; 795 case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND"; 796 case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG"; 797 case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC"; 798 case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC"; 799 case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX"; 800 case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX"; 801 case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN"; 802 case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN"; 803 case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR"; 804 case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB"; 805 case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB"; 806 case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG"; 807 case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR"; 808 case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET"; 809 case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET"; 810 case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET"; 811 case 
AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET"; 812 case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET"; 813 case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET"; 814 case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET"; 815 case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET"; 816 case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET"; 817 case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET"; 818 case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET"; 819 case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET"; 820 case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET"; 821 case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD"; 822 case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND"; 823 case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG"; 824 case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC"; 825 case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC"; 826 case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX"; 827 case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX"; 828 case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN"; 829 case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN"; 830 case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR"; 831 case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR"; 832 case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB"; 833 case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB"; 834 case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG"; 835 case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR"; 836 case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET"; 837 case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET"; 838 case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET"; 839 case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET"; 840 case 
AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
    case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
    case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
    case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
    case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
    case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
    case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
    case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
    case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
    case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
    case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
    case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
    case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
    case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
    case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
    case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
    case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
    case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
    case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
    case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
    case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
    case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
    case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";

  };
}

/// getTgtMemIntrinsic - Describe the memory behavior of the AMDIL
/// memory-touching intrinsics (atomics, append/consume counters) so the
/// SelectionDAG builder can create a correct MemIntrinsicSDNode for them.
///
/// \param Info      out-parameter filled with opcode, memory VT, pointer
///                  operand, offset, alignment and read/write flags.
/// \param I         the call instruction invoking the intrinsic; operand 0
///                  is taken as the pointer being operated on.
/// \param Intrinsic the intrinsic ID being lowered.
/// \returns true if \p Intrinsic is a recognized AMDIL memory intrinsic and
///          \p Info was populated; false otherwise.
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  // Only IDs inside the AMDIL intrinsic range are handled here.
  if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
      || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
    return false;
  }
  // Set for the float-typed xchg intrinsics, whose payload is bitcast
  // to/from integer form.
  bool bitCastToInt = false;
  unsigned IntNo;
  // False for the *_noret variants, which do not produce a loaded value.
  bool isRet = true;
  const AMDILSubtarget *STM = &this->getTargetMachine()
    .getSubtarget<AMDILSubtarget>();
  switch (Intrinsic) {
    default: return false; // Don't custom lower most intrinsics.
    // --- atomic add (global / local / region address spaces) ---
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
      IntNo = AMDILISD::ATOM_G_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
      IntNo = AMDILISD::ATOM_L_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
      IntNo = AMDILISD::ATOM_R_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
    // --- atomic and ---
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
      IntNo = AMDILISD::ATOM_G_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
      IntNo = AMDILISD::ATOM_L_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
      IntNo = AMDILISD::ATOM_R_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_AND_NORET; break;
    // --- atomic cmpxchg ---
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
      IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
      IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
      IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
    // --- atomic dec: native DEC only on CAL >= SC_136; otherwise emulate
    //     with SUB ---
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_G_DEC;
      } else {
        IntNo = AMDILISD::ATOM_G_SUB;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_G_DEC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_G_SUB_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_L_DEC;
      } else {
        IntNo = AMDILISD::ATOM_L_SUB;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_L_DEC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_L_SUB_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_R_DEC;
      } else {
        IntNo = AMDILISD::ATOM_R_SUB;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_R_DEC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_R_SUB_NORET;
      }
      break;
    // --- atomic inc: native INC only on CAL >= SC_136; otherwise emulate
    //     with ADD ---
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_G_INC;
      } else {
        IntNo = AMDILISD::ATOM_G_ADD;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_G_INC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_G_ADD_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_L_INC;
      } else {
        IntNo = AMDILISD::ATOM_L_ADD;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_L_INC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_L_ADD_NORET;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_R_INC;
      } else {
        IntNo = AMDILISD::ATOM_R_ADD;
      }
      break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
      isRet = false;
      if (STM->calVersion() >= CAL_VERSION_SC_136) {
        IntNo = AMDILISD::ATOM_R_INC_NORET;
      } else {
        IntNo = AMDILISD::ATOM_R_ADD_NORET;
      }
      break;
    // --- atomic max: signed (MAX) and unsigned (UMAX) are distinct ops ---
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
      IntNo = AMDILISD::ATOM_G_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
      IntNo = AMDILISD::ATOM_G_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
      IntNo = AMDILISD::ATOM_L_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
      IntNo = AMDILISD::ATOM_L_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
      IntNo = AMDILISD::ATOM_R_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
      IntNo = AMDILISD::ATOM_R_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
    // --- atomic min: signed (MIN) and unsigned (UMIN) ---
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
      IntNo = AMDILISD::ATOM_G_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
      IntNo = AMDILISD::ATOM_G_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
      IntNo = AMDILISD::ATOM_L_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
      IntNo = AMDILISD::ATOM_L_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
      IntNo = AMDILISD::ATOM_R_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
      IntNo = AMDILISD::ATOM_R_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
    // --- atomic or ---
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
      IntNo = AMDILISD::ATOM_G_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
      IntNo = AMDILISD::ATOM_L_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
      IntNo = AMDILISD::ATOM_R_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_OR_NORET; break;
    // --- atomic sub ---
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
      IntNo = AMDILISD::ATOM_G_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
      IntNo = AMDILISD::ATOM_L_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
      IntNo = AMDILISD::ATOM_R_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
    // --- atomic reverse-sub ---
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
      IntNo = AMDILISD::ATOM_G_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
      IntNo = AMDILISD::ATOM_L_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
      IntNo = AMDILISD::ATOM_R_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
    // --- atomic xchg: float variants set bitCastToInt and then
    //     intentionally fall through to the integer cases ---
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
      bitCastToInt = true;
      // FALL-THROUGH
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
      IntNo = AMDILISD::ATOM_G_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
      bitCastToInt = true;
      // FALL-THROUGH
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
      bitCastToInt = true;
      // FALL-THROUGH
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
      IntNo = AMDILISD::ATOM_L_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
      bitCastToInt = true;
      // FALL-THROUGH
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
      bitCastToInt = true;
      // FALL-THROUGH
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
      IntNo = AMDILISD::ATOM_R_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
      bitCastToInt = true;
      // FALL-THROUGH
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
    // --- atomic xor ---
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
      IntNo = AMDILISD::ATOM_G_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
      IntNo = AMDILISD::ATOM_L_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
      IntNo = AMDILISD::ATOM_R_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
      isRet = false;
      IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
    // --- append buffer alloc/consume counters ---
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
      IntNo = AMDILISD::APPEND_ALLOC; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
      isRet = false;
      IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32:
      IntNo = AMDILISD::APPEND_CONSUME; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
      isRet = false;
      IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
  };

  Info.opc = IntNo;
  // Float xchg operates on an f32 payload; everything else is i32.
  // NOTE(review): verify f32-vs-i32 is the intended mapping here given the
  // flag is named bitCastTo*Int*.
  Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
  Info.ptrVal = I.getOperand(0);
  Info.offset = 0;
  Info.align = 4;
  Info.vol = true;
  // Only the value-returning forms read memory back into the program.
  Info.readMem = isRet;
  Info.writeMem = true;
  return true;
}
// The backend supports 32 and 64 bit floating point immediates
bool
AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return true;
  } else {
    return false;
  }
}

// Never shrink f64 constants to f32: both widths are legal immediates here.
bool
AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return false;
  } else {
    return true;
  }
}


// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
// be zero. Op is expected to be a target specific node. Used by DAG
// combiner.
1272 1273void 1274AMDILTargetLowering::computeMaskedBitsForTargetNode( 1275 const SDValue Op, 1276 APInt &KnownZero, 1277 APInt &KnownOne, 1278 const SelectionDAG &DAG, 1279 unsigned Depth) const 1280{ 1281 APInt KnownZero2; 1282 APInt KnownOne2; 1283 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything 1284 switch (Op.getOpcode()) { 1285 default: break; 1286 case AMDILISD::SELECT_CC: 1287 DAG.ComputeMaskedBits( 1288 Op.getOperand(1), 1289 KnownZero, 1290 KnownOne, 1291 Depth + 1 1292 ); 1293 DAG.ComputeMaskedBits( 1294 Op.getOperand(0), 1295 KnownZero2, 1296 KnownOne2 1297 ); 1298 assert((KnownZero & KnownOne) == 0 1299 && "Bits known to be one AND zero?"); 1300 assert((KnownZero2 & KnownOne2) == 0 1301 && "Bits known to be one AND zero?"); 1302 // Only known if known in both the LHS and RHS 1303 KnownOne &= KnownOne2; 1304 KnownZero &= KnownZero2; 1305 break; 1306 }; 1307} 1308 1309// This is the function that determines which calling convention should 1310// be used. Currently there is only one calling convention 1311CCAssignFn* 1312AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const 1313{ 1314 //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 1315 return CC_AMDIL32; 1316} 1317 1318// LowerCallResult - Lower the result values of an ISD::CALL into the 1319// appropriate copies out of appropriate physical registers. This assumes that 1320// Chain/InFlag are the input chain/flag to use, and that TheCall is the call 1321// being lowered. The returns a SDNode with the same number of values as the 1322// ISD::CALL. 
/// Copy call results out of their assigned physical registers into virtual
/// registers, threading the chain and glue through each copy.
///
/// \param Chain   incoming chain (from the CALL node).
/// \param InFlag  incoming glue tying the copies to the call.
/// \param Ins     descriptions of the expected return values.
/// \param InVals  out: one SDValue per returned value, in order.
/// \returns the updated chain.
SDValue
AMDILTargetLowering::LowerCallResult(
    SDValue Chain,
    SDValue InFlag,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  // Assign locations to each value returned by this call
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    EVT CopyVT = RVLocs[i].getValVT();
    if (RVLocs[i].isRegLoc()) {
      // CopyFromReg produces (value, chain, glue); keep the chain (#1) in
      // Chain, then pull the copied value (#0) and glue (#2) off the same
      // node for the next iteration.
      Chain = DAG.getCopyFromReg(
          Chain,
          dl,
          RVLocs[i].getLocReg(),
          CopyVT,
          InFlag
          ).getValue(1);
      SDValue Val = Chain.getValue(0);
      InFlag = Chain.getValue(2);
      InVals.push_back(Val);
    }
  }

  return Chain;

}

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

// Recursively assign SDNodeOrdering to any unordered nodes
// This is necessary to maintain source ordering of instructions
// under -O0 to avoid odd-looking "skipping around" issues.
1368 static const SDValue 1369Ordered( SelectionDAG &DAG, unsigned order, const SDValue New ) 1370{ 1371 if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) { 1372 DAG.AssignOrdering( New.getNode(), order ); 1373 for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i) 1374 Ordered( DAG, order, New.getOperand(i) ); 1375 } 1376 return New; 1377} 1378 1379#define LOWER(A) \ 1380 case ISD:: A: \ 1381return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) ) 1382 1383SDValue 1384AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const 1385{ 1386 switch (Op.getOpcode()) { 1387 default: 1388 Op.getNode()->dump(); 1389 assert(0 && "Custom lowering code for this" 1390 "instruction is not implemented yet!"); 1391 break; 1392 LOWER(GlobalAddress); 1393 LOWER(JumpTable); 1394 LOWER(ConstantPool); 1395 LOWER(ExternalSymbol); 1396 LOWER(SDIV); 1397 LOWER(SREM); 1398 LOWER(BUILD_VECTOR); 1399 LOWER(SELECT); 1400 LOWER(SETCC); 1401 LOWER(SIGN_EXTEND_INREG); 1402 LOWER(DYNAMIC_STACKALLOC); 1403 LOWER(BRCOND); 1404 LOWER(BR_CC); 1405 } 1406 return Op; 1407} 1408 1409#undef LOWER 1410 1411SDValue 1412AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const 1413{ 1414 SDValue DST = Op; 1415 const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op); 1416 const GlobalValue *G = GADN->getGlobal(); 1417 DebugLoc DL = Op.getDebugLoc(); 1418 const GlobalVariable *GV = dyn_cast<GlobalVariable>(G); 1419 if (!GV) { 1420 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 1421 } else { 1422 if (GV->hasInitializer()) { 1423 const Constant *C = dyn_cast<Constant>(GV->getInitializer()); 1424 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) { 1425 DST = DAG.getConstant(CI->getValue(), Op.getValueType()); 1426 } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) { 1427 DST = DAG.getConstantFP(CF->getValueAPF(), 1428 Op.getValueType()); 1429 } else if (dyn_cast<ConstantAggregateZero>(C)) { 1430 EVT VT = Op.getValueType(); 
1431 if (VT.isInteger()) { 1432 DST = DAG.getConstant(0, VT); 1433 } else { 1434 DST = DAG.getConstantFP(0, VT); 1435 } 1436 } else { 1437 assert(!"lowering this type of Global Address " 1438 "not implemented yet!"); 1439 C->dump(); 1440 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 1441 } 1442 } else { 1443 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 1444 } 1445 } 1446 return DST; 1447} 1448 1449SDValue 1450AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const 1451{ 1452 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 1453 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); 1454 return Result; 1455} 1456SDValue 1457AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const 1458{ 1459 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1460 EVT PtrVT = Op.getValueType(); 1461 SDValue Result; 1462 if (CP->isMachineConstantPoolEntry()) { 1463 Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 1464 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); 1465 } else { 1466 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 1467 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); 1468 } 1469 return Result; 1470} 1471 1472SDValue 1473AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const 1474{ 1475 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 1476 SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32); 1477 return Result; 1478} 1479 1480/// LowerFORMAL_ARGUMENTS - transform physical registers into 1481/// virtual registers and generate load operations for 1482/// arguments places on the stack. 
/// TODO: isVarArg, hasStructRet
/// Lower incoming formal arguments: register-passed arguments become
/// live-in copies (with AssertSext/AssertZext + truncate for promoted
/// sub-32-bit values); stack-passed arguments are loaded via
/// LowerMemArgument. Results are appended to InVals in argument order.
 SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  // NOTE: the function's own calling convention is used here, not the
  // CallConv parameter.
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
      getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      // Register-passed argument: add as a live-in and copy into a vreg.
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFromType(
          RegVT.getSimpleVT().SimpleTy);

      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits. Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if(VA.isMemLoc()) {
      // Stack-passed argument: delegate to LowerMemArgument.
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
  // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
// Emit the memcpy for a byval argument. Currently unreachable in practice:
// the assert below fires before any copy is generated.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  assert(0 && "MemCopy does not exist yet");
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);

  return DAG.getMemcpy(Chain,
      Src.getDebugLoc(),
      Dst, Src, SizeNode, Flags.getByValAlign(),
      /*IsVol=*/false, /*AlwaysInline=*/true,
      MachinePointerInfo(), MachinePointerInfo());
}

/// Store (or byval-copy) one outgoing call argument to its assigned stack
/// slot relative to StackPtr. Returns the resulting chain.
SDValue
AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
    SDValue StackPtr, SDValue Arg,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    ISD::ArgFlagsTy Flags) const
{
  unsigned int LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD,
      dl,
      getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
  } else {
    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
        MachinePointerInfo::getStack(LocMemOffset),
        false, false, 0);
  }
  return PtrOff;
}
/// LowerCall - functions arguments are copied from virtual
/// regs to (physical regs)/(stack frame), CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Tail calls are unconditionally disabled; the isTailCall branches below
  // are dead scaffolding kept for a future implementation.
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: DO we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
      getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the calling operands, but need to change
  // if we have more than one calling convention
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = OutVals[i];
    //Promote the value if needed
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::SIGN_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::ZERO_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
      case CCValAssign::AExt:
        Arg = DAG.getNode(ISD::ANY_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
    }

    if (VA.isRegLoc()) {
      // Register argument: queue the (physreg, value) pair; the actual
      // CopyToReg nodes are emitted after all stack stores below.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());

      // emit ISD::STORE whichs stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  // Glue all stack stores together so they stay ordered before the call.
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  SDValue InFlag;
  if (!isTailCall) {
    // Copy register arguments into their physregs, chaining the glue so
    // the scheduler keeps the copies adjacent to the call.
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
      InVals);
}

/// Dispatch signed division lowering by scalar width; i16/i8 use the
/// 24-bit path, unsupported widths are returned unchanged.
SDValue
AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

/// Dispatch signed remainder lowering by scalar width; unsupported widths
/// are returned unchanged.
SDValue
AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

/// Lower BUILD_VECTOR: splat element 0 via AMDILISD::VBUILD, then insert
/// the remaining defined elements. The switch cascades 4 -> 3 -> 2 on
/// purpose (no breaks) so higher lanes are inserted first.
SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        // NOTE(review): insert indices 7/6/5 rather than 3/2/1 — presumably
        // an AMDIL-specific lane encoding; confirm against the backend's
        // INSERT_VECTOR_ELT handling.
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
      // FALL-THROUGH
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
      // FALL-THROUGH
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  };
  return Nodes1;
}

/// Lower SELECT to AMDILISD::CMOVLOG after converting the condition to the
/// result's type via getConversionNode.
SDValue
AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond = Op.getOperand(0);
  SDValue LHS = Op.getOperand(1);
  SDValue RHS = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(AMDILISD::CMOVLOG,
      DL,
      Op.getValueType(), Cond, LHS, RHS);
  return Cond;
}

/// Lower SETCC as a SELECT_CC producing an all-ones/zero mask, convert the
/// mask to the result type, then AND with 1 to get the canonical 0/1
/// boolean.
SDValue
AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      LHS.getValueType(),
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      Cond.getValueType(),
      DAG.getConstant(1, Cond.getValueType()),
      Cond);
  return Cond;
}

SDValue
1945AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const 1946{ 1947 SDValue Data = Op.getOperand(0); 1948 VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1)); 1949 DebugLoc DL = Op.getDebugLoc(); 1950 EVT DVT = Data.getValueType(); 1951 EVT BVT = BaseType->getVT(); 1952 unsigned baseBits = BVT.getScalarType().getSizeInBits(); 1953 unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1; 1954 unsigned shiftBits = srcBits - baseBits; 1955 if (srcBits < 32) { 1956 // If the op is less than 32 bits, then it needs to extend to 32bits 1957 // so it can properly keep the upper bits valid. 1958 EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1); 1959 Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data); 1960 shiftBits = 32 - baseBits; 1961 DVT = IVT; 1962 } 1963 SDValue Shift = DAG.getConstant(shiftBits, DVT); 1964 // Shift left by 'Shift' bits. 1965 Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift); 1966 // Signed shift Right by 'Shift' bits. 1967 Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift); 1968 if (srcBits < 32) { 1969 // Once the sign extension is done, the op needs to be converted to 1970 // its original type. 1971 Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType()); 1972 } 1973 return Data; 1974} 1975EVT 1976AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const 1977{ 1978 int iSize = (size * numEle); 1979 int vEle = (iSize >> ((size == 64) ? 
6 : 5)); 1980 if (!vEle) { 1981 vEle = 1; 1982 } 1983 if (size == 64) { 1984 if (vEle == 1) { 1985 return EVT(MVT::i64); 1986 } else { 1987 return EVT(MVT::getVectorVT(MVT::i64, vEle)); 1988 } 1989 } else { 1990 if (vEle == 1) { 1991 return EVT(MVT::i32); 1992 } else { 1993 return EVT(MVT::getVectorVT(MVT::i32, vEle)); 1994 } 1995 } 1996} 1997 1998SDValue 1999AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 2000 SelectionDAG &DAG) const 2001{ 2002 SDValue Chain = Op.getOperand(0); 2003 SDValue Size = Op.getOperand(1); 2004 unsigned int SPReg = AMDIL::SP; 2005 DebugLoc DL = Op.getDebugLoc(); 2006 SDValue SP = DAG.getCopyFromReg(Chain, 2007 DL, 2008 SPReg, MVT::i32); 2009 SDValue NewSP = DAG.getNode(ISD::ADD, 2010 DL, 2011 MVT::i32, SP, Size); 2012 Chain = DAG.getCopyToReg(SP.getValue(1), 2013 DL, 2014 SPReg, NewSP); 2015 SDValue Ops[2] = {NewSP, Chain}; 2016 Chain = DAG.getMergeValues(Ops, 2 ,DL); 2017 return Chain; 2018} 2019SDValue 2020AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const 2021{ 2022 SDValue Chain = Op.getOperand(0); 2023 SDValue Cond = Op.getOperand(1); 2024 SDValue Jump = Op.getOperand(2); 2025 SDValue Result; 2026 Result = DAG.getNode( 2027 AMDILISD::BRANCH_COND, 2028 Op.getDebugLoc(), 2029 Op.getValueType(), 2030 Chain, Jump, Cond); 2031 return Result; 2032} 2033 2034SDValue 2035AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const 2036{ 2037 SDValue Chain = Op.getOperand(0); 2038 SDValue CC = Op.getOperand(1); 2039 SDValue LHS = Op.getOperand(2); 2040 SDValue RHS = Op.getOperand(3); 2041 SDValue JumpT = Op.getOperand(4); 2042 SDValue CmpValue; 2043 SDValue Result; 2044 CmpValue = DAG.getNode( 2045 ISD::SELECT_CC, 2046 Op.getDebugLoc(), 2047 LHS.getValueType(), 2048 LHS, RHS, 2049 DAG.getConstant(-1, MVT::i32), 2050 DAG.getConstant(0, MVT::i32), 2051 CC); 2052 Result = DAG.getNode( 2053 AMDILISD::BRANCH_COND, 2054 CmpValue.getDebugLoc(), 2055 MVT::Other, Chain, 2056 JumpT, CmpValue); 2057 return Result; 
2058} 2059 2060// LowerRET - Lower an ISD::RET node. 2061SDValue 2062AMDILTargetLowering::LowerReturn(SDValue Chain, 2063 CallingConv::ID CallConv, bool isVarArg, 2064 const SmallVectorImpl<ISD::OutputArg> &Outs, 2065 const SmallVectorImpl<SDValue> &OutVals, 2066 DebugLoc dl, SelectionDAG &DAG) 2067const 2068{ 2069 //MachineFunction& MF = DAG.getMachineFunction(); 2070 // CCValAssign - represent the assignment of the return value 2071 // to a location 2072 SmallVector<CCValAssign, 16> RVLocs; 2073 2074 // CCState - Info about the registers and stack slot 2075 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2076 getTargetMachine(), RVLocs, *DAG.getContext()); 2077 2078 // Analyze return values of ISD::RET 2079 CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32); 2080 // If this is the first return lowered for this function, add 2081 // the regs to the liveout set for the function 2082 MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); 2083 for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) { 2084 if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) { 2085 MRI.addLiveOut(RVLocs[i].getLocReg()); 2086 } 2087 } 2088 // FIXME: implement this when tail call is implemented 2089 // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL); 2090 // both x86 and ppc implement this in ISelLowering 2091 2092 // Regular return here 2093 SDValue Flag; 2094 SmallVector<SDValue, 6> RetOps; 2095 RetOps.push_back(Chain); 2096 RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32)); 2097 for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) { 2098 CCValAssign &VA = RVLocs[i]; 2099 SDValue ValToCopy = OutVals[i]; 2100 assert(VA.isRegLoc() && "Can only return in registers!"); 2101 // ISD::Ret => ret chain, (regnum1, val1), ... 
2102 // So i * 2 + 1 index only the regnums 2103 Chain = DAG.getCopyToReg(Chain, 2104 dl, 2105 VA.getLocReg(), 2106 ValToCopy, 2107 Flag); 2108 // guarantee that all emitted copies are stuck together 2109 // avoiding something bad 2110 Flag = Chain.getValue(1); 2111 } 2112 /*if (MF.getFunction()->hasStructRetAttr()) { 2113 assert(0 && "Struct returns are not yet implemented!"); 2114 // Both MIPS and X86 have this 2115 }*/ 2116 RetOps[0] = Chain; 2117 if (Flag.getNode()) 2118 RetOps.push_back(Flag); 2119 2120 Flag = DAG.getNode(AMDILISD::RET_FLAG, 2121 dl, 2122 MVT::Other, &RetOps[0], RetOps.size()); 2123 return Flag; 2124} 2125 2126unsigned int 2127AMDILTargetLowering::getFunctionAlignment(const Function *) const 2128{ 2129 return 0; 2130} 2131 2132SDValue 2133AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const 2134{ 2135 DebugLoc DL = Op.getDebugLoc(); 2136 EVT OVT = Op.getValueType(); 2137 SDValue LHS = Op.getOperand(0); 2138 SDValue RHS = Op.getOperand(1); 2139 MVT INTTY; 2140 MVT FLTTY; 2141 if (!OVT.isVector()) { 2142 INTTY = MVT::i32; 2143 FLTTY = MVT::f32; 2144 } else if (OVT.getVectorNumElements() == 2) { 2145 INTTY = MVT::v2i32; 2146 FLTTY = MVT::v2f32; 2147 } else if (OVT.getVectorNumElements() == 4) { 2148 INTTY = MVT::v4i32; 2149 FLTTY = MVT::v4f32; 2150 } 2151 unsigned bitsize = OVT.getScalarType().getSizeInBits(); 2152 // char|short jq = ia ^ ib; 2153 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); 2154 2155 // jq = jq >> (bitsize - 2) 2156 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); 2157 2158 // jq = jq | 0x1 2159 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); 2160 2161 // jq = (int)jq 2162 jq = DAG.getSExtOrTrunc(jq, DL, INTTY); 2163 2164 // int ia = (int)LHS; 2165 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); 2166 2167 // int ib, (int)RHS; 2168 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); 2169 2170 // float fa = (float)ia; 2171 SDValue fa = 
DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); 2172 2173 // float fb = (float)ib; 2174 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); 2175 2176 // float fq = native_divide(fa, fb); 2177 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb); 2178 2179 // fq = trunc(fq); 2180 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); 2181 2182 // float fqneg = -fq; 2183 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); 2184 2185 // float fr = mad(fqneg, fb, fa); 2186 SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa); 2187 2188 // int iq = (int)fq; 2189 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); 2190 2191 // fr = fabs(fr); 2192 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr); 2193 2194 // fb = fabs(fb); 2195 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); 2196 2197 // int cv = fr >= fb; 2198 SDValue cv; 2199 if (INTTY == MVT::i32) { 2200 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 2201 } else { 2202 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 2203 } 2204 // jq = (cv ? jq : 0); 2205 jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq, 2206 DAG.getConstant(0, OVT)); 2207 // dst = iq + jq; 2208 iq = DAG.getSExtOrTrunc(iq, DL, OVT); 2209 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); 2210 return iq; 2211} 2212 2213SDValue 2214AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const 2215{ 2216 DebugLoc DL = Op.getDebugLoc(); 2217 EVT OVT = Op.getValueType(); 2218 SDValue LHS = Op.getOperand(0); 2219 SDValue RHS = Op.getOperand(1); 2220 // The LowerSDIV32 function generates equivalent to the following IL. 
2221 // mov r0, LHS 2222 // mov r1, RHS 2223 // ilt r10, r0, 0 2224 // ilt r11, r1, 0 2225 // iadd r0, r0, r10 2226 // iadd r1, r1, r11 2227 // ixor r0, r0, r10 2228 // ixor r1, r1, r11 2229 // udiv r0, r0, r1 2230 // ixor r10, r10, r11 2231 // iadd r0, r0, r10 2232 // ixor DST, r0, r10 2233 2234 // mov r0, LHS 2235 SDValue r0 = LHS; 2236 2237 // mov r1, RHS 2238 SDValue r1 = RHS; 2239 2240 // ilt r10, r0, 0 2241 SDValue r10 = DAG.getSelectCC(DL, 2242 r0, DAG.getConstant(0, OVT), 2243 DAG.getConstant(-1, MVT::i32), 2244 DAG.getConstant(0, MVT::i32), 2245 ISD::SETLT); 2246 2247 // ilt r11, r1, 0 2248 SDValue r11 = DAG.getSelectCC(DL, 2249 r1, DAG.getConstant(0, OVT), 2250 DAG.getConstant(-1, MVT::i32), 2251 DAG.getConstant(0, MVT::i32), 2252 ISD::SETLT); 2253 2254 // iadd r0, r0, r10 2255 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 2256 2257 // iadd r1, r1, r11 2258 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 2259 2260 // ixor r0, r0, r10 2261 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 2262 2263 // ixor r1, r1, r11 2264 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 2265 2266 // udiv r0, r0, r1 2267 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); 2268 2269 // ixor r10, r10, r11 2270 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11); 2271 2272 // iadd r0, r0, r10 2273 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 2274 2275 // ixor DST, r0, r10 2276 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 2277 return DST; 2278} 2279 2280SDValue 2281AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const 2282{ 2283 return SDValue(Op.getNode(), 0); 2284} 2285 2286SDValue 2287AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const 2288{ 2289 DebugLoc DL = Op.getDebugLoc(); 2290 EVT OVT = Op.getValueType(); 2291 MVT INTTY = MVT::i32; 2292 if (OVT == MVT::v2i8) { 2293 INTTY = MVT::v2i32; 2294 } else if (OVT == MVT::v4i8) { 2295 INTTY = MVT::v4i32; 2296 } 2297 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 2298 SDValue RHS = 
DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 2299 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 2300 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 2301 return LHS; 2302} 2303 2304SDValue 2305AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const 2306{ 2307 DebugLoc DL = Op.getDebugLoc(); 2308 EVT OVT = Op.getValueType(); 2309 MVT INTTY = MVT::i32; 2310 if (OVT == MVT::v2i16) { 2311 INTTY = MVT::v2i32; 2312 } else if (OVT == MVT::v4i16) { 2313 INTTY = MVT::v4i32; 2314 } 2315 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 2316 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 2317 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 2318 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 2319 return LHS; 2320} 2321 2322SDValue 2323AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const 2324{ 2325 DebugLoc DL = Op.getDebugLoc(); 2326 EVT OVT = Op.getValueType(); 2327 SDValue LHS = Op.getOperand(0); 2328 SDValue RHS = Op.getOperand(1); 2329 // The LowerSREM32 function generates equivalent to the following IL. 
2330 // mov r0, LHS 2331 // mov r1, RHS 2332 // ilt r10, r0, 0 2333 // ilt r11, r1, 0 2334 // iadd r0, r0, r10 2335 // iadd r1, r1, r11 2336 // ixor r0, r0, r10 2337 // ixor r1, r1, r11 2338 // udiv r20, r0, r1 2339 // umul r20, r20, r1 2340 // sub r0, r0, r20 2341 // iadd r0, r0, r10 2342 // ixor DST, r0, r10 2343 2344 // mov r0, LHS 2345 SDValue r0 = LHS; 2346 2347 // mov r1, RHS 2348 SDValue r1 = RHS; 2349 2350 // ilt r10, r0, 0 2351 SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT, 2352 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 2353 r0, DAG.getConstant(0, OVT)); 2354 2355 // ilt r11, r1, 0 2356 SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT, 2357 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 2358 r1, DAG.getConstant(0, OVT)); 2359 2360 // iadd r0, r0, r10 2361 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 2362 2363 // iadd r1, r1, r11 2364 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 2365 2366 // ixor r0, r0, r10 2367 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 2368 2369 // ixor r1, r1, r11 2370 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 2371 2372 // udiv r20, r0, r1 2373 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1); 2374 2375 // umul r20, r20, r1 2376 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1); 2377 2378 // sub r0, r0, r20 2379 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); 2380 2381 // iadd r0, r0, r10 2382 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 2383 2384 // ixor DST, r0, r10 2385 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 2386 return DST; 2387} 2388 2389SDValue 2390AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const 2391{ 2392 return SDValue(Op.getNode(), 0); 2393} 2394