// AMDILISelLowering.cpp revision 76b44034b9b234d3db4012342f0fae677d4f10f6
1//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//==-----------------------------------------------------------------------===// 9// 10// This file implements the interfaces that AMDIL uses to lower LLVM code into a 11// selection DAG. 12// 13//===----------------------------------------------------------------------===// 14 15#include "AMDILISelLowering.h" 16#include "AMDILDevices.h" 17#include "AMDILIntrinsicInfo.h" 18#include "AMDILRegisterInfo.h" 19#include "AMDILSubtarget.h" 20#include "AMDILUtilityFunctions.h" 21#include "llvm/CallingConv.h" 22#include "llvm/CodeGen/MachineFrameInfo.h" 23#include "llvm/CodeGen/MachineRegisterInfo.h" 24#include "llvm/CodeGen/PseudoSourceValue.h" 25#include "llvm/CodeGen/SelectionDAG.h" 26#include "llvm/CodeGen/SelectionDAGNodes.h" 27#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 28#include "llvm/DerivedTypes.h" 29#include "llvm/Instructions.h" 30#include "llvm/Intrinsics.h" 31#include "llvm/Support/raw_ostream.h" 32#include "llvm/Target/TargetInstrInfo.h" 33#include "llvm/Target/TargetOptions.h" 34 35using namespace llvm; 36#define ISDBITCAST ISD::BITCAST 37#define MVTGLUE MVT::Glue 38//===----------------------------------------------------------------------===// 39// Calling Convention Implementation 40//===----------------------------------------------------------------------===// 41#include "AMDGPUGenCallingConv.inc" 42 43//===----------------------------------------------------------------------===// 44// TargetLowering Implementation Help Functions Begin 45//===----------------------------------------------------------------------===// 46 static SDValue 47getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType) 48{ 49 DebugLoc DL = Src.getDebugLoc(); 50 EVT svt = 
Src.getValueType().getScalarType(); 51 EVT dvt = Dst.getValueType().getScalarType(); 52 if (svt.isFloatingPoint() && dvt.isFloatingPoint()) { 53 if (dvt.bitsGT(svt)) { 54 Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src); 55 } else if (svt.bitsLT(svt)) { 56 Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src, 57 DAG.getConstant(1, MVT::i32)); 58 } 59 } else if (svt.isInteger() && dvt.isInteger()) { 60 if (!svt.bitsEq(dvt)) { 61 Src = DAG.getSExtOrTrunc(Src, DL, dvt); 62 } 63 } else if (svt.isInteger()) { 64 unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP; 65 if (!svt.bitsEq(dvt)) { 66 if (dvt.getSimpleVT().SimpleTy == MVT::f32) { 67 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32); 68 } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) { 69 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64); 70 } else { 71 assert(0 && "We only support 32 and 64bit fp types"); 72 } 73 } 74 Src = DAG.getNode(opcode, DL, dvt, Src); 75 } else if (dvt.isInteger()) { 76 unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT; 77 if (svt.getSimpleVT().SimpleTy == MVT::f32) { 78 Src = DAG.getNode(opcode, DL, MVT::i32, Src); 79 } else if (svt.getSimpleVT().SimpleTy == MVT::f64) { 80 Src = DAG.getNode(opcode, DL, MVT::i64, Src); 81 } else { 82 assert(0 && "We only support 32 and 64bit fp types"); 83 } 84 Src = DAG.getSExtOrTrunc(Src, DL, dvt); 85 } 86 return Src; 87} 88// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC 89// condition. 
// CondCCodeToCC - Map a generic ISD condition code plus the compared value
// type onto the matching AMDIL condition code.  The value type selects the
// I/U/L/UL (integer) or F/D (float/double) flavor of the comparison; an
// unsupported (CC, type) pairing asserts and yields COND_ERROR.
// Note: the 'default' label intentionally falls through into ISD::SETO after
// reporting the unknown condition code (assert(0) is fatal in debug builds).
static AMDILCC::CondCodes
CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
{
  switch (CC) {
    default:
      {
        errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
        assert(0 && "Unknown condition code!");
      }
    case ISD::SETO:
      switch(type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_O;
        case MVT::f64:
          return AMDILCC::IL_CC_D_O;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUO:
      switch(type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_UO;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UO;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETGT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_GT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_GT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_GT;
        case MVT::i64:
          return AMDILCC::IL_CC_L_GT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETGE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_GE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_GE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_GE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_GE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETLT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_LT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_LT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_LT;
        case MVT::i64:
          return AMDILCC::IL_CC_L_LT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETLE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_LE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_LE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_LE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_LE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETNE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_NE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_NE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_NE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_NE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETEQ:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_EQ;
        case MVT::f32:
          return AMDILCC::IL_CC_F_EQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_EQ;
        case MVT::i64:
          return AMDILCC::IL_CC_L_EQ;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUGT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_GT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UGT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UGT;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_GT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUGE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_GE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UGE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UGE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_GE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETULT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_LT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_ULT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ULT;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_LT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETULE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_LE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_ULE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ULE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_LE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUNE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_NE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UNE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UNE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_NE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUEQ:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_EQ;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UEQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UEQ;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_EQ;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    // Ordered comparisons below are only defined for floating point; the
    // integer labels deliberately fall through to the assert.
    case ISD::SETOGT:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OGT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OGT;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOGE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OGE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OGE;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOLT:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OLT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OLT;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOLE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OLE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OLE;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETONE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_ONE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ONE;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOEQ:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OEQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OEQ;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
  };
}

// LowerMemArgument - Lower the i-th incoming argument, which CC analysis
// assigned to a stack slot, into either the frame index itself (byval) or a
// load from a fixed stack object.
SDValue
AMDILTargetLowering::LowerMemArgument(
    SDValue Chain,
    CallingConv::ID CallConv,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    MachineFrameInfo *MFI,
    unsigned i) const
{
  // Create the nodes corresponding to a load from this parameter slot.
  ISD::ArgFlagsTy Flags = Ins[i].Flags;

  // Under guaranteed tail-call opt with fastcc the slot may be overwritten by
  // outgoing-argument lowering, so it must stay mutable.
  bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
    getTargetMachine().Options.GuaranteedTailCallOpt;
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();

  // FIXME: For now, all byval parameter objects are marked mutable. This can
  // be changed with more analysis.
  // In case of tail call optimization mark all arguments mutable. Since they
  // could be overwritten by lowering of arguments in case of a tail call.
  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
      VA.getLocMemOffset(), isImmutable);
  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());

  // byval arguments are passed as a pointer to the object, not a load of it.
  if (Flags.isByVal())
    return FIN;
  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
      MachinePointerInfo::getFixedStack(FI),
      false, false, false, 0);
}
//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
// Constructor: registers per-type operation actions (Custom/Expand/Legal) and
// global lowering preferences for the AMDIL target.
AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF())
{
  // All value types the target handles at all.
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  // Scalar integer types.
  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  // Scalar floating point types.
  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  // Vector types.
  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  // These are the current register classes that are
  // supported

  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
    // We cannot sextinreg, expand to shifts
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_CC, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    // 64-bit division has no custom SDIV lowering yet.
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    // NOTE(review): ISD::SETOLT etc. are condition codes, not node opcodes;
    // passing them to setOperationAction looks like it was meant to be
    // setCondCodeAction — confirm against the ISD enum values in use.
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // GPU also does not have divrem function for signed or unsigned
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // GPU doesn't have a rotl, rotr, or byteswap instruction
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // GPU doesn't have any counting operators
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
  {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);

  }
  // 64-bit integer support is device-dependent.
  if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  // Double precision support is device-dependent.
  if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    // we support loading/storing v2f64 but not operations on the type
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SETCC, MVT::Other, Custom);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
  setOperationAction(ISD::Constant          , MVT::i32    , Legal);
  setOperationAction(ISD::TRAP              , MVT::Other  , Legal);

  setStackPointerRegisterToSaveRestore(AMDGPU::SP);
  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  // Effectively never fall back to libc for mem ops.
  maxStoresPerMemcpy  = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset  = 4096;

  // NOTE(review): the names below are local variables, not macros; these
  // #undef directives are harmless no-ops.
#undef numTypes
#undef numIntTypes
#undef numVectorTypes
#undef numFloatTypes
}

// getTargetNodeName - Return a printable name for the given AMDIL-specific
// DAG node opcode, or null for opcodes not listed here.
const char *
AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  switch (Opcode) {
    default: return 0;
    case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
    case AMDILISD::MAD: return "AMDILISD::MAD";
    case AMDILISD::CALL: return "AMDILISD::CALL";
    case AMDILISD::SELECT_CC: return "AMDILISD::SELECT_CC";
    case AMDILISD::UMUL: return "AMDILISD::UMUL";
    case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
    case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
    case AMDILISD::CMP: return "AMDILISD::CMP";
    case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
    case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
    case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
    case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
    case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
    case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
    case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
    case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";

  };
}
// getTgtMemIntrinsic - No target intrinsics touch memory in a way the
// selection DAG needs to know about; always report false.
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  return false;
}
// The backend supports 32 and 64 bit floating point immediates
bool
AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return true;
  } else {
    return false;
  }
}

// ShouldShrinkFPConstant - Since both f32 and f64 immediates are legal
// (see isFPImmLegal), never shrink an f64 constant to f32.
bool
AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return false;
  } else {
    return true;
  }
}


// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
// be zero. Op is expected to be a target specific node. Used by DAG
// combiner.
724 725void 726AMDILTargetLowering::computeMaskedBitsForTargetNode( 727 const SDValue Op, 728 APInt &KnownZero, 729 APInt &KnownOne, 730 const SelectionDAG &DAG, 731 unsigned Depth) const 732{ 733 APInt KnownZero2; 734 APInt KnownOne2; 735 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything 736 switch (Op.getOpcode()) { 737 default: break; 738 case AMDILISD::SELECT_CC: 739 DAG.ComputeMaskedBits( 740 Op.getOperand(1), 741 KnownZero, 742 KnownOne, 743 Depth + 1 744 ); 745 DAG.ComputeMaskedBits( 746 Op.getOperand(0), 747 KnownZero2, 748 KnownOne2 749 ); 750 assert((KnownZero & KnownOne) == 0 751 && "Bits known to be one AND zero?"); 752 assert((KnownZero2 & KnownOne2) == 0 753 && "Bits known to be one AND zero?"); 754 // Only known if known in both the LHS and RHS 755 KnownOne &= KnownOne2; 756 KnownZero &= KnownZero2; 757 break; 758 }; 759} 760 761// This is the function that determines which calling convention should 762// be used. Currently there is only one calling convention 763CCAssignFn* 764AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const 765{ 766 //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 767 return CC_AMDIL32; 768} 769 770// LowerCallResult - Lower the result values of an ISD::CALL into the 771// appropriate copies out of appropriate physical registers. This assumes that 772// Chain/InFlag are the input chain/flag to use, and that TheCall is the call 773// being lowered. The returns a SDNode with the same number of values as the 774// ISD::CALL. 
// LowerCallResult - Copy each register-assigned return value of a call out of
// its physical register, threading the chain and glue through consecutive
// CopyFromReg nodes so they stay adjacent to the call.
SDValue
AMDILTargetLowering::LowerCallResult(
    SDValue Chain,
    SDValue InFlag,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  // Assign locations to each value returned by this call
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    EVT CopyVT = RVLocs[i].getValVT();
    if (RVLocs[i].isRegLoc()) {
      // CopyFromReg produces (value, chain, glue); keep result 1 (chain) as
      // the new Chain, and pull value/glue off the same node by index.
      Chain = DAG.getCopyFromReg(
          Chain,
          dl,
          RVLocs[i].getLocReg(),
          CopyVT,
          InFlag
          ).getValue(1);
      SDValue Val = Chain.getValue(0);  // the copied return value
      InFlag = Chain.getValue(2);       // glue for the next CopyFromReg
      InVals.push_back(Val);
    }
  }

  return Chain;

}

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

// Recursively assign SDNodeOrdering to any unordered nodes
// This is necessary to maintain source ordering of instructions
// under -O0 to avoid odd-looking "skipping around" issues.
820 static const SDValue 821Ordered( SelectionDAG &DAG, unsigned order, const SDValue New ) 822{ 823 if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) { 824 DAG.AssignOrdering( New.getNode(), order ); 825 for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i) 826 Ordered( DAG, order, New.getOperand(i) ); 827 } 828 return New; 829} 830 831#define LOWER(A) \ 832 case ISD:: A: \ 833return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) ) 834 835SDValue 836AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const 837{ 838 switch (Op.getOpcode()) { 839 default: 840 Op.getNode()->dump(); 841 assert(0 && "Custom lowering code for this" 842 "instruction is not implemented yet!"); 843 break; 844 LOWER(GlobalAddress); 845 LOWER(JumpTable); 846 LOWER(ConstantPool); 847 LOWER(ExternalSymbol); 848 LOWER(SDIV); 849 LOWER(SREM); 850 LOWER(BUILD_VECTOR); 851 LOWER(SELECT); 852 LOWER(SETCC); 853 LOWER(SIGN_EXTEND_INREG); 854 LOWER(DYNAMIC_STACKALLOC); 855 LOWER(BRCOND); 856 LOWER(BR_CC); 857 } 858 return Op; 859} 860 861#undef LOWER 862 863SDValue 864AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const 865{ 866 SDValue DST = Op; 867 const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op); 868 const GlobalValue *G = GADN->getGlobal(); 869 DebugLoc DL = Op.getDebugLoc(); 870 const GlobalVariable *GV = dyn_cast<GlobalVariable>(G); 871 if (!GV) { 872 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 873 } else { 874 if (GV->hasInitializer()) { 875 const Constant *C = dyn_cast<Constant>(GV->getInitializer()); 876 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) { 877 DST = DAG.getConstant(CI->getValue(), Op.getValueType()); 878 } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) { 879 DST = DAG.getConstantFP(CF->getValueAPF(), 880 Op.getValueType()); 881 } else if (dyn_cast<ConstantAggregateZero>(C)) { 882 EVT VT = Op.getValueType(); 883 if (VT.isInteger()) { 884 DST = DAG.getConstant(0, VT); 885 } 
else { 886 DST = DAG.getConstantFP(0, VT); 887 } 888 } else { 889 assert(!"lowering this type of Global Address " 890 "not implemented yet!"); 891 C->dump(); 892 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 893 } 894 } else { 895 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 896 } 897 } 898 return DST; 899} 900 901SDValue 902AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const 903{ 904 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 905 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); 906 return Result; 907} 908SDValue 909AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const 910{ 911 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 912 EVT PtrVT = Op.getValueType(); 913 SDValue Result; 914 if (CP->isMachineConstantPoolEntry()) { 915 Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 916 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); 917 } else { 918 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 919 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); 920 } 921 return Result; 922} 923 924SDValue 925AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const 926{ 927 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 928 SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32); 929 return Result; 930} 931 932/// LowerFORMAL_ARGUMENTS - transform physical registers into 933/// virtual registers and generate load operations for 934/// arguments places on the stack. 
/// TODO: isVarArg, hasStructRet, isMemReg
// LowerFormalArguments - For each incoming argument, either copy it out of
// the physical register CC analysis assigned (inserting Assert[SZ]ext +
// TRUNCATE for values promoted to 32 bits) or materialize it from its stack
// slot via LowerMemArgument.  Vararg functions are not supported.
SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFor(
          RegVT.getSimpleVT().SimpleTy);

      // Make the incoming physreg a live-in and read it through a vreg.
      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits. Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      // Any promotion (SExt/ZExt/AExt) still needs a truncate back down to
      // the value type.
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if(VA.isMemLoc()) {
      // Stack-assigned argument: load it (or its frame index for byval).
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
  // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
// NOTE(review): the assert fires unconditionally, so in a debug build this
// helper aborts before the memcpy is ever built; in release builds it still
// emits the getMemcpy node. Byval arguments are effectively unsupported.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  assert(0 && "MemCopy does not exist yet");
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);

  return DAG.getMemcpy(Chain,
      Src.getDebugLoc(),
      Dst, Src, SizeNode, Flags.getByValAlign(),
      /*IsVol=*/false, /*AlwaysInline=*/true,
      MachinePointerInfo(), MachinePointerInfo());
}

/// LowerMemOpCallTo - Store one stack-assigned outgoing call argument.
/// Computes StackPtr + LocMemOffset and either byval-copies Arg to it
/// (via CreateCopyOfByValArgument) or emits a plain store. Returns the
/// resulting chain/store node.
SDValue
AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
    SDValue StackPtr, SDValue Arg,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    ISD::ArgFlagsTy Flags) const
{
  unsigned int LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD,
      dl,
      getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
  } else {
    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
        MachinePointerInfo::getStack(LocMemOffset),
        false, false, 0);
  }
  return PtrOff;
}
/// LowerCall - function arguments are copied from virtual
/// regs to (physical regs)/(stack frame), CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Tail calls are unconditionally disabled; every isTailCall branch
  // below is therefore dead and kept only as an implementation roadmap.
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: DO we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the calling operands, but need to change
  // if we have more than one calling convention
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  // Bytes of stack the outgoing arguments occupy (for CALLSEQ bracketing).
  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = OutVals[i];
    // Promote the value if needed (widen to the location type chosen by
    // the calling convention).
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::SIGN_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::ZERO_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
      case CCValAssign::AExt:
        Arg = DAG.getNode(ISD::ANY_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
    }

    if (VA.isRegLoc()) {
      // Defer the physreg copy until after all stack stores are built.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());

      // emit ISD::STORE which stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  // Merge all independent argument stores into a single chain.
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  // Glue the CopyToReg nodes together (and eventually to the call) so the
  // scheduler keeps them adjacent.
  SDValue InFlag;
  if (!isTailCall) {
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
      InVals);
}

/// LowerSDIV - Dispatch signed division lowering by scalar element width:
/// i64/i32 get dedicated expansions, i16/i8 go through the 24-bit
/// float-assisted path, anything else is returned unchanged.
SDValue
AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    // Unhandled type: pass the node through untouched.
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

/// LowerSREM - Dispatch signed remainder lowering by scalar element width;
/// unlike LowerSDIV, i16 and i8 each have their own widening helper.
SDValue
AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    // Unhandled type: pass the node through untouched.
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

/// LowerBUILD_VECTOR - Build a vector by splatting operand 0 via
/// AMDILISD::VBUILD, then overwriting lanes 1..3 with INSERT_VECTOR_ELT for
/// each operand that is not UNDEF. The switch falls through intentionally
/// (case 4 -> 3 -> 2) so higher lanes are inserted first.
/// NOTE(review): the insert indices are 7/6/5 rather than 3/2/1 — presumably
/// an AMDIL-specific lane encoding consumed by instruction selection; verify
/// against the INSERT_VECTOR_ELT patterns before changing.
SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  };
  return Nodes1;
}

/// LowerSELECT - Lower ISD::SELECT to AMDILISD::CMOVLOG after converting the
/// condition to the result type with getConversionNode.
SDValue
AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond = Op.getOperand(0);
  SDValue LHS = Op.getOperand(1);
  SDValue RHS = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(AMDILISD::CMOVLOG,
      DL,
      Op.getValueType(), Cond, LHS, RHS);
  return Cond;
}
/// LowerSETCC - Lower ISD::SETCC by building a SELECT_CC that yields all-ones
/// or zero, converting that to the result type, then masking with 1 so the
/// final value is the canonical 0/1 boolean.
SDValue
AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  // Validate that the condition code is representable for this operand type.
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      LHS.getValueType(),
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      Cond.getValueType(),
      DAG.getConstant(1, Cond.getValueType()),
      Cond);
  return Cond;
}

/// LowerSIGN_EXTEND_INREG - Expand sign-extend-in-reg as SHL then SRA by
/// (srcBits - baseBits). Sub-32-bit sources are first zero-extended to 32-bit
/// lanes (shift amount recomputed as 32 - baseBits) and truncated back at the
/// end, so the shifts always run at machine width.
SDValue
AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
/// genIntType - Return an i32- or i64-based EVT whose total bit width matches
/// size * numEle: element count is (size*numEle) / (64 or 32), clamped to at
/// least 1; a count of 1 yields the scalar type, otherwise a vector type.
EVT
AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int iSize = (size * numEle);
  // Divide total bits by the element width (>>6 for 64-bit, >>5 for 32-bit).
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}

/// LowerDYNAMIC_STACKALLOC - Bump the stack pointer register (AMDGPU::SP) by
/// Size and return {old SP as the allocated address? — see note, new chain}.
/// NOTE(review): the merged result returns NewSP (the post-increment pointer)
/// as the allocation address; confirm AMDIL's stack grows such that this is
/// the intended base of the new object.
SDValue
AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  unsigned int SPReg = AMDGPU::SP;
  DebugLoc DL = Op.getDebugLoc();
  SDValue SP = DAG.getCopyFromReg(Chain,
      DL,
      SPReg, MVT::i32);
  SDValue NewSP = DAG.getNode(ISD::ADD,
      DL,
      MVT::i32, SP, Size);
  Chain = DAG.getCopyToReg(SP.getValue(1),
      DL,
      SPReg, NewSP);
  SDValue Ops[2] = {NewSP, Chain};
  Chain = DAG.getMergeValues(Ops, 2 ,DL);
  return Chain;
}
/// LowerBRCOND - Lower ISD::BRCOND directly to AMDILISD::BRANCH_COND with the
/// operand order (Chain, Jump target, Condition).
SDValue
AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);
  SDValue Result;
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Chain, Jump, Cond);
  return Result;
}

/// LowerBR_CC - Lower ISD::BR_CC by materializing the comparison as a
/// SELECT_CC producing all-ones/zero, then branching on that value with
/// AMDILISD::BRANCH_COND.
SDValue
AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      LHS.getValueType(),
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

// LowerRET - Lower an ISD::RET node: copy each return value into its
// convention-assigned physical register (glued together so the copies stay
// adjacent to the return) and emit AMDILISD::RET_FLAG.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG)
const
{
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // ISD::Ret => ret chain, (regnum1, val1), ...
    // So i * 2 + 1 index only the regnums
    Chain = DAG.getCopyToReg(Chain,
        dl,
        VA.getLocReg(),
        ValToCopy,
        Flag);
    // guarantee that all emitted copies are stuck together
    // avoiding something bad
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
  // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;
  if (Flag.getNode())
    RetOps.push_back(Flag);

  Flag = DAG.getNode(AMDILISD::RET_FLAG,
      dl,
      MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}

/// getFunctionAlignment - AMDIL imposes no extra function alignment
/// (log2 alignment of 0).
unsigned int
AMDILTargetLowering::getFunctionAlignment(const Function *) const
{
  return 0;
}

/// LowerSDIV24 - Lower signed i8/i16 (scalar or vector) division using
/// 32-bit integers and single-precision floats: compute a float quotient via
/// AMDILISD::DIV_INF, truncate it, then add a +/-1 correction (jq) when the
/// rounded-back remainder check |fr| >= |fb| says the truncated quotient is
/// one off. Values this narrow fit exactly in f32, so the result is exact.
SDValue
AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  MVT INTTY;
  MVT FLTTY;
  if (!OVT.isVector()) {
    INTTY = MVT::i32;
    FLTTY = MVT::f32;
  } else if (OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib, (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  // NOTE(review): both branches of this if/else are identical; the split is
  // redundant (likely a leftover from a scalar-vs-vector distinction).
  SDValue cv;
  if (INTTY == MVT::i32) {
    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
  } else {
    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
  }
  // jq = (cv ? jq : 0);
  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
      DAG.getConstant(0, OVT));
  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}

/// LowerSDIV32 - Lower signed 32-bit division via unsigned division:
/// take absolute values using the (x + mask) ^ mask trick (mask = x >> 31
/// computed here as a select of -1/0), do UDIV, then restore the sign with
/// the XOR of the two sign masks.
SDValue
AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The LowerSDIV32 function generates equivalent to the following IL.
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

/// LowerSDIV64 - 64-bit signed division is not expanded here; the node is
/// returned unchanged.
SDValue
AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}

/// LowerSREM8 - Lower signed i8 (scalar or vector) remainder by
/// sign-extending both operands to 32-bit lanes, taking SREM there, and
/// truncating back.
SDValue
AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

/// LowerSREM16 - Lower signed i16 (scalar or vector) remainder by
/// sign-extending both operands to 32-bit lanes, taking SREM there, and
/// truncating back.
SDValue
AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

/// LowerSREM32 - Lower signed 32-bit remainder: take absolute values with
/// the sign-mask add/xor trick, compute an unsigned quotient/remainder,
/// reconstruct |LHS| - q*|RHS|, and restore the dividend's sign (r10).
SDValue
AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The LowerSREM32 function generates equivalent to the following IL.
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r20, r0, r1
  // umul r20, r20, r1
  // sub r0, r0, r20
  // iadd r0, r0, r10
  // ixor DST, r0, r10

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
      r0, DAG.getConstant(0, OVT));

  // ilt r11, r1, 0
  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
      r1, DAG.getConstant(0, OVT));

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1
  // NOTE(review): this emits ISD::UREM although the IL comment above says
  // "udiv", and the following UMUL + SUB only compute a remainder if r20 is
  // the quotient. As written this yields r0 - (r0 % r1) * r1, which is not
  // the remainder — suspected bug; ISD::UDIV was presumably intended. Verify
  // before changing.
  SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);

  // sub r0, r0, r20
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

/// LowerSREM64 - 64-bit signed remainder is not expanded here; the node is
/// returned unchanged.
SDValue
AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}