AMDILISelLowering.cpp revision 5aaaa6a426258dc714c7346bec062795998f9986
1//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//==-----------------------------------------------------------------------===// 9// 10// This file implements the interfaces that AMDIL uses to lower LLVM code into a 11// selection DAG. 12// 13//===----------------------------------------------------------------------===// 14 15#include "AMDILISelLowering.h" 16#include "AMDILDevices.h" 17#include "AMDILIntrinsicInfo.h" 18#include "AMDILSubtarget.h" 19#include "AMDILTargetMachine.h" 20#include "AMDILUtilityFunctions.h" 21#include "llvm/CallingConv.h" 22#include "llvm/CodeGen/MachineFrameInfo.h" 23#include "llvm/CodeGen/MachineRegisterInfo.h" 24#include "llvm/CodeGen/PseudoSourceValue.h" 25#include "llvm/CodeGen/SelectionDAG.h" 26#include "llvm/CodeGen/SelectionDAGNodes.h" 27#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 28#include "llvm/DerivedTypes.h" 29#include "llvm/Instructions.h" 30#include "llvm/Intrinsics.h" 31#include "llvm/Support/raw_ostream.h" 32#include "llvm/Target/TargetOptions.h" 33 34using namespace llvm; 35#define ISDBITCAST ISD::BITCAST 36#define MVTGLUE MVT::Glue 37//===----------------------------------------------------------------------===// 38// Calling Convention Implementation 39//===----------------------------------------------------------------------===// 40#include "AMDILGenCallingConv.inc" 41 42//===----------------------------------------------------------------------===// 43// TargetLowering Implementation Help Functions Begin 44//===----------------------------------------------------------------------===// 45 static SDValue 46getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType) 47{ 48 DebugLoc DL = Src.getDebugLoc(); 49 EVT svt = Src.getValueType().getScalarType(); 50 EVT dvt = 
Dst.getValueType().getScalarType(); 51 if (svt.isFloatingPoint() && dvt.isFloatingPoint()) { 52 if (dvt.bitsGT(svt)) { 53 Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src); 54 } else if (svt.bitsLT(svt)) { 55 Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src, 56 DAG.getConstant(1, MVT::i32)); 57 } 58 } else if (svt.isInteger() && dvt.isInteger()) { 59 if (!svt.bitsEq(dvt)) { 60 Src = DAG.getSExtOrTrunc(Src, DL, dvt); 61 } else { 62 Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src); 63 } 64 } else if (svt.isInteger()) { 65 unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP; 66 if (!svt.bitsEq(dvt)) { 67 if (dvt.getSimpleVT().SimpleTy == MVT::f32) { 68 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32); 69 } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) { 70 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64); 71 } else { 72 assert(0 && "We only support 32 and 64bit fp types"); 73 } 74 } 75 Src = DAG.getNode(opcode, DL, dvt, Src); 76 } else if (dvt.isInteger()) { 77 unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT; 78 if (svt.getSimpleVT().SimpleTy == MVT::f32) { 79 Src = DAG.getNode(opcode, DL, MVT::i32, Src); 80 } else if (svt.getSimpleVT().SimpleTy == MVT::f64) { 81 Src = DAG.getNode(opcode, DL, MVT::i64, Src); 82 } else { 83 assert(0 && "We only support 32 and 64bit fp types"); 84 } 85 Src = DAG.getSExtOrTrunc(Src, DL, dvt); 86 } 87 return Src; 88} 89// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC 90// condition. 
91 static AMDILCC::CondCodes 92CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type) 93{ 94 switch (CC) { 95 default: 96 { 97 errs()<<"Condition Code: "<< (unsigned int)CC<<"\n"; 98 assert(0 && "Unknown condition code!"); 99 } 100 case ISD::SETO: 101 switch(type) { 102 case MVT::f32: 103 return AMDILCC::IL_CC_F_O; 104 case MVT::f64: 105 return AMDILCC::IL_CC_D_O; 106 default: 107 assert(0 && "Opcode combination not generated correctly!"); 108 return AMDILCC::COND_ERROR; 109 }; 110 case ISD::SETUO: 111 switch(type) { 112 case MVT::f32: 113 return AMDILCC::IL_CC_F_UO; 114 case MVT::f64: 115 return AMDILCC::IL_CC_D_UO; 116 default: 117 assert(0 && "Opcode combination not generated correctly!"); 118 return AMDILCC::COND_ERROR; 119 }; 120 case ISD::SETGT: 121 switch (type) { 122 case MVT::i1: 123 case MVT::i8: 124 case MVT::i16: 125 case MVT::i32: 126 return AMDILCC::IL_CC_I_GT; 127 case MVT::f32: 128 return AMDILCC::IL_CC_F_GT; 129 case MVT::f64: 130 return AMDILCC::IL_CC_D_GT; 131 case MVT::i64: 132 return AMDILCC::IL_CC_L_GT; 133 default: 134 assert(0 && "Opcode combination not generated correctly!"); 135 return AMDILCC::COND_ERROR; 136 }; 137 case ISD::SETGE: 138 switch (type) { 139 case MVT::i1: 140 case MVT::i8: 141 case MVT::i16: 142 case MVT::i32: 143 return AMDILCC::IL_CC_I_GE; 144 case MVT::f32: 145 return AMDILCC::IL_CC_F_GE; 146 case MVT::f64: 147 return AMDILCC::IL_CC_D_GE; 148 case MVT::i64: 149 return AMDILCC::IL_CC_L_GE; 150 default: 151 assert(0 && "Opcode combination not generated correctly!"); 152 return AMDILCC::COND_ERROR; 153 }; 154 case ISD::SETLT: 155 switch (type) { 156 case MVT::i1: 157 case MVT::i8: 158 case MVT::i16: 159 case MVT::i32: 160 return AMDILCC::IL_CC_I_LT; 161 case MVT::f32: 162 return AMDILCC::IL_CC_F_LT; 163 case MVT::f64: 164 return AMDILCC::IL_CC_D_LT; 165 case MVT::i64: 166 return AMDILCC::IL_CC_L_LT; 167 default: 168 assert(0 && "Opcode combination not generated correctly!"); 169 return AMDILCC::COND_ERROR; 170 }; 
171 case ISD::SETLE: 172 switch (type) { 173 case MVT::i1: 174 case MVT::i8: 175 case MVT::i16: 176 case MVT::i32: 177 return AMDILCC::IL_CC_I_LE; 178 case MVT::f32: 179 return AMDILCC::IL_CC_F_LE; 180 case MVT::f64: 181 return AMDILCC::IL_CC_D_LE; 182 case MVT::i64: 183 return AMDILCC::IL_CC_L_LE; 184 default: 185 assert(0 && "Opcode combination not generated correctly!"); 186 return AMDILCC::COND_ERROR; 187 }; 188 case ISD::SETNE: 189 switch (type) { 190 case MVT::i1: 191 case MVT::i8: 192 case MVT::i16: 193 case MVT::i32: 194 return AMDILCC::IL_CC_I_NE; 195 case MVT::f32: 196 return AMDILCC::IL_CC_F_NE; 197 case MVT::f64: 198 return AMDILCC::IL_CC_D_NE; 199 case MVT::i64: 200 return AMDILCC::IL_CC_L_NE; 201 default: 202 assert(0 && "Opcode combination not generated correctly!"); 203 return AMDILCC::COND_ERROR; 204 }; 205 case ISD::SETEQ: 206 switch (type) { 207 case MVT::i1: 208 case MVT::i8: 209 case MVT::i16: 210 case MVT::i32: 211 return AMDILCC::IL_CC_I_EQ; 212 case MVT::f32: 213 return AMDILCC::IL_CC_F_EQ; 214 case MVT::f64: 215 return AMDILCC::IL_CC_D_EQ; 216 case MVT::i64: 217 return AMDILCC::IL_CC_L_EQ; 218 default: 219 assert(0 && "Opcode combination not generated correctly!"); 220 return AMDILCC::COND_ERROR; 221 }; 222 case ISD::SETUGT: 223 switch (type) { 224 case MVT::i1: 225 case MVT::i8: 226 case MVT::i16: 227 case MVT::i32: 228 return AMDILCC::IL_CC_U_GT; 229 case MVT::f32: 230 return AMDILCC::IL_CC_F_UGT; 231 case MVT::f64: 232 return AMDILCC::IL_CC_D_UGT; 233 case MVT::i64: 234 return AMDILCC::IL_CC_UL_GT; 235 default: 236 assert(0 && "Opcode combination not generated correctly!"); 237 return AMDILCC::COND_ERROR; 238 }; 239 case ISD::SETUGE: 240 switch (type) { 241 case MVT::i1: 242 case MVT::i8: 243 case MVT::i16: 244 case MVT::i32: 245 return AMDILCC::IL_CC_U_GE; 246 case MVT::f32: 247 return AMDILCC::IL_CC_F_UGE; 248 case MVT::f64: 249 return AMDILCC::IL_CC_D_UGE; 250 case MVT::i64: 251 return AMDILCC::IL_CC_UL_GE; 252 default: 253 assert(0 
&& "Opcode combination not generated correctly!"); 254 return AMDILCC::COND_ERROR; 255 }; 256 case ISD::SETULT: 257 switch (type) { 258 case MVT::i1: 259 case MVT::i8: 260 case MVT::i16: 261 case MVT::i32: 262 return AMDILCC::IL_CC_U_LT; 263 case MVT::f32: 264 return AMDILCC::IL_CC_F_ULT; 265 case MVT::f64: 266 return AMDILCC::IL_CC_D_ULT; 267 case MVT::i64: 268 return AMDILCC::IL_CC_UL_LT; 269 default: 270 assert(0 && "Opcode combination not generated correctly!"); 271 return AMDILCC::COND_ERROR; 272 }; 273 case ISD::SETULE: 274 switch (type) { 275 case MVT::i1: 276 case MVT::i8: 277 case MVT::i16: 278 case MVT::i32: 279 return AMDILCC::IL_CC_U_LE; 280 case MVT::f32: 281 return AMDILCC::IL_CC_F_ULE; 282 case MVT::f64: 283 return AMDILCC::IL_CC_D_ULE; 284 case MVT::i64: 285 return AMDILCC::IL_CC_UL_LE; 286 default: 287 assert(0 && "Opcode combination not generated correctly!"); 288 return AMDILCC::COND_ERROR; 289 }; 290 case ISD::SETUNE: 291 switch (type) { 292 case MVT::i1: 293 case MVT::i8: 294 case MVT::i16: 295 case MVT::i32: 296 return AMDILCC::IL_CC_U_NE; 297 case MVT::f32: 298 return AMDILCC::IL_CC_F_UNE; 299 case MVT::f64: 300 return AMDILCC::IL_CC_D_UNE; 301 case MVT::i64: 302 return AMDILCC::IL_CC_UL_NE; 303 default: 304 assert(0 && "Opcode combination not generated correctly!"); 305 return AMDILCC::COND_ERROR; 306 }; 307 case ISD::SETUEQ: 308 switch (type) { 309 case MVT::i1: 310 case MVT::i8: 311 case MVT::i16: 312 case MVT::i32: 313 return AMDILCC::IL_CC_U_EQ; 314 case MVT::f32: 315 return AMDILCC::IL_CC_F_UEQ; 316 case MVT::f64: 317 return AMDILCC::IL_CC_D_UEQ; 318 case MVT::i64: 319 return AMDILCC::IL_CC_UL_EQ; 320 default: 321 assert(0 && "Opcode combination not generated correctly!"); 322 return AMDILCC::COND_ERROR; 323 }; 324 case ISD::SETOGT: 325 switch (type) { 326 case MVT::f32: 327 return AMDILCC::IL_CC_F_OGT; 328 case MVT::f64: 329 return AMDILCC::IL_CC_D_OGT; 330 case MVT::i1: 331 case MVT::i8: 332 case MVT::i16: 333 case MVT::i32: 334 case 
MVT::i64: 335 default: 336 assert(0 && "Opcode combination not generated correctly!"); 337 return AMDILCC::COND_ERROR; 338 }; 339 case ISD::SETOGE: 340 switch (type) { 341 case MVT::f32: 342 return AMDILCC::IL_CC_F_OGE; 343 case MVT::f64: 344 return AMDILCC::IL_CC_D_OGE; 345 case MVT::i1: 346 case MVT::i8: 347 case MVT::i16: 348 case MVT::i32: 349 case MVT::i64: 350 default: 351 assert(0 && "Opcode combination not generated correctly!"); 352 return AMDILCC::COND_ERROR; 353 }; 354 case ISD::SETOLT: 355 switch (type) { 356 case MVT::f32: 357 return AMDILCC::IL_CC_F_OLT; 358 case MVT::f64: 359 return AMDILCC::IL_CC_D_OLT; 360 case MVT::i1: 361 case MVT::i8: 362 case MVT::i16: 363 case MVT::i32: 364 case MVT::i64: 365 default: 366 assert(0 && "Opcode combination not generated correctly!"); 367 return AMDILCC::COND_ERROR; 368 }; 369 case ISD::SETOLE: 370 switch (type) { 371 case MVT::f32: 372 return AMDILCC::IL_CC_F_OLE; 373 case MVT::f64: 374 return AMDILCC::IL_CC_D_OLE; 375 case MVT::i1: 376 case MVT::i8: 377 case MVT::i16: 378 case MVT::i32: 379 case MVT::i64: 380 default: 381 assert(0 && "Opcode combination not generated correctly!"); 382 return AMDILCC::COND_ERROR; 383 }; 384 case ISD::SETONE: 385 switch (type) { 386 case MVT::f32: 387 return AMDILCC::IL_CC_F_ONE; 388 case MVT::f64: 389 return AMDILCC::IL_CC_D_ONE; 390 case MVT::i1: 391 case MVT::i8: 392 case MVT::i16: 393 case MVT::i32: 394 case MVT::i64: 395 default: 396 assert(0 && "Opcode combination not generated correctly!"); 397 return AMDILCC::COND_ERROR; 398 }; 399 case ISD::SETOEQ: 400 switch (type) { 401 case MVT::f32: 402 return AMDILCC::IL_CC_F_OEQ; 403 case MVT::f64: 404 return AMDILCC::IL_CC_D_OEQ; 405 case MVT::i1: 406 case MVT::i8: 407 case MVT::i16: 408 case MVT::i32: 409 case MVT::i64: 410 default: 411 assert(0 && "Opcode combination not generated correctly!"); 412 return AMDILCC::COND_ERROR; 413 }; 414 }; 415} 416 417 static unsigned int 418translateToOpcode(uint64_t CCCode, unsigned int 
regClass) 419{ 420 switch (CCCode) { 421 case AMDILCC::IL_CC_D_EQ: 422 case AMDILCC::IL_CC_D_OEQ: 423 if (regClass == AMDIL::GPRV2F64RegClassID) { 424 return (unsigned int)AMDIL::DEQ_v2f64; 425 } else { 426 return (unsigned int)AMDIL::DEQ; 427 } 428 case AMDILCC::IL_CC_D_LE: 429 case AMDILCC::IL_CC_D_OLE: 430 case AMDILCC::IL_CC_D_ULE: 431 case AMDILCC::IL_CC_D_GE: 432 case AMDILCC::IL_CC_D_OGE: 433 case AMDILCC::IL_CC_D_UGE: 434 return (unsigned int)AMDIL::DGE; 435 case AMDILCC::IL_CC_D_LT: 436 case AMDILCC::IL_CC_D_OLT: 437 case AMDILCC::IL_CC_D_ULT: 438 case AMDILCC::IL_CC_D_GT: 439 case AMDILCC::IL_CC_D_OGT: 440 case AMDILCC::IL_CC_D_UGT: 441 return (unsigned int)AMDIL::DLT; 442 case AMDILCC::IL_CC_D_NE: 443 case AMDILCC::IL_CC_D_UNE: 444 return (unsigned int)AMDIL::DNE; 445 case AMDILCC::IL_CC_F_EQ: 446 case AMDILCC::IL_CC_F_OEQ: 447 return (unsigned int)AMDIL::FEQ; 448 case AMDILCC::IL_CC_F_LE: 449 case AMDILCC::IL_CC_F_ULE: 450 case AMDILCC::IL_CC_F_OLE: 451 case AMDILCC::IL_CC_F_GE: 452 case AMDILCC::IL_CC_F_UGE: 453 case AMDILCC::IL_CC_F_OGE: 454 return (unsigned int)AMDIL::FGE; 455 case AMDILCC::IL_CC_F_LT: 456 case AMDILCC::IL_CC_F_OLT: 457 case AMDILCC::IL_CC_F_ULT: 458 case AMDILCC::IL_CC_F_GT: 459 case AMDILCC::IL_CC_F_OGT: 460 case AMDILCC::IL_CC_F_UGT: 461 if (regClass == AMDIL::GPRV2F32RegClassID) { 462 return (unsigned int)AMDIL::FLT_v2f32; 463 } else if (regClass == AMDIL::GPRV4F32RegClassID) { 464 return (unsigned int)AMDIL::FLT_v4f32; 465 } else { 466 return (unsigned int)AMDIL::FLT; 467 } 468 case AMDILCC::IL_CC_F_NE: 469 case AMDILCC::IL_CC_F_UNE: 470 return (unsigned int)AMDIL::FNE; 471 case AMDILCC::IL_CC_I_EQ: 472 case AMDILCC::IL_CC_U_EQ: 473 if (regClass == AMDIL::GPRI32RegClassID 474 || regClass == AMDIL::GPRI8RegClassID 475 || regClass == AMDIL::GPRI16RegClassID) { 476 return (unsigned int)AMDIL::IEQ; 477 } else if (regClass == AMDIL::GPRV2I32RegClassID 478 || regClass == AMDIL::GPRV2I8RegClassID 479 || regClass == 
AMDIL::GPRV2I16RegClassID) { 480 return (unsigned int)AMDIL::IEQ_v2i32; 481 } else if (regClass == AMDIL::GPRV4I32RegClassID 482 || regClass == AMDIL::GPRV4I8RegClassID 483 || regClass == AMDIL::GPRV4I16RegClassID) { 484 return (unsigned int)AMDIL::IEQ_v4i32; 485 } else { 486 assert(!"Unknown reg class!"); 487 } 488 case AMDILCC::IL_CC_L_EQ: 489 case AMDILCC::IL_CC_UL_EQ: 490 return (unsigned int)AMDIL::LEQ; 491 case AMDILCC::IL_CC_I_GE: 492 case AMDILCC::IL_CC_I_LE: 493 if (regClass == AMDIL::GPRI32RegClassID 494 || regClass == AMDIL::GPRI8RegClassID 495 || regClass == AMDIL::GPRI16RegClassID) { 496 return (unsigned int)AMDIL::IGE; 497 } else if (regClass == AMDIL::GPRV2I32RegClassID 498 || regClass == AMDIL::GPRI8RegClassID 499 || regClass == AMDIL::GPRI16RegClassID) { 500 return (unsigned int)AMDIL::IGE_v2i32; 501 } else if (regClass == AMDIL::GPRV4I32RegClassID 502 || regClass == AMDIL::GPRI8RegClassID 503 || regClass == AMDIL::GPRI16RegClassID) { 504 return (unsigned int)AMDIL::IGE_v4i32; 505 } else { 506 assert(!"Unknown reg class!"); 507 } 508 case AMDILCC::IL_CC_I_LT: 509 case AMDILCC::IL_CC_I_GT: 510 if (regClass == AMDIL::GPRI32RegClassID 511 || regClass == AMDIL::GPRI8RegClassID 512 || regClass == AMDIL::GPRI16RegClassID) { 513 return (unsigned int)AMDIL::ILT; 514 } else if (regClass == AMDIL::GPRV2I32RegClassID 515 || regClass == AMDIL::GPRI8RegClassID 516 || regClass == AMDIL::GPRI16RegClassID) { 517 return (unsigned int)AMDIL::ILT_v2i32; 518 } else if (regClass == AMDIL::GPRV4I32RegClassID 519 || regClass == AMDIL::GPRI8RegClassID 520 || regClass == AMDIL::GPRI16RegClassID) { 521 return (unsigned int)AMDIL::ILT_v4i32; 522 } else { 523 assert(!"Unknown reg class!"); 524 } 525 case AMDILCC::IL_CC_L_GE: 526 return (unsigned int)AMDIL::LGE; 527 case AMDILCC::IL_CC_L_LE: 528 return (unsigned int)AMDIL::LLE; 529 case AMDILCC::IL_CC_L_LT: 530 return (unsigned int)AMDIL::LLT; 531 case AMDILCC::IL_CC_L_GT: 532 return (unsigned int)AMDIL::LGT; 533 case 
AMDILCC::IL_CC_I_NE: 534 case AMDILCC::IL_CC_U_NE: 535 if (regClass == AMDIL::GPRI32RegClassID 536 || regClass == AMDIL::GPRI8RegClassID 537 || regClass == AMDIL::GPRI16RegClassID) { 538 return (unsigned int)AMDIL::INE; 539 } else if (regClass == AMDIL::GPRV2I32RegClassID 540 || regClass == AMDIL::GPRI8RegClassID 541 || regClass == AMDIL::GPRI16RegClassID) { 542 return (unsigned int)AMDIL::INE_v2i32; 543 } else if (regClass == AMDIL::GPRV4I32RegClassID 544 || regClass == AMDIL::GPRI8RegClassID 545 || regClass == AMDIL::GPRI16RegClassID) { 546 return (unsigned int)AMDIL::INE_v4i32; 547 } else { 548 assert(!"Unknown reg class!"); 549 } 550 case AMDILCC::IL_CC_U_GE: 551 case AMDILCC::IL_CC_U_LE: 552 if (regClass == AMDIL::GPRI32RegClassID 553 || regClass == AMDIL::GPRI8RegClassID 554 || regClass == AMDIL::GPRI16RegClassID) { 555 return (unsigned int)AMDIL::UGE; 556 } else if (regClass == AMDIL::GPRV2I32RegClassID 557 || regClass == AMDIL::GPRI8RegClassID 558 || regClass == AMDIL::GPRI16RegClassID) { 559 return (unsigned int)AMDIL::UGE_v2i32; 560 } else if (regClass == AMDIL::GPRV4I32RegClassID 561 || regClass == AMDIL::GPRI8RegClassID 562 || regClass == AMDIL::GPRI16RegClassID) { 563 return (unsigned int)AMDIL::UGE_v4i32; 564 } else { 565 assert(!"Unknown reg class!"); 566 } 567 case AMDILCC::IL_CC_L_NE: 568 case AMDILCC::IL_CC_UL_NE: 569 return (unsigned int)AMDIL::LNE; 570 case AMDILCC::IL_CC_UL_GE: 571 return (unsigned int)AMDIL::ULGE; 572 case AMDILCC::IL_CC_UL_LE: 573 return (unsigned int)AMDIL::ULLE; 574 case AMDILCC::IL_CC_U_LT: 575 if (regClass == AMDIL::GPRI32RegClassID 576 || regClass == AMDIL::GPRI8RegClassID 577 || regClass == AMDIL::GPRI16RegClassID) { 578 return (unsigned int)AMDIL::ULT; 579 } else if (regClass == AMDIL::GPRV2I32RegClassID 580 || regClass == AMDIL::GPRI8RegClassID 581 || regClass == AMDIL::GPRI16RegClassID) { 582 return (unsigned int)AMDIL::ULT_v2i32; 583 } else if (regClass == AMDIL::GPRV4I32RegClassID 584 || regClass == 
AMDIL::GPRI8RegClassID 585 || regClass == AMDIL::GPRI16RegClassID) { 586 return (unsigned int)AMDIL::ULT_v4i32; 587 } else { 588 assert(!"Unknown reg class!"); 589 } 590 case AMDILCC::IL_CC_U_GT: 591 if (regClass == AMDIL::GPRI32RegClassID 592 || regClass == AMDIL::GPRI8RegClassID 593 || regClass == AMDIL::GPRI16RegClassID) { 594 return (unsigned int)AMDIL::UGT; 595 } else if (regClass == AMDIL::GPRV2I32RegClassID 596 || regClass == AMDIL::GPRI8RegClassID 597 || regClass == AMDIL::GPRI16RegClassID) { 598 return (unsigned int)AMDIL::UGT_v2i32; 599 } else if (regClass == AMDIL::GPRV4I32RegClassID 600 || regClass == AMDIL::GPRI8RegClassID 601 || regClass == AMDIL::GPRI16RegClassID) { 602 return (unsigned int)AMDIL::UGT_v4i32; 603 } else { 604 assert(!"Unknown reg class!"); 605 } 606 case AMDILCC::IL_CC_UL_LT: 607 return (unsigned int)AMDIL::ULLT; 608 case AMDILCC::IL_CC_UL_GT: 609 return (unsigned int)AMDIL::ULGT; 610 case AMDILCC::IL_CC_F_UEQ: 611 case AMDILCC::IL_CC_D_UEQ: 612 case AMDILCC::IL_CC_F_ONE: 613 case AMDILCC::IL_CC_D_ONE: 614 case AMDILCC::IL_CC_F_O: 615 case AMDILCC::IL_CC_F_UO: 616 case AMDILCC::IL_CC_D_O: 617 case AMDILCC::IL_CC_D_UO: 618 // we don't care 619 return 0; 620 621 } 622 errs()<<"Opcode: "<<CCCode<<"\n"; 623 assert(0 && "Unknown opcode retrieved"); 624 return 0; 625} 626 627/// Helper function used by LowerFormalArguments 628static const TargetRegisterClass* 629getRegClassFromType(unsigned int type) { 630 switch (type) { 631 default: 632 assert(0 && "Passed in type does not match any register classes."); 633 case MVT::i8: 634 return &AMDIL::GPRI8RegClass; 635 case MVT::i16: 636 return &AMDIL::GPRI16RegClass; 637 case MVT::i32: 638 return &AMDIL::GPRI32RegClass; 639 case MVT::f32: 640 return &AMDIL::GPRF32RegClass; 641 case MVT::i64: 642 return &AMDIL::GPRI64RegClass; 643 case MVT::f64: 644 return &AMDIL::GPRF64RegClass; 645 case MVT::v4f32: 646 return &AMDIL::GPRV4F32RegClass; 647 case MVT::v4i8: 648 return &AMDIL::GPRV4I8RegClass; 649 
case MVT::v4i16: 650 return &AMDIL::GPRV4I16RegClass; 651 case MVT::v4i32: 652 return &AMDIL::GPRV4I32RegClass; 653 case MVT::v2f32: 654 return &AMDIL::GPRV2F32RegClass; 655 case MVT::v2i8: 656 return &AMDIL::GPRV2I8RegClass; 657 case MVT::v2i16: 658 return &AMDIL::GPRV2I16RegClass; 659 case MVT::v2i32: 660 return &AMDIL::GPRV2I32RegClass; 661 case MVT::v2f64: 662 return &AMDIL::GPRV2F64RegClass; 663 case MVT::v2i64: 664 return &AMDIL::GPRV2I64RegClass; 665 } 666} 667 668SDValue 669AMDILTargetLowering::LowerMemArgument( 670 SDValue Chain, 671 CallingConv::ID CallConv, 672 const SmallVectorImpl<ISD::InputArg> &Ins, 673 DebugLoc dl, SelectionDAG &DAG, 674 const CCValAssign &VA, 675 MachineFrameInfo *MFI, 676 unsigned i) const 677{ 678 // Create the nodes corresponding to a load from this parameter slot. 679 ISD::ArgFlagsTy Flags = Ins[i].Flags; 680 681 bool AlwaysUseMutable = (CallConv==CallingConv::Fast) && 682 getTargetMachine().Options.GuaranteedTailCallOpt; 683 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); 684 685 // FIXME: For now, all byval parameter objects are marked mutable. This can 686 // be changed with more analysis. 687 // In case of tail call optimization mark all arguments mutable. Since they 688 // could be overwritten by lowering of arguments in case of a tail call. 
689 int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, 690 VA.getLocMemOffset(), isImmutable); 691 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 692 693 if (Flags.isByVal()) 694 return FIN; 695 return DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 696 MachinePointerInfo::getFixedStack(FI), 697 false, false, false, 0); 698} 699//===----------------------------------------------------------------------===// 700// TargetLowering Implementation Help Functions End 701//===----------------------------------------------------------------------===// 702//===----------------------------------------------------------------------===// 703// Instruction generation functions 704//===----------------------------------------------------------------------===// 705uint32_t 706AMDILTargetLowering::addExtensionInstructions( 707 uint32_t reg, bool signedShift, 708 unsigned int simpleVT) const 709{ 710 int shiftSize = 0; 711 uint32_t LShift, RShift; 712 switch(simpleVT) 713 { 714 default: 715 return reg; 716 case AMDIL::GPRI8RegClassID: 717 shiftSize = 24; 718 LShift = AMDIL::SHL_i8; 719 if (signedShift) { 720 RShift = AMDIL::SHR_i8; 721 } else { 722 RShift = AMDIL::USHR_i8; 723 } 724 break; 725 case AMDIL::GPRV2I8RegClassID: 726 shiftSize = 24; 727 LShift = AMDIL::SHL_v2i8; 728 if (signedShift) { 729 RShift = AMDIL::SHR_v2i8; 730 } else { 731 RShift = AMDIL::USHR_v2i8; 732 } 733 break; 734 case AMDIL::GPRV4I8RegClassID: 735 shiftSize = 24; 736 LShift = AMDIL::SHL_v4i8; 737 if (signedShift) { 738 RShift = AMDIL::SHR_v4i8; 739 } else { 740 RShift = AMDIL::USHR_v4i8; 741 } 742 break; 743 case AMDIL::GPRI16RegClassID: 744 shiftSize = 16; 745 LShift = AMDIL::SHL_i16; 746 if (signedShift) { 747 RShift = AMDIL::SHR_i16; 748 } else { 749 RShift = AMDIL::USHR_i16; 750 } 751 break; 752 case AMDIL::GPRV2I16RegClassID: 753 shiftSize = 16; 754 LShift = AMDIL::SHL_v2i16; 755 if (signedShift) { 756 RShift = AMDIL::SHR_v2i16; 757 } else { 758 RShift = AMDIL::USHR_v2i16; 759 } 760 
break; 761 case AMDIL::GPRV4I16RegClassID: 762 shiftSize = 16; 763 LShift = AMDIL::SHL_v4i16; 764 if (signedShift) { 765 RShift = AMDIL::SHR_v4i16; 766 } else { 767 RShift = AMDIL::USHR_v4i16; 768 } 769 break; 770 }; 771 uint32_t LoadReg = genVReg(simpleVT); 772 uint32_t tmp1 = genVReg(simpleVT); 773 uint32_t tmp2 = genVReg(simpleVT); 774 generateMachineInst(AMDIL::LOADCONST_i32, LoadReg).addImm(shiftSize); 775 generateMachineInst(LShift, tmp1, reg, LoadReg); 776 generateMachineInst(RShift, tmp2, tmp1, LoadReg); 777 return tmp2; 778} 779 780MachineOperand 781AMDILTargetLowering::convertToReg(MachineOperand op) const 782{ 783 if (op.isReg()) { 784 return op; 785 } else if (op.isImm()) { 786 uint32_t loadReg 787 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass); 788 generateMachineInst(AMDIL::LOADCONST_i32, loadReg) 789 .addImm(op.getImm()); 790 op.ChangeToRegister(loadReg, false); 791 } else if (op.isFPImm()) { 792 uint32_t loadReg 793 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass); 794 generateMachineInst(AMDIL::LOADCONST_f32, loadReg) 795 .addFPImm(op.getFPImm()); 796 op.ChangeToRegister(loadReg, false); 797 } else if (op.isMBB()) { 798 op.ChangeToRegister(0, false); 799 } else if (op.isFI()) { 800 op.ChangeToRegister(0, false); 801 } else if (op.isCPI()) { 802 op.ChangeToRegister(0, false); 803 } else if (op.isJTI()) { 804 op.ChangeToRegister(0, false); 805 } else if (op.isGlobal()) { 806 op.ChangeToRegister(0, false); 807 } else if (op.isSymbol()) { 808 op.ChangeToRegister(0, false); 809 }/* else if (op.isMetadata()) { 810 op.ChangeToRegister(0, false); 811 }*/ 812 return op; 813} 814 815void 816AMDILTargetLowering::generateCMPInstr( 817 MachineInstr *MI, 818 MachineBasicBlock *BB, 819 const TargetInstrInfo& TII) 820const 821{ 822 MachineOperand DST = MI->getOperand(0); 823 MachineOperand CC = MI->getOperand(1); 824 MachineOperand LHS = MI->getOperand(2); 825 MachineOperand RHS = MI->getOperand(3); 826 int64_t ccCode = CC.getImm(); 827 unsigned 
int simpleVT = MI->getDesc().OpInfo[0].RegClass; 828 unsigned int opCode = translateToOpcode(ccCode, simpleVT); 829 DebugLoc DL = MI->getDebugLoc(); 830 MachineBasicBlock::iterator BBI = MI; 831 setPrivateData(BB, BBI, &DL, &TII); 832 if (!LHS.isReg()) { 833 LHS = convertToReg(LHS); 834 } 835 if (!RHS.isReg()) { 836 RHS = convertToReg(RHS); 837 } 838 switch (ccCode) { 839 case AMDILCC::IL_CC_I_EQ: 840 case AMDILCC::IL_CC_I_NE: 841 case AMDILCC::IL_CC_I_GE: 842 case AMDILCC::IL_CC_I_LT: 843 { 844 uint32_t lhsreg = addExtensionInstructions( 845 LHS.getReg(), true, simpleVT); 846 uint32_t rhsreg = addExtensionInstructions( 847 RHS.getReg(), true, simpleVT); 848 generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg); 849 } 850 break; 851 case AMDILCC::IL_CC_U_EQ: 852 case AMDILCC::IL_CC_U_NE: 853 case AMDILCC::IL_CC_U_GE: 854 case AMDILCC::IL_CC_U_LT: 855 case AMDILCC::IL_CC_D_EQ: 856 case AMDILCC::IL_CC_F_EQ: 857 case AMDILCC::IL_CC_F_OEQ: 858 case AMDILCC::IL_CC_D_OEQ: 859 case AMDILCC::IL_CC_D_NE: 860 case AMDILCC::IL_CC_F_NE: 861 case AMDILCC::IL_CC_F_UNE: 862 case AMDILCC::IL_CC_D_UNE: 863 case AMDILCC::IL_CC_D_GE: 864 case AMDILCC::IL_CC_F_GE: 865 case AMDILCC::IL_CC_D_OGE: 866 case AMDILCC::IL_CC_F_OGE: 867 case AMDILCC::IL_CC_D_LT: 868 case AMDILCC::IL_CC_F_LT: 869 case AMDILCC::IL_CC_F_OLT: 870 case AMDILCC::IL_CC_D_OLT: 871 generateMachineInst(opCode, DST.getReg(), 872 LHS.getReg(), RHS.getReg()); 873 break; 874 case AMDILCC::IL_CC_I_GT: 875 case AMDILCC::IL_CC_I_LE: 876 { 877 uint32_t lhsreg = addExtensionInstructions( 878 LHS.getReg(), true, simpleVT); 879 uint32_t rhsreg = addExtensionInstructions( 880 RHS.getReg(), true, simpleVT); 881 generateMachineInst(opCode, DST.getReg(), rhsreg, lhsreg); 882 } 883 break; 884 case AMDILCC::IL_CC_U_GT: 885 case AMDILCC::IL_CC_U_LE: 886 case AMDILCC::IL_CC_F_GT: 887 case AMDILCC::IL_CC_D_GT: 888 case AMDILCC::IL_CC_F_OGT: 889 case AMDILCC::IL_CC_D_OGT: 890 case AMDILCC::IL_CC_F_LE: 891 case AMDILCC::IL_CC_D_LE: 892 
case AMDILCC::IL_CC_D_OLE: 893 case AMDILCC::IL_CC_F_OLE: 894 generateMachineInst(opCode, DST.getReg(), 895 RHS.getReg(), LHS.getReg()); 896 break; 897 case AMDILCC::IL_CC_F_UGT: 898 case AMDILCC::IL_CC_F_ULE: 899 { 900 uint32_t VReg[4] = { 901 genVReg(simpleVT), genVReg(simpleVT), 902 genVReg(simpleVT), genVReg(simpleVT) 903 }; 904 generateMachineInst(opCode, VReg[0], 905 RHS.getReg(), LHS.getReg()); 906 generateMachineInst(AMDIL::FNE, VReg[1], 907 RHS.getReg(), RHS.getReg()); 908 generateMachineInst(AMDIL::FNE, VReg[2], 909 LHS.getReg(), LHS.getReg()); 910 generateMachineInst(AMDIL::BINARY_OR_f32, 911 VReg[3], VReg[0], VReg[1]); 912 generateMachineInst(AMDIL::BINARY_OR_f32, 913 DST.getReg(), VReg[2], VReg[3]); 914 } 915 break; 916 case AMDILCC::IL_CC_F_ULT: 917 case AMDILCC::IL_CC_F_UGE: 918 { 919 uint32_t VReg[4] = { 920 genVReg(simpleVT), genVReg(simpleVT), 921 genVReg(simpleVT), genVReg(simpleVT) 922 }; 923 generateMachineInst(opCode, VReg[0], 924 LHS.getReg(), RHS.getReg()); 925 generateMachineInst(AMDIL::FNE, VReg[1], 926 RHS.getReg(), RHS.getReg()); 927 generateMachineInst(AMDIL::FNE, VReg[2], 928 LHS.getReg(), LHS.getReg()); 929 generateMachineInst(AMDIL::BINARY_OR_f32, 930 VReg[3], VReg[0], VReg[1]); 931 generateMachineInst(AMDIL::BINARY_OR_f32, 932 DST.getReg(), VReg[2], VReg[3]); 933 } 934 break; 935 case AMDILCC::IL_CC_D_UGT: 936 case AMDILCC::IL_CC_D_ULE: 937 { 938 uint32_t regID = AMDIL::GPRF64RegClassID; 939 uint32_t VReg[4] = { 940 genVReg(regID), genVReg(regID), 941 genVReg(regID), genVReg(regID) 942 }; 943 // The result of a double comparison is a 32bit result 944 generateMachineInst(opCode, VReg[0], 945 RHS.getReg(), LHS.getReg()); 946 generateMachineInst(AMDIL::DNE, VReg[1], 947 RHS.getReg(), RHS.getReg()); 948 generateMachineInst(AMDIL::DNE, VReg[2], 949 LHS.getReg(), LHS.getReg()); 950 generateMachineInst(AMDIL::BINARY_OR_f32, 951 VReg[3], VReg[0], VReg[1]); 952 generateMachineInst(AMDIL::BINARY_OR_f32, 953 DST.getReg(), VReg[2], VReg[3]); 
954 } 955 break; 956 case AMDILCC::IL_CC_D_UGE: 957 case AMDILCC::IL_CC_D_ULT: 958 { 959 uint32_t regID = AMDIL::GPRF64RegClassID; 960 uint32_t VReg[4] = { 961 genVReg(regID), genVReg(regID), 962 genVReg(regID), genVReg(regID) 963 }; 964 // The result of a double comparison is a 32bit result 965 generateMachineInst(opCode, VReg[0], 966 LHS.getReg(), RHS.getReg()); 967 generateMachineInst(AMDIL::DNE, VReg[1], 968 RHS.getReg(), RHS.getReg()); 969 generateMachineInst(AMDIL::DNE, VReg[2], 970 LHS.getReg(), LHS.getReg()); 971 generateMachineInst(AMDIL::BINARY_OR_f32, 972 VReg[3], VReg[0], VReg[1]); 973 generateMachineInst(AMDIL::BINARY_OR_f32, 974 DST.getReg(), VReg[2], VReg[3]); 975 } 976 break; 977 case AMDILCC::IL_CC_F_UEQ: 978 { 979 uint32_t VReg[4] = { 980 genVReg(simpleVT), genVReg(simpleVT), 981 genVReg(simpleVT), genVReg(simpleVT) 982 }; 983 generateMachineInst(AMDIL::FEQ, VReg[0], 984 LHS.getReg(), RHS.getReg()); 985 generateMachineInst(AMDIL::FNE, VReg[1], 986 LHS.getReg(), LHS.getReg()); 987 generateMachineInst(AMDIL::FNE, VReg[2], 988 RHS.getReg(), RHS.getReg()); 989 generateMachineInst(AMDIL::BINARY_OR_f32, 990 VReg[3], VReg[0], VReg[1]); 991 generateMachineInst(AMDIL::BINARY_OR_f32, 992 DST.getReg(), VReg[2], VReg[3]); 993 } 994 break; 995 case AMDILCC::IL_CC_F_ONE: 996 { 997 uint32_t VReg[4] = { 998 genVReg(simpleVT), genVReg(simpleVT), 999 genVReg(simpleVT), genVReg(simpleVT) 1000 }; 1001 generateMachineInst(AMDIL::FNE, VReg[0], 1002 LHS.getReg(), RHS.getReg()); 1003 generateMachineInst(AMDIL::FEQ, VReg[1], 1004 LHS.getReg(), LHS.getReg()); 1005 generateMachineInst(AMDIL::FEQ, VReg[2], 1006 RHS.getReg(), RHS.getReg()); 1007 generateMachineInst(AMDIL::BINARY_AND_f32, 1008 VReg[3], VReg[0], VReg[1]); 1009 generateMachineInst(AMDIL::BINARY_AND_f32, 1010 DST.getReg(), VReg[2], VReg[3]); 1011 } 1012 break; 1013 case AMDILCC::IL_CC_D_UEQ: 1014 { 1015 uint32_t regID = AMDIL::GPRF64RegClassID; 1016 uint32_t VReg[4] = { 1017 genVReg(regID), genVReg(regID), 1018 
genVReg(regID), genVReg(regID) 1019 }; 1020 // The result of a double comparison is a 32bit result 1021 generateMachineInst(AMDIL::DEQ, VReg[0], 1022 LHS.getReg(), RHS.getReg()); 1023 generateMachineInst(AMDIL::DNE, VReg[1], 1024 LHS.getReg(), LHS.getReg()); 1025 generateMachineInst(AMDIL::DNE, VReg[2], 1026 RHS.getReg(), RHS.getReg()); 1027 generateMachineInst(AMDIL::BINARY_OR_f32, 1028 VReg[3], VReg[0], VReg[1]); 1029 generateMachineInst(AMDIL::BINARY_OR_f32, 1030 DST.getReg(), VReg[2], VReg[3]); 1031 1032 } 1033 break; 1034 case AMDILCC::IL_CC_D_ONE: 1035 { 1036 uint32_t regID = AMDIL::GPRF64RegClassID; 1037 uint32_t VReg[4] = { 1038 genVReg(regID), genVReg(regID), 1039 genVReg(regID), genVReg(regID) 1040 }; 1041 // The result of a double comparison is a 32bit result 1042 generateMachineInst(AMDIL::DNE, VReg[0], 1043 LHS.getReg(), RHS.getReg()); 1044 generateMachineInst(AMDIL::DEQ, VReg[1], 1045 LHS.getReg(), LHS.getReg()); 1046 generateMachineInst(AMDIL::DEQ, VReg[2], 1047 RHS.getReg(), RHS.getReg()); 1048 generateMachineInst(AMDIL::BINARY_AND_f32, 1049 VReg[3], VReg[0], VReg[1]); 1050 generateMachineInst(AMDIL::BINARY_AND_f32, 1051 DST.getReg(), VReg[2], VReg[3]); 1052 1053 } 1054 break; 1055 case AMDILCC::IL_CC_F_O: 1056 { 1057 uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) }; 1058 generateMachineInst(AMDIL::FEQ, VReg[0], 1059 RHS.getReg(), RHS.getReg()); 1060 generateMachineInst(AMDIL::FEQ, VReg[1], 1061 LHS.getReg(), LHS.getReg()); 1062 generateMachineInst(AMDIL::BINARY_AND_f32, 1063 DST.getReg(), VReg[0], VReg[1]); 1064 } 1065 break; 1066 case AMDILCC::IL_CC_D_O: 1067 { 1068 uint32_t regID = AMDIL::GPRF64RegClassID; 1069 uint32_t VReg[2] = { genVReg(regID), genVReg(regID) }; 1070 // The result of a double comparison is a 32bit result 1071 generateMachineInst(AMDIL::DEQ, VReg[0], 1072 RHS.getReg(), RHS.getReg()); 1073 generateMachineInst(AMDIL::DEQ, VReg[1], 1074 LHS.getReg(), LHS.getReg()); 1075 generateMachineInst(AMDIL::BINARY_AND_f32, 1076 
DST.getReg(), VReg[0], VReg[1]); 1077 } 1078 break; 1079 case AMDILCC::IL_CC_F_UO: 1080 { 1081 uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) }; 1082 generateMachineInst(AMDIL::FNE, VReg[0], 1083 RHS.getReg(), RHS.getReg()); 1084 generateMachineInst(AMDIL::FNE, VReg[1], 1085 LHS.getReg(), LHS.getReg()); 1086 generateMachineInst(AMDIL::BINARY_OR_f32, 1087 DST.getReg(), VReg[0], VReg[1]); 1088 } 1089 break; 1090 case AMDILCC::IL_CC_D_UO: 1091 { 1092 uint32_t regID = AMDIL::GPRF64RegClassID; 1093 uint32_t VReg[2] = { genVReg(regID), genVReg(regID) }; 1094 // The result of a double comparison is a 32bit result 1095 generateMachineInst(AMDIL::DNE, VReg[0], 1096 RHS.getReg(), RHS.getReg()); 1097 generateMachineInst(AMDIL::DNE, VReg[1], 1098 LHS.getReg(), LHS.getReg()); 1099 generateMachineInst(AMDIL::BINARY_OR_f32, 1100 DST.getReg(), VReg[0], VReg[1]); 1101 } 1102 break; 1103 case AMDILCC::IL_CC_L_LE: 1104 case AMDILCC::IL_CC_L_GE: 1105 case AMDILCC::IL_CC_L_EQ: 1106 case AMDILCC::IL_CC_L_NE: 1107 case AMDILCC::IL_CC_L_LT: 1108 case AMDILCC::IL_CC_L_GT: 1109 case AMDILCC::IL_CC_UL_LE: 1110 case AMDILCC::IL_CC_UL_GE: 1111 case AMDILCC::IL_CC_UL_EQ: 1112 case AMDILCC::IL_CC_UL_NE: 1113 case AMDILCC::IL_CC_UL_LT: 1114 case AMDILCC::IL_CC_UL_GT: 1115 { 1116 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 1117 &this->getTargetMachine())->getSubtargetImpl(); 1118 if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)) { 1119 generateMachineInst(opCode, DST.getReg(), LHS.getReg(), RHS.getReg()); 1120 } else { 1121 generateLongRelational(MI, opCode); 1122 } 1123 } 1124 break; 1125 case AMDILCC::COND_ERROR: 1126 assert(0 && "Invalid CC code"); 1127 break; 1128 }; 1129} 1130 1131//===----------------------------------------------------------------------===// 1132// TargetLowering Class Implementation Begins 1133//===----------------------------------------------------------------------===// 1134 
// Constructor: registers the register classes that are legal for the
// current subtarget's feature set (byte/short/long/double support) and
// declares, per value type, how each ISD operation is lowered
// (Legal / Custom / Expand).
AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF())
{
  // Master list of every value type configured in the generic loop below.
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  // Scalar integer types (used for div/rem, rotate, bit-count expansion).
  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  // Scalar floating-point types (used to expand ordered/unordered setcc forms).
  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  // Vector types (used for build/shuffle/extract configuration).
  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  // Down-cast from the generic TargetMachine; assumes the TM handed to this
  // lowering is always an AMDILTargetMachine (a static_cast would express
  // the same assumption more safely).
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  // These are the current register classes that are
  // supported

  addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass);
  addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass);

  // Double-precision register classes only exist when the device supports
  // double ops; otherwise f64/v2f64 stay illegal.
  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass);
    addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass);
  }
  if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
    addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass);
    addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass);
    addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass);
    setOperationAction(ISD::Constant , MVT::i8 , Legal);
  }
  if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
    addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass);
    addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass);
    addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass);
    setOperationAction(ISD::Constant , MVT::i16 , Legal);
  }
  addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass);
  addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass);
  addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass);
  addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass);
  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
    addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass);
    addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass);
  }

  // Actions that apply uniformly to every type the target knows about.
  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
    // We cannot sextinreg, expand to shifts
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
    setOperationAction(ISD::FP_ROUND, VT, Expand);
    setOperationAction(ISD::OR, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADD, VT, Custom);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_CC, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
    setOperationAction(ISDBITCAST, VT, Custom);
    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SELECT_CC, VT, Custom);
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    // 64-bit division is handled elsewhere; only the narrower types get
    // the custom SDIV/UDIV lowering.
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
      setOperationAction(ISD::UDIV, VT, Custom);
    }
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::FP_ROUND, VT, Custom);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // GPU also does not have divrem function for signed or unsigned
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    setOperationAction(ISD::FP_ROUND, VT, Expand);

    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // GPU doesn't have a rotl, rotr, or byteswap instruction
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // GPU doesn't have any counting operators
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
  {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
    setOperationAction(ISD::FP_ROUND, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);

  }
  setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
    // Older compiler backends (or HD4XXX-generation devices) need the
    // custom i64 multiply lowering; newer ones handle it natively.
    if (stm->calVersion() < CAL_VERSION_SC_139
        || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
      setOperationAction(ISD::MUL, MVT::i64, Custom);
    }
    setOperationAction(ISD::SUB, MVT::i64, Custom);
    setOperationAction(ISD::ADD, MVT::i64, Custom);
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
    setOperationAction(ISD::SUB, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant , MVT::i64 , Legal);
    setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    // we support loading/storing v2f64 but not operations on the type
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SETCC, MVT::Other, Custom);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
  setOperationAction(ISD::FDIV, MVT::v4f32, Custom);

  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
  setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
  setOperationAction(ISD::Constant , MVT::i32 , Legal);
  setOperationAction(ISD::TRAP , MVT::Other , Legal);

  setStackPointerRegisterToSaveRestore(AMDIL::SP);
  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);
  computeRegisterProperties();

  // Effectively unlimited inline expansion of memcpy/memmove/memset.
  maxStoresPerMemcpy = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset = 4096;

  // NOTE(review): numTypes/numIntTypes/numVectorTypes/numFloatTypes are
  // local variables, not macros, so these #undef directives are no-ops.
#undef numTypes
#undef numIntTypes
#undef numVectorTypes
#undef numFloatTypes
}

/// Return the human-readable name of the given AMDIL-specific DAG node
/// opcode for debug dumps, or 0 (null) when the opcode is not one of the
/// AMDILISD nodes listed below.
const char *
AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  switch (Opcode) {
    default: return 0;
    case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
    case AMDILISD::DP_TO_FP: return "AMDILISD::DP_TO_FP";
    case AMDILISD::FP_TO_DP: return "AMDILISD::FP_TO_DP";
    case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
    case AMDILISD::CMOV: return "AMDILISD::CMOV";
    case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
    case AMDILISD::INEGATE: return "AMDILISD::INEGATE";
    case AMDILISD::MAD: return "AMDILISD::MAD";
    case AMDILISD::UMAD: return "AMDILISD::UMAD";
    case AMDILISD::CALL: return "AMDILISD::CALL";
    case AMDILISD::RET: return "AMDILISD::RET";
    case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
    case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
    case AMDILISD::ADD: return "AMDILISD::ADD";
    case AMDILISD::UMUL: return "AMDILISD::UMUL";
    case AMDILISD::AND: return "AMDILISD::AND";
    case AMDILISD::OR: return "AMDILISD::OR";
    case AMDILISD::NOT: return "AMDILISD::NOT";
    case AMDILISD::XOR: return "AMDILISD::XOR";
    case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
    case AMDILISD::SMAX: return "AMDILISD::SMAX";
    case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
    case AMDILISD::MOVE: return "AMDILISD::MOVE";
    case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
    case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
    case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
    case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
    case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
    case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
    case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
    case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
    case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
    case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
    case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
    case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
    case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
    case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
    case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
    case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
    case AMDILISD::CMP: return "AMDILISD::CMP";
    case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
    case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
    case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
    case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
    case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
    case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
    case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
    case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
    case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
    case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
    case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
    case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
    // Global-memory atomics.
    case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
    case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
    case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
    case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
    case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
    case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
    case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
    case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
    case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
    case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
    case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
    case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
    case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
    case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
    case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
    case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
    case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
    case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
    case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
    case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
    case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
    case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
    case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
    case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
    case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
    case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
    case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
    case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
    // Local-memory (LDS) atomics.
    case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
    case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
    case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
    case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
    case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
    case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
    case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
    case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
    case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
    case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
    case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
    case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
    case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
    case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
    case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
    case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
    case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
    case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
    case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
    case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
    case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
    case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
    case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
    case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
    case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
    case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
    case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
    // NOTE(review): unlike the G and R groups, there is no case for
    // AMDILISD::ATOM_L_XOR_NORET here; if that node exists it falls
    // through to the default and returns 0 — confirm against the
    // AMDILISD enum.
    // Region-memory (GDS) atomics.
    case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
    case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
    case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
    case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
    case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
    case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
    case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
    case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
    case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
    case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
    case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
    case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
    case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
    case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
    case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
    case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
    case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
    case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
    case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
    case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
    case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
    case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
    case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
    case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
    case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
    case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
    case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
    case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
    case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
    case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
    // Append/consume buffer and image operations.
    case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
    case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
    case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
    case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
    case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
    case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
    case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
    case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
    case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
    case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
    case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
    case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";

  };
}
/// Recognizes AMDIL memory intrinsics (atomics, append/consume, images)
/// and maps each to its AMDILISD opcode (IntNo), recording whether the
/// operation produces a result (isRet). Returns false for intrinsics
/// outside the AMDIL range or that need no custom memory handling.
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  // Reject intrinsic IDs outside the AMDIL-specific range.
  if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
      || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
    return false;
  }
  bool bitCastToInt = false;
  unsigned IntNo;
  bool isRet = true;
  const AMDILSubtarget *STM = &this->getTargetMachine()
    .getSubtarget<AMDILSubtarget>();
  switch (Intrinsic) {
  default: return false; // Don't custom lower most intrinsics.
1595 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32: 1596 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32: 1597 IntNo = AMDILISD::ATOM_G_ADD; break; 1598 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret: 1599 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret: 1600 isRet = false; 1601 IntNo = AMDILISD::ATOM_G_ADD_NORET; break; 1602 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32: 1603 case AMDGPUIntrinsic::AMDIL_atomic_add_li32: 1604 IntNo = AMDILISD::ATOM_L_ADD; break; 1605 case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret: 1606 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret: 1607 isRet = false; 1608 IntNo = AMDILISD::ATOM_L_ADD_NORET; break; 1609 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32: 1610 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32: 1611 IntNo = AMDILISD::ATOM_R_ADD; break; 1612 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret: 1613 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret: 1614 isRet = false; 1615 IntNo = AMDILISD::ATOM_R_ADD_NORET; break; 1616 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32: 1617 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32: 1618 IntNo = AMDILISD::ATOM_G_AND; break; 1619 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret: 1620 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret: 1621 isRet = false; 1622 IntNo = AMDILISD::ATOM_G_AND_NORET; break; 1623 case AMDGPUIntrinsic::AMDIL_atomic_and_li32: 1624 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32: 1625 IntNo = AMDILISD::ATOM_L_AND; break; 1626 case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret: 1627 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret: 1628 isRet = false; 1629 IntNo = AMDILISD::ATOM_L_AND_NORET; break; 1630 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32: 1631 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32: 1632 IntNo = AMDILISD::ATOM_R_AND; break; 1633 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret: 1634 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret: 1635 isRet = false; 1636 IntNo = AMDILISD::ATOM_R_AND_NORET; break; 1637 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32: 1638 case 
AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32: 1639 IntNo = AMDILISD::ATOM_G_CMPXCHG; break; 1640 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret: 1641 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret: 1642 isRet = false; 1643 IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break; 1644 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32: 1645 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32: 1646 IntNo = AMDILISD::ATOM_L_CMPXCHG; break; 1647 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret: 1648 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret: 1649 isRet = false; 1650 IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break; 1651 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32: 1652 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32: 1653 IntNo = AMDILISD::ATOM_R_CMPXCHG; break; 1654 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret: 1655 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret: 1656 isRet = false; 1657 IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break; 1658 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32: 1659 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32: 1660 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1661 IntNo = AMDILISD::ATOM_G_DEC; 1662 } else { 1663 IntNo = AMDILISD::ATOM_G_SUB; 1664 } 1665 break; 1666 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret: 1667 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret: 1668 isRet = false; 1669 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1670 IntNo = AMDILISD::ATOM_G_DEC_NORET; 1671 } else { 1672 IntNo = AMDILISD::ATOM_G_SUB_NORET; 1673 } 1674 break; 1675 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32: 1676 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32: 1677 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1678 IntNo = AMDILISD::ATOM_L_DEC; 1679 } else { 1680 IntNo = AMDILISD::ATOM_L_SUB; 1681 } 1682 break; 1683 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret: 1684 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret: 1685 isRet = false; 1686 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1687 IntNo = 
AMDILISD::ATOM_L_DEC_NORET; 1688 } else { 1689 IntNo = AMDILISD::ATOM_L_SUB_NORET; 1690 } 1691 break; 1692 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32: 1693 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32: 1694 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1695 IntNo = AMDILISD::ATOM_R_DEC; 1696 } else { 1697 IntNo = AMDILISD::ATOM_R_SUB; 1698 } 1699 break; 1700 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret: 1701 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret: 1702 isRet = false; 1703 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1704 IntNo = AMDILISD::ATOM_R_DEC_NORET; 1705 } else { 1706 IntNo = AMDILISD::ATOM_R_SUB_NORET; 1707 } 1708 break; 1709 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32: 1710 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32: 1711 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1712 IntNo = AMDILISD::ATOM_G_INC; 1713 } else { 1714 IntNo = AMDILISD::ATOM_G_ADD; 1715 } 1716 break; 1717 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret: 1718 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret: 1719 isRet = false; 1720 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1721 IntNo = AMDILISD::ATOM_G_INC_NORET; 1722 } else { 1723 IntNo = AMDILISD::ATOM_G_ADD_NORET; 1724 } 1725 break; 1726 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32: 1727 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32: 1728 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1729 IntNo = AMDILISD::ATOM_L_INC; 1730 } else { 1731 IntNo = AMDILISD::ATOM_L_ADD; 1732 } 1733 break; 1734 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret: 1735 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret: 1736 isRet = false; 1737 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1738 IntNo = AMDILISD::ATOM_L_INC_NORET; 1739 } else { 1740 IntNo = AMDILISD::ATOM_L_ADD_NORET; 1741 } 1742 break; 1743 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32: 1744 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32: 1745 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1746 IntNo = AMDILISD::ATOM_R_INC; 1747 } else { 1748 IntNo = AMDILISD::ATOM_R_ADD; 
1749 } 1750 break; 1751 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret: 1752 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret: 1753 isRet = false; 1754 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1755 IntNo = AMDILISD::ATOM_R_INC_NORET; 1756 } else { 1757 IntNo = AMDILISD::ATOM_R_ADD_NORET; 1758 } 1759 break; 1760 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32: 1761 IntNo = AMDILISD::ATOM_G_MAX; break; 1762 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32: 1763 IntNo = AMDILISD::ATOM_G_UMAX; break; 1764 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret: 1765 isRet = false; 1766 IntNo = AMDILISD::ATOM_G_MAX_NORET; break; 1767 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret: 1768 isRet = false; 1769 IntNo = AMDILISD::ATOM_G_UMAX_NORET; break; 1770 case AMDGPUIntrinsic::AMDIL_atomic_max_li32: 1771 IntNo = AMDILISD::ATOM_L_MAX; break; 1772 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32: 1773 IntNo = AMDILISD::ATOM_L_UMAX; break; 1774 case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret: 1775 isRet = false; 1776 IntNo = AMDILISD::ATOM_L_MAX_NORET; break; 1777 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret: 1778 isRet = false; 1779 IntNo = AMDILISD::ATOM_L_UMAX_NORET; break; 1780 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32: 1781 IntNo = AMDILISD::ATOM_R_MAX; break; 1782 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32: 1783 IntNo = AMDILISD::ATOM_R_UMAX; break; 1784 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret: 1785 isRet = false; 1786 IntNo = AMDILISD::ATOM_R_MAX_NORET; break; 1787 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret: 1788 isRet = false; 1789 IntNo = AMDILISD::ATOM_R_UMAX_NORET; break; 1790 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32: 1791 IntNo = AMDILISD::ATOM_G_MIN; break; 1792 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32: 1793 IntNo = AMDILISD::ATOM_G_UMIN; break; 1794 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret: 1795 isRet = false; 1796 IntNo = AMDILISD::ATOM_G_MIN_NORET; break; 1797 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret: 1798 isRet = 
false; 1799 IntNo = AMDILISD::ATOM_G_UMIN_NORET; break; 1800 case AMDGPUIntrinsic::AMDIL_atomic_min_li32: 1801 IntNo = AMDILISD::ATOM_L_MIN; break; 1802 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32: 1803 IntNo = AMDILISD::ATOM_L_UMIN; break; 1804 case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret: 1805 isRet = false; 1806 IntNo = AMDILISD::ATOM_L_MIN_NORET; break; 1807 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret: 1808 isRet = false; 1809 IntNo = AMDILISD::ATOM_L_UMIN_NORET; break; 1810 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32: 1811 IntNo = AMDILISD::ATOM_R_MIN; break; 1812 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32: 1813 IntNo = AMDILISD::ATOM_R_UMIN; break; 1814 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret: 1815 isRet = false; 1816 IntNo = AMDILISD::ATOM_R_MIN_NORET; break; 1817 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret: 1818 isRet = false; 1819 IntNo = AMDILISD::ATOM_R_UMIN_NORET; break; 1820 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32: 1821 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32: 1822 IntNo = AMDILISD::ATOM_G_OR; break; 1823 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret: 1824 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret: 1825 isRet = false; 1826 IntNo = AMDILISD::ATOM_G_OR_NORET; break; 1827 case AMDGPUIntrinsic::AMDIL_atomic_or_li32: 1828 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32: 1829 IntNo = AMDILISD::ATOM_L_OR; break; 1830 case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret: 1831 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret: 1832 isRet = false; 1833 IntNo = AMDILISD::ATOM_L_OR_NORET; break; 1834 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32: 1835 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32: 1836 IntNo = AMDILISD::ATOM_R_OR; break; 1837 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret: 1838 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret: 1839 isRet = false; 1840 IntNo = AMDILISD::ATOM_R_OR_NORET; break; 1841 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32: 1842 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32: 1843 IntNo = 
AMDILISD::ATOM_G_SUB; break; 1844 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret: 1845 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret: 1846 isRet = false; 1847 IntNo = AMDILISD::ATOM_G_SUB_NORET; break; 1848 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32: 1849 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32: 1850 IntNo = AMDILISD::ATOM_L_SUB; break; 1851 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret: 1852 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret: 1853 isRet = false; 1854 IntNo = AMDILISD::ATOM_L_SUB_NORET; break; 1855 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32: 1856 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32: 1857 IntNo = AMDILISD::ATOM_R_SUB; break; 1858 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret: 1859 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret: 1860 isRet = false; 1861 IntNo = AMDILISD::ATOM_R_SUB_NORET; break; 1862 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32: 1863 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32: 1864 IntNo = AMDILISD::ATOM_G_RSUB; break; 1865 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret: 1866 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret: 1867 isRet = false; 1868 IntNo = AMDILISD::ATOM_G_RSUB_NORET; break; 1869 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32: 1870 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32: 1871 IntNo = AMDILISD::ATOM_L_RSUB; break; 1872 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret: 1873 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret: 1874 isRet = false; 1875 IntNo = AMDILISD::ATOM_L_RSUB_NORET; break; 1876 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32: 1877 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32: 1878 IntNo = AMDILISD::ATOM_R_RSUB; break; 1879 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret: 1880 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret: 1881 isRet = false; 1882 IntNo = AMDILISD::ATOM_R_RSUB_NORET; break; 1883 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32: 1884 bitCastToInt = true; 1885 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32: 1886 case 
AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32: 1887 IntNo = AMDILISD::ATOM_G_XCHG; break; 1888 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret: 1889 bitCastToInt = true; 1890 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret: 1891 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret: 1892 isRet = false; 1893 IntNo = AMDILISD::ATOM_G_XCHG_NORET; break; 1894 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32: 1895 bitCastToInt = true; 1896 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32: 1897 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32: 1898 IntNo = AMDILISD::ATOM_L_XCHG; break; 1899 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret: 1900 bitCastToInt = true; 1901 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret: 1902 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret: 1903 isRet = false; 1904 IntNo = AMDILISD::ATOM_L_XCHG_NORET; break; 1905 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32: 1906 bitCastToInt = true; 1907 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32: 1908 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32: 1909 IntNo = AMDILISD::ATOM_R_XCHG; break; 1910 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret: 1911 bitCastToInt = true; 1912 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret: 1913 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret: 1914 isRet = false; 1915 IntNo = AMDILISD::ATOM_R_XCHG_NORET; break; 1916 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32: 1917 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32: 1918 IntNo = AMDILISD::ATOM_G_XOR; break; 1919 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret: 1920 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret: 1921 isRet = false; 1922 IntNo = AMDILISD::ATOM_G_XOR_NORET; break; 1923 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32: 1924 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32: 1925 IntNo = AMDILISD::ATOM_L_XOR; break; 1926 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret: 1927 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret: 1928 isRet = false; 1929 IntNo = AMDILISD::ATOM_L_XOR_NORET; break; 1930 case 
AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
  case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
    IntNo = AMDILISD::ATOM_R_XOR; break;
  case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
  case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
    isRet = false;
    IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
  case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
    IntNo = AMDILISD::APPEND_ALLOC; break;
  case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
    isRet = false;
    IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
  case AMDGPUIntrinsic::AMDIL_append_consume_i32:
    IntNo = AMDILISD::APPEND_CONSUME; break;
  case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
    isRet = false;
    IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
  };

  // Describe the selected atomic as a target memory intrinsic.
  Info.opc = IntNo;
  // NOTE(review): f32 is chosen when bitCastToInt is set, which looks
  // inverted relative to the float-xchg cases that set that flag above --
  // confirm the intended memVT before changing.
  Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
  Info.ptrVal = I.getOperand(0);
  Info.offset = 0;
  Info.align = 4;
  Info.vol = true;
  // Only variants that return the old value read memory; the _noret
  // variants cleared isRet above.
  Info.readMem = isRet;
  Info.writeMem = true;
  return true;
}
// The backend supports 32 and 64 bit floating point immediates
bool
AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return true;
  } else {
    return false;
  }
}

// Never shrink f32/f64 constants: both widths are legal immediates here,
// so keeping the original width loses no information.
bool
AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return false;
  } else {
    return true;
  }
}


// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
// be zero. Op is expected to be a target specific node. Used by DAG
// combiner.

// computeMaskedBitsForTargetNode - Compute the known-zero/known-one bits of
// an AMDIL-specific node for the DAG combiner.  Only SELECT_CC is handled:
// a bit is reported known only when it is known in both queried operands.
void
AMDILTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const
{
  APInt KnownZero2;
  APInt KnownOne2;
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
  switch (Op.getOpcode()) {
    default: break;
    case AMDILISD::SELECT_CC:
      // NOTE(review): operands 1 and 0 are queried; presumably these are the
      // two select values -- confirm against the SELECT_CC operand layout.
      DAG.ComputeMaskedBits(
          Op.getOperand(1),
          KnownZero,
          KnownOne,
          Depth + 1
          );
      // Depth is not forwarded on this second query (defaulted) -- kept as-is.
      DAG.ComputeMaskedBits(
          Op.getOperand(0),
          KnownZero2,
          KnownOne2
          );
      assert((KnownZero & KnownOne) == 0
          && "Bits known to be one AND zero?");
      assert((KnownZero2 & KnownOne2) == 0
          && "Bits known to be one AND zero?");
      // Only known if known in both the LHS and RHS
      KnownOne &= KnownOne2;
      KnownZero &= KnownZero2;
      break;
  };
}

// This is the function that determines which calling convention should
// be used. Currently there is only one calling convention
CCAssignFn*
AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
{
  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  return CC_AMDIL32;
}

// LowerCallResult - Lower the result values of an ISD::CALL into the
// appropriate copies out of appropriate physical registers. This assumes that
// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
// being lowered. The returns a SDNode with the same number of values as the
// ISD::CALL.
SDValue
AMDILTargetLowering::LowerCallResult(
    SDValue Chain,
    SDValue InFlag,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  // Assign locations to each value returned by this call
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
      getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    EVT CopyVT = RVLocs[i].getValVT();
    if (RVLocs[i].isRegLoc()) {
      // getCopyFromReg produces (value, chain, glue).  Keep the chain
      // (result 1) in Chain, then read the copied value (result 0) and the
      // glue (result 2) off that same node for the next iteration.
      Chain = DAG.getCopyFromReg(
          Chain,
          dl,
          RVLocs[i].getLocReg(),
          CopyVT,
          InFlag
          ).getValue(1);
      SDValue Val = Chain.getValue(0);
      InFlag = Chain.getValue(2);
      InVals.push_back(Val);
    }
  }

  return Chain;

}

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

MachineBasicBlock *
AMDILTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr *MI, MachineBasicBlock *BB) const
{
  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
    // Expand every typed CMP pseudo into the real compare sequence, then
    // delete the pseudo.  ExpandCaseToAllTypes emits one case per type.
    ExpandCaseToAllTypes(AMDIL::CMP);
    generateCMPInstr(MI, BB, TII);
    MI->eraseFromParent();
    break;
    default:
      break;
  }
  return BB;
}

// Recursively assign SDNodeOrdering to any unordered nodes
// This is necessary to maintain source ordering of instructions
// under -O0 to avoid odd-looking "skipping around" issues.
2098 static const SDValue 2099Ordered( SelectionDAG &DAG, unsigned order, const SDValue New ) 2100{ 2101 if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) { 2102 DAG.AssignOrdering( New.getNode(), order ); 2103 for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i) 2104 Ordered( DAG, order, New.getOperand(i) ); 2105 } 2106 return New; 2107} 2108 2109#define LOWER(A) \ 2110 case ISD:: A: \ 2111return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) ) 2112 2113SDValue 2114AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const 2115{ 2116 switch (Op.getOpcode()) { 2117 default: 2118 Op.getNode()->dump(); 2119 assert(0 && "Custom lowering code for this" 2120 "instruction is not implemented yet!"); 2121 break; 2122 LOWER(GlobalAddress); 2123 LOWER(JumpTable); 2124 LOWER(ConstantPool); 2125 LOWER(ExternalSymbol); 2126 LOWER(FP_TO_SINT); 2127 LOWER(FP_TO_UINT); 2128 LOWER(SINT_TO_FP); 2129 LOWER(UINT_TO_FP); 2130 LOWER(ADD); 2131 LOWER(MUL); 2132 LOWER(SUB); 2133 LOWER(FDIV); 2134 LOWER(SDIV); 2135 LOWER(SREM); 2136 LOWER(UDIV); 2137 LOWER(UREM); 2138 LOWER(BUILD_VECTOR); 2139 LOWER(INSERT_VECTOR_ELT); 2140 LOWER(EXTRACT_VECTOR_ELT); 2141 LOWER(EXTRACT_SUBVECTOR); 2142 LOWER(SCALAR_TO_VECTOR); 2143 LOWER(CONCAT_VECTORS); 2144 LOWER(AND); 2145 LOWER(OR); 2146 LOWER(SELECT); 2147 LOWER(SELECT_CC); 2148 LOWER(SETCC); 2149 LOWER(SIGN_EXTEND_INREG); 2150 LOWER(BITCAST); 2151 LOWER(DYNAMIC_STACKALLOC); 2152 LOWER(BRCOND); 2153 LOWER(BR_CC); 2154 LOWER(FP_ROUND); 2155 } 2156 return Op; 2157} 2158 2159int 2160AMDILTargetLowering::getVarArgsFrameOffset() const 2161{ 2162 return VarArgsFrameOffset; 2163} 2164#undef LOWER 2165 2166SDValue 2167AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const 2168{ 2169 SDValue DST = Op; 2170 const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op); 2171 const GlobalValue *G = GADN->getGlobal(); 2172 DebugLoc DL = Op.getDebugLoc(); 2173 const GlobalVariable *GV = 
dyn_cast<GlobalVariable>(G); 2174 if (!GV) { 2175 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 2176 } else { 2177 if (GV->hasInitializer()) { 2178 const Constant *C = dyn_cast<Constant>(GV->getInitializer()); 2179 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) { 2180 DST = DAG.getConstant(CI->getValue(), Op.getValueType()); 2181 } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) { 2182 DST = DAG.getConstantFP(CF->getValueAPF(), 2183 Op.getValueType()); 2184 } else if (dyn_cast<ConstantAggregateZero>(C)) { 2185 EVT VT = Op.getValueType(); 2186 if (VT.isInteger()) { 2187 DST = DAG.getConstant(0, VT); 2188 } else { 2189 DST = DAG.getConstantFP(0, VT); 2190 } 2191 } else { 2192 assert(!"lowering this type of Global Address " 2193 "not implemented yet!"); 2194 C->dump(); 2195 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 2196 } 2197 } else { 2198 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 2199 } 2200 } 2201 return DST; 2202} 2203 2204SDValue 2205AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const 2206{ 2207 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 2208 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); 2209 return Result; 2210} 2211SDValue 2212AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const 2213{ 2214 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2215 EVT PtrVT = Op.getValueType(); 2216 SDValue Result; 2217 if (CP->isMachineConstantPoolEntry()) { 2218 Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 2219 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); 2220 } else { 2221 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 2222 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); 2223 } 2224 return Result; 2225} 2226 2227SDValue 2228AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const 2229{ 2230 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 2231 SDValue Result = 
DAG.getTargetExternalSymbol(Sym, MVT::i32); 2232 return Result; 2233} 2234 2235/// LowerFORMAL_ARGUMENTS - transform physical registers into 2236/// virtual registers and generate load operations for 2237/// arguments places on the stack. 2238/// TODO: isVarArg, hasStructRet, isMemReg 2239 SDValue 2240AMDILTargetLowering::LowerFormalArguments(SDValue Chain, 2241 CallingConv::ID CallConv, 2242 bool isVarArg, 2243 const SmallVectorImpl<ISD::InputArg> &Ins, 2244 DebugLoc dl, 2245 SelectionDAG &DAG, 2246 SmallVectorImpl<SDValue> &InVals) 2247const 2248{ 2249 2250 MachineFunction &MF = DAG.getMachineFunction(); 2251 MachineFrameInfo *MFI = MF.getFrameInfo(); 2252 //const Function *Fn = MF.getFunction(); 2253 //MachineRegisterInfo &RegInfo = MF.getRegInfo(); 2254 2255 SmallVector<CCValAssign, 16> ArgLocs; 2256 CallingConv::ID CC = MF.getFunction()->getCallingConv(); 2257 //bool hasStructRet = MF.getFunction()->hasStructRetAttr(); 2258 2259 CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(), 2260 getTargetMachine(), ArgLocs, *DAG.getContext()); 2261 2262 // When more calling conventions are added, they need to be chosen here 2263 CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32); 2264 SDValue StackPtr; 2265 2266 //unsigned int FirstStackArgLoc = 0; 2267 2268 for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) { 2269 CCValAssign &VA = ArgLocs[i]; 2270 if (VA.isRegLoc()) { 2271 EVT RegVT = VA.getLocVT(); 2272 const TargetRegisterClass *RC = getRegClassFromType( 2273 RegVT.getSimpleVT().SimpleTy); 2274 2275 unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC); 2276 SDValue ArgValue = DAG.getCopyFromReg( 2277 Chain, 2278 dl, 2279 Reg, 2280 RegVT); 2281 // If this is an 8 or 16-bit value, it is really passed 2282 // promoted to 32 bits. Insert an assert[sz]ext to capture 2283 // this, then truncate to the right size. 
2284 2285 if (VA.getLocInfo() == CCValAssign::SExt) { 2286 ArgValue = DAG.getNode( 2287 ISD::AssertSext, 2288 dl, 2289 RegVT, 2290 ArgValue, 2291 DAG.getValueType(VA.getValVT())); 2292 } else if (VA.getLocInfo() == CCValAssign::ZExt) { 2293 ArgValue = DAG.getNode( 2294 ISD::AssertZext, 2295 dl, 2296 RegVT, 2297 ArgValue, 2298 DAG.getValueType(VA.getValVT())); 2299 } 2300 if (VA.getLocInfo() != CCValAssign::Full) { 2301 ArgValue = DAG.getNode( 2302 ISD::TRUNCATE, 2303 dl, 2304 VA.getValVT(), 2305 ArgValue); 2306 } 2307 // Add the value to the list of arguments 2308 // to be passed in registers 2309 InVals.push_back(ArgValue); 2310 if (isVarArg) { 2311 assert(0 && "Variable arguments are not yet supported"); 2312 // See MipsISelLowering.cpp for ideas on how to implement 2313 } 2314 } else if(VA.isMemLoc()) { 2315 InVals.push_back(LowerMemArgument(Chain, CallConv, Ins, 2316 dl, DAG, VA, MFI, i)); 2317 } else { 2318 assert(0 && "found a Value Assign that is " 2319 "neither a register or a memory location"); 2320 } 2321 } 2322 /*if (hasStructRet) { 2323 assert(0 && "Has struct return is not yet implemented"); 2324 // See MipsISelLowering.cpp for ideas on how to implement 2325 }*/ 2326 2327 if (isVarArg) { 2328 assert(0 && "Variable arguments are not yet supported"); 2329 // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement 2330 } 2331 // This needs to be changed to non-zero if the return function needs 2332 // to pop bytes 2333 return Chain; 2334} 2335/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 2336/// by "Src" to address "Dst" with size and alignment information specified by 2337/// the specific parameter attribute. The copy will be passed as a byval 2338/// function parameter. 
// Copy a byval aggregate from Src to Dst via memcpy.  Currently dead on
// this target (the assert fires before any node is built).
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  assert(0 && "MemCopy does not exist yet");
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);

  return DAG.getMemcpy(Chain,
      Src.getDebugLoc(),
      Dst, Src, SizeNode, Flags.getByValAlign(),
      /*IsVol=*/false, /*AlwaysInline=*/true,
      MachinePointerInfo(), MachinePointerInfo());
}

// Store one outgoing call argument to its stack slot (StackPtr + the
// location's memory offset), or copy it if it is byval.  Returns the new
// chain (the store / copy node).
SDValue
AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
    SDValue StackPtr, SDValue Arg,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    ISD::ArgFlagsTy Flags) const
{
  unsigned int LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD,
      dl,
      getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
  } else {
    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
        MachinePointerInfo::getStack(LocMemOffset),
        false, false, 0);
  }
  return PtrOff;
}
/// LowerCAL - functions arguments are copied from virtual
/// regs to (physical regs)/(stack frame), CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Tail calls are unconditionally disabled, so every `if (isTailCall)`
  // below is currently dead scaffolding kept for a future implementation.
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: DO we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
      getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyize the calling operands, but need to change
  // if we have more than one calling convetion
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = OutVals[i];
    //Promote the value if needed
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::SIGN_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::ZERO_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
      case CCValAssign::AExt:
        Arg = DAG.getNode(ISD::ANY_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());

      // emit ISD::STORE whichs stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  // Merge all argument stores into one chain token.
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  // Glue the register copies together so they stay adjacent to the call.
  SDValue InFlag;
  if (!isTailCall) {
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
      InVals);
}
// Classify whether an ADD feeding a load/store address may be turned into a
// 24-bit or 32-bit MAD.  NOTE(review): the early `return` below disables the
// whole analysis -- both flags are always reported false; the remaining code
// is kept for when the optimization is re-enabled.
static void checkMADType(
    SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD)
{
  bool globalLoadStore = false;
  is24bitMAD = false;
  is32bitMAD = false;
  return;
  assert(Op.getOpcode() == ISD::ADD && "The opcode must be a add in order for "
      "this to work correctly!");
  if (Op.getNode()->use_empty()) {
    return;
  }
  for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(),
      nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) {
    SDNode *ptr = *nBegin;
    const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr);
    // If we are not a LSBaseSDNode then we don't do this
    // optimization.
    // If we are a LSBaseSDNode, but the op is not the offset
    // or base pointer, then we don't do this optimization
    // (i.e. we are the value being stored)
    if (!lsNode ||
        (lsNode->writeMem() && lsNode->getOperand(1) == Op)) {
      return;
    }
    const PointerType *PT =
      dyn_cast<PointerType>(lsNode->getSrcValue()->getType());
    unsigned as = PT->getAddressSpace();
    switch(as) {
      default:
        // NOTE(review): `default` falls through into PRIVATE_ADDRESS after
        // setting the flag -- presumably intentional, but confirm.
        globalLoadStore = true;
      case AMDILAS::PRIVATE_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::CONSTANT_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::LOCAL_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::REGION_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
          globalLoadStore = true;
        }
        break;
    }
  }
  if (globalLoadStore) {
    is32bitMAD = true;
  } else {
    is24bitMAD = true;
  }
}

SDValue
AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const
{
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  const AMDILSubtarget *stm = &this->getTargetMachine()
    .getSubtarget<AMDILSubtarget>();
  bool isVec = OVT.isVector();
  if (OVT.getScalarType() == MVT::i64) {
    // 64-bit add: use the native node when the device has LongOps and the
    // type is scalar i64; otherwise emulate with a 32-bit add-with-carry
    // over the lo/hi component halves.
    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)
        && INTTY == MVT::i32) {
      DST = DAG.getNode(AMDILISD::ADD,
          DL,
          OVT,
          LHS, RHS);
    } else {
      SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
      // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
      LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
      RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
      LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
      RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
      INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO);
      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI);
      // Carry out of the low half: (INTLO u< RHSLO) detects wraparound; the
      // CMP result is negated into 0/1 and added into the high half.
      SDValue cmp;
      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          INTLO, RHSLO);
      cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp);
      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
      DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
          INTLO, INTHI);
    }
  } else {
    if (LHS.getOpcode() == ISD::FrameIndex ||
        RHS.getOpcode() == ISD::FrameIndex) {
      // Address arithmetic on a frame index uses the dedicated ADDADDR node.
      DST = DAG.getNode(AMDILISD::ADDADDR,
          DL,
          OVT,
          LHS, RHS);
    } else {
      if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem)
          && LHS.getNumOperands()
          && RHS.getNumOperands()) {
        bool is24bitMAD = false;
        bool is32bitMAD = false;
        const ConstantSDNode *LHSConstOpCode =
          dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1));
        const ConstantSDNode *RHSConstOpCode =
          dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1));
        // Recognize (a * b) + c, and (a << const) + c as a * (1<<const) + c.
        if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode)
            || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode)
            || LHS.getOpcode() == ISD::MUL
            || RHS.getOpcode() == ISD::MUL) {
          SDValue Op1, Op2, Op3;
          // FIXME: Fix this so that it works for unsigned 24bit ops.
          if (LHS.getOpcode() == ISD::MUL) {
            Op1 = LHS.getOperand(0);
            Op2 = LHS.getOperand(1);
            Op3 = RHS;
          } else if (RHS.getOpcode() == ISD::MUL) {
            Op1 = RHS.getOperand(0);
            Op2 = RHS.getOperand(1);
            Op3 = LHS;
          } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) {
            Op1 = LHS.getOperand(0);
            Op2 = DAG.getConstant(
                1 << LHSConstOpCode->getZExtValue(), MVT::i32);
            Op3 = RHS;
          } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) {
            Op1 = RHS.getOperand(0);
            Op2 = DAG.getConstant(
                1 << RHSConstOpCode->getZExtValue(), MVT::i32);
            Op3 = LHS;
          }
          checkMADType(Op, stm, is24bitMAD, is32bitMAD);
          // We can possibly do a MAD transform!
          if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
            uint32_t opcode = AMDGPUIntrinsic::AMDIL_mad24_i32;
            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
                DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32),
                Op1, Op2, Op3);
          } else if(is32bitMAD) {
            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
                DL, Tys, DAG.getEntryNode(),
                DAG.getConstant(
                  AMDGPUIntrinsic::AMDIL_mad_i32, MVT::i32),
                Op1, Op2, Op3);
          }
        }
      }
      // NOTE(review): this unconditionally overwrites DST, discarding any MAD
      // node built above.  checkMADType currently always reports false, so
      // the MAD path is effectively disabled -- confirm before relying on it.
      DST = DAG.getNode(AMDILISD::ADD,
          DL,
          OVT,
          LHS, RHS);
    }
  }
  return DST;
}
// genCLZuN - Emit a count-leading-zeros sequence for the low `bits` bits of
// a 32-bit (scalar or vector) value using float-exponent extraction; see the
// reference OpenCL-style pseudocode in the comment below.
SDValue
AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
    uint32_t bits) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY = Op.getValueType();
  EVT FPTY;
  if (INTTY.isVector()) {
    FPTY = EVT(MVT::getVectorVT(MVT::f32,
          INTTY.getVectorNumElements()));
  } else {
    FPTY = EVT(MVT::f32);
  }
  /* static inline uint
     __clz_Nbit(uint x)
     {
     int xor = 0x3f800000U | x;
     float tp = as_float(xor);
     float t = tp + -1.0f;
     uint tint = as_uint(t);
     int cmp = (x != 0);
     uint tsrc = tint >> 23;
     uint tmask = tsrc & 0xffU;
     uint cst = (103 + N)U - tmask;
     return cmp ?
cst : N; 2744 } 2745 */ 2746 assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32 2747 && "genCLZu16 only works on 32bit types"); 2748 // uint x = Op 2749 SDValue x = Op; 2750 // xornode = 0x3f800000 | x 2751 SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY, 2752 DAG.getConstant(0x3f800000, INTTY), x); 2753 // float tp = as_float(xornode) 2754 SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode); 2755 // float t = tp + -1.0f 2756 SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp, 2757 DAG.getConstantFP(-1.0f, FPTY)); 2758 // uint tint = as_uint(t) 2759 SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t); 2760 // int cmp = (x != 0) 2761 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2762 DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x, 2763 DAG.getConstant(0, INTTY)); 2764 // uint tsrc = tint >> 23 2765 SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint, 2766 DAG.getConstant(23, INTTY)); 2767 // uint tmask = tsrc & 0xFF 2768 SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc, 2769 DAG.getConstant(0xFFU, INTTY)); 2770 // uint cst = (103 + bits) - tmask 2771 SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY, 2772 DAG.getConstant((103U + bits), INTTY), tmask); 2773 // return cmp ? cst : N 2774 cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst, 2775 DAG.getConstant(bits, INTTY)); 2776 return cst; 2777} 2778 2779SDValue 2780AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const 2781{ 2782 SDValue DST = SDValue(); 2783 DebugLoc DL = Op.getDebugLoc(); 2784 EVT INTTY = Op.getValueType(); 2785 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 2786 &this->getTargetMachine())->getSubtargetImpl(); 2787 if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) { 2788 //__clz_32bit(uint u) 2789 //{ 2790 // int z = __amdil_ffb_hi(u) ; 2791 // return z < 0 ? 
32 : z; 2792 // } 2793 // uint u = op 2794 SDValue u = Op; 2795 // int z = __amdil_ffb_hi(u) 2796 SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u); 2797 // int cmp = z < 0 2798 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2799 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 2800 z, DAG.getConstant(0, INTTY)); 2801 // return cmp ? 32 : z 2802 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, 2803 DAG.getConstant(32, INTTY), z); 2804 } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { 2805 // static inline uint 2806 //__clz_32bit(uint x) 2807 //{ 2808 // uint zh = __clz_16bit(x >> 16); 2809 // uint zl = __clz_16bit(x & 0xffffU); 2810 // return zh == 16U ? 16U + zl : zh; 2811 //} 2812 // uint x = Op 2813 SDValue x = Op; 2814 // uint xs16 = x >> 16 2815 SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x, 2816 DAG.getConstant(16, INTTY)); 2817 // uint zh = __clz_16bit(xs16) 2818 SDValue zh = genCLZuN(xs16, DAG, 16); 2819 // uint xa16 = x & 0xFFFF 2820 SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x, 2821 DAG.getConstant(0xFFFFU, INTTY)); 2822 // uint zl = __clz_16bit(xa16) 2823 SDValue zl = genCLZuN(xa16, DAG, 16); 2824 // uint cmp = zh == 16U 2825 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2826 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 2827 zh, DAG.getConstant(16U, INTTY)); 2828 // uint zl16 = zl + 16 2829 SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY, 2830 DAG.getConstant(16, INTTY), zl); 2831 // return cmp ? 
zl16 : zh 2832 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, 2833 cmp, zl16, zh); 2834 } else { 2835 assert(0 && "Attempting to generate a CLZ function with an" 2836 " unknown graphics card"); 2837 } 2838 return DST; 2839} 2840SDValue 2841AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const 2842{ 2843 SDValue DST = SDValue(); 2844 DebugLoc DL = Op.getDebugLoc(); 2845 EVT INTTY; 2846 EVT LONGTY = Op.getValueType(); 2847 bool isVec = LONGTY.isVector(); 2848 if (isVec) { 2849 INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType() 2850 .getVectorNumElements())); 2851 } else { 2852 INTTY = EVT(MVT::i32); 2853 } 2854 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 2855 &this->getTargetMachine())->getSubtargetImpl(); 2856 if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) { 2857 // Evergreen: 2858 // static inline uint 2859 // __clz_u64(ulong x) 2860 // { 2861 //uint zhi = __clz_32bit((uint)(x >> 32)); 2862 //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL)); 2863 //return zhi == 32U ? 32U + zlo : zhi; 2864 //} 2865 //ulong x = op 2866 SDValue x = Op; 2867 // uint xhi = x >> 32 2868 SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x); 2869 // uint xlo = x & 0xFFFFFFFF 2870 SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x); 2871 // uint zhi = __clz_32bit(xhi) 2872 SDValue zhi = genCLZu32(xhi, DAG); 2873 // uint zlo = __clz_32bit(xlo) 2874 SDValue zlo = genCLZu32(xlo, DAG); 2875 // uint cmp = zhi == 32 2876 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2877 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 2878 zhi, DAG.getConstant(32U, INTTY)); 2879 // uint zlop32 = 32 + zlo 2880 SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY, 2881 DAG.getConstant(32U, INTTY), zlo); 2882 // return cmp ? 
zlop32: zhi 2883 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi); 2884 } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { 2885 // HD4XXX: 2886 // static inline uint 2887 //__clz_64bit(ulong x) 2888 //{ 2889 //uint zh = __clz_23bit((uint)(x >> 46)) - 5U; 2890 //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU); 2891 //uint zl = __clz_23bit((uint)x & 0x7fffffU); 2892 //uint r = zh == 18U ? 18U + zm : zh; 2893 //return zh + zm == 41U ? 41U + zl : r; 2894 //} 2895 //ulong x = Op 2896 SDValue x = Op; 2897 // ulong xs46 = x >> 46 2898 SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x, 2899 DAG.getConstant(46, LONGTY)); 2900 // uint ixs46 = (uint)xs46 2901 SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46); 2902 // ulong xs23 = x >> 23 2903 SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x, 2904 DAG.getConstant(23, LONGTY)); 2905 // uint ixs23 = (uint)xs23 2906 SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23); 2907 // uint xs23m23 = ixs23 & 0x7FFFFF 2908 SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23, 2909 DAG.getConstant(0x7fffffU, INTTY)); 2910 // uint ix = (uint)x 2911 SDValue ix = DAG.getNode((isVec) ? 
AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x); 2912 // uint xm23 = ix & 0x7FFFFF 2913 SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix, 2914 DAG.getConstant(0x7fffffU, INTTY)); 2915 // uint zh = __clz_23bit(ixs46) 2916 SDValue zh = genCLZuN(ixs46, DAG, 23); 2917 // uint zm = __clz_23bit(xs23m23) 2918 SDValue zm = genCLZuN(xs23m23, DAG, 23); 2919 // uint zl = __clz_23bit(xm23) 2920 SDValue zl = genCLZuN(xm23, DAG, 23); 2921 // uint zhm5 = zh - 5 2922 SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh, 2923 DAG.getConstant(-5U, INTTY)); 2924 SDValue const18 = DAG.getConstant(18, INTTY); 2925 SDValue const41 = DAG.getConstant(41, INTTY); 2926 // uint cmp1 = zh = 18 2927 SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2928 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 2929 zhm5, const18); 2930 // uint zhm5zm = zhm5 + zh 2931 SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm); 2932 // uint cmp2 = zhm5zm == 41 2933 SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2934 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 2935 zhm5zm, const41); 2936 // uint zmp18 = zhm5 + 18 2937 SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18); 2938 // uint zlp41 = zl + 41 2939 SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41); 2940 // uint r = cmp1 ? zmp18 : zh 2941 SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, 2942 cmp1, zmp18, zhm5); 2943 // return cmp2 ? 
zlp41 : r 2944 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r); 2945 } else { 2946 assert(0 && "Attempting to generate a CLZ function with an" 2947 " unknown graphics card"); 2948 } 2949 return DST; 2950} 2951SDValue 2952AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG, 2953 bool includeSign) const 2954{ 2955 EVT INTVT; 2956 EVT LONGVT; 2957 SDValue DST; 2958 DebugLoc DL = RHS.getDebugLoc(); 2959 EVT RHSVT = RHS.getValueType(); 2960 bool isVec = RHSVT.isVector(); 2961 if (isVec) { 2962 LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT 2963 .getVectorNumElements())); 2964 INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT 2965 .getVectorNumElements())); 2966 } else { 2967 LONGVT = EVT(MVT::i64); 2968 INTVT = EVT(MVT::i32); 2969 } 2970 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 2971 &this->getTargetMachine())->getSubtargetImpl(); 2972 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 2973 // unsigned version: 2974 // uint uhi = (uint)(d * 0x1.0p-32); 2975 // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d)); 2976 // return as_ulong2((uint2)(ulo, uhi)); 2977 // 2978 // signed version: 2979 // double ad = fabs(d); 2980 // long l = unsigned_version(ad); 2981 // long nl = -l; 2982 // return d == ad ? l : nl; 2983 SDValue d = RHS; 2984 if (includeSign) { 2985 d = DAG.getNode(ISD::FABS, DL, RHSVT, d); 2986 } 2987 SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d, 2988 DAG.getConstantFP(0x2f800000, RHSVT)); 2989 SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid); 2990 SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi); 2991 ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod, 2992 DAG.getConstantFP(0xcf800000, RHSVT), d); 2993 SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod); 2994 SDValue l = DAG.getNode((isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi); 2995 if (includeSign) { 2996 SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l); 2997 SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT, 2998 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32), 2999 RHS, d); 3000 l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl); 3001 } 3002 DST = l; 3003 } else { 3004 /* 3005 __attribute__((always_inline)) long 3006 cast_f64_to_i64(double d) 3007 { 3008 // Convert d in to 32-bit components 3009 long x = as_long(d); 3010 xhi = LCOMPHI(x); 3011 xlo = LCOMPLO(x); 3012 3013 // Generate 'normalized' mantissa 3014 mhi = xhi | 0x00100000; // hidden bit 3015 mhi <<= 11; 3016 temp = xlo >> (32 - 11); 3017 mhi |= temp 3018 mlo = xlo << 11; 3019 3020 // Compute shift right count from exponent 3021 e = (xhi >> (52-32)) & 0x7ff; 3022 sr = 1023 + 63 - e; 3023 srge64 = sr >= 64; 3024 srge32 = sr >= 32; 3025 3026 // Compute result for 0 <= sr < 32 3027 rhi0 = mhi >> (sr &31); 3028 rlo0 = mlo >> (sr &31); 3029 temp = mhi << (32 - sr); 3030 temp |= rlo0; 3031 rlo0 = sr ? temp : rlo0; 3032 3033 // Compute result for 32 <= sr 3034 rhi1 = 0; 3035 rlo1 = srge64 ? 0 : rhi0; 3036 3037 // Pick between the 2 results 3038 rhi = srge32 ? rhi1 : rhi0; 3039 rlo = srge32 ? rlo1 : rlo0; 3040 3041 // Optional saturate on overflow 3042 srlt0 = sr < 0; 3043 rhi = srlt0 ? MAXVALUE : rhi; 3044 rlo = srlt0 ? 
MAXVALUE : rlo; 3045 3046 // Create long 3047 res = LCREATE( rlo, rhi ); 3048 3049 // Deal with sign bit (ignoring whether result is signed or unsigned value) 3050 if (includeSign) { 3051 sign = ((signed int) xhi) >> 31; fill with sign bit 3052 sign = LCREATE( sign, sign ); 3053 res += sign; 3054 res ^= sign; 3055 } 3056 3057 return res; 3058 } 3059 */ 3060 SDValue c11 = DAG.getConstant( 63 - 52, INTVT ); 3061 SDValue c32 = DAG.getConstant( 32, INTVT ); 3062 3063 // Convert d in to 32-bit components 3064 SDValue d = RHS; 3065 SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d); 3066 SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); 3067 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); 3068 3069 // Generate 'normalized' mantissa 3070 SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT, 3071 xhi, DAG.getConstant( 0x00100000, INTVT ) ); 3072 mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 ); 3073 SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT, 3074 xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) ); 3075 mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp ); 3076 SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 ); 3077 3078 // Compute shift right count from exponent 3079 SDValue e = DAG.getNode( ISD::SRL, DL, INTVT, 3080 xhi, DAG.getConstant( 52-32, INTVT ) ); 3081 e = DAG.getNode( ISD::AND, DL, INTVT, 3082 e, DAG.getConstant( 0x7ff, INTVT ) ); 3083 SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT, 3084 DAG.getConstant( 1023 + 63, INTVT ), e ); 3085 SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT, 3086 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 3087 sr, DAG.getConstant(64, INTVT)); 3088 SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT, 3089 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 3090 sr, DAG.getConstant(32, INTVT)); 3091 3092 // Compute result for 0 <= sr < 32 3093 SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr ); 3094 
SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr ); 3095 temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr ); 3096 temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp ); 3097 temp = DAG.getNode( ISD::OR, DL, INTVT, rlo0, temp ); 3098 rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 ); 3099 3100 // Compute result for 32 <= sr 3101 SDValue rhi1 = DAG.getConstant( 0, INTVT ); 3102 SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 3103 srge64, rhi1, rhi0 ); 3104 3105 // Pick between the 2 results 3106 SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 3107 srge32, rhi1, rhi0 ); 3108 SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 3109 srge32, rlo1, rlo0 ); 3110 3111 // Create long 3112 SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi ); 3113 3114 // Deal with sign bit 3115 if (includeSign) { 3116 SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT, 3117 xhi, DAG.getConstant( 31, INTVT ) ); 3118 sign = DAG.getNode( (isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign ); 3119 res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign ); 3120 res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign ); 3121 } 3122 DST = res; 3123 } 3124 return DST; 3125} 3126SDValue 3127AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG, 3128 bool includeSign) const 3129{ 3130 EVT INTVT; 3131 EVT LONGVT; 3132 DebugLoc DL = RHS.getDebugLoc(); 3133 EVT RHSVT = RHS.getValueType(); 3134 bool isVec = RHSVT.isVector(); 3135 if (isVec) { 3136 LONGVT = EVT(MVT::getVectorVT(MVT::i64, 3137 RHSVT.getVectorNumElements())); 3138 INTVT = EVT(MVT::getVectorVT(MVT::i32, 3139 RHSVT.getVectorNumElements())); 3140 } else { 3141 LONGVT = EVT(MVT::i64); 3142 INTVT = EVT(MVT::i32); 3143 } 3144 /* 3145 __attribute__((always_inline)) int 3146 cast_f64_to_[u|i]32(double d) 3147 { 3148 // Convert d in to 32-bit components 3149 long x = as_long(d); 3150 xhi = LCOMPHI(x); 3151 xlo = LCOMPLO(x); 3152 3153 // Generate 'normalized' mantissa 3154 mhi = xhi | 0x00100000; // hidden bit 3155 mhi <<= 11; 3156 temp = xlo >> (32 - 11); 3157 mhi |= temp 3158 3159 // Compute shift right count from exponent 3160 e = (xhi >> (52-32)) & 0x7ff; 3161 sr = 1023 + 31 - e; 3162 srge32 = sr >= 32; 3163 3164 // Compute result for 0 <= sr < 32 3165 res = mhi >> (sr &31); 3166 res = srge32 ? 0 : res; 3167 3168 // Optional saturate on overflow 3169 srlt0 = sr < 0; 3170 res = srlt0 ? MAXVALUE : res; 3171 3172 // Deal with sign bit (ignoring whether result is signed or unsigned value) 3173 if (includeSign) { 3174 sign = ((signed int) xhi) >> 31; fill with sign bit 3175 res += sign; 3176 res ^= sign; 3177 } 3178 3179 return res; 3180 } 3181 */ 3182 SDValue c11 = DAG.getConstant( 63 - 52, INTVT ); 3183 3184 // Convert d in to 32-bit components 3185 SDValue d = RHS; 3186 SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d); 3187 SDValue xhi = DAG.getNode( (isVec) ? 
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); 3188 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); 3189 3190 // Generate 'normalized' mantissa 3191 SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT, 3192 xhi, DAG.getConstant( 0x00100000, INTVT ) ); 3193 mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 ); 3194 SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT, 3195 xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) ); 3196 mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp ); 3197 3198 // Compute shift right count from exponent 3199 SDValue e = DAG.getNode( ISD::SRL, DL, INTVT, 3200 xhi, DAG.getConstant( 52-32, INTVT ) ); 3201 e = DAG.getNode( ISD::AND, DL, INTVT, 3202 e, DAG.getConstant( 0x7ff, INTVT ) ); 3203 SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT, 3204 DAG.getConstant( 1023 + 31, INTVT ), e ); 3205 SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT, 3206 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 3207 sr, DAG.getConstant(32, INTVT)); 3208 3209 // Compute result for 0 <= sr < 32 3210 SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr ); 3211 res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 3212 srge32, DAG.getConstant(0,INTVT), res ); 3213 3214 // Deal with sign bit 3215 if (includeSign) { 3216 SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT, 3217 xhi, DAG.getConstant( 31, INTVT ) ); 3218 res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign ); 3219 res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign ); 3220 } 3221 return res; 3222} 3223SDValue 3224AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const 3225{ 3226 SDValue RHS = Op.getOperand(0); 3227 EVT RHSVT = RHS.getValueType(); 3228 MVT RST = RHSVT.getScalarType().getSimpleVT(); 3229 EVT LHSVT = Op.getValueType(); 3230 MVT LST = LHSVT.getScalarType().getSimpleVT(); 3231 DebugLoc DL = Op.getDebugLoc(); 3232 SDValue DST; 3233 const AMDILTargetMachine* 3234 amdtm = reinterpret_cast<const AMDILTargetMachine*> 3235 
(&this->getTargetMachine()); 3236 const AMDILSubtarget* 3237 stm = static_cast<const AMDILSubtarget*>( 3238 amdtm->getSubtargetImpl()); 3239 if (RST == MVT::f64 && RHSVT.isVector() 3240 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3241 // We dont support vector 64bit floating point convertions. 3242 for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) { 3243 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 3244 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); 3245 op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op); 3246 if (!x) { 3247 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); 3248 } else { 3249 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, 3250 DST, op, DAG.getTargetConstant(x, MVT::i32)); 3251 } 3252 } 3253 } else { 3254 if (RST == MVT::f64 3255 && LST == MVT::i32) { 3256 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3257 DST = SDValue(Op.getNode(), 0); 3258 } else { 3259 DST = genf64toi32(RHS, DAG, true); 3260 } 3261 } else if (RST == MVT::f64 3262 && LST == MVT::i64) { 3263 DST = genf64toi64(RHS, DAG, true); 3264 } else if (RST == MVT::f64 3265 && (LST == MVT::i8 || LST == MVT::i16)) { 3266 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3267 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0)); 3268 } else { 3269 SDValue ToInt = genf64toi32(RHS, DAG, true); 3270 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt); 3271 } 3272 3273 } else { 3274 DST = SDValue(Op.getNode(), 0); 3275 } 3276 } 3277 return DST; 3278} 3279 3280SDValue 3281AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const 3282{ 3283 SDValue DST; 3284 SDValue RHS = Op.getOperand(0); 3285 EVT RHSVT = RHS.getValueType(); 3286 MVT RST = RHSVT.getScalarType().getSimpleVT(); 3287 EVT LHSVT = Op.getValueType(); 3288 MVT LST = LHSVT.getScalarType().getSimpleVT(); 3289 DebugLoc DL = Op.getDebugLoc(); 3290 const AMDILTargetMachine* 3291 amdtm = reinterpret_cast<const AMDILTargetMachine*> 
3292 (&this->getTargetMachine()); 3293 const AMDILSubtarget* 3294 stm = static_cast<const AMDILSubtarget*>( 3295 amdtm->getSubtargetImpl()); 3296 if (RST == MVT::f64 && RHSVT.isVector() 3297 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3298 // We dont support vector 64bit floating point convertions. 3299 for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) { 3300 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 3301 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); 3302 op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op); 3303 if (!x) { 3304 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); 3305 } else { 3306 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, 3307 DST, op, DAG.getTargetConstant(x, MVT::i32)); 3308 } 3309 3310 } 3311 } else { 3312 if (RST == MVT::f64 3313 && LST == MVT::i32) { 3314 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3315 DST = SDValue(Op.getNode(), 0); 3316 } else { 3317 DST = genf64toi32(RHS, DAG, false); 3318 } 3319 } else if (RST == MVT::f64 3320 && LST == MVT::i64) { 3321 DST = genf64toi64(RHS, DAG, false); 3322 } else if (RST == MVT::f64 3323 && (LST == MVT::i8 || LST == MVT::i16)) { 3324 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3325 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0)); 3326 } else { 3327 SDValue ToInt = genf64toi32(RHS, DAG, false); 3328 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt); 3329 } 3330 3331 } else { 3332 DST = SDValue(Op.getNode(), 0); 3333 } 3334 } 3335 return DST; 3336} 3337SDValue 3338AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT, 3339 SelectionDAG &DAG) const 3340{ 3341 EVT RHSVT = RHS.getValueType(); 3342 DebugLoc DL = RHS.getDebugLoc(); 3343 EVT INTVT; 3344 EVT LONGVT; 3345 bool isVec = RHSVT.isVector(); 3346 if (isVec) { 3347 LONGVT = EVT(MVT::getVectorVT(MVT::i64, 3348 RHSVT.getVectorNumElements())); 3349 INTVT = EVT(MVT::getVectorVT(MVT::i32, 3350 RHSVT.getVectorNumElements())); 3351 } else { 
3352 LONGVT = EVT(MVT::i64); 3353 INTVT = EVT(MVT::i32); 3354 } 3355 SDValue x = RHS; 3356 const AMDILTargetMachine* 3357 amdtm = reinterpret_cast<const AMDILTargetMachine*> 3358 (&this->getTargetMachine()); 3359 const AMDILSubtarget* 3360 stm = static_cast<const AMDILSubtarget*>( 3361 amdtm->getSubtargetImpl()); 3362 if (stm->calVersion() >= CAL_VERSION_SC_135) { 3363 // unsigned x = RHS; 3364 // ulong xd = (ulong)(0x4330_0000 << 32) | x; 3365 // double d = as_double( xd ); 3366 // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000 3367 SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x, 3368 DAG.getConstant( 0x43300000, INTVT ) ); 3369 SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd ); 3370 SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT, 3371 DAG.getConstant( 0x4330000000000000ULL, LONGVT ) ); 3372 return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd ); 3373 } else { 3374 SDValue clz = genCLZu32(x, DAG); 3375 3376 // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2 3377 // Except for an input 0... which requires a 0 exponent 3378 SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT, 3379 DAG.getConstant( (1023+31), INTVT), clz ); 3380 exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x ); 3381 3382 // Normalize frac 3383 SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz ); 3384 3385 // Eliminate hidden bit 3386 rhi = DAG.getNode( ISD::AND, DL, INTVT, 3387 rhi, DAG.getConstant( 0x7fffffff, INTVT ) ); 3388 3389 // Pack exponent and frac 3390 SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT, 3391 rhi, DAG.getConstant( (32 - 11), INTVT ) ); 3392 rhi = DAG.getNode( ISD::SRL, DL, INTVT, 3393 rhi, DAG.getConstant( 11, INTVT ) ); 3394 exp = DAG.getNode( ISD::SHL, DL, INTVT, 3395 exp, DAG.getConstant( 20, INTVT ) ); 3396 rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp ); 3397 3398 // Convert 2 x 32 in to 1 x 64, then to double precision float type 3399 SDValue res = DAG.getNode( (isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi ); 3400 return DAG.getNode(ISDBITCAST, DL, LHSVT, res); 3401 } 3402} 3403SDValue 3404AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT, 3405 SelectionDAG &DAG) const 3406{ 3407 EVT RHSVT = RHS.getValueType(); 3408 DebugLoc DL = RHS.getDebugLoc(); 3409 EVT INTVT; 3410 EVT LONGVT; 3411 bool isVec = RHSVT.isVector(); 3412 if (isVec) { 3413 INTVT = EVT(MVT::getVectorVT(MVT::i32, 3414 RHSVT.getVectorNumElements())); 3415 } else { 3416 INTVT = EVT(MVT::i32); 3417 } 3418 LONGVT = RHSVT; 3419 SDValue x = RHS; 3420 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 3421 &this->getTargetMachine())->getSubtargetImpl(); 3422 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3423 // double dhi = (double)(as_uint2(x).y); 3424 // double dlo = (double)(as_uint2(x).x); 3425 // return mad(dhi, 0x1.0p+32, dlo) 3426 SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x); 3427 dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi); 3428 SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x); 3429 dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo); 3430 return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi, 3431 DAG.getConstantFP(0x4f800000, LHSVT), dlo); 3432 } else if (stm->calVersion() >= CAL_VERSION_SC_135) { 3433 // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL)); 3434 // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32)); 3435 // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo; 3436 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL 3437 SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) ); 3438 SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd ); 3439 SDValue xhi = DAG.getNode((isVec) ? 
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32 3440 SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) ); 3441 SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe ); 3442 SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT, 3443 DAG.getConstant( 0x4530000000100000ULL, LONGVT ) ); 3444 hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c ); 3445 return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo ); 3446 3447 } else { 3448 SDValue clz = genCLZu64(x, DAG); 3449 SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); 3450 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); 3451 3452 // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2 3453 SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT, 3454 DAG.getConstant( (1023+63), INTVT), clz ); 3455 SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo ); 3456 exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 3457 mash, exp, mash ); // exp = exp, or 0 if input was 0 3458 3459 // Normalize frac 3460 SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT, 3461 clz, DAG.getConstant( 31, INTVT ) ); 3462 SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT, 3463 DAG.getConstant( 32, INTVT ), clz31 ); 3464 SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 ); 3465 SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift ); 3466 t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 ); 3467 SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 ); 3468 SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 ); 3469 SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 ); 3470 SDValue rlo2 = DAG.getConstant( 0, INTVT ); 3471 SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT, 3472 clz, DAG.getConstant( 32, INTVT ) ); 3473 SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 3474 clz32, rhi2, rhi1 ); 3475 SDValue rlo = DAG.getNode( 
AMDILISD::CMOVLOG, DL, INTVT, 3476 clz32, rlo2, rlo1 ); 3477 3478 // Eliminate hidden bit 3479 rhi = DAG.getNode( ISD::AND, DL, INTVT, 3480 rhi, DAG.getConstant( 0x7fffffff, INTVT ) ); 3481 3482 // Save bits needed to round properly 3483 SDValue round = DAG.getNode( ISD::AND, DL, INTVT, 3484 rlo, DAG.getConstant( 0x7ff, INTVT ) ); 3485 3486 // Pack exponent and frac 3487 rlo = DAG.getNode( ISD::SRL, DL, INTVT, 3488 rlo, DAG.getConstant( 11, INTVT ) ); 3489 SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT, 3490 rhi, DAG.getConstant( (32 - 11), INTVT ) ); 3491 rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp ); 3492 rhi = DAG.getNode( ISD::SRL, DL, INTVT, 3493 rhi, DAG.getConstant( 11, INTVT ) ); 3494 exp = DAG.getNode( ISD::SHL, DL, INTVT, 3495 exp, DAG.getConstant( 20, INTVT ) ); 3496 rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp ); 3497 3498 // Compute rounding bit 3499 SDValue even = DAG.getNode( ISD::AND, DL, INTVT, 3500 rlo, DAG.getConstant( 1, INTVT ) ); 3501 SDValue grs = DAG.getNode( ISD::AND, DL, INTVT, 3502 round, DAG.getConstant( 0x3ff, INTVT ) ); 3503 grs = DAG.getNode( AMDILISD::CMP, DL, INTVT, 3504 DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32), 3505 grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none 3506 grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even ); 3507 round = DAG.getNode( ISD::SRL, DL, INTVT, 3508 round, DAG.getConstant( 10, INTVT ) ); 3509 round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1 3510 3511 // Add rounding bit 3512 SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, 3513 round, DAG.getConstant( 0, INTVT ) ); 3514 SDValue res = DAG.getNode( (isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi ); 3515 res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround ); 3516 return DAG.getNode(ISDBITCAST, DL, LHSVT, res); 3517 } 3518} 3519SDValue 3520AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const 3521{ 3522 SDValue RHS = Op.getOperand(0); 3523 EVT RHSVT = RHS.getValueType(); 3524 MVT RST = RHSVT.getScalarType().getSimpleVT(); 3525 EVT LHSVT = Op.getValueType(); 3526 MVT LST = LHSVT.getScalarType().getSimpleVT(); 3527 DebugLoc DL = Op.getDebugLoc(); 3528 SDValue DST; 3529 EVT INTVT; 3530 EVT LONGVT; 3531 const AMDILTargetMachine* 3532 amdtm = reinterpret_cast<const AMDILTargetMachine*> 3533 (&this->getTargetMachine()); 3534 const AMDILSubtarget* 3535 stm = static_cast<const AMDILSubtarget*>( 3536 amdtm->getSubtargetImpl()); 3537 if (LST == MVT::f64 && LHSVT.isVector() 3538 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3539 // We dont support vector 64bit floating point convertions. 3540 DST = Op; 3541 for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) { 3542 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 3543 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); 3544 op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op); 3545 if (!x) { 3546 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); 3547 } else { 3548 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST, 3549 op, DAG.getTargetConstant(x, MVT::i32)); 3550 } 3551 3552 } 3553 } else { 3554 3555 if (RST == MVT::i32 3556 && LST == MVT::f64) { 3557 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3558 DST = SDValue(Op.getNode(), 0); 3559 } else { 3560 DST = genu32tof64(RHS, LHSVT, DAG); 3561 } 3562 } else if (RST == MVT::i64 3563 && LST == MVT::f64) { 3564 DST = genu64tof64(RHS, LHSVT, DAG); 3565 } else { 3566 DST = SDValue(Op.getNode(), 0); 3567 } 3568 } 3569 return DST; 3570} 3571 3572SDValue 3573AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const 3574{ 3575 
SDValue RHS = Op.getOperand(0); 3576 EVT RHSVT = RHS.getValueType(); 3577 MVT RST = RHSVT.getScalarType().getSimpleVT(); 3578 EVT INTVT; 3579 EVT LONGVT; 3580 SDValue DST; 3581 bool isVec = RHSVT.isVector(); 3582 DebugLoc DL = Op.getDebugLoc(); 3583 EVT LHSVT = Op.getValueType(); 3584 MVT LST = LHSVT.getScalarType().getSimpleVT(); 3585 const AMDILTargetMachine* 3586 amdtm = reinterpret_cast<const AMDILTargetMachine*> 3587 (&this->getTargetMachine()); 3588 const AMDILSubtarget* 3589 stm = static_cast<const AMDILSubtarget*>( 3590 amdtm->getSubtargetImpl()); 3591 if (LST == MVT::f64 && LHSVT.isVector() 3592 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3593 // We dont support vector 64bit floating point convertions. 3594 for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) { 3595 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 3596 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); 3597 op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op); 3598 if (!x) { 3599 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); 3600 } else { 3601 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST, 3602 op, DAG.getTargetConstant(x, MVT::i32)); 3603 } 3604 3605 } 3606 } else { 3607 3608 if (isVec) { 3609 LONGVT = EVT(MVT::getVectorVT(MVT::i64, 3610 RHSVT.getVectorNumElements())); 3611 INTVT = EVT(MVT::getVectorVT(MVT::i32, 3612 RHSVT.getVectorNumElements())); 3613 } else { 3614 LONGVT = EVT(MVT::i64); 3615 INTVT = EVT(MVT::i32); 3616 } 3617 MVT RST = RHSVT.getScalarType().getSimpleVT(); 3618 if ((RST == MVT::i32 || RST == MVT::i64) 3619 && LST == MVT::f64) { 3620 if (RST == MVT::i32) { 3621 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3622 DST = SDValue(Op.getNode(), 0); 3623 return DST; 3624 } 3625 } 3626 SDValue c31 = DAG.getConstant( 31, INTVT ); 3627 SDValue cSbit = DAG.getConstant( 0x80000000, INTVT ); 3628 3629 SDValue S; // Sign, as 0 or -1 3630 SDValue Sbit; // Sign bit, as one bit, MSB only. 
3631 if (RST == MVT::i32) { 3632 Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit ); 3633 S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 ); 3634 } else { // 64-bit case... SRA of 64-bit values is slow 3635 SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS ); 3636 Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit ); 3637 SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 ); 3638 S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp ); 3639 } 3640 3641 // get abs() of input value, given sign as S (0 or -1) 3642 // SpI = RHS + S 3643 SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S); 3644 // SpIxS = SpI ^ S 3645 SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S); 3646 3647 // Convert unsigned value to double precision 3648 SDValue R; 3649 if (RST == MVT::i32) { 3650 // r = cast_u32_to_f64(SpIxS) 3651 R = genu32tof64(SpIxS, LHSVT, DAG); 3652 } else { 3653 // r = cast_u64_to_f64(SpIxS) 3654 R = genu64tof64(SpIxS, LHSVT, DAG); 3655 } 3656 3657 // drop in the sign bit 3658 SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R ); 3659 SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t ); 3660 SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t ); 3661 thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit ); 3662 t = DAG.getNode( (isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi ); 3663 DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t ); 3664 } else { 3665 DST = SDValue(Op.getNode(), 0); 3666 } 3667 } 3668 return DST; 3669} 3670SDValue 3671AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const 3672{ 3673 SDValue LHS = Op.getOperand(0); 3674 SDValue RHS = Op.getOperand(1); 3675 DebugLoc DL = Op.getDebugLoc(); 3676 EVT OVT = Op.getValueType(); 3677 SDValue DST; 3678 bool isVec = RHS.getValueType().isVector(); 3679 if (OVT.getScalarType() == MVT::i64) { 3680 /*const AMDILTargetMachine* 3681 amdtm = reinterpret_cast<const AMDILTargetMachine*> 3682 (&this->getTargetMachine()); 3683 const AMDILSubtarget* 3684 stm = dynamic_cast<const AMDILSubtarget*>( 3685 amdtm->getSubtargetImpl());*/ 3686 MVT INTTY = MVT::i32; 3687 if (OVT == MVT::v2i64) { 3688 INTTY = MVT::v2i32; 3689 } 3690 SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI; 3691 // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32 3692 LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS); 3693 RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS); 3694 LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS); 3695 RHSHI = DAG.getNode((isVec) ? 
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
    // Subtract low and high halves independently; the borrow from the
    // low half is applied to the high half below.
    INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
    INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
    //TODO: need to use IBORROW on HD5XXX and later hardware
    SDValue cmp;
    if (OVT == MVT::i64) {
      // Unsigned LHSLO < RHSLO means a borrow occurred.  AMDILISD::CMP
      // appears to yield an all-ones mask (-1) on true, so ADDing the
      // result to INTHI subtracts the borrow — TODO confirm CMP output
      // encoding against the instruction definitions.
      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSLO, RHSLO);
    } else {
      // v2i64: compute the per-lane borrow on scalar i32 lanes, then
      // rebuild a v2i32 borrow mask with VBUILD + INSERT_VECTOR_ELT.
      SDValue cmplo;
      SDValue cmphi;
      SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
      SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
      cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRLO, RHSRLO);
      cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRHI, RHSRHI);
      cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
      cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
          cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
    }
    // Apply the borrow mask to the high half.
    INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
    // Recombine halves into the original 64-bit (or v2i64) type.
    DST = DAG.getNode((isVec) ?
        AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
        INTLO, INTHI);
  } else {
    // Non-64-bit subtract: leave node unchanged for default handling.
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}
// Lower ISD::FDIV by dispatching on scalar float width to the
// width-specific helpers; other types fall through untouched.
SDValue
AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::f64) {
    DST = LowerFDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::f32) {
    DST = LowerFDIV32(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

// Lower ISD::SDIV by scalar integer width.  i16 and i8 share the
// 24-bit signed divide path (LowerSDIV24).
SDValue
AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

// Lower ISD::UDIV by scalar integer width.  i16 and i8 share the
// 24-bit unsigned divide path (LowerUDIV24).
SDValue
AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerUDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerUDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerUDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

// Lower ISD::SREM by scalar integer width; each width has its own helper.
SDValue
AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

// Lower ISD::UREM by scalar integer width; each width has its own helper.
SDValue
AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerUREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerUREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerUREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerUREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

// Lower ISD::MUL for 64-bit integers: schoolbook 32x32 decomposition —
// low = lo*lo (low 32), high = hi0*lo1 + hi1*lo0 + mulhi(lo0, lo1).
SDValue
AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = OVT.isVector();
  if (OVT.getScalarType() != MVT::i64)
  {
    // Only 64-bit multiplies need custom lowering.
    DST = SDValue(Op.getNode(), 0);
  } else {
    assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
    // TODO: This needs to be turned into a tablegen pattern
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);

    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    // mul64(h1, l1, h0, l0)
    SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, LHS);
    SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, LHS);
    SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, RHS);
    SDValue RHSHI = DAG.getNode((isVec) ?
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, RHS);
    // MULLO_UINT_1 r1, h0, l1
    SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSHI, LHSLO);
    // MULLO_UINT_1 r2, h1, l0
    SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSLO, LHSHI);
    // ADD_INT hr, r1, r2
    SDValue ADDHI = DAG.getNode(ISD::ADD,
        DL,
        INTTY, RHILLO, RLOHHI);
    // MULHI_UINT_1 r3, l1, l0
    SDValue RLOLLO = DAG.getNode(ISD::MULHU,
        DL,
        INTTY, RHSLO, LHSLO);
    // ADD_INT hr, hr, r3
    SDValue HIGH = DAG.getNode(ISD::ADD,
        DL,
        INTTY, ADDHI, RLOLLO);
    // MULLO_UINT_1 l3, l1, l0
    SDValue LOW = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, LHSLO, RHSLO);
    // Recombine the 32-bit halves into the 64-bit result.
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
        DL,
        OVT, LOW, HIGH);
  }
  return DST;
}
// Lower ISD::BUILD_VECTOR: splat element 0 with VBUILD, then insert the
// remaining non-undef elements.  If all operands are identical the splat
// alone suffices.
SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  // VBUILD broadcasts operand 0 into every lane.
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    // Pure splat — no inserts needed.
    return Nodes1;
  }
  // The cases below intentionally fall through (4 -> 3 -> 2) so that all
  // higher-numbered elements get inserted.  The constants 7/6/5 are the
  // insertion-position encodings used here for elements 3/2/1 —
  // presumably an index encoding consumed by instruction selection;
  // TODO confirm against the INSERT_VECTOR_ELT patterns.
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
      // fall through
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6,
              MVT::i32));
      }
      // fall through
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  };
  return Nodes1;
}

// Lower ISD::INSERT_VECTOR_ELT.  Constant indices map directly onto a
// single VINSERT with byte-swizzle masks; dynamic indices emit a VINSERT
// per possible position and select the matching one with CMP + CMOVLOG.
SDValue
AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
    SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  const SDValue *ptr = NULL;
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  uint32_t swizzleNum = 0;
  SDValue DST;
  if (!VT.isVector()) {
    // Scalar "insert" is just the vector operand itself.
    SDValue Res = Op.getOperand(0);
    return Res;
  }

  // Insert the scalar value unless it is undef, in which case reuse the
  // base vector operand.
  if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
    ptr = &Op.getOperand(1);
  } else {
    ptr = &Op.getOperand(0);
  }
  if (CSDN) {
    // Static insertion: mask2 keeps the other lanes of the destination,
    // mask3 selects the lane being written (one byte per 32-bit lane).
    swizzleNum = (uint32_t)CSDN->getZExtValue();
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    DST = DAG.getNode(AMDILISD::VINSERT,
        DL,
        VT,
        Op.getOperand(0),
        *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
  } else {
    // Dynamic insertion: build the result for index 0, then for each
    // other index build the alternative and select on index equality.
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    SDValue res = DAG.getNode(AMDILISD::VINSERT,
        DL, VT, Op.getOperand(0), *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
    for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
      mask2 = 0x04030201 & ~(0xFF << (x * 8));
      mask3 = 0x01010101 & (0xFF << (x * 8));
      SDValue t = DAG.getNode(AMDILISD::VINSERT,
          DL, VT, Op.getOperand(0), *ptr,
          DAG.getTargetConstant(mask2, MVT::i32),
          DAG.getTargetConstant(mask3, MVT::i32));
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32), 3996 Op.getOperand(2), DAG.getConstant(x, MVT::i32)); 3997 c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c); 3998 res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res); 3999 } 4000 DST = res; 4001 } 4002 return DST; 4003} 4004 4005SDValue 4006AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, 4007 SelectionDAG &DAG) const 4008{ 4009 EVT VT = Op.getValueType(); 4010 const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 4011 uint64_t swizzleNum = 0; 4012 DebugLoc DL = Op.getDebugLoc(); 4013 SDValue Res; 4014 if (!Op.getOperand(0).getValueType().isVector()) { 4015 Res = Op.getOperand(0); 4016 return Res; 4017 } 4018 if (CSDN) { 4019 // Static vector extraction 4020 swizzleNum = CSDN->getZExtValue() + 1; 4021 Res = DAG.getNode(AMDILISD::VEXTRACT, 4022 DL, VT, 4023 Op.getOperand(0), 4024 DAG.getTargetConstant(swizzleNum, MVT::i32)); 4025 } else { 4026 SDValue Op1 = Op.getOperand(1); 4027 uint32_t vecSize = 4; 4028 SDValue Op0 = Op.getOperand(0); 4029 SDValue res = DAG.getNode(AMDILISD::VEXTRACT, 4030 DL, VT, Op0, 4031 DAG.getTargetConstant(1, MVT::i32)); 4032 if (Op0.getValueType().isVector()) { 4033 vecSize = Op0.getValueType().getVectorNumElements(); 4034 } 4035 for (uint32_t x = 2; x <= vecSize; ++x) { 4036 SDValue t = DAG.getNode(AMDILISD::VEXTRACT, 4037 DL, VT, Op0, 4038 DAG.getTargetConstant(x, MVT::i32)); 4039 SDValue c = DAG.getNode(AMDILISD::CMP, 4040 DL, Op1.getValueType(), 4041 DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32), 4042 Op1, DAG.getConstant(x, MVT::i32)); 4043 res = DAG.getNode(AMDILISD::CMOVLOG, DL, 4044 VT, c, t, res); 4045 4046 } 4047 Res = res; 4048 } 4049 return Res; 4050} 4051 4052SDValue 4053AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, 4054 SelectionDAG &DAG) const 4055{ 4056 uint32_t vecSize = Op.getValueType().getVectorNumElements(); 4057 SDValue src = Op.getOperand(0); 4058 const ConstantSDNode *CSDN = 
dyn_cast<ConstantSDNode>(Op.getOperand(1)); 4059 uint64_t offset = 0; 4060 EVT vecType = Op.getValueType().getVectorElementType(); 4061 DebugLoc DL = Op.getDebugLoc(); 4062 SDValue Result; 4063 if (CSDN) { 4064 offset = CSDN->getZExtValue(); 4065 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 4066 DL,vecType, src, DAG.getConstant(offset, MVT::i32)); 4067 Result = DAG.getNode(AMDILISD::VBUILD, DL, 4068 Op.getValueType(), Result); 4069 for (uint32_t x = 1; x < vecSize; ++x) { 4070 SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType, 4071 src, DAG.getConstant(offset + x, MVT::i32)); 4072 if (elt.getOpcode() != ISD::UNDEF) { 4073 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, 4074 Op.getValueType(), Result, elt, 4075 DAG.getConstant(x, MVT::i32)); 4076 } 4077 } 4078 } else { 4079 SDValue idx = Op.getOperand(1); 4080 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 4081 DL, vecType, src, idx); 4082 Result = DAG.getNode(AMDILISD::VBUILD, DL, 4083 Op.getValueType(), Result); 4084 for (uint32_t x = 1; x < vecSize; ++x) { 4085 idx = DAG.getNode(ISD::ADD, DL, vecType, 4086 idx, DAG.getConstant(1, MVT::i32)); 4087 SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType, 4088 src, idx); 4089 if (elt.getOpcode() != ISD::UNDEF) { 4090 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, 4091 Op.getValueType(), Result, elt, idx); 4092 } 4093 } 4094 } 4095 return Result; 4096} 4097SDValue 4098AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 4099 SelectionDAG &DAG) const 4100{ 4101 SDValue Res = DAG.getNode(AMDILISD::VBUILD, 4102 Op.getDebugLoc(), 4103 Op.getValueType(), 4104 Op.getOperand(0)); 4105 return Res; 4106} 4107SDValue 4108AMDILTargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const 4109{ 4110 SDValue andOp; 4111 andOp = DAG.getNode( 4112 AMDILISD::AND, 4113 Op.getDebugLoc(), 4114 Op.getValueType(), 4115 Op.getOperand(0), 4116 Op.getOperand(1)); 4117 return andOp; 4118} 4119SDValue 4120AMDILTargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) 
const 4121{ 4122 SDValue orOp; 4123 orOp = DAG.getNode(AMDILISD::OR, 4124 Op.getDebugLoc(), 4125 Op.getValueType(), 4126 Op.getOperand(0), 4127 Op.getOperand(1)); 4128 return orOp; 4129} 4130SDValue 4131AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const 4132{ 4133 SDValue Cond = Op.getOperand(0); 4134 SDValue LHS = Op.getOperand(1); 4135 SDValue RHS = Op.getOperand(2); 4136 DebugLoc DL = Op.getDebugLoc(); 4137 Cond = getConversionNode(DAG, Cond, Op, true); 4138 Cond = DAG.getNode(AMDILISD::CMOVLOG, 4139 DL, 4140 Op.getValueType(), Cond, LHS, RHS); 4141 return Cond; 4142} 4143SDValue 4144AMDILTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const 4145{ 4146 SDValue Cond; 4147 SDValue LHS = Op.getOperand(0); 4148 SDValue RHS = Op.getOperand(1); 4149 SDValue TRUE = Op.getOperand(2); 4150 SDValue FALSE = Op.getOperand(3); 4151 SDValue CC = Op.getOperand(4); 4152 DebugLoc DL = Op.getDebugLoc(); 4153 bool skipCMov = false; 4154 bool genINot = false; 4155 EVT OVT = Op.getValueType(); 4156 4157 // Check for possible elimination of cmov 4158 if (TRUE.getValueType().getSimpleVT().SimpleTy == MVT::i32) { 4159 const ConstantSDNode *trueConst 4160 = dyn_cast<ConstantSDNode>( TRUE.getNode() ); 4161 const ConstantSDNode *falseConst 4162 = dyn_cast<ConstantSDNode>( FALSE.getNode() ); 4163 if (trueConst && falseConst) { 4164 // both possible result values are constants 4165 if (trueConst->isAllOnesValue() 4166 && falseConst->isNullValue()) { // and convenient constants 4167 skipCMov = true; 4168 } 4169 else if (trueConst->isNullValue() 4170 && falseConst->isAllOnesValue()) { // less convenient 4171 skipCMov = true; 4172 genINot = true; 4173 } 4174 } 4175 } 4176 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 4177 unsigned int AMDILCC = CondCCodeToCC( 4178 SetCCOpcode, 4179 LHS.getValueType().getSimpleVT().SimpleTy); 4180 assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!"); 4181 Cond = DAG.getNode( 4182 AMDILISD::CMP, 4183 DL, 
4184 LHS.getValueType(), 4185 DAG.getConstant(AMDILCC, MVT::i32), 4186 LHS, 4187 RHS); 4188 Cond = getConversionNode(DAG, Cond, Op, true); 4189 if (genINot) { 4190 Cond = DAG.getNode(AMDILISD::NOT, DL, OVT, Cond); 4191 } 4192 if (!skipCMov) { 4193 Cond = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, Cond, TRUE, FALSE); 4194 } 4195 return Cond; 4196} 4197SDValue 4198AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const 4199{ 4200 SDValue Cond; 4201 SDValue LHS = Op.getOperand(0); 4202 SDValue RHS = Op.getOperand(1); 4203 SDValue CC = Op.getOperand(2); 4204 DebugLoc DL = Op.getDebugLoc(); 4205 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 4206 unsigned int AMDILCC = CondCCodeToCC( 4207 SetCCOpcode, 4208 LHS.getValueType().getSimpleVT().SimpleTy); 4209 assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!"); 4210 Cond = DAG.getNode( 4211 AMDILISD::CMP, 4212 DL, 4213 LHS.getValueType(), 4214 DAG.getConstant(AMDILCC, MVT::i32), 4215 LHS, 4216 RHS); 4217 Cond = getConversionNode(DAG, Cond, Op, true); 4218 Cond = DAG.getNode( 4219 ISD::AND, 4220 DL, 4221 Cond.getValueType(), 4222 DAG.getConstant(1, Cond.getValueType()), 4223 Cond); 4224 return Cond; 4225} 4226 4227SDValue 4228AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const 4229{ 4230 SDValue Data = Op.getOperand(0); 4231 VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1)); 4232 DebugLoc DL = Op.getDebugLoc(); 4233 EVT DVT = Data.getValueType(); 4234 EVT BVT = BaseType->getVT(); 4235 unsigned baseBits = BVT.getScalarType().getSizeInBits(); 4236 unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1; 4237 unsigned shiftBits = srcBits - baseBits; 4238 if (srcBits < 32) { 4239 // If the op is less than 32 bits, then it needs to extend to 32bits 4240 // so it can properly keep the upper bits valid. 4241 EVT IVT = genIntType(32, DVT.isVector() ? 
DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
// Build an integer EVT with the same total bit width as size*numEle,
// using i64 lanes when size == 64 and i32 lanes otherwise.  A total
// width at or below one lane collapses to the scalar type.
EVT
AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int iSize = (size * numEle);
  // Number of 32-bit (or 64-bit) lanes needed to hold iSize bits.
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}

// Lower ISD::BITCAST.  Most casts become a plain AMDILISD::BITCONV at the
// bottom; the special cases above it hand-pack/unpack sub-32-bit element
// vectors whose intermediate types would be illegal in this backend.
SDValue
AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Src = Op.getOperand(0);
  SDValue Dst = Op;
  SDValue Res;
  DebugLoc DL = Op.getDebugLoc();
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Dst.getValueType();
  // Lets bitcast the floating point types to an
  // equivalent integer type before converting to vectors.
  if (SrcVT.getScalarType().isFloatingPoint()) {
    Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
          SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
          SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
        Src);
    SrcVT = Src.getValueType();
  }
  uint32_t ScalarSrcSize = SrcVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t ScalarDstSize = DstVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
  uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
  bool isVec = SrcVT.isVector();
  if (DstVT.getScalarType().isInteger() &&
      (SrcVT.getScalarType().isInteger()
       || SrcVT.getScalarType().isFloatingPoint())) {
    if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
        || (ScalarSrcSize == 64
          && DstNumEle == 4
          && ScalarDstSize == 16)) {
      // This is the problematic case when bitcasting i64 <-> <4 x i16>
      // This approach is a little different as we cannot generate a
      // <4 x i64> vector
      // as that is illegal in our backend and we are already past
      // the DAG legalizer.
      // So, in this case, we will do the following conversion.
      // Case 1:
      // %dst = <4 x i16> %src bitconvert i64 ==>
      // %tmp = <4 x i16> %src convert <4 x i32>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
      // %dst = <2 x i32> %tmp bitcast i64
      // case 2:
      // %dst = i64 %src bitconvert <4 x i16> ==>
      // %tmp = i64 %src bitcast <2 x i32>
      // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
      // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %dst = <4 x i16> %tmp bitcast <4 x i32>
      SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
          DAG.getConstant(0xFFFF, MVT::i32));
      SDValue const16 = DAG.getConstant(16, MVT::i32);
      if (ScalarDstSize == 64) {
        // case 1
        Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
        Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
        SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(0, MVT::i32));
        SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(1, MVT::i32));
        y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
        SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(2, MVT::i32));
        SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(3, MVT::i32));
        w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
        x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
        y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
        // NOTE(review): LCREATE2 with a scalar MVT::i64 result looks
        // inconsistent (isVec reflects the *source* type here) — verify
        // against the LCREATE/LCREATE2 patterns.
        Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
        return Res;
      } else {
        // case 2
        SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
        SDValue lor16
          = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
        SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
        SDValue hir16
          = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
        SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
            MVT::v4i32, lo);
        SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(1, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, lor16, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(2, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, hi, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(3, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, hir16, idxVal);
        resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
        Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
        return Res;
      }
    } else {
      // There are four cases we need to worry about for bitcasts
      // where the size of all
      // source, intermediates and result is <= 128 bits, unlike
      // the above case
      // 1) Sub32bit bitcast 32bitAlign
      // %dst = <4 x i8> bitcast i32
      // (also <[2|4] x i16> to <[2|4] x i32>)
      // 2) 32bitAlign bitcast Sub32bit
      // %dst = i32 bitcast <4 x i8>
      // 3) Sub32bit bitcast LargerSub32bit
      // %dst = <2 x i8> bitcast i16
      // (also <4 x i8> to <2 x i16>)
      // 4) Sub32bit bitcast SmallerSub32bit
      // %dst = i16 bitcast <2 x i8>
      // (also <2 x i16> to <4 x i8>)
      // This also only handles types that are powers of two
      if ((ScalarDstSize & (ScalarDstSize - 1))
          || (ScalarSrcSize & (ScalarSrcSize - 1))) {
        // Non-power-of-two sizes fall through to the plain BITCONV.
      } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
        // case 1:
        EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
#if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
        SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
#else
        // Manually widen each source element into an IntTy lane.
        SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getUNDEF(IntTy.getScalarType()));
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(x, MVT::i32));
          SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              SrcVT.getScalarType(), Src,
              DAG.getConstant(x, MVT::i32));
          temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
          res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
              res, temp, idx);
        }
#endif
        // Mask to the source element width, shift each element to its
        // byte/halfword position, then OR adjacent lanes together.
        SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
        SDValue *newEle = new SDValue[SrcNumEle];
        res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              IntTy.getScalarType(), res,
              DAG.getConstant(x, MVT::i32));
        }
        uint32_t Ratio = SrcNumEle / DstNumEle;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          if (x % Ratio) {
            newEle[x] = DAG.getNode(ISD::SHL, DL,
                IntTy.getScalarType(), newEle[x],
                DAG.getConstant(ScalarSrcSize * (x % Ratio),
                  MVT::i32));
          }
        }
        // Pairwise OR: results accumulate in even (then every fourth)
        // slots of newEle.
        for (uint32_t x = 0; x < SrcNumEle; x += 2) {
          newEle[x] = DAG.getNode(ISD::OR, DL,
              IntTy.getScalarType(), newEle[x], newEle[x + 1]);
        }
        if (ScalarSrcSize == 8) {
          for (uint32_t x = 0; x < SrcNumEle; x += 4) {
            newEle[x] = DAG.getNode(ISD::OR, DL,
                IntTy.getScalarType(), newEle[x], newEle[x + 2]);
          }
          if (DstNumEle == 1) {
            Dst = newEle[0];
          } else {
            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
                newEle[0]);
            for (uint32_t x = 1; x < DstNumEle; ++x) {
              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                  getPointerTy(), DAG.getConstant(x, MVT::i32));
              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                  DstVT, Dst, newEle[x * 4], idx);
            }
          }
        } else {
          if (DstNumEle == 1) {
            Dst = newEle[0];
          } else {
            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
                newEle[0]);
            for (uint32_t x = 1; x < DstNumEle; ++x) {
              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                  getPointerTy(), DAG.getConstant(x, MVT::i32));
              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                  DstVT, Dst, newEle[x * 2], idx);
            }
          }
        }
        delete [] newEle;
        return Dst;
      } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
        // case 2:
        // Split each >=32-bit source lane into `mult` destination
        // lanes by shifting right and truncating.
        EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
        SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getUNDEF(IntTy.getScalarType()));
        uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          for (uint32_t y = 0; y < mult; ++y) {
            SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                getPointerTy(),
                DAG.getConstant(x * mult + y, MVT::i32));
            SDValue t;
            if (SrcNumEle > 1) {
              t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
                  DL, SrcVT.getScalarType(), Src,
                  DAG.getConstant(x, MVT::i32));
            } else {
              t = Src;
            }
            if (y != 0) {
              t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
                  t, DAG.getConstant(y * ScalarDstSize,
                    MVT::i32));
            }
            vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
                DL, IntTy, vec, t, idx);
          }
        }
        Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
        return Dst;
      } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
        // case 3:
        // Pack pairs of i8 elements into i16 values.
        SDValue *numEle = new SDValue[SrcNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              MVT::i8, Src, DAG.getConstant(x, MVT::i32));
          numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
          numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
              DAG.getConstant(0xFF, MVT::i16));
        }
        for (uint32_t x = 1; x < SrcNumEle; x += 2) {
          numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
              DAG.getConstant(8, MVT::i16));
          numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
              numEle[x-1], numEle[x]);
        }
        if (DstNumEle > 1) {
          // If we are not a scalar i16, the only other case is a
          // v2i16 since we can't have v8i8 at this point, v4i16
          // cannot be generated
          // (numEle[2] is the packed value for destination lane 1:
          // pair results live at even indices after the OR loop.)
          Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
              numEle[0]);
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(1, MVT::i32));
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
              Dst, numEle[2], idx);
        } else {
          Dst = numEle[0];
        }
        delete [] numEle;
        return Dst;
      } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
        // case 4:
        // Unpack each i16 element into two i8 destination lanes
        // (low byte, then high byte via SRL 8).
        SDValue *numEle = new SDValue[DstNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              MVT::i16, Src, DAG.getConstant(x, MVT::i32));
          numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
              numEle[x * 2], DAG.getConstant(8, MVT::i16));
        }
        MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
        Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
        for (uint32_t x = 1; x < DstNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(x, MVT::i32));
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
              Dst, numEle[x], idx);
        }
        delete [] numEle;
        ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
        Res = DAG.getSExtOrTrunc(Dst, DL, ty);
        return Res;
      }
    }
  }
  // Default: a plain bit-pattern reinterpretation.
  Res = DAG.getNode(AMDILISD::BITCONV,
      Dst.getDebugLoc(),
      Dst.getValueType(), Src);
  return Res;
}

// Lower ISD::DYNAMIC_STACKALLOC: bump the AMDIL stack pointer register
// by Size and return the old chain/new SP pair.
SDValue
AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  unsigned int SPReg = AMDIL::SP;
  DebugLoc DL = Op.getDebugLoc();
  SDValue SP = DAG.getCopyFromReg(Chain,
      DL,
      SPReg, MVT::i32);
  SDValue NewSP = DAG.getNode(ISD::ADD,
      DL,
      MVT::i32, SP, Size);
  Chain = DAG.getCopyToReg(SP.getValue(1),
      DL,
      SPReg, NewSP);
  SDValue Ops[2] = {NewSP, Chain};
  Chain = DAG.getMergeValues(Ops, 2 ,DL);
  return Chain;
}
// Lower ISD::BRCOND onto the target conditional-branch node.
SDValue
AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);
  SDValue Result;
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Chain, Jump, Cond);
  return Result;
}

// Lower ISD::BR_CC: materialize the compare, then branch on its result.
// (Definition continues beyond this chunk.)
SDValue
AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  CondCodeSDNode *CCNode = cast<CondCodeSDNode>(Op.getOperand(1));
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  ISD::CondCode CC = CCNode->get();
  SDValue Result;
  unsigned int cmpOpcode = CondCCodeToCC(
      CC,
      LHS.getValueType().getSimpleVT().SimpleTy);
  CmpValue = DAG.getNode(
      AMDILISD::CMP,
      Op.getDebugLoc(),
      LHS.getValueType(),
      DAG.getConstant(cmpOpcode, MVT::i32),
      LHS, RHS);
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
4625 return Result; 4626} 4627 4628SDValue 4629AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const 4630{ 4631 SDValue Result = DAG.getNode( 4632 AMDILISD::DP_TO_FP, 4633 Op.getDebugLoc(), 4634 Op.getValueType(), 4635 Op.getOperand(0), 4636 Op.getOperand(1)); 4637 return Result; 4638} 4639 4640SDValue 4641AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const 4642{ 4643 SDValue Result = DAG.getNode( 4644 AMDILISD::VCONCAT, 4645 Op.getDebugLoc(), 4646 Op.getValueType(), 4647 Op.getOperand(0), 4648 Op.getOperand(1)); 4649 return Result; 4650} 4651// LowerRET - Lower an ISD::RET node. 4652SDValue 4653AMDILTargetLowering::LowerReturn(SDValue Chain, 4654 CallingConv::ID CallConv, bool isVarArg, 4655 const SmallVectorImpl<ISD::OutputArg> &Outs, 4656 const SmallVectorImpl<SDValue> &OutVals, 4657 DebugLoc dl, SelectionDAG &DAG) 4658const 4659{ 4660 //MachineFunction& MF = DAG.getMachineFunction(); 4661 // CCValAssign - represent the assignment of the return value 4662 // to a location 4663 SmallVector<CCValAssign, 16> RVLocs; 4664 4665 // CCState - Info about the registers and stack slot 4666 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 4667 getTargetMachine(), RVLocs, *DAG.getContext()); 4668 4669 // Analyze return values of ISD::RET 4670 CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32); 4671 // If this is the first return lowered for this function, add 4672 // the regs to the liveout set for the function 4673 MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); 4674 for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) { 4675 if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) { 4676 MRI.addLiveOut(RVLocs[i].getLocReg()); 4677 } 4678 } 4679 // FIXME: implement this when tail call is implemented 4680 // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL); 4681 // both x86 and ppc implement this in ISelLowering 4682 4683 // Regular return here 4684 SDValue Flag; 4685 
SmallVector<SDValue, 6> RetOps; 4686 RetOps.push_back(Chain); 4687 RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32)); 4688 for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) { 4689 CCValAssign &VA = RVLocs[i]; 4690 SDValue ValToCopy = OutVals[i]; 4691 assert(VA.isRegLoc() && "Can only return in registers!"); 4692 // ISD::Ret => ret chain, (regnum1, val1), ... 4693 // So i * 2 + 1 index only the regnums 4694 Chain = DAG.getCopyToReg(Chain, 4695 dl, 4696 VA.getLocReg(), 4697 ValToCopy, 4698 Flag); 4699 // guarantee that all emitted copies are stuck together 4700 // avoiding something bad 4701 Flag = Chain.getValue(1); 4702 } 4703 /*if (MF.getFunction()->hasStructRetAttr()) { 4704 assert(0 && "Struct returns are not yet implemented!"); 4705 // Both MIPS and X86 have this 4706 }*/ 4707 RetOps[0] = Chain; 4708 if (Flag.getNode()) 4709 RetOps.push_back(Flag); 4710 4711 Flag = DAG.getNode(AMDILISD::RET_FLAG, 4712 dl, 4713 MVT::Other, &RetOps[0], RetOps.size()); 4714 return Flag; 4715} 4716void 4717AMDILTargetLowering::generateLongRelational(MachineInstr *MI, 4718 unsigned int opCode) const 4719{ 4720 MachineOperand DST = MI->getOperand(0); 4721 MachineOperand LHS = MI->getOperand(2); 4722 MachineOperand RHS = MI->getOperand(3); 4723 unsigned int opi32Code = 0, si32Code = 0; 4724 unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass; 4725 uint32_t REGS[12]; 4726 // All the relationals can be generated with with 6 temp registers 4727 for (int x = 0; x < 12; ++x) { 4728 REGS[x] = genVReg(simpleVT); 4729 } 4730 // Pull out the high and low components of each 64 bit register 4731 generateMachineInst(AMDIL::LHI, REGS[0], LHS.getReg()); 4732 generateMachineInst(AMDIL::LLO, REGS[1], LHS.getReg()); 4733 generateMachineInst(AMDIL::LHI, REGS[2], RHS.getReg()); 4734 generateMachineInst(AMDIL::LLO, REGS[3], RHS.getReg()); 4735 // Determine the correct opcode that we should use 4736 switch(opCode) { 4737 default: 4738 assert(!"comparison case not 
handled!"); 4739 break; 4740 case AMDIL::LEQ: 4741 si32Code = opi32Code = AMDIL::IEQ; 4742 break; 4743 case AMDIL::LNE: 4744 si32Code = opi32Code = AMDIL::INE; 4745 break; 4746 case AMDIL::LLE: 4747 case AMDIL::ULLE: 4748 case AMDIL::LGE: 4749 case AMDIL::ULGE: 4750 if (opCode == AMDIL::LGE || opCode == AMDIL::ULGE) { 4751 std::swap(REGS[0], REGS[2]); 4752 } else { 4753 std::swap(REGS[1], REGS[3]); 4754 } 4755 if (opCode == AMDIL::LLE || opCode == AMDIL::LGE) { 4756 opi32Code = AMDIL::ILT; 4757 } else { 4758 opi32Code = AMDIL::ULT; 4759 } 4760 si32Code = AMDIL::UGE; 4761 break; 4762 case AMDIL::LGT: 4763 case AMDIL::ULGT: 4764 std::swap(REGS[0], REGS[2]); 4765 std::swap(REGS[1], REGS[3]); 4766 case AMDIL::LLT: 4767 case AMDIL::ULLT: 4768 if (opCode == AMDIL::LGT || opCode == AMDIL::LLT) { 4769 opi32Code = AMDIL::ILT; 4770 } else { 4771 opi32Code = AMDIL::ULT; 4772 } 4773 si32Code = AMDIL::ULT; 4774 break; 4775 }; 4776 // Do the initial opcode on the high and low components. 4777 // This leaves the following: 4778 // REGS[4] = L_HI OP R_HI 4779 // REGS[5] = L_LO OP R_LO 4780 generateMachineInst(opi32Code, REGS[4], REGS[0], REGS[2]); 4781 generateMachineInst(si32Code, REGS[5], REGS[1], REGS[3]); 4782 switch(opi32Code) { 4783 case AMDIL::IEQ: 4784 case AMDIL::INE: 4785 { 4786 // combine the results with an and or or depending on if 4787 // we are eq or ne 4788 uint32_t combineOp = (opi32Code == AMDIL::IEQ) 4789 ? 
AMDIL::BINARY_AND_i32 : AMDIL::BINARY_OR_i32; 4790 generateMachineInst(combineOp, REGS[11], REGS[4], REGS[5]); 4791 } 4792 break; 4793 default: 4794 // this finishes codegen for the following pattern 4795 // REGS[4] || (REGS[5] && (L_HI == R_HI)) 4796 generateMachineInst(AMDIL::IEQ, REGS[9], REGS[0], REGS[2]); 4797 generateMachineInst(AMDIL::BINARY_AND_i32, REGS[10], REGS[5], 4798 REGS[9]); 4799 generateMachineInst(AMDIL::BINARY_OR_i32, REGS[11], REGS[4], 4800 REGS[10]); 4801 break; 4802 } 4803 generateMachineInst(AMDIL::LCREATE, DST.getReg(), REGS[11], REGS[11]); 4804} 4805 4806unsigned int 4807AMDILTargetLowering::getFunctionAlignment(const Function *) const 4808{ 4809 return 0; 4810} 4811 4812void 4813AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB, 4814 MachineBasicBlock::iterator &BBI, 4815 DebugLoc *DL, const TargetInstrInfo *TII) const 4816{ 4817 mBB = BB; 4818 mBBI = BBI; 4819 mDL = DL; 4820 mTII = TII; 4821} 4822uint32_t 4823AMDILTargetLowering::genVReg(uint32_t regType) const 4824{ 4825 return mBB->getParent()->getRegInfo().createVirtualRegister( 4826 getTargetMachine().getRegisterInfo()->getRegClass(regType)); 4827} 4828 4829MachineInstrBuilder 4830AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const 4831{ 4832 return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst); 4833} 4834 4835MachineInstrBuilder 4836AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst, 4837 uint32_t src1) const 4838{ 4839 return generateMachineInst(opcode, dst).addReg(src1); 4840} 4841 4842MachineInstrBuilder 4843AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst, 4844 uint32_t src1, uint32_t src2) const 4845{ 4846 return generateMachineInst(opcode, dst, src1).addReg(src2); 4847} 4848 4849MachineInstrBuilder 4850AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst, 4851 uint32_t src1, uint32_t src2, uint32_t src3) const 4852{ 4853 return generateMachineInst(opcode, dst, src1, 
src2).addReg(src3); 4854} 4855 4856 4857SDValue 4858AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const 4859{ 4860 DebugLoc DL = Op.getDebugLoc(); 4861 EVT OVT = Op.getValueType(); 4862 SDValue LHS = Op.getOperand(0); 4863 SDValue RHS = Op.getOperand(1); 4864 MVT INTTY; 4865 MVT FLTTY; 4866 if (!OVT.isVector()) { 4867 INTTY = MVT::i32; 4868 FLTTY = MVT::f32; 4869 } else if (OVT.getVectorNumElements() == 2) { 4870 INTTY = MVT::v2i32; 4871 FLTTY = MVT::v2f32; 4872 } else if (OVT.getVectorNumElements() == 4) { 4873 INTTY = MVT::v4i32; 4874 FLTTY = MVT::v4f32; 4875 } 4876 unsigned bitsize = OVT.getScalarType().getSizeInBits(); 4877 // char|short jq = ia ^ ib; 4878 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); 4879 4880 // jq = jq >> (bitsize - 2) 4881 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); 4882 4883 // jq = jq | 0x1 4884 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); 4885 4886 // jq = (int)jq 4887 jq = DAG.getSExtOrTrunc(jq, DL, INTTY); 4888 4889 // int ia = (int)LHS; 4890 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); 4891 4892 // int ib, (int)RHS; 4893 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); 4894 4895 // float fa = (float)ia; 4896 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); 4897 4898 // float fb = (float)ib; 4899 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); 4900 4901 // float fq = native_divide(fa, fb); 4902 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb); 4903 4904 // fq = trunc(fq); 4905 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); 4906 4907 // float fqneg = -fq; 4908 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); 4909 4910 // float fr = mad(fqneg, fb, fa); 4911 SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa); 4912 4913 // int iq = (int)fq; 4914 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); 4915 4916 // fr = fabs(fr); 4917 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr); 4918 4919 // fb = fabs(fb); 
4920 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); 4921 4922 // int cv = fr >= fb; 4923 SDValue cv; 4924 if (INTTY == MVT::i32) { 4925 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 4926 } else { 4927 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 4928 } 4929 // jq = (cv ? jq : 0); 4930 jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq, 4931 DAG.getConstant(0, OVT)); 4932 // dst = iq + jq; 4933 iq = DAG.getSExtOrTrunc(iq, DL, OVT); 4934 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); 4935 return iq; 4936} 4937 4938SDValue 4939AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const 4940{ 4941 DebugLoc DL = Op.getDebugLoc(); 4942 EVT OVT = Op.getValueType(); 4943 SDValue LHS = Op.getOperand(0); 4944 SDValue RHS = Op.getOperand(1); 4945 // The LowerSDIV32 function generates equivalent to the following IL. 4946 // mov r0, LHS 4947 // mov r1, RHS 4948 // ilt r10, r0, 0 4949 // ilt r11, r1, 0 4950 // iadd r0, r0, r10 4951 // iadd r1, r1, r11 4952 // ixor r0, r0, r10 4953 // ixor r1, r1, r11 4954 // udiv r0, r0, r1 4955 // ixor r10, r10, r11 4956 // iadd r0, r0, r10 4957 // ixor DST, r0, r10 4958 4959 // mov r0, LHS 4960 SDValue r0 = LHS; 4961 4962 // mov r1, RHS 4963 SDValue r1 = RHS; 4964 4965 // ilt r10, r0, 0 4966 SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT, 4967 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 4968 r0, DAG.getConstant(0, OVT)); 4969 4970 // ilt r11, r1, 0 4971 SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT, 4972 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 4973 r1, DAG.getConstant(0, OVT)); 4974 4975 // iadd r0, r0, r10 4976 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 4977 4978 // iadd r1, r1, r11 4979 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 4980 4981 // ixor r0, r0, r10 4982 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 4983 4984 // ixor r1, r1, r11 4985 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 4986 4987 // udiv r0, r0, r1 4988 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); 4989 
4990 // ixor r10, r10, r11 4991 r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11); 4992 4993 // iadd r0, r0, r10 4994 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 4995 4996 // ixor DST, r0, r10 4997 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 4998 return DST; 4999} 5000 5001SDValue 5002AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const 5003{ 5004 return SDValue(Op.getNode(), 0); 5005} 5006 5007SDValue 5008AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const 5009{ 5010 DebugLoc DL = Op.getDebugLoc(); 5011 EVT OVT = Op.getValueType(); 5012 SDValue LHS = Op.getOperand(0); 5013 SDValue RHS = Op.getOperand(1); 5014 MVT INTTY; 5015 MVT FLTTY; 5016 if (!OVT.isVector()) { 5017 INTTY = MVT::i32; 5018 FLTTY = MVT::f32; 5019 } else if (OVT.getVectorNumElements() == 2) { 5020 INTTY = MVT::v2i32; 5021 FLTTY = MVT::v2f32; 5022 } else if (OVT.getVectorNumElements() == 4) { 5023 INTTY = MVT::v4i32; 5024 FLTTY = MVT::v4f32; 5025 } 5026 5027 // The LowerUDIV24 function implements the following CL. 
5028 // int ia = (int)LHS 5029 // float fa = (float)ia 5030 // int ib = (int)RHS 5031 // float fb = (float)ib 5032 // float fq = native_divide(fa, fb) 5033 // fq = trunc(fq) 5034 // float t = mad(fq, fb, fb) 5035 // int iq = (int)fq - (t <= fa) 5036 // return (type)iq 5037 5038 // int ia = (int)LHS 5039 SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY); 5040 5041 // float fa = (float)ia 5042 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); 5043 5044 // int ib = (int)RHS 5045 SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY); 5046 5047 // float fb = (float)ib 5048 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); 5049 5050 // float fq = native_divide(fa, fb) 5051 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb); 5052 5053 // fq = trunc(fq) 5054 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); 5055 5056 // float t = mad(fq, fb, fb) 5057 SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb); 5058 5059 // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1 5060 SDValue iq; 5061 fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); 5062 if (INTTY == MVT::i32) { 5063 iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE); 5064 } else { 5065 iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE); 5066 } 5067 iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq); 5068 5069 5070 // return (type)iq 5071 iq = DAG.getZExtOrTrunc(iq, DL, OVT); 5072 return iq; 5073 5074} 5075 5076SDValue 5077AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const 5078{ 5079 return SDValue(Op.getNode(), 0); 5080} 5081 5082SDValue 5083AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const 5084{ 5085 return SDValue(Op.getNode(), 0); 5086} 5087SDValue 5088AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const 5089{ 5090 DebugLoc DL = Op.getDebugLoc(); 5091 EVT OVT = Op.getValueType(); 5092 MVT INTTY = MVT::i32; 5093 if (OVT == MVT::v2i8) { 5094 INTTY = MVT::v2i32; 5095 } else if (OVT == MVT::v4i8) { 5096 INTTY = 
MVT::v4i32; 5097 } 5098 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 5099 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 5100 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 5101 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 5102 return LHS; 5103} 5104 5105SDValue 5106AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const 5107{ 5108 DebugLoc DL = Op.getDebugLoc(); 5109 EVT OVT = Op.getValueType(); 5110 MVT INTTY = MVT::i32; 5111 if (OVT == MVT::v2i16) { 5112 INTTY = MVT::v2i32; 5113 } else if (OVT == MVT::v4i16) { 5114 INTTY = MVT::v4i32; 5115 } 5116 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 5117 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 5118 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 5119 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 5120 return LHS; 5121} 5122 5123SDValue 5124AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const 5125{ 5126 DebugLoc DL = Op.getDebugLoc(); 5127 EVT OVT = Op.getValueType(); 5128 SDValue LHS = Op.getOperand(0); 5129 SDValue RHS = Op.getOperand(1); 5130 // The LowerSREM32 function generates equivalent to the following IL. 
5131 // mov r0, LHS 5132 // mov r1, RHS 5133 // ilt r10, r0, 0 5134 // ilt r11, r1, 0 5135 // iadd r0, r0, r10 5136 // iadd r1, r1, r11 5137 // ixor r0, r0, r10 5138 // ixor r1, r1, r11 5139 // udiv r20, r0, r1 5140 // umul r20, r20, r1 5141 // sub r0, r0, r20 5142 // iadd r0, r0, r10 5143 // ixor DST, r0, r10 5144 5145 // mov r0, LHS 5146 SDValue r0 = LHS; 5147 5148 // mov r1, RHS 5149 SDValue r1 = RHS; 5150 5151 // ilt r10, r0, 0 5152 SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT, 5153 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 5154 r0, DAG.getConstant(0, OVT)); 5155 5156 // ilt r11, r1, 0 5157 SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT, 5158 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 5159 r1, DAG.getConstant(0, OVT)); 5160 5161 // iadd r0, r0, r10 5162 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 5163 5164 // iadd r1, r1, r11 5165 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 5166 5167 // ixor r0, r0, r10 5168 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 5169 5170 // ixor r1, r1, r11 5171 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 5172 5173 // udiv r20, r0, r1 5174 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1); 5175 5176 // umul r20, r20, r1 5177 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1); 5178 5179 // sub r0, r0, r20 5180 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); 5181 5182 // iadd r0, r0, r10 5183 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 5184 5185 // ixor DST, r0, r10 5186 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 5187 return DST; 5188} 5189 5190SDValue 5191AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const 5192{ 5193 return SDValue(Op.getNode(), 0); 5194} 5195 5196SDValue 5197AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const 5198{ 5199 DebugLoc DL = Op.getDebugLoc(); 5200 EVT OVT = Op.getValueType(); 5201 MVT INTTY = MVT::i32; 5202 if (OVT == MVT::v2i8) { 5203 INTTY = MVT::v2i32; 5204 } else if (OVT == MVT::v4i8) { 5205 INTTY = 
MVT::v4i32; 5206 } 5207 SDValue LHS = Op.getOperand(0); 5208 SDValue RHS = Op.getOperand(1); 5209 // The LowerUREM8 function generates equivalent to the following IL. 5210 // mov r0, as_u32(LHS) 5211 // mov r1, as_u32(RHS) 5212 // and r10, r0, 0xFF 5213 // and r11, r1, 0xFF 5214 // cmov_logical r3, r11, r11, 0x1 5215 // udiv r3, r10, r3 5216 // cmov_logical r3, r11, r3, 0 5217 // umul r3, r3, r11 5218 // sub r3, r10, r3 5219 // and as_u8(DST), r3, 0xFF 5220 5221 // mov r0, as_u32(LHS) 5222 SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY); 5223 5224 // mov r1, as_u32(RHS) 5225 SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY); 5226 5227 // and r10, r0, 0xFF 5228 SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0, 5229 DAG.getConstant(0xFF, INTTY)); 5230 5231 // and r11, r1, 0xFF 5232 SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1, 5233 DAG.getConstant(0xFF, INTTY)); 5234 5235 // cmov_logical r3, r11, r11, 0x1 5236 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11, 5237 DAG.getConstant(0x01, INTTY)); 5238 5239 // udiv r3, r10, r3 5240 r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3); 5241 5242 // cmov_logical r3, r11, r3, 0 5243 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3, 5244 DAG.getConstant(0, INTTY)); 5245 5246 // umul r3, r3, r11 5247 r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11); 5248 5249 // sub r3, r10, r3 5250 r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3); 5251 5252 // and as_u8(DST), r3, 0xFF 5253 SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3, 5254 DAG.getConstant(0xFF, INTTY)); 5255 DST = DAG.getZExtOrTrunc(DST, DL, OVT); 5256 return DST; 5257} 5258 5259SDValue 5260AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const 5261{ 5262 DebugLoc DL = Op.getDebugLoc(); 5263 EVT OVT = Op.getValueType(); 5264 MVT INTTY = MVT::i32; 5265 if (OVT == MVT::v2i16) { 5266 INTTY = MVT::v2i32; 5267 } else if (OVT == MVT::v4i16) { 5268 INTTY = MVT::v4i32; 5269 } 5270 SDValue LHS = Op.getOperand(0); 5271 SDValue RHS = 
Op.getOperand(1); 5272 // The LowerUREM16 function generatest equivalent to the following IL. 5273 // mov r0, LHS 5274 // mov r1, RHS 5275 // DIV = LowerUDIV16(LHS, RHS) 5276 // and r10, r0, 0xFFFF 5277 // and r11, r1, 0xFFFF 5278 // cmov_logical r3, r11, r11, 0x1 5279 // udiv as_u16(r3), as_u32(r10), as_u32(r3) 5280 // and r3, r3, 0xFFFF 5281 // cmov_logical r3, r11, r3, 0 5282 // umul r3, r3, r11 5283 // sub r3, r10, r3 5284 // and DST, r3, 0xFFFF 5285 5286 // mov r0, LHS 5287 SDValue r0 = LHS; 5288 5289 // mov r1, RHS 5290 SDValue r1 = RHS; 5291 5292 // and r10, r0, 0xFFFF 5293 SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0, 5294 DAG.getConstant(0xFFFF, OVT)); 5295 5296 // and r11, r1, 0xFFFF 5297 SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1, 5298 DAG.getConstant(0xFFFF, OVT)); 5299 5300 // cmov_logical r3, r11, r11, 0x1 5301 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11, 5302 DAG.getConstant(0x01, OVT)); 5303 5304 // udiv as_u16(r3), as_u32(r10), as_u32(r3) 5305 r10 = DAG.getZExtOrTrunc(r10, DL, INTTY); 5306 r3 = DAG.getZExtOrTrunc(r3, DL, INTTY); 5307 r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3); 5308 r3 = DAG.getZExtOrTrunc(r3, DL, OVT); 5309 r10 = DAG.getZExtOrTrunc(r10, DL, OVT); 5310 5311 // and r3, r3, 0xFFFF 5312 r3 = DAG.getNode(ISD::AND, DL, OVT, r3, 5313 DAG.getConstant(0xFFFF, OVT)); 5314 5315 // cmov_logical r3, r11, r3, 0 5316 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3, 5317 DAG.getConstant(0, OVT)); 5318 // umul r3, r3, r11 5319 r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11); 5320 5321 // sub r3, r10, r3 5322 r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3); 5323 5324 // and DST, r3, 0xFFFF 5325 SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3, 5326 DAG.getConstant(0xFFFF, OVT)); 5327 return DST; 5328} 5329 5330SDValue 5331AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const 5332{ 5333 DebugLoc DL = Op.getDebugLoc(); 5334 EVT OVT = Op.getValueType(); 5335 SDValue LHS = Op.getOperand(0); 5336 SDValue 
RHS = Op.getOperand(1); 5337 // The LowerUREM32 function generates equivalent to the following IL. 5338 // udiv r20, LHS, RHS 5339 // umul r20, r20, RHS 5340 // sub DST, LHS, r20 5341 5342 // udiv r20, LHS, RHS 5343 SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS); 5344 5345 // umul r20, r20, RHS 5346 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS); 5347 5348 // sub DST, LHS, r20 5349 SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20); 5350 return DST; 5351} 5352 5353SDValue 5354AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const 5355{ 5356 return SDValue(Op.getNode(), 0); 5357} 5358 5359 5360SDValue 5361AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const 5362{ 5363 DebugLoc DL = Op.getDebugLoc(); 5364 EVT OVT = Op.getValueType(); 5365 MVT INTTY = MVT::i32; 5366 if (OVT == MVT::v2f32) { 5367 INTTY = MVT::v2i32; 5368 } else if (OVT == MVT::v4f32) { 5369 INTTY = MVT::v4i32; 5370 } 5371 SDValue LHS = Op.getOperand(0); 5372 SDValue RHS = Op.getOperand(1); 5373 SDValue DST; 5374 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 5375 &this->getTargetMachine())->getSubtargetImpl(); 5376 if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { 5377 // TODO: This doesn't work for vector types yet 5378 // The LowerFDIV32 function generates equivalent to the following 5379 // IL: 5380 // mov r20, as_int(LHS) 5381 // mov r21, as_int(RHS) 5382 // and r30, r20, 0x7f800000 5383 // and r31, r20, 0x807FFFFF 5384 // and r32, r21, 0x7f800000 5385 // and r33, r21, 0x807FFFFF 5386 // ieq r40, r30, 0x7F800000 5387 // ieq r41, r31, 0x7F800000 5388 // ieq r42, r32, 0 5389 // ieq r43, r33, 0 5390 // and r50, r20, 0x80000000 5391 // and r51, r21, 0x80000000 5392 // ior r32, r32, 0x3f800000 5393 // ior r33, r33, 0x3f800000 5394 // cmov_logical r32, r42, r50, r32 5395 // cmov_logical r33, r43, r51, r33 5396 // cmov_logical r32, r40, r20, r32 5397 // cmov_logical r33, r41, r21, r33 5398 // ior r50, r40, r41 5399 
// ior r51, r42, r43 5400 // ior r50, r50, r51 5401 // inegate r52, r31 5402 // iadd r30, r30, r52 5403 // cmov_logical r30, r50, 0, r30 5404 // div_zeroop(infinity) r21, 1.0, r33 5405 // mul_ieee r20, r32, r21 5406 // and r22, r20, 0x7FFFFFFF 5407 // and r23, r20, 0x80000000 5408 // ishr r60, r22, 0x00000017 5409 // ishr r61, r30, 0x00000017 5410 // iadd r20, r20, r30 5411 // iadd r21, r22, r30 5412 // iadd r60, r60, r61 5413 // ige r42, 0, R60 5414 // ior r41, r23, 0x7F800000 5415 // ige r40, r60, 0x000000FF 5416 // cmov_logical r40, r50, 0, r40 5417 // cmov_logical r20, r42, r23, r20 5418 // cmov_logical DST, r40, r41, r20 5419 // as_float(DST) 5420 5421 // mov r20, as_int(LHS) 5422 SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS); 5423 5424 // mov r21, as_int(RHS) 5425 SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS); 5426 5427 // and r30, r20, 0x7f800000 5428 SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20, 5429 DAG.getConstant(0x7F800000, INTTY)); 5430 5431 // and r31, r21, 0x7f800000 5432 SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21, 5433 DAG.getConstant(0x7f800000, INTTY)); 5434 5435 // and r32, r20, 0x807FFFFF 5436 SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20, 5437 DAG.getConstant(0x807FFFFF, INTTY)); 5438 5439 // and r33, r21, 0x807FFFFF 5440 SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21, 5441 DAG.getConstant(0x807FFFFF, INTTY)); 5442 5443 // ieq r40, r30, 0x7F800000 5444 SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5445 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 5446 R30, DAG.getConstant(0x7F800000, INTTY)); 5447 5448 // ieq r41, r31, 0x7F800000 5449 SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5450 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 5451 R31, DAG.getConstant(0x7F800000, INTTY)); 5452 5453 // ieq r42, r30, 0 5454 SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5455 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 5456 R30, DAG.getConstant(0, 
INTTY)); 5457 5458 // ieq r43, r31, 0 5459 SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5460 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 5461 R31, DAG.getConstant(0, INTTY)); 5462 5463 // and r50, r20, 0x80000000 5464 SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20, 5465 DAG.getConstant(0x80000000, INTTY)); 5466 5467 // and r51, r21, 0x80000000 5468 SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21, 5469 DAG.getConstant(0x80000000, INTTY)); 5470 5471 // ior r32, r32, 0x3f800000 5472 R32 = DAG.getNode(ISD::OR, DL, INTTY, R32, 5473 DAG.getConstant(0x3F800000, INTTY)); 5474 5475 // ior r33, r33, 0x3f800000 5476 R33 = DAG.getNode(ISD::OR, DL, INTTY, R33, 5477 DAG.getConstant(0x3F800000, INTTY)); 5478 5479 // cmov_logical r32, r42, r50, r32 5480 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32); 5481 5482 // cmov_logical r33, r43, r51, r33 5483 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33); 5484 5485 // cmov_logical r32, r40, r20, r32 5486 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32); 5487 5488 // cmov_logical r33, r41, r21, r33 5489 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33); 5490 5491 // ior r50, r40, r41 5492 R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41); 5493 5494 // ior r51, r42, r43 5495 R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43); 5496 5497 // ior r50, r50, r51 5498 R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51); 5499 5500 // inegate r52, r31 5501 SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31); 5502 5503 // iadd r30, r30, r52 5504 R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52); 5505 5506 // cmov_logical r30, r50, 0, r30 5507 R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50, 5508 DAG.getConstant(0, INTTY), R30); 5509 5510 // div_zeroop(infinity) r21, 1.0, as_float(r33) 5511 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33); 5512 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, 5513 DAG.getConstantFP(1.0f, OVT), R33); 5514 5515 // mul_ieee 
as_int(r20), as_float(r32), r21 5516 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32); 5517 R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21); 5518 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20); 5519 5520 // div_zeroop(infinity) r21, 1.0, as_float(r33) 5521 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33); 5522 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, 5523 DAG.getConstantFP(1.0f, OVT), R33); 5524 5525 // mul_ieee as_int(r20), as_float(r32), r21 5526 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32); 5527 R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21); 5528 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20); 5529 5530 // and r22, r20, 0x7FFFFFFF 5531 SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20, 5532 DAG.getConstant(0x7FFFFFFF, INTTY)); 5533 5534 // and r23, r20, 0x80000000 5535 SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20, 5536 DAG.getConstant(0x80000000, INTTY)); 5537 5538 // ishr r60, r22, 0x00000017 5539 SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22, 5540 DAG.getConstant(0x00000017, INTTY)); 5541 5542 // ishr r61, r30, 0x00000017 5543 SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30, 5544 DAG.getConstant(0x00000017, INTTY)); 5545 5546 // iadd r20, r20, r30 5547 R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30); 5548 5549 // iadd r21, r22, r30 5550 R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30); 5551 5552 // iadd r60, r60, r61 5553 R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61); 5554 5555 // ige r42, 0, R60 5556 R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5557 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 5558 DAG.getConstant(0, INTTY), 5559 R60); 5560 5561 // ior r41, r23, 0x7F800000 5562 R41 = DAG.getNode(ISD::OR, DL, INTTY, R23, 5563 DAG.getConstant(0x7F800000, INTTY)); 5564 5565 // ige r40, r60, 0x000000FF 5566 R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5567 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 5568 R60, 5569 DAG.getConstant(0x0000000FF, INTTY)); 5570 5571 // cmov_logical r40, r50, 0, r40 5572 R40 = 
DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50, 5573 DAG.getConstant(0, INTTY), 5574 R40); 5575 5576 // cmov_logical r20, r42, r23, r20 5577 R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20); 5578 5579 // cmov_logical DST, r40, r41, r20 5580 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20); 5581 5582 // as_float(DST) 5583 DST = DAG.getNode(ISDBITCAST, DL, OVT, DST); 5584 } else { 5585 // The following sequence of DAG nodes produce the following IL: 5586 // fabs r1, RHS 5587 // lt r2, 0x1.0p+96f, r1 5588 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f 5589 // mul_ieee r1, RHS, r3 5590 // div_zeroop(infinity) r0, LHS, r1 5591 // mul_ieee DST, r0, r3 5592 5593 // fabs r1, RHS 5594 SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS); 5595 // lt r2, 0x1.0p+96f, r1 5596 SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT, 5597 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32), 5598 DAG.getConstant(0x6f800000, INTTY), r1); 5599 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f 5600 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2, 5601 DAG.getConstant(0x2f800000, INTTY), 5602 DAG.getConstant(0x3f800000, INTTY)); 5603 // mul_ieee r1, RHS, r3 5604 r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3); 5605 // div_zeroop(infinity) r0, LHS, r1 5606 SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1); 5607 // mul_ieee DST, r0, r3 5608 DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3); 5609 } 5610 return DST; 5611} 5612 5613SDValue 5614AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const 5615{ 5616 return SDValue(Op.getNode(), 0); 5617} 5618