AMDILISelLowering.cpp revision 04993c963008ded3a6ad5e5b4d69ba08d1948a93
1//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//==-----------------------------------------------------------------------===// 9// 10// This file implements the interfaces that AMDIL uses to lower LLVM code into a 11// selection DAG. 12// 13//===----------------------------------------------------------------------===// 14 15#include "AMDILISelLowering.h" 16#include "AMDILDevices.h" 17#include "AMDILIntrinsicInfo.h" 18#include "AMDILMachineFunctionInfo.h" 19#include "AMDILSubtarget.h" 20#include "AMDILTargetMachine.h" 21#include "AMDILUtilityFunctions.h" 22#include "llvm/CallingConv.h" 23#include "llvm/CodeGen/MachineFrameInfo.h" 24#include "llvm/CodeGen/MachineRegisterInfo.h" 25#include "llvm/CodeGen/PseudoSourceValue.h" 26#include "llvm/CodeGen/SelectionDAG.h" 27#include "llvm/CodeGen/SelectionDAGNodes.h" 28#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 29#include "llvm/DerivedTypes.h" 30#include "llvm/Instructions.h" 31#include "llvm/Intrinsics.h" 32#include "llvm/Support/raw_ostream.h" 33#include "llvm/Target/TargetOptions.h" 34 35using namespace llvm; 36#define ISDBITCAST ISD::BITCAST 37#define MVTGLUE MVT::Glue 38//===----------------------------------------------------------------------===// 39// Calling Convention Implementation 40//===----------------------------------------------------------------------===// 41#include "AMDILGenCallingConv.inc" 42 43//===----------------------------------------------------------------------===// 44// TargetLowering Implementation Help Functions Begin 45//===----------------------------------------------------------------------===// 46 static SDValue 47getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType) 48{ 49 DebugLoc DL = Src.getDebugLoc(); 50 EVT svt = Src.getValueType().getScalarType(); 
51 EVT dvt = Dst.getValueType().getScalarType(); 52 if (svt.isFloatingPoint() && dvt.isFloatingPoint()) { 53 if (dvt.bitsGT(svt)) { 54 Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src); 55 } else if (svt.bitsLT(svt)) { 56 Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src, 57 DAG.getConstant(1, MVT::i32)); 58 } 59 } else if (svt.isInteger() && dvt.isInteger()) { 60 if (!svt.bitsEq(dvt)) { 61 Src = DAG.getSExtOrTrunc(Src, DL, dvt); 62 } else { 63 Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src); 64 } 65 } else if (svt.isInteger()) { 66 unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP; 67 if (!svt.bitsEq(dvt)) { 68 if (dvt.getSimpleVT().SimpleTy == MVT::f32) { 69 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32); 70 } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) { 71 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64); 72 } else { 73 assert(0 && "We only support 32 and 64bit fp types"); 74 } 75 } 76 Src = DAG.getNode(opcode, DL, dvt, Src); 77 } else if (dvt.isInteger()) { 78 unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT; 79 if (svt.getSimpleVT().SimpleTy == MVT::f32) { 80 Src = DAG.getNode(opcode, DL, MVT::i32, Src); 81 } else if (svt.getSimpleVT().SimpleTy == MVT::f64) { 82 Src = DAG.getNode(opcode, DL, MVT::i64, Src); 83 } else { 84 assert(0 && "We only support 32 and 64bit fp types"); 85 } 86 Src = DAG.getSExtOrTrunc(Src, DL, dvt); 87 } 88 return Src; 89} 90// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC 91// condition. 
/// Map a DAG condition code plus the simple value type being compared to
/// the corresponding AMDILCC::CondCodes entry.  Each ISD code dispatches
/// on the type to pick the signed-int (I), unsigned-int (U), 64-bit int
/// (L/UL), float (F) or double (D) flavor of the condition.  Unsupported
/// (CC, type) pairs assert and return AMDILCC::COND_ERROR.
 static AMDILCC::CondCodes
CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
{
  switch (CC) {
    default:
      {
        // NOTE(review): no return after the assert — in builds where
        // assert() compiles away, control falls through into the SETO
        // case below; confirm this is acceptable.
        errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
        assert(0 && "Unknown condition code!");
      }
    case ISD::SETO:
      // Ordered: neither operand is NaN (fp only).
      switch(type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_O;
        case MVT::f64:
          return AMDILCC::IL_CC_D_O;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUO:
      // Unordered: at least one operand is NaN (fp only).
      switch(type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_UO;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UO;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETGT:
      switch (type) {
        // Integer types up to 32 bits share the 32-bit signed compare.
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
          return AMDILCC::IL_CC_I_GT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_GT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_GT;
        case MVT::i64:
          return AMDILCC::IL_CC_L_GT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETGE:
      switch (type) {
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
          return AMDILCC::IL_CC_I_GE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_GE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_GE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_GE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETLT:
      switch (type) {
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
          return AMDILCC::IL_CC_I_LT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_LT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_LT;
        case MVT::i64:
          return AMDILCC::IL_CC_L_LT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETLE:
      switch (type) {
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
          return AMDILCC::IL_CC_I_LE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_LE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_LE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_LE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETNE:
      switch (type) {
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
          return AMDILCC::IL_CC_I_NE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_NE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_NE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_NE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETEQ:
      switch (type) {
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
          return AMDILCC::IL_CC_I_EQ;
        case MVT::f32:
          return AMDILCC::IL_CC_F_EQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_EQ;
        case MVT::i64:
          return AMDILCC::IL_CC_L_EQ;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUGT:
      // Unsigned-or-unordered comparisons: U_/UL_ for ints, F_U/D_U for fp.
      switch (type) {
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
          return AMDILCC::IL_CC_U_GT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UGT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UGT;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_GT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUGE:
      switch (type) {
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
          return AMDILCC::IL_CC_U_GE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UGE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UGE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_GE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETULT:
      switch (type) {
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
          return AMDILCC::IL_CC_U_LT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_ULT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ULT;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_LT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETULE:
      switch (type) {
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
          return AMDILCC::IL_CC_U_LE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_ULE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ULE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_LE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUNE:
      switch (type) {
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
          return AMDILCC::IL_CC_U_NE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UNE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UNE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_NE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUEQ:
      switch (type) {
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
          return AMDILCC::IL_CC_U_EQ;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UEQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UEQ;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_EQ;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOGT:
      // Ordered fp comparisons have no integer counterpart: all integer
      // types deliberately fall into the default assert.
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OGT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OGT;
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOGE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OGE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OGE;
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOLT:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OLT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OLT;
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOLE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OLE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OLE;
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETONE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_ONE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ONE;
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOEQ:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OEQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OEQ;
        case MVT::i1: case MVT::i8: case MVT::i16: case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
  };
}

 static unsigned int
translateToOpcode(uint64_t CCCode, unsigned
int regClass) 420{ 421 switch (CCCode) { 422 case AMDILCC::IL_CC_D_EQ: 423 case AMDILCC::IL_CC_D_OEQ: 424 if (regClass == AMDIL::GPRV2F64RegClassID) { 425 return (unsigned int)AMDIL::DEQ_v2f64; 426 } else { 427 return (unsigned int)AMDIL::DEQ; 428 } 429 case AMDILCC::IL_CC_D_LE: 430 case AMDILCC::IL_CC_D_OLE: 431 case AMDILCC::IL_CC_D_ULE: 432 case AMDILCC::IL_CC_D_GE: 433 case AMDILCC::IL_CC_D_OGE: 434 case AMDILCC::IL_CC_D_UGE: 435 return (unsigned int)AMDIL::DGE; 436 case AMDILCC::IL_CC_D_LT: 437 case AMDILCC::IL_CC_D_OLT: 438 case AMDILCC::IL_CC_D_ULT: 439 case AMDILCC::IL_CC_D_GT: 440 case AMDILCC::IL_CC_D_OGT: 441 case AMDILCC::IL_CC_D_UGT: 442 return (unsigned int)AMDIL::DLT; 443 case AMDILCC::IL_CC_D_NE: 444 case AMDILCC::IL_CC_D_UNE: 445 return (unsigned int)AMDIL::DNE; 446 case AMDILCC::IL_CC_F_EQ: 447 case AMDILCC::IL_CC_F_OEQ: 448 return (unsigned int)AMDIL::FEQ; 449 case AMDILCC::IL_CC_F_LE: 450 case AMDILCC::IL_CC_F_ULE: 451 case AMDILCC::IL_CC_F_OLE: 452 case AMDILCC::IL_CC_F_GE: 453 case AMDILCC::IL_CC_F_UGE: 454 case AMDILCC::IL_CC_F_OGE: 455 return (unsigned int)AMDIL::FGE; 456 case AMDILCC::IL_CC_F_LT: 457 case AMDILCC::IL_CC_F_OLT: 458 case AMDILCC::IL_CC_F_ULT: 459 case AMDILCC::IL_CC_F_GT: 460 case AMDILCC::IL_CC_F_OGT: 461 case AMDILCC::IL_CC_F_UGT: 462 if (regClass == AMDIL::GPRV2F32RegClassID) { 463 return (unsigned int)AMDIL::FLT_v2f32; 464 } else if (regClass == AMDIL::GPRV4F32RegClassID) { 465 return (unsigned int)AMDIL::FLT_v4f32; 466 } else { 467 return (unsigned int)AMDIL::FLT; 468 } 469 case AMDILCC::IL_CC_F_NE: 470 case AMDILCC::IL_CC_F_UNE: 471 return (unsigned int)AMDIL::FNE; 472 case AMDILCC::IL_CC_I_EQ: 473 case AMDILCC::IL_CC_U_EQ: 474 if (regClass == AMDIL::GPRI32RegClassID 475 || regClass == AMDIL::GPRI8RegClassID 476 || regClass == AMDIL::GPRI16RegClassID) { 477 return (unsigned int)AMDIL::IEQ; 478 } else if (regClass == AMDIL::GPRV2I32RegClassID 479 || regClass == AMDIL::GPRV2I8RegClassID 480 || regClass == 
AMDIL::GPRV2I16RegClassID) { 481 return (unsigned int)AMDIL::IEQ_v2i32; 482 } else if (regClass == AMDIL::GPRV4I32RegClassID 483 || regClass == AMDIL::GPRV4I8RegClassID 484 || regClass == AMDIL::GPRV4I16RegClassID) { 485 return (unsigned int)AMDIL::IEQ_v4i32; 486 } else { 487 assert(!"Unknown reg class!"); 488 } 489 case AMDILCC::IL_CC_L_EQ: 490 case AMDILCC::IL_CC_UL_EQ: 491 return (unsigned int)AMDIL::LEQ; 492 case AMDILCC::IL_CC_I_GE: 493 case AMDILCC::IL_CC_I_LE: 494 if (regClass == AMDIL::GPRI32RegClassID 495 || regClass == AMDIL::GPRI8RegClassID 496 || regClass == AMDIL::GPRI16RegClassID) { 497 return (unsigned int)AMDIL::IGE; 498 } else if (regClass == AMDIL::GPRV2I32RegClassID 499 || regClass == AMDIL::GPRI8RegClassID 500 || regClass == AMDIL::GPRI16RegClassID) { 501 return (unsigned int)AMDIL::IGE_v2i32; 502 } else if (regClass == AMDIL::GPRV4I32RegClassID 503 || regClass == AMDIL::GPRI8RegClassID 504 || regClass == AMDIL::GPRI16RegClassID) { 505 return (unsigned int)AMDIL::IGE_v4i32; 506 } else { 507 assert(!"Unknown reg class!"); 508 } 509 case AMDILCC::IL_CC_I_LT: 510 case AMDILCC::IL_CC_I_GT: 511 if (regClass == AMDIL::GPRI32RegClassID 512 || regClass == AMDIL::GPRI8RegClassID 513 || regClass == AMDIL::GPRI16RegClassID) { 514 return (unsigned int)AMDIL::ILT; 515 } else if (regClass == AMDIL::GPRV2I32RegClassID 516 || regClass == AMDIL::GPRI8RegClassID 517 || regClass == AMDIL::GPRI16RegClassID) { 518 return (unsigned int)AMDIL::ILT_v2i32; 519 } else if (regClass == AMDIL::GPRV4I32RegClassID 520 || regClass == AMDIL::GPRI8RegClassID 521 || regClass == AMDIL::GPRI16RegClassID) { 522 return (unsigned int)AMDIL::ILT_v4i32; 523 } else { 524 assert(!"Unknown reg class!"); 525 } 526 case AMDILCC::IL_CC_L_GE: 527 return (unsigned int)AMDIL::LGE; 528 case AMDILCC::IL_CC_L_LE: 529 return (unsigned int)AMDIL::LLE; 530 case AMDILCC::IL_CC_L_LT: 531 return (unsigned int)AMDIL::LLT; 532 case AMDILCC::IL_CC_L_GT: 533 return (unsigned int)AMDIL::LGT; 534 case 
AMDILCC::IL_CC_I_NE: 535 case AMDILCC::IL_CC_U_NE: 536 if (regClass == AMDIL::GPRI32RegClassID 537 || regClass == AMDIL::GPRI8RegClassID 538 || regClass == AMDIL::GPRI16RegClassID) { 539 return (unsigned int)AMDIL::INE; 540 } else if (regClass == AMDIL::GPRV2I32RegClassID 541 || regClass == AMDIL::GPRI8RegClassID 542 || regClass == AMDIL::GPRI16RegClassID) { 543 return (unsigned int)AMDIL::INE_v2i32; 544 } else if (regClass == AMDIL::GPRV4I32RegClassID 545 || regClass == AMDIL::GPRI8RegClassID 546 || regClass == AMDIL::GPRI16RegClassID) { 547 return (unsigned int)AMDIL::INE_v4i32; 548 } else { 549 assert(!"Unknown reg class!"); 550 } 551 case AMDILCC::IL_CC_U_GE: 552 case AMDILCC::IL_CC_U_LE: 553 if (regClass == AMDIL::GPRI32RegClassID 554 || regClass == AMDIL::GPRI8RegClassID 555 || regClass == AMDIL::GPRI16RegClassID) { 556 return (unsigned int)AMDIL::UGE; 557 } else if (regClass == AMDIL::GPRV2I32RegClassID 558 || regClass == AMDIL::GPRI8RegClassID 559 || regClass == AMDIL::GPRI16RegClassID) { 560 return (unsigned int)AMDIL::UGE_v2i32; 561 } else if (regClass == AMDIL::GPRV4I32RegClassID 562 || regClass == AMDIL::GPRI8RegClassID 563 || regClass == AMDIL::GPRI16RegClassID) { 564 return (unsigned int)AMDIL::UGE_v4i32; 565 } else { 566 assert(!"Unknown reg class!"); 567 } 568 case AMDILCC::IL_CC_L_NE: 569 case AMDILCC::IL_CC_UL_NE: 570 return (unsigned int)AMDIL::LNE; 571 case AMDILCC::IL_CC_UL_GE: 572 return (unsigned int)AMDIL::ULGE; 573 case AMDILCC::IL_CC_UL_LE: 574 return (unsigned int)AMDIL::ULLE; 575 case AMDILCC::IL_CC_U_LT: 576 if (regClass == AMDIL::GPRI32RegClassID 577 || regClass == AMDIL::GPRI8RegClassID 578 || regClass == AMDIL::GPRI16RegClassID) { 579 return (unsigned int)AMDIL::ULT; 580 } else if (regClass == AMDIL::GPRV2I32RegClassID 581 || regClass == AMDIL::GPRI8RegClassID 582 || regClass == AMDIL::GPRI16RegClassID) { 583 return (unsigned int)AMDIL::ULT_v2i32; 584 } else if (regClass == AMDIL::GPRV4I32RegClassID 585 || regClass == 
AMDIL::GPRI8RegClassID 586 || regClass == AMDIL::GPRI16RegClassID) { 587 return (unsigned int)AMDIL::ULT_v4i32; 588 } else { 589 assert(!"Unknown reg class!"); 590 } 591 case AMDILCC::IL_CC_U_GT: 592 if (regClass == AMDIL::GPRI32RegClassID 593 || regClass == AMDIL::GPRI8RegClassID 594 || regClass == AMDIL::GPRI16RegClassID) { 595 return (unsigned int)AMDIL::UGT; 596 } else if (regClass == AMDIL::GPRV2I32RegClassID 597 || regClass == AMDIL::GPRI8RegClassID 598 || regClass == AMDIL::GPRI16RegClassID) { 599 return (unsigned int)AMDIL::UGT_v2i32; 600 } else if (regClass == AMDIL::GPRV4I32RegClassID 601 || regClass == AMDIL::GPRI8RegClassID 602 || regClass == AMDIL::GPRI16RegClassID) { 603 return (unsigned int)AMDIL::UGT_v4i32; 604 } else { 605 assert(!"Unknown reg class!"); 606 } 607 case AMDILCC::IL_CC_UL_LT: 608 return (unsigned int)AMDIL::ULLT; 609 case AMDILCC::IL_CC_UL_GT: 610 return (unsigned int)AMDIL::ULGT; 611 case AMDILCC::IL_CC_F_UEQ: 612 case AMDILCC::IL_CC_D_UEQ: 613 case AMDILCC::IL_CC_F_ONE: 614 case AMDILCC::IL_CC_D_ONE: 615 case AMDILCC::IL_CC_F_O: 616 case AMDILCC::IL_CC_F_UO: 617 case AMDILCC::IL_CC_D_O: 618 case AMDILCC::IL_CC_D_UO: 619 // we don't care 620 return 0; 621 622 } 623 errs()<<"Opcode: "<<CCCode<<"\n"; 624 assert(0 && "Unknown opcode retrieved"); 625 return 0; 626} 627SDValue 628AMDILTargetLowering::LowerMemArgument( 629 SDValue Chain, 630 CallingConv::ID CallConv, 631 const SmallVectorImpl<ISD::InputArg> &Ins, 632 DebugLoc dl, SelectionDAG &DAG, 633 const CCValAssign &VA, 634 MachineFrameInfo *MFI, 635 unsigned i) const 636{ 637 // Create the nodes corresponding to a load from this parameter slot. 638 ISD::ArgFlagsTy Flags = Ins[i].Flags; 639 640 bool AlwaysUseMutable = (CallConv==CallingConv::Fast) && 641 getTargetMachine().Options.GuaranteedTailCallOpt; 642 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); 643 644 // FIXME: For now, all byval parameter objects are marked mutable. This can 645 // be changed with more analysis. 
646 // In case of tail call optimization mark all arguments mutable. Since they 647 // could be overwritten by lowering of arguments in case of a tail call. 648 int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, 649 VA.getLocMemOffset(), isImmutable); 650 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 651 652 if (Flags.isByVal()) 653 return FIN; 654 return DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 655 MachinePointerInfo::getFixedStack(FI), 656 false, false, false, 0); 657} 658//===----------------------------------------------------------------------===// 659// TargetLowering Implementation Help Functions End 660//===----------------------------------------------------------------------===// 661//===----------------------------------------------------------------------===// 662// Instruction generation functions 663//===----------------------------------------------------------------------===// 664uint32_t 665AMDILTargetLowering::addExtensionInstructions( 666 uint32_t reg, bool signedShift, 667 unsigned int simpleVT) const 668{ 669 int shiftSize = 0; 670 uint32_t LShift, RShift; 671 switch(simpleVT) 672 { 673 default: 674 return reg; 675 case AMDIL::GPRI8RegClassID: 676 shiftSize = 24; 677 LShift = AMDIL::SHL_i8; 678 if (signedShift) { 679 RShift = AMDIL::SHR_i8; 680 } else { 681 RShift = AMDIL::USHR_i8; 682 } 683 break; 684 case AMDIL::GPRV2I8RegClassID: 685 shiftSize = 24; 686 LShift = AMDIL::SHL_v2i8; 687 if (signedShift) { 688 RShift = AMDIL::SHR_v2i8; 689 } else { 690 RShift = AMDIL::USHR_v2i8; 691 } 692 break; 693 case AMDIL::GPRV4I8RegClassID: 694 shiftSize = 24; 695 LShift = AMDIL::SHL_v4i8; 696 if (signedShift) { 697 RShift = AMDIL::SHR_v4i8; 698 } else { 699 RShift = AMDIL::USHR_v4i8; 700 } 701 break; 702 case AMDIL::GPRI16RegClassID: 703 shiftSize = 16; 704 LShift = AMDIL::SHL_i16; 705 if (signedShift) { 706 RShift = AMDIL::SHR_i16; 707 } else { 708 RShift = AMDIL::USHR_i16; 709 } 710 break; 711 case AMDIL::GPRV2I16RegClassID: 712 
shiftSize = 16; 713 LShift = AMDIL::SHL_v2i16; 714 if (signedShift) { 715 RShift = AMDIL::SHR_v2i16; 716 } else { 717 RShift = AMDIL::USHR_v2i16; 718 } 719 break; 720 case AMDIL::GPRV4I16RegClassID: 721 shiftSize = 16; 722 LShift = AMDIL::SHL_v4i16; 723 if (signedShift) { 724 RShift = AMDIL::SHR_v4i16; 725 } else { 726 RShift = AMDIL::USHR_v4i16; 727 } 728 break; 729 }; 730 uint32_t LoadReg = genVReg(simpleVT); 731 uint32_t tmp1 = genVReg(simpleVT); 732 uint32_t tmp2 = genVReg(simpleVT); 733 generateMachineInst(AMDIL::LOADCONST_i32, LoadReg).addImm(shiftSize); 734 generateMachineInst(LShift, tmp1, reg, LoadReg); 735 generateMachineInst(RShift, tmp2, tmp1, LoadReg); 736 return tmp2; 737} 738 739MachineOperand 740AMDILTargetLowering::convertToReg(MachineOperand op) const 741{ 742 if (op.isReg()) { 743 return op; 744 } else if (op.isImm()) { 745 uint32_t loadReg 746 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass); 747 generateMachineInst(AMDIL::LOADCONST_i32, loadReg) 748 .addImm(op.getImm()); 749 op.ChangeToRegister(loadReg, false); 750 } else if (op.isFPImm()) { 751 uint32_t loadReg 752 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass); 753 generateMachineInst(AMDIL::LOADCONST_f32, loadReg) 754 .addFPImm(op.getFPImm()); 755 op.ChangeToRegister(loadReg, false); 756 } else if (op.isMBB()) { 757 op.ChangeToRegister(0, false); 758 } else if (op.isFI()) { 759 op.ChangeToRegister(0, false); 760 } else if (op.isCPI()) { 761 op.ChangeToRegister(0, false); 762 } else if (op.isJTI()) { 763 op.ChangeToRegister(0, false); 764 } else if (op.isGlobal()) { 765 op.ChangeToRegister(0, false); 766 } else if (op.isSymbol()) { 767 op.ChangeToRegister(0, false); 768 }/* else if (op.isMetadata()) { 769 op.ChangeToRegister(0, false); 770 }*/ 771 return op; 772} 773 774void 775AMDILTargetLowering::generateCMPInstr( 776 MachineInstr *MI, 777 MachineBasicBlock *BB, 778 const TargetInstrInfo& TII) 779const 780{ 781 MachineOperand DST = MI->getOperand(0); 782 MachineOperand CC = 
/// Expand the generic CMP pseudo \p MI into concrete AMDIL compare
/// instructions.  The pseudo's operands are (dst, cc-imm, lhs, rhs).
/// Depending on the condition code this emits a single compare (possibly
/// with operands swapped, since only GE/LT-style opcodes exist), a compare
/// preceded by sign-extension of sub-32-bit operands, or a multi-
/// instruction NaN-aware sequence for the unordered/ordered fp predicates.
void
AMDILTargetLowering::generateCMPInstr(
    MachineInstr *MI,
    MachineBasicBlock *BB,
    const TargetInstrInfo& TII)
const
{
  MachineOperand DST = MI->getOperand(0);
  MachineOperand CC = MI->getOperand(1);
  MachineOperand LHS = MI->getOperand(2);
  MachineOperand RHS = MI->getOperand(3);
  int64_t ccCode = CC.getImm();
  unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
  unsigned int opCode = translateToOpcode(ccCode, simpleVT);
  DebugLoc DL = MI->getDebugLoc();
  MachineBasicBlock::iterator BBI = MI;
  setPrivateData(BB, BBI, &DL, &TII);
  // Immediate (or other non-register) operands must be materialized first.
  if (!LHS.isReg()) {
    LHS = convertToReg(LHS);
  }
  if (!RHS.isReg()) {
    RHS = convertToReg(RHS);
  }
  switch (ccCode) {
    case AMDILCC::IL_CC_I_EQ:
    case AMDILCC::IL_CC_I_NE:
    case AMDILCC::IL_CC_I_GE:
    case AMDILCC::IL_CC_I_LT:
      // Signed compares on (possibly) sub-32-bit values: sign-extend both
      // operands into the full lane first.
      {
        uint32_t lhsreg = addExtensionInstructions(
            LHS.getReg(), true, simpleVT);
        uint32_t rhsreg = addExtensionInstructions(
            RHS.getReg(), true, simpleVT);
        generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg);
      }
      break;
    case AMDILCC::IL_CC_U_EQ:
    case AMDILCC::IL_CC_U_NE:
    case AMDILCC::IL_CC_U_GE:
    case AMDILCC::IL_CC_U_LT:
    case AMDILCC::IL_CC_D_EQ:
    case AMDILCC::IL_CC_F_EQ:
    case AMDILCC::IL_CC_F_OEQ:
    case AMDILCC::IL_CC_D_OEQ:
    case AMDILCC::IL_CC_D_NE:
    case AMDILCC::IL_CC_F_NE:
    case AMDILCC::IL_CC_F_UNE:
    case AMDILCC::IL_CC_D_UNE:
    case AMDILCC::IL_CC_D_GE:
    case AMDILCC::IL_CC_F_GE:
    case AMDILCC::IL_CC_D_OGE:
    case AMDILCC::IL_CC_F_OGE:
    case AMDILCC::IL_CC_D_LT:
    case AMDILCC::IL_CC_F_LT:
    case AMDILCC::IL_CC_F_OLT:
    case AMDILCC::IL_CC_D_OLT:
      // Direct compare in source operand order.
      // NOTE(review): the unsigned i8/i16 cases here skip the
      // zero-extension that the signed cases above perform — confirm
      // the operands are already zero-extended upstream.
      generateMachineInst(opCode, DST.getReg(),
          LHS.getReg(), RHS.getReg());
      break;
    case AMDILCC::IL_CC_I_GT:
    case AMDILCC::IL_CC_I_LE:
      // GT/LE are lowered to the LT/GE opcode with swapped operands.
      {
        uint32_t lhsreg = addExtensionInstructions(
            LHS.getReg(), true, simpleVT);
        uint32_t rhsreg = addExtensionInstructions(
            RHS.getReg(), true, simpleVT);
        generateMachineInst(opCode, DST.getReg(), rhsreg, lhsreg);
      }
      break;
    case AMDILCC::IL_CC_U_GT:
    case AMDILCC::IL_CC_U_LE:
    case AMDILCC::IL_CC_F_GT:
    case AMDILCC::IL_CC_D_GT:
    case AMDILCC::IL_CC_F_OGT:
    case AMDILCC::IL_CC_D_OGT:
    case AMDILCC::IL_CC_F_LE:
    case AMDILCC::IL_CC_D_LE:
    case AMDILCC::IL_CC_D_OLE:
    case AMDILCC::IL_CC_F_OLE:
      // Swapped-operand compare, no extension needed.
      generateMachineInst(opCode, DST.getReg(),
          RHS.getReg(), LHS.getReg());
      break;
    case AMDILCC::IL_CC_F_UGT:
    case AMDILCC::IL_CC_F_ULE:
      // Unordered-or-true: cmp(rhs,lhs) OR isnan(rhs) OR isnan(lhs),
      // where isnan(x) is computed as x != x.
      {
        uint32_t VReg[4] = {
          genVReg(simpleVT), genVReg(simpleVT),
          genVReg(simpleVT), genVReg(simpleVT)
        };
        generateMachineInst(opCode, VReg[0],
            RHS.getReg(), LHS.getReg());
        generateMachineInst(AMDIL::FNE, VReg[1],
            RHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::FNE, VReg[2],
            LHS.getReg(), LHS.getReg());
        generateMachineInst(AMDIL::BINARY_OR_f32,
            VReg[3], VReg[0], VReg[1]);
        generateMachineInst(AMDIL::BINARY_OR_f32,
            DST.getReg(), VReg[2], VReg[3]);
      }
      break;
    case AMDILCC::IL_CC_F_ULT:
    case AMDILCC::IL_CC_F_UGE:
      // Same NaN-aware expansion, but with operands in source order.
      {
        uint32_t VReg[4] = {
          genVReg(simpleVT), genVReg(simpleVT),
          genVReg(simpleVT), genVReg(simpleVT)
        };
        generateMachineInst(opCode, VReg[0],
            LHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::FNE, VReg[1],
            RHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::FNE, VReg[2],
            LHS.getReg(), LHS.getReg());
        generateMachineInst(AMDIL::BINARY_OR_f32,
            VReg[3], VReg[0], VReg[1]);
        generateMachineInst(AMDIL::BINARY_OR_f32,
            DST.getReg(), VReg[2], VReg[3]);
      }
      break;
    case AMDILCC::IL_CC_D_UGT:
    case AMDILCC::IL_CC_D_ULE:
      {
        uint32_t regID = AMDIL::GPRF64RegClassID;
        uint32_t VReg[4] = {
          genVReg(regID), genVReg(regID),
          genVReg(regID), genVReg(regID)
        };
        // The result of a double comparison is a 32bit result
        generateMachineInst(opCode, VReg[0],
            RHS.getReg(), LHS.getReg());
        generateMachineInst(AMDIL::DNE, VReg[1],
            RHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::DNE, VReg[2],
            LHS.getReg(), LHS.getReg());
        generateMachineInst(AMDIL::BINARY_OR_f32,
            VReg[3], VReg[0], VReg[1]);
        generateMachineInst(AMDIL::BINARY_OR_f32,
            DST.getReg(), VReg[2], VReg[3]);
      }
      break;
    case AMDILCC::IL_CC_D_UGE:
    case AMDILCC::IL_CC_D_ULT:
      {
        uint32_t regID = AMDIL::GPRF64RegClassID;
        uint32_t VReg[4] = {
          genVReg(regID), genVReg(regID),
          genVReg(regID), genVReg(regID)
        };
        // The result of a double comparison is a 32bit result
        generateMachineInst(opCode, VReg[0],
            LHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::DNE, VReg[1],
            RHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::DNE, VReg[2],
            LHS.getReg(), LHS.getReg());
        generateMachineInst(AMDIL::BINARY_OR_f32,
            VReg[3], VReg[0], VReg[1]);
        generateMachineInst(AMDIL::BINARY_OR_f32,
            DST.getReg(), VReg[2], VReg[3]);
      }
      break;
    case AMDILCC::IL_CC_F_UEQ:
      // ueq = (lhs == rhs) OR isnan(lhs) OR isnan(rhs).
      {
        uint32_t VReg[4] = {
          genVReg(simpleVT), genVReg(simpleVT),
          genVReg(simpleVT), genVReg(simpleVT)
        };
        generateMachineInst(AMDIL::FEQ, VReg[0],
            LHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::FNE, VReg[1],
            LHS.getReg(), LHS.getReg());
        generateMachineInst(AMDIL::FNE, VReg[2],
            RHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::BINARY_OR_f32,
            VReg[3], VReg[0], VReg[1]);
        generateMachineInst(AMDIL::BINARY_OR_f32,
            DST.getReg(), VReg[2], VReg[3]);
      }
      break;
    case AMDILCC::IL_CC_F_ONE:
      // one = (lhs != rhs) AND !isnan(lhs) AND !isnan(rhs).
      {
        uint32_t VReg[4] = {
          genVReg(simpleVT), genVReg(simpleVT),
          genVReg(simpleVT), genVReg(simpleVT)
        };
        generateMachineInst(AMDIL::FNE, VReg[0],
            LHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::FEQ, VReg[1],
            LHS.getReg(), LHS.getReg());
        generateMachineInst(AMDIL::FEQ, VReg[2],
            RHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::BINARY_AND_f32,
            VReg[3], VReg[0], VReg[1]);
        generateMachineInst(AMDIL::BINARY_AND_f32,
            DST.getReg(), VReg[2], VReg[3]);
      }
      break;
    case AMDILCC::IL_CC_D_UEQ:
      {
        uint32_t regID = AMDIL::GPRF64RegClassID;
        uint32_t VReg[4] = {
          genVReg(regID), genVReg(regID),
          genVReg(regID), genVReg(regID)
        };
        // The result of a double comparison is a 32bit result
        generateMachineInst(AMDIL::DEQ, VReg[0],
            LHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::DNE, VReg[1],
            LHS.getReg(), LHS.getReg());
        generateMachineInst(AMDIL::DNE, VReg[2],
            RHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::BINARY_OR_f32,
            VReg[3], VReg[0], VReg[1]);
        generateMachineInst(AMDIL::BINARY_OR_f32,
            DST.getReg(), VReg[2], VReg[3]);

      }
      break;
    case AMDILCC::IL_CC_D_ONE:
      {
        uint32_t regID = AMDIL::GPRF64RegClassID;
        uint32_t VReg[4] = {
          genVReg(regID), genVReg(regID),
          genVReg(regID), genVReg(regID)
        };
        // The result of a double comparison is a 32bit result
        generateMachineInst(AMDIL::DNE, VReg[0],
            LHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::DEQ, VReg[1],
            LHS.getReg(), LHS.getReg());
        generateMachineInst(AMDIL::DEQ, VReg[2],
            RHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::BINARY_AND_f32,
            VReg[3], VReg[0], VReg[1]);
        generateMachineInst(AMDIL::BINARY_AND_f32,
            DST.getReg(), VReg[2], VReg[3]);

      }
      break;
    case AMDILCC::IL_CC_F_O:
      // ordered = !isnan(rhs) AND !isnan(lhs).
      {
        uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
        generateMachineInst(AMDIL::FEQ, VReg[0],
            RHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::FEQ, VReg[1],
            LHS.getReg(), LHS.getReg());
        generateMachineInst(AMDIL::BINARY_AND_f32,
            DST.getReg(), VReg[0], VReg[1]);
      }
      break;
    case AMDILCC::IL_CC_D_O:
      {
        uint32_t regID = AMDIL::GPRF64RegClassID;
        uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
        // The result of a double comparison is a 32bit result
        generateMachineInst(AMDIL::DEQ, VReg[0],
            RHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::DEQ, VReg[1],
            LHS.getReg(), LHS.getReg());
        generateMachineInst(AMDIL::BINARY_AND_f32,
            DST.getReg(), VReg[0], VReg[1]);
      }
      break;
    case AMDILCC::IL_CC_F_UO:
      // unordered = isnan(rhs) OR isnan(lhs).
      {
        uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
        generateMachineInst(AMDIL::FNE, VReg[0],
            RHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::FNE, VReg[1],
            LHS.getReg(), LHS.getReg());
        generateMachineInst(AMDIL::BINARY_OR_f32,
            DST.getReg(), VReg[0], VReg[1]);
      }
      break;
    case AMDILCC::IL_CC_D_UO:
      {
        uint32_t regID = AMDIL::GPRF64RegClassID;
        uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
        // The result of a double comparison is a 32bit result
        generateMachineInst(AMDIL::DNE, VReg[0],
            RHS.getReg(), RHS.getReg());
        generateMachineInst(AMDIL::DNE, VReg[1],
            LHS.getReg(), LHS.getReg());
        generateMachineInst(AMDIL::BINARY_OR_f32,
            DST.getReg(), VReg[0], VReg[1]);
      }
      break;
    case AMDILCC::IL_CC_L_LE:
    case AMDILCC::IL_CC_L_GE:
    case AMDILCC::IL_CC_L_EQ:
    case AMDILCC::IL_CC_L_NE:
    case AMDILCC::IL_CC_L_LT:
    case AMDILCC::IL_CC_L_GT:
    case AMDILCC::IL_CC_UL_LE:
    case AMDILCC::IL_CC_UL_GE:
    case AMDILCC::IL_CC_UL_EQ:
    case AMDILCC::IL_CC_UL_NE:
    case AMDILCC::IL_CC_UL_LT:
    case AMDILCC::IL_CC_UL_GT:
      // 64-bit compares: use the native instruction when the device
      // supports hardware long ops, otherwise expand to a 32-bit
      // relational sequence.
      {
        const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
            &this->getTargetMachine())->getSubtargetImpl();
        if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)) {
          generateMachineInst(opCode, DST.getReg(), LHS.getReg(), RHS.getReg());
        } else {
          generateLongRelational(MI, opCode);
        }
      }
      break;
    case AMDILCC::COND_ERROR:
      assert(0 && "Invalid CC code");
      break;
  };
}

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
1092//===----------------------------------------------------------------------===// 1093 AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM) 1094: TargetLowering(TM, new TargetLoweringObjectFileELF()) 1095{ 1096 int types[] = 1097 { 1098 (int)MVT::i8, 1099 (int)MVT::i16, 1100 (int)MVT::i32, 1101 (int)MVT::f32, 1102 (int)MVT::f64, 1103 (int)MVT::i64, 1104 (int)MVT::v2i8, 1105 (int)MVT::v4i8, 1106 (int)MVT::v2i16, 1107 (int)MVT::v4i16, 1108 (int)MVT::v4f32, 1109 (int)MVT::v4i32, 1110 (int)MVT::v2f32, 1111 (int)MVT::v2i32, 1112 (int)MVT::v2f64, 1113 (int)MVT::v2i64 1114 }; 1115 1116 int IntTypes[] = 1117 { 1118 (int)MVT::i8, 1119 (int)MVT::i16, 1120 (int)MVT::i32, 1121 (int)MVT::i64 1122 }; 1123 1124 int FloatTypes[] = 1125 { 1126 (int)MVT::f32, 1127 (int)MVT::f64 1128 }; 1129 1130 int VectorTypes[] = 1131 { 1132 (int)MVT::v2i8, 1133 (int)MVT::v4i8, 1134 (int)MVT::v2i16, 1135 (int)MVT::v4i16, 1136 (int)MVT::v4f32, 1137 (int)MVT::v4i32, 1138 (int)MVT::v2f32, 1139 (int)MVT::v2i32, 1140 (int)MVT::v2f64, 1141 (int)MVT::v2i64 1142 }; 1143 size_t numTypes = sizeof(types) / sizeof(*types); 1144 size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes); 1145 size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes); 1146 size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes); 1147 1148 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 1149 &this->getTargetMachine())->getSubtargetImpl(); 1150 // These are the current register classes that are 1151 // supported 1152 1153 addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass); 1154 addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass); 1155 1156 if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) { 1157 addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass); 1158 addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass); 1159 } 1160 if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) { 1161 addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass); 1162 
addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass); 1163 addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass); 1164 setOperationAction(ISD::Constant , MVT::i8 , Legal); 1165 } 1166 if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) { 1167 addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass); 1168 addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass); 1169 addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass); 1170 setOperationAction(ISD::Constant , MVT::i16 , Legal); 1171 } 1172 addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass); 1173 addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass); 1174 addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass); 1175 addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass); 1176 if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) { 1177 addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass); 1178 addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass); 1179 } 1180 1181 for (unsigned int x = 0; x < numTypes; ++x) { 1182 MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x]; 1183 1184 //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types 1185 // We cannot sextinreg, expand to shifts 1186 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom); 1187 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); 1188 setOperationAction(ISD::FP_ROUND, VT, Expand); 1189 setOperationAction(ISD::OR, VT, Custom); 1190 setOperationAction(ISD::SUBE, VT, Expand); 1191 setOperationAction(ISD::SUBC, VT, Expand); 1192 setOperationAction(ISD::ADD, VT, Custom); 1193 setOperationAction(ISD::ADDE, VT, Expand); 1194 setOperationAction(ISD::ADDC, VT, Expand); 1195 setOperationAction(ISD::SETCC, VT, Custom); 1196 setOperationAction(ISD::BRCOND, VT, Custom); 1197 setOperationAction(ISD::BR_CC, VT, Custom); 1198 setOperationAction(ISD::BR_JT, VT, Expand); 1199 setOperationAction(ISD::BRIND, VT, Expand); 1200 // TODO: Implement custom UREM/SREM routines 1201 
setOperationAction(ISD::UREM, VT, Expand); 1202 setOperationAction(ISD::SREM, VT, Expand); 1203 setOperationAction(ISD::SINT_TO_FP, VT, Custom); 1204 setOperationAction(ISD::UINT_TO_FP, VT, Custom); 1205 setOperationAction(ISD::FP_TO_SINT, VT, Custom); 1206 setOperationAction(ISD::FP_TO_UINT, VT, Custom); 1207 setOperationAction(ISDBITCAST, VT, Custom); 1208 setOperationAction(ISD::GlobalAddress, VT, Custom); 1209 setOperationAction(ISD::JumpTable, VT, Custom); 1210 setOperationAction(ISD::ConstantPool, VT, Custom); 1211 setOperationAction(ISD::SELECT_CC, VT, Custom); 1212 setOperationAction(ISD::SELECT, VT, Custom); 1213 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 1214 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 1215 if (VT != MVT::i64 && VT != MVT::v2i64) { 1216 setOperationAction(ISD::SDIV, VT, Custom); 1217 setOperationAction(ISD::UDIV, VT, Custom); 1218 } 1219 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); 1220 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); 1221 } 1222 for (unsigned int x = 0; x < numFloatTypes; ++x) { 1223 MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x]; 1224 1225 // IL does not have these operations for floating point types 1226 setOperationAction(ISD::FP_ROUND_INREG, VT, Expand); 1227 setOperationAction(ISD::FP_ROUND, VT, Custom); 1228 setOperationAction(ISD::SETOLT, VT, Expand); 1229 setOperationAction(ISD::SETOGE, VT, Expand); 1230 setOperationAction(ISD::SETOGT, VT, Expand); 1231 setOperationAction(ISD::SETOLE, VT, Expand); 1232 setOperationAction(ISD::SETULT, VT, Expand); 1233 setOperationAction(ISD::SETUGE, VT, Expand); 1234 setOperationAction(ISD::SETUGT, VT, Expand); 1235 setOperationAction(ISD::SETULE, VT, Expand); 1236 } 1237 1238 for (unsigned int x = 0; x < numIntTypes; ++x) { 1239 MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x]; 1240 1241 // GPU also does not have divrem function for signed or unsigned 1242 setOperationAction(ISD::SDIVREM, VT, Expand); 1243 
setOperationAction(ISD::UDIVREM, VT, Expand); 1244 setOperationAction(ISD::FP_ROUND, VT, Expand); 1245 1246 // GPU does not have [S|U]MUL_LOHI functions as a single instruction 1247 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 1248 setOperationAction(ISD::UMUL_LOHI, VT, Expand); 1249 1250 // GPU doesn't have a rotl, rotr, or byteswap instruction 1251 setOperationAction(ISD::ROTR, VT, Expand); 1252 setOperationAction(ISD::ROTL, VT, Expand); 1253 setOperationAction(ISD::BSWAP, VT, Expand); 1254 1255 // GPU doesn't have any counting operators 1256 setOperationAction(ISD::CTPOP, VT, Expand); 1257 setOperationAction(ISD::CTTZ, VT, Expand); 1258 setOperationAction(ISD::CTLZ, VT, Expand); 1259 } 1260 1261 for ( unsigned int ii = 0; ii < numVectorTypes; ++ii ) 1262 { 1263 MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii]; 1264 1265 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 1266 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); 1267 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); 1268 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); 1269 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); 1270 setOperationAction(ISD::FP_ROUND, VT, Expand); 1271 setOperationAction(ISD::SDIVREM, VT, Expand); 1272 setOperationAction(ISD::UDIVREM, VT, Expand); 1273 setOperationAction(ISD::SMUL_LOHI, VT, Expand); 1274 // setOperationAction(ISD::VSETCC, VT, Expand); 1275 setOperationAction(ISD::SETCC, VT, Expand); 1276 setOperationAction(ISD::SELECT_CC, VT, Expand); 1277 setOperationAction(ISD::SELECT, VT, Expand); 1278 1279 } 1280 setOperationAction(ISD::FP_ROUND, MVT::Other, Expand); 1281 if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) { 1282 if (stm->calVersion() < CAL_VERSION_SC_139 1283 || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { 1284 setOperationAction(ISD::MUL, MVT::i64, Custom); 1285 } 1286 setOperationAction(ISD::SUB, MVT::i64, Custom); 1287 setOperationAction(ISD::ADD, MVT::i64, Custom); 1288 
setOperationAction(ISD::MULHU, MVT::i64, Expand); 1289 setOperationAction(ISD::MULHU, MVT::v2i64, Expand); 1290 setOperationAction(ISD::MULHS, MVT::i64, Expand); 1291 setOperationAction(ISD::MULHS, MVT::v2i64, Expand); 1292 setOperationAction(ISD::MUL, MVT::v2i64, Expand); 1293 setOperationAction(ISD::SUB, MVT::v2i64, Expand); 1294 setOperationAction(ISD::ADD, MVT::v2i64, Expand); 1295 setOperationAction(ISD::SREM, MVT::v2i64, Expand); 1296 setOperationAction(ISD::Constant , MVT::i64 , Legal); 1297 setOperationAction(ISD::UDIV, MVT::v2i64, Expand); 1298 setOperationAction(ISD::SDIV, MVT::v2i64, Expand); 1299 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand); 1300 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand); 1301 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand); 1302 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand); 1303 setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand); 1304 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand); 1305 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand); 1306 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand); 1307 } 1308 if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) { 1309 // we support loading/storing v2f64 but not operations on the type 1310 setOperationAction(ISD::FADD, MVT::v2f64, Expand); 1311 setOperationAction(ISD::FSUB, MVT::v2f64, Expand); 1312 setOperationAction(ISD::FMUL, MVT::v2f64, Expand); 1313 setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand); 1314 setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand); 1315 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); 1316 setOperationAction(ISD::ConstantFP , MVT::f64 , Legal); 1317 setOperationAction(ISD::FDIV, MVT::v2f64, Expand); 1318 // We want to expand vector conversions into their scalar 1319 // counterparts. 
1320 setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand); 1321 setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand); 1322 setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand); 1323 setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand); 1324 setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand); 1325 setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand); 1326 setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand); 1327 setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand); 1328 setOperationAction(ISD::FABS, MVT::f64, Expand); 1329 setOperationAction(ISD::FABS, MVT::v2f64, Expand); 1330 } 1331 // TODO: Fix the UDIV24 algorithm so it works for these 1332 // types correctly. This needs vector comparisons 1333 // for this to work correctly. 1334 setOperationAction(ISD::UDIV, MVT::v2i8, Expand); 1335 setOperationAction(ISD::UDIV, MVT::v4i8, Expand); 1336 setOperationAction(ISD::UDIV, MVT::v2i16, Expand); 1337 setOperationAction(ISD::UDIV, MVT::v4i16, Expand); 1338 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom); 1339 setOperationAction(ISD::SUBC, MVT::Other, Expand); 1340 setOperationAction(ISD::ADDE, MVT::Other, Expand); 1341 setOperationAction(ISD::ADDC, MVT::Other, Expand); 1342 setOperationAction(ISD::BRCOND, MVT::Other, Custom); 1343 setOperationAction(ISD::BR_CC, MVT::Other, Custom); 1344 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 1345 setOperationAction(ISD::BRIND, MVT::Other, Expand); 1346 setOperationAction(ISD::SETCC, MVT::Other, Custom); 1347 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand); 1348 setOperationAction(ISD::FDIV, MVT::f32, Custom); 1349 setOperationAction(ISD::FDIV, MVT::v2f32, Custom); 1350 setOperationAction(ISD::FDIV, MVT::v4f32, Custom); 1351 1352 setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom); 1353 // Use the default implementation. 
1354 setOperationAction(ISD::VAARG , MVT::Other, Expand); 1355 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 1356 setOperationAction(ISD::VAEND , MVT::Other, Expand); 1357 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); 1358 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); 1359 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); 1360 setOperationAction(ISD::ConstantFP , MVT::f32 , Legal); 1361 setOperationAction(ISD::Constant , MVT::i32 , Legal); 1362 setOperationAction(ISD::TRAP , MVT::Other , Legal); 1363 1364 setStackPointerRegisterToSaveRestore(AMDIL::SP); 1365 setSchedulingPreference(Sched::RegPressure); 1366 setPow2DivIsCheap(false); 1367 setPrefLoopAlignment(16); 1368 setSelectIsExpensive(true); 1369 setJumpIsExpensive(true); 1370 computeRegisterProperties(); 1371 1372 maxStoresPerMemcpy = 4096; 1373 maxStoresPerMemmove = 4096; 1374 maxStoresPerMemset = 4096; 1375 1376#undef numTypes 1377#undef numIntTypes 1378#undef numVectorTypes 1379#undef numFloatTypes 1380} 1381 1382const char * 1383AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const 1384{ 1385 switch (Opcode) { 1386 default: return 0; 1387 case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY"; 1388 case AMDILISD::DP_TO_FP: return "AMDILISD::DP_TO_FP"; 1389 case AMDILISD::FP_TO_DP: return "AMDILISD::FP_TO_DP"; 1390 case AMDILISD::BITCONV: return "AMDILISD::BITCONV"; 1391 case AMDILISD::CMOV: return "AMDILISD::CMOV"; 1392 case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG"; 1393 case AMDILISD::INEGATE: return "AMDILISD::INEGATE"; 1394 case AMDILISD::MAD: return "AMDILISD::MAD"; 1395 case AMDILISD::UMAD: return "AMDILISD::UMAD"; 1396 case AMDILISD::CALL: return "AMDILISD::CALL"; 1397 case AMDILISD::RET: return "AMDILISD::RET"; 1398 case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI"; 1399 case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO"; 1400 case AMDILISD::ADD: return "AMDILISD::ADD"; 1401 case AMDILISD::UMUL: return "AMDILISD::UMUL"; 1402 case 
AMDILISD::AND: return "AMDILISD::AND"; 1403 case AMDILISD::OR: return "AMDILISD::OR"; 1404 case AMDILISD::NOT: return "AMDILISD::NOT"; 1405 case AMDILISD::XOR: return "AMDILISD::XOR"; 1406 case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF"; 1407 case AMDILISD::SMAX: return "AMDILISD::SMAX"; 1408 case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE"; 1409 case AMDILISD::MOVE: return "AMDILISD::MOVE"; 1410 case AMDILISD::VBUILD: return "AMDILISD::VBUILD"; 1411 case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT"; 1412 case AMDILISD::VINSERT: return "AMDILISD::VINSERT"; 1413 case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT"; 1414 case AMDILISD::LCREATE: return "AMDILISD::LCREATE"; 1415 case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI"; 1416 case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO"; 1417 case AMDILISD::DCREATE: return "AMDILISD::DCREATE"; 1418 case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI"; 1419 case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO"; 1420 case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2"; 1421 case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2"; 1422 case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2"; 1423 case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2"; 1424 case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2"; 1425 case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2"; 1426 case AMDILISD::CMP: return "AMDILISD::CMP"; 1427 case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT"; 1428 case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE"; 1429 case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT"; 1430 case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE"; 1431 case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ"; 1432 case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE"; 1433 case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG"; 1434 case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND"; 1435 case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO"; 1436 case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO"; 
1437 case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP"; 1438 case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR"; 1439 case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD"; 1440 case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND"; 1441 case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG"; 1442 case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC"; 1443 case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC"; 1444 case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX"; 1445 case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX"; 1446 case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN"; 1447 case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN"; 1448 case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR"; 1449 case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB"; 1450 case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB"; 1451 case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG"; 1452 case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR"; 1453 case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET"; 1454 case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET"; 1455 case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET"; 1456 case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET"; 1457 case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET"; 1458 case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET"; 1459 case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET"; 1460 case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET"; 1461 case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET"; 1462 case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET"; 1463 case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET"; 1464 case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET"; 1465 case AMDILISD::ATOM_G_XCHG_NORET: return 
"AMDILISD::ATOM_G_XCHG_NORET"; 1466 case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET"; 1467 case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD"; 1468 case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND"; 1469 case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG"; 1470 case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC"; 1471 case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC"; 1472 case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX"; 1473 case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX"; 1474 case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN"; 1475 case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN"; 1476 case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR"; 1477 case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB"; 1478 case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB"; 1479 case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG"; 1480 case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR"; 1481 case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET"; 1482 case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET"; 1483 case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET"; 1484 case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET"; 1485 case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET"; 1486 case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET"; 1487 case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET"; 1488 case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET"; 1489 case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET"; 1490 case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET"; 1491 case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET"; 1492 case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET"; 1493 case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET"; 1494 
case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD"; 1495 case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND"; 1496 case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG"; 1497 case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC"; 1498 case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC"; 1499 case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX"; 1500 case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX"; 1501 case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN"; 1502 case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN"; 1503 case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR"; 1504 case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR"; 1505 case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB"; 1506 case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB"; 1507 case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG"; 1508 case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR"; 1509 case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET"; 1510 case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET"; 1511 case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET"; 1512 case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET"; 1513 case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET"; 1514 case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET"; 1515 case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET"; 1516 case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET"; 1517 case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET"; 1518 case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET"; 1519 case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET"; 1520 case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET"; 1521 case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET"; 1522 case AMDILISD::ATOM_R_XCHG_NORET: return 
"AMDILISD::ATOM_R_XCHG_NORET"; 1523 case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET"; 1524 case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC"; 1525 case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET"; 1526 case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME"; 1527 case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET"; 1528 case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ"; 1529 case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE"; 1530 case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0"; 1531 case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1"; 1532 case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ"; 1533 case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE"; 1534 case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0"; 1535 case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1"; 1536 1537 }; 1538} 1539bool 1540AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 1541 const CallInst &I, unsigned Intrinsic) const 1542{ 1543 if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic 1544 || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) { 1545 return false; 1546 } 1547 bool bitCastToInt = false; 1548 unsigned IntNo; 1549 bool isRet = true; 1550 const AMDILSubtarget *STM = &this->getTargetMachine() 1551 .getSubtarget<AMDILSubtarget>(); 1552 switch (Intrinsic) { 1553 default: return false; // Don't custom lower most intrinsics. 
1554 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32: 1555 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32: 1556 IntNo = AMDILISD::ATOM_G_ADD; break; 1557 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret: 1558 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret: 1559 isRet = false; 1560 IntNo = AMDILISD::ATOM_G_ADD_NORET; break; 1561 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32: 1562 case AMDGPUIntrinsic::AMDIL_atomic_add_li32: 1563 IntNo = AMDILISD::ATOM_L_ADD; break; 1564 case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret: 1565 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret: 1566 isRet = false; 1567 IntNo = AMDILISD::ATOM_L_ADD_NORET; break; 1568 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32: 1569 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32: 1570 IntNo = AMDILISD::ATOM_R_ADD; break; 1571 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret: 1572 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret: 1573 isRet = false; 1574 IntNo = AMDILISD::ATOM_R_ADD_NORET; break; 1575 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32: 1576 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32: 1577 IntNo = AMDILISD::ATOM_G_AND; break; 1578 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret: 1579 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret: 1580 isRet = false; 1581 IntNo = AMDILISD::ATOM_G_AND_NORET; break; 1582 case AMDGPUIntrinsic::AMDIL_atomic_and_li32: 1583 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32: 1584 IntNo = AMDILISD::ATOM_L_AND; break; 1585 case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret: 1586 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret: 1587 isRet = false; 1588 IntNo = AMDILISD::ATOM_L_AND_NORET; break; 1589 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32: 1590 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32: 1591 IntNo = AMDILISD::ATOM_R_AND; break; 1592 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret: 1593 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret: 1594 isRet = false; 1595 IntNo = AMDILISD::ATOM_R_AND_NORET; break; 1596 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32: 1597 case 
AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32: 1598 IntNo = AMDILISD::ATOM_G_CMPXCHG; break; 1599 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret: 1600 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret: 1601 isRet = false; 1602 IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break; 1603 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32: 1604 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32: 1605 IntNo = AMDILISD::ATOM_L_CMPXCHG; break; 1606 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret: 1607 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret: 1608 isRet = false; 1609 IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break; 1610 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32: 1611 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32: 1612 IntNo = AMDILISD::ATOM_R_CMPXCHG; break; 1613 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret: 1614 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret: 1615 isRet = false; 1616 IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break; 1617 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32: 1618 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32: 1619 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1620 IntNo = AMDILISD::ATOM_G_DEC; 1621 } else { 1622 IntNo = AMDILISD::ATOM_G_SUB; 1623 } 1624 break; 1625 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret: 1626 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret: 1627 isRet = false; 1628 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1629 IntNo = AMDILISD::ATOM_G_DEC_NORET; 1630 } else { 1631 IntNo = AMDILISD::ATOM_G_SUB_NORET; 1632 } 1633 break; 1634 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32: 1635 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32: 1636 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1637 IntNo = AMDILISD::ATOM_L_DEC; 1638 } else { 1639 IntNo = AMDILISD::ATOM_L_SUB; 1640 } 1641 break; 1642 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret: 1643 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret: 1644 isRet = false; 1645 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1646 IntNo = 
AMDILISD::ATOM_L_DEC_NORET; 1647 } else { 1648 IntNo = AMDILISD::ATOM_L_SUB_NORET; 1649 } 1650 break; 1651 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32: 1652 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32: 1653 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1654 IntNo = AMDILISD::ATOM_R_DEC; 1655 } else { 1656 IntNo = AMDILISD::ATOM_R_SUB; 1657 } 1658 break; 1659 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret: 1660 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret: 1661 isRet = false; 1662 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1663 IntNo = AMDILISD::ATOM_R_DEC_NORET; 1664 } else { 1665 IntNo = AMDILISD::ATOM_R_SUB_NORET; 1666 } 1667 break; 1668 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32: 1669 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32: 1670 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1671 IntNo = AMDILISD::ATOM_G_INC; 1672 } else { 1673 IntNo = AMDILISD::ATOM_G_ADD; 1674 } 1675 break; 1676 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret: 1677 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret: 1678 isRet = false; 1679 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1680 IntNo = AMDILISD::ATOM_G_INC_NORET; 1681 } else { 1682 IntNo = AMDILISD::ATOM_G_ADD_NORET; 1683 } 1684 break; 1685 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32: 1686 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32: 1687 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1688 IntNo = AMDILISD::ATOM_L_INC; 1689 } else { 1690 IntNo = AMDILISD::ATOM_L_ADD; 1691 } 1692 break; 1693 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret: 1694 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret: 1695 isRet = false; 1696 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1697 IntNo = AMDILISD::ATOM_L_INC_NORET; 1698 } else { 1699 IntNo = AMDILISD::ATOM_L_ADD_NORET; 1700 } 1701 break; 1702 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32: 1703 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32: 1704 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1705 IntNo = AMDILISD::ATOM_R_INC; 1706 } else { 1707 IntNo = AMDILISD::ATOM_R_ADD; 
1708 } 1709 break; 1710 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret: 1711 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret: 1712 isRet = false; 1713 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1714 IntNo = AMDILISD::ATOM_R_INC_NORET; 1715 } else { 1716 IntNo = AMDILISD::ATOM_R_ADD_NORET; 1717 } 1718 break; 1719 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32: 1720 IntNo = AMDILISD::ATOM_G_MAX; break; 1721 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32: 1722 IntNo = AMDILISD::ATOM_G_UMAX; break; 1723 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret: 1724 isRet = false; 1725 IntNo = AMDILISD::ATOM_G_MAX_NORET; break; 1726 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret: 1727 isRet = false; 1728 IntNo = AMDILISD::ATOM_G_UMAX_NORET; break; 1729 case AMDGPUIntrinsic::AMDIL_atomic_max_li32: 1730 IntNo = AMDILISD::ATOM_L_MAX; break; 1731 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32: 1732 IntNo = AMDILISD::ATOM_L_UMAX; break; 1733 case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret: 1734 isRet = false; 1735 IntNo = AMDILISD::ATOM_L_MAX_NORET; break; 1736 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret: 1737 isRet = false; 1738 IntNo = AMDILISD::ATOM_L_UMAX_NORET; break; 1739 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32: 1740 IntNo = AMDILISD::ATOM_R_MAX; break; 1741 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32: 1742 IntNo = AMDILISD::ATOM_R_UMAX; break; 1743 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret: 1744 isRet = false; 1745 IntNo = AMDILISD::ATOM_R_MAX_NORET; break; 1746 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret: 1747 isRet = false; 1748 IntNo = AMDILISD::ATOM_R_UMAX_NORET; break; 1749 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32: 1750 IntNo = AMDILISD::ATOM_G_MIN; break; 1751 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32: 1752 IntNo = AMDILISD::ATOM_G_UMIN; break; 1753 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret: 1754 isRet = false; 1755 IntNo = AMDILISD::ATOM_G_MIN_NORET; break; 1756 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret: 1757 isRet = 
false; 1758 IntNo = AMDILISD::ATOM_G_UMIN_NORET; break; 1759 case AMDGPUIntrinsic::AMDIL_atomic_min_li32: 1760 IntNo = AMDILISD::ATOM_L_MIN; break; 1761 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32: 1762 IntNo = AMDILISD::ATOM_L_UMIN; break; 1763 case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret: 1764 isRet = false; 1765 IntNo = AMDILISD::ATOM_L_MIN_NORET; break; 1766 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret: 1767 isRet = false; 1768 IntNo = AMDILISD::ATOM_L_UMIN_NORET; break; 1769 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32: 1770 IntNo = AMDILISD::ATOM_R_MIN; break; 1771 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32: 1772 IntNo = AMDILISD::ATOM_R_UMIN; break; 1773 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret: 1774 isRet = false; 1775 IntNo = AMDILISD::ATOM_R_MIN_NORET; break; 1776 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret: 1777 isRet = false; 1778 IntNo = AMDILISD::ATOM_R_UMIN_NORET; break; 1779 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32: 1780 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32: 1781 IntNo = AMDILISD::ATOM_G_OR; break; 1782 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret: 1783 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret: 1784 isRet = false; 1785 IntNo = AMDILISD::ATOM_G_OR_NORET; break; 1786 case AMDGPUIntrinsic::AMDIL_atomic_or_li32: 1787 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32: 1788 IntNo = AMDILISD::ATOM_L_OR; break; 1789 case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret: 1790 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret: 1791 isRet = false; 1792 IntNo = AMDILISD::ATOM_L_OR_NORET; break; 1793 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32: 1794 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32: 1795 IntNo = AMDILISD::ATOM_R_OR; break; 1796 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret: 1797 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret: 1798 isRet = false; 1799 IntNo = AMDILISD::ATOM_R_OR_NORET; break; 1800 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32: 1801 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32: 1802 IntNo = 
AMDILISD::ATOM_G_SUB; break; 1803 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret: 1804 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret: 1805 isRet = false; 1806 IntNo = AMDILISD::ATOM_G_SUB_NORET; break; 1807 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32: 1808 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32: 1809 IntNo = AMDILISD::ATOM_L_SUB; break; 1810 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret: 1811 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret: 1812 isRet = false; 1813 IntNo = AMDILISD::ATOM_L_SUB_NORET; break; 1814 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32: 1815 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32: 1816 IntNo = AMDILISD::ATOM_R_SUB; break; 1817 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret: 1818 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret: 1819 isRet = false; 1820 IntNo = AMDILISD::ATOM_R_SUB_NORET; break; 1821 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32: 1822 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32: 1823 IntNo = AMDILISD::ATOM_G_RSUB; break; 1824 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret: 1825 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret: 1826 isRet = false; 1827 IntNo = AMDILISD::ATOM_G_RSUB_NORET; break; 1828 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32: 1829 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32: 1830 IntNo = AMDILISD::ATOM_L_RSUB; break; 1831 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret: 1832 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret: 1833 isRet = false; 1834 IntNo = AMDILISD::ATOM_L_RSUB_NORET; break; 1835 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32: 1836 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32: 1837 IntNo = AMDILISD::ATOM_R_RSUB; break; 1838 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret: 1839 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret: 1840 isRet = false; 1841 IntNo = AMDILISD::ATOM_R_RSUB_NORET; break; 1842 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32: 1843 bitCastToInt = true; 1844 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32: 1845 case 
AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32: 1846 IntNo = AMDILISD::ATOM_G_XCHG; break; 1847 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret: 1848 bitCastToInt = true; 1849 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret: 1850 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret: 1851 isRet = false; 1852 IntNo = AMDILISD::ATOM_G_XCHG_NORET; break; 1853 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32: 1854 bitCastToInt = true; 1855 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32: 1856 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32: 1857 IntNo = AMDILISD::ATOM_L_XCHG; break; 1858 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret: 1859 bitCastToInt = true; 1860 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret: 1861 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret: 1862 isRet = false; 1863 IntNo = AMDILISD::ATOM_L_XCHG_NORET; break; 1864 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32: 1865 bitCastToInt = true; 1866 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32: 1867 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32: 1868 IntNo = AMDILISD::ATOM_R_XCHG; break; 1869 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret: 1870 bitCastToInt = true; 1871 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret: 1872 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret: 1873 isRet = false; 1874 IntNo = AMDILISD::ATOM_R_XCHG_NORET; break; 1875 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32: 1876 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32: 1877 IntNo = AMDILISD::ATOM_G_XOR; break; 1878 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret: 1879 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret: 1880 isRet = false; 1881 IntNo = AMDILISD::ATOM_G_XOR_NORET; break; 1882 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32: 1883 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32: 1884 IntNo = AMDILISD::ATOM_L_XOR; break; 1885 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret: 1886 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret: 1887 isRet = false; 1888 IntNo = AMDILISD::ATOM_L_XOR_NORET; break; 1889 case 
AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
  case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
    IntNo = AMDILISD::ATOM_R_XOR; break;
  case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
  case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
    isRet = false;
    IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
  case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
    IntNo = AMDILISD::APPEND_ALLOC; break;
  case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
    isRet = false;
    IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
  case AMDGPUIntrinsic::AMDIL_append_consume_i32:
    IntNo = AMDILISD::APPEND_CONSUME; break;
  case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
    isRet = false;
    IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
  };

  // Describe the selected atomic/append intrinsic to the DAG builder.
  Info.opc = IntNo;
  // bitCastToInt is set only by the float-exchange (xchg_*f32*) cases above;
  // for those the in-memory type is f32, otherwise these atomics act on i32.
  Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
  Info.ptrVal = I.getOperand(0);  // first call operand is the address
  Info.offset = 0;
  Info.align = 4;
  Info.vol = true;                // treat as volatile so it is never folded away
  // Only the value-returning forms read memory back; the *_noret cases set
  // isRet = false above and are write-only from the DAG's point of view.
  Info.readMem = isRet;
  Info.writeMem = true;
  return true;
}
// The backend supports 32 and 64 bit floating point immediates
bool
AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  // Legal for any type whose scalar element is f32 or f64 (vector forms
  // included); the immediate's actual value does not matter.
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return true;
  } else {
    return false;
  }
}

// Never shrink f32/f64 constants: both widths are legal immediates here, so
// narrowing a double constant to float buys nothing.
bool
AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
    return false;
  } else {
    return true;
  }
}


// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
// be zero. Op is expected to be a target specific node. Used by DAG
// combiner.
1945 1946void 1947AMDILTargetLowering::computeMaskedBitsForTargetNode( 1948 const SDValue Op, 1949 APInt &KnownZero, 1950 APInt &KnownOne, 1951 const SelectionDAG &DAG, 1952 unsigned Depth) const 1953{ 1954 APInt KnownZero2; 1955 APInt KnownOne2; 1956 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything 1957 switch (Op.getOpcode()) { 1958 default: break; 1959 case AMDILISD::SELECT_CC: 1960 DAG.ComputeMaskedBits( 1961 Op.getOperand(1), 1962 KnownZero, 1963 KnownOne, 1964 Depth + 1 1965 ); 1966 DAG.ComputeMaskedBits( 1967 Op.getOperand(0), 1968 KnownZero2, 1969 KnownOne2 1970 ); 1971 assert((KnownZero & KnownOne) == 0 1972 && "Bits known to be one AND zero?"); 1973 assert((KnownZero2 & KnownOne2) == 0 1974 && "Bits known to be one AND zero?"); 1975 // Only known if known in both the LHS and RHS 1976 KnownOne &= KnownOne2; 1977 KnownZero &= KnownZero2; 1978 break; 1979 }; 1980} 1981 1982// This is the function that determines which calling convention should 1983// be used. Currently there is only one calling convention 1984CCAssignFn* 1985AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const 1986{ 1987 //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 1988 return CC_AMDIL32; 1989} 1990 1991// LowerCallResult - Lower the result values of an ISD::CALL into the 1992// appropriate copies out of appropriate physical registers. This assumes that 1993// Chain/InFlag are the input chain/flag to use, and that TheCall is the call 1994// being lowered. The returns a SDNode with the same number of values as the 1995// ISD::CALL. 
SDValue
AMDILTargetLowering::LowerCallResult(
    SDValue Chain,
    SDValue InFlag,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  // Assign locations to each value returned by this call
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
      getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    EVT CopyVT = RVLocs[i].getValVT();
    if (RVLocs[i].isRegLoc()) {
      // getCopyFromReg produces (value, chain, glue); result 1 is the new
      // chain, result 0 the copied value, result 2 the out-glue that links
      // consecutive copies together.
      Chain = DAG.getCopyFromReg(
          Chain,
          dl,
          RVLocs[i].getLocReg(),
          CopyVT,
          InFlag
          ).getValue(1);
      SDValue Val = Chain.getValue(0);
      InFlag = Chain.getValue(2);
      InVals.push_back(Val);
    }
  }

  return Chain;

}

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

MachineBasicBlock *
AMDILTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr *MI, MachineBasicBlock *BB) const
{
  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
    // ExpandCaseToAllTypes emits one case label per typed variant of
    // AMDIL::CMP; all of them are lowered through generateCMPInstr and the
    // pseudo instruction is then removed.
    ExpandCaseToAllTypes(AMDIL::CMP);
      generateCMPInstr(MI, BB, TII);
      MI->eraseFromParent();
      break;
    default:
      break;
  }
  return BB;
}

// Recursively assign SDNodeOrdering to any unordered nodes
// This is necessary to maintain source ordering of instructions
// under -O0 to avoid odd-looking "skipping around" issues.
// Propagate 'order' onto New and, recursively, onto any of its operands that
// do not yet carry an ordering (0 means "unordered").
static const SDValue
Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
{
  if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
    DAG.AssignOrdering( New.getNode(), order );
    for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
      Ordered( DAG, order, New.getOperand(i) );
  }
  return New;
}

// LOWER(A) expands to "case ISD::A: return Ordered(...Lower A...)": it
// dispatches the opcode to its Lower* routine and copies the original node's
// ordering onto the replacement nodes.
#define LOWER(A) \
  case ISD:: A: \
return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )

// LowerOperation - dispatcher for every opcode this target marks as Custom.
// Unhandled opcodes dump the node and assert.
SDValue
AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
    default:
      Op.getNode()->dump();
      assert(0 && "Custom lowering code for this"
          "instruction is not implemented yet!");
      break;
      // Each LOWER(X) below is a case label (a jump target), so placing them
      // after the default's break is legal and reachable.
      LOWER(GlobalAddress);
      LOWER(JumpTable);
      LOWER(ConstantPool);
      LOWER(ExternalSymbol);
      LOWER(FP_TO_SINT);
      LOWER(FP_TO_UINT);
      LOWER(SINT_TO_FP);
      LOWER(UINT_TO_FP);
      LOWER(ADD);
      LOWER(MUL);
      LOWER(SUB);
      LOWER(FDIV);
      LOWER(SDIV);
      LOWER(SREM);
      LOWER(UDIV);
      LOWER(UREM);
      LOWER(BUILD_VECTOR);
      LOWER(INSERT_VECTOR_ELT);
      LOWER(EXTRACT_VECTOR_ELT);
      LOWER(EXTRACT_SUBVECTOR);
      LOWER(SCALAR_TO_VECTOR);
      LOWER(CONCAT_VECTORS);
      LOWER(AND);
      LOWER(OR);
      LOWER(SELECT);
      LOWER(SELECT_CC);
      LOWER(SETCC);
      LOWER(SIGN_EXTEND_INREG);
      LOWER(BITCAST);
      LOWER(DYNAMIC_STACKALLOC);
      LOWER(BRCOND);
      LOWER(BR_CC);
      LOWER(FP_ROUND);
  }
  return Op;
}

// Frame offset of the first vararg slot (varargs are not implemented yet).
int
AMDILTargetLowering::getVarArgsFrameOffset() const
{
  return VarArgsFrameOffset;
}
#undef LOWER

// LowerGlobalAddress - globals with a scalar constant initializer are folded
// directly to that constant; everything else becomes a TargetGlobalAddress.
SDValue
AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = Op;
  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *G = GADN->getGlobal();
  DebugLoc DL = Op.getDebugLoc();
  const GlobalVariable *GV =
dyn_cast<GlobalVariable>(G); 2133 if (!GV) { 2134 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 2135 } else { 2136 if (GV->hasInitializer()) { 2137 const Constant *C = dyn_cast<Constant>(GV->getInitializer()); 2138 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) { 2139 DST = DAG.getConstant(CI->getValue(), Op.getValueType()); 2140 } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) { 2141 DST = DAG.getConstantFP(CF->getValueAPF(), 2142 Op.getValueType()); 2143 } else if (dyn_cast<ConstantAggregateZero>(C)) { 2144 EVT VT = Op.getValueType(); 2145 if (VT.isInteger()) { 2146 DST = DAG.getConstant(0, VT); 2147 } else { 2148 DST = DAG.getConstantFP(0, VT); 2149 } 2150 } else { 2151 assert(!"lowering this type of Global Address " 2152 "not implemented yet!"); 2153 C->dump(); 2154 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 2155 } 2156 } else { 2157 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 2158 } 2159 } 2160 return DST; 2161} 2162 2163SDValue 2164AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const 2165{ 2166 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 2167 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); 2168 return Result; 2169} 2170SDValue 2171AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const 2172{ 2173 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2174 EVT PtrVT = Op.getValueType(); 2175 SDValue Result; 2176 if (CP->isMachineConstantPoolEntry()) { 2177 Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 2178 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); 2179 } else { 2180 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 2181 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); 2182 } 2183 return Result; 2184} 2185 2186SDValue 2187AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const 2188{ 2189 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 2190 SDValue Result = 
DAG.getTargetExternalSymbol(Sym, MVT::i32);
  return Result;
}
/// LowerFORMAL_ARGUMENTS - transform physical registers into
/// virtual registers and generate load operations for
/// arguments placed on the stack.
/// TODO: isVarArg, hasStructRet, isMemReg
SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{

  MachineFunction &MF = DAG.getMachineFunction();
  AMDILMachineFunctionInfo *FuncInfo
    = MF.getInfo<AMDILMachineFunctionInfo>();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
      getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;  // currently unused; kept for future stack-arg handling

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      // Register argument: make it live-in and copy it into a vreg.
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFromType(
          RegVT.getSimpleVT().SimpleTy);

      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits. Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if(VA.isMemLoc()) {
      // Stack argument: load it from its fixed frame slot.
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
  // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  unsigned int StackSize = CCInfo.getNextStackOffset();
  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  FuncInfo->setBytesToPopOnReturn(StackSize);
  return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  // byval copies are not supported yet: the assert fires first in debug
  // builds; the getMemcpy below is the intended eventual implementation.
  assert(0 && "MemCopy does not exist yet");
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);

  return DAG.getMemcpy(Chain,
      Src.getDebugLoc(),
      Dst, Src, SizeNode, Flags.getByValAlign(),
      /*IsVol=*/false, /*AlwaysInline=*/true,
      MachinePointerInfo(), MachinePointerInfo());
}

// LowerMemOpCallTo - store one outgoing call argument to its stack slot
// (StackPtr + LocMemOffset); byval aggregates are copied instead.
SDValue
AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
    SDValue StackPtr, SDValue Arg,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    ISD::ArgFlagsTy Flags) const
{
  unsigned int LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD,
      dl,
      getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
  } else {
    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
        MachinePointerInfo::getStack(LocMemOffset),
        false, false, 0);
  }
  return PtrOff;
}
/// LowerCall - functions arguments are copied from virtual
/// regs to (physical regs)/(stack frame), CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Tail calls are unconditionally rejected, so every "if (isTailCall)"
  // branch below is currently dead scaffolding.
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: DO we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
      getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the calling operands, but need to change
  // if we have more than one calling convention
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = OutVals[i];
    // Promote the value if needed
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::SIGN_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::ZERO_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
      case CCValAssign::AExt:
        Arg = DAG.getNode(ISD::ANY_EXTEND,
            dl,
            VA.getLocVT(), Arg);
        break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());

      // emit ISD::STORE whichs stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  // Glue all the argument stores together so they stay before the call.
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  SDValue InFlag;
  if (!isTailCall) {
    // Copy register arguments into their physregs, chaining the copies with
    // glue so the scheduler keeps them adjacent to the call.
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
      InVals);
}
// checkMADType - classify an ISD::ADD as a candidate for a 24-bit or 32-bit
// multiply-add, based on the address spaces of the loads/stores that use it.
// NOTE(review): the early "return;" below deliberately disables the whole
// analysis (both flags always come back false), making everything after it
// dead code -- including an unchecked deref of PT.  Confirm before re-enabling.
static void checkMADType(
    SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD)
{
  bool globalLoadStore = false;
  is24bitMAD = false;
  is32bitMAD = false;
  return;
  assert(Op.getOpcode() == ISD::ADD && "The opcode must be a add in order for "
      "this to work correctly!");
  if (Op.getNode()->use_empty()) {
    return;
  }
  for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(),
      nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) {
    SDNode *ptr = *nBegin;
    const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr);
    // If we are not a LSBaseSDNode then we don't do this
    // optimization.
    // If we are a LSBaseSDNode, but the op is not the offset
    // or base pointer, then we don't do this optimization
    // (i.e. we are the value being stored)
    if (!lsNode ||
        (lsNode->writeMem() && lsNode->getOperand(1) == Op)) {
      return;
    }
    const PointerType *PT =
      dyn_cast<PointerType>(lsNode->getSrcValue()->getType());
    unsigned as = PT->getAddressSpace();
    switch(as) {
      default:
        // NOTE(review): intentional fallthrough into PRIVATE_ADDRESS? The
        // missing break is harmless (it can only re-set globalLoadStore).
        globalLoadStore = true;
      case AMDILAS::PRIVATE_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::CONSTANT_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::LOCAL_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::REGION_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
          globalLoadStore = true;
        }
        break;
    }
  }
  if (globalLoadStore) {
    is32bitMAD = true;
  } else {
    is24bitMAD = true;
  }
}

SDValue
2574AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const 2575{ 2576 SDValue LHS = Op.getOperand(0); 2577 SDValue RHS = Op.getOperand(1); 2578 DebugLoc DL = Op.getDebugLoc(); 2579 EVT OVT = Op.getValueType(); 2580 SDValue DST; 2581 const AMDILSubtarget *stm = &this->getTargetMachine() 2582 .getSubtarget<AMDILSubtarget>(); 2583 bool isVec = OVT.isVector(); 2584 if (OVT.getScalarType() == MVT::i64) { 2585 MVT INTTY = MVT::i32; 2586 if (OVT == MVT::v2i64) { 2587 INTTY = MVT::v2i32; 2588 } 2589 if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps) 2590 && INTTY == MVT::i32) { 2591 DST = DAG.getNode(AMDILISD::ADD, 2592 DL, 2593 OVT, 2594 LHS, RHS); 2595 } else { 2596 SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI; 2597 // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32 2598 LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS); 2599 RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS); 2600 LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS); 2601 RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS); 2602 INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO); 2603 INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI); 2604 SDValue cmp; 2605 cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2606 DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32), 2607 INTLO, RHSLO); 2608 cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp); 2609 INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp); 2610 DST = DAG.getNode((isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT, 2611 INTLO, INTHI); 2612 } 2613 } else { 2614 if (LHS.getOpcode() == ISD::FrameIndex || 2615 RHS.getOpcode() == ISD::FrameIndex) { 2616 DST = DAG.getNode(AMDILISD::ADDADDR, 2617 DL, 2618 OVT, 2619 LHS, RHS); 2620 } else { 2621 if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem) 2622 && LHS.getNumOperands() 2623 && RHS.getNumOperands()) { 2624 bool is24bitMAD = false; 2625 bool is32bitMAD = false; 2626 const ConstantSDNode *LHSConstOpCode = 2627 dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1)); 2628 const ConstantSDNode *RHSConstOpCode = 2629 dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1)); 2630 if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode) 2631 || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) 2632 || LHS.getOpcode() == ISD::MUL 2633 || RHS.getOpcode() == ISD::MUL) { 2634 SDValue Op1, Op2, Op3; 2635 // FIXME: Fix this so that it works for unsigned 24bit ops. 2636 if (LHS.getOpcode() == ISD::MUL) { 2637 Op1 = LHS.getOperand(0); 2638 Op2 = LHS.getOperand(1); 2639 Op3 = RHS; 2640 } else if (RHS.getOpcode() == ISD::MUL) { 2641 Op1 = RHS.getOperand(0); 2642 Op2 = RHS.getOperand(1); 2643 Op3 = LHS; 2644 } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) { 2645 Op1 = LHS.getOperand(0); 2646 Op2 = DAG.getConstant( 2647 1 << LHSConstOpCode->getZExtValue(), MVT::i32); 2648 Op3 = RHS; 2649 } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) { 2650 Op1 = RHS.getOperand(0); 2651 Op2 = DAG.getConstant( 2652 1 << RHSConstOpCode->getZExtValue(), MVT::i32); 2653 Op3 = LHS; 2654 } 2655 checkMADType(Op, stm, is24bitMAD, is32bitMAD); 2656 // We can possibly do a MAD transform! 
2657 if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) { 2658 uint32_t opcode = AMDGPUIntrinsic::AMDIL_mad24_i32; 2659 SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/); 2660 DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN, 2661 DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32), 2662 Op1, Op2, Op3); 2663 } else if(is32bitMAD) { 2664 SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/); 2665 DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN, 2666 DL, Tys, DAG.getEntryNode(), 2667 DAG.getConstant( 2668 AMDGPUIntrinsic::AMDIL_mad_i32, MVT::i32), 2669 Op1, Op2, Op3); 2670 } 2671 } 2672 } 2673 DST = DAG.getNode(AMDILISD::ADD, 2674 DL, 2675 OVT, 2676 LHS, RHS); 2677 } 2678 } 2679 return DST; 2680} 2681SDValue 2682AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG, 2683 uint32_t bits) const 2684{ 2685 DebugLoc DL = Op.getDebugLoc(); 2686 EVT INTTY = Op.getValueType(); 2687 EVT FPTY; 2688 if (INTTY.isVector()) { 2689 FPTY = EVT(MVT::getVectorVT(MVT::f32, 2690 INTTY.getVectorNumElements())); 2691 } else { 2692 FPTY = EVT(MVT::f32); 2693 } 2694 /* static inline uint 2695 __clz_Nbit(uint x) 2696 { 2697 int xor = 0x3f800000U | x; 2698 float tp = as_float(xor); 2699 float t = tp + -1.0f; 2700 uint tint = as_uint(t); 2701 int cmp = (x != 0); 2702 uint tsrc = tint >> 23; 2703 uint tmask = tsrc & 0xffU; 2704 uint cst = (103 + N)U - tmask; 2705 return cmp ? 
cst : N; 2706 } 2707 */ 2708 assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32 2709 && "genCLZu16 only works on 32bit types"); 2710 // uint x = Op 2711 SDValue x = Op; 2712 // xornode = 0x3f800000 | x 2713 SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY, 2714 DAG.getConstant(0x3f800000, INTTY), x); 2715 // float tp = as_float(xornode) 2716 SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode); 2717 // float t = tp + -1.0f 2718 SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp, 2719 DAG.getConstantFP(-1.0f, FPTY)); 2720 // uint tint = as_uint(t) 2721 SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t); 2722 // int cmp = (x != 0) 2723 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2724 DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x, 2725 DAG.getConstant(0, INTTY)); 2726 // uint tsrc = tint >> 23 2727 SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint, 2728 DAG.getConstant(23, INTTY)); 2729 // uint tmask = tsrc & 0xFF 2730 SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc, 2731 DAG.getConstant(0xFFU, INTTY)); 2732 // uint cst = (103 + bits) - tmask 2733 SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY, 2734 DAG.getConstant((103U + bits), INTTY), tmask); 2735 // return cmp ? cst : N 2736 cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst, 2737 DAG.getConstant(bits, INTTY)); 2738 return cst; 2739} 2740 2741SDValue 2742AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const 2743{ 2744 SDValue DST = SDValue(); 2745 DebugLoc DL = Op.getDebugLoc(); 2746 EVT INTTY = Op.getValueType(); 2747 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 2748 &this->getTargetMachine())->getSubtargetImpl(); 2749 if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) { 2750 //__clz_32bit(uint u) 2751 //{ 2752 // int z = __amdil_ffb_hi(u) ; 2753 // return z < 0 ? 
32 : z;
    // }
    // uint u = op
    SDValue u = Op;
    // int z = __amdil_ffb_hi(u)
    SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
    // int cmp = z < 0  (ffb_hi reports negative when no bit is set)
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
        z, DAG.getConstant(0, INTTY));
    // return cmp ? 32 : z
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
        DAG.getConstant(32, INTTY), z);
  } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // No ffb instruction: build the count from two 16-bit halves via the
    // float-exponent trick in genCLZuN.
    // static inline uint
    //__clz_32bit(uint x)
    //{
    // uint zh = __clz_16bit(x >> 16);
    // uint zl = __clz_16bit(x & 0xffffU);
    // return zh == 16U ? 16U + zl : zh;
    //}
    // uint x = Op
    SDValue x = Op;
    // uint xs16 = x >> 16
    SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
        DAG.getConstant(16, INTTY));
    // uint zh = __clz_16bit(xs16)
    SDValue zh = genCLZuN(xs16, DAG, 16);
    // uint xa16 = x & 0xFFFF
    SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
        DAG.getConstant(0xFFFFU, INTTY));
    // uint zl = __clz_16bit(xa16)
    SDValue zl = genCLZuN(xa16, DAG, 16);
    // uint cmp = zh == 16U  (high half was all zeros)
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zh, DAG.getConstant(16U, INTTY));
    // uint zl16 = zl + 16
    SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
        DAG.getConstant(16, INTTY), zl);
    // return cmp ? zl16 : zh
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp, zl16, zh);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}
// Count leading zeros of a 64-bit scalar or vector integer, composed from
// the 32-bit (HD5XXX+) or 23-bit (HD4XXX) helpers above.
SDValue
AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = SDValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY;
  EVT LONGTY = Op.getValueType();
  bool isVec = LONGTY.isVector();
  if (isVec) {
    INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
          .getVectorNumElements()));
  } else {
    INTTY = EVT(MVT::i32);
  }
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
    // Evergreen:
    // static inline uint
    // __clz_u64(ulong x)
    // {
    //uint zhi = __clz_32bit((uint)(x >> 32));
    //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
    //return zhi == 32U ? 32U + zlo : zhi;
    //}
    //ulong x = op
    SDValue x = Op;
    // uint xlo = x & 0xFFFFFFFF  (low 32 bits)
    SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xhi = x >> 32  (high 32 bits)
    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
    // uint zhi = __clz_32bit(xhi)
    SDValue zhi = genCLZu32(xhi, DAG);
    // uint zlo = __clz_32bit(xlo)
    SDValue zlo = genCLZu32(xlo, DAG);
    // uint cmp = zhi == 32
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhi, DAG.getConstant(32U, INTTY));
    // uint zlop32 = 32 + zlo
    SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
        DAG.getConstant(32U, INTTY), zlo);
    // return cmp ? 
zlop32: zhi
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
  } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // HD4XXX: split the 64-bit value into three 23-bit-or-less pieces and
    // combine the partial counts (genCLZuN handles at most the f32
    // mantissa width reliably on this hardware).
    // static inline uint
    //__clz_64bit(ulong x)
    //{
    //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
    //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
    //uint zl = __clz_23bit((uint)x & 0x7fffffU);
    //uint r = zh == 18U ? 18U + zm : zh;
    //return zh + zm == 41U ? 41U + zl : r;
    //}
    //ulong x = Op
    SDValue x = Op;
    // ulong xs46 = x >> 46
    SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(46, LONGTY));
    // uint ixs46 = (uint)xs46
    SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
    // ulong xs23 = x >> 23
    SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(23, LONGTY));
    // uint ixs23 = (uint)xs23
    SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
    // uint xs23m23 = ixs23 & 0x7FFFFF
    SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint ix = (uint)x
    SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xm23 = ix & 0x7FFFFF
    SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint zh = __clz_23bit(ixs46)
    SDValue zh = genCLZuN(ixs46, DAG, 23);
    // uint zm = __clz_23bit(xs23m23)
    SDValue zm = genCLZuN(xs23m23, DAG, 23);
    // uint zl = __clz_23bit(xm23)
    SDValue zl = genCLZuN(xm23, DAG, 23);
    // uint zhm5 = zh - 5  (top piece is only 18 bits: 64 - 46)
    SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
        DAG.getConstant(-5U, INTTY));
    SDValue const18 = DAG.getConstant(18, INTTY);
    SDValue const41 = DAG.getConstant(41, INTTY);
    // uint cmp1 = zhm5 == 18  (top 18 bits all zero)
    SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5, const18);
    // uint zhm5zm = zhm5 + zm
    SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
    // uint cmp2 = zhm5zm == 41  (top 41 bits all zero)
    SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5zm, const41);
    // uint zmp18 = zm + 18
    SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
    // uint zlp41 = zl + 41
    SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
    // uint r = cmp1 ? zmp18 : zhm5
    SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp1, zmp18, zhm5);
    // return cmp2 ? 
zlp41 : r
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}
// Software lowering of f64 -> i64/u64 for devices without a native
// conversion.  includeSign selects the signed variant (abs() the input,
// convert, then restore the sign).  Works on scalar f64 or vector-of-f64
// values (LONGVT/INTVT mirror the element count).
SDValue
AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  SDValue DST;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
          .getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
          .getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // Post-NI hardware can do f64 FP_TO_UINT of 32-bit pieces directly.
    // unsigned version:
    // uint uhi = (uint)(d * 0x1.0p-32);
    // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
    // return as_ulong2((uint2)(ulo, uhi));
    //
    // signed version:
    // double ad = fabs(d);
    // long l = unsigned_version(ad);
    // long nl = -l;
    // return d == ad ? l : nl;
    SDValue d = RHS;
    if (includeSign) {
      d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
    }
    // NOTE(review): 0x2f800000 / 0xcf800000 are the f32 bit patterns for
    // 0x1.0p-32 / -0x1.0p+32 — presumably widened by getConstantFP; confirm.
    SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
        DAG.getConstantFP(0x2f800000, RHSVT));
    SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
    SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
    ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
        DAG.getConstantFP(0xcf800000, RHSVT), d);
    SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
    SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
    if (includeSign) {
      // Negate and select on (RHS == fabs(RHS)) to restore the sign.
      SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
          DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
          RHS, d);
      l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
    }
    DST = l;
  } else {
    // Fully-software path: decompose the double, build the 64-bit mantissa,
    // and shift it right by an amount derived from the exponent.
    /*
       __attribute__((always_inline)) long
       cast_f64_to_i64(double d)
       {
       // Convert d in to 32-bit components
       long x = as_long(d);
       xhi = LCOMPHI(x);
       xlo = LCOMPLO(x);

       // Generate 'normalized' mantissa
       mhi = xhi | 0x00100000; // hidden bit
       mhi <<= 11;
       temp = xlo >> (32 - 11);
       mhi |= temp
       mlo = xlo << 11;

       // Compute shift right count from exponent
       e = (xhi >> (52-32)) & 0x7ff;
       sr = 1023 + 63 - e;
       srge64 = sr >= 64;
       srge32 = sr >= 32;

       // Compute result for 0 <= sr < 32
       rhi0 = mhi >> (sr &31);
       rlo0 = mlo >> (sr &31);
       temp = mhi << (32 - sr);
       temp |= rlo0;
       rlo0 = sr ? temp : rlo0;

       // Compute result for 32 <= sr
       rhi1 = 0;
       rlo1 = srge64 ? 0 : rhi0;

       // Pick between the 2 results
       rhi = srge32 ? rhi1 : rhi0;
       rlo = srge32 ? rlo1 : rlo0;

       // Optional saturate on overflow
       srlt0 = sr < 0;
       rhi = srlt0 ? MAXVALUE : rhi;
       rlo = srlt0 ? 
MAXVALUE : rlo;

       // Create long
       res = LCREATE( rlo, rhi );

       // Deal with sign bit (ignoring whether result is signed or unsigned value)
       if (includeSign) {
       sign = ((signed int) xhi) >> 31; fill with sign bit
       sign = LCREATE( sign, sign );
       res += sign;
       res ^= sign;
       }

       return res;
       }
     */
    SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
    SDValue c32 = DAG.getConstant( 32, INTVT );

    // Convert d in to 32-bit components
    SDValue d = RHS;
    SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

    // Generate 'normalized' mantissa: OR in the hidden bit, then shift the
    // 52-bit mantissa up to the top of the 64-bit pair.
    SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
        xhi, DAG.getConstant( 0x00100000, INTVT ) );
    mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
    SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
        xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
    mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
    SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );

    // Compute shift right count from exponent
    SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
        xhi, DAG.getConstant( 52-32, INTVT ) );
    e = DAG.getNode( ISD::AND, DL, INTVT,
        e, DAG.getConstant( 0x7ff, INTVT ) );
    SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( 1023 + 63, INTVT ), e );
    SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(64, INTVT));
    SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(32, INTVT));

    // Compute result for 0 <= sr < 32
    SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
    SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
    temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
    temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
    temp = DAG.getNode( ISD::OR, DL, INTVT, rlo0, temp );
    rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );

    // Compute result for 32 <= sr
    SDValue rhi1 = DAG.getConstant( 0, INTVT );
    SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge64, rhi1, rhi0 );

    // Pick between the 2 results
    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rhi1, rhi0 );
    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rlo1, rlo0 );

    // Create long
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );

    // Deal with sign bit: res = (res + sign) ^ sign negates when sign == -1.
    if (includeSign) {
      SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
          xhi, DAG.getConstant( 31, INTVT ) );
      sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
      res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
      res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
    }
    DST = res;
  }
  return DST;
}
// Software lowering of f64 -> i32/u32, same mantissa/exponent decomposition
// as genf64toi64 but only the high half of the mantissa is needed.
SDValue
AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  /*
     __attribute__((always_inline)) int
     cast_f64_to_[u|i]32(double d)
     {
     // Convert d in to 32-bit components
     long x = as_long(d);
     xhi = LCOMPHI(x);
     xlo = LCOMPLO(x);

     // Generate 'normalized' mantissa
     mhi = xhi | 0x00100000; // hidden bit
     mhi <<= 11;
     temp = xlo >> (32 - 11);
     mhi |= temp

     // Compute shift right count from exponent
     e = (xhi >> (52-32)) & 0x7ff;
     sr = 1023 + 31 - e;
     srge32 = sr >= 32;

     // Compute result for 0 <= sr < 32
     res = mhi >> (sr &31);
     res = srge32 ? 0 : res;

     // Optional saturate on overflow
     srlt0 = sr < 0;
     res = srlt0 ? MAXVALUE : res;

     // Deal with sign bit (ignoring whether result is signed or unsigned value)
     if (includeSign) {
     sign = ((signed int) xhi) >> 31; fill with sign bit
     res += sign;
     res ^= sign;
     }

     return res;
     }
   */
  SDValue c11 = DAG.getConstant( 63 - 52, INTVT );

  // Convert d in to 32-bit components
  SDValue d = RHS;
  SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
  SDValue xhi = DAG.getNode( (isVec) ? 
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
  SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

  // Generate 'normalized' mantissa: OR in the hidden bit and shift the
  // mantissa's top bits into a single 32-bit word.
  SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
      xhi, DAG.getConstant( 0x00100000, INTVT ) );
  mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
  SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
      xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
  mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );

  // Compute shift right count from exponent
  SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
      xhi, DAG.getConstant( 52-32, INTVT ) );
  e = DAG.getNode( ISD::AND, DL, INTVT,
      e, DAG.getConstant( 0x7ff, INTVT ) );
  SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
      DAG.getConstant( 1023 + 31, INTVT ), e );
  SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
      sr, DAG.getConstant(32, INTVT));

  // Compute result for 0 <= sr < 32; values shifted out entirely become 0.
  SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
  res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
      srge32, DAG.getConstant(0,INTVT), res );

  // Deal with sign bit: (res + sign) ^ sign negates when sign == -1.
  if (includeSign) {
    SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
        xhi, DAG.getConstant( 31, INTVT ) );
    res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
    res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
  }
  return res;
}
// Custom lowering for ISD::FP_TO_SINT.  Vector f64 sources on post-NI
// devices are scalarized (no vector 64-bit float conversion support);
// otherwise f64 sources are routed to the software helpers above, and
// everything else is left to the default expansion.
SDValue
AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
{
  SDValue RHS = Op.getOperand(0);
  EVT RHSVT = RHS.getValueType();
  MVT RST = RHSVT.getScalarType().getSimpleVT();
  EVT LHSVT = Op.getValueType();
  MVT LST = LHSVT.getScalarType().getSimpleVT();
  DebugLoc DL = Op.getDebugLoc();
  SDValue DST;
  const AMDILTargetMachine*
    amdtm = reinterpret_cast<const AMDILTargetMachine*>
    (&this->getTargetMachine());
  // NOTE(review): the dynamic_cast result is used unchecked below.
  const AMDILSubtarget*
    stm = dynamic_cast<const AMDILSubtarget*>(
        amdtm->getSubtargetImpl());
  if (RST == MVT::f64 && RHSVT.isVector()
      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // We dont support vector 64bit floating point convertions.
    // Scalarize: convert each element, rebuild the vector.
    for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
      op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
      if (!x) {
        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
      } else {
        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
            DST, op, DAG.getTargetConstant(x, MVT::i32));
      }
    }
  } else {
    if (RST == MVT::f64
        && LST == MVT::i32) {
      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
        // Hardware conversion exists: keep the node as-is.
        DST = SDValue(Op.getNode(), 0);
      } else {
        DST = genf64toi32(RHS, DAG, true);
      }
    } else if (RST == MVT::f64
        && LST == MVT::i64) {
      DST = genf64toi64(RHS, DAG, true);
    } else if (RST == MVT::f64
        && (LST == MVT::i8 || LST == MVT::i16)) {
      // Convert to i32 first, then truncate to the narrow type.
      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
      } else {
        SDValue ToInt = genf64toi32(RHS, DAG, true);
        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
      }

    } else {
      DST = SDValue(Op.getNode(), 0);
    }
  }
  return DST;
}

// Custom lowering for ISD::FP_TO_UINT; mirrors LowerFP_TO_SINT with the
// unsigned helper variants.
SDValue
AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST;
  SDValue RHS = Op.getOperand(0);
  EVT RHSVT = RHS.getValueType();
  MVT RST = RHSVT.getScalarType().getSimpleVT();
  EVT LHSVT = Op.getValueType();
  MVT LST = LHSVT.getScalarType().getSimpleVT();
  DebugLoc DL = Op.getDebugLoc();
  const AMDILTargetMachine*
    amdtm = reinterpret_cast<const AMDILTargetMachine*>
3254 (&this->getTargetMachine()); 3255 const AMDILSubtarget* 3256 stm = dynamic_cast<const AMDILSubtarget*>( 3257 amdtm->getSubtargetImpl()); 3258 if (RST == MVT::f64 && RHSVT.isVector() 3259 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3260 // We dont support vector 64bit floating point convertions. 3261 for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) { 3262 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 3263 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); 3264 op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op); 3265 if (!x) { 3266 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); 3267 } else { 3268 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, 3269 DST, op, DAG.getTargetConstant(x, MVT::i32)); 3270 } 3271 3272 } 3273 } else { 3274 if (RST == MVT::f64 3275 && LST == MVT::i32) { 3276 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3277 DST = SDValue(Op.getNode(), 0); 3278 } else { 3279 DST = genf64toi32(RHS, DAG, false); 3280 } 3281 } else if (RST == MVT::f64 3282 && LST == MVT::i64) { 3283 DST = genf64toi64(RHS, DAG, false); 3284 } else if (RST == MVT::f64 3285 && (LST == MVT::i8 || LST == MVT::i16)) { 3286 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3287 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0)); 3288 } else { 3289 SDValue ToInt = genf64toi32(RHS, DAG, false); 3290 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt); 3291 } 3292 3293 } else { 3294 DST = SDValue(Op.getNode(), 0); 3295 } 3296 } 3297 return DST; 3298} 3299SDValue 3300AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT, 3301 SelectionDAG &DAG) const 3302{ 3303 EVT RHSVT = RHS.getValueType(); 3304 DebugLoc DL = RHS.getDebugLoc(); 3305 EVT INTVT; 3306 EVT LONGVT; 3307 bool isVec = RHSVT.isVector(); 3308 if (isVec) { 3309 LONGVT = EVT(MVT::getVectorVT(MVT::i64, 3310 RHSVT.getVectorNumElements())); 3311 INTVT = EVT(MVT::getVectorVT(MVT::i32, 3312 RHSVT.getVectorNumElements())); 3313 } else { 
LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  SDValue x = RHS;
  const AMDILTargetMachine*
    amdtm = reinterpret_cast<const AMDILTargetMachine*>
    (&this->getTargetMachine());
  const AMDILSubtarget*
    stm = dynamic_cast<const AMDILSubtarget*>(
        amdtm->getSubtargetImpl());
  if (stm->calVersion() >= CAL_VERSION_SC_135) {
    // Bias trick: pack x as the low 32 bits under exponent 0x43300000,
    // then subtract 0x1.0p+52 to leave exactly (double)x.
    // unsigned x = RHS;
    // ulong xd = (ulong)(0x4330_0000 << 32) | x;
    // double d = as_double( xd );
    // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x,
        DAG.getConstant( 0x43300000, INTVT ) );
    SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
    SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
        DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
    return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
  } else {
    // Manual construction: normalize with CLZ and pack exponent + mantissa.
    SDValue clz = genCLZu32(x, DAG);

    // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
    // Except for an input 0... which requires a 0 exponent
    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( (1023+31), INTVT), clz );
    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x );

    // Normalize frac
    SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz );

    // Eliminate hidden bit
    rhi = DAG.getNode( ISD::AND, DL, INTVT,
        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );

    // Pack exponent and frac
    SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT,
        rhi, DAG.getConstant( (32 - 11), INTVT ) );
    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
        rhi, DAG.getConstant( 11, INTVT ) );
    exp = DAG.getNode( ISD::SHL, DL, INTVT,
        exp, DAG.getConstant( 20, INTVT ) );
    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );

    // Convert 2 x 32 in to 1 x 64, then to double precision float type
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
  }
}
// Software lowering of u64 -> f64.  Three strategies by device/CAL version:
// native per-half UINT_TO_FP plus MAD, the two-double bias trick, or a
// fully manual CLZ-based construction with round-to-nearest-even.
SDValue
AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
    SelectionDAG &DAG) const
{
  EVT RHSVT = RHS.getValueType();
  DebugLoc DL = RHS.getDebugLoc();
  EVT INTVT;
  EVT LONGVT;
  bool isVec = RHSVT.isVector();
  if (isVec) {
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    INTVT = EVT(MVT::i32);
  }
  LONGVT = RHSVT;
  SDValue x = RHS;
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // double dhi = (double)(as_uint2(x).y);
    // double dlo = (double)(as_uint2(x).x);
    // return mad(dhi, 0x1.0p+32, dlo)
    SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
    dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
    SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
    dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
    // NOTE(review): 0x4f800000 is the f32 bit pattern of 0x1.0p+32.
    return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
        DAG.getConstantFP(0x4f800000, LHSVT), dlo);
  } else if (stm->calVersion() >= CAL_VERSION_SC_135) {
    // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
    // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
    // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL
    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
    SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
    SDValue xhi = DAG.getNode((isVec) ? 
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
    SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
    SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
    SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
        DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
    hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
    return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );

  } else {
    // Fully manual path for old devices: normalize via 64-bit CLZ, then
    // pack sign/exponent/mantissa and round to nearest even.
    SDValue clz = genCLZu64(x, DAG);
    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

    // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( (1023+63), INTVT), clz );
    SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        mash, exp, mash ); // exp = exp, or 0 if input was 0

    // Normalize frac: shift the 64-bit value left by clz, done with 32-bit
    // pieces and a select on whether clz crosses the 32-bit boundary.
    SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
        clz, DAG.getConstant( 31, INTVT ) );
    SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( 32, INTVT ), clz31 );
    SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
    SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
    t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
    SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
    SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
    SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
    SDValue rlo2 = DAG.getConstant( 0, INTVT );
    SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
        clz, DAG.getConstant( 32, INTVT ) );
    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        clz32, rhi2, rhi1 );
    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        clz32, rlo2, rlo1 );

    // Eliminate hidden bit
    rhi = DAG.getNode( ISD::AND, DL, INTVT,
        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );

    // Save bits needed to round properly
    SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
        rlo, DAG.getConstant( 0x7ff, INTVT ) );

    // Pack exponent and frac
    rlo = DAG.getNode( ISD::SRL, DL, INTVT,
        rlo, DAG.getConstant( 11, INTVT ) );
    SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
        rhi, DAG.getConstant( (32 - 11), INTVT ) );
    rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
        rhi, DAG.getConstant( 11, INTVT ) );
    exp = DAG.getNode( ISD::SHL, DL, INTVT,
        exp, DAG.getConstant( 20, INTVT ) );
    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );

    // Compute rounding bit (round-to-nearest-even from guard/round/sticky)
    SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
        rlo, DAG.getConstant( 1, INTVT ) );
    SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
        round, DAG.getConstant( 0x3ff, INTVT ) );
    grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
        grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
    grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
    round = DAG.getNode( ISD::SRL, DL, INTVT,
        round, DAG.getConstant( 10, INTVT ) );
    round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1

    // Add rounding bit
    SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
        round, DAG.getConstant( 0, INTVT ) );
    SDValue res = DAG.getNode( (isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
    res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
  }
}
// Custom lowering for ISD::UINT_TO_FP.  Vector f64 results on post-NI
// devices are scalarized; scalar i32/i64 -> f64 goes through the software
// helpers above; anything else is left untouched.
SDValue
AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
{
  SDValue RHS = Op.getOperand(0);
  EVT RHSVT = RHS.getValueType();
  MVT RST = RHSVT.getScalarType().getSimpleVT();
  EVT LHSVT = Op.getValueType();
  MVT LST = LHSVT.getScalarType().getSimpleVT();
  DebugLoc DL = Op.getDebugLoc();
  SDValue DST;
  EVT INTVT;
  EVT LONGVT;
  const AMDILTargetMachine*
    amdtm = reinterpret_cast<const AMDILTargetMachine*>
    (&this->getTargetMachine());
  const AMDILSubtarget*
    stm = dynamic_cast<const AMDILSubtarget*>(
        amdtm->getSubtargetImpl());
  if (LST == MVT::f64 && LHSVT.isVector()
      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // We dont support vector 64bit floating point convertions.
    // Scalarize: convert each element, rebuild the vector.
    DST = Op; // placeholder; replaced on the first loop iteration
    for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
      op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
      if (!x) {
        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
      } else {
        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
            op, DAG.getTargetConstant(x, MVT::i32));
      }

    }
  } else {

    if (RST == MVT::i32
        && LST == MVT::f64) {
      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
        // Hardware conversion exists: keep the node as-is.
        DST = SDValue(Op.getNode(), 0);
      } else {
        DST = genu32tof64(RHS, LHSVT, DAG);
      }
    } else if (RST == MVT::i64
        && LST == MVT::f64) {
      DST = genu64tof64(RHS, LHSVT, DAG);
    } else {
      DST = SDValue(Op.getNode(), 0);
    }
  }
  return DST;
}

// Custom lowering for ISD::SINT_TO_FP; see body in the following lines.
SDValue
AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
{
SDValue RHS = Op.getOperand(0); 3538 EVT RHSVT = RHS.getValueType(); 3539 MVT RST = RHSVT.getScalarType().getSimpleVT(); 3540 EVT INTVT; 3541 EVT LONGVT; 3542 SDValue DST; 3543 bool isVec = RHSVT.isVector(); 3544 DebugLoc DL = Op.getDebugLoc(); 3545 EVT LHSVT = Op.getValueType(); 3546 MVT LST = LHSVT.getScalarType().getSimpleVT(); 3547 const AMDILTargetMachine* 3548 amdtm = reinterpret_cast<const AMDILTargetMachine*> 3549 (&this->getTargetMachine()); 3550 const AMDILSubtarget* 3551 stm = dynamic_cast<const AMDILSubtarget*>( 3552 amdtm->getSubtargetImpl()); 3553 if (LST == MVT::f64 && LHSVT.isVector() 3554 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3555 // We dont support vector 64bit floating point convertions. 3556 for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) { 3557 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 3558 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); 3559 op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op); 3560 if (!x) { 3561 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); 3562 } else { 3563 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST, 3564 op, DAG.getTargetConstant(x, MVT::i32)); 3565 } 3566 3567 } 3568 } else { 3569 3570 if (isVec) { 3571 LONGVT = EVT(MVT::getVectorVT(MVT::i64, 3572 RHSVT.getVectorNumElements())); 3573 INTVT = EVT(MVT::getVectorVT(MVT::i32, 3574 RHSVT.getVectorNumElements())); 3575 } else { 3576 LONGVT = EVT(MVT::i64); 3577 INTVT = EVT(MVT::i32); 3578 } 3579 MVT RST = RHSVT.getScalarType().getSimpleVT(); 3580 if ((RST == MVT::i32 || RST == MVT::i64) 3581 && LST == MVT::f64) { 3582 if (RST == MVT::i32) { 3583 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3584 DST = SDValue(Op.getNode(), 0); 3585 return DST; 3586 } 3587 } 3588 SDValue c31 = DAG.getConstant( 31, INTVT ); 3589 SDValue cSbit = DAG.getConstant( 0x80000000, INTVT ); 3590 3591 SDValue S; // Sign, as 0 or -1 3592 SDValue Sbit; // Sign bit, as one bit, MSB only. 
3593 if (RST == MVT::i32) { 3594 Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit ); 3595 S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 ); 3596 } else { // 64-bit case... SRA of 64-bit values is slow 3597 SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS ); 3598 Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit ); 3599 SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 ); 3600 S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp ); 3601 } 3602 3603 // get abs() of input value, given sign as S (0 or -1) 3604 // SpI = RHS + S 3605 SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S); 3606 // SpIxS = SpI ^ S 3607 SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S); 3608 3609 // Convert unsigned value to double precision 3610 SDValue R; 3611 if (RST == MVT::i32) { 3612 // r = cast_u32_to_f64(SpIxS) 3613 R = genu32tof64(SpIxS, LHSVT, DAG); 3614 } else { 3615 // r = cast_u64_to_f64(SpIxS) 3616 R = genu64tof64(SpIxS, LHSVT, DAG); 3617 } 3618 3619 // drop in the sign bit 3620 SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R ); 3621 SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t ); 3622 SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t ); 3623 thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit ); 3624 t = DAG.getNode( (isVec) ? 
        AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi );
    DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t );
  } else {
    // Unhandled type combination: leave the node as-is.
    DST = SDValue(Op.getNode(), 0);
  }
}
return DST;
}
// Lower 64-bit (scalar i64 / v2i64) integer subtraction by splitting the
// operands into 32-bit halves, subtracting each half, and propagating the
// borrow from the low half into the high half.
SDValue
AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
{
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = RHS.getValueType().isVector();
  if (OVT.getScalarType() == MVT::i64) {
    /*const AMDILTargetMachine*
      amdtm = reinterpret_cast<const AMDILTargetMachine*>
      (&this->getTargetMachine());
      const AMDILSubtarget*
      stm = dynamic_cast<const AMDILSubtarget*>(
          amdtm->getSubtargetImpl());*/
    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
    // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
    LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
    RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
    LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
    RHSHI = DAG.getNode((isVec) ?
        AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
    INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
    INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
    //TODO: need to use IBORROW on HD5XXX and later hardware
    // Borrow detection: (LHSLO u< RHSLO) yields an all-ones mask (-1),
    // which is then ADDed to the high half, i.e. subtracting the borrow.
    SDValue cmp;
    if (OVT == MVT::i64) {
      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSLO, RHSLO);
    } else {
      // v2i64: compare each low half lane separately, then rebuild a
      // v2i32 borrow mask.
      SDValue cmplo;
      SDValue cmphi;
      SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
      SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
      cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRLO, RHSRLO);
      cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRHI, RHSRHI);
      cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
      cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
          cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
    }
    INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
    DST = DAG.getNode((isVec) ?
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT, 3689 INTLO, INTHI); 3690 } else { 3691 DST = SDValue(Op.getNode(), 0); 3692 } 3693 return DST; 3694} 3695SDValue 3696AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const 3697{ 3698 EVT OVT = Op.getValueType(); 3699 SDValue DST; 3700 if (OVT.getScalarType() == MVT::f64) { 3701 DST = LowerFDIV64(Op, DAG); 3702 } else if (OVT.getScalarType() == MVT::f32) { 3703 DST = LowerFDIV32(Op, DAG); 3704 } else { 3705 DST = SDValue(Op.getNode(), 0); 3706 } 3707 return DST; 3708} 3709 3710SDValue 3711AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const 3712{ 3713 EVT OVT = Op.getValueType(); 3714 SDValue DST; 3715 if (OVT.getScalarType() == MVT::i64) { 3716 DST = LowerSDIV64(Op, DAG); 3717 } else if (OVT.getScalarType() == MVT::i32) { 3718 DST = LowerSDIV32(Op, DAG); 3719 } else if (OVT.getScalarType() == MVT::i16 3720 || OVT.getScalarType() == MVT::i8) { 3721 DST = LowerSDIV24(Op, DAG); 3722 } else { 3723 DST = SDValue(Op.getNode(), 0); 3724 } 3725 return DST; 3726} 3727 3728SDValue 3729AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const 3730{ 3731 EVT OVT = Op.getValueType(); 3732 SDValue DST; 3733 if (OVT.getScalarType() == MVT::i64) { 3734 DST = LowerUDIV64(Op, DAG); 3735 } else if (OVT.getScalarType() == MVT::i32) { 3736 DST = LowerUDIV32(Op, DAG); 3737 } else if (OVT.getScalarType() == MVT::i16 3738 || OVT.getScalarType() == MVT::i8) { 3739 DST = LowerUDIV24(Op, DAG); 3740 } else { 3741 DST = SDValue(Op.getNode(), 0); 3742 } 3743 return DST; 3744} 3745 3746SDValue 3747AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const 3748{ 3749 EVT OVT = Op.getValueType(); 3750 SDValue DST; 3751 if (OVT.getScalarType() == MVT::i64) { 3752 DST = LowerSREM64(Op, DAG); 3753 } else if (OVT.getScalarType() == MVT::i32) { 3754 DST = LowerSREM32(Op, DAG); 3755 } else if (OVT.getScalarType() == MVT::i16) { 3756 DST = LowerSREM16(Op, DAG); 3757 } else if (OVT.getScalarType() == MVT::i8) { 
3758 DST = LowerSREM8(Op, DAG); 3759 } else { 3760 DST = SDValue(Op.getNode(), 0); 3761 } 3762 return DST; 3763} 3764 3765SDValue 3766AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const 3767{ 3768 EVT OVT = Op.getValueType(); 3769 SDValue DST; 3770 if (OVT.getScalarType() == MVT::i64) { 3771 DST = LowerUREM64(Op, DAG); 3772 } else if (OVT.getScalarType() == MVT::i32) { 3773 DST = LowerUREM32(Op, DAG); 3774 } else if (OVT.getScalarType() == MVT::i16) { 3775 DST = LowerUREM16(Op, DAG); 3776 } else if (OVT.getScalarType() == MVT::i8) { 3777 DST = LowerUREM8(Op, DAG); 3778 } else { 3779 DST = SDValue(Op.getNode(), 0); 3780 } 3781 return DST; 3782} 3783 3784SDValue 3785AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const 3786{ 3787 DebugLoc DL = Op.getDebugLoc(); 3788 EVT OVT = Op.getValueType(); 3789 SDValue DST; 3790 bool isVec = OVT.isVector(); 3791 if (OVT.getScalarType() != MVT::i64) 3792 { 3793 DST = SDValue(Op.getNode(), 0); 3794 } else { 3795 assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!"); 3796 // TODO: This needs to be turned into a tablegen pattern 3797 SDValue LHS = Op.getOperand(0); 3798 SDValue RHS = Op.getOperand(1); 3799 3800 MVT INTTY = MVT::i32; 3801 if (OVT == MVT::v2i64) { 3802 INTTY = MVT::v2i32; 3803 } 3804 // mul64(h1, l1, h0, l0) 3805 SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, 3806 DL, 3807 INTTY, LHS); 3808 SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, 3809 DL, 3810 INTTY, LHS); 3811 SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, 3812 DL, 3813 INTTY, RHS); 3814 SDValue RHSHI = DAG.getNode((isVec) ? 
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, 3815 DL, 3816 INTTY, RHS); 3817 // MULLO_UINT_1 r1, h0, l1 3818 SDValue RHILLO = DAG.getNode(AMDILISD::UMUL, 3819 DL, 3820 INTTY, RHSHI, LHSLO); 3821 // MULLO_UINT_1 r2, h1, l0 3822 SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL, 3823 DL, 3824 INTTY, RHSLO, LHSHI); 3825 // ADD_INT hr, r1, r2 3826 SDValue ADDHI = DAG.getNode(ISD::ADD, 3827 DL, 3828 INTTY, RHILLO, RLOHHI); 3829 // MULHI_UINT_1 r3, l1, l0 3830 SDValue RLOLLO = DAG.getNode(ISD::MULHU, 3831 DL, 3832 INTTY, RHSLO, LHSLO); 3833 // ADD_INT hr, hr, r3 3834 SDValue HIGH = DAG.getNode(ISD::ADD, 3835 DL, 3836 INTTY, ADDHI, RLOLLO); 3837 // MULLO_UINT_1 l3, l1, l0 3838 SDValue LOW = DAG.getNode(AMDILISD::UMUL, 3839 DL, 3840 INTTY, LHSLO, RHSLO); 3841 DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, 3842 DL, 3843 OVT, LOW, HIGH); 3844 } 3845 return DST; 3846} 3847SDValue 3848AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const 3849{ 3850 EVT VT = Op.getValueType(); 3851 //printSDValue(Op, 1); 3852 SDValue Nodes1; 3853 SDValue second; 3854 SDValue third; 3855 SDValue fourth; 3856 DebugLoc DL = Op.getDebugLoc(); 3857 Nodes1 = DAG.getNode(AMDILISD::VBUILD, 3858 DL, 3859 VT, Op.getOperand(0)); 3860 bool allEqual = true; 3861 for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) { 3862 if (Op.getOperand(0) != Op.getOperand(x)) { 3863 allEqual = false; 3864 break; 3865 } 3866 } 3867 if (allEqual) { 3868 return Nodes1; 3869 } 3870 switch(Op.getNumOperands()) { 3871 default: 3872 case 1: 3873 break; 3874 case 4: 3875 fourth = Op.getOperand(3); 3876 if (fourth.getOpcode() != ISD::UNDEF) { 3877 Nodes1 = DAG.getNode( 3878 ISD::INSERT_VECTOR_ELT, 3879 DL, 3880 Op.getValueType(), 3881 Nodes1, 3882 fourth, 3883 DAG.getConstant(7, MVT::i32)); 3884 } 3885 case 3: 3886 third = Op.getOperand(2); 3887 if (third.getOpcode() != ISD::UNDEF) { 3888 Nodes1 = DAG.getNode( 3889 ISD::INSERT_VECTOR_ELT, 3890 DL, 3891 Op.getValueType(), 3892 Nodes1, 3893 
third, 3894 DAG.getConstant(6, MVT::i32)); 3895 } 3896 case 2: 3897 second = Op.getOperand(1); 3898 if (second.getOpcode() != ISD::UNDEF) { 3899 Nodes1 = DAG.getNode( 3900 ISD::INSERT_VECTOR_ELT, 3901 DL, 3902 Op.getValueType(), 3903 Nodes1, 3904 second, 3905 DAG.getConstant(5, MVT::i32)); 3906 } 3907 break; 3908 }; 3909 return Nodes1; 3910} 3911 3912SDValue 3913AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, 3914 SelectionDAG &DAG) const 3915{ 3916 DebugLoc DL = Op.getDebugLoc(); 3917 EVT VT = Op.getValueType(); 3918 const SDValue *ptr = NULL; 3919 const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 3920 uint32_t swizzleNum = 0; 3921 SDValue DST; 3922 if (!VT.isVector()) { 3923 SDValue Res = Op.getOperand(0); 3924 return Res; 3925 } 3926 3927 if (Op.getOperand(1).getOpcode() != ISD::UNDEF) { 3928 ptr = &Op.getOperand(1); 3929 } else { 3930 ptr = &Op.getOperand(0); 3931 } 3932 if (CSDN) { 3933 swizzleNum = (uint32_t)CSDN->getZExtValue(); 3934 uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8)); 3935 uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8)); 3936 DST = DAG.getNode(AMDILISD::VINSERT, 3937 DL, 3938 VT, 3939 Op.getOperand(0), 3940 *ptr, 3941 DAG.getTargetConstant(mask2, MVT::i32), 3942 DAG.getTargetConstant(mask3, MVT::i32)); 3943 } else { 3944 uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8)); 3945 uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8)); 3946 SDValue res = DAG.getNode(AMDILISD::VINSERT, 3947 DL, VT, Op.getOperand(0), *ptr, 3948 DAG.getTargetConstant(mask2, MVT::i32), 3949 DAG.getTargetConstant(mask3, MVT::i32)); 3950 for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) { 3951 mask2 = 0x04030201 & ~(0xFF << (x * 8)); 3952 mask3 = 0x01010101 & (0xFF << (x * 8)); 3953 SDValue t = DAG.getNode(AMDILISD::VINSERT, 3954 DL, VT, Op.getOperand(0), *ptr, 3955 DAG.getTargetConstant(mask2, MVT::i32), 3956 DAG.getTargetConstant(mask3, MVT::i32)); 3957 SDValue c = DAG.getNode(AMDILISD::CMP, DL, 
ptr->getValueType(), 3958 DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32), 3959 Op.getOperand(2), DAG.getConstant(x, MVT::i32)); 3960 c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c); 3961 res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res); 3962 } 3963 DST = res; 3964 } 3965 return DST; 3966} 3967 3968SDValue 3969AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, 3970 SelectionDAG &DAG) const 3971{ 3972 EVT VT = Op.getValueType(); 3973 //printSDValue(Op, 1); 3974 const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 3975 uint64_t swizzleNum = 0; 3976 DebugLoc DL = Op.getDebugLoc(); 3977 SDValue Res; 3978 if (!Op.getOperand(0).getValueType().isVector()) { 3979 Res = Op.getOperand(0); 3980 return Res; 3981 } 3982 if (CSDN) { 3983 // Static vector extraction 3984 swizzleNum = CSDN->getZExtValue() + 1; 3985 Res = DAG.getNode(AMDILISD::VEXTRACT, 3986 DL, VT, 3987 Op.getOperand(0), 3988 DAG.getTargetConstant(swizzleNum, MVT::i32)); 3989 } else { 3990 SDValue Op1 = Op.getOperand(1); 3991 uint32_t vecSize = 4; 3992 SDValue Op0 = Op.getOperand(0); 3993 SDValue res = DAG.getNode(AMDILISD::VEXTRACT, 3994 DL, VT, Op0, 3995 DAG.getTargetConstant(1, MVT::i32)); 3996 if (Op0.getValueType().isVector()) { 3997 vecSize = Op0.getValueType().getVectorNumElements(); 3998 } 3999 for (uint32_t x = 2; x <= vecSize; ++x) { 4000 SDValue t = DAG.getNode(AMDILISD::VEXTRACT, 4001 DL, VT, Op0, 4002 DAG.getTargetConstant(x, MVT::i32)); 4003 SDValue c = DAG.getNode(AMDILISD::CMP, 4004 DL, Op1.getValueType(), 4005 DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32), 4006 Op1, DAG.getConstant(x, MVT::i32)); 4007 res = DAG.getNode(AMDILISD::CMOVLOG, DL, 4008 VT, c, t, res); 4009 4010 } 4011 Res = res; 4012 } 4013 return Res; 4014} 4015 4016SDValue 4017AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, 4018 SelectionDAG &DAG) const 4019{ 4020 uint32_t vecSize = Op.getValueType().getVectorNumElements(); 4021 SDValue src = Op.getOperand(0); 4022 const 
ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 4023 uint64_t offset = 0; 4024 EVT vecType = Op.getValueType().getVectorElementType(); 4025 DebugLoc DL = Op.getDebugLoc(); 4026 SDValue Result; 4027 if (CSDN) { 4028 offset = CSDN->getZExtValue(); 4029 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 4030 DL,vecType, src, DAG.getConstant(offset, MVT::i32)); 4031 Result = DAG.getNode(AMDILISD::VBUILD, DL, 4032 Op.getValueType(), Result); 4033 for (uint32_t x = 1; x < vecSize; ++x) { 4034 SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType, 4035 src, DAG.getConstant(offset + x, MVT::i32)); 4036 if (elt.getOpcode() != ISD::UNDEF) { 4037 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, 4038 Op.getValueType(), Result, elt, 4039 DAG.getConstant(x, MVT::i32)); 4040 } 4041 } 4042 } else { 4043 SDValue idx = Op.getOperand(1); 4044 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 4045 DL, vecType, src, idx); 4046 Result = DAG.getNode(AMDILISD::VBUILD, DL, 4047 Op.getValueType(), Result); 4048 for (uint32_t x = 1; x < vecSize; ++x) { 4049 idx = DAG.getNode(ISD::ADD, DL, vecType, 4050 idx, DAG.getConstant(1, MVT::i32)); 4051 SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType, 4052 src, idx); 4053 if (elt.getOpcode() != ISD::UNDEF) { 4054 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, 4055 Op.getValueType(), Result, elt, idx); 4056 } 4057 } 4058 } 4059 return Result; 4060} 4061SDValue 4062AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 4063 SelectionDAG &DAG) const 4064{ 4065 SDValue Res = DAG.getNode(AMDILISD::VBUILD, 4066 Op.getDebugLoc(), 4067 Op.getValueType(), 4068 Op.getOperand(0)); 4069 return Res; 4070} 4071SDValue 4072AMDILTargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const 4073{ 4074 SDValue andOp; 4075 andOp = DAG.getNode( 4076 AMDILISD::AND, 4077 Op.getDebugLoc(), 4078 Op.getValueType(), 4079 Op.getOperand(0), 4080 Op.getOperand(1)); 4081 return andOp; 4082} 4083SDValue 4084AMDILTargetLowering::LowerOR(SDValue Op, 
SelectionDAG &DAG) const 4085{ 4086 SDValue orOp; 4087 orOp = DAG.getNode(AMDILISD::OR, 4088 Op.getDebugLoc(), 4089 Op.getValueType(), 4090 Op.getOperand(0), 4091 Op.getOperand(1)); 4092 return orOp; 4093} 4094SDValue 4095AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const 4096{ 4097 SDValue Cond = Op.getOperand(0); 4098 SDValue LHS = Op.getOperand(1); 4099 SDValue RHS = Op.getOperand(2); 4100 DebugLoc DL = Op.getDebugLoc(); 4101 Cond = getConversionNode(DAG, Cond, Op, true); 4102 Cond = DAG.getNode(AMDILISD::CMOVLOG, 4103 DL, 4104 Op.getValueType(), Cond, LHS, RHS); 4105 return Cond; 4106} 4107SDValue 4108AMDILTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const 4109{ 4110 SDValue Cond; 4111 SDValue LHS = Op.getOperand(0); 4112 SDValue RHS = Op.getOperand(1); 4113 SDValue TRUE = Op.getOperand(2); 4114 SDValue FALSE = Op.getOperand(3); 4115 SDValue CC = Op.getOperand(4); 4116 DebugLoc DL = Op.getDebugLoc(); 4117 bool skipCMov = false; 4118 bool genINot = false; 4119 EVT OVT = Op.getValueType(); 4120 4121 // Check for possible elimination of cmov 4122 if (TRUE.getValueType().getSimpleVT().SimpleTy == MVT::i32) { 4123 const ConstantSDNode *trueConst 4124 = dyn_cast<ConstantSDNode>( TRUE.getNode() ); 4125 const ConstantSDNode *falseConst 4126 = dyn_cast<ConstantSDNode>( FALSE.getNode() ); 4127 if (trueConst && falseConst) { 4128 // both possible result values are constants 4129 if (trueConst->isAllOnesValue() 4130 && falseConst->isNullValue()) { // and convenient constants 4131 skipCMov = true; 4132 } 4133 else if (trueConst->isNullValue() 4134 && falseConst->isAllOnesValue()) { // less convenient 4135 skipCMov = true; 4136 genINot = true; 4137 } 4138 } 4139 } 4140 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 4141 unsigned int AMDILCC = CondCCodeToCC( 4142 SetCCOpcode, 4143 LHS.getValueType().getSimpleVT().SimpleTy); 4144 assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!"); 4145 Cond = DAG.getNode( 4146 
AMDILISD::CMP, 4147 DL, 4148 LHS.getValueType(), 4149 DAG.getConstant(AMDILCC, MVT::i32), 4150 LHS, 4151 RHS); 4152 Cond = getConversionNode(DAG, Cond, Op, true); 4153 if (genINot) { 4154 Cond = DAG.getNode(AMDILISD::NOT, DL, OVT, Cond); 4155 } 4156 if (!skipCMov) { 4157 Cond = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, Cond, TRUE, FALSE); 4158 } 4159 return Cond; 4160} 4161SDValue 4162AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const 4163{ 4164 SDValue Cond; 4165 SDValue LHS = Op.getOperand(0); 4166 SDValue RHS = Op.getOperand(1); 4167 SDValue CC = Op.getOperand(2); 4168 DebugLoc DL = Op.getDebugLoc(); 4169 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 4170 unsigned int AMDILCC = CondCCodeToCC( 4171 SetCCOpcode, 4172 LHS.getValueType().getSimpleVT().SimpleTy); 4173 assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!"); 4174 Cond = DAG.getNode( 4175 AMDILISD::CMP, 4176 DL, 4177 LHS.getValueType(), 4178 DAG.getConstant(AMDILCC, MVT::i32), 4179 LHS, 4180 RHS); 4181 Cond = getConversionNode(DAG, Cond, Op, true); 4182 Cond = DAG.getNode( 4183 ISD::AND, 4184 DL, 4185 Cond.getValueType(), 4186 DAG.getConstant(1, Cond.getValueType()), 4187 Cond); 4188 return Cond; 4189} 4190 4191SDValue 4192AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const 4193{ 4194 SDValue Data = Op.getOperand(0); 4195 VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1)); 4196 DebugLoc DL = Op.getDebugLoc(); 4197 EVT DVT = Data.getValueType(); 4198 EVT BVT = BaseType->getVT(); 4199 unsigned baseBits = BVT.getScalarType().getSizeInBits(); 4200 unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1; 4201 unsigned shiftBits = srcBits - baseBits; 4202 if (srcBits < 32) { 4203 // If the op is less than 32 bits, then it needs to extend to 32bits 4204 // so it can properly keep the upper bits valid. 4205 EVT IVT = genIntType(32, DVT.isVector() ? 
DVT.getVectorNumElements() : 1); 4206 Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data); 4207 shiftBits = 32 - baseBits; 4208 DVT = IVT; 4209 } 4210 SDValue Shift = DAG.getConstant(shiftBits, DVT); 4211 // Shift left by 'Shift' bits. 4212 Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift); 4213 // Signed shift Right by 'Shift' bits. 4214 Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift); 4215 if (srcBits < 32) { 4216 // Once the sign extension is done, the op needs to be converted to 4217 // its original type. 4218 Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType()); 4219 } 4220 return Data; 4221} 4222EVT 4223AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const 4224{ 4225 int iSize = (size * numEle); 4226 int vEle = (iSize >> ((size == 64) ? 6 : 5)); 4227 if (!vEle) { 4228 vEle = 1; 4229 } 4230 if (size == 64) { 4231 if (vEle == 1) { 4232 return EVT(MVT::i64); 4233 } else { 4234 return EVT(MVT::getVectorVT(MVT::i64, vEle)); 4235 } 4236 } else { 4237 if (vEle == 1) { 4238 return EVT(MVT::i32); 4239 } else { 4240 return EVT(MVT::getVectorVT(MVT::i32, vEle)); 4241 } 4242 } 4243} 4244 4245SDValue 4246AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const 4247{ 4248 SDValue Src = Op.getOperand(0); 4249 SDValue Dst = Op; 4250 SDValue Res; 4251 DebugLoc DL = Op.getDebugLoc(); 4252 EVT SrcVT = Src.getValueType(); 4253 EVT DstVT = Dst.getValueType(); 4254 // Lets bitcast the floating point types to an 4255 // equivalent integer type before converting to vectors. 4256 if (SrcVT.getScalarType().isFloatingPoint()) { 4257 Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType( 4258 SrcVT.getScalarType().getSimpleVT().getSizeInBits(), 4259 SrcVT.isVector() ? 
          SrcVT.getVectorNumElements() : 1),
        Src);
    SrcVT = Src.getValueType();
  }
  uint32_t ScalarSrcSize = SrcVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t ScalarDstSize = DstVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
  uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
  bool isVec = SrcVT.isVector();
  if (DstVT.getScalarType().isInteger() &&
      (SrcVT.getScalarType().isInteger()
       || SrcVT.getScalarType().isFloatingPoint())) {
    if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
        || (ScalarSrcSize == 64
          && DstNumEle == 4
          && ScalarDstSize == 16)) {
      // This is the problematic case when bitcasting i64 <-> <4 x i16>
      // This approach is a little different as we cannot generate a
      // <4 x i64> vector
      // as that is illegal in our backend and we are already past
      // the DAG legalizer.
      // So, in this case, we will do the following conversion.
      // Case 1:
      // %dst = <4 x i16> %src bitconvert i64 ==>
      // %tmp = <4 x i16> %src convert <4 x i32>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
      // %dst = <2 x i32> %tmp bitcast i64
      // case 2:
      // %dst = i64 %src bitconvert <4 x i16> ==>
      // %tmp = i64 %src bitcast <2 x i32>
      // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
      // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %dst = <4 x i16> %tmp bitcast <4 x i32>
      SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
          DAG.getConstant(0xFFFF, MVT::i32));
      SDValue const16 = DAG.getConstant(16, MVT::i32);
      if (ScalarDstSize == 64) {
        // case 1: pack four 16-bit lanes into two 32-bit halves, then
        // combine them into the i64.
        Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
        Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
        SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(0, MVT::i32));
        SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(1, MVT::i32));
        y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
        SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(2, MVT::i32));
        SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(3, MVT::i32));
        w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
        x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
        y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
        Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
        return Res;
      } else {
        // case 2: split the i64 into lo/hi 32-bit halves, spread them
        // (and their upper 16 bits) across a v4i32, mask, and truncate
        // to <4 x i16>.
        SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
        SDValue lor16
          = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
        SDValue hi = DAG.getNode((isVec) ?
            AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
        SDValue hir16
          = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
        SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
            MVT::v4i32, lo);
        SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(1, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, lor16, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(2, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, hi, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(3, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, hir16, idxVal);
        resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
        Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
        return Res;
      }
    } else {
      // There are four cases we need to worry about for bitcasts
      // where the size of all
      // source, intermediates and result is <= 128 bits, unlike
      // the above case
      // 1) Sub32bit bitcast 32bitAlign
      // %dst = <4 x i8> bitcast i32
      // (also <[2|4] x i16> to <[2|4] x i32>)
      // 2) 32bitAlign bitcast Sub32bit
      // %dst = i32 bitcast <4 x i8>
      // 3) Sub32bit bitcast LargerSub32bit
      // %dst = <2 x i8> bitcast i16
      // (also <4 x i8> to <2 x i16>)
      // 4) Sub32bit bitcast SmallerSub32bit
      // %dst = i16 bitcast <2 x i8>
      // (also <2 x i16> to <4 x i8>)
      // This also only handles types that are powers of two
      if ((ScalarDstSize & (ScalarDstSize - 1))
          || (ScalarSrcSize & (ScalarSrcSize - 1))) {
        // Non-power-of-two sizes: fall through to the generic BITCONV.
      } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
        // case 1: widen each sub-32-bit source lane to the destination
        // width, mask, shift each lane into its byte/halfword position,
        // and OR adjacent lanes together.
        EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
#if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
        SDValue res = DAG.getSExtOrTrunc(Src, DL,
            IntTy);
#else
        SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getUNDEF(IntTy.getScalarType()));
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(x, MVT::i32));
          SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              SrcVT.getScalarType(), Src,
              DAG.getConstant(x, MVT::i32));
          temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
          res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
              res, temp, idx);
        }
#endif
        SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
        SDValue *newEle = new SDValue[SrcNumEle];
        res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              IntTy.getScalarType(), res,
              DAG.getConstant(x, MVT::i32));
        }
        // Ratio source lanes collapse into each destination lane.
        uint32_t Ratio = SrcNumEle / DstNumEle;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          if (x % Ratio) {
            newEle[x] = DAG.getNode(ISD::SHL, DL,
                IntTy.getScalarType(), newEle[x],
                DAG.getConstant(ScalarSrcSize * (x % Ratio),
                  MVT::i32));
          }
        }
        // Pairwise OR; for 8-bit sources a second pass combines groups
        // of four.
        for (uint32_t x = 0; x < SrcNumEle; x += 2) {
          newEle[x] = DAG.getNode(ISD::OR, DL,
              IntTy.getScalarType(), newEle[x], newEle[x + 1]);
        }
        if (ScalarSrcSize == 8) {
          for (uint32_t x = 0; x < SrcNumEle; x += 4) {
            newEle[x] = DAG.getNode(ISD::OR, DL,
                IntTy.getScalarType(), newEle[x], newEle[x + 2]);
          }
          if (DstNumEle == 1) {
            Dst = newEle[0];
          } else {
            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
                newEle[0]);
            for (uint32_t x = 1; x < DstNumEle; ++x) {
              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                  getPointerTy(), DAG.getConstant(x, MVT::i32));
              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                  DstVT, Dst, newEle[x * 4], idx);
            }
          }
        } else {
          if (DstNumEle == 1) {
            Dst = newEle[0];
          } else {
            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
                newEle[0]);
            for (uint32_t x = 1; x < DstNumEle; ++x) {
              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                  getPointerTy(), DAG.getConstant(x, MVT::i32));
              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                  DstVT, Dst, newEle[x * 2], idx);
            }
          }
        }
        delete [] newEle;
        return Dst;
      } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
        // case 2: split each >=32-bit source lane into 'mult' sub-lanes
        // by shifting right, then truncate the widened vector down to
        // the destination element type.
        EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
        SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getUNDEF(IntTy.getScalarType()));
        uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          for (uint32_t y = 0; y < mult; ++y) {
            SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                getPointerTy(),
                DAG.getConstant(x * mult + y, MVT::i32));
            SDValue t;
            if (SrcNumEle > 1) {
              t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
                  DL, SrcVT.getScalarType(), Src,
                  DAG.getConstant(x, MVT::i32));
            } else {
              t = Src;
            }
            if (y != 0) {
              t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
                  t, DAG.getConstant(y * ScalarDstSize,
                    MVT::i32));
            }
            vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
                DL, IntTy, vec, t, idx);
          }
        }
        Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
        return Dst;
      } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
        // case 3: widen i8 lanes to i16, then OR each odd lane (shifted
        // left 8) into the preceding even lane.
        SDValue *numEle = new SDValue[SrcNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              MVT::i8, Src, DAG.getConstant(x, MVT::i32));
          numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
          numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
              DAG.getConstant(0xFF, MVT::i16));
        }
        for (uint32_t x = 1; x < SrcNumEle; x += 2) {
          numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
              DAG.getConstant(8, MVT::i16));
          numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
              numEle[x-1], numEle[x]);
        }
        if (DstNumEle > 1) {
          // If we are not a scalar i16, the only other case is a
          // v2i16 since we can't have v8i8 at this point, v4i16
          // cannot be generated
          Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
              numEle[0]);
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(1, MVT::i32));
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
              Dst, numEle[2], idx);
        } else {
          Dst = numEle[0];
        }
        delete [] numEle;
        return Dst;
      } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
        // case 4: split each i16 lane into a low byte and a high byte
        // (via SRL 8), build a v2i16/v4i16, then truncate to i8 lanes.
        SDValue *numEle = new SDValue[DstNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              MVT::i16, Src, DAG.getConstant(x, MVT::i32));
          numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
              numEle[x * 2], DAG.getConstant(8, MVT::i16));
        }
        MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
        Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
        for (uint32_t x = 1; x < DstNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(x, MVT::i32));
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
              Dst, numEle[x], idx);
        }
        delete [] numEle;
        ty = (SrcNumEle == 1) ?
MVT::v2i8 : MVT::v4i8; 4517 Res = DAG.getSExtOrTrunc(Dst, DL, ty); 4518 return Res; 4519 } 4520 } 4521 } 4522 Res = DAG.getNode(AMDILISD::BITCONV, 4523 Dst.getDebugLoc(), 4524 Dst.getValueType(), Src); 4525 return Res; 4526} 4527 4528SDValue 4529AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 4530 SelectionDAG &DAG) const 4531{ 4532 SDValue Chain = Op.getOperand(0); 4533 SDValue Size = Op.getOperand(1); 4534 unsigned int SPReg = AMDIL::SP; 4535 DebugLoc DL = Op.getDebugLoc(); 4536 SDValue SP = DAG.getCopyFromReg(Chain, 4537 DL, 4538 SPReg, MVT::i32); 4539 SDValue NewSP = DAG.getNode(ISD::ADD, 4540 DL, 4541 MVT::i32, SP, Size); 4542 Chain = DAG.getCopyToReg(SP.getValue(1), 4543 DL, 4544 SPReg, NewSP); 4545 SDValue Ops[2] = {NewSP, Chain}; 4546 Chain = DAG.getMergeValues(Ops, 2 ,DL); 4547 return Chain; 4548} 4549SDValue 4550AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const 4551{ 4552 SDValue Chain = Op.getOperand(0); 4553 SDValue Cond = Op.getOperand(1); 4554 SDValue Jump = Op.getOperand(2); 4555 SDValue Result; 4556 Result = DAG.getNode( 4557 AMDILISD::BRANCH_COND, 4558 Op.getDebugLoc(), 4559 Op.getValueType(), 4560 Chain, Jump, Cond); 4561 return Result; 4562} 4563 4564SDValue 4565AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const 4566{ 4567 SDValue Chain = Op.getOperand(0); 4568 CondCodeSDNode *CCNode = cast<CondCodeSDNode>(Op.getOperand(1)); 4569 SDValue LHS = Op.getOperand(2); 4570 SDValue RHS = Op.getOperand(3); 4571 SDValue JumpT = Op.getOperand(4); 4572 SDValue CmpValue; 4573 ISD::CondCode CC = CCNode->get(); 4574 SDValue Result; 4575 unsigned int cmpOpcode = CondCCodeToCC( 4576 CC, 4577 LHS.getValueType().getSimpleVT().SimpleTy); 4578 CmpValue = DAG.getNode( 4579 AMDILISD::CMP, 4580 Op.getDebugLoc(), 4581 LHS.getValueType(), 4582 DAG.getConstant(cmpOpcode, MVT::i32), 4583 LHS, RHS); 4584 Result = DAG.getNode( 4585 AMDILISD::BRANCH_COND, 4586 CmpValue.getDebugLoc(), 4587 MVT::Other, Chain, 4588 JumpT, CmpValue); 
4589 return Result; 4590} 4591 4592SDValue 4593AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const 4594{ 4595 SDValue Result = DAG.getNode( 4596 AMDILISD::DP_TO_FP, 4597 Op.getDebugLoc(), 4598 Op.getValueType(), 4599 Op.getOperand(0), 4600 Op.getOperand(1)); 4601 return Result; 4602} 4603 4604SDValue 4605AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const 4606{ 4607 SDValue Result = DAG.getNode( 4608 AMDILISD::VCONCAT, 4609 Op.getDebugLoc(), 4610 Op.getValueType(), 4611 Op.getOperand(0), 4612 Op.getOperand(1)); 4613 return Result; 4614} 4615// LowerRET - Lower an ISD::RET node. 4616SDValue 4617AMDILTargetLowering::LowerReturn(SDValue Chain, 4618 CallingConv::ID CallConv, bool isVarArg, 4619 const SmallVectorImpl<ISD::OutputArg> &Outs, 4620 const SmallVectorImpl<SDValue> &OutVals, 4621 DebugLoc dl, SelectionDAG &DAG) 4622const 4623{ 4624 //MachineFunction& MF = DAG.getMachineFunction(); 4625 // CCValAssign - represent the assignment of the return value 4626 // to a location 4627 SmallVector<CCValAssign, 16> RVLocs; 4628 4629 // CCState - Info about the registers and stack slot 4630 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 4631 getTargetMachine(), RVLocs, *DAG.getContext()); 4632 4633 // Analyze return values of ISD::RET 4634 CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32); 4635 // If this is the first return lowered for this function, add 4636 // the regs to the liveout set for the function 4637 MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); 4638 for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) { 4639 if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) { 4640 MRI.addLiveOut(RVLocs[i].getLocReg()); 4641 } 4642 } 4643 // FIXME: implement this when tail call is implemented 4644 // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL); 4645 // both x86 and ppc implement this in ISelLowering 4646 4647 // Regular return here 4648 SDValue Flag; 4649 
SmallVector<SDValue, 6> RetOps; 4650 RetOps.push_back(Chain); 4651 RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32)); 4652 for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) { 4653 CCValAssign &VA = RVLocs[i]; 4654 SDValue ValToCopy = OutVals[i]; 4655 assert(VA.isRegLoc() && "Can only return in registers!"); 4656 // ISD::Ret => ret chain, (regnum1, val1), ... 4657 // So i * 2 + 1 index only the regnums 4658 Chain = DAG.getCopyToReg(Chain, 4659 dl, 4660 VA.getLocReg(), 4661 ValToCopy, 4662 Flag); 4663 // guarantee that all emitted copies are stuck together 4664 // avoiding something bad 4665 Flag = Chain.getValue(1); 4666 } 4667 /*if (MF.getFunction()->hasStructRetAttr()) { 4668 assert(0 && "Struct returns are not yet implemented!"); 4669 // Both MIPS and X86 have this 4670 }*/ 4671 RetOps[0] = Chain; 4672 if (Flag.getNode()) 4673 RetOps.push_back(Flag); 4674 4675 Flag = DAG.getNode(AMDILISD::RET_FLAG, 4676 dl, 4677 MVT::Other, &RetOps[0], RetOps.size()); 4678 return Flag; 4679} 4680void 4681AMDILTargetLowering::generateLongRelational(MachineInstr *MI, 4682 unsigned int opCode) const 4683{ 4684 MachineOperand DST = MI->getOperand(0); 4685 MachineOperand LHS = MI->getOperand(2); 4686 MachineOperand RHS = MI->getOperand(3); 4687 unsigned int opi32Code = 0, si32Code = 0; 4688 unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass; 4689 uint32_t REGS[12]; 4690 // All the relationals can be generated with with 6 temp registers 4691 for (int x = 0; x < 12; ++x) { 4692 REGS[x] = genVReg(simpleVT); 4693 } 4694 // Pull out the high and low components of each 64 bit register 4695 generateMachineInst(AMDIL::LHI, REGS[0], LHS.getReg()); 4696 generateMachineInst(AMDIL::LLO, REGS[1], LHS.getReg()); 4697 generateMachineInst(AMDIL::LHI, REGS[2], RHS.getReg()); 4698 generateMachineInst(AMDIL::LLO, REGS[3], RHS.getReg()); 4699 // Determine the correct opcode that we should use 4700 switch(opCode) { 4701 default: 4702 assert(!"comparison case not 
handled!"); 4703 break; 4704 case AMDIL::LEQ: 4705 si32Code = opi32Code = AMDIL::IEQ; 4706 break; 4707 case AMDIL::LNE: 4708 si32Code = opi32Code = AMDIL::INE; 4709 break; 4710 case AMDIL::LLE: 4711 case AMDIL::ULLE: 4712 case AMDIL::LGE: 4713 case AMDIL::ULGE: 4714 if (opCode == AMDIL::LGE || opCode == AMDIL::ULGE) { 4715 std::swap(REGS[0], REGS[2]); 4716 } else { 4717 std::swap(REGS[1], REGS[3]); 4718 } 4719 if (opCode == AMDIL::LLE || opCode == AMDIL::LGE) { 4720 opi32Code = AMDIL::ILT; 4721 } else { 4722 opi32Code = AMDIL::ULT; 4723 } 4724 si32Code = AMDIL::UGE; 4725 break; 4726 case AMDIL::LGT: 4727 case AMDIL::ULGT: 4728 std::swap(REGS[0], REGS[2]); 4729 std::swap(REGS[1], REGS[3]); 4730 case AMDIL::LLT: 4731 case AMDIL::ULLT: 4732 if (opCode == AMDIL::LGT || opCode == AMDIL::LLT) { 4733 opi32Code = AMDIL::ILT; 4734 } else { 4735 opi32Code = AMDIL::ULT; 4736 } 4737 si32Code = AMDIL::ULT; 4738 break; 4739 }; 4740 // Do the initial opcode on the high and low components. 4741 // This leaves the following: 4742 // REGS[4] = L_HI OP R_HI 4743 // REGS[5] = L_LO OP R_LO 4744 generateMachineInst(opi32Code, REGS[4], REGS[0], REGS[2]); 4745 generateMachineInst(si32Code, REGS[5], REGS[1], REGS[3]); 4746 switch(opi32Code) { 4747 case AMDIL::IEQ: 4748 case AMDIL::INE: 4749 { 4750 // combine the results with an and or or depending on if 4751 // we are eq or ne 4752 uint32_t combineOp = (opi32Code == AMDIL::IEQ) 4753 ? 
AMDIL::BINARY_AND_i32 : AMDIL::BINARY_OR_i32; 4754 generateMachineInst(combineOp, REGS[11], REGS[4], REGS[5]); 4755 } 4756 break; 4757 default: 4758 // this finishes codegen for the following pattern 4759 // REGS[4] || (REGS[5] && (L_HI == R_HI)) 4760 generateMachineInst(AMDIL::IEQ, REGS[9], REGS[0], REGS[2]); 4761 generateMachineInst(AMDIL::BINARY_AND_i32, REGS[10], REGS[5], 4762 REGS[9]); 4763 generateMachineInst(AMDIL::BINARY_OR_i32, REGS[11], REGS[4], 4764 REGS[10]); 4765 break; 4766 } 4767 generateMachineInst(AMDIL::LCREATE, DST.getReg(), REGS[11], REGS[11]); 4768} 4769 4770unsigned int 4771AMDILTargetLowering::getFunctionAlignment(const Function *) const 4772{ 4773 return 0; 4774} 4775 4776void 4777AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB, 4778 MachineBasicBlock::iterator &BBI, 4779 DebugLoc *DL, const TargetInstrInfo *TII) const 4780{ 4781 mBB = BB; 4782 mBBI = BBI; 4783 mDL = DL; 4784 mTII = TII; 4785} 4786uint32_t 4787AMDILTargetLowering::genVReg(uint32_t regType) const 4788{ 4789 return mBB->getParent()->getRegInfo().createVirtualRegister( 4790 getRegClassFromID(regType)); 4791} 4792 4793MachineInstrBuilder 4794AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const 4795{ 4796 return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst); 4797} 4798 4799MachineInstrBuilder 4800AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst, 4801 uint32_t src1) const 4802{ 4803 return generateMachineInst(opcode, dst).addReg(src1); 4804} 4805 4806MachineInstrBuilder 4807AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst, 4808 uint32_t src1, uint32_t src2) const 4809{ 4810 return generateMachineInst(opcode, dst, src1).addReg(src2); 4811} 4812 4813MachineInstrBuilder 4814AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst, 4815 uint32_t src1, uint32_t src2, uint32_t src3) const 4816{ 4817 return generateMachineInst(opcode, dst, src1, src2).addReg(src3); 4818} 4819 4820 4821SDValue 
4822AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const 4823{ 4824 DebugLoc DL = Op.getDebugLoc(); 4825 EVT OVT = Op.getValueType(); 4826 SDValue LHS = Op.getOperand(0); 4827 SDValue RHS = Op.getOperand(1); 4828 MVT INTTY; 4829 MVT FLTTY; 4830 if (!OVT.isVector()) { 4831 INTTY = MVT::i32; 4832 FLTTY = MVT::f32; 4833 } else if (OVT.getVectorNumElements() == 2) { 4834 INTTY = MVT::v2i32; 4835 FLTTY = MVT::v2f32; 4836 } else if (OVT.getVectorNumElements() == 4) { 4837 INTTY = MVT::v4i32; 4838 FLTTY = MVT::v4f32; 4839 } 4840 unsigned bitsize = OVT.getScalarType().getSizeInBits(); 4841 // char|short jq = ia ^ ib; 4842 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); 4843 4844 // jq = jq >> (bitsize - 2) 4845 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); 4846 4847 // jq = jq | 0x1 4848 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); 4849 4850 // jq = (int)jq 4851 jq = DAG.getSExtOrTrunc(jq, DL, INTTY); 4852 4853 // int ia = (int)LHS; 4854 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); 4855 4856 // int ib, (int)RHS; 4857 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); 4858 4859 // float fa = (float)ia; 4860 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); 4861 4862 // float fb = (float)ib; 4863 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); 4864 4865 // float fq = native_divide(fa, fb); 4866 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb); 4867 4868 // fq = trunc(fq); 4869 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); 4870 4871 // float fqneg = -fq; 4872 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); 4873 4874 // float fr = mad(fqneg, fb, fa); 4875 SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa); 4876 4877 // int iq = (int)fq; 4878 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); 4879 4880 // fr = fabs(fr); 4881 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr); 4882 4883 // fb = fabs(fb); 4884 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); 
4885 4886 // int cv = fr >= fb; 4887 SDValue cv; 4888 if (INTTY == MVT::i32) { 4889 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 4890 } else { 4891 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 4892 } 4893 // jq = (cv ? jq : 0); 4894 jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq, 4895 DAG.getConstant(0, OVT)); 4896 // dst = iq + jq; 4897 iq = DAG.getSExtOrTrunc(iq, DL, OVT); 4898 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); 4899 return iq; 4900} 4901 4902SDValue 4903AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const 4904{ 4905 DebugLoc DL = Op.getDebugLoc(); 4906 EVT OVT = Op.getValueType(); 4907 SDValue LHS = Op.getOperand(0); 4908 SDValue RHS = Op.getOperand(1); 4909 // The LowerSDIV32 function generates equivalent to the following IL. 4910 // mov r0, LHS 4911 // mov r1, RHS 4912 // ilt r10, r0, 0 4913 // ilt r11, r1, 0 4914 // iadd r0, r0, r10 4915 // iadd r1, r1, r11 4916 // ixor r0, r0, r10 4917 // ixor r1, r1, r11 4918 // udiv r0, r0, r1 4919 // ixor r10, r10, r11 4920 // iadd r0, r0, r10 4921 // ixor DST, r0, r10 4922 4923 // mov r0, LHS 4924 SDValue r0 = LHS; 4925 4926 // mov r1, RHS 4927 SDValue r1 = RHS; 4928 4929 // ilt r10, r0, 0 4930 SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT, 4931 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 4932 r0, DAG.getConstant(0, OVT)); 4933 4934 // ilt r11, r1, 0 4935 SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT, 4936 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 4937 r1, DAG.getConstant(0, OVT)); 4938 4939 // iadd r0, r0, r10 4940 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 4941 4942 // iadd r1, r1, r11 4943 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 4944 4945 // ixor r0, r0, r10 4946 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 4947 4948 // ixor r1, r1, r11 4949 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 4950 4951 // udiv r0, r0, r1 4952 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); 4953 4954 // ixor r10, r10, r11 4955 r10 = 
DAG.getNode(ISD::XOR, DL, OVT, r10, r11); 4956 4957 // iadd r0, r0, r10 4958 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 4959 4960 // ixor DST, r0, r10 4961 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 4962 return DST; 4963} 4964 4965SDValue 4966AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const 4967{ 4968 return SDValue(Op.getNode(), 0); 4969} 4970 4971SDValue 4972AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const 4973{ 4974 DebugLoc DL = Op.getDebugLoc(); 4975 EVT OVT = Op.getValueType(); 4976 SDValue LHS = Op.getOperand(0); 4977 SDValue RHS = Op.getOperand(1); 4978 MVT INTTY; 4979 MVT FLTTY; 4980 if (!OVT.isVector()) { 4981 INTTY = MVT::i32; 4982 FLTTY = MVT::f32; 4983 } else if (OVT.getVectorNumElements() == 2) { 4984 INTTY = MVT::v2i32; 4985 FLTTY = MVT::v2f32; 4986 } else if (OVT.getVectorNumElements() == 4) { 4987 INTTY = MVT::v4i32; 4988 FLTTY = MVT::v4f32; 4989 } 4990 4991 // The LowerUDIV24 function implements the following CL. 
4992 // int ia = (int)LHS 4993 // float fa = (float)ia 4994 // int ib = (int)RHS 4995 // float fb = (float)ib 4996 // float fq = native_divide(fa, fb) 4997 // fq = trunc(fq) 4998 // float t = mad(fq, fb, fb) 4999 // int iq = (int)fq - (t <= fa) 5000 // return (type)iq 5001 5002 // int ia = (int)LHS 5003 SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY); 5004 5005 // float fa = (float)ia 5006 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); 5007 5008 // int ib = (int)RHS 5009 SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY); 5010 5011 // float fb = (float)ib 5012 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); 5013 5014 // float fq = native_divide(fa, fb) 5015 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb); 5016 5017 // fq = trunc(fq) 5018 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); 5019 5020 // float t = mad(fq, fb, fb) 5021 SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb); 5022 5023 // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1 5024 SDValue iq; 5025 fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); 5026 if (INTTY == MVT::i32) { 5027 iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE); 5028 } else { 5029 iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE); 5030 } 5031 iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq); 5032 5033 5034 // return (type)iq 5035 iq = DAG.getZExtOrTrunc(iq, DL, OVT); 5036 return iq; 5037 5038} 5039 5040SDValue 5041AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const 5042{ 5043 return SDValue(Op.getNode(), 0); 5044} 5045 5046SDValue 5047AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const 5048{ 5049 return SDValue(Op.getNode(), 0); 5050} 5051SDValue 5052AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const 5053{ 5054 DebugLoc DL = Op.getDebugLoc(); 5055 EVT OVT = Op.getValueType(); 5056 MVT INTTY = MVT::i32; 5057 if (OVT == MVT::v2i8) { 5058 INTTY = MVT::v2i32; 5059 } else if (OVT == MVT::v4i8) { 5060 INTTY = 
MVT::v4i32; 5061 } 5062 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 5063 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 5064 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 5065 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 5066 return LHS; 5067} 5068 5069SDValue 5070AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const 5071{ 5072 DebugLoc DL = Op.getDebugLoc(); 5073 EVT OVT = Op.getValueType(); 5074 MVT INTTY = MVT::i32; 5075 if (OVT == MVT::v2i16) { 5076 INTTY = MVT::v2i32; 5077 } else if (OVT == MVT::v4i16) { 5078 INTTY = MVT::v4i32; 5079 } 5080 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 5081 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 5082 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 5083 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 5084 return LHS; 5085} 5086 5087SDValue 5088AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const 5089{ 5090 DebugLoc DL = Op.getDebugLoc(); 5091 EVT OVT = Op.getValueType(); 5092 SDValue LHS = Op.getOperand(0); 5093 SDValue RHS = Op.getOperand(1); 5094 // The LowerSREM32 function generates equivalent to the following IL. 
5095 // mov r0, LHS 5096 // mov r1, RHS 5097 // ilt r10, r0, 0 5098 // ilt r11, r1, 0 5099 // iadd r0, r0, r10 5100 // iadd r1, r1, r11 5101 // ixor r0, r0, r10 5102 // ixor r1, r1, r11 5103 // udiv r20, r0, r1 5104 // umul r20, r20, r1 5105 // sub r0, r0, r20 5106 // iadd r0, r0, r10 5107 // ixor DST, r0, r10 5108 5109 // mov r0, LHS 5110 SDValue r0 = LHS; 5111 5112 // mov r1, RHS 5113 SDValue r1 = RHS; 5114 5115 // ilt r10, r0, 0 5116 SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT, 5117 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 5118 r0, DAG.getConstant(0, OVT)); 5119 5120 // ilt r11, r1, 0 5121 SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT, 5122 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 5123 r1, DAG.getConstant(0, OVT)); 5124 5125 // iadd r0, r0, r10 5126 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 5127 5128 // iadd r1, r1, r11 5129 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 5130 5131 // ixor r0, r0, r10 5132 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 5133 5134 // ixor r1, r1, r11 5135 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 5136 5137 // udiv r20, r0, r1 5138 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1); 5139 5140 // umul r20, r20, r1 5141 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1); 5142 5143 // sub r0, r0, r20 5144 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); 5145 5146 // iadd r0, r0, r10 5147 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 5148 5149 // ixor DST, r0, r10 5150 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 5151 return DST; 5152} 5153 5154SDValue 5155AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const 5156{ 5157 return SDValue(Op.getNode(), 0); 5158} 5159 5160SDValue 5161AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const 5162{ 5163 DebugLoc DL = Op.getDebugLoc(); 5164 EVT OVT = Op.getValueType(); 5165 MVT INTTY = MVT::i32; 5166 if (OVT == MVT::v2i8) { 5167 INTTY = MVT::v2i32; 5168 } else if (OVT == MVT::v4i8) { 5169 INTTY = 
MVT::v4i32; 5170 } 5171 SDValue LHS = Op.getOperand(0); 5172 SDValue RHS = Op.getOperand(1); 5173 // The LowerUREM8 function generates equivalent to the following IL. 5174 // mov r0, as_u32(LHS) 5175 // mov r1, as_u32(RHS) 5176 // and r10, r0, 0xFF 5177 // and r11, r1, 0xFF 5178 // cmov_logical r3, r11, r11, 0x1 5179 // udiv r3, r10, r3 5180 // cmov_logical r3, r11, r3, 0 5181 // umul r3, r3, r11 5182 // sub r3, r10, r3 5183 // and as_u8(DST), r3, 0xFF 5184 5185 // mov r0, as_u32(LHS) 5186 SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY); 5187 5188 // mov r1, as_u32(RHS) 5189 SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY); 5190 5191 // and r10, r0, 0xFF 5192 SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0, 5193 DAG.getConstant(0xFF, INTTY)); 5194 5195 // and r11, r1, 0xFF 5196 SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1, 5197 DAG.getConstant(0xFF, INTTY)); 5198 5199 // cmov_logical r3, r11, r11, 0x1 5200 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11, 5201 DAG.getConstant(0x01, INTTY)); 5202 5203 // udiv r3, r10, r3 5204 r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3); 5205 5206 // cmov_logical r3, r11, r3, 0 5207 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3, 5208 DAG.getConstant(0, INTTY)); 5209 5210 // umul r3, r3, r11 5211 r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11); 5212 5213 // sub r3, r10, r3 5214 r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3); 5215 5216 // and as_u8(DST), r3, 0xFF 5217 SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3, 5218 DAG.getConstant(0xFF, INTTY)); 5219 DST = DAG.getZExtOrTrunc(DST, DL, OVT); 5220 return DST; 5221} 5222 5223SDValue 5224AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const 5225{ 5226 DebugLoc DL = Op.getDebugLoc(); 5227 EVT OVT = Op.getValueType(); 5228 MVT INTTY = MVT::i32; 5229 if (OVT == MVT::v2i16) { 5230 INTTY = MVT::v2i32; 5231 } else if (OVT == MVT::v4i16) { 5232 INTTY = MVT::v4i32; 5233 } 5234 SDValue LHS = Op.getOperand(0); 5235 SDValue RHS = 
Op.getOperand(1); 5236 // The LowerUREM16 function generatest equivalent to the following IL. 5237 // mov r0, LHS 5238 // mov r1, RHS 5239 // DIV = LowerUDIV16(LHS, RHS) 5240 // and r10, r0, 0xFFFF 5241 // and r11, r1, 0xFFFF 5242 // cmov_logical r3, r11, r11, 0x1 5243 // udiv as_u16(r3), as_u32(r10), as_u32(r3) 5244 // and r3, r3, 0xFFFF 5245 // cmov_logical r3, r11, r3, 0 5246 // umul r3, r3, r11 5247 // sub r3, r10, r3 5248 // and DST, r3, 0xFFFF 5249 5250 // mov r0, LHS 5251 SDValue r0 = LHS; 5252 5253 // mov r1, RHS 5254 SDValue r1 = RHS; 5255 5256 // and r10, r0, 0xFFFF 5257 SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0, 5258 DAG.getConstant(0xFFFF, OVT)); 5259 5260 // and r11, r1, 0xFFFF 5261 SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1, 5262 DAG.getConstant(0xFFFF, OVT)); 5263 5264 // cmov_logical r3, r11, r11, 0x1 5265 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11, 5266 DAG.getConstant(0x01, OVT)); 5267 5268 // udiv as_u16(r3), as_u32(r10), as_u32(r3) 5269 r10 = DAG.getZExtOrTrunc(r10, DL, INTTY); 5270 r3 = DAG.getZExtOrTrunc(r3, DL, INTTY); 5271 r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3); 5272 r3 = DAG.getZExtOrTrunc(r3, DL, OVT); 5273 r10 = DAG.getZExtOrTrunc(r10, DL, OVT); 5274 5275 // and r3, r3, 0xFFFF 5276 r3 = DAG.getNode(ISD::AND, DL, OVT, r3, 5277 DAG.getConstant(0xFFFF, OVT)); 5278 5279 // cmov_logical r3, r11, r3, 0 5280 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3, 5281 DAG.getConstant(0, OVT)); 5282 // umul r3, r3, r11 5283 r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11); 5284 5285 // sub r3, r10, r3 5286 r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3); 5287 5288 // and DST, r3, 0xFFFF 5289 SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3, 5290 DAG.getConstant(0xFFFF, OVT)); 5291 return DST; 5292} 5293 5294SDValue 5295AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const 5296{ 5297 DebugLoc DL = Op.getDebugLoc(); 5298 EVT OVT = Op.getValueType(); 5299 SDValue LHS = Op.getOperand(0); 5300 SDValue 
RHS = Op.getOperand(1); 5301 // The LowerUREM32 function generates equivalent to the following IL. 5302 // udiv r20, LHS, RHS 5303 // umul r20, r20, RHS 5304 // sub DST, LHS, r20 5305 5306 // udiv r20, LHS, RHS 5307 SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS); 5308 5309 // umul r20, r20, RHS 5310 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS); 5311 5312 // sub DST, LHS, r20 5313 SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20); 5314 return DST; 5315} 5316 5317SDValue 5318AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const 5319{ 5320 return SDValue(Op.getNode(), 0); 5321} 5322 5323 5324SDValue 5325AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const 5326{ 5327 DebugLoc DL = Op.getDebugLoc(); 5328 EVT OVT = Op.getValueType(); 5329 MVT INTTY = MVT::i32; 5330 if (OVT == MVT::v2f32) { 5331 INTTY = MVT::v2i32; 5332 } else if (OVT == MVT::v4f32) { 5333 INTTY = MVT::v4i32; 5334 } 5335 SDValue LHS = Op.getOperand(0); 5336 SDValue RHS = Op.getOperand(1); 5337 SDValue DST; 5338 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 5339 &this->getTargetMachine())->getSubtargetImpl(); 5340 if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { 5341 // TODO: This doesn't work for vector types yet 5342 // The LowerFDIV32 function generates equivalent to the following 5343 // IL: 5344 // mov r20, as_int(LHS) 5345 // mov r21, as_int(RHS) 5346 // and r30, r20, 0x7f800000 5347 // and r31, r20, 0x807FFFFF 5348 // and r32, r21, 0x7f800000 5349 // and r33, r21, 0x807FFFFF 5350 // ieq r40, r30, 0x7F800000 5351 // ieq r41, r31, 0x7F800000 5352 // ieq r42, r32, 0 5353 // ieq r43, r33, 0 5354 // and r50, r20, 0x80000000 5355 // and r51, r21, 0x80000000 5356 // ior r32, r32, 0x3f800000 5357 // ior r33, r33, 0x3f800000 5358 // cmov_logical r32, r42, r50, r32 5359 // cmov_logical r33, r43, r51, r33 5360 // cmov_logical r32, r40, r20, r32 5361 // cmov_logical r33, r41, r21, r33 5362 // ior r50, r40, r41 5363 
// ior r51, r42, r43 5364 // ior r50, r50, r51 5365 // inegate r52, r31 5366 // iadd r30, r30, r52 5367 // cmov_logical r30, r50, 0, r30 5368 // div_zeroop(infinity) r21, 1.0, r33 5369 // mul_ieee r20, r32, r21 5370 // and r22, r20, 0x7FFFFFFF 5371 // and r23, r20, 0x80000000 5372 // ishr r60, r22, 0x00000017 5373 // ishr r61, r30, 0x00000017 5374 // iadd r20, r20, r30 5375 // iadd r21, r22, r30 5376 // iadd r60, r60, r61 5377 // ige r42, 0, R60 5378 // ior r41, r23, 0x7F800000 5379 // ige r40, r60, 0x000000FF 5380 // cmov_logical r40, r50, 0, r40 5381 // cmov_logical r20, r42, r23, r20 5382 // cmov_logical DST, r40, r41, r20 5383 // as_float(DST) 5384 5385 // mov r20, as_int(LHS) 5386 SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS); 5387 5388 // mov r21, as_int(RHS) 5389 SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS); 5390 5391 // and r30, r20, 0x7f800000 5392 SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20, 5393 DAG.getConstant(0x7F800000, INTTY)); 5394 5395 // and r31, r21, 0x7f800000 5396 SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21, 5397 DAG.getConstant(0x7f800000, INTTY)); 5398 5399 // and r32, r20, 0x807FFFFF 5400 SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20, 5401 DAG.getConstant(0x807FFFFF, INTTY)); 5402 5403 // and r33, r21, 0x807FFFFF 5404 SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21, 5405 DAG.getConstant(0x807FFFFF, INTTY)); 5406 5407 // ieq r40, r30, 0x7F800000 5408 SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5409 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 5410 R30, DAG.getConstant(0x7F800000, INTTY)); 5411 5412 // ieq r41, r31, 0x7F800000 5413 SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5414 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 5415 R31, DAG.getConstant(0x7F800000, INTTY)); 5416 5417 // ieq r42, r30, 0 5418 SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5419 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 5420 R30, DAG.getConstant(0, 
INTTY)); 5421 5422 // ieq r43, r31, 0 5423 SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5424 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 5425 R31, DAG.getConstant(0, INTTY)); 5426 5427 // and r50, r20, 0x80000000 5428 SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20, 5429 DAG.getConstant(0x80000000, INTTY)); 5430 5431 // and r51, r21, 0x80000000 5432 SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21, 5433 DAG.getConstant(0x80000000, INTTY)); 5434 5435 // ior r32, r32, 0x3f800000 5436 R32 = DAG.getNode(ISD::OR, DL, INTTY, R32, 5437 DAG.getConstant(0x3F800000, INTTY)); 5438 5439 // ior r33, r33, 0x3f800000 5440 R33 = DAG.getNode(ISD::OR, DL, INTTY, R33, 5441 DAG.getConstant(0x3F800000, INTTY)); 5442 5443 // cmov_logical r32, r42, r50, r32 5444 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32); 5445 5446 // cmov_logical r33, r43, r51, r33 5447 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33); 5448 5449 // cmov_logical r32, r40, r20, r32 5450 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32); 5451 5452 // cmov_logical r33, r41, r21, r33 5453 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33); 5454 5455 // ior r50, r40, r41 5456 R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41); 5457 5458 // ior r51, r42, r43 5459 R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43); 5460 5461 // ior r50, r50, r51 5462 R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51); 5463 5464 // inegate r52, r31 5465 SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31); 5466 5467 // iadd r30, r30, r52 5468 R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52); 5469 5470 // cmov_logical r30, r50, 0, r30 5471 R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50, 5472 DAG.getConstant(0, INTTY), R30); 5473 5474 // div_zeroop(infinity) r21, 1.0, as_float(r33) 5475 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33); 5476 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, 5477 DAG.getConstantFP(1.0f, OVT), R33); 5478 5479 // mul_ieee 
as_int(r20), as_float(r32), r21 5480 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32); 5481 R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21); 5482 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20); 5483 5484 // div_zeroop(infinity) r21, 1.0, as_float(r33) 5485 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33); 5486 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, 5487 DAG.getConstantFP(1.0f, OVT), R33); 5488 5489 // mul_ieee as_int(r20), as_float(r32), r21 5490 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32); 5491 R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21); 5492 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20); 5493 5494 // and r22, r20, 0x7FFFFFFF 5495 SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20, 5496 DAG.getConstant(0x7FFFFFFF, INTTY)); 5497 5498 // and r23, r20, 0x80000000 5499 SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20, 5500 DAG.getConstant(0x80000000, INTTY)); 5501 5502 // ishr r60, r22, 0x00000017 5503 SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22, 5504 DAG.getConstant(0x00000017, INTTY)); 5505 5506 // ishr r61, r30, 0x00000017 5507 SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30, 5508 DAG.getConstant(0x00000017, INTTY)); 5509 5510 // iadd r20, r20, r30 5511 R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30); 5512 5513 // iadd r21, r22, r30 5514 R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30); 5515 5516 // iadd r60, r60, r61 5517 R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61); 5518 5519 // ige r42, 0, R60 5520 R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5521 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 5522 DAG.getConstant(0, INTTY), 5523 R60); 5524 5525 // ior r41, r23, 0x7F800000 5526 R41 = DAG.getNode(ISD::OR, DL, INTTY, R23, 5527 DAG.getConstant(0x7F800000, INTTY)); 5528 5529 // ige r40, r60, 0x000000FF 5530 R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5531 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 5532 R60, 5533 DAG.getConstant(0x0000000FF, INTTY)); 5534 5535 // cmov_logical r40, r50, 0, r40 5536 R40 = 
DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50, 5537 DAG.getConstant(0, INTTY), 5538 R40); 5539 5540 // cmov_logical r20, r42, r23, r20 5541 R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20); 5542 5543 // cmov_logical DST, r40, r41, r20 5544 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20); 5545 5546 // as_float(DST) 5547 DST = DAG.getNode(ISDBITCAST, DL, OVT, DST); 5548 } else { 5549 // The following sequence of DAG nodes produce the following IL: 5550 // fabs r1, RHS 5551 // lt r2, 0x1.0p+96f, r1 5552 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f 5553 // mul_ieee r1, RHS, r3 5554 // div_zeroop(infinity) r0, LHS, r1 5555 // mul_ieee DST, r0, r3 5556 5557 // fabs r1, RHS 5558 SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS); 5559 // lt r2, 0x1.0p+96f, r1 5560 SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT, 5561 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32), 5562 DAG.getConstant(0x6f800000, INTTY), r1); 5563 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f 5564 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2, 5565 DAG.getConstant(0x2f800000, INTTY), 5566 DAG.getConstant(0x3f800000, INTTY)); 5567 // mul_ieee r1, RHS, r3 5568 r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3); 5569 // div_zeroop(infinity) r0, LHS, r1 5570 SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1); 5571 // mul_ieee DST, r0, r3 5572 DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3); 5573 } 5574 return DST; 5575} 5576 5577SDValue 5578AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const 5579{ 5580 return SDValue(Op.getNode(), 0); 5581} 5582