AMDILISelLowering.cpp revision a75c6163e605f35b14f26930dd9227e4f337ec9e
1//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//==-----------------------------------------------------------------------===// 9// 10// This file implements the interfaces that AMDIL uses to lower LLVM code into a 11// selection DAG. 12// 13//===----------------------------------------------------------------------===// 14 15#include "AMDILISelLowering.h" 16#include "AMDILDevices.h" 17#include "AMDILGlobalManager.h" 18#include "AMDILIntrinsicInfo.h" 19#include "AMDILKernelManager.h" 20#include "AMDILMachineFunctionInfo.h" 21#include "AMDILSubtarget.h" 22#include "AMDILTargetMachine.h" 23#include "AMDILUtilityFunctions.h" 24#include "llvm/CallingConv.h" 25#include "llvm/CodeGen/MachineFrameInfo.h" 26#include "llvm/CodeGen/MachineRegisterInfo.h" 27#include "llvm/CodeGen/PseudoSourceValue.h" 28#include "llvm/CodeGen/SelectionDAG.h" 29#include "llvm/CodeGen/SelectionDAGNodes.h" 30#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 31#include "llvm/DerivedTypes.h" 32#include "llvm/Instructions.h" 33#include "llvm/Intrinsics.h" 34#include "llvm/Target/TargetOptions.h" 35 36using namespace llvm; 37#define ISDBITCAST ISD::BITCAST 38#define MVTGLUE MVT::Glue 39//===----------------------------------------------------------------------===// 40// Calling Convention Implementation 41//===----------------------------------------------------------------------===// 42#include "AMDILGenCallingConv.inc" 43 44//===----------------------------------------------------------------------===// 45// TargetLowering Implementation Help Functions Begin 46//===----------------------------------------------------------------------===// 47 static SDValue 48getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType) 49{ 50 DebugLoc DL = Src.getDebugLoc(); 51 EVT svt = 
Src.getValueType().getScalarType(); 52 EVT dvt = Dst.getValueType().getScalarType(); 53 if (svt.isFloatingPoint() && dvt.isFloatingPoint()) { 54 if (dvt.bitsGT(svt)) { 55 Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src); 56 } else if (svt.bitsLT(svt)) { 57 Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src, 58 DAG.getConstant(1, MVT::i32)); 59 } 60 } else if (svt.isInteger() && dvt.isInteger()) { 61 if (!svt.bitsEq(dvt)) { 62 Src = DAG.getSExtOrTrunc(Src, DL, dvt); 63 } else { 64 Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src); 65 } 66 } else if (svt.isInteger()) { 67 unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP; 68 if (!svt.bitsEq(dvt)) { 69 if (dvt.getSimpleVT().SimpleTy == MVT::f32) { 70 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32); 71 } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) { 72 Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64); 73 } else { 74 assert(0 && "We only support 32 and 64bit fp types"); 75 } 76 } 77 Src = DAG.getNode(opcode, DL, dvt, Src); 78 } else if (dvt.isInteger()) { 79 unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT; 80 if (svt.getSimpleVT().SimpleTy == MVT::f32) { 81 Src = DAG.getNode(opcode, DL, MVT::i32, Src); 82 } else if (svt.getSimpleVT().SimpleTy == MVT::f64) { 83 Src = DAG.getNode(opcode, DL, MVT::i64, Src); 84 } else { 85 assert(0 && "We only support 32 and 64bit fp types"); 86 } 87 Src = DAG.getSExtOrTrunc(Src, DL, dvt); 88 } 89 return Src; 90} 91// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC 92// condition. 
93 static AMDILCC::CondCodes 94CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type) 95{ 96 switch (CC) { 97 default: 98 { 99 errs()<<"Condition Code: "<< (unsigned int)CC<<"\n"; 100 assert(0 && "Unknown condition code!"); 101 } 102 case ISD::SETO: 103 switch(type) { 104 case MVT::f32: 105 return AMDILCC::IL_CC_F_O; 106 case MVT::f64: 107 return AMDILCC::IL_CC_D_O; 108 default: 109 assert(0 && "Opcode combination not generated correctly!"); 110 return AMDILCC::COND_ERROR; 111 }; 112 case ISD::SETUO: 113 switch(type) { 114 case MVT::f32: 115 return AMDILCC::IL_CC_F_UO; 116 case MVT::f64: 117 return AMDILCC::IL_CC_D_UO; 118 default: 119 assert(0 && "Opcode combination not generated correctly!"); 120 return AMDILCC::COND_ERROR; 121 }; 122 case ISD::SETGT: 123 switch (type) { 124 case MVT::i1: 125 case MVT::i8: 126 case MVT::i16: 127 case MVT::i32: 128 return AMDILCC::IL_CC_I_GT; 129 case MVT::f32: 130 return AMDILCC::IL_CC_F_GT; 131 case MVT::f64: 132 return AMDILCC::IL_CC_D_GT; 133 case MVT::i64: 134 return AMDILCC::IL_CC_L_GT; 135 default: 136 assert(0 && "Opcode combination not generated correctly!"); 137 return AMDILCC::COND_ERROR; 138 }; 139 case ISD::SETGE: 140 switch (type) { 141 case MVT::i1: 142 case MVT::i8: 143 case MVT::i16: 144 case MVT::i32: 145 return AMDILCC::IL_CC_I_GE; 146 case MVT::f32: 147 return AMDILCC::IL_CC_F_GE; 148 case MVT::f64: 149 return AMDILCC::IL_CC_D_GE; 150 case MVT::i64: 151 return AMDILCC::IL_CC_L_GE; 152 default: 153 assert(0 && "Opcode combination not generated correctly!"); 154 return AMDILCC::COND_ERROR; 155 }; 156 case ISD::SETLT: 157 switch (type) { 158 case MVT::i1: 159 case MVT::i8: 160 case MVT::i16: 161 case MVT::i32: 162 return AMDILCC::IL_CC_I_LT; 163 case MVT::f32: 164 return AMDILCC::IL_CC_F_LT; 165 case MVT::f64: 166 return AMDILCC::IL_CC_D_LT; 167 case MVT::i64: 168 return AMDILCC::IL_CC_L_LT; 169 default: 170 assert(0 && "Opcode combination not generated correctly!"); 171 return AMDILCC::COND_ERROR; 172 
}; 173 case ISD::SETLE: 174 switch (type) { 175 case MVT::i1: 176 case MVT::i8: 177 case MVT::i16: 178 case MVT::i32: 179 return AMDILCC::IL_CC_I_LE; 180 case MVT::f32: 181 return AMDILCC::IL_CC_F_LE; 182 case MVT::f64: 183 return AMDILCC::IL_CC_D_LE; 184 case MVT::i64: 185 return AMDILCC::IL_CC_L_LE; 186 default: 187 assert(0 && "Opcode combination not generated correctly!"); 188 return AMDILCC::COND_ERROR; 189 }; 190 case ISD::SETNE: 191 switch (type) { 192 case MVT::i1: 193 case MVT::i8: 194 case MVT::i16: 195 case MVT::i32: 196 return AMDILCC::IL_CC_I_NE; 197 case MVT::f32: 198 return AMDILCC::IL_CC_F_NE; 199 case MVT::f64: 200 return AMDILCC::IL_CC_D_NE; 201 case MVT::i64: 202 return AMDILCC::IL_CC_L_NE; 203 default: 204 assert(0 && "Opcode combination not generated correctly!"); 205 return AMDILCC::COND_ERROR; 206 }; 207 case ISD::SETEQ: 208 switch (type) { 209 case MVT::i1: 210 case MVT::i8: 211 case MVT::i16: 212 case MVT::i32: 213 return AMDILCC::IL_CC_I_EQ; 214 case MVT::f32: 215 return AMDILCC::IL_CC_F_EQ; 216 case MVT::f64: 217 return AMDILCC::IL_CC_D_EQ; 218 case MVT::i64: 219 return AMDILCC::IL_CC_L_EQ; 220 default: 221 assert(0 && "Opcode combination not generated correctly!"); 222 return AMDILCC::COND_ERROR; 223 }; 224 case ISD::SETUGT: 225 switch (type) { 226 case MVT::i1: 227 case MVT::i8: 228 case MVT::i16: 229 case MVT::i32: 230 return AMDILCC::IL_CC_U_GT; 231 case MVT::f32: 232 return AMDILCC::IL_CC_F_UGT; 233 case MVT::f64: 234 return AMDILCC::IL_CC_D_UGT; 235 case MVT::i64: 236 return AMDILCC::IL_CC_UL_GT; 237 default: 238 assert(0 && "Opcode combination not generated correctly!"); 239 return AMDILCC::COND_ERROR; 240 }; 241 case ISD::SETUGE: 242 switch (type) { 243 case MVT::i1: 244 case MVT::i8: 245 case MVT::i16: 246 case MVT::i32: 247 return AMDILCC::IL_CC_U_GE; 248 case MVT::f32: 249 return AMDILCC::IL_CC_F_UGE; 250 case MVT::f64: 251 return AMDILCC::IL_CC_D_UGE; 252 case MVT::i64: 253 return AMDILCC::IL_CC_UL_GE; 254 default: 255 
assert(0 && "Opcode combination not generated correctly!"); 256 return AMDILCC::COND_ERROR; 257 }; 258 case ISD::SETULT: 259 switch (type) { 260 case MVT::i1: 261 case MVT::i8: 262 case MVT::i16: 263 case MVT::i32: 264 return AMDILCC::IL_CC_U_LT; 265 case MVT::f32: 266 return AMDILCC::IL_CC_F_ULT; 267 case MVT::f64: 268 return AMDILCC::IL_CC_D_ULT; 269 case MVT::i64: 270 return AMDILCC::IL_CC_UL_LT; 271 default: 272 assert(0 && "Opcode combination not generated correctly!"); 273 return AMDILCC::COND_ERROR; 274 }; 275 case ISD::SETULE: 276 switch (type) { 277 case MVT::i1: 278 case MVT::i8: 279 case MVT::i16: 280 case MVT::i32: 281 return AMDILCC::IL_CC_U_LE; 282 case MVT::f32: 283 return AMDILCC::IL_CC_F_ULE; 284 case MVT::f64: 285 return AMDILCC::IL_CC_D_ULE; 286 case MVT::i64: 287 return AMDILCC::IL_CC_UL_LE; 288 default: 289 assert(0 && "Opcode combination not generated correctly!"); 290 return AMDILCC::COND_ERROR; 291 }; 292 case ISD::SETUNE: 293 switch (type) { 294 case MVT::i1: 295 case MVT::i8: 296 case MVT::i16: 297 case MVT::i32: 298 return AMDILCC::IL_CC_U_NE; 299 case MVT::f32: 300 return AMDILCC::IL_CC_F_UNE; 301 case MVT::f64: 302 return AMDILCC::IL_CC_D_UNE; 303 case MVT::i64: 304 return AMDILCC::IL_CC_UL_NE; 305 default: 306 assert(0 && "Opcode combination not generated correctly!"); 307 return AMDILCC::COND_ERROR; 308 }; 309 case ISD::SETUEQ: 310 switch (type) { 311 case MVT::i1: 312 case MVT::i8: 313 case MVT::i16: 314 case MVT::i32: 315 return AMDILCC::IL_CC_U_EQ; 316 case MVT::f32: 317 return AMDILCC::IL_CC_F_UEQ; 318 case MVT::f64: 319 return AMDILCC::IL_CC_D_UEQ; 320 case MVT::i64: 321 return AMDILCC::IL_CC_UL_EQ; 322 default: 323 assert(0 && "Opcode combination not generated correctly!"); 324 return AMDILCC::COND_ERROR; 325 }; 326 case ISD::SETOGT: 327 switch (type) { 328 case MVT::f32: 329 return AMDILCC::IL_CC_F_OGT; 330 case MVT::f64: 331 return AMDILCC::IL_CC_D_OGT; 332 case MVT::i1: 333 case MVT::i8: 334 case MVT::i16: 335 case MVT::i32: 
336 case MVT::i64: 337 default: 338 assert(0 && "Opcode combination not generated correctly!"); 339 return AMDILCC::COND_ERROR; 340 }; 341 case ISD::SETOGE: 342 switch (type) { 343 case MVT::f32: 344 return AMDILCC::IL_CC_F_OGE; 345 case MVT::f64: 346 return AMDILCC::IL_CC_D_OGE; 347 case MVT::i1: 348 case MVT::i8: 349 case MVT::i16: 350 case MVT::i32: 351 case MVT::i64: 352 default: 353 assert(0 && "Opcode combination not generated correctly!"); 354 return AMDILCC::COND_ERROR; 355 }; 356 case ISD::SETOLT: 357 switch (type) { 358 case MVT::f32: 359 return AMDILCC::IL_CC_F_OLT; 360 case MVT::f64: 361 return AMDILCC::IL_CC_D_OLT; 362 case MVT::i1: 363 case MVT::i8: 364 case MVT::i16: 365 case MVT::i32: 366 case MVT::i64: 367 default: 368 assert(0 && "Opcode combination not generated correctly!"); 369 return AMDILCC::COND_ERROR; 370 }; 371 case ISD::SETOLE: 372 switch (type) { 373 case MVT::f32: 374 return AMDILCC::IL_CC_F_OLE; 375 case MVT::f64: 376 return AMDILCC::IL_CC_D_OLE; 377 case MVT::i1: 378 case MVT::i8: 379 case MVT::i16: 380 case MVT::i32: 381 case MVT::i64: 382 default: 383 assert(0 && "Opcode combination not generated correctly!"); 384 return AMDILCC::COND_ERROR; 385 }; 386 case ISD::SETONE: 387 switch (type) { 388 case MVT::f32: 389 return AMDILCC::IL_CC_F_ONE; 390 case MVT::f64: 391 return AMDILCC::IL_CC_D_ONE; 392 case MVT::i1: 393 case MVT::i8: 394 case MVT::i16: 395 case MVT::i32: 396 case MVT::i64: 397 default: 398 assert(0 && "Opcode combination not generated correctly!"); 399 return AMDILCC::COND_ERROR; 400 }; 401 case ISD::SETOEQ: 402 switch (type) { 403 case MVT::f32: 404 return AMDILCC::IL_CC_F_OEQ; 405 case MVT::f64: 406 return AMDILCC::IL_CC_D_OEQ; 407 case MVT::i1: 408 case MVT::i8: 409 case MVT::i16: 410 case MVT::i32: 411 case MVT::i64: 412 default: 413 assert(0 && "Opcode combination not generated correctly!"); 414 return AMDILCC::COND_ERROR; 415 }; 416 }; 417} 418 419 static unsigned int 420translateToOpcode(uint64_t CCCode, unsigned 
int regClass) 421{ 422 switch (CCCode) { 423 case AMDILCC::IL_CC_D_EQ: 424 case AMDILCC::IL_CC_D_OEQ: 425 if (regClass == AMDIL::GPRV2F64RegClassID) { 426 return (unsigned int)AMDIL::DEQ_v2f64; 427 } else { 428 return (unsigned int)AMDIL::DEQ; 429 } 430 case AMDILCC::IL_CC_D_LE: 431 case AMDILCC::IL_CC_D_OLE: 432 case AMDILCC::IL_CC_D_ULE: 433 case AMDILCC::IL_CC_D_GE: 434 case AMDILCC::IL_CC_D_OGE: 435 case AMDILCC::IL_CC_D_UGE: 436 return (unsigned int)AMDIL::DGE; 437 case AMDILCC::IL_CC_D_LT: 438 case AMDILCC::IL_CC_D_OLT: 439 case AMDILCC::IL_CC_D_ULT: 440 case AMDILCC::IL_CC_D_GT: 441 case AMDILCC::IL_CC_D_OGT: 442 case AMDILCC::IL_CC_D_UGT: 443 return (unsigned int)AMDIL::DLT; 444 case AMDILCC::IL_CC_D_NE: 445 case AMDILCC::IL_CC_D_UNE: 446 return (unsigned int)AMDIL::DNE; 447 case AMDILCC::IL_CC_F_EQ: 448 case AMDILCC::IL_CC_F_OEQ: 449 return (unsigned int)AMDIL::FEQ; 450 case AMDILCC::IL_CC_F_LE: 451 case AMDILCC::IL_CC_F_ULE: 452 case AMDILCC::IL_CC_F_OLE: 453 case AMDILCC::IL_CC_F_GE: 454 case AMDILCC::IL_CC_F_UGE: 455 case AMDILCC::IL_CC_F_OGE: 456 return (unsigned int)AMDIL::FGE; 457 case AMDILCC::IL_CC_F_LT: 458 case AMDILCC::IL_CC_F_OLT: 459 case AMDILCC::IL_CC_F_ULT: 460 case AMDILCC::IL_CC_F_GT: 461 case AMDILCC::IL_CC_F_OGT: 462 case AMDILCC::IL_CC_F_UGT: 463 if (regClass == AMDIL::GPRV2F32RegClassID) { 464 return (unsigned int)AMDIL::FLT_v2f32; 465 } else if (regClass == AMDIL::GPRV4F32RegClassID) { 466 return (unsigned int)AMDIL::FLT_v4f32; 467 } else { 468 return (unsigned int)AMDIL::FLT; 469 } 470 case AMDILCC::IL_CC_F_NE: 471 case AMDILCC::IL_CC_F_UNE: 472 return (unsigned int)AMDIL::FNE; 473 case AMDILCC::IL_CC_I_EQ: 474 case AMDILCC::IL_CC_U_EQ: 475 if (regClass == AMDIL::GPRI32RegClassID 476 || regClass == AMDIL::GPRI8RegClassID 477 || regClass == AMDIL::GPRI16RegClassID) { 478 return (unsigned int)AMDIL::IEQ; 479 } else if (regClass == AMDIL::GPRV2I32RegClassID 480 || regClass == AMDIL::GPRV2I8RegClassID 481 || regClass == 
AMDIL::GPRV2I16RegClassID) { 482 return (unsigned int)AMDIL::IEQ_v2i32; 483 } else if (regClass == AMDIL::GPRV4I32RegClassID 484 || regClass == AMDIL::GPRV4I8RegClassID 485 || regClass == AMDIL::GPRV4I16RegClassID) { 486 return (unsigned int)AMDIL::IEQ_v4i32; 487 } else { 488 assert(!"Unknown reg class!"); 489 } 490 case AMDILCC::IL_CC_L_EQ: 491 case AMDILCC::IL_CC_UL_EQ: 492 return (unsigned int)AMDIL::LEQ; 493 case AMDILCC::IL_CC_I_GE: 494 case AMDILCC::IL_CC_I_LE: 495 if (regClass == AMDIL::GPRI32RegClassID 496 || regClass == AMDIL::GPRI8RegClassID 497 || regClass == AMDIL::GPRI16RegClassID) { 498 return (unsigned int)AMDIL::IGE; 499 } else if (regClass == AMDIL::GPRV2I32RegClassID 500 || regClass == AMDIL::GPRI8RegClassID 501 || regClass == AMDIL::GPRI16RegClassID) { 502 return (unsigned int)AMDIL::IGE_v2i32; 503 } else if (regClass == AMDIL::GPRV4I32RegClassID 504 || regClass == AMDIL::GPRI8RegClassID 505 || regClass == AMDIL::GPRI16RegClassID) { 506 return (unsigned int)AMDIL::IGE_v4i32; 507 } else { 508 assert(!"Unknown reg class!"); 509 } 510 case AMDILCC::IL_CC_I_LT: 511 case AMDILCC::IL_CC_I_GT: 512 if (regClass == AMDIL::GPRI32RegClassID 513 || regClass == AMDIL::GPRI8RegClassID 514 || regClass == AMDIL::GPRI16RegClassID) { 515 return (unsigned int)AMDIL::ILT; 516 } else if (regClass == AMDIL::GPRV2I32RegClassID 517 || regClass == AMDIL::GPRI8RegClassID 518 || regClass == AMDIL::GPRI16RegClassID) { 519 return (unsigned int)AMDIL::ILT_v2i32; 520 } else if (regClass == AMDIL::GPRV4I32RegClassID 521 || regClass == AMDIL::GPRI8RegClassID 522 || regClass == AMDIL::GPRI16RegClassID) { 523 return (unsigned int)AMDIL::ILT_v4i32; 524 } else { 525 assert(!"Unknown reg class!"); 526 } 527 case AMDILCC::IL_CC_L_GE: 528 return (unsigned int)AMDIL::LGE; 529 case AMDILCC::IL_CC_L_LE: 530 return (unsigned int)AMDIL::LLE; 531 case AMDILCC::IL_CC_L_LT: 532 return (unsigned int)AMDIL::LLT; 533 case AMDILCC::IL_CC_L_GT: 534 return (unsigned int)AMDIL::LGT; 535 case 
AMDILCC::IL_CC_I_NE: 536 case AMDILCC::IL_CC_U_NE: 537 if (regClass == AMDIL::GPRI32RegClassID 538 || regClass == AMDIL::GPRI8RegClassID 539 || regClass == AMDIL::GPRI16RegClassID) { 540 return (unsigned int)AMDIL::INE; 541 } else if (regClass == AMDIL::GPRV2I32RegClassID 542 || regClass == AMDIL::GPRI8RegClassID 543 || regClass == AMDIL::GPRI16RegClassID) { 544 return (unsigned int)AMDIL::INE_v2i32; 545 } else if (regClass == AMDIL::GPRV4I32RegClassID 546 || regClass == AMDIL::GPRI8RegClassID 547 || regClass == AMDIL::GPRI16RegClassID) { 548 return (unsigned int)AMDIL::INE_v4i32; 549 } else { 550 assert(!"Unknown reg class!"); 551 } 552 case AMDILCC::IL_CC_U_GE: 553 case AMDILCC::IL_CC_U_LE: 554 if (regClass == AMDIL::GPRI32RegClassID 555 || regClass == AMDIL::GPRI8RegClassID 556 || regClass == AMDIL::GPRI16RegClassID) { 557 return (unsigned int)AMDIL::UGE; 558 } else if (regClass == AMDIL::GPRV2I32RegClassID 559 || regClass == AMDIL::GPRI8RegClassID 560 || regClass == AMDIL::GPRI16RegClassID) { 561 return (unsigned int)AMDIL::UGE_v2i32; 562 } else if (regClass == AMDIL::GPRV4I32RegClassID 563 || regClass == AMDIL::GPRI8RegClassID 564 || regClass == AMDIL::GPRI16RegClassID) { 565 return (unsigned int)AMDIL::UGE_v4i32; 566 } else { 567 assert(!"Unknown reg class!"); 568 } 569 case AMDILCC::IL_CC_L_NE: 570 case AMDILCC::IL_CC_UL_NE: 571 return (unsigned int)AMDIL::LNE; 572 case AMDILCC::IL_CC_UL_GE: 573 return (unsigned int)AMDIL::ULGE; 574 case AMDILCC::IL_CC_UL_LE: 575 return (unsigned int)AMDIL::ULLE; 576 case AMDILCC::IL_CC_U_LT: 577 if (regClass == AMDIL::GPRI32RegClassID 578 || regClass == AMDIL::GPRI8RegClassID 579 || regClass == AMDIL::GPRI16RegClassID) { 580 return (unsigned int)AMDIL::ULT; 581 } else if (regClass == AMDIL::GPRV2I32RegClassID 582 || regClass == AMDIL::GPRI8RegClassID 583 || regClass == AMDIL::GPRI16RegClassID) { 584 return (unsigned int)AMDIL::ULT_v2i32; 585 } else if (regClass == AMDIL::GPRV4I32RegClassID 586 || regClass == 
AMDIL::GPRI8RegClassID 587 || regClass == AMDIL::GPRI16RegClassID) { 588 return (unsigned int)AMDIL::ULT_v4i32; 589 } else { 590 assert(!"Unknown reg class!"); 591 } 592 case AMDILCC::IL_CC_U_GT: 593 if (regClass == AMDIL::GPRI32RegClassID 594 || regClass == AMDIL::GPRI8RegClassID 595 || regClass == AMDIL::GPRI16RegClassID) { 596 return (unsigned int)AMDIL::UGT; 597 } else if (regClass == AMDIL::GPRV2I32RegClassID 598 || regClass == AMDIL::GPRI8RegClassID 599 || regClass == AMDIL::GPRI16RegClassID) { 600 return (unsigned int)AMDIL::UGT_v2i32; 601 } else if (regClass == AMDIL::GPRV4I32RegClassID 602 || regClass == AMDIL::GPRI8RegClassID 603 || regClass == AMDIL::GPRI16RegClassID) { 604 return (unsigned int)AMDIL::UGT_v4i32; 605 } else { 606 assert(!"Unknown reg class!"); 607 } 608 case AMDILCC::IL_CC_UL_LT: 609 return (unsigned int)AMDIL::ULLT; 610 case AMDILCC::IL_CC_UL_GT: 611 return (unsigned int)AMDIL::ULGT; 612 case AMDILCC::IL_CC_F_UEQ: 613 case AMDILCC::IL_CC_D_UEQ: 614 case AMDILCC::IL_CC_F_ONE: 615 case AMDILCC::IL_CC_D_ONE: 616 case AMDILCC::IL_CC_F_O: 617 case AMDILCC::IL_CC_F_UO: 618 case AMDILCC::IL_CC_D_O: 619 case AMDILCC::IL_CC_D_UO: 620 // we don't care 621 return 0; 622 623 } 624 errs()<<"Opcode: "<<CCCode<<"\n"; 625 assert(0 && "Unknown opcode retrieved"); 626 return 0; 627} 628SDValue 629AMDILTargetLowering::LowerMemArgument( 630 SDValue Chain, 631 CallingConv::ID CallConv, 632 const SmallVectorImpl<ISD::InputArg> &Ins, 633 DebugLoc dl, SelectionDAG &DAG, 634 const CCValAssign &VA, 635 MachineFrameInfo *MFI, 636 unsigned i) const 637{ 638 // Create the nodes corresponding to a load from this parameter slot. 639 ISD::ArgFlagsTy Flags = Ins[i].Flags; 640 641 bool AlwaysUseMutable = (CallConv==CallingConv::Fast) && 642 getTargetMachine().Options.GuaranteedTailCallOpt; 643 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); 644 645 // FIXME: For now, all byval parameter objects are marked mutable. This can 646 // be changed with more analysis. 
647 // In case of tail call optimization mark all arguments mutable. Since they 648 // could be overwritten by lowering of arguments in case of a tail call. 649 int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, 650 VA.getLocMemOffset(), isImmutable); 651 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 652 653 if (Flags.isByVal()) 654 return FIN; 655 return DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 656 MachinePointerInfo::getFixedStack(FI), 657 false, false, false, 0); 658} 659//===----------------------------------------------------------------------===// 660// TargetLowering Implementation Help Functions End 661//===----------------------------------------------------------------------===// 662//===----------------------------------------------------------------------===// 663// Instruction generation functions 664//===----------------------------------------------------------------------===// 665uint32_t 666AMDILTargetLowering::addExtensionInstructions( 667 uint32_t reg, bool signedShift, 668 unsigned int simpleVT) const 669{ 670 int shiftSize = 0; 671 uint32_t LShift, RShift; 672 switch(simpleVT) 673 { 674 default: 675 return reg; 676 case AMDIL::GPRI8RegClassID: 677 shiftSize = 24; 678 LShift = AMDIL::SHL_i8; 679 if (signedShift) { 680 RShift = AMDIL::SHR_i8; 681 } else { 682 RShift = AMDIL::USHR_i8; 683 } 684 break; 685 case AMDIL::GPRV2I8RegClassID: 686 shiftSize = 24; 687 LShift = AMDIL::SHL_v2i8; 688 if (signedShift) { 689 RShift = AMDIL::SHR_v2i8; 690 } else { 691 RShift = AMDIL::USHR_v2i8; 692 } 693 break; 694 case AMDIL::GPRV4I8RegClassID: 695 shiftSize = 24; 696 LShift = AMDIL::SHL_v4i8; 697 if (signedShift) { 698 RShift = AMDIL::SHR_v4i8; 699 } else { 700 RShift = AMDIL::USHR_v4i8; 701 } 702 break; 703 case AMDIL::GPRI16RegClassID: 704 shiftSize = 16; 705 LShift = AMDIL::SHL_i16; 706 if (signedShift) { 707 RShift = AMDIL::SHR_i16; 708 } else { 709 RShift = AMDIL::USHR_i16; 710 } 711 break; 712 case AMDIL::GPRV2I16RegClassID: 713 
shiftSize = 16; 714 LShift = AMDIL::SHL_v2i16; 715 if (signedShift) { 716 RShift = AMDIL::SHR_v2i16; 717 } else { 718 RShift = AMDIL::USHR_v2i16; 719 } 720 break; 721 case AMDIL::GPRV4I16RegClassID: 722 shiftSize = 16; 723 LShift = AMDIL::SHL_v4i16; 724 if (signedShift) { 725 RShift = AMDIL::SHR_v4i16; 726 } else { 727 RShift = AMDIL::USHR_v4i16; 728 } 729 break; 730 }; 731 uint32_t LoadReg = genVReg(simpleVT); 732 uint32_t tmp1 = genVReg(simpleVT); 733 uint32_t tmp2 = genVReg(simpleVT); 734 generateMachineInst(AMDIL::LOADCONST_i32, LoadReg).addImm(shiftSize); 735 generateMachineInst(LShift, tmp1, reg, LoadReg); 736 generateMachineInst(RShift, tmp2, tmp1, LoadReg); 737 return tmp2; 738} 739 740MachineOperand 741AMDILTargetLowering::convertToReg(MachineOperand op) const 742{ 743 if (op.isReg()) { 744 return op; 745 } else if (op.isImm()) { 746 uint32_t loadReg 747 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass); 748 generateMachineInst(AMDIL::LOADCONST_i32, loadReg) 749 .addImm(op.getImm()); 750 op.ChangeToRegister(loadReg, false); 751 } else if (op.isFPImm()) { 752 uint32_t loadReg 753 = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass); 754 generateMachineInst(AMDIL::LOADCONST_f32, loadReg) 755 .addFPImm(op.getFPImm()); 756 op.ChangeToRegister(loadReg, false); 757 } else if (op.isMBB()) { 758 op.ChangeToRegister(0, false); 759 } else if (op.isFI()) { 760 op.ChangeToRegister(0, false); 761 } else if (op.isCPI()) { 762 op.ChangeToRegister(0, false); 763 } else if (op.isJTI()) { 764 op.ChangeToRegister(0, false); 765 } else if (op.isGlobal()) { 766 op.ChangeToRegister(0, false); 767 } else if (op.isSymbol()) { 768 op.ChangeToRegister(0, false); 769 }/* else if (op.isMetadata()) { 770 op.ChangeToRegister(0, false); 771 }*/ 772 return op; 773} 774 775void 776AMDILTargetLowering::generateCMPInstr( 777 MachineInstr *MI, 778 MachineBasicBlock *BB, 779 const TargetInstrInfo& TII) 780const 781{ 782 MachineOperand DST = MI->getOperand(0); 783 MachineOperand CC = 
MI->getOperand(1); 784 MachineOperand LHS = MI->getOperand(2); 785 MachineOperand RHS = MI->getOperand(3); 786 int64_t ccCode = CC.getImm(); 787 unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass; 788 unsigned int opCode = translateToOpcode(ccCode, simpleVT); 789 DebugLoc DL = MI->getDebugLoc(); 790 MachineBasicBlock::iterator BBI = MI; 791 setPrivateData(BB, BBI, &DL, &TII); 792 if (!LHS.isReg()) { 793 LHS = convertToReg(LHS); 794 } 795 if (!RHS.isReg()) { 796 RHS = convertToReg(RHS); 797 } 798 switch (ccCode) { 799 case AMDILCC::IL_CC_I_EQ: 800 case AMDILCC::IL_CC_I_NE: 801 case AMDILCC::IL_CC_I_GE: 802 case AMDILCC::IL_CC_I_LT: 803 { 804 uint32_t lhsreg = addExtensionInstructions( 805 LHS.getReg(), true, simpleVT); 806 uint32_t rhsreg = addExtensionInstructions( 807 RHS.getReg(), true, simpleVT); 808 generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg); 809 } 810 break; 811 case AMDILCC::IL_CC_U_EQ: 812 case AMDILCC::IL_CC_U_NE: 813 case AMDILCC::IL_CC_U_GE: 814 case AMDILCC::IL_CC_U_LT: 815 case AMDILCC::IL_CC_D_EQ: 816 case AMDILCC::IL_CC_F_EQ: 817 case AMDILCC::IL_CC_F_OEQ: 818 case AMDILCC::IL_CC_D_OEQ: 819 case AMDILCC::IL_CC_D_NE: 820 case AMDILCC::IL_CC_F_NE: 821 case AMDILCC::IL_CC_F_UNE: 822 case AMDILCC::IL_CC_D_UNE: 823 case AMDILCC::IL_CC_D_GE: 824 case AMDILCC::IL_CC_F_GE: 825 case AMDILCC::IL_CC_D_OGE: 826 case AMDILCC::IL_CC_F_OGE: 827 case AMDILCC::IL_CC_D_LT: 828 case AMDILCC::IL_CC_F_LT: 829 case AMDILCC::IL_CC_F_OLT: 830 case AMDILCC::IL_CC_D_OLT: 831 generateMachineInst(opCode, DST.getReg(), 832 LHS.getReg(), RHS.getReg()); 833 break; 834 case AMDILCC::IL_CC_I_GT: 835 case AMDILCC::IL_CC_I_LE: 836 { 837 uint32_t lhsreg = addExtensionInstructions( 838 LHS.getReg(), true, simpleVT); 839 uint32_t rhsreg = addExtensionInstructions( 840 RHS.getReg(), true, simpleVT); 841 generateMachineInst(opCode, DST.getReg(), rhsreg, lhsreg); 842 } 843 break; 844 case AMDILCC::IL_CC_U_GT: 845 case AMDILCC::IL_CC_U_LE: 846 case AMDILCC::IL_CC_F_GT: 
847 case AMDILCC::IL_CC_D_GT: 848 case AMDILCC::IL_CC_F_OGT: 849 case AMDILCC::IL_CC_D_OGT: 850 case AMDILCC::IL_CC_F_LE: 851 case AMDILCC::IL_CC_D_LE: 852 case AMDILCC::IL_CC_D_OLE: 853 case AMDILCC::IL_CC_F_OLE: 854 generateMachineInst(opCode, DST.getReg(), 855 RHS.getReg(), LHS.getReg()); 856 break; 857 case AMDILCC::IL_CC_F_UGT: 858 case AMDILCC::IL_CC_F_ULE: 859 { 860 uint32_t VReg[4] = { 861 genVReg(simpleVT), genVReg(simpleVT), 862 genVReg(simpleVT), genVReg(simpleVT) 863 }; 864 generateMachineInst(opCode, VReg[0], 865 RHS.getReg(), LHS.getReg()); 866 generateMachineInst(AMDIL::FNE, VReg[1], 867 RHS.getReg(), RHS.getReg()); 868 generateMachineInst(AMDIL::FNE, VReg[2], 869 LHS.getReg(), LHS.getReg()); 870 generateMachineInst(AMDIL::BINARY_OR_f32, 871 VReg[3], VReg[0], VReg[1]); 872 generateMachineInst(AMDIL::BINARY_OR_f32, 873 DST.getReg(), VReg[2], VReg[3]); 874 } 875 break; 876 case AMDILCC::IL_CC_F_ULT: 877 case AMDILCC::IL_CC_F_UGE: 878 { 879 uint32_t VReg[4] = { 880 genVReg(simpleVT), genVReg(simpleVT), 881 genVReg(simpleVT), genVReg(simpleVT) 882 }; 883 generateMachineInst(opCode, VReg[0], 884 LHS.getReg(), RHS.getReg()); 885 generateMachineInst(AMDIL::FNE, VReg[1], 886 RHS.getReg(), RHS.getReg()); 887 generateMachineInst(AMDIL::FNE, VReg[2], 888 LHS.getReg(), LHS.getReg()); 889 generateMachineInst(AMDIL::BINARY_OR_f32, 890 VReg[3], VReg[0], VReg[1]); 891 generateMachineInst(AMDIL::BINARY_OR_f32, 892 DST.getReg(), VReg[2], VReg[3]); 893 } 894 break; 895 case AMDILCC::IL_CC_D_UGT: 896 case AMDILCC::IL_CC_D_ULE: 897 { 898 uint32_t regID = AMDIL::GPRF64RegClassID; 899 uint32_t VReg[4] = { 900 genVReg(regID), genVReg(regID), 901 genVReg(regID), genVReg(regID) 902 }; 903 // The result of a double comparison is a 32bit result 904 generateMachineInst(opCode, VReg[0], 905 RHS.getReg(), LHS.getReg()); 906 generateMachineInst(AMDIL::DNE, VReg[1], 907 RHS.getReg(), RHS.getReg()); 908 generateMachineInst(AMDIL::DNE, VReg[2], 909 LHS.getReg(), LHS.getReg()); 910 
generateMachineInst(AMDIL::BINARY_OR_f32, 911 VReg[3], VReg[0], VReg[1]); 912 generateMachineInst(AMDIL::BINARY_OR_f32, 913 DST.getReg(), VReg[2], VReg[3]); 914 } 915 break; 916 case AMDILCC::IL_CC_D_UGE: 917 case AMDILCC::IL_CC_D_ULT: 918 { 919 uint32_t regID = AMDIL::GPRF64RegClassID; 920 uint32_t VReg[4] = { 921 genVReg(regID), genVReg(regID), 922 genVReg(regID), genVReg(regID) 923 }; 924 // The result of a double comparison is a 32bit result 925 generateMachineInst(opCode, VReg[0], 926 LHS.getReg(), RHS.getReg()); 927 generateMachineInst(AMDIL::DNE, VReg[1], 928 RHS.getReg(), RHS.getReg()); 929 generateMachineInst(AMDIL::DNE, VReg[2], 930 LHS.getReg(), LHS.getReg()); 931 generateMachineInst(AMDIL::BINARY_OR_f32, 932 VReg[3], VReg[0], VReg[1]); 933 generateMachineInst(AMDIL::BINARY_OR_f32, 934 DST.getReg(), VReg[2], VReg[3]); 935 } 936 break; 937 case AMDILCC::IL_CC_F_UEQ: 938 { 939 uint32_t VReg[4] = { 940 genVReg(simpleVT), genVReg(simpleVT), 941 genVReg(simpleVT), genVReg(simpleVT) 942 }; 943 generateMachineInst(AMDIL::FEQ, VReg[0], 944 LHS.getReg(), RHS.getReg()); 945 generateMachineInst(AMDIL::FNE, VReg[1], 946 LHS.getReg(), LHS.getReg()); 947 generateMachineInst(AMDIL::FNE, VReg[2], 948 RHS.getReg(), RHS.getReg()); 949 generateMachineInst(AMDIL::BINARY_OR_f32, 950 VReg[3], VReg[0], VReg[1]); 951 generateMachineInst(AMDIL::BINARY_OR_f32, 952 DST.getReg(), VReg[2], VReg[3]); 953 } 954 break; 955 case AMDILCC::IL_CC_F_ONE: 956 { 957 uint32_t VReg[4] = { 958 genVReg(simpleVT), genVReg(simpleVT), 959 genVReg(simpleVT), genVReg(simpleVT) 960 }; 961 generateMachineInst(AMDIL::FNE, VReg[0], 962 LHS.getReg(), RHS.getReg()); 963 generateMachineInst(AMDIL::FEQ, VReg[1], 964 LHS.getReg(), LHS.getReg()); 965 generateMachineInst(AMDIL::FEQ, VReg[2], 966 RHS.getReg(), RHS.getReg()); 967 generateMachineInst(AMDIL::BINARY_AND_f32, 968 VReg[3], VReg[0], VReg[1]); 969 generateMachineInst(AMDIL::BINARY_AND_f32, 970 DST.getReg(), VReg[2], VReg[3]); 971 } 972 break; 973 case 
AMDILCC::IL_CC_D_UEQ: 974 { 975 uint32_t regID = AMDIL::GPRF64RegClassID; 976 uint32_t VReg[4] = { 977 genVReg(regID), genVReg(regID), 978 genVReg(regID), genVReg(regID) 979 }; 980 // The result of a double comparison is a 32bit result 981 generateMachineInst(AMDIL::DEQ, VReg[0], 982 LHS.getReg(), RHS.getReg()); 983 generateMachineInst(AMDIL::DNE, VReg[1], 984 LHS.getReg(), LHS.getReg()); 985 generateMachineInst(AMDIL::DNE, VReg[2], 986 RHS.getReg(), RHS.getReg()); 987 generateMachineInst(AMDIL::BINARY_OR_f32, 988 VReg[3], VReg[0], VReg[1]); 989 generateMachineInst(AMDIL::BINARY_OR_f32, 990 DST.getReg(), VReg[2], VReg[3]); 991 992 } 993 break; 994 case AMDILCC::IL_CC_D_ONE: 995 { 996 uint32_t regID = AMDIL::GPRF64RegClassID; 997 uint32_t VReg[4] = { 998 genVReg(regID), genVReg(regID), 999 genVReg(regID), genVReg(regID) 1000 }; 1001 // The result of a double comparison is a 32bit result 1002 generateMachineInst(AMDIL::DNE, VReg[0], 1003 LHS.getReg(), RHS.getReg()); 1004 generateMachineInst(AMDIL::DEQ, VReg[1], 1005 LHS.getReg(), LHS.getReg()); 1006 generateMachineInst(AMDIL::DEQ, VReg[2], 1007 RHS.getReg(), RHS.getReg()); 1008 generateMachineInst(AMDIL::BINARY_AND_f32, 1009 VReg[3], VReg[0], VReg[1]); 1010 generateMachineInst(AMDIL::BINARY_AND_f32, 1011 DST.getReg(), VReg[2], VReg[3]); 1012 1013 } 1014 break; 1015 case AMDILCC::IL_CC_F_O: 1016 { 1017 uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) }; 1018 generateMachineInst(AMDIL::FEQ, VReg[0], 1019 RHS.getReg(), RHS.getReg()); 1020 generateMachineInst(AMDIL::FEQ, VReg[1], 1021 LHS.getReg(), LHS.getReg()); 1022 generateMachineInst(AMDIL::BINARY_AND_f32, 1023 DST.getReg(), VReg[0], VReg[1]); 1024 } 1025 break; 1026 case AMDILCC::IL_CC_D_O: 1027 { 1028 uint32_t regID = AMDIL::GPRF64RegClassID; 1029 uint32_t VReg[2] = { genVReg(regID), genVReg(regID) }; 1030 // The result of a double comparison is a 32bit result 1031 generateMachineInst(AMDIL::DEQ, VReg[0], 1032 RHS.getReg(), RHS.getReg()); 1033 
generateMachineInst(AMDIL::DEQ, VReg[1], 1034 LHS.getReg(), LHS.getReg()); 1035 generateMachineInst(AMDIL::BINARY_AND_f32, 1036 DST.getReg(), VReg[0], VReg[1]); 1037 } 1038 break; 1039 case AMDILCC::IL_CC_F_UO: 1040 { 1041 uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) }; 1042 generateMachineInst(AMDIL::FNE, VReg[0], 1043 RHS.getReg(), RHS.getReg()); 1044 generateMachineInst(AMDIL::FNE, VReg[1], 1045 LHS.getReg(), LHS.getReg()); 1046 generateMachineInst(AMDIL::BINARY_OR_f32, 1047 DST.getReg(), VReg[0], VReg[1]); 1048 } 1049 break; 1050 case AMDILCC::IL_CC_D_UO: 1051 { 1052 uint32_t regID = AMDIL::GPRF64RegClassID; 1053 uint32_t VReg[2] = { genVReg(regID), genVReg(regID) }; 1054 // The result of a double comparison is a 32bit result 1055 generateMachineInst(AMDIL::DNE, VReg[0], 1056 RHS.getReg(), RHS.getReg()); 1057 generateMachineInst(AMDIL::DNE, VReg[1], 1058 LHS.getReg(), LHS.getReg()); 1059 generateMachineInst(AMDIL::BINARY_OR_f32, 1060 DST.getReg(), VReg[0], VReg[1]); 1061 } 1062 break; 1063 case AMDILCC::IL_CC_L_LE: 1064 case AMDILCC::IL_CC_L_GE: 1065 case AMDILCC::IL_CC_L_EQ: 1066 case AMDILCC::IL_CC_L_NE: 1067 case AMDILCC::IL_CC_L_LT: 1068 case AMDILCC::IL_CC_L_GT: 1069 case AMDILCC::IL_CC_UL_LE: 1070 case AMDILCC::IL_CC_UL_GE: 1071 case AMDILCC::IL_CC_UL_EQ: 1072 case AMDILCC::IL_CC_UL_NE: 1073 case AMDILCC::IL_CC_UL_LT: 1074 case AMDILCC::IL_CC_UL_GT: 1075 { 1076 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 1077 &this->getTargetMachine())->getSubtargetImpl(); 1078 if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)) { 1079 generateMachineInst(opCode, DST.getReg(), LHS.getReg(), RHS.getReg()); 1080 } else { 1081 generateLongRelational(MI, opCode); 1082 } 1083 } 1084 break; 1085 case AMDILCC::COND_ERROR: 1086 assert(0 && "Invalid CC code"); 1087 break; 1088 }; 1089} 1090 1091//===----------------------------------------------------------------------===// 1092// TargetLowering Class Implementation Begins 
//===----------------------------------------------------------------------===//

// Constructor: registers the register classes supported by the current
// subtarget and fills in the TargetLowering action tables (Legal / Custom /
// Expand) that the generic SelectionDAG legalizer consults for every
// (operation, value type) pair when lowering LLVM IR to AMDIL.
AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF())
{
  // Every value type the backend knows about (scalars and vectors).
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  // Scalar integer types only.
  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  // Scalar floating-point types only.
  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  // Vector types only.
  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  // NOTE(review): this assumes the TargetMachine is always an
  // AMDILTargetMachine; a static_cast would express that known-derived
  // relationship more safely than reinterpret_cast -- confirm and change.
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  // These are the current register classes that are
  // supported

  addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass);
  addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass);

  // Optional types are only registered when the device reports hardware
  // support for them.
  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass);
    addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass);
  }
  if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
    addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass);
    addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass);
    addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass);
    setOperationAction(ISD::Constant , MVT::i8 , Legal);
  }
  if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
    addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass);
    addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass);
    addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass);
    setOperationAction(ISD::Constant , MVT::i16 , Legal);
  }
  addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass);
  addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass);
  addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass);
  addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass);
  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
    addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass);
    addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass);
  }

  // Actions that apply uniformly to every supported type.
  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
    // We cannot sextinreg, expand to shifts
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
    setOperationAction(ISD::FP_ROUND, VT, Expand);
    setOperationAction(ISD::OR, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADD, VT, Custom);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_CC, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
    setOperationAction(ISDBITCAST, VT, Custom);
    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SELECT_CC, VT, Custom);
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    // 64-bit division is not given the custom SDIV/UDIV lowering; it is
    // configured separately below.
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
      setOperationAction(ISD::UDIV, VT, Custom);
    }
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::FP_ROUND, VT, Custom);
    // NOTE(review): ISD::SETOLT..ISD::SETULE are ISD::CondCode enumerators,
    // not ISD opcodes. Passing them to setOperationAction sets the action
    // of whichever opcode happens to share the same numeric value; this was
    // probably intended to be setCondCodeAction -- confirm before fixing.
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // GPU also does not have divrem function for signed or unsigned
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    setOperationAction(ISD::FP_ROUND, VT, Expand);

    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // GPU doesn't have a rotl, rotr, or byteswap instruction
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // GPU doesn't have any counting operators
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
  {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
    setOperationAction(ISD::FP_ROUND, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);

  }
  setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
  // 64-bit integer support: older compiler/SC versions (and the HD4XXX
  // generation) need a custom i64 multiply lowering.
  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
    if (stm->calVersion() < CAL_VERSION_SC_139
        || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
      setOperationAction(ISD::MUL, MVT::i64, Custom);
    }
    setOperationAction(ISD::SUB, MVT::i64, Custom);
    setOperationAction(ISD::ADD, MVT::i64, Custom);
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
    setOperationAction(ISD::SUB, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant , MVT::i64 , Legal);
    setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    // we support loading/storing v2f64 but not operations on the type
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP , MVT::f64 , Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SETCC, MVT::Other, Custom);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
  setOperationAction(ISD::FDIV, MVT::v4f32, Custom);

  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
  setOperationAction(ISD::ConstantFP , MVT::f32 , Legal);
  setOperationAction(ISD::Constant , MVT::i32 , Legal);
  setOperationAction(ISD::TRAP , MVT::Other , Legal);

  setStackPointerRegisterToSaveRestore(AMDIL::SP);
  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  // Selects and jumps are expensive on this target, so favor branchless /
  // flattened forms only when the generic combiner decides otherwise.
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);
  computeRegisterProperties();

  // Effectively unlimited inline expansion of memcpy/memmove/memset.
  maxStoresPerMemcpy  = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset  = 4096;

  // NOTE(review): numTypes and friends are local variables, not macros;
  // these #undef directives are no-ops and could be deleted.
#undef numTypes
#undef numIntTypes
#undef numVectorTypes
#undef numFloatTypes
}

// Returns the printable name of an AMDIL-specific SelectionDAG node for
// debug dumps, or 0 for opcodes this backend does not define.
const char *
AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  switch (Opcode) {
    default: return 0;
    case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
    case AMDILISD::DP_TO_FP: return "AMDILISD::DP_TO_FP";
    case AMDILISD::FP_TO_DP: return "AMDILISD::FP_TO_DP";
    case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
    case AMDILISD::CMOV: return "AMDILISD::CMOV";
    case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
    case AMDILISD::INEGATE: return "AMDILISD::INEGATE";
    case AMDILISD::MAD: return "AMDILISD::MAD";
    case AMDILISD::UMAD: return "AMDILISD::UMAD";
    case AMDILISD::CALL: return "AMDILISD::CALL";
    case AMDILISD::RET: return "AMDILISD::RET";
    case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
    case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
    case AMDILISD::ADD: return "AMDILISD::ADD";
    case AMDILISD::UMUL: return "AMDILISD::UMUL";
    case
AMDILISD::AND: return "AMDILISD::AND";
    // Logical / bitwise helper nodes.
    case AMDILISD::OR: return "AMDILISD::OR";
    case AMDILISD::NOT: return "AMDILISD::NOT";
    case AMDILISD::XOR: return "AMDILISD::XOR";
    case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
    case AMDILISD::SMAX: return "AMDILISD::SMAX";
    case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
    case AMDILISD::MOVE: return "AMDILISD::MOVE";
    // Vector build / insert / extract / concat helper nodes.
    case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
    case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
    case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
    case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
    // 64-bit long (L*) and double (D*) pack / component-access nodes.
    case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
    case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
    case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
    case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
    case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
    case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
    case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
    case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
    case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
    case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
    case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
    case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
    // Comparison and control-flow nodes.
    case AMDILISD::CMP: return "AMDILISD::CMP";
    case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
    case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
    case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
    case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
    case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
    case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
    case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
    case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
    case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
    case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
    case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
    case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
    // Atomics on global memory (ATOM_G_*); the _NORET variants do not
    // return the pre-operation value.
    case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
    case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
    case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
    case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
    case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
    case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
    case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
    case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
    case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
    case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
    case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
    case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
    case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
    case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
    case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
    case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
    case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
    case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
    case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
    case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
    case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
    case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
    case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
    case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
    case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
    case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
    case AMDILISD::ATOM_G_XCHG_NORET: return
        "AMDILISD::ATOM_G_XCHG_NORET";
    case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
    // Atomics on local (LDS) memory (ATOM_L_*).
    case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
    case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
    case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
    case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
    case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
    case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
    case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
    case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
    case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
    case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
    case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
    case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
    case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
    case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
    case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
    case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
    case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
    case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
    case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
    case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
    case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
    case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
    case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
    case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
    case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
    case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
    case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
    // Atomics on region memory (ATOM_R_*).
    case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
    case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
    case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
    case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
    case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
    case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
    case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
    case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
    case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
    case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
    case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
    case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
    case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
    case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
    case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
    case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
    case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
    case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
    case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
    case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
    case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
    case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
    case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
    case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
    case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
    case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
    case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
    case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
    case AMDILISD::ATOM_R_XCHG_NORET: return
"AMDILISD::ATOM_R_XCHG_NORET"; 1524 case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET"; 1525 case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC"; 1526 case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET"; 1527 case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME"; 1528 case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET"; 1529 case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ"; 1530 case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE"; 1531 case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0"; 1532 case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1"; 1533 case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ"; 1534 case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE"; 1535 case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0"; 1536 case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1"; 1537 1538 }; 1539} 1540bool 1541AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 1542 const CallInst &I, unsigned Intrinsic) const 1543{ 1544 if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic 1545 || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) { 1546 return false; 1547 } 1548 bool bitCastToInt = false; 1549 unsigned IntNo; 1550 bool isRet = true; 1551 const AMDILSubtarget *STM = &this->getTargetMachine() 1552 .getSubtarget<AMDILSubtarget>(); 1553 switch (Intrinsic) { 1554 default: return false; // Don't custom lower most intrinsics. 
1555 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32: 1556 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32: 1557 IntNo = AMDILISD::ATOM_G_ADD; break; 1558 case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret: 1559 case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret: 1560 isRet = false; 1561 IntNo = AMDILISD::ATOM_G_ADD_NORET; break; 1562 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32: 1563 case AMDGPUIntrinsic::AMDIL_atomic_add_li32: 1564 IntNo = AMDILISD::ATOM_L_ADD; break; 1565 case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret: 1566 case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret: 1567 isRet = false; 1568 IntNo = AMDILISD::ATOM_L_ADD_NORET; break; 1569 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32: 1570 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32: 1571 IntNo = AMDILISD::ATOM_R_ADD; break; 1572 case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret: 1573 case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret: 1574 isRet = false; 1575 IntNo = AMDILISD::ATOM_R_ADD_NORET; break; 1576 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32: 1577 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32: 1578 IntNo = AMDILISD::ATOM_G_AND; break; 1579 case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret: 1580 case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret: 1581 isRet = false; 1582 IntNo = AMDILISD::ATOM_G_AND_NORET; break; 1583 case AMDGPUIntrinsic::AMDIL_atomic_and_li32: 1584 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32: 1585 IntNo = AMDILISD::ATOM_L_AND; break; 1586 case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret: 1587 case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret: 1588 isRet = false; 1589 IntNo = AMDILISD::ATOM_L_AND_NORET; break; 1590 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32: 1591 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32: 1592 IntNo = AMDILISD::ATOM_R_AND; break; 1593 case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret: 1594 case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret: 1595 isRet = false; 1596 IntNo = AMDILISD::ATOM_R_AND_NORET; break; 1597 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32: 1598 case 
AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32: 1599 IntNo = AMDILISD::ATOM_G_CMPXCHG; break; 1600 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret: 1601 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret: 1602 isRet = false; 1603 IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break; 1604 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32: 1605 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32: 1606 IntNo = AMDILISD::ATOM_L_CMPXCHG; break; 1607 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret: 1608 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret: 1609 isRet = false; 1610 IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break; 1611 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32: 1612 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32: 1613 IntNo = AMDILISD::ATOM_R_CMPXCHG; break; 1614 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret: 1615 case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret: 1616 isRet = false; 1617 IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break; 1618 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32: 1619 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32: 1620 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1621 IntNo = AMDILISD::ATOM_G_DEC; 1622 } else { 1623 IntNo = AMDILISD::ATOM_G_SUB; 1624 } 1625 break; 1626 case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret: 1627 case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret: 1628 isRet = false; 1629 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1630 IntNo = AMDILISD::ATOM_G_DEC_NORET; 1631 } else { 1632 IntNo = AMDILISD::ATOM_G_SUB_NORET; 1633 } 1634 break; 1635 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32: 1636 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32: 1637 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1638 IntNo = AMDILISD::ATOM_L_DEC; 1639 } else { 1640 IntNo = AMDILISD::ATOM_L_SUB; 1641 } 1642 break; 1643 case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret: 1644 case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret: 1645 isRet = false; 1646 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1647 IntNo = 
AMDILISD::ATOM_L_DEC_NORET; 1648 } else { 1649 IntNo = AMDILISD::ATOM_L_SUB_NORET; 1650 } 1651 break; 1652 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32: 1653 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32: 1654 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1655 IntNo = AMDILISD::ATOM_R_DEC; 1656 } else { 1657 IntNo = AMDILISD::ATOM_R_SUB; 1658 } 1659 break; 1660 case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret: 1661 case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret: 1662 isRet = false; 1663 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1664 IntNo = AMDILISD::ATOM_R_DEC_NORET; 1665 } else { 1666 IntNo = AMDILISD::ATOM_R_SUB_NORET; 1667 } 1668 break; 1669 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32: 1670 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32: 1671 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1672 IntNo = AMDILISD::ATOM_G_INC; 1673 } else { 1674 IntNo = AMDILISD::ATOM_G_ADD; 1675 } 1676 break; 1677 case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret: 1678 case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret: 1679 isRet = false; 1680 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1681 IntNo = AMDILISD::ATOM_G_INC_NORET; 1682 } else { 1683 IntNo = AMDILISD::ATOM_G_ADD_NORET; 1684 } 1685 break; 1686 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32: 1687 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32: 1688 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1689 IntNo = AMDILISD::ATOM_L_INC; 1690 } else { 1691 IntNo = AMDILISD::ATOM_L_ADD; 1692 } 1693 break; 1694 case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret: 1695 case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret: 1696 isRet = false; 1697 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1698 IntNo = AMDILISD::ATOM_L_INC_NORET; 1699 } else { 1700 IntNo = AMDILISD::ATOM_L_ADD_NORET; 1701 } 1702 break; 1703 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32: 1704 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32: 1705 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1706 IntNo = AMDILISD::ATOM_R_INC; 1707 } else { 1708 IntNo = AMDILISD::ATOM_R_ADD; 
1709 } 1710 break; 1711 case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret: 1712 case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret: 1713 isRet = false; 1714 if (STM->calVersion() >= CAL_VERSION_SC_136) { 1715 IntNo = AMDILISD::ATOM_R_INC_NORET; 1716 } else { 1717 IntNo = AMDILISD::ATOM_R_ADD_NORET; 1718 } 1719 break; 1720 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32: 1721 IntNo = AMDILISD::ATOM_G_MAX; break; 1722 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32: 1723 IntNo = AMDILISD::ATOM_G_UMAX; break; 1724 case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret: 1725 isRet = false; 1726 IntNo = AMDILISD::ATOM_G_MAX_NORET; break; 1727 case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret: 1728 isRet = false; 1729 IntNo = AMDILISD::ATOM_G_UMAX_NORET; break; 1730 case AMDGPUIntrinsic::AMDIL_atomic_max_li32: 1731 IntNo = AMDILISD::ATOM_L_MAX; break; 1732 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32: 1733 IntNo = AMDILISD::ATOM_L_UMAX; break; 1734 case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret: 1735 isRet = false; 1736 IntNo = AMDILISD::ATOM_L_MAX_NORET; break; 1737 case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret: 1738 isRet = false; 1739 IntNo = AMDILISD::ATOM_L_UMAX_NORET; break; 1740 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32: 1741 IntNo = AMDILISD::ATOM_R_MAX; break; 1742 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32: 1743 IntNo = AMDILISD::ATOM_R_UMAX; break; 1744 case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret: 1745 isRet = false; 1746 IntNo = AMDILISD::ATOM_R_MAX_NORET; break; 1747 case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret: 1748 isRet = false; 1749 IntNo = AMDILISD::ATOM_R_UMAX_NORET; break; 1750 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32: 1751 IntNo = AMDILISD::ATOM_G_MIN; break; 1752 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32: 1753 IntNo = AMDILISD::ATOM_G_UMIN; break; 1754 case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret: 1755 isRet = false; 1756 IntNo = AMDILISD::ATOM_G_MIN_NORET; break; 1757 case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret: 1758 isRet = 
false; 1759 IntNo = AMDILISD::ATOM_G_UMIN_NORET; break; 1760 case AMDGPUIntrinsic::AMDIL_atomic_min_li32: 1761 IntNo = AMDILISD::ATOM_L_MIN; break; 1762 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32: 1763 IntNo = AMDILISD::ATOM_L_UMIN; break; 1764 case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret: 1765 isRet = false; 1766 IntNo = AMDILISD::ATOM_L_MIN_NORET; break; 1767 case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret: 1768 isRet = false; 1769 IntNo = AMDILISD::ATOM_L_UMIN_NORET; break; 1770 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32: 1771 IntNo = AMDILISD::ATOM_R_MIN; break; 1772 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32: 1773 IntNo = AMDILISD::ATOM_R_UMIN; break; 1774 case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret: 1775 isRet = false; 1776 IntNo = AMDILISD::ATOM_R_MIN_NORET; break; 1777 case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret: 1778 isRet = false; 1779 IntNo = AMDILISD::ATOM_R_UMIN_NORET; break; 1780 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32: 1781 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32: 1782 IntNo = AMDILISD::ATOM_G_OR; break; 1783 case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret: 1784 case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret: 1785 isRet = false; 1786 IntNo = AMDILISD::ATOM_G_OR_NORET; break; 1787 case AMDGPUIntrinsic::AMDIL_atomic_or_li32: 1788 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32: 1789 IntNo = AMDILISD::ATOM_L_OR; break; 1790 case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret: 1791 case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret: 1792 isRet = false; 1793 IntNo = AMDILISD::ATOM_L_OR_NORET; break; 1794 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32: 1795 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32: 1796 IntNo = AMDILISD::ATOM_R_OR; break; 1797 case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret: 1798 case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret: 1799 isRet = false; 1800 IntNo = AMDILISD::ATOM_R_OR_NORET; break; 1801 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32: 1802 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32: 1803 IntNo = 
AMDILISD::ATOM_G_SUB; break; 1804 case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret: 1805 case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret: 1806 isRet = false; 1807 IntNo = AMDILISD::ATOM_G_SUB_NORET; break; 1808 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32: 1809 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32: 1810 IntNo = AMDILISD::ATOM_L_SUB; break; 1811 case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret: 1812 case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret: 1813 isRet = false; 1814 IntNo = AMDILISD::ATOM_L_SUB_NORET; break; 1815 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32: 1816 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32: 1817 IntNo = AMDILISD::ATOM_R_SUB; break; 1818 case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret: 1819 case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret: 1820 isRet = false; 1821 IntNo = AMDILISD::ATOM_R_SUB_NORET; break; 1822 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32: 1823 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32: 1824 IntNo = AMDILISD::ATOM_G_RSUB; break; 1825 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret: 1826 case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret: 1827 isRet = false; 1828 IntNo = AMDILISD::ATOM_G_RSUB_NORET; break; 1829 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32: 1830 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32: 1831 IntNo = AMDILISD::ATOM_L_RSUB; break; 1832 case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret: 1833 case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret: 1834 isRet = false; 1835 IntNo = AMDILISD::ATOM_L_RSUB_NORET; break; 1836 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32: 1837 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32: 1838 IntNo = AMDILISD::ATOM_R_RSUB; break; 1839 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret: 1840 case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret: 1841 isRet = false; 1842 IntNo = AMDILISD::ATOM_R_RSUB_NORET; break; 1843 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32: 1844 bitCastToInt = true; 1845 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32: 1846 case 
AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32: 1847 IntNo = AMDILISD::ATOM_G_XCHG; break; 1848 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret: 1849 bitCastToInt = true; 1850 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret: 1851 case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret: 1852 isRet = false; 1853 IntNo = AMDILISD::ATOM_G_XCHG_NORET; break; 1854 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32: 1855 bitCastToInt = true; 1856 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32: 1857 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32: 1858 IntNo = AMDILISD::ATOM_L_XCHG; break; 1859 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret: 1860 bitCastToInt = true; 1861 case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret: 1862 case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret: 1863 isRet = false; 1864 IntNo = AMDILISD::ATOM_L_XCHG_NORET; break; 1865 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32: 1866 bitCastToInt = true; 1867 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32: 1868 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32: 1869 IntNo = AMDILISD::ATOM_R_XCHG; break; 1870 case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret: 1871 bitCastToInt = true; 1872 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret: 1873 case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret: 1874 isRet = false; 1875 IntNo = AMDILISD::ATOM_R_XCHG_NORET; break; 1876 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32: 1877 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32: 1878 IntNo = AMDILISD::ATOM_G_XOR; break; 1879 case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret: 1880 case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret: 1881 isRet = false; 1882 IntNo = AMDILISD::ATOM_G_XOR_NORET; break; 1883 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32: 1884 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32: 1885 IntNo = AMDILISD::ATOM_L_XOR; break; 1886 case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret: 1887 case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret: 1888 isRet = false; 1889 IntNo = AMDILISD::ATOM_L_XOR_NORET; break; 1890 case 
AMDGPUIntrinsic::AMDIL_atomic_xor_ri32: 1891 case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32: 1892 IntNo = AMDILISD::ATOM_R_XOR; break; 1893 case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret: 1894 case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret: 1895 isRet = false; 1896 IntNo = AMDILISD::ATOM_R_XOR_NORET; break; 1897 case AMDGPUIntrinsic::AMDIL_append_alloc_i32: 1898 IntNo = AMDILISD::APPEND_ALLOC; break; 1899 case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret: 1900 isRet = false; 1901 IntNo = AMDILISD::APPEND_ALLOC_NORET; break; 1902 case AMDGPUIntrinsic::AMDIL_append_consume_i32: 1903 IntNo = AMDILISD::APPEND_CONSUME; break; 1904 case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret: 1905 isRet = false; 1906 IntNo = AMDILISD::APPEND_CONSUME_NORET; break; 1907 }; 1908 const AMDILSubtarget *stm = &this->getTargetMachine() 1909 .getSubtarget<AMDILSubtarget>(); 1910 AMDILKernelManager *KM = const_cast<AMDILKernelManager*>( 1911 stm->getKernelManager()); 1912 KM->setOutputInst(); 1913 1914 Info.opc = IntNo; 1915 Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32; 1916 Info.ptrVal = I.getOperand(0); 1917 Info.offset = 0; 1918 Info.align = 4; 1919 Info.vol = true; 1920 Info.readMem = isRet; 1921 Info.writeMem = true; 1922 return true; 1923} 1924// The backend supports 32 and 64 bit floating point immediates 1925bool 1926AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const 1927{ 1928 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 1929 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { 1930 return true; 1931 } else { 1932 return false; 1933 } 1934} 1935 1936bool 1937AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const 1938{ 1939 if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 1940 || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { 1941 return false; 1942 } else { 1943 return true; 1944 } 1945} 1946 1947 1948// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to 1949// be zero. 
Op is expected to be a target specific node. Used by DAG 1950// combiner. 1951 1952void 1953AMDILTargetLowering::computeMaskedBitsForTargetNode( 1954 const SDValue Op, 1955 APInt &KnownZero, 1956 APInt &KnownOne, 1957 const SelectionDAG &DAG, 1958 unsigned Depth) const 1959{ 1960 APInt KnownZero2; 1961 APInt KnownOne2; 1962 KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything 1963 switch (Op.getOpcode()) { 1964 default: break; 1965 case AMDILISD::SELECT_CC: 1966 DAG.ComputeMaskedBits( 1967 Op.getOperand(1), 1968 KnownZero, 1969 KnownOne, 1970 Depth + 1 1971 ); 1972 DAG.ComputeMaskedBits( 1973 Op.getOperand(0), 1974 KnownZero2, 1975 KnownOne2 1976 ); 1977 assert((KnownZero & KnownOne) == 0 1978 && "Bits known to be one AND zero?"); 1979 assert((KnownZero2 & KnownOne2) == 0 1980 && "Bits known to be one AND zero?"); 1981 // Only known if known in both the LHS and RHS 1982 KnownOne &= KnownOne2; 1983 KnownZero &= KnownZero2; 1984 break; 1985 }; 1986} 1987 1988// This is the function that determines which calling convention should 1989// be used. Currently there is only one calling convention 1990CCAssignFn* 1991AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const 1992{ 1993 //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 1994 return CC_AMDIL32; 1995} 1996 1997// LowerCallResult - Lower the result values of an ISD::CALL into the 1998// appropriate copies out of appropriate physical registers. This assumes that 1999// Chain/InFlag are the input chain/flag to use, and that TheCall is the call 2000// being lowered. The returns a SDNode with the same number of values as the 2001// ISD::CALL. 
SDValue
AMDILTargetLowering::LowerCallResult(
    SDValue Chain,
    SDValue InFlag,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  // Assign locations to each value returned by this call
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
      getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    EVT CopyVT = RVLocs[i].getValVT();
    if (RVLocs[i].isRegLoc()) {
      // The CopyFromReg node produces (value, chain, glue) as results
      // 0, 1 and 2. Thread the chain (result 1) and glue (result 2)
      // through each copy so successive register reads stay ordered.
      Chain = DAG.getCopyFromReg(
          Chain,
          dl,
          RVLocs[i].getLocReg(),
          CopyVT,
          InFlag
          ).getValue(1);
      SDValue Val = Chain.getValue(0); // the copied-out value itself
      InFlag = Chain.getValue(2);      // glue consumed by the next copy
      InVals.push_back(Val);
    }
  }

  return Chain;

}

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

MachineBasicBlock *
AMDILTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr *MI, MachineBasicBlock *BB) const
{
  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
    // ExpandCaseToAllTypes expands into one case label per type variant
    // of the CMP pseudo; each is replaced by a generated compare
    // sequence and the original pseudo instruction is deleted.
    ExpandCaseToAllTypes(AMDIL::CMP);
      generateCMPInstr(MI, BB, TII);
      MI->eraseFromParent();
      break;
    default:
      break;
  }
  return BB;
}

// Recursively assign SDNodeOrdering to any unordered nodes
// This is necessary to maintain source ordering of instructions
// under -O0 to avoid odd-looking "skipping around" issues.
2063 static const SDValue 2064Ordered( SelectionDAG &DAG, unsigned order, const SDValue New ) 2065{ 2066 if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) { 2067 DAG.AssignOrdering( New.getNode(), order ); 2068 for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i) 2069 Ordered( DAG, order, New.getOperand(i) ); 2070 } 2071 return New; 2072} 2073 2074#define LOWER(A) \ 2075 case ISD:: A: \ 2076return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) ) 2077 2078SDValue 2079AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const 2080{ 2081 switch (Op.getOpcode()) { 2082 default: 2083 Op.getNode()->dump(); 2084 assert(0 && "Custom lowering code for this" 2085 "instruction is not implemented yet!"); 2086 break; 2087 LOWER(GlobalAddress); 2088 LOWER(JumpTable); 2089 LOWER(ConstantPool); 2090 LOWER(ExternalSymbol); 2091 LOWER(FP_TO_SINT); 2092 LOWER(FP_TO_UINT); 2093 LOWER(SINT_TO_FP); 2094 LOWER(UINT_TO_FP); 2095 LOWER(ADD); 2096 LOWER(MUL); 2097 LOWER(SUB); 2098 LOWER(FDIV); 2099 LOWER(SDIV); 2100 LOWER(SREM); 2101 LOWER(UDIV); 2102 LOWER(UREM); 2103 LOWER(BUILD_VECTOR); 2104 LOWER(INSERT_VECTOR_ELT); 2105 LOWER(EXTRACT_VECTOR_ELT); 2106 LOWER(EXTRACT_SUBVECTOR); 2107 LOWER(SCALAR_TO_VECTOR); 2108 LOWER(CONCAT_VECTORS); 2109 LOWER(AND); 2110 LOWER(OR); 2111 LOWER(SELECT); 2112 LOWER(SELECT_CC); 2113 LOWER(SETCC); 2114 LOWER(SIGN_EXTEND_INREG); 2115 LOWER(BITCAST); 2116 LOWER(DYNAMIC_STACKALLOC); 2117 LOWER(BRCOND); 2118 LOWER(BR_CC); 2119 LOWER(FP_ROUND); 2120 } 2121 return Op; 2122} 2123 2124int 2125AMDILTargetLowering::getVarArgsFrameOffset() const 2126{ 2127 return VarArgsFrameOffset; 2128} 2129#undef LOWER 2130 2131SDValue 2132AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const 2133{ 2134 SDValue DST = Op; 2135 const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op); 2136 const GlobalValue *G = GADN->getGlobal(); 2137 const AMDILSubtarget *stm = &this->getTargetMachine() 2138 
.getSubtarget<AMDILSubtarget>(); 2139 const AMDILGlobalManager *GM = stm->getGlobalManager(); 2140 DebugLoc DL = Op.getDebugLoc(); 2141 int64_t base_offset = GADN->getOffset(); 2142 int32_t arrayoffset = GM->getArrayOffset(G->getName()); 2143 int32_t constoffset = GM->getConstOffset(G->getName()); 2144 if (arrayoffset != -1) { 2145 DST = DAG.getConstant(arrayoffset, MVT::i32); 2146 DST = DAG.getNode(ISD::ADD, DL, MVT::i32, 2147 DST, DAG.getConstant(base_offset, MVT::i32)); 2148 } else if (constoffset != -1) { 2149 if (GM->getConstHWBit(G->getName())) { 2150 DST = DAG.getConstant(constoffset, MVT::i32); 2151 DST = DAG.getNode(ISD::ADD, DL, MVT::i32, 2152 DST, DAG.getConstant(base_offset, MVT::i32)); 2153 } else { 2154 SDValue addr = DAG.getTargetGlobalAddress(G, DL, MVT::i32); 2155 SDValue DPReg = DAG.getRegister(AMDIL::SDP, MVT::i32); 2156 DPReg = DAG.getNode(ISD::ADD, DL, MVT::i32, DPReg, 2157 DAG.getConstant(base_offset, MVT::i32)); 2158 DST = DAG.getNode(AMDILISD::ADDADDR, DL, MVT::i32, addr, DPReg); 2159 } 2160 } else { 2161 const GlobalVariable *GV = dyn_cast<GlobalVariable>(G); 2162 if (!GV) { 2163 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 2164 } else { 2165 if (GV->hasInitializer()) { 2166 const Constant *C = dyn_cast<Constant>(GV->getInitializer()); 2167 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) { 2168 DST = DAG.getConstant(CI->getValue(), Op.getValueType()); 2169 2170 } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) { 2171 DST = DAG.getConstantFP(CF->getValueAPF(), 2172 Op.getValueType()); 2173 } else if (dyn_cast<ConstantAggregateZero>(C)) { 2174 EVT VT = Op.getValueType(); 2175 if (VT.isInteger()) { 2176 DST = DAG.getConstant(0, VT); 2177 } else { 2178 DST = DAG.getConstantFP(0, VT); 2179 } 2180 } else { 2181 assert(!"lowering this type of Global Address " 2182 "not implemented yet!"); 2183 C->dump(); 2184 DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); 2185 } 2186 } else { 2187 DST = DAG.getTargetGlobalAddress(GV, DL, 
MVT::i32); 2188 } 2189 } 2190 } 2191 return DST; 2192} 2193 2194SDValue 2195AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const 2196{ 2197 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 2198 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); 2199 return Result; 2200} 2201SDValue 2202AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const 2203{ 2204 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2205 EVT PtrVT = Op.getValueType(); 2206 SDValue Result; 2207 if (CP->isMachineConstantPoolEntry()) { 2208 Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 2209 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); 2210 } else { 2211 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 2212 CP->getAlignment(), CP->getOffset(), CP->getTargetFlags()); 2213 } 2214 return Result; 2215} 2216 2217SDValue 2218AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const 2219{ 2220 const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol(); 2221 SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32); 2222 return Result; 2223} 2224/// LowerFORMAL_ARGUMENTS - transform physical registers into 2225/// virtual registers and generate load operations for 2226/// arguments places on the stack. 
/// TODO: isVarArg, hasStructRet, isMemReg
 SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{

  MachineFunction &MF = DAG.getMachineFunction();
  AMDILMachineFunctionInfo *FuncInfo
    = MF.getInfo<AMDILMachineFunctionInfo>();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
      getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  // One CCValAssign per incoming argument: either a physical register
  // (copied into a vreg via a live-in) or a stack slot (lowered to a load).
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFromType(
          RegVT.getSimpleVT().SimpleTy);

      // Register the physreg as a function live-in and read it out.
      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits. Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if(VA.isMemLoc()) {
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
  // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  unsigned int StackSize = CCInfo.getNextStackOffset();
  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  FuncInfo->setBytesToPopOnReturn(StackSize);
  return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  // NOTE(review): unconditionally asserts — byval copies are not
  // supported yet; the getMemcpy below is the intended implementation.
  assert(0 && "MemCopy does not exist yet");
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);

  return DAG.getMemcpy(Chain,
      Src.getDebugLoc(),
      Dst, Src, SizeNode, Flags.getByValAlign(),
      /*IsVol=*/false, /*AlwaysInline=*/true,
      MachinePointerInfo(), MachinePointerInfo());
}

// Store (or byval-copy) one outgoing call argument to its assigned
// stack slot at StackPtr + LocMemOffset; returns the store/copy node.
SDValue
AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
    SDValue StackPtr, SDValue Arg,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    ISD::ArgFlagsTy Flags) const
{
  unsigned int LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD,
      dl,
      getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
  } else {
    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
        MachinePointerInfo::getStack(LocMemOffset),
        false, false, 0);
  }
  return PtrOff;
}
/// LowerCall - functions arguments are copied from virtual
/// regs to (physical regs)/(stack frame), CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Tail calls are never performed; note this makes every
  // `if (isTailCall)` block below dead code.
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: DO we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
      getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyize the calling operands, but need to change
  // if we have more than one calling convetion
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  // Open the call sequence, reserving NumBytes of outgoing-arg stack.
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = OutVals[i];
    //Promote the value if needed
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
               break;
      case CCValAssign::SExt:
               Arg = DAG.getNode(ISD::SIGN_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::ZExt:
               Arg = DAG.getNode(ISD::ZERO_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::AExt:
               Arg = DAG.getNode(ISD::ANY_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
    }

    if (VA.isRegLoc()) {
      // Register argument: queue the (physreg, value) pair; the actual
      // CopyToReg nodes are emitted after all stores (below).
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());

      // emit ISD::STORE whichs stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  // Tie all the argument stores together; they may execute in any order.
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  SDValue InFlag;
  if (!isTailCall) {
    // Copy register arguments into their physregs, glued in sequence so
    // they stay adjacent to the call.
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
      InVals);
}

// checkMADType - Classify an ISD::ADD as a candidate for a 24-bit or a
// 32-bit multiply-add, based on the address spaces of the loads/stores
// that consume it.
static void checkMADType(
    SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD)
{
  bool globalLoadStore = false;
  is24bitMAD = false;
  is32bitMAD = false;
  // NOTE(review): this early return deliberately(?) disables the MAD
  // detection — every caller gets false/false and the code below is
  // dead. Confirm before removing or re-enabling.
  return;
  assert(Op.getOpcode() == ISD::ADD && "The opcode must be a add in order for "
      "this to work correctly!");
  if (Op.getNode()->use_empty()) {
    return;
  }
  for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(),
      nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) {
    SDNode *ptr = *nBegin;
    const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr);
    // If we are not a LSBaseSDNode then we don't do this
    // optimization.
    // If we are a LSBaseSDNode, but the op is not the offset
    // or base pointer, then we don't do this optimization
    // (i.e. we are the value being stored)
    if (!lsNode ||
        (lsNode->writeMem() && lsNode->getOperand(1) == Op)) {
      return;
    }
    const PointerType *PT =
      dyn_cast<PointerType>(lsNode->getSrcValue()->getType());
    unsigned as = PT->getAddressSpace();
    switch(as) {
      default:
        globalLoadStore = true;
        // NOTE(review): no break here — default falls through into the
        // PRIVATE_ADDRESS check; confirm this fallthrough is intended.
      case AMDILAS::PRIVATE_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::CONSTANT_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::LOCAL_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::REGION_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
          globalLoadStore = true;
        }
        break;
    }
  }
  if (globalLoadStore) {
    is32bitMAD = true;
  } else {
    is24bitMAD = true;
  }
}

SDValue
AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const
{
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  const AMDILSubtarget *stm = &this->getTargetMachine()
    .getSubtarget<AMDILSubtarget>();
  bool isVec = OVT.isVector();
  if (OVT.getScalarType() == MVT::i64) {
    // 64-bit add: use the native instruction when the device has 64-bit
    // integer ops (scalar only), otherwise split into lo/hi 32-bit
    // halves and propagate the carry manually.
    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)
        && INTTY == MVT::i32) {
      DST = DAG.getNode(AMDILISD::ADD,
          DL,
          OVT,
          LHS, RHS);
    } else {
      SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
      // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
      LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
      RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
      LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
      RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
      INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO);
      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI);
      // Carry detection: the low sum wrapped iff INTLO < RHSLO
      // (unsigned); CMP yields a mask, INEGATE turns it into the 0/1
      // carry added into the high half.
      SDValue cmp;
      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          INTLO, RHSLO);
      cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp);
      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
      // Reassemble the 64-bit value from the two 32-bit halves.
      DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
          INTLO, INTHI);
    }
  } else {
    if (LHS.getOpcode() == ISD::FrameIndex ||
        RHS.getOpcode() == ISD::FrameIndex) {
      // Frame-index arithmetic uses the dedicated address-add node.
      DST = DAG.getNode(AMDILISD::ADDADDR,
          DL,
          OVT,
          LHS, RHS);
    } else {
      if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem)
          && LHS.getNumOperands()
          && RHS.getNumOperands()) {
        bool is24bitMAD = false;
        bool is32bitMAD = false;
        const ConstantSDNode *LHSConstOpCode =
          dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1));
        const ConstantSDNode *RHSConstOpCode =
          dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1));
        // Recognize (x * y) + z, including shifts-by-constant as
        // multiplies by a power of two.
        if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode)
            || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode)
            || LHS.getOpcode() == ISD::MUL
            || RHS.getOpcode() == ISD::MUL) {
          SDValue Op1, Op2, Op3;
          // FIXME: Fix this so that it works for unsigned 24bit ops.
          if (LHS.getOpcode() == ISD::MUL) {
            Op1 = LHS.getOperand(0);
            Op2 = LHS.getOperand(1);
            Op3 = RHS;
          } else if (RHS.getOpcode() == ISD::MUL) {
            Op1 = RHS.getOperand(0);
            Op2 = RHS.getOperand(1);
            Op3 = LHS;
          } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) {
            Op1 = LHS.getOperand(0);
            Op2 = DAG.getConstant(
                1 << LHSConstOpCode->getZExtValue(), MVT::i32);
            Op3 = RHS;
          } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) {
            Op1 = RHS.getOperand(0);
            Op2 = DAG.getConstant(
                1 << RHSConstOpCode->getZExtValue(), MVT::i32);
            Op3 = LHS;
          }
          checkMADType(Op, stm, is24bitMAD, is32bitMAD);
          // We can possibly do a MAD transform!
          if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
            uint32_t opcode = AMDGPUIntrinsic::AMDIL_mad24_i32;
            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
                DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32),
                Op1, Op2, Op3);
          } else if(is32bitMAD) {
            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
                DL, Tys, DAG.getEntryNode(),
                DAG.getConstant(
                  AMDGPUIntrinsic::AMDIL_mad_i32, MVT::i32),
                Op1, Op2, Op3);
          }
        }
      }
      // NOTE(review): this unconditionally overwrites any MAD node built
      // above, so the MAD path's result is discarded. It is currently
      // unreachable anyway (checkMADType always reports false/false);
      // confirm intent before re-enabling the MAD detection.
      DST = DAG.getNode(AMDILISD::ADD,
          DL,
          OVT,
          LHS, RHS);
    }
  }
  return DST;
}

// genCLZuN - Emit a count-leading-zeros sequence for an N-bit quantity
// held in a 32-bit (or vector-of-32-bit) value, using the float-exponent
// trick sketched in the pseudo-code comment below.
SDValue
AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
    uint32_t bits) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY = Op.getValueType();
  EVT FPTY;
  if (INTTY.isVector()) {
    FPTY = EVT(MVT::getVectorVT(MVT::f32,
          INTTY.getVectorNumElements()));
  } else {
    FPTY = EVT(MVT::f32);
  }
  /* static inline uint
     __clz_Nbit(uint x)
     {
     int xor = 0x3f800000U | x;
     float tp = as_float(xor);
     float t = tp + -1.0f;
     uint tint = as_uint(t);
     int cmp = (x != 0);
     uint tsrc = tint >> 23;
     uint tmask = tsrc & 0xffU;
     uint cst = (103 + N)U - tmask;
     return cmp ?
cst : N; 2737 } 2738 */ 2739 assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32 2740 && "genCLZu16 only works on 32bit types"); 2741 // uint x = Op 2742 SDValue x = Op; 2743 // xornode = 0x3f800000 | x 2744 SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY, 2745 DAG.getConstant(0x3f800000, INTTY), x); 2746 // float tp = as_float(xornode) 2747 SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode); 2748 // float t = tp + -1.0f 2749 SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp, 2750 DAG.getConstantFP(-1.0f, FPTY)); 2751 // uint tint = as_uint(t) 2752 SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t); 2753 // int cmp = (x != 0) 2754 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2755 DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x, 2756 DAG.getConstant(0, INTTY)); 2757 // uint tsrc = tint >> 23 2758 SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint, 2759 DAG.getConstant(23, INTTY)); 2760 // uint tmask = tsrc & 0xFF 2761 SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc, 2762 DAG.getConstant(0xFFU, INTTY)); 2763 // uint cst = (103 + bits) - tmask 2764 SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY, 2765 DAG.getConstant((103U + bits), INTTY), tmask); 2766 // return cmp ? cst : N 2767 cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst, 2768 DAG.getConstant(bits, INTTY)); 2769 return cst; 2770} 2771 2772SDValue 2773AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const 2774{ 2775 SDValue DST = SDValue(); 2776 DebugLoc DL = Op.getDebugLoc(); 2777 EVT INTTY = Op.getValueType(); 2778 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 2779 &this->getTargetMachine())->getSubtargetImpl(); 2780 if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) { 2781 //__clz_32bit(uint u) 2782 //{ 2783 // int z = __amdil_ffb_hi(u) ; 2784 // return z < 0 ? 
32 : z; 2785 // } 2786 // uint u = op 2787 SDValue u = Op; 2788 // int z = __amdil_ffb_hi(u) 2789 SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u); 2790 // int cmp = z < 0 2791 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2792 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 2793 z, DAG.getConstant(0, INTTY)); 2794 // return cmp ? 32 : z 2795 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, 2796 DAG.getConstant(32, INTTY), z); 2797 } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { 2798 // static inline uint 2799 //__clz_32bit(uint x) 2800 //{ 2801 // uint zh = __clz_16bit(x >> 16); 2802 // uint zl = __clz_16bit(x & 0xffffU); 2803 // return zh == 16U ? 16U + zl : zh; 2804 //} 2805 // uint x = Op 2806 SDValue x = Op; 2807 // uint xs16 = x >> 16 2808 SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x, 2809 DAG.getConstant(16, INTTY)); 2810 // uint zh = __clz_16bit(xs16) 2811 SDValue zh = genCLZuN(xs16, DAG, 16); 2812 // uint xa16 = x & 0xFFFF 2813 SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x, 2814 DAG.getConstant(0xFFFFU, INTTY)); 2815 // uint zl = __clz_16bit(xa16) 2816 SDValue zl = genCLZuN(xa16, DAG, 16); 2817 // uint cmp = zh == 16U 2818 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2819 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 2820 zh, DAG.getConstant(16U, INTTY)); 2821 // uint zl16 = zl + 16 2822 SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY, 2823 DAG.getConstant(16, INTTY), zl); 2824 // return cmp ? 
zl16 : zh 2825 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, 2826 cmp, zl16, zh); 2827 } else { 2828 assert(0 && "Attempting to generate a CLZ function with an" 2829 " unknown graphics card"); 2830 } 2831 return DST; 2832} 2833SDValue 2834AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const 2835{ 2836 SDValue DST = SDValue(); 2837 DebugLoc DL = Op.getDebugLoc(); 2838 EVT INTTY; 2839 EVT LONGTY = Op.getValueType(); 2840 bool isVec = LONGTY.isVector(); 2841 if (isVec) { 2842 INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType() 2843 .getVectorNumElements())); 2844 } else { 2845 INTTY = EVT(MVT::i32); 2846 } 2847 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 2848 &this->getTargetMachine())->getSubtargetImpl(); 2849 if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) { 2850 // Evergreen: 2851 // static inline uint 2852 // __clz_u64(ulong x) 2853 // { 2854 //uint zhi = __clz_32bit((uint)(x >> 32)); 2855 //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL)); 2856 //return zhi == 32U ? 32U + zlo : zhi; 2857 //} 2858 //ulong x = op 2859 SDValue x = Op; 2860 // uint xhi = x >> 32 2861 SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x); 2862 // uint xlo = x & 0xFFFFFFFF 2863 SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x); 2864 // uint zhi = __clz_32bit(xhi) 2865 SDValue zhi = genCLZu32(xhi, DAG); 2866 // uint zlo = __clz_32bit(xlo) 2867 SDValue zlo = genCLZu32(xlo, DAG); 2868 // uint cmp = zhi == 32 2869 SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2870 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 2871 zhi, DAG.getConstant(32U, INTTY)); 2872 // uint zlop32 = 32 + zlo 2873 SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY, 2874 DAG.getConstant(32U, INTTY), zlo); 2875 // return cmp ? 
zlop32: zhi 2876 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi); 2877 } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { 2878 // HD4XXX: 2879 // static inline uint 2880 //__clz_64bit(ulong x) 2881 //{ 2882 //uint zh = __clz_23bit((uint)(x >> 46)) - 5U; 2883 //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU); 2884 //uint zl = __clz_23bit((uint)x & 0x7fffffU); 2885 //uint r = zh == 18U ? 18U + zm : zh; 2886 //return zh + zm == 41U ? 41U + zl : r; 2887 //} 2888 //ulong x = Op 2889 SDValue x = Op; 2890 // ulong xs46 = x >> 46 2891 SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x, 2892 DAG.getConstant(46, LONGTY)); 2893 // uint ixs46 = (uint)xs46 2894 SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46); 2895 // ulong xs23 = x >> 23 2896 SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x, 2897 DAG.getConstant(23, LONGTY)); 2898 // uint ixs23 = (uint)xs23 2899 SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23); 2900 // uint xs23m23 = ixs23 & 0x7FFFFF 2901 SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23, 2902 DAG.getConstant(0x7fffffU, INTTY)); 2903 // uint ix = (uint)x 2904 SDValue ix = DAG.getNode((isVec) ? 
AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x); 2905 // uint xm23 = ix & 0x7FFFFF 2906 SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix, 2907 DAG.getConstant(0x7fffffU, INTTY)); 2908 // uint zh = __clz_23bit(ixs46) 2909 SDValue zh = genCLZuN(ixs46, DAG, 23); 2910 // uint zm = __clz_23bit(xs23m23) 2911 SDValue zm = genCLZuN(xs23m23, DAG, 23); 2912 // uint zl = __clz_23bit(xm23) 2913 SDValue zl = genCLZuN(xm23, DAG, 23); 2914 // uint zhm5 = zh - 5 2915 SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh, 2916 DAG.getConstant(-5U, INTTY)); 2917 SDValue const18 = DAG.getConstant(18, INTTY); 2918 SDValue const41 = DAG.getConstant(41, INTTY); 2919 // uint cmp1 = zh = 18 2920 SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2921 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 2922 zhm5, const18); 2923 // uint zhm5zm = zhm5 + zh 2924 SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm); 2925 // uint cmp2 = zhm5zm == 41 2926 SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 2927 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 2928 zhm5zm, const41); 2929 // uint zmp18 = zhm5 + 18 2930 SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18); 2931 // uint zlp41 = zl + 41 2932 SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41); 2933 // uint r = cmp1 ? zmp18 : zh 2934 SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, 2935 cmp1, zmp18, zhm5); 2936 // return cmp2 ? 
zlp41 : r 2937 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r); 2938 } else { 2939 assert(0 && "Attempting to generate a CLZ function with an" 2940 " unknown graphics card"); 2941 } 2942 return DST; 2943} 2944SDValue 2945AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG, 2946 bool includeSign) const 2947{ 2948 EVT INTVT; 2949 EVT LONGVT; 2950 SDValue DST; 2951 DebugLoc DL = RHS.getDebugLoc(); 2952 EVT RHSVT = RHS.getValueType(); 2953 bool isVec = RHSVT.isVector(); 2954 if (isVec) { 2955 LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT 2956 .getVectorNumElements())); 2957 INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT 2958 .getVectorNumElements())); 2959 } else { 2960 LONGVT = EVT(MVT::i64); 2961 INTVT = EVT(MVT::i32); 2962 } 2963 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 2964 &this->getTargetMachine())->getSubtargetImpl(); 2965 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 2966 // unsigned version: 2967 // uint uhi = (uint)(d * 0x1.0p-32); 2968 // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d)); 2969 // return as_ulong2((uint2)(ulo, uhi)); 2970 // 2971 // signed version: 2972 // double ad = fabs(d); 2973 // long l = unsigned_version(ad); 2974 // long nl = -l; 2975 // return d == ad ? l : nl; 2976 SDValue d = RHS; 2977 if (includeSign) { 2978 d = DAG.getNode(ISD::FABS, DL, RHSVT, d); 2979 } 2980 SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d, 2981 DAG.getConstantFP(0x2f800000, RHSVT)); 2982 SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid); 2983 SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi); 2984 ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod, 2985 DAG.getConstantFP(0xcf800000, RHSVT), d); 2986 SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod); 2987 SDValue l = DAG.getNode((isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi); 2988 if (includeSign) { 2989 SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l); 2990 SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT, 2991 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32), 2992 RHS, d); 2993 l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl); 2994 } 2995 DST = l; 2996 } else { 2997 /* 2998 __attribute__((always_inline)) long 2999 cast_f64_to_i64(double d) 3000 { 3001 // Convert d in to 32-bit components 3002 long x = as_long(d); 3003 xhi = LCOMPHI(x); 3004 xlo = LCOMPLO(x); 3005 3006 // Generate 'normalized' mantissa 3007 mhi = xhi | 0x00100000; // hidden bit 3008 mhi <<= 11; 3009 temp = xlo >> (32 - 11); 3010 mhi |= temp 3011 mlo = xlo << 11; 3012 3013 // Compute shift right count from exponent 3014 e = (xhi >> (52-32)) & 0x7ff; 3015 sr = 1023 + 63 - e; 3016 srge64 = sr >= 64; 3017 srge32 = sr >= 32; 3018 3019 // Compute result for 0 <= sr < 32 3020 rhi0 = mhi >> (sr &31); 3021 rlo0 = mlo >> (sr &31); 3022 temp = mhi << (32 - sr); 3023 temp |= rlo0; 3024 rlo0 = sr ? temp : rlo0; 3025 3026 // Compute result for 32 <= sr 3027 rhi1 = 0; 3028 rlo1 = srge64 ? 0 : rhi0; 3029 3030 // Pick between the 2 results 3031 rhi = srge32 ? rhi1 : rhi0; 3032 rlo = srge32 ? rlo1 : rlo0; 3033 3034 // Optional saturate on overflow 3035 srlt0 = sr < 0; 3036 rhi = srlt0 ? MAXVALUE : rhi; 3037 rlo = srlt0 ? 
MAXVALUE : rlo; 3038 3039 // Create long 3040 res = LCREATE( rlo, rhi ); 3041 3042 // Deal with sign bit (ignoring whether result is signed or unsigned value) 3043 if (includeSign) { 3044 sign = ((signed int) xhi) >> 31; fill with sign bit 3045 sign = LCREATE( sign, sign ); 3046 res += sign; 3047 res ^= sign; 3048 } 3049 3050 return res; 3051 } 3052 */ 3053 SDValue c11 = DAG.getConstant( 63 - 52, INTVT ); 3054 SDValue c32 = DAG.getConstant( 32, INTVT ); 3055 3056 // Convert d in to 32-bit components 3057 SDValue d = RHS; 3058 SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d); 3059 SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); 3060 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); 3061 3062 // Generate 'normalized' mantissa 3063 SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT, 3064 xhi, DAG.getConstant( 0x00100000, INTVT ) ); 3065 mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 ); 3066 SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT, 3067 xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) ); 3068 mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp ); 3069 SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 ); 3070 3071 // Compute shift right count from exponent 3072 SDValue e = DAG.getNode( ISD::SRL, DL, INTVT, 3073 xhi, DAG.getConstant( 52-32, INTVT ) ); 3074 e = DAG.getNode( ISD::AND, DL, INTVT, 3075 e, DAG.getConstant( 0x7ff, INTVT ) ); 3076 SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT, 3077 DAG.getConstant( 1023 + 63, INTVT ), e ); 3078 SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT, 3079 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 3080 sr, DAG.getConstant(64, INTVT)); 3081 SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT, 3082 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 3083 sr, DAG.getConstant(32, INTVT)); 3084 3085 // Compute result for 0 <= sr < 32 3086 SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr ); 3087 
SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr ); 3088 temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr ); 3089 temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp ); 3090 temp = DAG.getNode( ISD::OR, DL, INTVT, rlo0, temp ); 3091 rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 ); 3092 3093 // Compute result for 32 <= sr 3094 SDValue rhi1 = DAG.getConstant( 0, INTVT ); 3095 SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 3096 srge64, rhi1, rhi0 ); 3097 3098 // Pick between the 2 results 3099 SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 3100 srge32, rhi1, rhi0 ); 3101 SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 3102 srge32, rlo1, rlo0 ); 3103 3104 // Create long 3105 SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi ); 3106 3107 // Deal with sign bit 3108 if (includeSign) { 3109 SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT, 3110 xhi, DAG.getConstant( 31, INTVT ) ); 3111 sign = DAG.getNode( (isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign ); 3112 res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign ); 3113 res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign ); 3114 } 3115 DST = res; 3116 } 3117 return DST; 3118} 3119SDValue 3120AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG, 3121 bool includeSign) const 3122{ 3123 EVT INTVT; 3124 EVT LONGVT; 3125 DebugLoc DL = RHS.getDebugLoc(); 3126 EVT RHSVT = RHS.getValueType(); 3127 bool isVec = RHSVT.isVector(); 3128 if (isVec) { 3129 LONGVT = EVT(MVT::getVectorVT(MVT::i64, 3130 RHSVT.getVectorNumElements())); 3131 INTVT = EVT(MVT::getVectorVT(MVT::i32, 3132 RHSVT.getVectorNumElements())); 3133 } else { 3134 LONGVT = EVT(MVT::i64); 3135 INTVT = EVT(MVT::i32); 3136 } 3137 /* 3138 __attribute__((always_inline)) int 3139 cast_f64_to_[u|i]32(double d) 3140 { 3141 // Convert d in to 32-bit components 3142 long x = as_long(d); 3143 xhi = LCOMPHI(x); 3144 xlo = LCOMPLO(x); 3145 3146 // Generate 'normalized' mantissa 3147 mhi = xhi | 0x00100000; // hidden bit 3148 mhi <<= 11; 3149 temp = xlo >> (32 - 11); 3150 mhi |= temp 3151 3152 // Compute shift right count from exponent 3153 e = (xhi >> (52-32)) & 0x7ff; 3154 sr = 1023 + 31 - e; 3155 srge32 = sr >= 32; 3156 3157 // Compute result for 0 <= sr < 32 3158 res = mhi >> (sr &31); 3159 res = srge32 ? 0 : res; 3160 3161 // Optional saturate on overflow 3162 srlt0 = sr < 0; 3163 res = srlt0 ? MAXVALUE : res; 3164 3165 // Deal with sign bit (ignoring whether result is signed or unsigned value) 3166 if (includeSign) { 3167 sign = ((signed int) xhi) >> 31; fill with sign bit 3168 res += sign; 3169 res ^= sign; 3170 } 3171 3172 return res; 3173 } 3174 */ 3175 SDValue c11 = DAG.getConstant( 63 - 52, INTVT ); 3176 3177 // Convert d in to 32-bit components 3178 SDValue d = RHS; 3179 SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d); 3180 SDValue xhi = DAG.getNode( (isVec) ? 
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); 3181 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); 3182 3183 // Generate 'normalized' mantissa 3184 SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT, 3185 xhi, DAG.getConstant( 0x00100000, INTVT ) ); 3186 mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 ); 3187 SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT, 3188 xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) ); 3189 mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp ); 3190 3191 // Compute shift right count from exponent 3192 SDValue e = DAG.getNode( ISD::SRL, DL, INTVT, 3193 xhi, DAG.getConstant( 52-32, INTVT ) ); 3194 e = DAG.getNode( ISD::AND, DL, INTVT, 3195 e, DAG.getConstant( 0x7ff, INTVT ) ); 3196 SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT, 3197 DAG.getConstant( 1023 + 31, INTVT ), e ); 3198 SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT, 3199 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 3200 sr, DAG.getConstant(32, INTVT)); 3201 3202 // Compute result for 0 <= sr < 32 3203 SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr ); 3204 res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 3205 srge32, DAG.getConstant(0,INTVT), res ); 3206 3207 // Deal with sign bit 3208 if (includeSign) { 3209 SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT, 3210 xhi, DAG.getConstant( 31, INTVT ) ); 3211 res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign ); 3212 res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign ); 3213 } 3214 return res; 3215} 3216SDValue 3217AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const 3218{ 3219 SDValue RHS = Op.getOperand(0); 3220 EVT RHSVT = RHS.getValueType(); 3221 MVT RST = RHSVT.getScalarType().getSimpleVT(); 3222 EVT LHSVT = Op.getValueType(); 3223 MVT LST = LHSVT.getScalarType().getSimpleVT(); 3224 DebugLoc DL = Op.getDebugLoc(); 3225 SDValue DST; 3226 const AMDILTargetMachine* 3227 amdtm = reinterpret_cast<const AMDILTargetMachine*> 3228 
(&this->getTargetMachine()); 3229 const AMDILSubtarget* 3230 stm = dynamic_cast<const AMDILSubtarget*>( 3231 amdtm->getSubtargetImpl()); 3232 if (RST == MVT::f64 && RHSVT.isVector() 3233 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3234 // We dont support vector 64bit floating point convertions. 3235 for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) { 3236 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 3237 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); 3238 op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op); 3239 if (!x) { 3240 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); 3241 } else { 3242 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, 3243 DST, op, DAG.getTargetConstant(x, MVT::i32)); 3244 } 3245 } 3246 } else { 3247 if (RST == MVT::f64 3248 && LST == MVT::i32) { 3249 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3250 DST = SDValue(Op.getNode(), 0); 3251 } else { 3252 DST = genf64toi32(RHS, DAG, true); 3253 } 3254 } else if (RST == MVT::f64 3255 && LST == MVT::i64) { 3256 DST = genf64toi64(RHS, DAG, true); 3257 } else if (RST == MVT::f64 3258 && (LST == MVT::i8 || LST == MVT::i16)) { 3259 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3260 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0)); 3261 } else { 3262 SDValue ToInt = genf64toi32(RHS, DAG, true); 3263 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt); 3264 } 3265 3266 } else { 3267 DST = SDValue(Op.getNode(), 0); 3268 } 3269 } 3270 return DST; 3271} 3272 3273SDValue 3274AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const 3275{ 3276 SDValue DST; 3277 SDValue RHS = Op.getOperand(0); 3278 EVT RHSVT = RHS.getValueType(); 3279 MVT RST = RHSVT.getScalarType().getSimpleVT(); 3280 EVT LHSVT = Op.getValueType(); 3281 MVT LST = LHSVT.getScalarType().getSimpleVT(); 3282 DebugLoc DL = Op.getDebugLoc(); 3283 const AMDILTargetMachine* 3284 amdtm = reinterpret_cast<const AMDILTargetMachine*> 
3285 (&this->getTargetMachine()); 3286 const AMDILSubtarget* 3287 stm = dynamic_cast<const AMDILSubtarget*>( 3288 amdtm->getSubtargetImpl()); 3289 if (RST == MVT::f64 && RHSVT.isVector() 3290 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3291 // We dont support vector 64bit floating point convertions. 3292 for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) { 3293 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 3294 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); 3295 op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op); 3296 if (!x) { 3297 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); 3298 } else { 3299 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, 3300 DST, op, DAG.getTargetConstant(x, MVT::i32)); 3301 } 3302 3303 } 3304 } else { 3305 if (RST == MVT::f64 3306 && LST == MVT::i32) { 3307 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3308 DST = SDValue(Op.getNode(), 0); 3309 } else { 3310 DST = genf64toi32(RHS, DAG, false); 3311 } 3312 } else if (RST == MVT::f64 3313 && LST == MVT::i64) { 3314 DST = genf64toi64(RHS, DAG, false); 3315 } else if (RST == MVT::f64 3316 && (LST == MVT::i8 || LST == MVT::i16)) { 3317 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3318 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0)); 3319 } else { 3320 SDValue ToInt = genf64toi32(RHS, DAG, false); 3321 DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt); 3322 } 3323 3324 } else { 3325 DST = SDValue(Op.getNode(), 0); 3326 } 3327 } 3328 return DST; 3329} 3330SDValue 3331AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT, 3332 SelectionDAG &DAG) const 3333{ 3334 EVT RHSVT = RHS.getValueType(); 3335 DebugLoc DL = RHS.getDebugLoc(); 3336 EVT INTVT; 3337 EVT LONGVT; 3338 bool isVec = RHSVT.isVector(); 3339 if (isVec) { 3340 LONGVT = EVT(MVT::getVectorVT(MVT::i64, 3341 RHSVT.getVectorNumElements())); 3342 INTVT = EVT(MVT::getVectorVT(MVT::i32, 3343 RHSVT.getVectorNumElements())); 3344 } else { 
3345 LONGVT = EVT(MVT::i64); 3346 INTVT = EVT(MVT::i32); 3347 } 3348 SDValue x = RHS; 3349 const AMDILTargetMachine* 3350 amdtm = reinterpret_cast<const AMDILTargetMachine*> 3351 (&this->getTargetMachine()); 3352 const AMDILSubtarget* 3353 stm = dynamic_cast<const AMDILSubtarget*>( 3354 amdtm->getSubtargetImpl()); 3355 if (stm->calVersion() >= CAL_VERSION_SC_135) { 3356 // unsigned x = RHS; 3357 // ulong xd = (ulong)(0x4330_0000 << 32) | x; 3358 // double d = as_double( xd ); 3359 // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000 3360 SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x, 3361 DAG.getConstant( 0x43300000, INTVT ) ); 3362 SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd ); 3363 SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT, 3364 DAG.getConstant( 0x4330000000000000ULL, LONGVT ) ); 3365 return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd ); 3366 } else { 3367 SDValue clz = genCLZu32(x, DAG); 3368 3369 // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2 3370 // Except for an input 0... which requires a 0 exponent 3371 SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT, 3372 DAG.getConstant( (1023+31), INTVT), clz ); 3373 exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x ); 3374 3375 // Normalize frac 3376 SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz ); 3377 3378 // Eliminate hidden bit 3379 rhi = DAG.getNode( ISD::AND, DL, INTVT, 3380 rhi, DAG.getConstant( 0x7fffffff, INTVT ) ); 3381 3382 // Pack exponent and frac 3383 SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT, 3384 rhi, DAG.getConstant( (32 - 11), INTVT ) ); 3385 rhi = DAG.getNode( ISD::SRL, DL, INTVT, 3386 rhi, DAG.getConstant( 11, INTVT ) ); 3387 exp = DAG.getNode( ISD::SHL, DL, INTVT, 3388 exp, DAG.getConstant( 20, INTVT ) ); 3389 rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp ); 3390 3391 // Convert 2 x 32 in to 1 x 64, then to double precision float type 3392 SDValue res = DAG.getNode( (isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi ); 3393 return DAG.getNode(ISDBITCAST, DL, LHSVT, res); 3394 } 3395} 3396SDValue 3397AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT, 3398 SelectionDAG &DAG) const 3399{ 3400 EVT RHSVT = RHS.getValueType(); 3401 DebugLoc DL = RHS.getDebugLoc(); 3402 EVT INTVT; 3403 EVT LONGVT; 3404 bool isVec = RHSVT.isVector(); 3405 if (isVec) { 3406 INTVT = EVT(MVT::getVectorVT(MVT::i32, 3407 RHSVT.getVectorNumElements())); 3408 } else { 3409 INTVT = EVT(MVT::i32); 3410 } 3411 LONGVT = RHSVT; 3412 SDValue x = RHS; 3413 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 3414 &this->getTargetMachine())->getSubtargetImpl(); 3415 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3416 // double dhi = (double)(as_uint2(x).y); 3417 // double dlo = (double)(as_uint2(x).x); 3418 // return mad(dhi, 0x1.0p+32, dlo) 3419 SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x); 3420 dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi); 3421 SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x); 3422 dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo); 3423 return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi, 3424 DAG.getConstantFP(0x4f800000, LHSVT), dlo); 3425 } else if (stm->calVersion() >= CAL_VERSION_SC_135) { 3426 // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL)); 3427 // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32)); 3428 // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo; 3429 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); // x & 0xffff_ffffUL 3430 SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) ); 3431 SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd ); 3432 SDValue xhi = DAG.getNode((isVec) ? 
AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32 3433 SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) ); 3434 SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe ); 3435 SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT, 3436 DAG.getConstant( 0x4530000000100000ULL, LONGVT ) ); 3437 hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c ); 3438 return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo ); 3439 3440 } else { 3441 SDValue clz = genCLZu64(x, DAG); 3442 SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x ); 3443 SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x ); 3444 3445 // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2 3446 SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT, 3447 DAG.getConstant( (1023+63), INTVT), clz ); 3448 SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo ); 3449 exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 3450 mash, exp, mash ); // exp = exp, or 0 if input was 0 3451 3452 // Normalize frac 3453 SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT, 3454 clz, DAG.getConstant( 31, INTVT ) ); 3455 SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT, 3456 DAG.getConstant( 32, INTVT ), clz31 ); 3457 SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 ); 3458 SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift ); 3459 t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 ); 3460 SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 ); 3461 SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 ); 3462 SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 ); 3463 SDValue rlo2 = DAG.getConstant( 0, INTVT ); 3464 SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT, 3465 clz, DAG.getConstant( 32, INTVT ) ); 3466 SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, 3467 clz32, rhi2, rhi1 ); 3468 SDValue rlo = DAG.getNode( 
AMDILISD::CMOVLOG, DL, INTVT, 3469 clz32, rlo2, rlo1 ); 3470 3471 // Eliminate hidden bit 3472 rhi = DAG.getNode( ISD::AND, DL, INTVT, 3473 rhi, DAG.getConstant( 0x7fffffff, INTVT ) ); 3474 3475 // Save bits needed to round properly 3476 SDValue round = DAG.getNode( ISD::AND, DL, INTVT, 3477 rlo, DAG.getConstant( 0x7ff, INTVT ) ); 3478 3479 // Pack exponent and frac 3480 rlo = DAG.getNode( ISD::SRL, DL, INTVT, 3481 rlo, DAG.getConstant( 11, INTVT ) ); 3482 SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT, 3483 rhi, DAG.getConstant( (32 - 11), INTVT ) ); 3484 rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp ); 3485 rhi = DAG.getNode( ISD::SRL, DL, INTVT, 3486 rhi, DAG.getConstant( 11, INTVT ) ); 3487 exp = DAG.getNode( ISD::SHL, DL, INTVT, 3488 exp, DAG.getConstant( 20, INTVT ) ); 3489 rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp ); 3490 3491 // Compute rounding bit 3492 SDValue even = DAG.getNode( ISD::AND, DL, INTVT, 3493 rlo, DAG.getConstant( 1, INTVT ) ); 3494 SDValue grs = DAG.getNode( ISD::AND, DL, INTVT, 3495 round, DAG.getConstant( 0x3ff, INTVT ) ); 3496 grs = DAG.getNode( AMDILISD::CMP, DL, INTVT, 3497 DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32), 3498 grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none 3499 grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even ); 3500 round = DAG.getNode( ISD::SRL, DL, INTVT, 3501 round, DAG.getConstant( 10, INTVT ) ); 3502 round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1 3503 3504 // Add rounding bit 3505 SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, 3506 round, DAG.getConstant( 0, INTVT ) ); 3507 SDValue res = DAG.getNode( (isVec) ? 
AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi ); 3508 res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround ); 3509 return DAG.getNode(ISDBITCAST, DL, LHSVT, res); 3510 } 3511} 3512SDValue 3513AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const 3514{ 3515 SDValue RHS = Op.getOperand(0); 3516 EVT RHSVT = RHS.getValueType(); 3517 MVT RST = RHSVT.getScalarType().getSimpleVT(); 3518 EVT LHSVT = Op.getValueType(); 3519 MVT LST = LHSVT.getScalarType().getSimpleVT(); 3520 DebugLoc DL = Op.getDebugLoc(); 3521 SDValue DST; 3522 EVT INTVT; 3523 EVT LONGVT; 3524 const AMDILTargetMachine* 3525 amdtm = reinterpret_cast<const AMDILTargetMachine*> 3526 (&this->getTargetMachine()); 3527 const AMDILSubtarget* 3528 stm = dynamic_cast<const AMDILSubtarget*>( 3529 amdtm->getSubtargetImpl()); 3530 if (LST == MVT::f64 && LHSVT.isVector() 3531 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3532 // We dont support vector 64bit floating point convertions. 3533 DST = Op; 3534 for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) { 3535 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 3536 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); 3537 op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op); 3538 if (!x) { 3539 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); 3540 } else { 3541 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST, 3542 op, DAG.getTargetConstant(x, MVT::i32)); 3543 } 3544 3545 } 3546 } else { 3547 3548 if (RST == MVT::i32 3549 && LST == MVT::f64) { 3550 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3551 DST = SDValue(Op.getNode(), 0); 3552 } else { 3553 DST = genu32tof64(RHS, LHSVT, DAG); 3554 } 3555 } else if (RST == MVT::i64 3556 && LST == MVT::f64) { 3557 DST = genu64tof64(RHS, LHSVT, DAG); 3558 } else { 3559 DST = SDValue(Op.getNode(), 0); 3560 } 3561 } 3562 return DST; 3563} 3564 3565SDValue 3566AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const 3567{ 3568 
SDValue RHS = Op.getOperand(0); 3569 EVT RHSVT = RHS.getValueType(); 3570 MVT RST = RHSVT.getScalarType().getSimpleVT(); 3571 EVT INTVT; 3572 EVT LONGVT; 3573 SDValue DST; 3574 bool isVec = RHSVT.isVector(); 3575 DebugLoc DL = Op.getDebugLoc(); 3576 EVT LHSVT = Op.getValueType(); 3577 MVT LST = LHSVT.getScalarType().getSimpleVT(); 3578 const AMDILTargetMachine* 3579 amdtm = reinterpret_cast<const AMDILTargetMachine*> 3580 (&this->getTargetMachine()); 3581 const AMDILSubtarget* 3582 stm = dynamic_cast<const AMDILSubtarget*>( 3583 amdtm->getSubtargetImpl()); 3584 if (LST == MVT::f64 && LHSVT.isVector() 3585 && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3586 // We dont support vector 64bit floating point convertions. 3587 for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) { 3588 SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 3589 DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32)); 3590 op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op); 3591 if (!x) { 3592 DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op); 3593 } else { 3594 DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST, 3595 op, DAG.getTargetConstant(x, MVT::i32)); 3596 } 3597 3598 } 3599 } else { 3600 3601 if (isVec) { 3602 LONGVT = EVT(MVT::getVectorVT(MVT::i64, 3603 RHSVT.getVectorNumElements())); 3604 INTVT = EVT(MVT::getVectorVT(MVT::i32, 3605 RHSVT.getVectorNumElements())); 3606 } else { 3607 LONGVT = EVT(MVT::i64); 3608 INTVT = EVT(MVT::i32); 3609 } 3610 MVT RST = RHSVT.getScalarType().getSimpleVT(); 3611 if ((RST == MVT::i32 || RST == MVT::i64) 3612 && LST == MVT::f64) { 3613 if (RST == MVT::i32) { 3614 if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) { 3615 DST = SDValue(Op.getNode(), 0); 3616 return DST; 3617 } 3618 } 3619 SDValue c31 = DAG.getConstant( 31, INTVT ); 3620 SDValue cSbit = DAG.getConstant( 0x80000000, INTVT ); 3621 3622 SDValue S; // Sign, as 0 or -1 3623 SDValue Sbit; // Sign bit, as one bit, MSB only. 
      // Extract sign mask S (0 or -1) and the isolated sign bit Sbit.
      if (RST == MVT::i32) {
        Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit );
        S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 );
      } else { // 64-bit case... SRA of 64-bit values is slow
        // Take the sign from the high 32-bit half, then widen the
        // replicated sign word back to a 64-bit mask.
        SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS );
        Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit );
        SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 );
        S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp );
      }

      // get abs() of input value, given sign as S (0 or -1)
      // SpI = RHS + S
      SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S);
      // SpIxS = SpI ^ S
      SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S);

      // Convert unsigned value to double precision
      SDValue R;
      if (RST == MVT::i32) {
        // r = cast_u32_to_f64(SpIxS)
        R = genu32tof64(SpIxS, LHSVT, DAG);
      } else {
        // r = cast_u64_to_f64(SpIxS)
        R = genu64tof64(SpIxS, LHSVT, DAG);
      }

      // drop in the sign bit: view the double as a 64-bit integer,
      // OR the saved sign bit into the high half, and view it back.
      SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R );
      SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t );
      SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t );
      thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit );
      t = DAG.getNode( (isVec) ?
          AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi );
      DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t );
    } else {
      // Unhandled combination: pass through unchanged.
      DST = SDValue(Op.getNode(), 0);
    }
  }
  return DST;
}

/// Custom-lower ISD::SUB.  Only i64/v2i64 subtraction is expanded here
/// (as 32-bit low/high halves with a manual borrow); all other types are
/// passed through unchanged.
SDValue
AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
{
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = RHS.getValueType().isVector();
  if (OVT.getScalarType() == MVT::i64) {
    /*const AMDILTargetMachine*
      amdtm = reinterpret_cast<const AMDILTargetMachine*>
      (&this->getTargetMachine());
    const AMDILSubtarget*
      stm = dynamic_cast<const AMDILSubtarget*>(
          amdtm->getSubtargetImpl());*/
    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
    // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
    LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
    RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
    LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
    RHSHI = DAG.getNode((isVec) ?
        AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
    // Subtract the halves independently; the borrow is patched in below.
    INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
    INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
    //TODO: need to use IBORROW on HD5XXX and later hardware
    // cmp is -1 (all ones) per lane where LHSLO < RHSLO, i.e. where the
    // low-half subtraction borrowed; adding it to INTHI subtracts 1.
    SDValue cmp;
    if (OVT == MVT::i64) {
      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSLO, RHSLO);
    } else {
      // v2i64: compare each 32-bit low half separately and rebuild a
      // v2i32 borrow mask.
      SDValue cmplo;
      SDValue cmphi;
      SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
      SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
      cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRLO, RHSRLO);
      cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRHI, RHSRHI);
      cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
      cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
          cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
    }
    // Apply the borrow (adding -1 where a borrow occurred).
    INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
    DST = DAG.getNode((isVec) ?
        AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
        INTLO, INTHI);
  } else {
    // Non-i64 subtraction is handled by default lowering.
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

/// Dispatch ISD::FDIV to the width-specific helper (f64/f32); other
/// types pass through unchanged.
SDValue
AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::f64) {
    DST = LowerFDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::f32) {
    DST = LowerFDIV32(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

/// Dispatch ISD::SDIV to the width-specific helper.  i16 and i8 share
/// the 24-bit path (they fit in the device's 24-bit multiplier).
SDValue
AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

/// Dispatch ISD::UDIV to the width-specific helper; mirrors LowerSDIV.
SDValue
AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerUDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerUDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerUDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

/// Dispatch ISD::SREM to the width-specific helper (continues below).
SDValue
AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

/// Dispatch ISD::UREM to the width-specific helper; mirrors LowerSREM.
SDValue
AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerUREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerUREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerUREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerUREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

/// Custom-lower ISD::MUL for i64/v2i64 using 32-bit schoolbook
/// multiplication:
///   low  = lo(a)*lo(b)                       (low 32 bits)
///   high = hi(a)*lo(b) + lo(a)*hi(b) + mulhi(lo(a),lo(b))
/// Other types pass through unchanged.
SDValue
AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = OVT.isVector();
  if (OVT.getScalarType() != MVT::i64)
  {
    DST = SDValue(Op.getNode(), 0);
  } else {
    assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
    // TODO: This needs to be turned into a tablegen pattern
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);

    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    // mul64(h1, l1, h0, l0)
    SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, LHS);
    SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, LHS);
    SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, RHS);
    SDValue RHSHI = DAG.getNode((isVec) ?
        AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, RHS);
    // MULLO_UINT_1 r1, h0, l1
    SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSHI, LHSLO);
    // MULLO_UINT_1 r2, h1, l0
    SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSLO, LHSHI);
    // ADD_INT hr, r1, r2
    SDValue ADDHI = DAG.getNode(ISD::ADD,
        DL,
        INTTY, RHILLO, RLOHHI);
    // MULHI_UINT_1 r3, l1, l0 -- carry out of the low 32x32 product
    SDValue RLOLLO = DAG.getNode(ISD::MULHU,
        DL,
        INTTY, RHSLO, LHSLO);
    // ADD_INT hr, hr, r3
    SDValue HIGH = DAG.getNode(ISD::ADD,
        DL,
        INTTY, ADDHI, RLOLLO);
    // MULLO_UINT_1 l3, l1, l0
    SDValue LOW = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, LHSLO, RHSLO);
    // Recombine the 32-bit halves into the 64-bit result.
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
        DL,
        OVT, LOW, HIGH);
  }
  return DST;
}

/// Custom-lower ISD::BUILD_VECTOR: splat the first operand with VBUILD,
/// then insert the remaining (non-undef) operands.  If all operands are
/// equal, the splat alone suffices.  Note the deliberate switch
/// fall-through: case 4 also performs cases 3 and 2.
SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  //printSDValue(Op, 1);
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    // Pure splat: the VBUILD already holds the value in every lane.
    return Nodes1;
  }
  // NOTE(review): the insert indices 7/6/5 below do not look like plain
  // 0-based element indices; presumably they encode an AMDIL-specific
  // lane numbering consumed by the INSERT_VECTOR_ELT lowering -- verify
  // against LowerINSERT_VECTOR_ELT/VINSERT semantics.
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  };
  return Nodes1;
}

/// Custom-lower ISD::INSERT_VECTOR_ELT via the AMDIL VINSERT node.
/// A constant index is encoded directly into the two VINSERT swizzle
/// masks; a dynamic index is handled by generating one VINSERT per
/// possible position and selecting the right one with CMP + CMOVLOG.
SDValue
AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
    SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  const SDValue *ptr = NULL;
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  uint32_t swizzleNum = 0;
  SDValue DST;
  if (!VT.isVector()) {
    // Inserting into a scalar is the identity on operand 0.
    SDValue Res = Op.getOperand(0);
    return Res;
  }

  // Element to insert; fall back to the vector itself if it is undef.
  if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
    ptr = &Op.getOperand(1);
  } else {
    ptr = &Op.getOperand(0);
  }
  if (CSDN) {
    // Static index: build the per-byte select (mask2) and source-lane
    // (mask3) swizzle masks for VINSERT.
    swizzleNum = (uint32_t)CSDN->getZExtValue();
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    DST = DAG.getNode(AMDILISD::VINSERT,
        DL,
        VT,
        Op.getOperand(0),
        *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
  } else {
    // Dynamic index: start with an insert at position 0 (swizzleNum is
    // still 0 here), then for each other position generate the insert
    // and select it when the runtime index matches.
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    SDValue res = DAG.getNode(AMDILISD::VINSERT,
        DL, VT, Op.getOperand(0), *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
    for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
      mask2 = 0x04030201 & ~(0xFF << (x * 8));
      mask3 = 0x01010101 & (0xFF << (x * 8));
      SDValue t = DAG.getNode(AMDILISD::VINSERT,
          DL, VT, Op.getOperand(0), *ptr,
          DAG.getTargetConstant(mask2, MVT::i32),
          DAG.getTargetConstant(mask3, MVT::i32));
      SDValue c = DAG.getNode(AMDILISD::CMP, DL,
          ptr->getValueType(),
          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
          Op.getOperand(2), DAG.getConstant(x, MVT::i32));
      c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
      res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
    }
    DST = res;
  }
  return DST;
}

/// Custom-lower ISD::EXTRACT_VECTOR_ELT via the AMDIL VEXTRACT node.
/// VEXTRACT uses 1-based lane numbers (the static path adds 1 to the
/// constant index).  A dynamic index is handled by extracting every lane
/// and selecting the matching one with CMP + CMOVLOG.
SDValue
AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
    SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  //printSDValue(Op, 1);
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  uint64_t swizzleNum = 0;
  DebugLoc DL = Op.getDebugLoc();
  SDValue Res;
  if (!Op.getOperand(0).getValueType().isVector()) {
    // Extracting from a scalar is the identity.
    Res = Op.getOperand(0);
    return Res;
  }
  if (CSDN) {
    // Static vector extraction
    swizzleNum = CSDN->getZExtValue() + 1;
    Res = DAG.getNode(AMDILISD::VEXTRACT,
        DL, VT,
        Op.getOperand(0),
        DAG.getTargetConstant(swizzleNum, MVT::i32));
  } else {
    // Dynamic index: res starts as lane 1 (element 0), then each other
    // lane replaces it when the runtime index matches.
    SDValue Op1 = Op.getOperand(1);
    uint32_t vecSize = 4;
    SDValue Op0 = Op.getOperand(0);
    SDValue res = DAG.getNode(AMDILISD::VEXTRACT,
        DL, VT, Op0,
        DAG.getTargetConstant(1, MVT::i32));
    if (Op0.getValueType().isVector()) {
      vecSize = Op0.getValueType().getVectorNumElements();
    }
    for (uint32_t x = 2; x <= vecSize; ++x) {
      SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
          DL, VT, Op0,
          DAG.getTargetConstant(x, MVT::i32));
      // NOTE(review): Op1 is the 0-based LLVM element index while x is
      // the 1-based VEXTRACT lane; comparing Op1 == x (rather than
      // x - 1) looks off by one -- verify against AMDIL CMP/VEXTRACT
      // index conventions.
      SDValue c = DAG.getNode(AMDILISD::CMP,
          DL, Op1.getValueType(),
          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
          Op1, DAG.getConstant(x, MVT::i32));
      res = DAG.getNode(AMDILISD::CMOVLOG, DL,
          VT, c, t, res);
    }
    Res = res;
  }
  return Res;
}

/// Custom-lower ISD::EXTRACT_SUBVECTOR by extracting the source elements
/// one at a time and rebuilding the subvector (body continues below).
SDValue
AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
    SelectionDAG &DAG) const
{
  uint32_t vecSize = Op.getValueType().getVectorNumElements();
  SDValue src = Op.getOperand(0);
  const
  ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  uint64_t offset = 0;
  EVT vecType = Op.getValueType().getVectorElementType();
  DebugLoc DL = Op.getDebugLoc();
  SDValue Result;
  if (CSDN) {
    // Static start offset: pull out elements [offset, offset+vecSize)
    // and rebuild the subvector (VBUILD seeds it, inserts fill it).
    offset = CSDN->getZExtValue();
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
        DL,vecType, src, DAG.getConstant(offset, MVT::i32));
    Result = DAG.getNode(AMDILISD::VBUILD, DL,
        Op.getValueType(), Result);
    for (uint32_t x = 1; x < vecSize; ++x) {
      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
          src, DAG.getConstant(offset + x, MVT::i32));
      if (elt.getOpcode() != ISD::UNDEF) {
        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
            Op.getValueType(), Result, elt,
            DAG.getConstant(x, MVT::i32));
      }
    }
  } else {
    // Dynamic start offset: same structure, advancing the index by 1
    // per element.  NOTE(review): the ADD is typed as vecType and the
    // running idx is reused as the insert position -- confirm intended.
    SDValue idx = Op.getOperand(1);
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
        DL, vecType, src, idx);
    Result = DAG.getNode(AMDILISD::VBUILD, DL,
        Op.getValueType(), Result);
    for (uint32_t x = 1; x < vecSize; ++x) {
      idx = DAG.getNode(ISD::ADD, DL, vecType,
          idx, DAG.getConstant(1, MVT::i32));
      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
          src, idx);
      if (elt.getOpcode() != ISD::UNDEF) {
        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
            Op.getValueType(), Result, elt, idx);
      }
    }
  }
  return Result;
}

/// Custom-lower ISD::SCALAR_TO_VECTOR as a VBUILD splat of the scalar.
SDValue
AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
    SelectionDAG &DAG) const
{
  SDValue Res = DAG.getNode(AMDILISD::VBUILD,
      Op.getDebugLoc(),
      Op.getValueType(),
      Op.getOperand(0));
  return Res;
}

/// Custom-lower ISD::AND to the target's AMDILISD::AND node.
SDValue
AMDILTargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue andOp;
  andOp = DAG.getNode(
      AMDILISD::AND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Op.getOperand(0),
      Op.getOperand(1));
  return andOp;
}

/// Custom-lower ISD::OR to the target's AMDILISD::OR node.
SDValue
AMDILTargetLowering::LowerOR(SDValue Op,
    SelectionDAG &DAG) const
{
  SDValue orOp;
  orOp = DAG.getNode(AMDILISD::OR,
      Op.getDebugLoc(),
      Op.getValueType(),
      Op.getOperand(0),
      Op.getOperand(1));
  return orOp;
}

/// Custom-lower ISD::SELECT: normalize the condition to the result type
/// with getConversionNode, then emit a CMOVLOG (cond ? LHS : RHS).
SDValue
AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond = Op.getOperand(0);
  SDValue LHS = Op.getOperand(1);
  SDValue RHS = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(AMDILISD::CMOVLOG,
      DL,
      Op.getValueType(), Cond, LHS, RHS);
  return Cond;
}

/// Custom-lower ISD::SELECT_CC: compare LHS/RHS with the AMDIL CMP node,
/// then select TRUE/FALSE with CMOVLOG.  When the two results are the
/// constants -1/0 (or 0/-1) the comparison mask itself (optionally
/// inverted) already IS the result, so the CMOVLOG is skipped.
SDValue
AMDILTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TRUE = Op.getOperand(2);
  SDValue FALSE = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  DebugLoc DL = Op.getDebugLoc();
  bool skipCMov = false;
  bool genINot = false;
  EVT OVT = Op.getValueType();

  // Check for possible elimination of cmov
  if (TRUE.getValueType().getSimpleVT().SimpleTy == MVT::i32) {
    const ConstantSDNode *trueConst
      = dyn_cast<ConstantSDNode>( TRUE.getNode() );
    const ConstantSDNode *falseConst
      = dyn_cast<ConstantSDNode>( FALSE.getNode() );
    if (trueConst && falseConst) {
      // both possible result values are constants
      if (trueConst->isAllOnesValue()
          && falseConst->isNullValue()) { // and convenient constants
        skipCMov = true;
      }
      else if (trueConst->isNullValue()
          && falseConst->isAllOnesValue()) { // less convenient
        skipCMov = true;
        genINot = true;
      }
    }
  }
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  Cond = DAG.getNode(
      AMDILISD::CMP,
      DL,
      LHS.getValueType(),
      DAG.getConstant(AMDILCC, MVT::i32),
      LHS,
      RHS);
  // Convert the comparison mask to the result's type.
  Cond = getConversionNode(DAG, Cond, Op, true);
  if (genINot) {
    Cond = DAG.getNode(AMDILISD::NOT, DL, OVT, Cond);
  }
  if (!skipCMov) {
    Cond = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, Cond, TRUE, FALSE);
  }
  return Cond;
}

/// Custom-lower ISD::SETCC: emit the AMDIL CMP, convert its all-ones/
/// all-zeros mask to the result type, and AND with 1 so the final value
/// is the canonical 0/1 boolean LLVM expects.
SDValue
AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  Cond = DAG.getNode(
      AMDILISD::CMP,
      DL,
      LHS.getValueType(),
      DAG.getConstant(AMDILCC, MVT::i32),
      LHS,
      RHS);
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      Cond.getValueType(),
      DAG.getConstant(1, Cond.getValueType()),
      Cond);
  return Cond;
}

/// Custom-lower ISD::SIGN_EXTEND_INREG with the classic shl/sra pair:
/// shift the value left so the sign bit of the narrow type lands in the
/// MSB, then arithmetic-shift it back.  Sub-32-bit values are widened to
/// 32 bits first and truncated back afterwards.
SDValue
AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ?
        DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}

/// Build an integer EVT of i32 or i64 lanes whose total width is
/// size * numEle bits (minimum one lane).  Used when reinterpreting
/// arbitrary types as 32/64-bit integer vectors.
EVT
AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int iSize = (size * numEle);
  // Divide total bits by the lane width (64 -> >>6, otherwise >>5).
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}

/// Custom-lower ISD::BITCAST.  Floating-point sources are first viewed
/// as same-width integers; then the sub-32-bit and i64<->v4i16 cases are
/// expanded by hand (the generic legalizer cannot handle them for this
/// target); anything else falls through to a plain BITCONV node.
SDValue
AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Src = Op.getOperand(0);
  SDValue Dst = Op;
  SDValue Res;
  DebugLoc DL = Op.getDebugLoc();
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Dst.getValueType();
  // Lets bitcast the floating point types to an
  // equivalent integer type before converting to vectors.
  if (SrcVT.getScalarType().isFloatingPoint()) {
    Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
          SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
          SrcVT.isVector() ?
          SrcVT.getVectorNumElements() : 1),
        Src);
    SrcVT = Src.getValueType();
  }
  uint32_t ScalarSrcSize = SrcVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t ScalarDstSize = DstVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
  uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
  bool isVec = SrcVT.isVector();
  if (DstVT.getScalarType().isInteger() &&
      (SrcVT.getScalarType().isInteger()
       || SrcVT.getScalarType().isFloatingPoint())) {
    if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
        || (ScalarSrcSize == 64
          && DstNumEle == 4
          && ScalarDstSize == 16)) {
      // This is the problematic case when bitcasting i64 <-> <4 x i16>
      // This approach is a little different as we cannot generate a
      // <4 x i64> vector
      // as that is illegal in our backend and we are already past
      // the DAG legalizer.
      // So, in this case, we will do the following conversion.
      // Case 1:
      // %dst = <4 x i16> %src bitconvert i64 ==>
      // %tmp = <4 x i16> %src convert <4 x i32>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
      // %dst = <2 x i32> %tmp bitcast i64
      // case 2:
      // %dst = i64 %src bitconvert <4 x i16> ==>
      // %tmp = i64 %src bitcast <2 x i32>
      // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
      // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %dst = <4 x i16> %tmp bitcast <4 x i32>
      SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
          DAG.getConstant(0xFFFF, MVT::i32));
      SDValue const16 = DAG.getConstant(16, MVT::i32);
      if (ScalarDstSize == 64) {
        // case 1: pack the four 16-bit lanes pairwise into two 32-bit
        // words, then combine them into one i64.
        Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
        Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
        SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(0, MVT::i32));
        SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(1, MVT::i32));
        y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
        SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(2, MVT::i32));
        SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(3, MVT::i32));
        w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
        x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);   // low word
        y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);   // high word
        // NOTE(review): isVec reflects the *source* (<4 x i16>), so this
        // picks LCREATE2 while producing scalar MVT::i64 -- confirm that
        // LCREATE (scalar) is not what was intended here.
        Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
        return Res;
      } else {
        // case 2: split the i64 into lo/hi words, spread the four
        // 16-bit fields across a v4i32, mask, and narrow to v4i16.
        SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
        SDValue lor16
          = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
        SDValue hi = DAG.getNode((isVec) ?
            AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
        SDValue hir16
          = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
        SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
            MVT::v4i32, lo);
        SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(1, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, lor16, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(2, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, hi, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(3, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, hir16, idxVal);
        resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
        Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
        return Res;
      }
    } else {
      // There are four cases we need to worry about for bitcasts
      // where the size of all
      // source, intermediates and result is <= 128 bits, unlike
      // the above case
      // 1) Sub32bit bitcast 32bitAlign
      // %dst = <4 x i8> bitcast i32
      // (also <[2|4] x i16> to <[2|4] x i32>)
      // 2) 32bitAlign bitcast Sub32bit
      // %dst = i32 bitcast <4 x i8>
      // 3) Sub32bit bitcast LargerSub32bit
      // %dst = <2 x i8> bitcast i16
      // (also <4 x i8> to <2 x i16>)
      // 4) Sub32bit bitcast SmallerSub32bit
      // %dst = i16 bitcast <2 x i8>
      // (also <2 x i16> to <4 x i8>)
      // This also only handles types that are powers of two
      if ((ScalarDstSize & (ScalarDstSize - 1))
          || (ScalarSrcSize & (ScalarSrcSize - 1))) {
        // Non-power-of-two widths: fall through to the plain BITCONV
        // at the end of the function.
      } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
        // case 1: widen each narrow lane to a 32/64-bit lane...
        EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
#if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
        SDValue res = DAG.getSExtOrTrunc(Src, DL,
            IntTy);
#else
        // ...element by element, since vector sext/trunc is rejected.
        SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getUNDEF(IntTy.getScalarType()));
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(x, MVT::i32));
          SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              SrcVT.getScalarType(), Src,
              DAG.getConstant(x, MVT::i32));
          temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
          res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
              res, temp, idx);
        }
#endif
        // Mask each widened lane to its original bit width.
        SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
        SDValue *newEle = new SDValue[SrcNumEle];
        res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              IntTy.getScalarType(), res,
              DAG.getConstant(x, MVT::i32));
        }
        // Shift each source lane to its bit position within the wider
        // destination lane, then OR adjacent lanes together.
        uint32_t Ratio = SrcNumEle / DstNumEle;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          if (x % Ratio) {
            newEle[x] = DAG.getNode(ISD::SHL, DL,
                IntTy.getScalarType(), newEle[x],
                DAG.getConstant(ScalarSrcSize * (x % Ratio),
                  MVT::i32));
          }
        }
        for (uint32_t x = 0; x < SrcNumEle; x += 2) {
          newEle[x] = DAG.getNode(ISD::OR, DL,
              IntTy.getScalarType(), newEle[x], newEle[x + 1]);
        }
        if (ScalarSrcSize == 8) {
          // 8-bit sources pack four-to-one: one more OR level, and the
          // surviving partial results sit at stride 4.
          for (uint32_t x = 0; x < SrcNumEle; x += 4) {
            newEle[x] = DAG.getNode(ISD::OR, DL,
                IntTy.getScalarType(), newEle[x], newEle[x + 2]);
          }
          if (DstNumEle == 1) {
            Dst = newEle[0];
          } else {
            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
                newEle[0]);
            for (uint32_t x = 1; x < DstNumEle; ++x) {
              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                  getPointerTy(), DAG.getConstant(x, MVT::i32));
              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                  DstVT, Dst, newEle[x * 4], idx);
            }
          }
        } else {
          // 16-bit sources pack two-to-one: partial results at stride 2.
          if (DstNumEle == 1) {
            Dst = newEle[0];
          } else {
            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
                newEle[0]);
            for (uint32_t x = 1; x < DstNumEle; ++x) {
              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                  getPointerTy(), DAG.getConstant(x, MVT::i32));
              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                  DstVT, Dst, newEle[x * 2], idx);
            }
          }
        }
        delete [] newEle;
        return Dst;
      } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
        // case 2: split each wide lane into mult narrow fields by
        // shifting right and truncating.
        EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
        SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getUNDEF(IntTy.getScalarType()));
        uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          for (uint32_t y = 0; y < mult; ++y) {
            SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                getPointerTy(),
                DAG.getConstant(x * mult + y, MVT::i32));
            SDValue t;
            if (SrcNumEle > 1) {
              t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
                  DL, SrcVT.getScalarType(), Src,
                  DAG.getConstant(x, MVT::i32));
            } else {
              t = Src;
            }
            if (y != 0) {
              // Move field y down to the low bits before truncation.
              t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
                  t, DAG.getConstant(y * ScalarDstSize,
                    MVT::i32));
            }
            vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
                DL, IntTy, vec, t, idx);
          }
        }
        Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
        return Dst;
      } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
        // case 3: pair up i8 lanes into i16 lanes (low byte | high<<8).
        SDValue *numEle = new SDValue[SrcNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              MVT::i8, Src, DAG.getConstant(x, MVT::i32));
          numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
          numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
              DAG.getConstant(0xFF, MVT::i16));
        }
        for (uint32_t x = 1; x < SrcNumEle; x += 2) {
          numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
              DAG.getConstant(8, MVT::i16));
          // Combined i16 values land at even indices (0, 2, ...).
          numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
              numEle[x-1], numEle[x]);
        }
        if (DstNumEle > 1) {
          // If we are not a scalar i16, the only other case is a
          // v2i16 since we can't have v8i8 at this point, v4i16
          // cannot be generated
          Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
              numEle[0]);
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(1, MVT::i32));
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
              Dst, numEle[2], idx);
        } else {
          Dst = numEle[0];
        }
        delete [] numEle;
        return Dst;
      } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
        // case 4: split each i16 lane into two byte fields (value and
        // value >> 8), build an i16 vector, then narrow to i8 lanes.
        SDValue *numEle = new SDValue[DstNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              MVT::i16, Src, DAG.getConstant(x, MVT::i32));
          numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
              numEle[x * 2], DAG.getConstant(8, MVT::i16));
        }
        MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
        Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
        for (uint32_t x = 1; x < DstNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(x, MVT::i32));
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
              Dst, numEle[x], idx);
        }
        delete [] numEle;
        ty = (SrcNumEle == 1) ?
          MVT::v2i8 : MVT::v4i8;
        Res = DAG.getSExtOrTrunc(Dst, DL, ty);
        return Res;
      }
    }
  }
  // Default: a plain bit-convert node handles the remaining cases.
  Res = DAG.getNode(AMDILISD::BITCONV,
      Dst.getDebugLoc(),
      Dst.getValueType(), Src);
  return Res;
}

/// Custom-lower ISD::DYNAMIC_STACKALLOC: bump the AMDIL stack-pointer
/// register by Size and return the new SP along with the updated chain.
SDValue
AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  unsigned int SPReg = AMDIL::SP;
  DebugLoc DL = Op.getDebugLoc();
  SDValue SP = DAG.getCopyFromReg(Chain,
      DL,
      SPReg, MVT::i32);
  SDValue NewSP = DAG.getNode(ISD::ADD,
      DL,
      MVT::i32, SP, Size);
  // Chain through the SP copy so the read-modify-write is ordered.
  Chain = DAG.getCopyToReg(SP.getValue(1),
      DL,
      SPReg, NewSP);
  SDValue Ops[2] = {NewSP, Chain};
  Chain = DAG.getMergeValues(Ops, 2 ,DL);
  return Chain;
}

/// Custom-lower ISD::BRCOND to the AMDIL BRANCH_COND node
/// (chain, target, condition).
SDValue
AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);
  SDValue Result;
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Chain, Jump, Cond);
  return Result;
}

/// Custom-lower ISD::BR_CC: fold the compare into an AMDIL CMP node and
/// branch on its result.  (Function continues past this view.)
SDValue
AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  CondCodeSDNode *CCNode = cast<CondCodeSDNode>(Op.getOperand(1));
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue JumpT = Op.getOperand(4);
  SDValue CmpValue;
  ISD::CondCode CC = CCNode->get();
  SDValue Result;
  unsigned int cmpOpcode = CondCCodeToCC(
      CC,
      LHS.getValueType().getSimpleVT().SimpleTy);
  CmpValue = DAG.getNode(
      AMDILISD::CMP,
      Op.getDebugLoc(),
      LHS.getValueType(),
      DAG.getConstant(cmpOpcode, MVT::i32),
      LHS, RHS);
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

// LowerFP_ROUND - Lower an FP_ROUND (f64 -> f32) to AMDIL's DP_TO_FP node,
// preserving both operands (value and the truncation flag).
SDValue
AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Result = DAG.getNode(
      AMDILISD::DP_TO_FP,
      Op.getDebugLoc(),
      Op.getValueType(),
      Op.getOperand(0),
      Op.getOperand(1));
  return Result;
}

// LowerCONCAT_VECTORS - Lower a two-operand CONCAT_VECTORS to AMDIL's
// VCONCAT node.
SDValue
AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Result = DAG.getNode(
      AMDILISD::VCONCAT,
      Op.getDebugLoc(),
      Op.getValueType(),
      Op.getOperand(0),
      Op.getOperand(1));
  return Result;
}
// LowerRET - Lower an ISD::RET node.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG)
const
{
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // ISD::Ret => ret chain, (regnum1, val1), ...
    // So i * 2 + 1 index only the regnums
    Chain = DAG.getCopyToReg(Chain,
        dl,
        VA.getLocReg(),
        ValToCopy,
        Flag);
    // guarantee that all emitted copies are stuck together
    // avoiding something bad
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
    // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;
  if (Flag.getNode())
    RetOps.push_back(Flag);

  Flag = DAG.getNode(AMDILISD::RET_FLAG,
      dl,
      MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}
// generateLongRelational - Expand a 64-bit integer comparison pseudo (MI)
// into 32-bit compares on the high/low halves plus combining logic, emitting
// machine instructions at the position previously set via setPrivateData().
void
AMDILTargetLowering::generateLongRelational(MachineInstr *MI,
    unsigned int opCode) const
{
  MachineOperand DST = MI->getOperand(0);
  MachineOperand LHS = MI->getOperand(2);
  MachineOperand RHS = MI->getOperand(3);
  unsigned int opi32Code = 0, si32Code = 0;
  unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
  uint32_t REGS[12];
  // All the relationals can be generated with 12 temp registers
  for (int x = 0; x < 12; ++x) {
    REGS[x] = genVReg(simpleVT);
  }
  // Pull out the high and low components of each 64 bit register
  generateMachineInst(AMDIL::LHI, REGS[0], LHS.getReg());
  generateMachineInst(AMDIL::LLO, REGS[1], LHS.getReg());
  generateMachineInst(AMDIL::LHI, REGS[2], RHS.getReg());
  generateMachineInst(AMDIL::LLO, REGS[3], RHS.getReg());
  // Determine the correct opcode that we should use
  switch(opCode) {
    default:
      assert(!"comparison case not handled!");
      break;
    case AMDIL::LEQ:
      si32Code = opi32Code = AMDIL::IEQ;
      break;
    case AMDIL::LNE:
      si32Code = opi32Code = AMDIL::INE;
      break;
    case AMDIL::LLE:
    case AMDIL::ULLE:
    case AMDIL::LGE:
    case AMDIL::ULGE:
      // GE is handled as LE with operands swapped (and vice versa for the
      // low halves).
      if (opCode == AMDIL::LGE || opCode == AMDIL::ULGE) {
        std::swap(REGS[0], REGS[2]);
      } else {
        std::swap(REGS[1], REGS[3]);
      }
      if (opCode == AMDIL::LLE || opCode == AMDIL::LGE) {
        opi32Code = AMDIL::ILT;
      } else {
        opi32Code = AMDIL::ULT;
      }
      si32Code = AMDIL::UGE;
      break;
    case AMDIL::LGT:
    case AMDIL::ULGT:
      std::swap(REGS[0], REGS[2]);
      std::swap(REGS[1], REGS[3]);
      // fall through: after swapping both halves, GT reduces to LT.
    case AMDIL::LLT:
    case AMDIL::ULLT:
      if (opCode == AMDIL::LGT || opCode == AMDIL::LLT) {
        opi32Code = AMDIL::ILT;
      } else {
        opi32Code = AMDIL::ULT;
      }
      si32Code = AMDIL::ULT;
      break;
  };
  // Do the initial opcode on the high and low components.
  // This leaves the following:
  // REGS[4] = L_HI OP R_HI
  // REGS[5] = L_LO OP R_LO
  generateMachineInst(opi32Code, REGS[4], REGS[0], REGS[2]);
  generateMachineInst(si32Code, REGS[5], REGS[1], REGS[3]);
  switch(opi32Code) {
    case AMDIL::IEQ:
    case AMDIL::INE:
      {
        // combine the results with an and or or depending on if
        // we are eq or ne
        uint32_t combineOp = (opi32Code == AMDIL::IEQ)
          ? AMDIL::BINARY_AND_i32 : AMDIL::BINARY_OR_i32;
        generateMachineInst(combineOp, REGS[11], REGS[4], REGS[5]);
      }
      break;
    default:
      // this finishes codegen for the following pattern
      // REGS[4] || (REGS[5] && (L_HI == R_HI))
      generateMachineInst(AMDIL::IEQ, REGS[9], REGS[0], REGS[2]);
      generateMachineInst(AMDIL::BINARY_AND_i32, REGS[10], REGS[5],
          REGS[9]);
      generateMachineInst(AMDIL::BINARY_OR_i32, REGS[11], REGS[4],
          REGS[10]);
      break;
  }
  // Replicate the 32-bit result into both halves of the 64-bit destination.
  generateMachineInst(AMDIL::LCREATE, DST.getReg(), REGS[11], REGS[11]);
}

// getFunctionAlignment - AMDIL imposes no extra function alignment.
unsigned int
AMDILTargetLowering::getFunctionAlignment(const Function *) const
{
  return 0;
}

// setPrivateData - Cache the insertion point (block, iterator, debug loc,
// instr info) used by genVReg/generateMachineInst below.
void
AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
    MachineBasicBlock::iterator &BBI,
    DebugLoc *DL, const TargetInstrInfo *TII) const
{
  mBB = BB;
  mBBI = BBI;
  mDL = DL;
  mTII = TII;
}
// genVReg - Create a new virtual register of the given register class ID.
uint32_t
AMDILTargetLowering::genVReg(uint32_t regType) const
{
  return mBB->getParent()->getRegInfo().createVirtualRegister(
      getRegClassFromID(regType));
}

// generateMachineInst - Build an instruction at the cached insertion point
// with a destination register only.
MachineInstrBuilder
AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
{
  return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
}

// generateMachineInst - As above, with one source register.
MachineInstrBuilder
AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
    uint32_t src1) const
{
  return generateMachineInst(opcode, dst).addReg(src1);
}

// generateMachineInst - As above, with two source registers.
MachineInstrBuilder
AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
    uint32_t src1, uint32_t src2) const
{
  return generateMachineInst(opcode, dst, src1).addReg(src2);
}

// generateMachineInst - As above, with three source registers.
MachineInstrBuilder
AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
    uint32_t src1, uint32_t src2, uint32_t src3) const
{
  return generateMachineInst(opcode, dst, src1, src2).addReg(src3);
}


SDValue
4853AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const 4854{ 4855 DebugLoc DL = Op.getDebugLoc(); 4856 EVT OVT = Op.getValueType(); 4857 SDValue LHS = Op.getOperand(0); 4858 SDValue RHS = Op.getOperand(1); 4859 MVT INTTY; 4860 MVT FLTTY; 4861 if (!OVT.isVector()) { 4862 INTTY = MVT::i32; 4863 FLTTY = MVT::f32; 4864 } else if (OVT.getVectorNumElements() == 2) { 4865 INTTY = MVT::v2i32; 4866 FLTTY = MVT::v2f32; 4867 } else if (OVT.getVectorNumElements() == 4) { 4868 INTTY = MVT::v4i32; 4869 FLTTY = MVT::v4f32; 4870 } 4871 unsigned bitsize = OVT.getScalarType().getSizeInBits(); 4872 // char|short jq = ia ^ ib; 4873 SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); 4874 4875 // jq = jq >> (bitsize - 2) 4876 jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); 4877 4878 // jq = jq | 0x1 4879 jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); 4880 4881 // jq = (int)jq 4882 jq = DAG.getSExtOrTrunc(jq, DL, INTTY); 4883 4884 // int ia = (int)LHS; 4885 SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); 4886 4887 // int ib, (int)RHS; 4888 SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); 4889 4890 // float fa = (float)ia; 4891 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); 4892 4893 // float fb = (float)ib; 4894 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); 4895 4896 // float fq = native_divide(fa, fb); 4897 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb); 4898 4899 // fq = trunc(fq); 4900 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); 4901 4902 // float fqneg = -fq; 4903 SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); 4904 4905 // float fr = mad(fqneg, fb, fa); 4906 SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa); 4907 4908 // int iq = (int)fq; 4909 SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); 4910 4911 // fr = fabs(fr); 4912 fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr); 4913 4914 // fb = fabs(fb); 4915 fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); 
4916 4917 // int cv = fr >= fb; 4918 SDValue cv; 4919 if (INTTY == MVT::i32) { 4920 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 4921 } else { 4922 cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); 4923 } 4924 // jq = (cv ? jq : 0); 4925 jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq, 4926 DAG.getConstant(0, OVT)); 4927 // dst = iq + jq; 4928 iq = DAG.getSExtOrTrunc(iq, DL, OVT); 4929 iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); 4930 return iq; 4931} 4932 4933SDValue 4934AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const 4935{ 4936 DebugLoc DL = Op.getDebugLoc(); 4937 EVT OVT = Op.getValueType(); 4938 SDValue LHS = Op.getOperand(0); 4939 SDValue RHS = Op.getOperand(1); 4940 // The LowerSDIV32 function generates equivalent to the following IL. 4941 // mov r0, LHS 4942 // mov r1, RHS 4943 // ilt r10, r0, 0 4944 // ilt r11, r1, 0 4945 // iadd r0, r0, r10 4946 // iadd r1, r1, r11 4947 // ixor r0, r0, r10 4948 // ixor r1, r1, r11 4949 // udiv r0, r0, r1 4950 // ixor r10, r10, r11 4951 // iadd r0, r0, r10 4952 // ixor DST, r0, r10 4953 4954 // mov r0, LHS 4955 SDValue r0 = LHS; 4956 4957 // mov r1, RHS 4958 SDValue r1 = RHS; 4959 4960 // ilt r10, r0, 0 4961 SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT, 4962 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 4963 r0, DAG.getConstant(0, OVT)); 4964 4965 // ilt r11, r1, 0 4966 SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT, 4967 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 4968 r1, DAG.getConstant(0, OVT)); 4969 4970 // iadd r0, r0, r10 4971 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 4972 4973 // iadd r1, r1, r11 4974 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 4975 4976 // ixor r0, r0, r10 4977 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 4978 4979 // ixor r1, r1, r11 4980 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 4981 4982 // udiv r0, r0, r1 4983 r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); 4984 4985 // ixor r10, r10, r11 4986 r10 = 
DAG.getNode(ISD::XOR, DL, OVT, r10, r11); 4987 4988 // iadd r0, r0, r10 4989 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 4990 4991 // ixor DST, r0, r10 4992 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 4993 return DST; 4994} 4995 4996SDValue 4997AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const 4998{ 4999 return SDValue(Op.getNode(), 0); 5000} 5001 5002SDValue 5003AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const 5004{ 5005 DebugLoc DL = Op.getDebugLoc(); 5006 EVT OVT = Op.getValueType(); 5007 SDValue LHS = Op.getOperand(0); 5008 SDValue RHS = Op.getOperand(1); 5009 MVT INTTY; 5010 MVT FLTTY; 5011 if (!OVT.isVector()) { 5012 INTTY = MVT::i32; 5013 FLTTY = MVT::f32; 5014 } else if (OVT.getVectorNumElements() == 2) { 5015 INTTY = MVT::v2i32; 5016 FLTTY = MVT::v2f32; 5017 } else if (OVT.getVectorNumElements() == 4) { 5018 INTTY = MVT::v4i32; 5019 FLTTY = MVT::v4f32; 5020 } 5021 5022 // The LowerUDIV24 function implements the following CL. 
5023 // int ia = (int)LHS 5024 // float fa = (float)ia 5025 // int ib = (int)RHS 5026 // float fb = (float)ib 5027 // float fq = native_divide(fa, fb) 5028 // fq = trunc(fq) 5029 // float t = mad(fq, fb, fb) 5030 // int iq = (int)fq - (t <= fa) 5031 // return (type)iq 5032 5033 // int ia = (int)LHS 5034 SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY); 5035 5036 // float fa = (float)ia 5037 SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); 5038 5039 // int ib = (int)RHS 5040 SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY); 5041 5042 // float fb = (float)ib 5043 SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); 5044 5045 // float fq = native_divide(fa, fb) 5046 SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb); 5047 5048 // fq = trunc(fq) 5049 fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); 5050 5051 // float t = mad(fq, fb, fb) 5052 SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb); 5053 5054 // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1 5055 SDValue iq; 5056 fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); 5057 if (INTTY == MVT::i32) { 5058 iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE); 5059 } else { 5060 iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE); 5061 } 5062 iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq); 5063 5064 5065 // return (type)iq 5066 iq = DAG.getZExtOrTrunc(iq, DL, OVT); 5067 return iq; 5068 5069} 5070 5071SDValue 5072AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const 5073{ 5074 return SDValue(Op.getNode(), 0); 5075} 5076 5077SDValue 5078AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const 5079{ 5080 return SDValue(Op.getNode(), 0); 5081} 5082SDValue 5083AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const 5084{ 5085 DebugLoc DL = Op.getDebugLoc(); 5086 EVT OVT = Op.getValueType(); 5087 MVT INTTY = MVT::i32; 5088 if (OVT == MVT::v2i8) { 5089 INTTY = MVT::v2i32; 5090 } else if (OVT == MVT::v4i8) { 5091 INTTY = 
MVT::v4i32; 5092 } 5093 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 5094 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 5095 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 5096 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 5097 return LHS; 5098} 5099 5100SDValue 5101AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const 5102{ 5103 DebugLoc DL = Op.getDebugLoc(); 5104 EVT OVT = Op.getValueType(); 5105 MVT INTTY = MVT::i32; 5106 if (OVT == MVT::v2i16) { 5107 INTTY = MVT::v2i32; 5108 } else if (OVT == MVT::v4i16) { 5109 INTTY = MVT::v4i32; 5110 } 5111 SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); 5112 SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); 5113 LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); 5114 LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); 5115 return LHS; 5116} 5117 5118SDValue 5119AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const 5120{ 5121 DebugLoc DL = Op.getDebugLoc(); 5122 EVT OVT = Op.getValueType(); 5123 SDValue LHS = Op.getOperand(0); 5124 SDValue RHS = Op.getOperand(1); 5125 // The LowerSREM32 function generates equivalent to the following IL. 
5126 // mov r0, LHS 5127 // mov r1, RHS 5128 // ilt r10, r0, 0 5129 // ilt r11, r1, 0 5130 // iadd r0, r0, r10 5131 // iadd r1, r1, r11 5132 // ixor r0, r0, r10 5133 // ixor r1, r1, r11 5134 // udiv r20, r0, r1 5135 // umul r20, r20, r1 5136 // sub r0, r0, r20 5137 // iadd r0, r0, r10 5138 // ixor DST, r0, r10 5139 5140 // mov r0, LHS 5141 SDValue r0 = LHS; 5142 5143 // mov r1, RHS 5144 SDValue r1 = RHS; 5145 5146 // ilt r10, r0, 0 5147 SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT, 5148 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 5149 r0, DAG.getConstant(0, OVT)); 5150 5151 // ilt r11, r1, 0 5152 SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT, 5153 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32), 5154 r1, DAG.getConstant(0, OVT)); 5155 5156 // iadd r0, r0, r10 5157 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 5158 5159 // iadd r1, r1, r11 5160 r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); 5161 5162 // ixor r0, r0, r10 5163 r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 5164 5165 // ixor r1, r1, r11 5166 r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); 5167 5168 // udiv r20, r0, r1 5169 SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1); 5170 5171 // umul r20, r20, r1 5172 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1); 5173 5174 // sub r0, r0, r20 5175 r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); 5176 5177 // iadd r0, r0, r10 5178 r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); 5179 5180 // ixor DST, r0, r10 5181 SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); 5182 return DST; 5183} 5184 5185SDValue 5186AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const 5187{ 5188 return SDValue(Op.getNode(), 0); 5189} 5190 5191SDValue 5192AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const 5193{ 5194 DebugLoc DL = Op.getDebugLoc(); 5195 EVT OVT = Op.getValueType(); 5196 MVT INTTY = MVT::i32; 5197 if (OVT == MVT::v2i8) { 5198 INTTY = MVT::v2i32; 5199 } else if (OVT == MVT::v4i8) { 5200 INTTY = 
MVT::v4i32; 5201 } 5202 SDValue LHS = Op.getOperand(0); 5203 SDValue RHS = Op.getOperand(1); 5204 // The LowerUREM8 function generates equivalent to the following IL. 5205 // mov r0, as_u32(LHS) 5206 // mov r1, as_u32(RHS) 5207 // and r10, r0, 0xFF 5208 // and r11, r1, 0xFF 5209 // cmov_logical r3, r11, r11, 0x1 5210 // udiv r3, r10, r3 5211 // cmov_logical r3, r11, r3, 0 5212 // umul r3, r3, r11 5213 // sub r3, r10, r3 5214 // and as_u8(DST), r3, 0xFF 5215 5216 // mov r0, as_u32(LHS) 5217 SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY); 5218 5219 // mov r1, as_u32(RHS) 5220 SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY); 5221 5222 // and r10, r0, 0xFF 5223 SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0, 5224 DAG.getConstant(0xFF, INTTY)); 5225 5226 // and r11, r1, 0xFF 5227 SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1, 5228 DAG.getConstant(0xFF, INTTY)); 5229 5230 // cmov_logical r3, r11, r11, 0x1 5231 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11, 5232 DAG.getConstant(0x01, INTTY)); 5233 5234 // udiv r3, r10, r3 5235 r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3); 5236 5237 // cmov_logical r3, r11, r3, 0 5238 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3, 5239 DAG.getConstant(0, INTTY)); 5240 5241 // umul r3, r3, r11 5242 r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11); 5243 5244 // sub r3, r10, r3 5245 r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3); 5246 5247 // and as_u8(DST), r3, 0xFF 5248 SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3, 5249 DAG.getConstant(0xFF, INTTY)); 5250 DST = DAG.getZExtOrTrunc(DST, DL, OVT); 5251 return DST; 5252} 5253 5254SDValue 5255AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const 5256{ 5257 DebugLoc DL = Op.getDebugLoc(); 5258 EVT OVT = Op.getValueType(); 5259 MVT INTTY = MVT::i32; 5260 if (OVT == MVT::v2i16) { 5261 INTTY = MVT::v2i32; 5262 } else if (OVT == MVT::v4i16) { 5263 INTTY = MVT::v4i32; 5264 } 5265 SDValue LHS = Op.getOperand(0); 5266 SDValue RHS = 
Op.getOperand(1); 5267 // The LowerUREM16 function generatest equivalent to the following IL. 5268 // mov r0, LHS 5269 // mov r1, RHS 5270 // DIV = LowerUDIV16(LHS, RHS) 5271 // and r10, r0, 0xFFFF 5272 // and r11, r1, 0xFFFF 5273 // cmov_logical r3, r11, r11, 0x1 5274 // udiv as_u16(r3), as_u32(r10), as_u32(r3) 5275 // and r3, r3, 0xFFFF 5276 // cmov_logical r3, r11, r3, 0 5277 // umul r3, r3, r11 5278 // sub r3, r10, r3 5279 // and DST, r3, 0xFFFF 5280 5281 // mov r0, LHS 5282 SDValue r0 = LHS; 5283 5284 // mov r1, RHS 5285 SDValue r1 = RHS; 5286 5287 // and r10, r0, 0xFFFF 5288 SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0, 5289 DAG.getConstant(0xFFFF, OVT)); 5290 5291 // and r11, r1, 0xFFFF 5292 SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1, 5293 DAG.getConstant(0xFFFF, OVT)); 5294 5295 // cmov_logical r3, r11, r11, 0x1 5296 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11, 5297 DAG.getConstant(0x01, OVT)); 5298 5299 // udiv as_u16(r3), as_u32(r10), as_u32(r3) 5300 r10 = DAG.getZExtOrTrunc(r10, DL, INTTY); 5301 r3 = DAG.getZExtOrTrunc(r3, DL, INTTY); 5302 r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3); 5303 r3 = DAG.getZExtOrTrunc(r3, DL, OVT); 5304 r10 = DAG.getZExtOrTrunc(r10, DL, OVT); 5305 5306 // and r3, r3, 0xFFFF 5307 r3 = DAG.getNode(ISD::AND, DL, OVT, r3, 5308 DAG.getConstant(0xFFFF, OVT)); 5309 5310 // cmov_logical r3, r11, r3, 0 5311 r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3, 5312 DAG.getConstant(0, OVT)); 5313 // umul r3, r3, r11 5314 r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11); 5315 5316 // sub r3, r10, r3 5317 r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3); 5318 5319 // and DST, r3, 0xFFFF 5320 SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3, 5321 DAG.getConstant(0xFFFF, OVT)); 5322 return DST; 5323} 5324 5325SDValue 5326AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const 5327{ 5328 DebugLoc DL = Op.getDebugLoc(); 5329 EVT OVT = Op.getValueType(); 5330 SDValue LHS = Op.getOperand(0); 5331 SDValue 
RHS = Op.getOperand(1); 5332 // The LowerUREM32 function generates equivalent to the following IL. 5333 // udiv r20, LHS, RHS 5334 // umul r20, r20, RHS 5335 // sub DST, LHS, r20 5336 5337 // udiv r20, LHS, RHS 5338 SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS); 5339 5340 // umul r20, r20, RHS 5341 r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS); 5342 5343 // sub DST, LHS, r20 5344 SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20); 5345 return DST; 5346} 5347 5348SDValue 5349AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const 5350{ 5351 return SDValue(Op.getNode(), 0); 5352} 5353 5354 5355SDValue 5356AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const 5357{ 5358 DebugLoc DL = Op.getDebugLoc(); 5359 EVT OVT = Op.getValueType(); 5360 MVT INTTY = MVT::i32; 5361 if (OVT == MVT::v2f32) { 5362 INTTY = MVT::v2i32; 5363 } else if (OVT == MVT::v4f32) { 5364 INTTY = MVT::v4i32; 5365 } 5366 SDValue LHS = Op.getOperand(0); 5367 SDValue RHS = Op.getOperand(1); 5368 SDValue DST; 5369 const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>( 5370 &this->getTargetMachine())->getSubtargetImpl(); 5371 if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) { 5372 // TODO: This doesn't work for vector types yet 5373 // The LowerFDIV32 function generates equivalent to the following 5374 // IL: 5375 // mov r20, as_int(LHS) 5376 // mov r21, as_int(RHS) 5377 // and r30, r20, 0x7f800000 5378 // and r31, r20, 0x807FFFFF 5379 // and r32, r21, 0x7f800000 5380 // and r33, r21, 0x807FFFFF 5381 // ieq r40, r30, 0x7F800000 5382 // ieq r41, r31, 0x7F800000 5383 // ieq r42, r32, 0 5384 // ieq r43, r33, 0 5385 // and r50, r20, 0x80000000 5386 // and r51, r21, 0x80000000 5387 // ior r32, r32, 0x3f800000 5388 // ior r33, r33, 0x3f800000 5389 // cmov_logical r32, r42, r50, r32 5390 // cmov_logical r33, r43, r51, r33 5391 // cmov_logical r32, r40, r20, r32 5392 // cmov_logical r33, r41, r21, r33 5393 // ior r50, r40, r41 5394 
// ior r51, r42, r43 5395 // ior r50, r50, r51 5396 // inegate r52, r31 5397 // iadd r30, r30, r52 5398 // cmov_logical r30, r50, 0, r30 5399 // div_zeroop(infinity) r21, 1.0, r33 5400 // mul_ieee r20, r32, r21 5401 // and r22, r20, 0x7FFFFFFF 5402 // and r23, r20, 0x80000000 5403 // ishr r60, r22, 0x00000017 5404 // ishr r61, r30, 0x00000017 5405 // iadd r20, r20, r30 5406 // iadd r21, r22, r30 5407 // iadd r60, r60, r61 5408 // ige r42, 0, R60 5409 // ior r41, r23, 0x7F800000 5410 // ige r40, r60, 0x000000FF 5411 // cmov_logical r40, r50, 0, r40 5412 // cmov_logical r20, r42, r23, r20 5413 // cmov_logical DST, r40, r41, r20 5414 // as_float(DST) 5415 5416 // mov r20, as_int(LHS) 5417 SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS); 5418 5419 // mov r21, as_int(RHS) 5420 SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS); 5421 5422 // and r30, r20, 0x7f800000 5423 SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20, 5424 DAG.getConstant(0x7F800000, INTTY)); 5425 5426 // and r31, r21, 0x7f800000 5427 SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21, 5428 DAG.getConstant(0x7f800000, INTTY)); 5429 5430 // and r32, r20, 0x807FFFFF 5431 SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20, 5432 DAG.getConstant(0x807FFFFF, INTTY)); 5433 5434 // and r33, r21, 0x807FFFFF 5435 SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21, 5436 DAG.getConstant(0x807FFFFF, INTTY)); 5437 5438 // ieq r40, r30, 0x7F800000 5439 SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5440 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 5441 R30, DAG.getConstant(0x7F800000, INTTY)); 5442 5443 // ieq r41, r31, 0x7F800000 5444 SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5445 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 5446 R31, DAG.getConstant(0x7F800000, INTTY)); 5447 5448 // ieq r42, r30, 0 5449 SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5450 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 5451 R30, DAG.getConstant(0, 
INTTY)); 5452 5453 // ieq r43, r31, 0 5454 SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5455 DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32), 5456 R31, DAG.getConstant(0, INTTY)); 5457 5458 // and r50, r20, 0x80000000 5459 SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20, 5460 DAG.getConstant(0x80000000, INTTY)); 5461 5462 // and r51, r21, 0x80000000 5463 SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21, 5464 DAG.getConstant(0x80000000, INTTY)); 5465 5466 // ior r32, r32, 0x3f800000 5467 R32 = DAG.getNode(ISD::OR, DL, INTTY, R32, 5468 DAG.getConstant(0x3F800000, INTTY)); 5469 5470 // ior r33, r33, 0x3f800000 5471 R33 = DAG.getNode(ISD::OR, DL, INTTY, R33, 5472 DAG.getConstant(0x3F800000, INTTY)); 5473 5474 // cmov_logical r32, r42, r50, r32 5475 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32); 5476 5477 // cmov_logical r33, r43, r51, r33 5478 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33); 5479 5480 // cmov_logical r32, r40, r20, r32 5481 R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32); 5482 5483 // cmov_logical r33, r41, r21, r33 5484 R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33); 5485 5486 // ior r50, r40, r41 5487 R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41); 5488 5489 // ior r51, r42, r43 5490 R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43); 5491 5492 // ior r50, r50, r51 5493 R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51); 5494 5495 // inegate r52, r31 5496 SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31); 5497 5498 // iadd r30, r30, r52 5499 R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52); 5500 5501 // cmov_logical r30, r50, 0, r30 5502 R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50, 5503 DAG.getConstant(0, INTTY), R30); 5504 5505 // div_zeroop(infinity) r21, 1.0, as_float(r33) 5506 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33); 5507 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, 5508 DAG.getConstantFP(1.0f, OVT), R33); 5509 5510 // mul_ieee 
as_int(r20), as_float(r32), r21 5511 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32); 5512 R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21); 5513 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20); 5514 5515 // div_zeroop(infinity) r21, 1.0, as_float(r33) 5516 R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33); 5517 R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, 5518 DAG.getConstantFP(1.0f, OVT), R33); 5519 5520 // mul_ieee as_int(r20), as_float(r32), r21 5521 R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32); 5522 R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21); 5523 R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20); 5524 5525 // and r22, r20, 0x7FFFFFFF 5526 SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20, 5527 DAG.getConstant(0x7FFFFFFF, INTTY)); 5528 5529 // and r23, r20, 0x80000000 5530 SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20, 5531 DAG.getConstant(0x80000000, INTTY)); 5532 5533 // ishr r60, r22, 0x00000017 5534 SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22, 5535 DAG.getConstant(0x00000017, INTTY)); 5536 5537 // ishr r61, r30, 0x00000017 5538 SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30, 5539 DAG.getConstant(0x00000017, INTTY)); 5540 5541 // iadd r20, r20, r30 5542 R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30); 5543 5544 // iadd r21, r22, r30 5545 R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30); 5546 5547 // iadd r60, r60, r61 5548 R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61); 5549 5550 // ige r42, 0, R60 5551 R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5552 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 5553 DAG.getConstant(0, INTTY), 5554 R60); 5555 5556 // ior r41, r23, 0x7F800000 5557 R41 = DAG.getNode(ISD::OR, DL, INTTY, R23, 5558 DAG.getConstant(0x7F800000, INTTY)); 5559 5560 // ige r40, r60, 0x000000FF 5561 R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY, 5562 DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32), 5563 R60, 5564 DAG.getConstant(0x0000000FF, INTTY)); 5565 5566 // cmov_logical r40, r50, 0, r40 5567 R40 = 
DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50, 5568 DAG.getConstant(0, INTTY), 5569 R40); 5570 5571 // cmov_logical r20, r42, r23, r20 5572 R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20); 5573 5574 // cmov_logical DST, r40, r41, r20 5575 DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20); 5576 5577 // as_float(DST) 5578 DST = DAG.getNode(ISDBITCAST, DL, OVT, DST); 5579 } else { 5580 // The following sequence of DAG nodes produce the following IL: 5581 // fabs r1, RHS 5582 // lt r2, 0x1.0p+96f, r1 5583 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f 5584 // mul_ieee r1, RHS, r3 5585 // div_zeroop(infinity) r0, LHS, r1 5586 // mul_ieee DST, r0, r3 5587 5588 // fabs r1, RHS 5589 SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS); 5590 // lt r2, 0x1.0p+96f, r1 5591 SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT, 5592 DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32), 5593 DAG.getConstant(0x6f800000, INTTY), r1); 5594 // cmov_logical r3, r2, 0x1.0p-23f, 1.0f 5595 SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2, 5596 DAG.getConstant(0x2f800000, INTTY), 5597 DAG.getConstant(0x3f800000, INTTY)); 5598 // mul_ieee r1, RHS, r3 5599 r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3); 5600 // div_zeroop(infinity) r0, LHS, r1 5601 SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1); 5602 // mul_ieee DST, r0, r3 5603 DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3); 5604 } 5605 return DST; 5606} 5607 5608SDValue 5609AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const 5610{ 5611 return SDValue(Op.getNode(), 0); 5612} 5613