PPCISelLowering.cpp revision a619d012c13bee0572b96b045723dff5a117a5c2
12949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// 22949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project// 32949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project// The LLVM Compiler Infrastructure 42949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project// 52949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project// This file is distributed under the University of Illinois Open Source 62949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project// License. See LICENSE.TXT for details. 72949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project// 82949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project//===----------------------------------------------------------------------===// 92949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project// 102949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project// This file implements the PPCISelLowering class. 
112949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project// 122949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project//===----------------------------------------------------------------------===// 132949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 142949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "PPCISelLowering.h" 152949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "PPCMachineFunctionInfo.h" 162949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "PPCPredicates.h" 172949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "PPCTargetMachine.h" 182949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "PPCPerfectShuffle.h" 192949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "llvm/ADT/STLExtras.h" 202949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "llvm/ADT/VectorExtras.h" 212949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "llvm/CodeGen/CallingConvLower.h" 222949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "llvm/CodeGen/MachineFrameInfo.h" 232949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "llvm/CodeGen/MachineFunction.h" 242949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "llvm/CodeGen/MachineInstrBuilder.h" 252949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "llvm/CodeGen/MachineRegisterInfo.h" 262949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "llvm/CodeGen/PseudoSourceValue.h" 272949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "llvm/CodeGen/SelectionDAG.h" 282949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "llvm/CallingConv.h" 292949f58a438f6fd85f66a8b7ed4708042cde4b37The 
Android Open Source Project#include "llvm/Constants.h" 302949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "llvm/Function.h" 312949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "llvm/Intrinsics.h" 322949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "llvm/ParameterAttributes.h" 332949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "llvm/Support/MathExtras.h" 342949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "llvm/Target/TargetOptions.h" 352949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project#include "llvm/Support/CommandLine.h" 362949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Projectusing namespace llvm; 372949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 382949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Projectstatic cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc", 392949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Projectcl::desc("enable preincrement load/store generation on PPC (experimental)"), 402949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project cl::Hidden); 412949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 422949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source ProjectPPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) 432949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) { 442949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 452949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setPow2DivIsCheap(); 462949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 472949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // Use _setjmp/_longjmp instead of setjmp/longjmp. 
482949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setUseUnderscoreSetJmp(true); 492949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setUseUnderscoreLongJmp(true); 502949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 512949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // Set up the register classes. 522949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project addRegisterClass(MVT::i32, PPC::GPRCRegisterClass); 532949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project addRegisterClass(MVT::f32, PPC::F4RCRegisterClass); 542949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project addRegisterClass(MVT::f64, PPC::F8RCRegisterClass); 552949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 562949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // PowerPC has an i16 but no i8 (or i1) SEXTLOAD 572949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote); 582949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setLoadXAction(ISD::SEXTLOAD, MVT::i8, Expand); 592949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 602949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setTruncStoreAction(MVT::f64, MVT::f32, Expand); 612949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 622949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // PowerPC has pre-inc load and store's. 
632949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal); 642949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal); 652949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal); 662949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal); 672949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal); 682949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal); 692949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal); 702949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal); 712949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); 722949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); 732949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 742949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // Shortening conversions involving ppcf128 get expanded (2 regs -> 1 reg) 752949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setConvertAction(MVT::ppcf128, MVT::f64, Expand); 762949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setConvertAction(MVT::ppcf128, MVT::f32, Expand); 772949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // This is used in the ppcf128->int sequence. 
Note it has different semantics 782949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // from FP_ROUND: that rounds to nearest, this rounds to zero. 792949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); 802949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 812949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // PowerPC has no SREM/UREM instructions 822949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SREM, MVT::i32, Expand); 832949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::UREM, MVT::i32, Expand); 842949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SREM, MVT::i64, Expand); 852949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::UREM, MVT::i64, Expand); 862949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 872949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. 
882949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 892949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 902949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); 912949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); 922949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 932949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 942949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::UDIVREM, MVT::i64, Expand); 952949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SDIVREM, MVT::i64, Expand); 962949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 972949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // We don't support sin/cos/sqrt/fmod/pow 982949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FSIN , MVT::f64, Expand); 992949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FCOS , MVT::f64, Expand); 1002949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FREM , MVT::f64, Expand); 1012949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FPOW , MVT::f64, Expand); 1022949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FSIN , MVT::f32, Expand); 1032949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FCOS , MVT::f32, Expand); 
1042949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FREM , MVT::f32, Expand); 1052949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FPOW , MVT::f32, Expand); 1062949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1072949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); 1082949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1092949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // If we're enabling GP optimizations, use hardware square root 1102949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) { 1112949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FSQRT, MVT::f64, Expand); 1122949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FSQRT, MVT::f32, Expand); 1132949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project } 1142949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1152949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 1162949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 1172949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1182949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // PowerPC does not have BSWAP, CTPOP or CTTZ 1192949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::BSWAP, MVT::i32 , Expand); 1202949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::CTPOP, MVT::i32 , Expand); 1212949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 
setOperationAction(ISD::CTTZ , MVT::i32 , Expand); 1222949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::BSWAP, MVT::i64 , Expand); 1232949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::CTPOP, MVT::i64 , Expand); 1242949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::CTTZ , MVT::i64 , Expand); 1252949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1262949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // PowerPC does not have ROTR 1272949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::ROTR, MVT::i32 , Expand); 1282949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::ROTR, MVT::i64 , Expand); 1292949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1302949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // PowerPC does not have Select 1312949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SELECT, MVT::i32, Expand); 1322949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SELECT, MVT::i64, Expand); 1332949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SELECT, MVT::f32, Expand); 1342949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SELECT, MVT::f64, Expand); 1352949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1362949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // PowerPC wants to turn select_cc of FP into fsel when possible. 
1372949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 1382949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); 1392949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1402949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // PowerPC wants to optimize integer setcc a bit 1412949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SETCC, MVT::i32, Custom); 1422949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1432949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // PowerPC does not have BRCOND which requires SetCC 1442949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::BRCOND, MVT::Other, Expand); 1452949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1462949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::BR_JT, MVT::Other, Expand); 1472949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1482949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. 
1492949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 1502949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1512949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // PowerPC does not have [U|S]INT_TO_FP 1522949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); 1532949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); 1542949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1552949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand); 1562949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand); 1572949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand); 1582949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand); 1592949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1602949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // We cannot sextinreg(i1). Expand to shifts. 1612949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 1622949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1632949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // Support label based line numbers. 
1642949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand); 1652949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); 1662949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1672949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); 1682949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); 1692949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); 1702949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); 1712949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1722949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1732949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // We want to legalize GlobalAddress and ConstantPool nodes into the 1742949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // appropriate instructions to materialize the address. 
1752949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 1762949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); 1772949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::ConstantPool, MVT::i32, Custom); 1782949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::JumpTable, MVT::i32, Custom); 1792949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); 1802949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); 1812949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::ConstantPool, MVT::i64, Custom); 1822949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::JumpTable, MVT::i64, Custom); 1832949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1842949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // RET must be custom lowered, to meet ABI requirements. 1852949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::RET , MVT::Other, Custom); 1862949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1872949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // TRAP is legal. 
1882949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::TRAP, MVT::Other, Legal); 1892949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1902949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // VASTART needs to be custom lowered to use the VarArgsFrameIndex 1912949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::VASTART , MVT::Other, Custom); 1922949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1932949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // VAARG is custom lowered with ELF 32 ABI 1942949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project if (TM.getSubtarget<PPCSubtarget>().isELF32_ABI()) 1952949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::VAARG, MVT::Other, Custom); 1962949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project else 1972949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::VAARG, MVT::Other, Expand); 1982949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 1992949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // Use the default implementation. 
2002949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::VACOPY , MVT::Other, Expand); 2012949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::VAEND , MVT::Other, Expand); 2022949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); 2032949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); 2042949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); 2052949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); 2062949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 2072949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // We want to custom lower some of our intrinsics. 2082949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 2092949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 2102949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) { 2112949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // They also have instructions for converting between i64 and fp. 
2122949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 2132949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); 2142949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 2152949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); 2162949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); 2172949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 2182949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // FIXME: disable this lowered code. This generates 64-bit register values, 2192949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // and we don't model the fact that the top part is clobbered by calls. We 2202949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // need to flag these together so that the value isn't live across a call. 2212949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 2222949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 2232949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT 2242949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); 2252949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project } else { 2262949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // PowerPC does not have FP_TO_UINT on 32-bit implementations. 
2272949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); 2282949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project } 2292949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 2302949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) { 2312949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // 64-bit PowerPC implementations can support i64 types directly 2322949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project addRegisterClass(MVT::i64, PPC::G8RCRegisterClass); 2332949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // BUILD_PAIR can't be handled natively, and should be expanded to shl/or 2342949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); 2352949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // 64-bit PowerPC wants to expand i128 shifts itself. 2362949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom); 2372949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom); 2382949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); 2392949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project } else { 2402949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // 32-bit PowerPC wants to expand i64 shifts itself. 
2412949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); 2422949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); 2432949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); 2442949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project } 2452949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 2462949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) { 2472949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // First set operation action for all vector types to expand. Then we 2482949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // will selectively turn on ones that can be effectively codegen'd. 2492949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 2502949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { 2512949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project MVT VT = (MVT::SimpleValueType)i; 2522949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 2532949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // add/sub are legal for all supported vector VT's. 2542949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::ADD , VT, Legal); 2552949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SUB , VT, Legal); 2562949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 2572949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // We promote all shuffles to v16i8. 
2582949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote); 2592949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8); 2602949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 2612949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // We promote all non-typed operations to v4i32. 2622949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::AND , VT, Promote); 2632949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project AddPromotedToType (ISD::AND , VT, MVT::v4i32); 2642949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::OR , VT, Promote); 2652949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project AddPromotedToType (ISD::OR , VT, MVT::v4i32); 2662949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::XOR , VT, Promote); 2672949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project AddPromotedToType (ISD::XOR , VT, MVT::v4i32); 2682949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::LOAD , VT, Promote); 2692949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project AddPromotedToType (ISD::LOAD , VT, MVT::v4i32); 2702949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SELECT, VT, Promote); 2712949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project AddPromotedToType (ISD::SELECT, VT, MVT::v4i32); 2722949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::STORE, VT, Promote); 2732949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project AddPromotedToType (ISD::STORE, VT, MVT::v4i32); 2742949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 
2752949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // No other operations are legal. 2762949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::MUL , VT, Expand); 2772949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SDIV, VT, Expand); 2782949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SREM, VT, Expand); 2792949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::UDIV, VT, Expand); 2802949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::UREM, VT, Expand); 2812949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FDIV, VT, Expand); 2822949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::FNEG, VT, Expand); 2832949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand); 2842949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); 2852949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::BUILD_VECTOR, VT, Expand); 2862949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::UMUL_LOHI, VT, Expand); 2872949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SMUL_LOHI, VT, Expand); 2882949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::UDIVREM, VT, Expand); 2892949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SDIVREM, VT, Expand); 2902949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand); 2912949f58a438f6fd85f66a8b7ed4708042cde4b37The Android 
Open Source Project setOperationAction(ISD::FPOW, VT, Expand); 2922949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::CTPOP, VT, Expand); 2932949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::CTLZ, VT, Expand); 2942949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::CTTZ, VT, Expand); 2952949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project } 2962949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 2972949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle 2982949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // with merges, splats, etc. 2992949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); 3002949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 3012949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::AND , MVT::v4i32, Legal); 3022949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::OR , MVT::v4i32, Legal); 3032949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::XOR , MVT::v4i32, Legal); 3042949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::LOAD , MVT::v4i32, Legal); 3052949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SELECT, MVT::v4i32, Expand); 3062949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::STORE , MVT::v4i32, Legal); 3072949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 3082949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass); 
3092949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass); 3102949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass); 3112949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass); 3122949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 3132949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::MUL, MVT::v4f32, Legal); 3142949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::MUL, MVT::v4i32, Custom); 3152949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::MUL, MVT::v8i16, Custom); 3162949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::MUL, MVT::v16i8, Custom); 3172949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 3182949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); 3192949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); 3202949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 3212949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); 3222949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); 3232949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); 3242949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 3252949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project } 
3262949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 3272949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setShiftAmountType(MVT::i32); 3282949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setSetCCResultContents(ZeroOrOneSetCCResult); 3292949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 3302949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { 3312949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setStackPointerRegisterToSaveRestore(PPC::X1); 3322949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setExceptionPointerRegister(PPC::X3); 3332949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setExceptionSelectorRegister(PPC::X4); 3342949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project } else { 3352949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setStackPointerRegisterToSaveRestore(PPC::R1); 3362949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setExceptionPointerRegister(PPC::R3); 3372949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setExceptionSelectorRegister(PPC::R4); 3382949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project } 3392949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 3402949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // We have target-specific dag combine patterns for the following nodes: 3412949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setTargetDAGCombine(ISD::SINT_TO_FP); 3422949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setTargetDAGCombine(ISD::STORE); 3432949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setTargetDAGCombine(ISD::BR_CC); 3442949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 
setTargetDAGCombine(ISD::BSWAP); 3452949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 3462949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // Darwin long double math library functions have $LDBL128 appended. 3472949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project if (TM.getSubtarget<PPCSubtarget>().isDarwin()) { 3482949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); 3492949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128"); 3502949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128"); 3512949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128"); 3522949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128"); 3532949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project } 3542949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 3552949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project computeRegisterProperties(); 3562949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project} 3572949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 3582949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate 3592949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project/// function arguments in the caller parameter area. 
3602949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Projectunsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const { 3612949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project TargetMachine &TM = getTargetMachine(); 3622949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // Darwin passes everything on 4 byte boundary. 3632949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project if (TM.getSubtarget<PPCSubtarget>().isDarwin()) 3642949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project return 4; 3652949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project // FIXME Elf TBD 3662949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project return 4; 3672949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project} 3682949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Project 3692949f58a438f6fd85f66a8b7ed4708042cde4b37The Android Open Source Projectconst char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { 370 switch (Opcode) { 371 default: return 0; 372 case PPCISD::FSEL: return "PPCISD::FSEL"; 373 case PPCISD::FCFID: return "PPCISD::FCFID"; 374 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; 375 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; 376 case PPCISD::STFIWX: return "PPCISD::STFIWX"; 377 case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; 378 case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; 379 case PPCISD::VPERM: return "PPCISD::VPERM"; 380 case PPCISD::Hi: return "PPCISD::Hi"; 381 case PPCISD::Lo: return "PPCISD::Lo"; 382 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; 383 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; 384 case PPCISD::SRL: return "PPCISD::SRL"; 385 case PPCISD::SRA: return "PPCISD::SRA"; 386 case PPCISD::SHL: return "PPCISD::SHL"; 387 case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32"; 388 case PPCISD::STD_32: return "PPCISD::STD_32"; 389 case PPCISD::CALL_ELF: return 
"PPCISD::CALL_ELF"; 390 case PPCISD::CALL_Macho: return "PPCISD::CALL_Macho"; 391 case PPCISD::MTCTR: return "PPCISD::MTCTR"; 392 case PPCISD::BCTRL_Macho: return "PPCISD::BCTRL_Macho"; 393 case PPCISD::BCTRL_ELF: return "PPCISD::BCTRL_ELF"; 394 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; 395 case PPCISD::MFCR: return "PPCISD::MFCR"; 396 case PPCISD::VCMP: return "PPCISD::VCMP"; 397 case PPCISD::VCMPo: return "PPCISD::VCMPo"; 398 case PPCISD::LBRX: return "PPCISD::LBRX"; 399 case PPCISD::STBRX: return "PPCISD::STBRX"; 400 case PPCISD::LARX: return "PPCISD::LARX"; 401 case PPCISD::STCX: return "PPCISD::STCX"; 402 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; 403 case PPCISD::MFFS: return "PPCISD::MFFS"; 404 case PPCISD::MTFSB0: return "PPCISD::MTFSB0"; 405 case PPCISD::MTFSB1: return "PPCISD::MTFSB1"; 406 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ"; 407 case PPCISD::MTFSF: return "PPCISD::MTFSF"; 408 case PPCISD::TAILCALL: return "PPCISD::TAILCALL"; 409 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN"; 410 } 411} 412 413 414MVT PPCTargetLowering::getSetCCResultType(const SDValue &) const { 415 return MVT::i32; 416} 417 418 419//===----------------------------------------------------------------------===// 420// Node matching predicates, for use by the tblgen matching code. 421//===----------------------------------------------------------------------===// 422 423/// isFloatingPointZero - Return true if this is 0.0 or -0.0. 424static bool isFloatingPointZero(SDValue Op) { 425 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 426 return CFP->getValueAPF().isZero(); 427 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 428 // Maybe this has already been legalized into the constant pool? 
429 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) 430 if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 431 return CFP->getValueAPF().isZero(); 432 } 433 return false; 434} 435 436/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return 437/// true if Op is undef or if it matches the specified value. 438static bool isConstantOrUndef(SDValue Op, unsigned Val) { 439 return Op.getOpcode() == ISD::UNDEF || 440 cast<ConstantSDNode>(Op)->getValue() == Val; 441} 442 443/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a 444/// VPKUHUM instruction. 445bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) { 446 if (!isUnary) { 447 for (unsigned i = 0; i != 16; ++i) 448 if (!isConstantOrUndef(N->getOperand(i), i*2+1)) 449 return false; 450 } else { 451 for (unsigned i = 0; i != 8; ++i) 452 if (!isConstantOrUndef(N->getOperand(i), i*2+1) || 453 !isConstantOrUndef(N->getOperand(i+8), i*2+1)) 454 return false; 455 } 456 return true; 457} 458 459/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a 460/// VPKUWUM instruction. 461bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) { 462 if (!isUnary) { 463 for (unsigned i = 0; i != 16; i += 2) 464 if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || 465 !isConstantOrUndef(N->getOperand(i+1), i*2+3)) 466 return false; 467 } else { 468 for (unsigned i = 0; i != 8; i += 2) 469 if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || 470 !isConstantOrUndef(N->getOperand(i+1), i*2+3) || 471 !isConstantOrUndef(N->getOperand(i+8), i*2+2) || 472 !isConstantOrUndef(N->getOperand(i+9), i*2+3)) 473 return false; 474 } 475 return true; 476} 477 478/// isVMerge - Common function, used to match vmrg* shuffles. 
479/// 480static bool isVMerge(SDNode *N, unsigned UnitSize, 481 unsigned LHSStart, unsigned RHSStart) { 482 assert(N->getOpcode() == ISD::BUILD_VECTOR && 483 N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); 484 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && 485 "Unsupported merge size!"); 486 487 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units 488 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit 489 if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j), 490 LHSStart+j+i*UnitSize) || 491 !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j), 492 RHSStart+j+i*UnitSize)) 493 return false; 494 } 495 return true; 496} 497 498/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for 499/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). 500bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { 501 if (!isUnary) 502 return isVMerge(N, UnitSize, 8, 24); 503 return isVMerge(N, UnitSize, 8, 8); 504} 505 506/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for 507/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). 508bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { 509 if (!isUnary) 510 return isVMerge(N, UnitSize, 0, 16); 511 return isVMerge(N, UnitSize, 0, 0); 512} 513 514 515/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift 516/// amount, otherwise return -1. 517int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { 518 assert(N->getOpcode() == ISD::BUILD_VECTOR && 519 N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); 520 // Find the first non-undef value in the shuffle mask. 521 unsigned i; 522 for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i) 523 /*search*/; 524 525 if (i == 16) return -1; // all undef. 
526 527 // Otherwise, check to see if the rest of the elements are consequtively 528 // numbered from this value. 529 unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue(); 530 if (ShiftAmt < i) return -1; 531 ShiftAmt -= i; 532 533 if (!isUnary) { 534 // Check the rest of the elements to see if they are consequtive. 535 for (++i; i != 16; ++i) 536 if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i)) 537 return -1; 538 } else { 539 // Check the rest of the elements to see if they are consequtive. 540 for (++i; i != 16; ++i) 541 if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15)) 542 return -1; 543 } 544 545 return ShiftAmt; 546} 547 548/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand 549/// specifies a splat of a single element that is suitable for input to 550/// VSPLTB/VSPLTH/VSPLTW. 551bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) { 552 assert(N->getOpcode() == ISD::BUILD_VECTOR && 553 N->getNumOperands() == 16 && 554 (EltSize == 1 || EltSize == 2 || EltSize == 4)); 555 556 // This is a splat operation if each element of the permute is the same, and 557 // if the value doesn't reference the second vector. 558 unsigned ElementBase = 0; 559 SDValue Elt = N->getOperand(0); 560 if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt)) 561 ElementBase = EltV->getValue(); 562 else 563 return false; // FIXME: Handle UNDEF elements too! 564 565 if (cast<ConstantSDNode>(Elt)->getValue() >= 16) 566 return false; 567 568 // Check that they are consequtive. 
569 for (unsigned i = 1; i != EltSize; ++i) { 570 if (!isa<ConstantSDNode>(N->getOperand(i)) || 571 cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase) 572 return false; 573 } 574 575 assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); 576 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { 577 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 578 assert(isa<ConstantSDNode>(N->getOperand(i)) && 579 "Invalid VECTOR_SHUFFLE mask!"); 580 for (unsigned j = 0; j != EltSize; ++j) 581 if (N->getOperand(i+j) != N->getOperand(j)) 582 return false; 583 } 584 585 return true; 586} 587 588/// isAllNegativeZeroVector - Returns true if all elements of build_vector 589/// are -0.0. 590bool PPC::isAllNegativeZeroVector(SDNode *N) { 591 assert(N->getOpcode() == ISD::BUILD_VECTOR); 592 if (PPC::isSplatShuffleMask(N, N->getNumOperands())) 593 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N)) 594 return CFP->getValueAPF().isNegZero(); 595 return false; 596} 597 598/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the 599/// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 600unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) { 601 assert(isSplatShuffleMask(N, EltSize)); 602 return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize; 603} 604 605/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed 606/// by using a vspltis[bhw] instruction of the specified element size, return 607/// the constant being splatted. The ByteSize field indicates the number of 608/// bytes of each element [124] -> [bhw]. 
609SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { 610 SDValue OpVal(0, 0); 611 612 // If ByteSize of the splat is bigger than the element size of the 613 // build_vector, then we have a case where we are checking for a splat where 614 // multiple elements of the buildvector are folded together into a single 615 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8). 616 unsigned EltSize = 16/N->getNumOperands(); 617 if (EltSize < ByteSize) { 618 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. 619 SDValue UniquedVals[4]; 620 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); 621 622 // See if all of the elements in the buildvector agree across. 623 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 624 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 625 // If the element isn't a constant, bail fully out. 626 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); 627 628 629 if (UniquedVals[i&(Multiple-1)].getNode() == 0) 630 UniquedVals[i&(Multiple-1)] = N->getOperand(i); 631 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) 632 return SDValue(); // no match. 633 } 634 635 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains 636 // either constant or undef values that are identical for each chunk. See 637 // if these chunks can form into a larger vspltis*. 638 639 // Check to see if all of the leading entries are either 0 or -1. If 640 // neither, then this won't fit into the immediate field. 641 bool LeadingZero = true; 642 bool LeadingOnes = true; 643 for (unsigned i = 0; i != Multiple-1; ++i) { 644 if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs. 645 646 LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue(); 647 LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue(); 648 } 649 // Finally, check the least significant entry. 
650 if (LeadingZero) { 651 if (UniquedVals[Multiple-1].getNode() == 0) 652 return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef 653 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getValue(); 654 if (Val < 16) 655 return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4) 656 } 657 if (LeadingOnes) { 658 if (UniquedVals[Multiple-1].getNode() == 0) 659 return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef 660 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSignExtended(); 661 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) 662 return DAG.getTargetConstant(Val, MVT::i32); 663 } 664 665 return SDValue(); 666 } 667 668 // Check to see if this buildvec has a single non-undef value in its elements. 669 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 670 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 671 if (OpVal.getNode() == 0) 672 OpVal = N->getOperand(i); 673 else if (OpVal != N->getOperand(i)) 674 return SDValue(); 675 } 676 677 if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def. 678 679 unsigned ValSizeInBytes = 0; 680 uint64_t Value = 0; 681 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 682 Value = CN->getValue(); 683 ValSizeInBytes = CN->getValueType(0).getSizeInBits()/8; 684 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 685 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); 686 Value = FloatToBits(CN->getValueAPF().convertToFloat()); 687 ValSizeInBytes = 4; 688 } 689 690 // If the splat value is larger than the element value, then we can never do 691 // this splat. The only case that we could fit the replicated bits into our 692 // immediate field for would be zero, and we prefer to use vxor for it. 693 if (ValSizeInBytes < ByteSize) return SDValue(); 694 695 // If the element value is larger than the splat value, cut it in half and 696 // check to see if the two halves are equal. 
Continue doing this until we 697 // get to ByteSize. This allows us to handle 0x01010101 as 0x01. 698 while (ValSizeInBytes > ByteSize) { 699 ValSizeInBytes >>= 1; 700 701 // If the top half equals the bottom half, we're still ok. 702 if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) != 703 (Value & ((1 << (8*ValSizeInBytes))-1))) 704 return SDValue(); 705 } 706 707 // Properly sign extend the value. 708 int ShAmt = (4-ByteSize)*8; 709 int MaskVal = ((int)Value << ShAmt) >> ShAmt; 710 711 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 712 if (MaskVal == 0) return SDValue(); 713 714 // Finally, if this value fits in a 5 bit sext field, return it 715 if (((MaskVal << (32-5)) >> (32-5)) == MaskVal) 716 return DAG.getTargetConstant(MaskVal, MVT::i32); 717 return SDValue(); 718} 719 720//===----------------------------------------------------------------------===// 721// Addressing Mode Selection 722//===----------------------------------------------------------------------===// 723 724/// isIntS16Immediate - This method tests to see if the node is either a 32-bit 725/// or 64-bit immediate, and if the value can be accurately represented as a 726/// sign extension from a 16-bit value. If so, this returns true and the 727/// immediate. 728static bool isIntS16Immediate(SDNode *N, short &Imm) { 729 if (N->getOpcode() != ISD::Constant) 730 return false; 731 732 Imm = (short)cast<ConstantSDNode>(N)->getValue(); 733 if (N->getValueType(0) == MVT::i32) 734 return Imm == (int32_t)cast<ConstantSDNode>(N)->getValue(); 735 else 736 return Imm == (int64_t)cast<ConstantSDNode>(N)->getValue(); 737} 738static bool isIntS16Immediate(SDValue Op, short &Imm) { 739 return isIntS16Immediate(Op.getNode(), Imm); 740} 741 742 743/// SelectAddressRegReg - Given the specified addressed, check to see if it 744/// can be represented as an indexed [r+r] operation. Returns false if it 745/// can be more efficiently represented with [r+imm]. 
746bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base, 747 SDValue &Index, 748 SelectionDAG &DAG) { 749 short imm = 0; 750 if (N.getOpcode() == ISD::ADD) { 751 if (isIntS16Immediate(N.getOperand(1), imm)) 752 return false; // r+i 753 if (N.getOperand(1).getOpcode() == PPCISD::Lo) 754 return false; // r+i 755 756 Base = N.getOperand(0); 757 Index = N.getOperand(1); 758 return true; 759 } else if (N.getOpcode() == ISD::OR) { 760 if (isIntS16Immediate(N.getOperand(1), imm)) 761 return false; // r+i can fold it if we can. 762 763 // If this is an or of disjoint bitfields, we can codegen this as an add 764 // (for better address arithmetic) if the LHS and RHS of the OR are provably 765 // disjoint. 766 APInt LHSKnownZero, LHSKnownOne; 767 APInt RHSKnownZero, RHSKnownOne; 768 DAG.ComputeMaskedBits(N.getOperand(0), 769 APInt::getAllOnesValue(N.getOperand(0) 770 .getValueSizeInBits()), 771 LHSKnownZero, LHSKnownOne); 772 773 if (LHSKnownZero.getBoolValue()) { 774 DAG.ComputeMaskedBits(N.getOperand(1), 775 APInt::getAllOnesValue(N.getOperand(1) 776 .getValueSizeInBits()), 777 RHSKnownZero, RHSKnownOne); 778 // If all of the bits are known zero on the LHS or RHS, the add won't 779 // carry. 780 if (~(LHSKnownZero | RHSKnownZero) == 0) { 781 Base = N.getOperand(0); 782 Index = N.getOperand(1); 783 return true; 784 } 785 } 786 } 787 788 return false; 789} 790 791/// Returns true if the address N can be represented by a base register plus 792/// a signed 16-bit displacement [r+imm], and if it is not better 793/// represented as reg+reg. 794bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, 795 SDValue &Base, SelectionDAG &DAG){ 796 // If this can be more profitably realized as r+r, fail. 
797 if (SelectAddressRegReg(N, Disp, Base, DAG)) 798 return false; 799 800 if (N.getOpcode() == ISD::ADD) { 801 short imm = 0; 802 if (isIntS16Immediate(N.getOperand(1), imm)) { 803 Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32); 804 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 805 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 806 } else { 807 Base = N.getOperand(0); 808 } 809 return true; // [r+i] 810 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { 811 // Match LOAD (ADD (X, Lo(G))). 812 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getValue() 813 && "Cannot handle constant offsets yet!"); 814 Disp = N.getOperand(1).getOperand(0); // The global address. 815 assert(Disp.getOpcode() == ISD::TargetGlobalAddress || 816 Disp.getOpcode() == ISD::TargetConstantPool || 817 Disp.getOpcode() == ISD::TargetJumpTable); 818 Base = N.getOperand(0); 819 return true; // [&g+r] 820 } 821 } else if (N.getOpcode() == ISD::OR) { 822 short imm = 0; 823 if (isIntS16Immediate(N.getOperand(1), imm)) { 824 // If this is an or of disjoint bitfields, we can codegen this as an add 825 // (for better address arithmetic) if the LHS and RHS of the OR are 826 // provably disjoint. 827 APInt LHSKnownZero, LHSKnownOne; 828 DAG.ComputeMaskedBits(N.getOperand(0), 829 APInt::getAllOnesValue(N.getOperand(0) 830 .getValueSizeInBits()), 831 LHSKnownZero, LHSKnownOne); 832 833 if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { 834 // If all of the bits are known zero on the LHS or RHS, the add won't 835 // carry. 836 Base = N.getOperand(0); 837 Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32); 838 return true; 839 } 840 } 841 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { 842 // Loading from a constant address. 
843 844 // If this address fits entirely in a 16-bit sext immediate field, codegen 845 // this as "d, 0" 846 short Imm; 847 if (isIntS16Immediate(CN, Imm)) { 848 Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); 849 Base = DAG.getRegister(PPC::R0, CN->getValueType(0)); 850 return true; 851 } 852 853 // Handle 32-bit sext immediates with LIS + addr mode. 854 if (CN->getValueType(0) == MVT::i32 || 855 (int64_t)CN->getValue() == (int)CN->getValue()) { 856 int Addr = (int)CN->getValue(); 857 858 // Otherwise, break this down into an LIS + disp. 859 Disp = DAG.getTargetConstant((short)Addr, MVT::i32); 860 861 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32); 862 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; 863 Base = SDValue(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0); 864 return true; 865 } 866 } 867 868 Disp = DAG.getTargetConstant(0, getPointerTy()); 869 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) 870 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 871 else 872 Base = N; 873 return true; // [r+0] 874} 875 876/// SelectAddressRegRegOnly - Given the specified addressed, force it to be 877/// represented as an indexed [r+r] operation. 878bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, 879 SDValue &Index, 880 SelectionDAG &DAG) { 881 // Check to see if we can easily represent this as an [r+r] address. This 882 // will fail if it thinks that the address is more profitably represented as 883 // reg+imm, e.g. where imm = 0. 884 if (SelectAddressRegReg(N, Base, Index, DAG)) 885 return true; 886 887 // If the operand is an addition, always emit this as [r+r], since this is 888 // better (for code size, and execution, as the memop does the add for free) 889 // than emitting an explicit add. 
890 if (N.getOpcode() == ISD::ADD) { 891 Base = N.getOperand(0); 892 Index = N.getOperand(1); 893 return true; 894 } 895 896 // Otherwise, do it the hard way, using R0 as the base register. 897 Base = DAG.getRegister(PPC::R0, N.getValueType()); 898 Index = N; 899 return true; 900} 901 902/// SelectAddressRegImmShift - Returns true if the address N can be 903/// represented by a base register plus a signed 14-bit displacement 904/// [r+imm*4]. Suitable for use by STD and friends. 905bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp, 906 SDValue &Base, 907 SelectionDAG &DAG) { 908 // If this can be more profitably realized as r+r, fail. 909 if (SelectAddressRegReg(N, Disp, Base, DAG)) 910 return false; 911 912 if (N.getOpcode() == ISD::ADD) { 913 short imm = 0; 914 if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) { 915 Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); 916 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 917 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 918 } else { 919 Base = N.getOperand(0); 920 } 921 return true; // [r+i] 922 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { 923 // Match LOAD (ADD (X, Lo(G))). 924 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getValue() 925 && "Cannot handle constant offsets yet!"); 926 Disp = N.getOperand(1).getOperand(0); // The global address. 927 assert(Disp.getOpcode() == ISD::TargetGlobalAddress || 928 Disp.getOpcode() == ISD::TargetConstantPool || 929 Disp.getOpcode() == ISD::TargetJumpTable); 930 Base = N.getOperand(0); 931 return true; // [&g+r] 932 } 933 } else if (N.getOpcode() == ISD::OR) { 934 short imm = 0; 935 if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) { 936 // If this is an or of disjoint bitfields, we can codegen this as an add 937 // (for better address arithmetic) if the LHS and RHS of the OR are 938 // provably disjoint. 
939 APInt LHSKnownZero, LHSKnownOne; 940 DAG.ComputeMaskedBits(N.getOperand(0), 941 APInt::getAllOnesValue(N.getOperand(0) 942 .getValueSizeInBits()), 943 LHSKnownZero, LHSKnownOne); 944 if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) { 945 // If all of the bits are known zero on the LHS or RHS, the add won't 946 // carry. 947 Base = N.getOperand(0); 948 Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); 949 return true; 950 } 951 } 952 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { 953 // Loading from a constant address. Verify low two bits are clear. 954 if ((CN->getValue() & 3) == 0) { 955 // If this address fits entirely in a 14-bit sext immediate field, codegen 956 // this as "d, 0" 957 short Imm; 958 if (isIntS16Immediate(CN, Imm)) { 959 Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy()); 960 Base = DAG.getRegister(PPC::R0, CN->getValueType(0)); 961 return true; 962 } 963 964 // Fold the low-part of 32-bit absolute addresses into addr mode. 965 if (CN->getValueType(0) == MVT::i32 || 966 (int64_t)CN->getValue() == (int)CN->getValue()) { 967 int Addr = (int)CN->getValue(); 968 969 // Otherwise, break this down into an LIS + disp. 970 Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32); 971 972 Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32); 973 unsigned Opc = CN->getValueType(0) == MVT::i32 ? 
PPC::LIS : PPC::LIS8; 974 Base = SDValue(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0); 975 return true; 976 } 977 } 978 } 979 980 Disp = DAG.getTargetConstant(0, getPointerTy()); 981 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) 982 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 983 else 984 Base = N; 985 return true; // [r+0] 986} 987 988 989/// getPreIndexedAddressParts - returns true by value, base pointer and 990/// offset pointer and addressing mode by reference if the node's address 991/// can be legally represented as pre-indexed load / store address. 992bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 993 SDValue &Offset, 994 ISD::MemIndexedMode &AM, 995 SelectionDAG &DAG) { 996 // Disabled by default for now. 997 if (!EnablePPCPreinc) return false; 998 999 SDValue Ptr; 1000 MVT VT; 1001 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 1002 Ptr = LD->getBasePtr(); 1003 VT = LD->getMemoryVT(); 1004 1005 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 1006 ST = ST; 1007 Ptr = ST->getBasePtr(); 1008 VT = ST->getMemoryVT(); 1009 } else 1010 return false; 1011 1012 // PowerPC doesn't have preinc load/store instructions for vectors. 1013 if (VT.isVector()) 1014 return false; 1015 1016 // TODO: Check reg+reg first. 1017 1018 // LDU/STU use reg+imm*4, others use reg+imm. 1019 if (VT != MVT::i64) { 1020 // reg + imm 1021 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG)) 1022 return false; 1023 } else { 1024 // reg + imm * 4. 1025 if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG)) 1026 return false; 1027 } 1028 1029 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 1030 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of 1031 // sext i32 to i64 when addr mode is r+i. 
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  // Any addressing mode selected above is a legal pre-increment candidate.
  AM = ISD::PRE_INC;
  return true;
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// LowerConstantPool - Lower a ConstantPool node to a Hi/Lo pair of
/// target-constant-pool references, combined per the relocation model.
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  // High and low 16-bit halves of the constant-pool address.
  SDValue Hi = DAG.getNode(PPCISD::Hi, PtrVT, CPI, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, PtrVT, CPI, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the constant pool.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  return Lo;
}

/// LowerJumpTable - Lower a JumpTable node to a Hi/Lo pair of target
/// jump-table references, combined per the relocation model.
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  // High and low 16-bit halves of the jump-table address.
  SDValue Hi = DAG.getNode(PPCISD::Hi, PtrVT, JTI, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, PtrVT, JTI, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the constant pool.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  return Lo;
}

/// LowerGlobalTLSAddress - Thread-local storage is not implemented for this
/// target; always aborts.
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) {
  assert(0 && "TLS not implemented for PPC.");
  return SDValue(); // Not reached
}

/// LowerGlobalAddress - Lower a GlobalAddress node to a Hi/Lo address pair,
/// going through a lazy-resolver-stub load when the subtarget requires it.
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  // If it's a debug information descriptor, don't mess with it.
  if (DAG.isVerifiedDebugInfoDesc(Op))
    return GA;
  SDValue Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  // High and low 16-bit halves of the global's address.
  SDValue Hi = DAG.getNode(PPCISD::Hi, PtrVT, GA, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, PtrVT, GA, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to globals.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);

  if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV))
    return Lo;

  // If the global is weak or external, we have to go through the lazy
  // resolution stub.
  return DAG.getLoad(PtrVT, DAG.getEntryNode(), Lo, NULL, 0);
}

/// LowerSETCC - Custom-lower SETCC, exposing PPC-friendly forms (ctlz/srl
/// for eq-0, xor for integer eq/ne) to the DAG combiner.
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      MVT VT = Op.getOperand(0).getValueType();
      SDValue Zext = Op.getOperand(0);
      // Sub-i32 operands are first zero-extended so the ctlz result is
      // meaningful.
      if (VT.bitsLT(MVT::i32)) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
      }
      // (x == 0) ==> ctlz(x) >> log2(bitwidth): only x==0 has all bits clear.
      unsigned Log2b = Log2_32(VT.getSizeInBits());
      SDValue Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
      SDValue Scc = DAG.getNode(ISD::SRL, VT, Clz,
                                DAG.getConstant(Log2b, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, MVT::i32, Scc);
    }
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  MVT LHSVT = Op.getOperand(0).getValueType();
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    MVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(ISD::XOR, LHSVT, Op.getOperand(0),
                              Op.getOperand(1));
    return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
  }
  return SDValue();
}

/// LowerVAARG - va_arg lowering; not implemented for the ELF32 ABI and
/// always aborts.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
                                      int VarArgsFrameIndex,
                                      int VarArgsStackOffset,
                                      unsigned VarArgsNumGPR,
                                      unsigned VarArgsNumFPR,
                                      const PPCSubtarget &Subtarget) {

  assert(0 && "VAARG in ELF32 ABI not implemented yet!");
  return SDValue(); // Not reached
}

/// LowerVASTART - Lower va_start.  Macho just stores the address of the
/// vararg frame slot; ELF32 initializes the full four-field va_list struct.
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
                                        int VarArgsFrameIndex,
                                        int VarArgsStackOffset,
                                        unsigned VarArgsNumGPR,
                                        unsigned VarArgsNumFPR,
                                        const PPCSubtarget &Subtarget) {

  if (Subtarget.isMachoABI()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV, 0);
  }

  // For ELF 32 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //                /* where r3:r10 and f1:f8 (if saved)
  //                 * are stored
  //                 */
  // } va_list[1];


  SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8);
  SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8);


  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SDValue StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT);
  SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);

  // Byte offsets of the successive va_list fields relative to its start.
  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);

  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);

  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore = DAG.getStore(Op.getOperand(0), ArgGPR,
                                    Op.getOperand(1), SV, 0);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, PtrVT, Op.getOperand(1),
                                ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
    DAG.getStore(firstStore, ArgFPR, nextPtr, SV, nextOffset);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore =
    DAG.getStore(secondStore, StackOffsetFI, nextPtr, SV, nextOffset);
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, FR, nextPtr, SV, nextOffset);

}

#include "PPCGenCallingConv.inc"

/// GetFPR - Get the set of FP registers that should be allocated for
/// arguments, depending on which subtarget is selected.  Macho exposes
/// F1-F13; other ABIs expose F1-F8.
static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
  if (Subtarget.isMachoABI()) {
    static const unsigned FPR[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
    };
    return FPR;
  }


  static const unsigned FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8
  };
  return FPR;
}

/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack: the value's (or byval object's) size rounded up to a multiple
/// of the pointer size.
static unsigned CalculateStackSlotSize(SDValue Arg, SDValue Flag,
                                       bool isVarArg, unsigned PtrByteSize) {
  MVT ArgVT = Arg.getValueType();
  ISD::ArgFlagsTy Flags = cast<ARG_FLAGSSDNode>(Flag)->getArgFlags();
  unsigned ArgSize =ArgVT.getSizeInBits()/8;
  if (Flags.isByVal())
    ArgSize = Flags.getByValSize();
  // Round up to a multiple of the pointer size.
  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

  return ArgSize;
}

SDValue
PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op,
                                         SelectionDAG &DAG,
                                         int &VarArgsFrameIndex,
                                         int &VarArgsStackOffset,
                                         unsigned &VarArgsNumGPR,
                                         unsigned &VarArgsNumFPR,
                                         const PPCSubtarget &Subtarget) {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 8> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  bool isMachoABI = Subtarget.isMachoABI();
  bool isELF32_ABI = Subtarget.isELF32_ABI();
  // Potential tail calls could cause overwriting of argument stack slots,
  // so argument objects are only marked immutable when tail-call
  // optimization cannot apply.
  unsigned CC = MF.getFunction()->getCallingConv();
  bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const unsigned GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };

  static const unsigned *FPR = GetFPR(Subtarget);

  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = isMachoABI ? 13 : 8;
  const unsigned Num_VR_Regs  = array_lengthof( VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  // Altivec is not mentioned in the ppc32 Elf Supplement, so I'm not trying
  // to handle Elf here.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e;
         ++ArgNo) {
      MVT ObjectVT = Op.getValue(ArgNo).getValueType();
      unsigned ObjSize = ObjectVT.getSizeInBits()/8;
      ISD::ArgFlagsTy Flags =
        cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        ObjSize = Flags.getByValSize();
        unsigned ArgSize =
          ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT()) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += isPPC64 ? 8 : 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.
  //
  // In the ELF 32 ABI, GPRs and stack are double word align: an argument
  // represented with two words (long long or double) must be copied to an
  // even GPR_idx value or to an even ArgOffset value.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0;
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags =
      cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
    // See if next argument requires stack alignment in ELF
    bool Align = Flags.isSplit();

    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
                                                  Op.getOperand(ArgNo+3),
                                                  isVarArg,
                                                  PtrByteSize);
      } else  nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
                                                Op.getOperand(ArgNo+3),
                                                isVarArg,
                                                PtrByteSize);

    // FIXME alignment for ELF may not be right
    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Double word align in ELF
      if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2);
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgValues.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
          SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
          // Spill the incoming register to the byval object's slot, storing
          // only the low 1 or 2 bytes.
          SDValue Store = DAG.getTruncStore(Val.getValue(1), Val, FIN,
                               NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 );
          MemOps.push_back(Store);
          ++GPR_idx;
          if (isMachoABI) ArgOffset += PtrByteSize;
        } else {
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgVal will be address of the beginning of
        // the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
          if (isMachoABI) ArgOffset += PtrByteSize;
        } else {
          // Out of registers: the remainder of the object is already in
          // memory; account for the bytes not yet counted and stop.
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT()) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i32:
      if (!isPPC64) {
        // Double word align in ELF
        if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2);

        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
          ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // Stack align in ELF
        if (needsLoad && Align && isELF32_ABI)
          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
        // All int arguments reserve stack space in Macho ABI.
        if (isMachoABI || needsLoad) ArgOffset += PtrByteSize;
        break;
      }
      // FALLTHROUGH
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
        RegInfo.addLiveIn(GPR[GPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);

        if (ObjectVT == MVT::i32) {
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          if (Flags.isSExt())
            ArgVal = DAG.getNode(ISD::AssertSext, MVT::i64, ArgVal,
                                 DAG.getValueType(ObjectVT));
          else if (Flags.isZExt())
            ArgVal = DAG.getNode(ISD::AssertZext, MVT::i64, ArgVal,
                                 DAG.getValueType(ObjectVT));

          ArgVal = DAG.getNode(ISD::TRUNCATE, MVT::i32, ArgVal);
        }

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in Macho ABI.
      if (isMachoABI || needsLoad) ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs && isMachoABI) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;
        if (ObjectVT == MVT::f32)
          VReg = RegInfo.createVirtualRegister(&PPC::F4RCRegClass);
        else
          VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
        RegInfo.addLiveIn(FPR[FPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // Stack align in ELF
      if (needsLoad && Align && isELF32_ABI)
        ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
      // All FP arguments reserve stack space in Macho ABI.
      if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass);
        RegInfo.addLiveIn(VR[VR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        if (isVarArg) {
          // Pad ArgOffset (and consume matching GPRs) up to the next 16-byte
          // boundary, then account for the vector itself.
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs);
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      int FI = MFI->CreateFixedObject(ObjSize,
                                      CurArgOffset + (ArgSize - ObjSize),
                                      isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
    }

    ArgValues.push_back(ArgVal);
  }

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  // Add the Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }
  MinReservedArea =
    std::max(MinReservedArea,
             PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
    getStackAlignment();
  unsigned AlignMask = TargetAlign-1;
  MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
  FI->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {

    int depth;
    if (isELF32_ABI) {
      VarArgsNumGPR = GPR_idx;
      VarArgsNumFPR = FPR_idx;

      // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame
      // pointer.
      depth = -(Num_GPR_Regs * PtrVT.getSizeInBits()/8 +
                Num_FPR_Regs * MVT(MVT::f64).getSizeInBits()/8 +
                PtrVT.getSizeInBits()/8);

      VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                                                  ArgOffset);

    }
    else
      depth = ArgOffset;

    VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                                               depth);
    SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);

    // In ELF 32 ABI, the fixed integer arguments of a variadic function are
    // stored to the VarArgsFrameIndex on the stack.
    if (isELF32_ABI) {
      for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) {
        SDValue Val = DAG.getRegister(GPR[GPR_idx], PtrVT);
        SDValue Store = DAG.getStore(Root, Val, FIN, NULL, 0);
        MemOps.push_back(Store);
        // Increment the address by four for the next argument to store
        SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
        FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
      }
    }

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;
      if (isPPC64)
        VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
      else
        VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);

      RegInfo.addLiveIn(GPR[GPR_idx], VReg);
      SDValue Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }

    // In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    if (isELF32_ABI) {
      for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) {
        SDValue Val = DAG.getRegister(FPR[FPR_idx], MVT::f64);
        SDValue Store = DAG.getStore(Root, Val, FIN, NULL, 0);
        MemOps.push_back(Store);
        // Increment the address by eight for the next argument to store
        SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
                                         PtrVT);
        FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
      }

      for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) {
        unsigned VReg;
        VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

        RegInfo.addLiveIn(FPR[FPR_idx], VReg);
        SDValue Val = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        SDValue Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
        MemOps.push_back(Store);
        // Increment the address by eight for the next argument to store
        SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
                                         PtrVT);
        FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
      }
    }
  }

  // Chain all the argument-spill stores together so they are ordered before
  // any use of the argument values.
  if (!MemOps.empty())
    Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size());

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getMergeValues(Op.getNode()->getVTList(), &ArgValues[0],
                            ArgValues.size());
}

/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
/// linkage area.
static unsigned
CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
                                     bool isPPC64,
                                     bool isMachoABI,
                                     bool isVarArg,
                                     unsigned CC,
                                     SDValue Call,
                                     unsigned &nAltivecParamsAtEnd) {
  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
  // CALL nodes carry 5 fixed operands followed by (value, flags) pairs.
  unsigned NumOps = (Call.getNumOperands() - 5) / 2;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  // Add up all the space actually used.
  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
  // they all go in registers, but we must reserve stack space for them for
  // possible use by the caller.  In varargs or 64-bit calls, parameters are
  // assigned stack space in order, with padding so Altivec parameters are
  // 16-byte aligned.
  nAltivecParamsAtEnd = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = Call.getOperand(5+2*i);
    SDValue Flag = Call.getOperand(5+2*i+1);
    MVT ArgVT = Arg.getValueType();
    // Varargs Altivec parameters are padded to a 16 byte boundary.
    if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
        ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
      if (!isVarArg && !isPPC64) {
        // Non-varargs Altivec parameters go after all the non-Altivec
        // parameters; handle those later so we know how much padding we need.
        nAltivecParamsAtEnd++;
        continue;
      }
      // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
      NumBytes = ((NumBytes+15)/16)*16;
    }
    NumBytes += CalculateStackSlotSize(Arg, Flag, isVarArg, PtrByteSize);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += 16*nAltivecParamsAtEnd;
  }

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if its varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  NumBytes = std::max(NumBytes,
                      PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));

  // Tail call needs the stack to be aligned.
  if (CC==CallingConv::Fast && PerformTailCallOpt) {
    unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
      getStackAlignment();
    unsigned AlignMask = TargetAlign-1;
    NumBytes = (NumBytes + AlignMask) & ~AlignMask;
  }

  return NumBytes;
}

/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.  Also records the
/// largest (most negative) such adjustment seen so far on PPCFunctionInfo.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall,
                                   unsigned ParamSize) {

  if (!IsTailCall) return 0;

  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}

/// IsEligibleForTailCallElimination - Check to see whether the next instruction
/// following the call is a return.  A function is eligible if caller/callee
/// calling conventions match, currently only fastcc supports tail calls, and
/// the function CALL is immediately followed by a RET.
bool
PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Call,
                                                     SDValue Ret,
                                                     SelectionDAG& DAG) const {
  // Variable argument functions are not supported.
  // Operand 2 of the CALL node is the isVarArg flag; bail out on varargs or
  // when tail-call optimization is disabled.
  if (!PerformTailCallOpt ||
      cast<ConstantSDNode>(Call.getOperand(2))->getValue() != 0) return false;

  if (CheckTailCallReturnConstraints(Call, Ret)) {
    MachineFunction &MF = DAG.getMachineFunction();
    unsigned CallerCC = MF.getFunction()->getCallingConv();
    unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
      // Functions containing by val parameters are not supported.
      for (unsigned i = 0; i != ((Call.getNumOperands()-5)/2); i++) {
        ISD::ArgFlagsTy Flags = cast<ARG_FLAGSSDNode>(Call.getOperand(5+2*i+1))
          ->getArgFlags();
        if (Flags.isByVal()) return false;
      }

      SDValue Callee = Call.getOperand(4);
      // Non PIC/GOT tail calls are supported.
      if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
        return true;

      // At the moment we can only do local tail calls (in same module, hidden
      // or protected) if we are generating PIC.
      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
        return G->getGlobal()->hasHiddenVisibility()
            || G->getGlobal()->hasProtectedVisibility();
    }
  }

  return false;
}

/// isCallCompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
/// Returns null if the address does not fit.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||        // Low 2 bits are implicitly zero.
      (Addr << 6 >> 6) != Addr) // Top 6 bits have to be sext of immediate.
    return 0;

  // Encode as a word offset (drop the implicit low 2 bits).
  return DAG.getConstant((int)C->getValue() >> 2,
                         DAG.getTargetLoweringInfo().getPointerTy()).getNode();
}

namespace {

/// TailCallArgumentInfo - An outgoing tail-call argument together with the
/// fixed stack slot it must eventually be stored to.
struct TailCallArgumentInfo {
  SDValue Arg;        // The argument value.
  SDValue FrameIdxOp; // FrameIndex node addressing the destination slot.
  int FrameIdx;       // The raw frame index of the slot.

  TailCallArgumentInfo() : FrameIdx(0) {}
};

}

/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void
StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
                                  SDValue Chain,
                   const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
                   SmallVector<SDValue, 8> &MemOpChains) {
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to framepointer.
    MemOpChains.push_back(DAG.getStore(Chain, Arg, FIN,
                                       PseudoSourceValue::getFixedStack(FI),
                                       0));
  }
}

/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
/// No-op when SPDiff is zero (no stack adjustment needed).
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
                                             MachineFunction &MF,
                                             SDValue Chain,
                                             SDValue OldRetAddr,
                                             SDValue OldFP,
                                             int SPDiff,
                                             bool isPPC64,
                                             bool isMachoABI) {
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    int SlotSize = isPPC64 ? 8 : 4;
    int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
                                                                   isMachoABI);
    int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
                                                          NewRetAddrLoc);
    // And the slot for the frame pointer.
    int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
                                                                    isMachoABI);
    int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc);

    MVT VT = isPPC64 ?
MVT::i64 : MVT::i32;
    // Store the old return address and frame pointer into the relocated slots.
    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
    Chain = DAG.getStore(Chain, OldRetAddr, NewRetAddrFrIdx,
                         PseudoSourceValue::getFixedStack(NewRetAddr), 0);
    SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
    Chain = DAG.getStore(Chain, OldFP, NewFramePtrIdx,
                         PseudoSourceValue::getFixedStack(NewFPIdx), 0);
  }
  return Chain;
}

/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
/// the position of the argument (fixed object at ArgOffset + SPDiff) and
/// append it to TailCallArguments.
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
                         SDValue Arg, int SPDiff, unsigned ArgOffset,
                      SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
  int Offset = ArgOffset + SPDiff;
  // Round the value size up to whole bytes.
  uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
  int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
  MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
  SDValue FIN = DAG.getFrameIndex(FI, VT);
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;
  TailCallArguments.push_back(Info);
}

/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
/// stack slot. Returns the chain as result and the loaded frame pointers in
/// LROpOut/FPOpout. Used when tail calling.
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
                                                        int SPDiff,
                                                        SDValue Chain,
                                                        SDValue &LROpOut,
                                                        SDValue &FPOpOut) {
  if (SPDiff) {
    // Load the LR and FP stack slot for later adjusting.
    MVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
    LROpOut = getReturnAddrFrameIndex(DAG);
    LROpOut = DAG.getLoad(VT, Chain, LROpOut, NULL, 0);
    // Result 1 of a load is its output chain; thread it through so the
    // second load is ordered after the first.
    Chain = SDValue(LROpOut.getNode(), 1);
    FPOpOut = getFramePointerFrameIndex(DAG);
    FPOpOut = DAG.getLoad(VT, Chain, FPOpOut, NULL, 0);
    Chain = SDValue(FPOpOut.getNode(), 1);
  }
  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          unsigned Size) {
  SDValue SizeNode = DAG.getConstant(Size, MVT::i32);
  return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(), false,
                       NULL, 0, NULL, 0);
}

/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
/// tail calls.  For ordinary calls the store is appended to MemOpChains; for
/// tail calls only the destination slot is recorded (stores happen later).
static void
LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
                 SDValue Arg, SDValue PtrOff, int SPDiff,
                 unsigned ArgOffset, bool isPPC64, bool isTailCall,
                 bool isVector, SmallVector<SDValue, 8> &MemOpChains,
                 SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  if (!isTailCall) {
    if (isVector) {
      // Vectors are stored at their assigned offset from the stack pointer,
      // ignoring the PtrOff the caller computed.
      SDValue StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, PtrVT));
    }
    MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
  // Calculate and remember argument location.
  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                                  TailCallArguments);
}

/// LowerCALL - Lower an ISD::CALL node for the Macho (Darwin) and ELF PPC
/// ABIs.  The call node's operands are: 0=chain, 1=CC, 2=isVarArg,
/// 3=isTailCall, 4=callee, then (argument, flags) pairs.
SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
                                     const PPCSubtarget &Subtarget,
                                     TargetMachine &TM) {
  SDValue Chain  = Op.getOperand(0);
  bool isVarArg  = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  unsigned CC    = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0 &&
                    CC == CallingConv::Fast && PerformTailCallOpt;
  SDValue Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;

  bool isMachoABI = Subtarget.isMachoABI();
  bool isELF32_ABI = Subtarget.isELF32_ABI();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
  // SelectExpr to use to put the arguments in the appropriate registers.
  std::vector<SDValue> args_to_use;

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the caller's stack pointer in this function's epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (PerformTailCallOpt && CC==CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  unsigned nAltivecParamsAtEnd = 0;

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned NumBytes =
    CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isMachoABI, isVarArg, CC,
                                         Op, nAltivecParamsAtEnd);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, PtrVT));
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const unsigned GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const unsigned *FPR = GetFPR(Subtarget);

  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR_32);
  // Macho passes up to 13 FP args in registers, ELF32 up to 8.
  const unsigned NumFPRs = isMachoABI ? 13 : 8;
  const unsigned NumVRs  = array_lengthof(VR);

  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;

  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    bool inMem = false;
    SDValue Arg = Op.getOperand(5+2*i);
    ISD::ArgFlagsTy Flags =
      cast<ARG_FLAGSSDNode>(Op.getOperand(5+2*i+1))->getArgFlags();
    // See if next argument requires stack alignment in ELF.
    bool Align = Flags.isSplit();

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    // Stack align in ELF 32.
    if (isELF32_ABI && Align)
      PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize,
                               StackPtr.getValueType());
    else
      PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, MVT::i64, Arg);
    }

    // FIXME Elf untested, what are alignment rules?
    // FIXME memcpy is used way more than necessary.  Correctness first.
    if (Flags.isByVal()) {
      unsigned Size = Flags.getByValSize();
      if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2);
      if (Size==1 || Size==2) {
        // Very small objects are passed right-justified.
        // Everything else is passed left-justified.
        MVT VT = (Size==1) ? MVT::i8 : MVT::i16;
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, PtrVT, Chain, Arg,
                                        NULL, 0, VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          if (isMachoABI)
            ArgOffset += PtrByteSize;
        } else {
          // No register: memcpy the bytes into the right-justified position
          // of the stack slot.
          SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
          SDValue AddPtr = DAG.getNode(ISD::ADD, PtrVT, PtrOff, Const);
          SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
                                CallSeqStart.getNode()->getOperand(0),
                                Flags, DAG, Size);
          // This must go outside the CALLSEQ_START..END.
          SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                       CallSeqStart.getNode()->getOperand(1));
          DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                                 NewCallSeqStart.getNode());
          Chain = CallSeqStart = NewCallSeqStart;
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)
      SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                            CallSeqStart.getNode()->getOperand(0),
                            Flags, DAG, Size);
      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                   CallSeqStart.getNode()->getOperand(1));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;
      // And copy the pieces of it that fit into registers.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getLoad(PtrVT, Chain, AddArg, NULL, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          if (isMachoABI)
            ArgOffset += PtrByteSize;
        } else {
          // Out of registers: skip the remainder, rounded up to pointer size.
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
      // Double word align in ELF.
      if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2);
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments);
        inMem = true;
      }
      if (inMem || isMachoABI) {
        // Stack align in ELF.
        if (isELF32_ABI && Align)
          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;

        ArgOffset += PtrByteSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          SDValue Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers.
          if (GPR_idx != NumGPRs) {
            SDValue Load = DAG.getLoad(PtrVT, Store, PtrOff, NULL, 0);
            MemOpChains.push_back(Load.getValue(1));
            if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
                                                                Load));
          }
          // A 32-bit f64 shadows a second GPR: reload its high word too.
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
            SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, PtrVT, PtrOff, ConstFour);
            SDValue Load = DAG.getLoad(PtrVT, Store, PtrOff, NULL, 0);
            MemOpChains.push_back(Load.getValue(1));
            if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
                                                                Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (isMachoABI) {
            if (GPR_idx != NumGPRs)
              ++GPR_idx;
            if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
                !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
              ++GPR_idx;
          }
        }
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments);
        inMem = true;
      }
      if (inMem || isMachoABI) {
        // Stack align in ELF.
        if (isELF32_ABI && Align)
          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
        if (isPPC64)
          ArgOffset += 8;
        else
          ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (isVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ...  We do it for all
        // arguments, seems to work.
        while (ArgOffset % 16 !=0) {
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, PtrVT));
        SDValue Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load = DAG.getLoad(MVT::v4f32, Store, PtrOff, NULL, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        // Shadow the vector's 16 bytes in the available GPRs as well.
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, PtrVT, PtrOff,
                                   DAG.getConstant(i, PtrVT));
          SDValue Load = DAG.getLoad(PtrVT, Store, Ix, NULL, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else if (nAltivecParamsAtEnd==0) {
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, true, MemOpChains,
                         TailCallArguments);
        ArgOffset += 16;
      }
      break;
    }
  }
  // If all Altivec parameters fit in registers, as they usually do,
  // they get stack space following the non-Altivec parameters.  We
  // don't track this here because nobody below needs it.
  // If there are more Altivec parameters than fit in registers emit
  // the stores here.
  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
    unsigned j = 0;
    // Offset is aligned; skip 1st 12 params which go in V registers.
    ArgOffset = ((ArgOffset+15)/16)*16;
    ArgOffset += 12*16;
    for (unsigned i = 0; i != NumOps; ++i) {
      SDValue Arg = Op.getOperand(5+2*i);
      MVT ArgType = Arg.getValueType();
      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
        if (++j > NumVRs) {
          SDValue PtrOff;
          // We are emitting Altivec params in order.
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           isPPC64, isTailCall, true, MemOpChains,
                           TailCallArguments);
          ArgOffset += 16;
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // With the ELF 32 ABI, set CR6 to true if this is a vararg call.
  if (isVarArg && isELF32_ABI) {
    SDValue SetCR(DAG.getTargetNode(PPC::CRSET, MVT::i32), 0);
    Chain = DAG.getCopyToReg(Chain, PPC::CR1EQ, SetCR, InFlag);
    InFlag = Chain.getValue(1);
  }

  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  if (isTailCall) {
    SmallVector<SDValue, 8> MemOpChains2;
    // Do not flag preceding copytoreg stuff together with the following stuff.
    InFlag = SDValue();
    StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                      MemOpChains2);
    if (!MemOpChains2.empty())
      Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                          &MemOpChains2[0], MemOpChains2.size());

    // Store the return address to the appropriate stack slot.
    Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
                                          isPPC64, isMachoABI);
  }

  // Emit callseq_end just before tailcall node.
  if (isTailCall) {
    SmallVector<SDValue, 8> CallSeqOps;
    SDVTList CallSeqNodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
    CallSeqOps.push_back(Chain);
    CallSeqOps.push_back(DAG.getIntPtrConstant(NumBytes));
    CallSeqOps.push_back(DAG.getIntPtrConstant(0));
    if (InFlag.getNode())
      CallSeqOps.push_back(InFlag);
    Chain = DAG.getNode(ISD::CALLSEQ_END, CallSeqNodeTys, &CallSeqOps[0],
                        CallSeqOps.size());
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = isMachoABI? PPCISD::CALL_Macho : PPCISD::CALL_ELF;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
    // If this is an absolute destination address, use the munged value.
    Callee = SDValue(Dest, 0);
  else {
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
    // to do the call, we can't use PPCISD::CALL.
    SDValue MTCTROps[] = {Chain, Callee, InFlag};
    Chain = DAG.getNode(PPCISD::MTCTR, NodeTys, MTCTROps,
                        2 + (InFlag.getNode() != 0));
    InFlag = Chain.getValue(1);

    // Copy the callee address into R12/X12 on darwin.
    if (isMachoABI) {
      unsigned Reg = Callee.getValueType() == MVT::i32 ? PPC::R12 : PPC::X12;
      Chain = DAG.getCopyToReg(Chain, Reg, Callee, InFlag);
      InFlag = Chain.getValue(1);
    }

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Flag);
    Ops.push_back(Chain);
    CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF;
    // Clear the callee so the direct-call path below is skipped.
    Callee.setNode(0);
    // Add CTR register as callee so a bctr can be emitted later.
    if (isTailCall)
      Ops.push_back(DAG.getRegister(PPC::CTR, getPointerTy()));
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }
  // If this is a tail call add stack pointer delta.
  if (isTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // When performing tail call optimization the callee pops its arguments off
  // the stack.  Account for this here so these bytes can be pushed back on in
  // PPCRegisterInfo::eliminateCallFramePseudoInstr.
  int BytesCalleePops =
    (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0;

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  // Emit tail call.
  if (isTailCall) {
    assert(InFlag.getNode() &&
           "Flag must be set. Depend on flag being set in LowerRET");
    Chain = DAG.getNode(PPCISD::TAILCALL,
                        Op.getNode()->getVTList(), &Ops[0], Ops.size());
    return SDValue(Chain.getNode(), Op.getResNo());
  }

  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, PtrVT),
                             DAG.getConstant(BytesCalleePops, PtrVT),
                             InFlag);
  if (Op.getNode()->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SmallVector<SDValue, 16> ResultVals;
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv();
  CCState CCInfo(CallerCC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeCallResult(Op.getNode(), RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    MVT VT = VA.getValVT();
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyFromReg(Chain, VA.getLocReg(), VT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    InFlag = Chain.getValue(2);
  }

  // If the function returns void, just return the chain.
  if (RVLocs.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  SDValue Res = DAG.getMergeValues(Op.getNode()->getVTList(), &ResultVals[0],
                                   ResultVals.size());
  return Res.getValue(Op.getResNo());
}

/// LowerRET - Lower an ISD::RET node.  If the chain reaches back to a
/// PPCISD::TAILCALL emitted by LowerCALL, fold call+ret into a TC_RETURN.
SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG,
                                    TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_PPC);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Chain = Op.getOperand(0);

  Chain = GetPossiblePreceedingTailCall(Chain, PPCISD::TAILCALL);
  if (Chain.getOpcode() == PPCISD::TAILCALL) {
    SDValue TailCall = Chain;
    SDValue TargetAddress = TailCall.getOperand(1);
    SDValue StackAdjustment = TailCall.getOperand(2);

    assert(((TargetAddress.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(TargetAddress)->getReg() == PPC::CTR) ||
            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
            TargetAddress.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(TargetAddress)) &&
    "Expecting an global address, external symbol, absolute value or register");

    assert(StackAdjustment.getOpcode() == ISD::Constant &&
           "Expecting a const value");

    SmallVector<SDValue,8> Operands;
    Operands.push_back(Chain.getOperand(0));
    Operands.push_back(TargetAddress);
    Operands.push_back(StackAdjustment);
    // Copy registers used by the call. Last operand is a flag so it is not
    // copied.
    for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
      Operands.push_back(Chain.getOperand(i));
    }
    return DAG.getNode(PPCISD::TC_RETURN, MVT::Other, &Operands[0],
                       Operands.size());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain);
}

SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
                                             const PPCSubtarget &Subtarget) {
  // When we pop the dynamic allocation we need to restore the SP link.

  // Get the correct type for pointers.
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Construct the stack pointer operand.
  bool IsPPC64 = Subtarget.isPPC64();
  unsigned SP = IsPPC64 ? PPC::X1 : PPC::R1;
  SDValue StackPtr = DAG.getRegister(SP, PtrVT);

  // Get the operands for the STACKRESTORE.
  SDValue Chain = Op.getOperand(0);
  SDValue SaveSP = Op.getOperand(1);

  // Load the old link SP (the back-chain word at 0(SP)).
  SDValue LoadLinkSP = DAG.getLoad(PtrVT, Chain, StackPtr, NULL, 0);

  // Restore the stack pointer.
  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), SP, SaveSP);

  // Store the old link SP.
  return DAG.getStore(Chain, LoadLinkSP, StackPtr, NULL, 0);
}



SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool IsPPC64 = PPCSubTarget.isPPC64();
  bool isMachoABI = PPCSubTarget.isMachoABI();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Get the current return address save index.  The users of this index will
  // be primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet, create it.
  if (!RASI) {
    // Find out the fixed offset of the return address save area.
    int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isMachoABI);
    // Allocate the frame index for the return address save area.
    RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset);
    // Save the result.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}

SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool IsPPC64 = PPCSubTarget.isPPC64();
  bool isMachoABI = PPCSubTarget.isMachoABI();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Get the current frame pointer save index.  The users of this index will
  // be primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet, create it.
  if (!FPSI) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isMachoABI);

    // Allocate the frame index for the frame pointer save area.
    FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }
  return DAG.getFrameIndex(FPSI, PtrVT);
}

SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG,
                                                   const PPCSubtarget &Subtarget) {
  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);

  // Get the correct type for pointers.
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Negate the size (the stack grows downward).
  SDValue NegSize = DAG.getNode(ISD::SUB, PtrVT,
                                DAG.getConstant(0, PtrVT), Size);
  // Construct a node for the frame pointer save index.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNALLOC node.
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  return DAG.getNode(PPCISD::DYNALLOC, VTs, Ops, 3);
}

/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
  // Not FP? Not a fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())
    return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  // Cannot handle SETEQ/SETNE.
  if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDValue();

  MVT ResVT = Op.getValueType();
  MVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
2740 case ISD::SETULT: 2741 case ISD::SETLT: 2742 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 2743 case ISD::SETOGE: 2744 case ISD::SETGE: 2745 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 2746 LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS); 2747 return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV); 2748 case ISD::SETUGT: 2749 case ISD::SETGT: 2750 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 2751 case ISD::SETOLE: 2752 case ISD::SETLE: 2753 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 2754 LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS); 2755 return DAG.getNode(PPCISD::FSEL, ResVT, 2756 DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV); 2757 } 2758 2759 SDValue Cmp; 2760 switch (CC) { 2761 default: break; // SETUO etc aren't handled by fsel. 2762 case ISD::SETULT: 2763 case ISD::SETLT: 2764 Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS); 2765 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 2766 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); 2767 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV); 2768 case ISD::SETOGE: 2769 case ISD::SETGE: 2770 Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS); 2771 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 2772 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); 2773 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV); 2774 case ISD::SETUGT: 2775 case ISD::SETGT: 2776 Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS); 2777 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 2778 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); 2779 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV); 2780 case ISD::SETOLE: 2781 case ISD::SETLE: 2782 Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS); 2783 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 2784 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); 2785 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV); 2786 } 2787 
return SDValue(); 2788} 2789 2790// FIXME: Split this code up when LegalizeDAGTypes lands. 2791SDValue PPCTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { 2792 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 2793 SDValue Src = Op.getOperand(0); 2794 if (Src.getValueType() == MVT::f32) 2795 Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src); 2796 2797 SDValue Tmp; 2798 switch (Op.getValueType().getSimpleVT()) { 2799 default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!"); 2800 case MVT::i32: 2801 Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src); 2802 break; 2803 case MVT::i64: 2804 Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src); 2805 break; 2806 } 2807 2808 // Convert the FP value to an int value through memory. 2809 SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64); 2810 2811 // Emit a store to the stack slot. 2812 SDValue Chain = DAG.getStore(DAG.getEntryNode(), Tmp, FIPtr, NULL, 0); 2813 2814 // Result is a load from the stack slot. If loading 4 bytes, make sure to 2815 // add in a bias. 2816 if (Op.getValueType() == MVT::i32) 2817 FIPtr = DAG.getNode(ISD::ADD, FIPtr.getValueType(), FIPtr, 2818 DAG.getConstant(4, FIPtr.getValueType())); 2819 return DAG.getLoad(Op.getValueType(), Chain, FIPtr, NULL, 0); 2820} 2821 2822SDValue PPCTargetLowering::LowerFP_ROUND_INREG(SDValue Op, 2823 SelectionDAG &DAG) { 2824 assert(Op.getValueType() == MVT::ppcf128); 2825 SDNode *Node = Op.getNode(); 2826 assert(Node->getOperand(0).getValueType() == MVT::ppcf128); 2827 assert(Node->getOperand(0).getNode()->getOpcode() == ISD::BUILD_PAIR); 2828 SDValue Lo = Node->getOperand(0).getNode()->getOperand(0); 2829 SDValue Hi = Node->getOperand(0).getNode()->getOperand(1); 2830 2831 // This sequence changes FPSCR to do round-to-zero, adds the two halves 2832 // of the long double, and puts FPSCR back the way it was. We do not 2833 // actually model FPSCR. 
2834 std::vector<MVT> NodeTys; 2835 SDValue Ops[4], Result, MFFSreg, InFlag, FPreg; 2836 2837 NodeTys.push_back(MVT::f64); // Return register 2838 NodeTys.push_back(MVT::Flag); // Returns a flag for later insns 2839 Result = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0); 2840 MFFSreg = Result.getValue(0); 2841 InFlag = Result.getValue(1); 2842 2843 NodeTys.clear(); 2844 NodeTys.push_back(MVT::Flag); // Returns a flag 2845 Ops[0] = DAG.getConstant(31, MVT::i32); 2846 Ops[1] = InFlag; 2847 Result = DAG.getNode(PPCISD::MTFSB1, NodeTys, Ops, 2); 2848 InFlag = Result.getValue(0); 2849 2850 NodeTys.clear(); 2851 NodeTys.push_back(MVT::Flag); // Returns a flag 2852 Ops[0] = DAG.getConstant(30, MVT::i32); 2853 Ops[1] = InFlag; 2854 Result = DAG.getNode(PPCISD::MTFSB0, NodeTys, Ops, 2); 2855 InFlag = Result.getValue(0); 2856 2857 NodeTys.clear(); 2858 NodeTys.push_back(MVT::f64); // result of add 2859 NodeTys.push_back(MVT::Flag); // Returns a flag 2860 Ops[0] = Lo; 2861 Ops[1] = Hi; 2862 Ops[2] = InFlag; 2863 Result = DAG.getNode(PPCISD::FADDRTZ, NodeTys, Ops, 3); 2864 FPreg = Result.getValue(0); 2865 InFlag = Result.getValue(1); 2866 2867 NodeTys.clear(); 2868 NodeTys.push_back(MVT::f64); 2869 Ops[0] = DAG.getConstant(1, MVT::i32); 2870 Ops[1] = MFFSreg; 2871 Ops[2] = FPreg; 2872 Ops[3] = InFlag; 2873 Result = DAG.getNode(PPCISD::MTFSF, NodeTys, Ops, 4); 2874 FPreg = Result.getValue(0); 2875 2876 // We know the low half is about to be thrown away, so just use something 2877 // convenient. 2878 return DAG.getNode(ISD::BUILD_PAIR, Lo.getValueType(), FPreg, FPreg); 2879} 2880 2881SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 2882 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 
2883 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 2884 return SDValue(); 2885 2886 if (Op.getOperand(0).getValueType() == MVT::i64) { 2887 SDValue Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0)); 2888 SDValue FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits); 2889 if (Op.getValueType() == MVT::f32) 2890 FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0)); 2891 return FP; 2892 } 2893 2894 assert(Op.getOperand(0).getValueType() == MVT::i32 && 2895 "Unhandled SINT_TO_FP type in custom expander!"); 2896 // Since we only generate this in 64-bit mode, we can take advantage of 2897 // 64-bit registers. In particular, sign extend the input value into the 2898 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 2899 // then lfd it and fcfid it. 2900 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 2901 int FrameIdx = FrameInfo->CreateStackObject(8, 8); 2902 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2903 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 2904 2905 SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32, 2906 Op.getOperand(0)); 2907 2908 // STD the extended value into the stack slot. 2909 MachineMemOperand MO(PseudoSourceValue::getFixedStack(FrameIdx), 2910 MachineMemOperand::MOStore, 0, 8, 8); 2911 SDValue Store = DAG.getNode(PPCISD::STD_32, MVT::Other, 2912 DAG.getEntryNode(), Ext64, FIdx, 2913 DAG.getMemOperand(MO)); 2914 // Load the value as a double. 2915 SDValue Ld = DAG.getLoad(MVT::f64, Store, FIdx, NULL, 0); 2916 2917 // FCFID it and return it. 
2918 SDValue FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld); 2919 if (Op.getValueType() == MVT::f32) 2920 FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0)); 2921 return FP; 2922} 2923 2924SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { 2925 /* 2926 The rounding mode is in bits 30:31 of FPSR, and has the following 2927 settings: 2928 00 Round to nearest 2929 01 Round to 0 2930 10 Round to +inf 2931 11 Round to -inf 2932 2933 FLT_ROUNDS, on the other hand, expects the following: 2934 -1 Undefined 2935 0 Round to 0 2936 1 Round to nearest 2937 2 Round to +inf 2938 3 Round to -inf 2939 2940 To perform the conversion, we do: 2941 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) 2942 */ 2943 2944 MachineFunction &MF = DAG.getMachineFunction(); 2945 MVT VT = Op.getValueType(); 2946 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2947 std::vector<MVT> NodeTys; 2948 SDValue MFFSreg, InFlag; 2949 2950 // Save FP Control Word to register 2951 NodeTys.push_back(MVT::f64); // return register 2952 NodeTys.push_back(MVT::Flag); // unused in this context 2953 SDValue Chain = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0); 2954 2955 // Save FP register to stack slot 2956 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); 2957 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); 2958 SDValue Store = DAG.getStore(DAG.getEntryNode(), Chain, 2959 StackSlot, NULL, 0); 2960 2961 // Load FP Control Word from low 32 bits of stack slot. 
2962 SDValue Four = DAG.getConstant(4, PtrVT); 2963 SDValue Addr = DAG.getNode(ISD::ADD, PtrVT, StackSlot, Four); 2964 SDValue CWD = DAG.getLoad(MVT::i32, Store, Addr, NULL, 0); 2965 2966 // Transform as necessary 2967 SDValue CWD1 = 2968 DAG.getNode(ISD::AND, MVT::i32, 2969 CWD, DAG.getConstant(3, MVT::i32)); 2970 SDValue CWD2 = 2971 DAG.getNode(ISD::SRL, MVT::i32, 2972 DAG.getNode(ISD::AND, MVT::i32, 2973 DAG.getNode(ISD::XOR, MVT::i32, 2974 CWD, DAG.getConstant(3, MVT::i32)), 2975 DAG.getConstant(3, MVT::i32)), 2976 DAG.getConstant(1, MVT::i8)); 2977 2978 SDValue RetVal = 2979 DAG.getNode(ISD::XOR, MVT::i32, CWD1, CWD2); 2980 2981 return DAG.getNode((VT.getSizeInBits() < 16 ? 2982 ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal); 2983} 2984 2985SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) { 2986 MVT VT = Op.getValueType(); 2987 unsigned BitWidth = VT.getSizeInBits(); 2988 assert(Op.getNumOperands() == 3 && 2989 VT == Op.getOperand(1).getValueType() && 2990 "Unexpected SHL!"); 2991 2992 // Expand into a bunch of logical ops. Note that these ops 2993 // depend on the PPC behavior for oversized shift amounts. 
2994 SDValue Lo = Op.getOperand(0); 2995 SDValue Hi = Op.getOperand(1); 2996 SDValue Amt = Op.getOperand(2); 2997 MVT AmtVT = Amt.getValueType(); 2998 2999 SDValue Tmp1 = DAG.getNode(ISD::SUB, AmtVT, 3000 DAG.getConstant(BitWidth, AmtVT), Amt); 3001 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, VT, Hi, Amt); 3002 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, VT, Lo, Tmp1); 3003 SDValue Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3); 3004 SDValue Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt, 3005 DAG.getConstant(-BitWidth, AmtVT)); 3006 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, VT, Lo, Tmp5); 3007 SDValue OutHi = DAG.getNode(ISD::OR, VT, Tmp4, Tmp6); 3008 SDValue OutLo = DAG.getNode(PPCISD::SHL, VT, Lo, Amt); 3009 SDValue OutOps[] = { OutLo, OutHi }; 3010 return DAG.getMergeValues(OutOps, 2); 3011} 3012 3013SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) { 3014 MVT VT = Op.getValueType(); 3015 unsigned BitWidth = VT.getSizeInBits(); 3016 assert(Op.getNumOperands() == 3 && 3017 VT == Op.getOperand(1).getValueType() && 3018 "Unexpected SRL!"); 3019 3020 // Expand into a bunch of logical ops. Note that these ops 3021 // depend on the PPC behavior for oversized shift amounts. 
3022 SDValue Lo = Op.getOperand(0); 3023 SDValue Hi = Op.getOperand(1); 3024 SDValue Amt = Op.getOperand(2); 3025 MVT AmtVT = Amt.getValueType(); 3026 3027 SDValue Tmp1 = DAG.getNode(ISD::SUB, AmtVT, 3028 DAG.getConstant(BitWidth, AmtVT), Amt); 3029 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, VT, Lo, Amt); 3030 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, VT, Hi, Tmp1); 3031 SDValue Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3); 3032 SDValue Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt, 3033 DAG.getConstant(-BitWidth, AmtVT)); 3034 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, VT, Hi, Tmp5); 3035 SDValue OutLo = DAG.getNode(ISD::OR, VT, Tmp4, Tmp6); 3036 SDValue OutHi = DAG.getNode(PPCISD::SRL, VT, Hi, Amt); 3037 SDValue OutOps[] = { OutLo, OutHi }; 3038 return DAG.getMergeValues(OutOps, 2); 3039} 3040 3041SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) { 3042 MVT VT = Op.getValueType(); 3043 unsigned BitWidth = VT.getSizeInBits(); 3044 assert(Op.getNumOperands() == 3 && 3045 VT == Op.getOperand(1).getValueType() && 3046 "Unexpected SRA!"); 3047 3048 // Expand into a bunch of logical ops, followed by a select_cc. 
3049 SDValue Lo = Op.getOperand(0); 3050 SDValue Hi = Op.getOperand(1); 3051 SDValue Amt = Op.getOperand(2); 3052 MVT AmtVT = Amt.getValueType(); 3053 3054 SDValue Tmp1 = DAG.getNode(ISD::SUB, AmtVT, 3055 DAG.getConstant(BitWidth, AmtVT), Amt); 3056 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, VT, Lo, Amt); 3057 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, VT, Hi, Tmp1); 3058 SDValue Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3); 3059 SDValue Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt, 3060 DAG.getConstant(-BitWidth, AmtVT)); 3061 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, VT, Hi, Tmp5); 3062 SDValue OutHi = DAG.getNode(PPCISD::SRA, VT, Hi, Amt); 3063 SDValue OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, AmtVT), 3064 Tmp4, Tmp6, ISD::SETLE); 3065 SDValue OutOps[] = { OutLo, OutHi }; 3066 return DAG.getMergeValues(OutOps, 2); 3067} 3068 3069//===----------------------------------------------------------------------===// 3070// Vector related lowering. 3071// 3072 3073// If this is a vector of constants or undefs, get the bits. A bit in 3074// UndefBits is set if the corresponding element of the vector is an 3075// ISD::UNDEF value. For undefs, the corresponding VectorBits values are 3076// zero. Return true if this is not an array of constants, false if it is. 3077// 3078static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2], 3079 uint64_t UndefBits[2]) { 3080 // Start with zero'd results. 3081 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0; 3082 3083 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits(); 3084 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { 3085 SDValue OpVal = BV->getOperand(i); 3086 3087 unsigned PartNo = i >= e/2; // In the upper 128 bits? 3088 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t. 
3089 3090 uint64_t EltBits = 0; 3091 if (OpVal.getOpcode() == ISD::UNDEF) { 3092 uint64_t EltUndefBits = ~0U >> (32-EltBitSize); 3093 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize); 3094 continue; 3095 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 3096 EltBits = CN->getValue() & (~0U >> (32-EltBitSize)); 3097 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 3098 assert(CN->getValueType(0) == MVT::f32 && 3099 "Only one legal FP vector type!"); 3100 EltBits = FloatToBits(CN->getValueAPF().convertToFloat()); 3101 } else { 3102 // Nonconstant element. 3103 return true; 3104 } 3105 3106 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize); 3107 } 3108 3109 //printf("%llx %llx %llx %llx\n", 3110 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]); 3111 return false; 3112} 3113 3114// If this is a splat (repetition) of a value across the whole vector, return 3115// the smallest size that splats it. For example, "0x01010101010101..." is a 3116// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and 3117// SplatSize = 1 byte. 3118static bool isConstantSplat(const uint64_t Bits128[2], 3119 const uint64_t Undef128[2], 3120 unsigned &SplatBits, unsigned &SplatUndef, 3121 unsigned &SplatSize) { 3122 3123 // Don't let undefs prevent splats from matching. See if the top 64-bits are 3124 // the same as the lower 64-bits, ignoring undefs. 3125 if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0])) 3126 return false; // Can't be a splat if two pieces don't match. 3127 3128 uint64_t Bits64 = Bits128[0] | Bits128[1]; 3129 uint64_t Undef64 = Undef128[0] & Undef128[1]; 3130 3131 // Check that the top 32-bits are the same as the lower 32-bits, ignoring 3132 // undefs. 3133 if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64)) 3134 return false; // Can't be a splat if two pieces don't match. 
3135 3136 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32); 3137 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32); 3138 3139 // If the top 16-bits are different than the lower 16-bits, ignoring 3140 // undefs, we have an i32 splat. 3141 if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) { 3142 SplatBits = Bits32; 3143 SplatUndef = Undef32; 3144 SplatSize = 4; 3145 return true; 3146 } 3147 3148 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16); 3149 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16); 3150 3151 // If the top 8-bits are different than the lower 8-bits, ignoring 3152 // undefs, we have an i16 splat. 3153 if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) { 3154 SplatBits = Bits16; 3155 SplatUndef = Undef16; 3156 SplatSize = 2; 3157 return true; 3158 } 3159 3160 // Otherwise, we have an 8-bit splat. 3161 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8); 3162 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8); 3163 SplatSize = 1; 3164 return true; 3165} 3166 3167/// BuildSplatI - Build a canonical splati of Val with an element size of 3168/// SplatSize. Cast the result to VT. 3169static SDValue BuildSplatI(int Val, unsigned SplatSize, MVT VT, 3170 SelectionDAG &DAG) { 3171 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 3172 3173 static const MVT VTys[] = { // canonical VT to use for each size. 3174 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 3175 }; 3176 3177 MVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 3178 3179 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 3180 if (Val == -1) 3181 SplatSize = 1; 3182 3183 MVT CanonicalVT = VTys[SplatSize-1]; 3184 3185 // Build a canonical splat for this value. 
3186 SDValue Elt = DAG.getConstant(Val, CanonicalVT.getVectorElementType()); 3187 SmallVector<SDValue, 8> Ops; 3188 Ops.assign(CanonicalVT.getVectorNumElements(), Elt); 3189 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, 3190 &Ops[0], Ops.size()); 3191 return DAG.getNode(ISD::BIT_CONVERT, ReqVT, Res); 3192} 3193 3194/// BuildIntrinsicOp - Return a binary operator intrinsic node with the 3195/// specified intrinsic ID. 3196static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, 3197 SelectionDAG &DAG, 3198 MVT DestVT = MVT::Other) { 3199 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 3200 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT, 3201 DAG.getConstant(IID, MVT::i32), LHS, RHS); 3202} 3203 3204/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 3205/// specified intrinsic ID. 3206static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, 3207 SDValue Op2, SelectionDAG &DAG, 3208 MVT DestVT = MVT::Other) { 3209 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 3210 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT, 3211 DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); 3212} 3213 3214 3215/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 3216/// amount. The result has the specified value type. 3217static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, 3218 MVT VT, SelectionDAG &DAG) { 3219 // Force LHS/RHS to be the right type. 
3220 LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS); 3221 RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS); 3222 3223 SDValue Ops[16]; 3224 for (unsigned i = 0; i != 16; ++i) 3225 Ops[i] = DAG.getConstant(i+Amt, MVT::i8); 3226 SDValue T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS, 3227 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops,16)); 3228 return DAG.getNode(ISD::BIT_CONVERT, VT, T); 3229} 3230 3231// If this is a case we can't handle, return null and let the default 3232// expansion code take care of it. If we CAN select this case, and if it 3233// selects to a single instruction, return Op. Otherwise, if we can codegen 3234// this case more efficiently than a constant pool load, lower it to the 3235// sequence of ops that should be used. 3236SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, 3237 SelectionDAG &DAG) { 3238 // If this is a vector of constants or undefs, get the bits. A bit in 3239 // UndefBits is set if the corresponding element of the vector is an 3240 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are 3241 // zero. 3242 uint64_t VectorBits[2]; 3243 uint64_t UndefBits[2]; 3244 if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)) 3245 return SDValue(); // Not a constant vector. 3246 3247 // If this is a splat (repetition) of a value across the whole vector, return 3248 // the smallest size that splats it. For example, "0x01010101010101..." is a 3249 // splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and 3250 // SplatSize = 1 byte. 3251 unsigned SplatBits, SplatUndef, SplatSize; 3252 if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){ 3253 bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0; 3254 3255 // First, handle single instruction cases. 3256 3257 // All zeros? 3258 if (SplatBits == 0) { 3259 // Canonicalize all zero vectors to be v4i32. 
3260 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 3261 SDValue Z = DAG.getConstant(0, MVT::i32); 3262 Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z); 3263 Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z); 3264 } 3265 return Op; 3266 } 3267 3268 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 3269 int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize); 3270 if (SextVal >= -16 && SextVal <= 15) 3271 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG); 3272 3273 3274 // Two instruction sequences. 3275 3276 // If this value is in the range [-32,30] and is even, use: 3277 // tmp = VSPLTI[bhw], result = add tmp, tmp 3278 if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) { 3279 SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG); 3280 Res = DAG.getNode(ISD::ADD, Res.getValueType(), Res, Res); 3281 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); 3282 } 3283 3284 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 3285 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 3286 // for fneg/fabs. 3287 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 3288 // Make -1 and vspltisw -1: 3289 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG); 3290 3291 // Make the VSLW intrinsic, computing 0x8000_0000. 3292 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 3293 OnesV, DAG); 3294 3295 // xor by OnesV to invert it. 3296 Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV); 3297 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); 3298 } 3299 3300 // Check to see if this is a wide variety of vsplti*, binop self cases. 
3301 unsigned SplatBitSize = SplatSize*8; 3302 static const signed char SplatCsts[] = { 3303 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 3304 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 3305 }; 3306 3307 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { 3308 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 3309 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' 3310 int i = SplatCsts[idx]; 3311 3312 // Figure out what shift amount will be used by altivec if shifted by i in 3313 // this splat size. 3314 unsigned TypeShiftAmt = i & (SplatBitSize-1); 3315 3316 // vsplti + shl self. 3317 if (SextVal == (i << (int)TypeShiftAmt)) { 3318 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG); 3319 static const unsigned IIDs[] = { // Intrinsic to use for each size. 3320 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 3321 Intrinsic::ppc_altivec_vslw 3322 }; 3323 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG); 3324 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); 3325 } 3326 3327 // vsplti + srl self. 3328 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 3329 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG); 3330 static const unsigned IIDs[] = { // Intrinsic to use for each size. 3331 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 3332 Intrinsic::ppc_altivec_vsrw 3333 }; 3334 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG); 3335 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); 3336 } 3337 3338 // vsplti + sra self. 3339 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 3340 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG); 3341 static const unsigned IIDs[] = { // Intrinsic to use for each size. 
3342 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 3343 Intrinsic::ppc_altivec_vsraw 3344 }; 3345 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG); 3346 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); 3347 } 3348 3349 // vsplti + rol self. 3350 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | 3351 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { 3352 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG); 3353 static const unsigned IIDs[] = { // Intrinsic to use for each size. 3354 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, 3355 Intrinsic::ppc_altivec_vrlw 3356 }; 3357 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG); 3358 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); 3359 } 3360 3361 // t = vsplti c, result = vsldoi t, t, 1 3362 if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) { 3363 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG); 3364 return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG); 3365 } 3366 // t = vsplti c, result = vsldoi t, t, 2 3367 if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) { 3368 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG); 3369 return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG); 3370 } 3371 // t = vsplti c, result = vsldoi t, t, 3 3372 if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) { 3373 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG); 3374 return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG); 3375 } 3376 } 3377 3378 // Three instruction sequences. 3379 3380 // Odd, in range [17,31]: (vsplti C)-(vsplti -16). 3381 if (SextVal >= 0 && SextVal <= 31) { 3382 SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG); 3383 SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG); 3384 LHS = DAG.getNode(ISD::SUB, LHS.getValueType(), LHS, RHS); 3385 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS); 3386 } 3387 // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16). 
3388 if (SextVal >= -31 && SextVal <= 0) { 3389 SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG); 3390 SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG); 3391 LHS = DAG.getNode(ISD::ADD, LHS.getValueType(), LHS, RHS); 3392 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS); 3393 } 3394 } 3395 3396 return SDValue(); 3397} 3398 3399/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 3400/// the specified operations to build the shuffle. 3401static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 3402 SDValue RHS, SelectionDAG &DAG) { 3403 unsigned OpNum = (PFEntry >> 26) & 0x0F; 3404 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 3405 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 3406 3407 enum { 3408 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 3409 OP_VMRGHW, 3410 OP_VMRGLW, 3411 OP_VSPLTISW0, 3412 OP_VSPLTISW1, 3413 OP_VSPLTISW2, 3414 OP_VSPLTISW3, 3415 OP_VSLDOI4, 3416 OP_VSLDOI8, 3417 OP_VSLDOI12 3418 }; 3419 3420 if (OpNum == OP_COPY) { 3421 if (LHSID == (1*9+2)*9+3) return LHS; 3422 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 3423 return RHS; 3424 } 3425 3426 SDValue OpLHS, OpRHS; 3427 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG); 3428 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG); 3429 3430 unsigned ShufIdxs[16]; 3431 switch (OpNum) { 3432 default: assert(0 && "Unknown i32 permute!"); 3433 case OP_VMRGHW: 3434 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 3435 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 3436 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 3437 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 3438 break; 3439 case OP_VMRGLW: 3440 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 3441 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 
6] = 26; ShufIdxs[ 7] = 27; 3442 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 3443 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 3444 break; 3445 case OP_VSPLTISW0: 3446 for (unsigned i = 0; i != 16; ++i) 3447 ShufIdxs[i] = (i&3)+0; 3448 break; 3449 case OP_VSPLTISW1: 3450 for (unsigned i = 0; i != 16; ++i) 3451 ShufIdxs[i] = (i&3)+4; 3452 break; 3453 case OP_VSPLTISW2: 3454 for (unsigned i = 0; i != 16; ++i) 3455 ShufIdxs[i] = (i&3)+8; 3456 break; 3457 case OP_VSPLTISW3: 3458 for (unsigned i = 0; i != 16; ++i) 3459 ShufIdxs[i] = (i&3)+12; 3460 break; 3461 case OP_VSLDOI4: 3462 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG); 3463 case OP_VSLDOI8: 3464 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG); 3465 case OP_VSLDOI12: 3466 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG); 3467 } 3468 SDValue Ops[16]; 3469 for (unsigned i = 0; i != 16; ++i) 3470 Ops[i] = DAG.getConstant(ShufIdxs[i], MVT::i8); 3471 3472 return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS, 3473 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops, 16)); 3474} 3475 3476/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 3477/// is a shuffle we can handle in a single instruction, return it. Otherwise, 3478/// return the code it can be lowered into. Worst case, it can always be 3479/// lowered into a vperm. 3480SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 3481 SelectionDAG &DAG) { 3482 SDValue V1 = Op.getOperand(0); 3483 SDValue V2 = Op.getOperand(1); 3484 SDValue PermMask = Op.getOperand(2); 3485 3486 // Cases that are handled by instructions that take permute immediates 3487 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 3488 // selected by the instruction selector. 
3489 if (V2.getOpcode() == ISD::UNDEF) { 3490 if (PPC::isSplatShuffleMask(PermMask.getNode(), 1) || 3491 PPC::isSplatShuffleMask(PermMask.getNode(), 2) || 3492 PPC::isSplatShuffleMask(PermMask.getNode(), 4) || 3493 PPC::isVPKUWUMShuffleMask(PermMask.getNode(), true) || 3494 PPC::isVPKUHUMShuffleMask(PermMask.getNode(), true) || 3495 PPC::isVSLDOIShuffleMask(PermMask.getNode(), true) != -1 || 3496 PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, true) || 3497 PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, true) || 3498 PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, true) || 3499 PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, true) || 3500 PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, true) || 3501 PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, true)) { 3502 return Op; 3503 } 3504 } 3505 3506 // Altivec has a variety of "shuffle immediates" that take two vector inputs 3507 // and produce a fixed permutation. If any of these match, do not lower to 3508 // VPERM. 3509 if (PPC::isVPKUWUMShuffleMask(PermMask.getNode(), false) || 3510 PPC::isVPKUHUMShuffleMask(PermMask.getNode(), false) || 3511 PPC::isVSLDOIShuffleMask(PermMask.getNode(), false) != -1 || 3512 PPC::isVMRGLShuffleMask(PermMask.getNode(), 1, false) || 3513 PPC::isVMRGLShuffleMask(PermMask.getNode(), 2, false) || 3514 PPC::isVMRGLShuffleMask(PermMask.getNode(), 4, false) || 3515 PPC::isVMRGHShuffleMask(PermMask.getNode(), 1, false) || 3516 PPC::isVMRGHShuffleMask(PermMask.getNode(), 2, false) || 3517 PPC::isVMRGHShuffleMask(PermMask.getNode(), 4, false)) 3518 return Op; 3519 3520 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 3521 // perfect shuffle table to emit an optimal matching sequence. 3522 unsigned PFIndexes[4]; 3523 bool isFourElementShuffle = true; 3524 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 3525 unsigned EltNo = 8; // Start out undef. 3526 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 
3527 if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF) 3528 continue; // Undef, ignore it. 3529 3530 unsigned ByteSource = 3531 cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue(); 3532 if ((ByteSource & 3) != j) { 3533 isFourElementShuffle = false; 3534 break; 3535 } 3536 3537 if (EltNo == 8) { 3538 EltNo = ByteSource/4; 3539 } else if (EltNo != ByteSource/4) { 3540 isFourElementShuffle = false; 3541 break; 3542 } 3543 } 3544 PFIndexes[i] = EltNo; 3545 } 3546 3547 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 3548 // perfect shuffle vector to determine if it is cost effective to do this as 3549 // discrete instructions, or whether we should use a vperm. 3550 if (isFourElementShuffle) { 3551 // Compute the index in the perfect shuffle table. 3552 unsigned PFTableIndex = 3553 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 3554 3555 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 3556 unsigned Cost = (PFEntry >> 30); 3557 3558 // Determining when to avoid vperm is tricky. Many things affect the cost 3559 // of vperm, particularly how many times the perm mask needs to be computed. 3560 // For example, if the perm mask can be hoisted out of a loop or is already 3561 // used (perhaps because there are multiple permutes with the same shuffle 3562 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 3563 // the loop requires an extra register. 3564 // 3565 // As a compromise, we only emit discrete instructions if the shuffle can be 3566 // generated in 3 or fewer operations. When we have loop information 3567 // available, if this block is within a loop, we should avoid using vperm 3568 // for 3-operation perms and use a constant pool load instead. 3569 if (Cost < 3) 3570 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG); 3571 } 3572 3573 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 3574 // vector that will get spilled to the constant pool. 
3575 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 3576 3577 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 3578 // that it is in input element units, not in bytes. Convert now. 3579 MVT EltVT = V1.getValueType().getVectorElementType(); 3580 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 3581 3582 SmallVector<SDValue, 16> ResultMask; 3583 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { 3584 unsigned SrcElt; 3585 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) 3586 SrcElt = 0; 3587 else 3588 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue(); 3589 3590 for (unsigned j = 0; j != BytesPerElement; ++j) 3591 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, 3592 MVT::i8)); 3593 } 3594 3595 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, 3596 &ResultMask[0], ResultMask.size()); 3597 return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask); 3598} 3599 3600/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an 3601/// altivec comparison. If it is, return true and fill in Opc/isDot with 3602/// information about the intrinsic. 3603static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, 3604 bool &isDot) { 3605 unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getValue(); 3606 CompareOpc = -1; 3607 isDot = false; 3608 switch (IntrinsicID) { 3609 default: return false; 3610 // Comparison predicates. 
3611 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 3612 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 3613 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 3614 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 3615 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 3616 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 3617 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 3618 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 3619 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 3620 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 3621 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 3622 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 3623 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 3624 3625 // Normal Comparisons. 
3626 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 3627 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 3628 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 3629 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 3630 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 3631 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 3632 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 3633 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 3634 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 3635 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 3636 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 3637 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 3638 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 3639 } 3640 return true; 3641} 3642 3643/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 3644/// lower, do it, otherwise return null. 3645SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 3646 SelectionDAG &DAG) { 3647 // If this is a lowered altivec predicate compare, CompareOpc is set to the 3648 // opcode number of the comparison. 3649 int CompareOpc; 3650 bool isDot; 3651 if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) 3652 return SDValue(); // Don't custom lower most intrinsics. 3653 3654 // If this is a non-dot comparison, make the VCMP node and we are done. 3655 if (!isDot) { 3656 SDValue Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(), 3657 Op.getOperand(1), Op.getOperand(2), 3658 DAG.getConstant(CompareOpc, MVT::i32)); 3659 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp); 3660 } 3661 3662 // Create the PPCISD altivec 'dot' comparison node. 
3663 SDValue Ops[] = { 3664 Op.getOperand(2), // LHS 3665 Op.getOperand(3), // RHS 3666 DAG.getConstant(CompareOpc, MVT::i32) 3667 }; 3668 std::vector<MVT> VTs; 3669 VTs.push_back(Op.getOperand(2).getValueType()); 3670 VTs.push_back(MVT::Flag); 3671 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3); 3672 3673 // Now that we have the comparison, emit a copy from the CR to a GPR. 3674 // This is flagged to the above dot comparison. 3675 SDValue Flags = DAG.getNode(PPCISD::MFCR, MVT::i32, 3676 DAG.getRegister(PPC::CR6, MVT::i32), 3677 CompNode.getValue(1)); 3678 3679 // Unpack the result based on how the target uses it. 3680 unsigned BitNo; // Bit # of CR6. 3681 bool InvertBit; // Invert result? 3682 switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) { 3683 default: // Can't happen, don't crash on invalid number though. 3684 case 0: // Return the value of the EQ bit of CR6. 3685 BitNo = 0; InvertBit = false; 3686 break; 3687 case 1: // Return the inverted value of the EQ bit of CR6. 3688 BitNo = 0; InvertBit = true; 3689 break; 3690 case 2: // Return the value of the LT bit of CR6. 3691 BitNo = 2; InvertBit = false; 3692 break; 3693 case 3: // Return the inverted value of the LT bit of CR6. 3694 BitNo = 2; InvertBit = true; 3695 break; 3696 } 3697 3698 // Shift the bit into the low position. 3699 Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags, 3700 DAG.getConstant(8-(3-BitNo), MVT::i32)); 3701 // Isolate the bit. 3702 Flags = DAG.getNode(ISD::AND, MVT::i32, Flags, 3703 DAG.getConstant(1, MVT::i32)); 3704 3705 // If we are supposed to, toggle the bit. 3706 if (InvertBit) 3707 Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags, 3708 DAG.getConstant(1, MVT::i32)); 3709 return Flags; 3710} 3711 3712SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 3713 SelectionDAG &DAG) { 3714 // Create a stack slot that is 16-byte aligned. 
3715 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 3716 int FrameIdx = FrameInfo->CreateStackObject(16, 16); 3717 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3718 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 3719 3720 // Store the input value into Value#0 of the stack slot. 3721 SDValue Store = DAG.getStore(DAG.getEntryNode(), 3722 Op.getOperand(0), FIdx, NULL, 0); 3723 // Load it out. 3724 return DAG.getLoad(Op.getValueType(), Store, FIdx, NULL, 0); 3725} 3726 3727SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) { 3728 if (Op.getValueType() == MVT::v4i32) { 3729 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 3730 3731 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG); 3732 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG); // +16 as shift amt. 3733 3734 SDValue RHSSwap = // = vrlw RHS, 16 3735 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG); 3736 3737 // Shrinkify inputs to v8i16. 3738 LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, LHS); 3739 RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHS); 3740 RHSSwap = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHSSwap); 3741 3742 // Low parts multiplied together, generating 32-bit results (we ignore the 3743 // top parts). 3744 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 3745 LHS, RHS, DAG, MVT::v4i32); 3746 3747 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 3748 LHS, RHSSwap, Zero, DAG, MVT::v4i32); 3749 // Shift the high parts up 16 bits. 
3750 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG); 3751 return DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd); 3752 } else if (Op.getValueType() == MVT::v8i16) { 3753 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 3754 3755 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG); 3756 3757 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 3758 LHS, RHS, Zero, DAG); 3759 } else if (Op.getValueType() == MVT::v16i8) { 3760 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 3761 3762 // Multiply the even 8-bit parts, producing 16-bit sums. 3763 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 3764 LHS, RHS, DAG, MVT::v8i16); 3765 EvenParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, EvenParts); 3766 3767 // Multiply the odd 8-bit parts, producing 16-bit sums. 3768 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 3769 LHS, RHS, DAG, MVT::v8i16); 3770 OddParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, OddParts); 3771 3772 // Merge the results together. 3773 SDValue Ops[16]; 3774 for (unsigned i = 0; i != 8; ++i) { 3775 Ops[i*2 ] = DAG.getConstant(2*i+1, MVT::i8); 3776 Ops[i*2+1] = DAG.getConstant(2*i+1+16, MVT::i8); 3777 } 3778 return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, EvenParts, OddParts, 3779 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops, 16)); 3780 } else { 3781 assert(0 && "Unknown mul to lower!"); 3782 abort(); 3783 } 3784} 3785 3786/// LowerOperation - Provide custom lowering hooks for some operations. 
3787/// 3788SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { 3789 switch (Op.getOpcode()) { 3790 default: assert(0 && "Wasn't expecting to be able to lower this!"); 3791 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 3792 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 3793 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 3794 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 3795 case ISD::SETCC: return LowerSETCC(Op, DAG); 3796 case ISD::VASTART: 3797 return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, 3798 VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); 3799 3800 case ISD::VAARG: 3801 return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, 3802 VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); 3803 3804 case ISD::FORMAL_ARGUMENTS: 3805 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex, 3806 VarArgsStackOffset, VarArgsNumGPR, 3807 VarArgsNumFPR, PPCSubTarget); 3808 3809 case ISD::CALL: return LowerCALL(Op, DAG, PPCSubTarget, 3810 getTargetMachine()); 3811 case ISD::RET: return LowerRET(Op, DAG, getTargetMachine()); 3812 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); 3813 case ISD::DYNAMIC_STACKALLOC: 3814 return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); 3815 3816 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 3817 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 3818 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 3819 case ISD::FP_ROUND_INREG: return LowerFP_ROUND_INREG(Op, DAG); 3820 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 3821 3822 // Lower 64-bit shifts. 3823 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); 3824 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); 3825 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); 3826 3827 // Vector-related lowering. 
3828 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 3829 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 3830 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 3831 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 3832 case ISD::MUL: return LowerMUL(Op, DAG); 3833 3834 // Frame & Return address. 3835 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 3836 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 3837 } 3838 return SDValue(); 3839} 3840 3841SDNode *PPCTargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG) { 3842 switch (N->getOpcode()) { 3843 default: assert(0 && "Wasn't expecting to be able to lower this!"); 3844 case ISD::FP_TO_SINT: { 3845 SDValue Res = LowerFP_TO_SINT(SDValue(N, 0), DAG); 3846 // Use MERGE_VALUES to drop the chain result value and get a node with one 3847 // result. This requires turning off getMergeValues simplification, since 3848 // otherwise it will give us Res back. 3849 return DAG.getMergeValues(&Res, 1, false).getNode(); 3850 } 3851 } 3852} 3853 3854 3855//===----------------------------------------------------------------------===// 3856// Other Lowering Code 3857//===----------------------------------------------------------------------===// 3858 3859MachineBasicBlock * 3860PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 3861 bool is64bit, unsigned BinOpcode) { 3862 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
3863 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3864 3865 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3866 MachineFunction *F = BB->getParent(); 3867 MachineFunction::iterator It = BB; 3868 ++It; 3869 3870 unsigned dest = MI->getOperand(0).getReg(); 3871 unsigned ptrA = MI->getOperand(1).getReg(); 3872 unsigned ptrB = MI->getOperand(2).getReg(); 3873 unsigned incr = MI->getOperand(3).getReg(); 3874 3875 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 3876 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 3877 F->insert(It, loopMBB); 3878 F->insert(It, exitMBB); 3879 exitMBB->transferSuccessors(BB); 3880 3881 MachineRegisterInfo &RegInfo = F->getRegInfo(); 3882 unsigned TmpReg = (!BinOpcode) ? incr : 3883 RegInfo.createVirtualRegister( 3884 is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : 3885 (const TargetRegisterClass *) &PPC::GPRCRegClass); 3886 3887 // thisMBB: 3888 // ... 3889 // fallthrough --> loopMBB 3890 BB->addSuccessor(loopMBB); 3891 3892 // loopMBB: 3893 // l[wd]arx dest, ptr 3894 // add r0, dest, incr 3895 // st[wd]cx. r0, ptr 3896 // bne- loopMBB 3897 // fallthrough --> exitMBB 3898 BB = loopMBB; 3899 BuildMI(BB, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) 3900 .addReg(ptrA).addReg(ptrB); 3901 if (BinOpcode) 3902 BuildMI(BB, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); 3903 BuildMI(BB, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 3904 .addReg(TmpReg).addReg(ptrA).addReg(ptrB); 3905 BuildMI(BB, TII->get(PPC::BCC)) 3906 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 3907 BB->addSuccessor(loopMBB); 3908 BB->addSuccessor(exitMBB); 3909 3910 // exitMBB: 3911 // ... 3912 BB = exitMBB; 3913 return BB; 3914} 3915 3916MachineBasicBlock * 3917PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, 3918 MachineBasicBlock *BB, 3919 bool is8bit, // operation 3920 unsigned BinOpcode) { 3921 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
3922 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3923 // In 64 bit mode we have to use 64 bits for addresses, even though the 3924 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address 3925 // registers without caring whether they're 32 or 64, but here we're 3926 // doing actual arithmetic on the addresses. 3927 bool is64bit = PPCSubTarget.isPPC64(); 3928 3929 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3930 MachineFunction *F = BB->getParent(); 3931 MachineFunction::iterator It = BB; 3932 ++It; 3933 3934 unsigned dest = MI->getOperand(0).getReg(); 3935 unsigned ptrA = MI->getOperand(1).getReg(); 3936 unsigned ptrB = MI->getOperand(2).getReg(); 3937 unsigned incr = MI->getOperand(3).getReg(); 3938 3939 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 3940 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 3941 F->insert(It, loopMBB); 3942 F->insert(It, exitMBB); 3943 exitMBB->transferSuccessors(BB); 3944 3945 MachineRegisterInfo &RegInfo = F->getRegInfo(); 3946 const TargetRegisterClass *RC = 3947 is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : 3948 (const TargetRegisterClass *) &PPC::GPRCRegClass; 3949 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 3950 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 3951 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 3952 unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); 3953 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 3954 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 3955 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 3956 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 3957 unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); 3958 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 3959 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 3960 unsigned Ptr1Reg; 3961 unsigned TmpReg = (!BinOpcode) ? 
Incr2Reg : RegInfo.createVirtualRegister(RC); 3962 3963 // thisMBB: 3964 // ... 3965 // fallthrough --> loopMBB 3966 BB->addSuccessor(loopMBB); 3967 3968 // The 4-byte load must be aligned, while a char or short may be 3969 // anywhere in the word. Hence all this nasty bookkeeping code. 3970 // add ptr1, ptrA, ptrB [copy if ptrA==0] 3971 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 3972 // xori shift, shift1, 24 [16] 3973 // rlwinm ptr, ptr1, 0, 0, 29 3974 // slw incr2, incr, shift 3975 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 3976 // slw mask, mask2, shift 3977 // loopMBB: 3978 // lwarx tmpDest, ptr 3979 // add tmp, tmpDest, incr2 3980 // andc tmp2, tmpDest, mask 3981 // and tmp3, tmp, mask 3982 // or tmp4, tmp3, tmp2 3983 // stwcx. tmp4, ptr 3984 // bne- loopMBB 3985 // fallthrough --> exitMBB 3986 // srw dest, tmpDest, shift 3987 3988 if (ptrA!=PPC::R0) { 3989 Ptr1Reg = RegInfo.createVirtualRegister(RC); 3990 BuildMI(BB, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 3991 .addReg(ptrA).addReg(ptrB); 3992 } else { 3993 Ptr1Reg = ptrB; 3994 } 3995 BuildMI(BB, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 3996 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 3997 BuildMI(BB, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 3998 .addReg(Shift1Reg).addImm(is8bit ? 
24 : 16); 3999 if (is64bit) 4000 BuildMI(BB, TII->get(PPC::RLDICR), PtrReg) 4001 .addReg(Ptr1Reg).addImm(0).addImm(61); 4002 else 4003 BuildMI(BB, TII->get(PPC::RLWINM), PtrReg) 4004 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 4005 BuildMI(BB, TII->get(PPC::SLW), Incr2Reg) 4006 .addReg(incr).addReg(ShiftReg); 4007 if (is8bit) 4008 BuildMI(BB, TII->get(PPC::LI), Mask2Reg).addImm(255); 4009 else { 4010 BuildMI(BB, TII->get(PPC::LI), Mask3Reg).addImm(0); 4011 BuildMI(BB, TII->get(PPC::ORI), Mask2Reg).addReg(Mask3Reg).addImm(65535); 4012 } 4013 BuildMI(BB, TII->get(PPC::SLW), MaskReg) 4014 .addReg(Mask2Reg).addReg(ShiftReg); 4015 4016 BB = loopMBB; 4017 BuildMI(BB, TII->get(PPC::LWARX), TmpDestReg) 4018 .addReg(PPC::R0).addReg(PtrReg); 4019 if (BinOpcode) 4020 BuildMI(BB, TII->get(BinOpcode), TmpReg) 4021 .addReg(Incr2Reg).addReg(TmpDestReg); 4022 BuildMI(BB, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) 4023 .addReg(TmpDestReg).addReg(MaskReg); 4024 BuildMI(BB, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) 4025 .addReg(TmpReg).addReg(MaskReg); 4026 BuildMI(BB, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg) 4027 .addReg(Tmp3Reg).addReg(Tmp2Reg); 4028 BuildMI(BB, TII->get(PPC::STWCX)) 4029 .addReg(Tmp4Reg).addReg(PPC::R0).addReg(PtrReg); 4030 BuildMI(BB, TII->get(PPC::BCC)) 4031 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 4032 BB->addSuccessor(loopMBB); 4033 BB->addSuccessor(exitMBB); 4034 4035 // exitMBB: 4036 // ... 4037 BB = exitMBB; 4038 BuildMI(BB, TII->get(PPC::SRW), dest).addReg(TmpDestReg).addReg(ShiftReg); 4039 return BB; 4040} 4041 4042MachineBasicBlock * 4043PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 4044 MachineBasicBlock *BB) { 4045 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 4046 4047 // To "insert" these instructions we actually have to insert their 4048 // control-flow patterns. 
4049 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4050 MachineFunction::iterator It = BB; 4051 ++It; 4052 4053 MachineFunction *F = BB->getParent(); 4054 4055 if (MI->getOpcode() == PPC::SELECT_CC_I4 || 4056 MI->getOpcode() == PPC::SELECT_CC_I8 || 4057 MI->getOpcode() == PPC::SELECT_CC_F4 || 4058 MI->getOpcode() == PPC::SELECT_CC_F8 || 4059 MI->getOpcode() == PPC::SELECT_CC_VRRC) { 4060 4061 // The incoming instruction knows the destination vreg to set, the 4062 // condition code register to branch on, the true/false values to 4063 // select between, and a branch opcode to use. 4064 4065 // thisMBB: 4066 // ... 4067 // TrueVal = ... 4068 // cmpTY ccX, r1, r2 4069 // bCC copy1MBB 4070 // fallthrough --> copy0MBB 4071 MachineBasicBlock *thisMBB = BB; 4072 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 4073 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 4074 unsigned SelectPred = MI->getOperand(4).getImm(); 4075 BuildMI(BB, TII->get(PPC::BCC)) 4076 .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 4077 F->insert(It, copy0MBB); 4078 F->insert(It, sinkMBB); 4079 // Update machine-CFG edges by transferring all successors of the current 4080 // block to the new block which will contain the Phi node for the select. 4081 sinkMBB->transferSuccessors(BB); 4082 // Next, add the true and fallthrough blocks as its successors. 4083 BB->addSuccessor(copy0MBB); 4084 BB->addSuccessor(sinkMBB); 4085 4086 // copy0MBB: 4087 // %FalseValue = ... 4088 // # fallthrough to sinkMBB 4089 BB = copy0MBB; 4090 4091 // Update machine-CFG edges 4092 BB->addSuccessor(sinkMBB); 4093 4094 // sinkMBB: 4095 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 4096 // ... 
4097 BB = sinkMBB; 4098 BuildMI(BB, TII->get(PPC::PHI), MI->getOperand(0).getReg()) 4099 .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) 4100 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 4101 } 4102 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) 4103 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); 4104 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) 4105 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); 4106 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) 4107 BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4); 4108 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) 4109 BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8); 4110 4111 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8) 4112 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); 4113 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16) 4114 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); 4115 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32) 4116 BB = EmitAtomicBinary(MI, BB, false, PPC::AND); 4117 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64) 4118 BB = EmitAtomicBinary(MI, BB, true, PPC::AND8); 4119 4120 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8) 4121 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); 4122 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16) 4123 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); 4124 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32) 4125 BB = EmitAtomicBinary(MI, BB, false, PPC::OR); 4126 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64) 4127 BB = EmitAtomicBinary(MI, BB, true, PPC::OR8); 4128 4129 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) 4130 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); 4131 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) 4132 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); 4133 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) 4134 BB = EmitAtomicBinary(MI, BB, false, PPC::XOR); 4135 else if (MI->getOpcode() == 
PPC::ATOMIC_LOAD_XOR_I64) 4136 BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8); 4137 4138 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) 4139 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); 4140 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) 4141 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); 4142 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) 4143 BB = EmitAtomicBinary(MI, BB, false, PPC::NAND); 4144 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) 4145 BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8); 4146 4147 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) 4148 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); 4149 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) 4150 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); 4151 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) 4152 BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF); 4153 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) 4154 BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8); 4155 4156 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8) 4157 BB = EmitPartwordAtomicBinary(MI, BB, true, 0); 4158 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16) 4159 BB = EmitPartwordAtomicBinary(MI, BB, false, 0); 4160 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32) 4161 BB = EmitAtomicBinary(MI, BB, false, 0); 4162 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64) 4163 BB = EmitAtomicBinary(MI, BB, true, 0); 4164 4165 else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || 4166 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) { 4167 bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; 4168 4169 unsigned dest = MI->getOperand(0).getReg(); 4170 unsigned ptrA = MI->getOperand(1).getReg(); 4171 unsigned ptrB = MI->getOperand(2).getReg(); 4172 unsigned oldval = MI->getOperand(3).getReg(); 4173 unsigned newval = MI->getOperand(4).getReg(); 4174 4175 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 4176 MachineBasicBlock *loop2MBB = 
F->CreateMachineBasicBlock(LLVM_BB); 4177 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 4178 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 4179 F->insert(It, loop1MBB); 4180 F->insert(It, loop2MBB); 4181 F->insert(It, midMBB); 4182 F->insert(It, exitMBB); 4183 exitMBB->transferSuccessors(BB); 4184 4185 // thisMBB: 4186 // ... 4187 // fallthrough --> loopMBB 4188 BB->addSuccessor(loop1MBB); 4189 4190 // loop1MBB: 4191 // l[wd]arx dest, ptr 4192 // cmp[wd] dest, oldval 4193 // bne- midMBB 4194 // loop2MBB: 4195 // st[wd]cx. newval, ptr 4196 // bne- loopMBB 4197 // b exitBB 4198 // midMBB: 4199 // st[wd]cx. dest, ptr 4200 // exitBB: 4201 BB = loop1MBB; 4202 BuildMI(BB, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) 4203 .addReg(ptrA).addReg(ptrB); 4204 BuildMI(BB, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) 4205 .addReg(oldval).addReg(dest); 4206 BuildMI(BB, TII->get(PPC::BCC)) 4207 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 4208 BB->addSuccessor(loop2MBB); 4209 BB->addSuccessor(midMBB); 4210 4211 BB = loop2MBB; 4212 BuildMI(BB, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 4213 .addReg(newval).addReg(ptrA).addReg(ptrB); 4214 BuildMI(BB, TII->get(PPC::BCC)) 4215 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 4216 BuildMI(BB, TII->get(PPC::B)).addMBB(exitMBB); 4217 BB->addSuccessor(loop1MBB); 4218 BB->addSuccessor(exitMBB); 4219 4220 BB = midMBB; 4221 BuildMI(BB, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 4222 .addReg(dest).addReg(ptrA).addReg(ptrB); 4223 BB->addSuccessor(exitMBB); 4224 4225 // exitMBB: 4226 // ... 4227 BB = exitMBB; 4228 } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || 4229 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { 4230 // We must use 64-bit registers for addresses when targeting 64-bit, 4231 // since we're actually doing arithmetic on them. Other registers 4232 // can be 32-bit. 
4233 bool is64bit = PPCSubTarget.isPPC64(); 4234 bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; 4235 4236 unsigned dest = MI->getOperand(0).getReg(); 4237 unsigned ptrA = MI->getOperand(1).getReg(); 4238 unsigned ptrB = MI->getOperand(2).getReg(); 4239 unsigned oldval = MI->getOperand(3).getReg(); 4240 unsigned newval = MI->getOperand(4).getReg(); 4241 4242 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 4243 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 4244 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 4245 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 4246 F->insert(It, loop1MBB); 4247 F->insert(It, loop2MBB); 4248 F->insert(It, midMBB); 4249 F->insert(It, exitMBB); 4250 exitMBB->transferSuccessors(BB); 4251 4252 MachineRegisterInfo &RegInfo = F->getRegInfo(); 4253 const TargetRegisterClass *RC = 4254 is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : 4255 (const TargetRegisterClass *) &PPC::GPRCRegClass; 4256 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 4257 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 4258 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 4259 unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); 4260 unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); 4261 unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); 4262 unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); 4263 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 4264 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 4265 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 4266 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 4267 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 4268 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 4269 unsigned Ptr1Reg; 4270 unsigned TmpReg = RegInfo.createVirtualRegister(RC); 4271 // thisMBB: 4272 // ... 
4273 // fallthrough --> loopMBB 4274 BB->addSuccessor(loop1MBB); 4275 4276 // The 4-byte load must be aligned, while a char or short may be 4277 // anywhere in the word. Hence all this nasty bookkeeping code. 4278 // add ptr1, ptrA, ptrB [copy if ptrA==0] 4279 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 4280 // xori shift, shift1, 24 [16] 4281 // rlwinm ptr, ptr1, 0, 0, 29 4282 // slw newval2, newval, shift 4283 // slw oldval2, oldval,shift 4284 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 4285 // slw mask, mask2, shift 4286 // and newval3, newval2, mask 4287 // and oldval3, oldval2, mask 4288 // loop1MBB: 4289 // lwarx tmpDest, ptr 4290 // and tmp, tmpDest, mask 4291 // cmpw tmp, oldval3 4292 // bne- midMBB 4293 // loop2MBB: 4294 // andc tmp2, tmpDest, mask 4295 // or tmp4, tmp2, newval3 4296 // stwcx. tmp4, ptr 4297 // bne- loop1MBB 4298 // b exitBB 4299 // midMBB: 4300 // stwcx. tmpDest, ptr 4301 // exitBB: 4302 // srw dest, tmpDest, shift 4303 if (ptrA!=PPC::R0) { 4304 Ptr1Reg = RegInfo.createVirtualRegister(RC); 4305 BuildMI(BB, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 4306 .addReg(ptrA).addReg(ptrB); 4307 } else { 4308 Ptr1Reg = ptrB; 4309 } 4310 BuildMI(BB, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 4311 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 4312 BuildMI(BB, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 4313 .addReg(Shift1Reg).addImm(is8bit ? 
24 : 16); 4314 if (is64bit) 4315 BuildMI(BB, TII->get(PPC::RLDICR), PtrReg) 4316 .addReg(Ptr1Reg).addImm(0).addImm(61); 4317 else 4318 BuildMI(BB, TII->get(PPC::RLWINM), PtrReg) 4319 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 4320 BuildMI(BB, TII->get(PPC::SLW), NewVal2Reg) 4321 .addReg(newval).addReg(ShiftReg); 4322 BuildMI(BB, TII->get(PPC::SLW), OldVal2Reg) 4323 .addReg(oldval).addReg(ShiftReg); 4324 if (is8bit) 4325 BuildMI(BB, TII->get(PPC::LI), Mask2Reg).addImm(255); 4326 else { 4327 BuildMI(BB, TII->get(PPC::LI), Mask3Reg).addImm(0); 4328 BuildMI(BB, TII->get(PPC::ORI), Mask2Reg).addReg(Mask3Reg).addImm(65535); 4329 } 4330 BuildMI(BB, TII->get(PPC::SLW), MaskReg) 4331 .addReg(Mask2Reg).addReg(ShiftReg); 4332 BuildMI(BB, TII->get(PPC::AND), NewVal3Reg) 4333 .addReg(NewVal2Reg).addReg(MaskReg); 4334 BuildMI(BB, TII->get(PPC::AND), OldVal3Reg) 4335 .addReg(OldVal2Reg).addReg(MaskReg); 4336 4337 BB = loop1MBB; 4338 BuildMI(BB, TII->get(PPC::LWARX), TmpDestReg) 4339 .addReg(PPC::R0).addReg(PtrReg); 4340 BuildMI(BB, TII->get(PPC::AND),TmpReg).addReg(TmpDestReg).addReg(MaskReg); 4341 BuildMI(BB, TII->get(PPC::CMPW), PPC::CR0) 4342 .addReg(TmpReg).addReg(OldVal3Reg); 4343 BuildMI(BB, TII->get(PPC::BCC)) 4344 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 4345 BB->addSuccessor(loop2MBB); 4346 BB->addSuccessor(midMBB); 4347 4348 BB = loop2MBB; 4349 BuildMI(BB, TII->get(PPC::ANDC),Tmp2Reg).addReg(TmpDestReg).addReg(MaskReg); 4350 BuildMI(BB, TII->get(PPC::OR),Tmp4Reg).addReg(Tmp2Reg).addReg(NewVal3Reg); 4351 BuildMI(BB, TII->get(PPC::STWCX)).addReg(Tmp4Reg) 4352 .addReg(PPC::R0).addReg(PtrReg); 4353 BuildMI(BB, TII->get(PPC::BCC)) 4354 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 4355 BuildMI(BB, TII->get(PPC::B)).addMBB(exitMBB); 4356 BB->addSuccessor(loop1MBB); 4357 BB->addSuccessor(exitMBB); 4358 4359 BB = midMBB; 4360 BuildMI(BB, TII->get(PPC::STWCX)).addReg(TmpDestReg) 4361 .addReg(PPC::R0).addReg(PtrReg); 4362 BB->addSuccessor(exitMBB); 
/// PerformDAGCombine - Target-specific DAG combining hook, run by the generic
/// DAG combiner.  Performs PPC peepholes on the selection DAG:
///   - folds PPC shifts of a zero (or all-ones, for SRA) first operand,
///   - turns (sint_to_fp (fp_to_sint X)) into FCTIDZ/FCFID register sequences
///     so no load/store round trip through memory is needed,
///   - forms byte-swapping stores/loads (st[hw]brx / l[hw]brx),
///   - reuses an already-existing VCMPo result in place of a matching VCMP,
///   - lowers branches on AltiVec predicate intrinsics to branch directly on
///     CR6 instead of going through an MFCR.
/// Returns the replacement value, or an empty SDValue for "no change".
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case PPCISD::SHL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->getValue() == 0)   // 0 << V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->getValue() == 0)   // 0 >>u V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRA:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->getValue() == 0 ||   //  0 >>s V -> 0.
          C->isAllOnesValue())    // -1 >>s V -> -1.
        return N->getOperand(0);
    }
    break;

  case ISD::SINT_TO_FP:
    if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
        // We allow the src/dst to be either f32/f64, but the intermediate
        // type must be i64.
        if (N->getOperand(0).getValueType() == MVT::i64 &&
            N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
          SDValue Val = N->getOperand(0).getOperand(0);
          // FCTIDZ/FCFID operate on f64; widen an f32 source first.
          if (Val.getValueType() == MVT::f32) {
            Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
            DCI.AddToWorklist(Val.getNode());
          }

          Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
          DCI.AddToWorklist(Val.getNode());
          Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
          DCI.AddToWorklist(Val.getNode());
          // Round back down if the caller wanted an f32 result.
          if (N->getValueType(0) == MVT::f32) {
            Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val,
                              DAG.getIntPtrConstant(0));
            DCI.AddToWorklist(Val.getNode());
          }
          return Val;
        } else if (N->getOperand(0).getValueType() == MVT::i32) {
          // If the intermediate type is i32, we can avoid the load/store here
          // too.
        }
      }
    }
    break;
  case ISD::STORE:
    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
        !cast<StoreSDNode>(N)->isTruncatingStore() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32 &&
        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
      SDValue Val = N->getOperand(1).getOperand(0);
      // FCTIWZ operates on f64; widen an f32 source first.
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
        DCI.AddToWorklist(Val.getNode());
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);
      DCI.AddToWorklist(Val.getNode());

      // STFIWX takes (chain, value, store ptr, store srcvalue).
      Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,
                        N->getOperand(2), N->getOperand(3));
      DCI.AddToWorklist(Val.getNode());
      return Val;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    if (N->getOperand(1).getOpcode() == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (N->getOperand(1).getValueType() == MVT::i32 ||
         N->getOperand(1).getValueType() == MVT::i16)) {
      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, BSwapOp);

      // The trailing VT operand records whether this is the i16 (sthbrx)
      // or i32 (stwbrx) form.
      return DAG.getNode(PPCISD::STBRX, MVT::Other, N->getOperand(0), BSwapOp,
                         N->getOperand(2), N->getOperand(3),
                         DAG.getValueType(N->getOperand(1).getValueType()));
    }
    break;
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      std::vector<MVT> VTs;
      VTs.push_back(MVT::i32);
      VTs.push_back(MVT::Other);
      SDValue MO = DAG.getMemOperand(LD->getMemOperand());
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr(),  // Ptr
        MO,                // MemOperand
        DAG.getValueType(N->getValueType(0)) // VT
      };
      SDValue BSLoad = DAG.getNode(PPCISD::LBRX, VTs, Ops, 4);

      // If this is an i16 load, insert the truncate.
      SDValue ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, MVT::i16, BSLoad);

      // First, combine the bswap away.  This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away, we give it a bogus result value but a real
      // chain result.  The result value is dead because the bswap is dead.
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }

    break;
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    //
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = 0;

      SDNode *LHSN = N->getOperand(0).getNode();
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if (UI->getOpcode() == PPCISD::VCMPo &&
            UI->getOperand(1) == N->getOperand(1) &&
            UI->getOperand(2) == N->getOperand(2) &&
            UI->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there is no VCMPo node, or if the flag value has a single use, don't
      // transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value.  If it has a
      // chain, this transformation is more complex.  Note that multiple things
      // could use the value result, which we should ignore.
      SDNode *FlagUser = 0;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == 0; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is a MFCR instruction, we know this is safe.  Otherwise we
      // give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFCR)
        return SDValue(VCMPoNode, 0);
    }
    break;
  }
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFCR: instead, branch directly on CR6.  This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
      std::vector<MVT> VTs;
      SDValue Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, MVT::i32)
      };
      VTs.push_back(LHS.getOperand(2).getValueType());
      VTs.push_back(MVT::Flag);
      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3);

      // Unpack the result based on how the target uses it.
      // Operand 1 of the intrinsic is the predicate selector.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  }

  return SDValue();
}
4646 if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16) 4647 KnownZero = 0xFFFF0000; 4648 break; 4649 } 4650 case ISD::INTRINSIC_WO_CHAIN: { 4651 switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) { 4652 default: break; 4653 case Intrinsic::ppc_altivec_vcmpbfp_p: 4654 case Intrinsic::ppc_altivec_vcmpeqfp_p: 4655 case Intrinsic::ppc_altivec_vcmpequb_p: 4656 case Intrinsic::ppc_altivec_vcmpequh_p: 4657 case Intrinsic::ppc_altivec_vcmpequw_p: 4658 case Intrinsic::ppc_altivec_vcmpgefp_p: 4659 case Intrinsic::ppc_altivec_vcmpgtfp_p: 4660 case Intrinsic::ppc_altivec_vcmpgtsb_p: 4661 case Intrinsic::ppc_altivec_vcmpgtsh_p: 4662 case Intrinsic::ppc_altivec_vcmpgtsw_p: 4663 case Intrinsic::ppc_altivec_vcmpgtub_p: 4664 case Intrinsic::ppc_altivec_vcmpgtuh_p: 4665 case Intrinsic::ppc_altivec_vcmpgtuw_p: 4666 KnownZero = ~1U; // All bits but the low one are known to be zero. 4667 break; 4668 } 4669 } 4670 } 4671} 4672 4673 4674/// getConstraintType - Given a constraint, return the type of 4675/// constraint it is for this target. 
4676PPCTargetLowering::ConstraintType 4677PPCTargetLowering::getConstraintType(const std::string &Constraint) const { 4678 if (Constraint.size() == 1) { 4679 switch (Constraint[0]) { 4680 default: break; 4681 case 'b': 4682 case 'r': 4683 case 'f': 4684 case 'v': 4685 case 'y': 4686 return C_RegisterClass; 4687 } 4688 } 4689 return TargetLowering::getConstraintType(Constraint); 4690} 4691 4692std::pair<unsigned, const TargetRegisterClass*> 4693PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 4694 MVT VT) const { 4695 if (Constraint.size() == 1) { 4696 // GCC RS6000 Constraint Letters 4697 switch (Constraint[0]) { 4698 case 'b': // R1-R31 4699 case 'r': // R0-R31 4700 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) 4701 return std::make_pair(0U, PPC::G8RCRegisterClass); 4702 return std::make_pair(0U, PPC::GPRCRegisterClass); 4703 case 'f': 4704 if (VT == MVT::f32) 4705 return std::make_pair(0U, PPC::F4RCRegisterClass); 4706 else if (VT == MVT::f64) 4707 return std::make_pair(0U, PPC::F8RCRegisterClass); 4708 break; 4709 case 'v': 4710 return std::make_pair(0U, PPC::VRRCRegisterClass); 4711 case 'y': // crrc 4712 return std::make_pair(0U, PPC::CRRCRegisterClass); 4713 } 4714 } 4715 4716 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 4717} 4718 4719 4720/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 4721/// vector. If it is invalid, don't add anything to Ops. 4722void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter, 4723 std::vector<SDValue>&Ops, 4724 SelectionDAG &DAG) const { 4725 SDValue Result(0,0); 4726 switch (Letter) { 4727 default: break; 4728 case 'I': 4729 case 'J': 4730 case 'K': 4731 case 'L': 4732 case 'M': 4733 case 'N': 4734 case 'O': 4735 case 'P': { 4736 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); 4737 if (!CST) return; // Must be an immediate to match. 
4738 unsigned Value = CST->getValue(); 4739 switch (Letter) { 4740 default: assert(0 && "Unknown constraint letter!"); 4741 case 'I': // "I" is a signed 16-bit constant. 4742 if ((short)Value == (int)Value) 4743 Result = DAG.getTargetConstant(Value, Op.getValueType()); 4744 break; 4745 case 'J': // "J" is a constant with only the high-order 16 bits nonzero. 4746 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. 4747 if ((short)Value == 0) 4748 Result = DAG.getTargetConstant(Value, Op.getValueType()); 4749 break; 4750 case 'K': // "K" is a constant with only the low-order 16 bits nonzero. 4751 if ((Value >> 16) == 0) 4752 Result = DAG.getTargetConstant(Value, Op.getValueType()); 4753 break; 4754 case 'M': // "M" is a constant that is greater than 31. 4755 if (Value > 31) 4756 Result = DAG.getTargetConstant(Value, Op.getValueType()); 4757 break; 4758 case 'N': // "N" is a positive constant that is an exact power of two. 4759 if ((int)Value > 0 && isPowerOf2_32(Value)) 4760 Result = DAG.getTargetConstant(Value, Op.getValueType()); 4761 break; 4762 case 'O': // "O" is the constant zero. 4763 if (Value == 0) 4764 Result = DAG.getTargetConstant(Value, Op.getValueType()); 4765 break; 4766 case 'P': // "P" is a constant whose negation is a signed 16-bit constant. 4767 if ((short)-Value == (int)-Value) 4768 Result = DAG.getTargetConstant(Value, Op.getValueType()); 4769 break; 4770 } 4771 break; 4772 } 4773 } 4774 4775 if (Result.getNode()) { 4776 Ops.push_back(Result); 4777 return; 4778 } 4779 4780 // Handle standard constraint letters. 4781 TargetLowering::LowerAsmOperandForConstraint(Op, Letter, Ops, DAG); 4782} 4783 4784// isLegalAddressingMode - Return true if the addressing mode represented 4785// by AM is legal for this target, for a load/store of the specified type. 4786bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, 4787 const Type *Ty) const { 4788 // FIXME: PPC does not allow r+i addressing modes for vectors! 
4789 4790 // PPC allows a sign-extended 16-bit immediate field. 4791 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) 4792 return false; 4793 4794 // No global is ever allowed as a base. 4795 if (AM.BaseGV) 4796 return false; 4797 4798 // PPC only support r+r, 4799 switch (AM.Scale) { 4800 case 0: // "r+i" or just "i", depending on HasBaseReg. 4801 break; 4802 case 1: 4803 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. 4804 return false; 4805 // Otherwise we have r+r or r+i. 4806 break; 4807 case 2: 4808 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. 4809 return false; 4810 // Allow 2*r as r+r. 4811 break; 4812 default: 4813 // No other scales are supported. 4814 return false; 4815 } 4816 4817 return true; 4818} 4819 4820/// isLegalAddressImmediate - Return true if the integer value can be used 4821/// as the offset of the target addressing mode for load / store of the 4822/// given type. 4823bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{ 4824 // PPC allows a sign-extended 16-bit immediate field. 4825 return (V > -(1 << 16) && V < (1 << 16)-1); 4826} 4827 4828bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { 4829 return false; 4830} 4831 4832SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { 4833 // Depths > 0 not supported yet! 4834 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4835 return SDValue(); 4836 4837 MachineFunction &MF = DAG.getMachineFunction(); 4838 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 4839 4840 // Just load the return address off the stack. 4841 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); 4842 4843 // Make sure the function really does not optimize away the store of the RA 4844 // to the stack. 
4845 FuncInfo->setLRStoreRequired(); 4846 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 4847} 4848 4849SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { 4850 // Depths > 0 not supported yet! 4851 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4852 return SDValue(); 4853 4854 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4855 bool isPPC64 = PtrVT == MVT::i64; 4856 4857 MachineFunction &MF = DAG.getMachineFunction(); 4858 MachineFrameInfo *MFI = MF.getFrameInfo(); 4859 bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects()) 4860 && MFI->getStackSize(); 4861 4862 if (isPPC64) 4863 return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::X31 : PPC::X1, 4864 MVT::i64); 4865 else 4866 return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::R31 : PPC::R1, 4867 MVT::i32); 4868} 4869