1//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the SystemZSelectionDAGInfo class. 11// 12//===----------------------------------------------------------------------===// 13 14#include "SystemZTargetMachine.h" 15#include "llvm/CodeGen/SelectionDAG.h" 16 17using namespace llvm; 18 19#define DEBUG_TYPE "systemz-selectiondag-info" 20 21// Decide whether it is best to use a loop or straight-line code for 22// a block operation of Size bytes with source address Src and destination 23// address Dest. Sequence is the opcode to use for straight-line code 24// (such as MVC) and Loop is the opcode to use for loops (such as MVC_LOOP). 25// Return the chain for the completed operation. 26static SDValue emitMemMem(SelectionDAG &DAG, const SDLoc &DL, unsigned Sequence, 27 unsigned Loop, SDValue Chain, SDValue Dst, 28 SDValue Src, uint64_t Size) { 29 EVT PtrVT = Src.getValueType(); 30 // The heuristic we use is to prefer loops for anything that would 31 // require 7 or more MVCs. With these kinds of sizes there isn't 32 // much to choose between straight-line code and looping code, 33 // since the time will be dominated by the MVCs themselves. 34 // However, the loop has 4 or 5 instructions (depending on whether 35 // the base addresses can be proved equal), so there doesn't seem 36 // much point using a loop for 5 * 256 bytes or fewer. Anything in 37 // the range (5 * 256, 6 * 256) will need another instruction after 38 // the loop, so it doesn't seem worth using a loop then either. 39 // The next value up, 6 * 256, can be implemented in the same 40 // number of straight-line MVCs as 6 * 256 - 1. 41 if (Size > 6 * 256) 42 return DAG.getNode(Loop, DL, MVT::Other, Chain, Dst, Src, 43 DAG.getConstant(Size, DL, PtrVT), 44 DAG.getConstant(Size / 256, DL, PtrVT)); 45 return DAG.getNode(Sequence, DL, MVT::Other, Chain, Dst, Src, 46 DAG.getConstant(Size, DL, PtrVT)); 47} 48 49SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemcpy( 50 SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src, 51 SDValue Size, unsigned Align, bool IsVolatile, bool AlwaysInline, 52 MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { 53 if (IsVolatile) 54 return SDValue(); 55 56 if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) 57 return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, 58 Chain, Dst, Src, CSize->getZExtValue()); 59 return SDValue(); 60} 61 62// Handle a memset of 1, 2, 4 or 8 bytes with the operands given by 63// Chain, Dst, ByteVal and Size. These cases are expected to use 64// MVI, MVHHI, MVHI and MVGHI respectively. 65static SDValue memsetStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, 66 SDValue Dst, uint64_t ByteVal, uint64_t Size, 67 unsigned Align, MachinePointerInfo DstPtrInfo) { 68 uint64_t StoreVal = ByteVal; 69 for (unsigned I = 1; I < Size; ++I) 70 StoreVal |= ByteVal << (I * 8); 71 return DAG.getStore(Chain, DL, 72 DAG.getConstant(StoreVal, DL, 73 MVT::getIntegerVT(Size * 8)), 74 Dst, DstPtrInfo, false, false, Align); 75} 76 77SDValue SystemZSelectionDAGInfo::EmitTargetCodeForMemset( 78 SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dst, 79 SDValue Byte, SDValue Size, unsigned Align, bool IsVolatile, 80 MachinePointerInfo DstPtrInfo) const { 81 EVT PtrVT = Dst.getValueType(); 82 83 if (IsVolatile) 84 return SDValue(); 85 86 if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) { 87 uint64_t Bytes = CSize->getZExtValue(); 88 if (Bytes == 0) 89 return SDValue(); 90 if (auto *CByte = dyn_cast<ConstantSDNode>(Byte)) { 91 // Handle cases that can be done using at most two of 92 // MVI, MVHI, MVHHI and MVGHI. The latter two can only be 93 // used if ByteVal is all zeros or all ones; in other casees, 94 // we can move at most 2 halfwords. 95 uint64_t ByteVal = CByte->getZExtValue(); 96 if (ByteVal == 0 || ByteVal == 255 ? 97 Bytes <= 16 && countPopulation(Bytes) <= 2 : 98 Bytes <= 4) { 99 unsigned Size1 = Bytes == 16 ? 8 : 1 << findLastSet(Bytes); 100 unsigned Size2 = Bytes - Size1; 101 SDValue Chain1 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size1, 102 Align, DstPtrInfo); 103 if (Size2 == 0) 104 return Chain1; 105 Dst = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, 106 DAG.getConstant(Size1, DL, PtrVT)); 107 DstPtrInfo = DstPtrInfo.getWithOffset(Size1); 108 SDValue Chain2 = memsetStore(DAG, DL, Chain, Dst, ByteVal, Size2, 109 std::min(Align, Size1), DstPtrInfo); 110 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); 111 } 112 } else { 113 // Handle one and two bytes using STC. 114 if (Bytes <= 2) { 115 SDValue Chain1 = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, 116 false, false, Align); 117 if (Bytes == 1) 118 return Chain1; 119 SDValue Dst2 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, 120 DAG.getConstant(1, DL, PtrVT)); 121 SDValue Chain2 = DAG.getStore(Chain, DL, Byte, Dst2, 122 DstPtrInfo.getWithOffset(1), 123 false, false, 1); 124 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain1, Chain2); 125 } 126 } 127 assert(Bytes >= 2 && "Should have dealt with 0- and 1-byte cases already"); 128 129 // Handle the special case of a memset of 0, which can use XC. 130 auto *CByte = dyn_cast<ConstantSDNode>(Byte); 131 if (CByte && CByte->getZExtValue() == 0) 132 return emitMemMem(DAG, DL, SystemZISD::XC, SystemZISD::XC_LOOP, 133 Chain, Dst, Dst, Bytes); 134 135 // Copy the byte to the first location and then use MVC to copy 136 // it to the rest. 137 Chain = DAG.getStore(Chain, DL, Byte, Dst, DstPtrInfo, 138 false, false, Align); 139 SDValue DstPlus1 = DAG.getNode(ISD::ADD, DL, PtrVT, Dst, 140 DAG.getConstant(1, DL, PtrVT)); 141 return emitMemMem(DAG, DL, SystemZISD::MVC, SystemZISD::MVC_LOOP, 142 Chain, DstPlus1, Dst, Bytes - 1); 143 } 144 return SDValue(); 145} 146 147// Use CLC to compare [Src1, Src1 + Size) with [Src2, Src2 + Size), 148// deciding whether to use a loop or straight-line code. 149static SDValue emitCLC(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, 150 SDValue Src1, SDValue Src2, uint64_t Size) { 151 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 152 EVT PtrVT = Src1.getValueType(); 153 // A two-CLC sequence is a clear win over a loop, not least because it 154 // needs only one branch. A three-CLC sequence needs the same number 155 // of branches as a loop (i.e. 2), but is shorter. That brings us to 156 // lengths greater than 768 bytes. It seems relatively likely that 157 // a difference will be found within the first 768 bytes, so we just 158 // optimize for the smallest number of branch instructions, in order 159 // to avoid polluting the prediction buffer too much. A loop only ever 160 // needs 2 branches, whereas a straight-line sequence would need 3 or more. 161 if (Size > 3 * 256) 162 return DAG.getNode(SystemZISD::CLC_LOOP, DL, VTs, Chain, Src1, Src2, 163 DAG.getConstant(Size, DL, PtrVT), 164 DAG.getConstant(Size / 256, DL, PtrVT)); 165 return DAG.getNode(SystemZISD::CLC, DL, VTs, Chain, Src1, Src2, 166 DAG.getConstant(Size, DL, PtrVT)); 167} 168 169// Convert the current CC value into an integer that is 0 if CC == 0, 170// less than zero if CC == 1 and greater than zero if CC >= 2. 171// The sequence starts with IPM, which puts CC into bits 29 and 28 172// of an integer and clears bits 30 and 31. 173static SDValue addIPMSequence(const SDLoc &DL, SDValue Glue, 174 SelectionDAG &DAG) { 175 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue); 176 SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, 177 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); 178 SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL, 179 DAG.getConstant(31, DL, MVT::i32)); 180 return ROTL; 181} 182 183std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemcmp( 184 SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1, 185 SDValue Src2, SDValue Size, MachinePointerInfo Op1PtrInfo, 186 MachinePointerInfo Op2PtrInfo) const { 187 if (auto *CSize = dyn_cast<ConstantSDNode>(Size)) { 188 uint64_t Bytes = CSize->getZExtValue(); 189 assert(Bytes > 0 && "Caller should have handled 0-size case"); 190 Chain = emitCLC(DAG, DL, Chain, Src1, Src2, Bytes); 191 SDValue Glue = Chain.getValue(1); 192 return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); 193 } 194 return std::make_pair(SDValue(), SDValue()); 195} 196 197std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForMemchr( 198 SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src, 199 SDValue Char, SDValue Length, MachinePointerInfo SrcPtrInfo) const { 200 // Use SRST to find the character. End is its address on success. 201 EVT PtrVT = Src.getValueType(); 202 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); 203 Length = DAG.getZExtOrTrunc(Length, DL, PtrVT); 204 Char = DAG.getZExtOrTrunc(Char, DL, MVT::i32); 205 Char = DAG.getNode(ISD::AND, DL, MVT::i32, Char, 206 DAG.getConstant(255, DL, MVT::i32)); 207 SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, Length); 208 SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, 209 Limit, Src, Char); 210 Chain = End.getValue(1); 211 SDValue Glue = End.getValue(2); 212 213 // Now select between End and null, depending on whether the character 214 // was found. 215 SDValue Ops[] = {End, DAG.getConstant(0, DL, PtrVT), 216 DAG.getConstant(SystemZ::CCMASK_SRST, DL, MVT::i32), 217 DAG.getConstant(SystemZ::CCMASK_SRST_FOUND, DL, MVT::i32), 218 Glue}; 219 VTs = DAG.getVTList(PtrVT, MVT::Glue); 220 End = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VTs, Ops); 221 return std::make_pair(End, Chain); 222} 223 224std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrcpy( 225 SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Dest, 226 SDValue Src, MachinePointerInfo DestPtrInfo, MachinePointerInfo SrcPtrInfo, 227 bool isStpcpy) const { 228 SDVTList VTs = DAG.getVTList(Dest.getValueType(), MVT::Other); 229 SDValue EndDest = DAG.getNode(SystemZISD::STPCPY, DL, VTs, Chain, Dest, Src, 230 DAG.getConstant(0, DL, MVT::i32)); 231 return std::make_pair(isStpcpy ? EndDest : Dest, EndDest.getValue(1)); 232} 233 234std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrcmp( 235 SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src1, 236 SDValue Src2, MachinePointerInfo Op1PtrInfo, 237 MachinePointerInfo Op2PtrInfo) const { 238 SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue); 239 SDValue Unused = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2, 240 DAG.getConstant(0, DL, MVT::i32)); 241 Chain = Unused.getValue(1); 242 SDValue Glue = Chain.getValue(2); 243 return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain); 244} 245 246// Search from Src for a null character, stopping once Src reaches Limit. 247// Return a pair of values, the first being the number of nonnull characters 248// and the second being the out chain. 249// 250// This can be used for strlen by setting Limit to 0. 251static std::pair<SDValue, SDValue> getBoundedStrlen(SelectionDAG &DAG, 252 const SDLoc &DL, 253 SDValue Chain, SDValue Src, 254 SDValue Limit) { 255 EVT PtrVT = Src.getValueType(); 256 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other, MVT::Glue); 257 SDValue End = DAG.getNode(SystemZISD::SEARCH_STRING, DL, VTs, Chain, 258 Limit, Src, DAG.getConstant(0, DL, MVT::i32)); 259 Chain = End.getValue(1); 260 SDValue Len = DAG.getNode(ISD::SUB, DL, PtrVT, End, Src); 261 return std::make_pair(Len, Chain); 262} 263 264std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrlen( 265 SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src, 266 MachinePointerInfo SrcPtrInfo) const { 267 EVT PtrVT = Src.getValueType(); 268 return getBoundedStrlen(DAG, DL, Chain, Src, DAG.getConstant(0, DL, PtrVT)); 269} 270 271std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::EmitTargetCodeForStrnlen( 272 SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Src, 273 SDValue MaxLength, MachinePointerInfo SrcPtrInfo) const { 274 EVT PtrVT = Src.getValueType(); 275 MaxLength = DAG.getZExtOrTrunc(MaxLength, DL, PtrVT); 276 SDValue Limit = DAG.getNode(ISD::ADD, DL, PtrVT, Src, MaxLength); 277 return getBoundedStrlen(DAG, DL, Chain, Src, Limit); 278} 279