X86ShuffleDecode.cpp revision 36e36ace77cf84d7c8326957925550624b3fc89c
1//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Define several functions to decode x86 specific shuffle semantics into a 11// generic vector mask. 12// 13//===----------------------------------------------------------------------===// 14 15#include "X86ShuffleDecode.h" 16 17//===----------------------------------------------------------------------===// 18// Vector Mask Decoding 19//===----------------------------------------------------------------------===// 20 21namespace llvm { 22 23void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) { 24 // Defaults the copying the dest value. 25 ShuffleMask.push_back(0); 26 ShuffleMask.push_back(1); 27 ShuffleMask.push_back(2); 28 ShuffleMask.push_back(3); 29 30 // Decode the immediate. 31 unsigned ZMask = Imm & 15; 32 unsigned CountD = (Imm >> 4) & 3; 33 unsigned CountS = (Imm >> 6) & 3; 34 35 // CountS selects which input element to use. 36 unsigned InVal = 4+CountS; 37 // CountD specifies which element of destination to update. 38 ShuffleMask[CountD] = InVal; 39 // ZMask zaps values, potentially overriding the CountD elt. 40 if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; 41 if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; 42 if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; 43 if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; 44} 45 46// <3,1> or <6,7,2,3> 47void DecodeMOVHLPSMask(unsigned NElts, 48 SmallVectorImpl<unsigned> &ShuffleMask) { 49 for (unsigned i = NElts/2; i != NElts; ++i) 50 ShuffleMask.push_back(NElts+i); 51 52 for (unsigned i = NElts/2; i != NElts; ++i) 53 ShuffleMask.push_back(i); 54} 55 56// <0,2> or <0,1,4,5> 57void DecodeMOVLHPSMask(unsigned NElts, 58 SmallVectorImpl<unsigned> &ShuffleMask) { 59 for (unsigned i = 0; i != NElts/2; ++i) 60 ShuffleMask.push_back(i); 61 62 for (unsigned i = 0; i != NElts/2; ++i) 63 ShuffleMask.push_back(NElts+i); 64} 65 66void DecodePSHUFMask(unsigned NElts, unsigned Imm, 67 SmallVectorImpl<unsigned> &ShuffleMask) { 68 for (unsigned i = 0; i != NElts; ++i) { 69 ShuffleMask.push_back(Imm % NElts); 70 Imm /= NElts; 71 } 72} 73 74void DecodePSHUFHWMask(unsigned Imm, 75 SmallVectorImpl<unsigned> &ShuffleMask) { 76 ShuffleMask.push_back(0); 77 ShuffleMask.push_back(1); 78 ShuffleMask.push_back(2); 79 ShuffleMask.push_back(3); 80 for (unsigned i = 0; i != 4; ++i) { 81 ShuffleMask.push_back(4+(Imm & 3)); 82 Imm >>= 2; 83 } 84} 85 86void DecodePSHUFLWMask(unsigned Imm, 87 SmallVectorImpl<unsigned> &ShuffleMask) { 88 for (unsigned i = 0; i != 4; ++i) { 89 ShuffleMask.push_back((Imm & 3)); 90 Imm >>= 2; 91 } 92 ShuffleMask.push_back(4); 93 ShuffleMask.push_back(5); 94 ShuffleMask.push_back(6); 95 ShuffleMask.push_back(7); 96} 97 98void DecodePUNPCKLBWMask(unsigned NElts, 99 SmallVectorImpl<unsigned> &ShuffleMask) { 100 DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask); 101} 102 103void DecodePUNPCKLWDMask(unsigned NElts, 104 SmallVectorImpl<unsigned> &ShuffleMask) { 105 DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask); 106} 107 108void DecodePUNPCKLDQMask(unsigned NElts, 109 SmallVectorImpl<unsigned> &ShuffleMask) { 110 DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask); 111} 112 113void DecodePUNPCKLQDQMask(unsigned NElts, 114 SmallVectorImpl<unsigned> &ShuffleMask) { 115 DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask); 116} 117 118void DecodePUNPCKLMask(EVT VT, 119 SmallVectorImpl<unsigned> &ShuffleMask) { 120 DecodeUNPCKLPMask(VT, ShuffleMask); 121} 122 123void DecodePUNPCKHMask(unsigned NElts, 124 SmallVectorImpl<unsigned> &ShuffleMask) { 125 for (unsigned i = 0; i != NElts/2; ++i) { 126 ShuffleMask.push_back(i+NElts/2); 127 ShuffleMask.push_back(i+NElts+NElts/2); 128 } 129} 130 131void DecodeSHUFPMask(EVT VT, unsigned Imm, 132 SmallVectorImpl<unsigned> &ShuffleMask) { 133 unsigned NumElts = VT.getVectorNumElements(); 134 135 unsigned NumLanes = VT.getSizeInBits() / 128; 136 unsigned NumLaneElts = NumElts / NumLanes; 137 138 int NewImm = Imm; 139 for (unsigned l = 0; l < NumLanes; ++l) { 140 unsigned LaneStart = l * NumLaneElts; 141 // Part that reads from dest. 142 for (unsigned i = 0; i != NumLaneElts/2; ++i) { 143 ShuffleMask.push_back(NewImm % NumLaneElts + LaneStart); 144 NewImm /= NumLaneElts; 145 } 146 // Part that reads from src. 147 for (unsigned i = 0; i != NumLaneElts/2; ++i) { 148 ShuffleMask.push_back(NewImm % NumLaneElts + NumElts + LaneStart); 149 NewImm /= NumLaneElts; 150 } 151 if (NumLaneElts == 4) NewImm = Imm; // reload imm 152 } 153} 154 155void DecodeUNPCKHPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { 156 unsigned NumElts = VT.getVectorNumElements(); 157 158 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 159 // independently on 128-bit lanes. 160 unsigned NumLanes = VT.getSizeInBits() / 128; 161 if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 162 unsigned NumLaneElts = NumElts / NumLanes; 163 164 for (unsigned s = 0; s < NumLanes; ++s) { 165 unsigned Start = s * NumLaneElts + NumLaneElts/2; 166 unsigned End = s * NumLaneElts + NumLaneElts; 167 for (unsigned i = Start; i != End; ++i) { 168 ShuffleMask.push_back(i); // Reads from dest/src1 169 ShuffleMask.push_back(i+NumElts); // Reads from src/src2 170 } 171 } 172} 173 174/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd 175/// etc. VT indicates the type of the vector allowing it to handle different 176/// datatypes and vector widths. 177void DecodeUNPCKLPMask(EVT VT, SmallVectorImpl<unsigned> &ShuffleMask) { 178 unsigned NumElts = VT.getVectorNumElements(); 179 180 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 181 // independently on 128-bit lanes. 182 unsigned NumLanes = VT.getSizeInBits() / 128; 183 if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 184 unsigned NumLaneElts = NumElts / NumLanes; 185 186 for (unsigned s = 0; s < NumLanes; ++s) { 187 unsigned Start = s * NumLaneElts; 188 unsigned End = s * NumLaneElts + NumLaneElts/2; 189 for (unsigned i = Start; i != End; ++i) { 190 ShuffleMask.push_back(i); // Reads from dest/src1 191 ShuffleMask.push_back(i+NumElts); // Reads from src/src2 192 } 193 } 194} 195 196// DecodeVPERMILPSMask - Decodes VPERMILPS permutes for any 128-bit 32-bit 197// elements. For 256-bit vectors, it's considered as two 128 lanes, the 198// referenced elements can't cross lanes and the mask of the first lane must 199// be the same of the second. 200void DecodeVPERMILPSMask(unsigned NumElts, unsigned Imm, 201 SmallVectorImpl<unsigned> &ShuffleMask) { 202 unsigned NumLanes = (NumElts*32)/128; 203 unsigned LaneSize = NumElts/NumLanes; 204 205 for (unsigned l = 0; l != NumLanes; ++l) { 206 for (unsigned i = 0; i != LaneSize; ++i) { 207 unsigned Idx = (Imm >> (i*2)) & 0x3 ; 208 ShuffleMask.push_back(Idx+(l*LaneSize)); 209 } 210 } 211} 212 213// DecodeVPERMILPDMask - Decodes VPERMILPD permutes for any 128-bit 64-bit 214// elements. For 256-bit vectors, it's considered as two 128 lanes, the 215// referenced elements can't cross lanes but the mask of the first lane can 216// be the different of the second (not like VPERMILPS). 217void DecodeVPERMILPDMask(unsigned NumElts, unsigned Imm, 218 SmallVectorImpl<unsigned> &ShuffleMask) { 219 unsigned NumLanes = (NumElts*64)/128; 220 unsigned LaneSize = NumElts/NumLanes; 221 222 for (unsigned l = 0; l < NumLanes; ++l) { 223 for (unsigned i = l*LaneSize; i < LaneSize*(l+1); ++i) { 224 unsigned Idx = (Imm >> i) & 0x1; 225 ShuffleMask.push_back(Idx+(l*LaneSize)); 226 } 227 } 228} 229 230void DecodeVPERM2F128Mask(EVT VT, unsigned Imm, 231 SmallVectorImpl<unsigned> &ShuffleMask) { 232 unsigned HalfSize = VT.getVectorNumElements()/2; 233 unsigned FstHalfBegin = (Imm & 0x3) * HalfSize; 234 unsigned SndHalfBegin = ((Imm >> 4) & 0x3) * HalfSize; 235 236 for (int i = FstHalfBegin, e = FstHalfBegin+HalfSize; i != e; ++i) 237 ShuffleMask.push_back(i); 238 for (int i = SndHalfBegin, e = SndHalfBegin+HalfSize; i != e; ++i) 239 ShuffleMask.push_back(i); 240} 241 242void DecodeVPERM2F128Mask(unsigned Imm, 243 SmallVectorImpl<unsigned> &ShuffleMask) { 244 // VPERM2F128 is used by any 256-bit EVT, but X86InstComments only 245 // has information about the instruction and not the types. So for 246 // instruction comments purpose, assume the 256-bit vector is v4i64. 247 return DecodeVPERM2F128Mask(MVT::v4i64, Imm, ShuffleMask); 248} 249 250} // llvm namespace 251