1//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Define several functions to decode x86 specific shuffle semantics into a 11// generic vector mask. 12// 13//===----------------------------------------------------------------------===// 14 15#include "X86ShuffleDecode.h" 16 17//===----------------------------------------------------------------------===// 18// Vector Mask Decoding 19//===----------------------------------------------------------------------===// 20 21namespace llvm { 22 23void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 24 // Defaults the copying the dest value. 25 ShuffleMask.push_back(0); 26 ShuffleMask.push_back(1); 27 ShuffleMask.push_back(2); 28 ShuffleMask.push_back(3); 29 30 // Decode the immediate. 31 unsigned ZMask = Imm & 15; 32 unsigned CountD = (Imm >> 4) & 3; 33 unsigned CountS = (Imm >> 6) & 3; 34 35 // CountS selects which input element to use. 36 unsigned InVal = 4+CountS; 37 // CountD specifies which element of destination to update. 38 ShuffleMask[CountD] = InVal; 39 // ZMask zaps values, potentially overriding the CountD elt. 40 if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; 41 if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; 42 if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; 43 if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; 44} 45 46// <3,1> or <6,7,2,3> 47void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 48 for (unsigned i = NElts/2; i != NElts; ++i) 49 ShuffleMask.push_back(NElts+i); 50 51 for (unsigned i = NElts/2; i != NElts; ++i) 52 ShuffleMask.push_back(i); 53} 54 55// <0,2> or <0,1,4,5> 56void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 57 for (unsigned i = 0; i != NElts/2; ++i) 58 ShuffleMask.push_back(i); 59 60 for (unsigned i = 0; i != NElts/2; ++i) 61 ShuffleMask.push_back(NElts+i); 62} 63 64void DecodePALIGNRMask(MVT VT, unsigned Imm, 65 SmallVectorImpl<int> &ShuffleMask) { 66 unsigned NumElts = VT.getVectorNumElements(); 67 unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8); 68 69 unsigned NumLanes = VT.getSizeInBits() / 128; 70 unsigned NumLaneElts = NumElts / NumLanes; 71 72 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 73 for (unsigned i = 0; i != NumLaneElts; ++i) { 74 unsigned Base = i + Offset; 75 // if i+offset is out of this lane then we actually need the other source 76 if (Base >= NumLaneElts) Base += NumElts - NumLaneElts; 77 ShuffleMask.push_back(Base + l); 78 } 79 } 80} 81 82/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. 83/// VT indicates the type of the vector allowing it to handle different 84/// datatypes and vector widths. 85void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 86 unsigned NumElts = VT.getVectorNumElements(); 87 88 unsigned NumLanes = VT.getSizeInBits() / 128; 89 unsigned NumLaneElts = NumElts / NumLanes; 90 91 unsigned NewImm = Imm; 92 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 93 for (unsigned i = 0; i != NumLaneElts; ++i) { 94 ShuffleMask.push_back(NewImm % NumLaneElts + l); 95 NewImm /= NumLaneElts; 96 } 97 if (NumLaneElts == 4) NewImm = Imm; // reload imm 98 } 99} 100 101void DecodePSHUFHWMask(MVT VT, unsigned Imm, 102 SmallVectorImpl<int> &ShuffleMask) { 103 unsigned NumElts = VT.getVectorNumElements(); 104 105 for (unsigned l = 0; l != NumElts; l += 8) { 106 unsigned NewImm = Imm; 107 for (unsigned i = 0, e = 4; i != e; ++i) { 108 ShuffleMask.push_back(l + i); 109 } 110 for (unsigned i = 4, e = 8; i != e; ++i) { 111 ShuffleMask.push_back(l + 4 + (NewImm & 3)); 112 NewImm >>= 2; 113 } 114 } 115} 116 117void DecodePSHUFLWMask(MVT VT, unsigned Imm, 118 SmallVectorImpl<int> &ShuffleMask) { 119 unsigned NumElts = VT.getVectorNumElements(); 120 121 for (unsigned l = 0; l != NumElts; l += 8) { 122 unsigned NewImm = Imm; 123 for (unsigned i = 0, e = 4; i != e; ++i) { 124 ShuffleMask.push_back(l + (NewImm & 3)); 125 NewImm >>= 2; 126 } 127 for (unsigned i = 4, e = 8; i != e; ++i) { 128 ShuffleMask.push_back(l + i); 129 } 130 } 131} 132 133/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates 134/// the type of the vector allowing it to handle different datatypes and vector 135/// widths. 136void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 137 unsigned NumElts = VT.getVectorNumElements(); 138 139 unsigned NumLanes = VT.getSizeInBits() / 128; 140 unsigned NumLaneElts = NumElts / NumLanes; 141 142 unsigned NewImm = Imm; 143 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 144 // each half of a lane comes from different source 145 for (unsigned s = 0; s != NumElts*2; s += NumElts) { 146 for (unsigned i = 0; i != NumLaneElts/2; ++i) { 147 ShuffleMask.push_back(NewImm % NumLaneElts + s + l); 148 NewImm /= NumLaneElts; 149 } 150 } 151 if (NumLaneElts == 4) NewImm = Imm; // reload imm 152 } 153} 154 155/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd 156/// and punpckh*. VT indicates the type of the vector allowing it to handle 157/// different datatypes and vector widths. 158void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 159 unsigned NumElts = VT.getVectorNumElements(); 160 161 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 162 // independently on 128-bit lanes. 163 unsigned NumLanes = VT.getSizeInBits() / 128; 164 if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 165 unsigned NumLaneElts = NumElts / NumLanes; 166 167 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 168 for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) { 169 ShuffleMask.push_back(i); // Reads from dest/src1 170 ShuffleMask.push_back(i+NumElts); // Reads from src/src2 171 } 172 } 173} 174 175/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd 176/// and punpckl*. VT indicates the type of the vector allowing it to handle 177/// different datatypes and vector widths. 178void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 179 unsigned NumElts = VT.getVectorNumElements(); 180 181 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 182 // independently on 128-bit lanes. 183 unsigned NumLanes = VT.getSizeInBits() / 128; 184 if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 185 unsigned NumLaneElts = NumElts / NumLanes; 186 187 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 188 for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) { 189 ShuffleMask.push_back(i); // Reads from dest/src1 190 ShuffleMask.push_back(i+NumElts); // Reads from src/src2 191 } 192 } 193} 194 195void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, 196 SmallVectorImpl<int> &ShuffleMask) { 197 if (Imm & 0x88) 198 return; // Not a shuffle 199 200 unsigned HalfSize = VT.getVectorNumElements()/2; 201 202 for (unsigned l = 0; l != 2; ++l) { 203 unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize; 204 for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i) 205 ShuffleMask.push_back(i); 206 } 207} 208 209/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD. 210/// No VT provided since it only works on 256-bit, 4 element vectors. 211void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 212 for (unsigned i = 0; i != 4; ++i) { 213 ShuffleMask.push_back((Imm >> (2*i)) & 3); 214 } 215} 216 217} // llvm namespace 218