1//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Define several functions to decode x86 specific shuffle semantics into a 11// generic vector mask. 12// 13//===----------------------------------------------------------------------===// 14 15#include "X86ShuffleDecode.h" 16 17//===----------------------------------------------------------------------===// 18// Vector Mask Decoding 19//===----------------------------------------------------------------------===// 20 21namespace llvm { 22 23void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 24 // Defaults the copying the dest value. 25 ShuffleMask.push_back(0); 26 ShuffleMask.push_back(1); 27 ShuffleMask.push_back(2); 28 ShuffleMask.push_back(3); 29 30 // Decode the immediate. 31 unsigned ZMask = Imm & 15; 32 unsigned CountD = (Imm >> 4) & 3; 33 unsigned CountS = (Imm >> 6) & 3; 34 35 // CountS selects which input element to use. 36 unsigned InVal = 4+CountS; 37 // CountD specifies which element of destination to update. 38 ShuffleMask[CountD] = InVal; 39 // ZMask zaps values, potentially overriding the CountD elt. 40 if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; 41 if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; 42 if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; 43 if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; 44} 45 46// <3,1> or <6,7,2,3> 47void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 48 for (unsigned i = NElts/2; i != NElts; ++i) 49 ShuffleMask.push_back(NElts+i); 50 51 for (unsigned i = NElts/2; i != NElts; ++i) 52 ShuffleMask.push_back(i); 53} 54 55// <0,2> or <0,1,4,5> 56void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 57 for (unsigned i = 0; i != NElts/2; ++i) 58 ShuffleMask.push_back(i); 59 60 for (unsigned i = 0; i != NElts/2; ++i) 61 ShuffleMask.push_back(NElts+i); 62} 63 64/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. 65/// VT indicates the type of the vector allowing it to handle different 66/// datatypes and vector widths. 67void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 68 unsigned NumElts = VT.getVectorNumElements(); 69 70 unsigned NumLanes = VT.getSizeInBits() / 128; 71 unsigned NumLaneElts = NumElts / NumLanes; 72 73 unsigned NewImm = Imm; 74 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 75 for (unsigned i = 0; i != NumLaneElts; ++i) { 76 ShuffleMask.push_back(NewImm % NumLaneElts + l); 77 NewImm /= NumLaneElts; 78 } 79 if (NumLaneElts == 4) NewImm = Imm; // reload imm 80 } 81} 82 83void DecodePSHUFHWMask(MVT VT, unsigned Imm, 84 SmallVectorImpl<int> &ShuffleMask) { 85 unsigned NumElts = VT.getVectorNumElements(); 86 87 for (unsigned l = 0; l != NumElts; l += 8) { 88 unsigned NewImm = Imm; 89 for (unsigned i = 0, e = 4; i != e; ++i) { 90 ShuffleMask.push_back(l + i); 91 } 92 for (unsigned i = 4, e = 8; i != e; ++i) { 93 ShuffleMask.push_back(l + 4 + (NewImm & 3)); 94 NewImm >>= 2; 95 } 96 } 97} 98 99void DecodePSHUFLWMask(MVT VT, unsigned Imm, 100 SmallVectorImpl<int> &ShuffleMask) { 101 unsigned NumElts = VT.getVectorNumElements(); 102 103 for (unsigned l = 0; l != NumElts; l += 8) { 104 unsigned NewImm = Imm; 105 for (unsigned i = 0, e = 4; i != e; ++i) { 106 ShuffleMask.push_back(l + (NewImm & 3)); 107 NewImm >>= 2; 108 } 109 for (unsigned i = 4, e = 8; i != e; ++i) { 110 ShuffleMask.push_back(l + i); 111 } 112 } 113} 114 115/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates 116/// the type of the vector allowing it to handle different datatypes and vector 117/// widths. 118void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 119 unsigned NumElts = VT.getVectorNumElements(); 120 121 unsigned NumLanes = VT.getSizeInBits() / 128; 122 unsigned NumLaneElts = NumElts / NumLanes; 123 124 unsigned NewImm = Imm; 125 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 126 // each half of a lane comes from different source 127 for (unsigned s = 0; s != NumElts*2; s += NumElts) { 128 for (unsigned i = 0; i != NumLaneElts/2; ++i) { 129 ShuffleMask.push_back(NewImm % NumLaneElts + s + l); 130 NewImm /= NumLaneElts; 131 } 132 } 133 if (NumLaneElts == 4) NewImm = Imm; // reload imm 134 } 135} 136 137/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd 138/// and punpckh*. VT indicates the type of the vector allowing it to handle 139/// different datatypes and vector widths. 140void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 141 unsigned NumElts = VT.getVectorNumElements(); 142 143 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 144 // independently on 128-bit lanes. 145 unsigned NumLanes = VT.getSizeInBits() / 128; 146 if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 147 unsigned NumLaneElts = NumElts / NumLanes; 148 149 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 150 for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) { 151 ShuffleMask.push_back(i); // Reads from dest/src1 152 ShuffleMask.push_back(i+NumElts); // Reads from src/src2 153 } 154 } 155} 156 157/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd 158/// and punpckl*. VT indicates the type of the vector allowing it to handle 159/// different datatypes and vector widths. 160void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 161 unsigned NumElts = VT.getVectorNumElements(); 162 163 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 164 // independently on 128-bit lanes. 165 unsigned NumLanes = VT.getSizeInBits() / 128; 166 if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 167 unsigned NumLaneElts = NumElts / NumLanes; 168 169 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 170 for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) { 171 ShuffleMask.push_back(i); // Reads from dest/src1 172 ShuffleMask.push_back(i+NumElts); // Reads from src/src2 173 } 174 } 175} 176 177void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, 178 SmallVectorImpl<int> &ShuffleMask) { 179 if (Imm & 0x88) 180 return; // Not a shuffle 181 182 unsigned HalfSize = VT.getVectorNumElements()/2; 183 184 for (unsigned l = 0; l != 2; ++l) { 185 unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize; 186 for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i) 187 ShuffleMask.push_back(i); 188 } 189} 190 191/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD. 192/// No VT provided since it only works on 256-bit, 4 element vectors. 193void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 194 for (unsigned i = 0; i != 4; ++i) { 195 ShuffleMask.push_back((Imm >> (2*i)) & 3); 196 } 197} 198 199} // llvm namespace 200