11d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===// 21d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand// 31d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand// The LLVM Compiler Infrastructure 41d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand// 51d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand// This file is distributed under the University of Illinois Open Source 61d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand// License. See LICENSE.TXT for details. 71d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand// 81d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand//===----------------------------------------------------------------------===// 91d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand// 101d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand// Define several functions to decode x86 specific shuffle semantics into a 111d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand// generic vector mask. 121d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand// 131d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand//===----------------------------------------------------------------------===// 141d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 151d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand#include "X86ShuffleDecode.h" 161d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 171d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand//===----------------------------------------------------------------------===// 181d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand// Vector Mask Decoding 191d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand//===----------------------------------------------------------------------===// 201d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 211d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandnamespace llvm { 221d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 231d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandvoid DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 241d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand // Defaults the copying the dest value. 251d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand ShuffleMask.push_back(0); 261d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand ShuffleMask.push_back(1); 271d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand ShuffleMask.push_back(2); 281d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand ShuffleMask.push_back(3); 291d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 30f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford // Decode the immediate. 31f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford unsigned ZMask = Imm & 15; 32f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford unsigned CountD = (Imm >> 4) & 3; 33f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford unsigned CountS = (Imm >> 6) & 3; 34f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford 35f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford // CountS selects which input element to use. 36f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford unsigned InVal = 4+CountS; 37f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford // CountD specifies which element of destination to update. 38f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford ShuffleMask[CountD] = InVal; 39f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford // ZMask zaps values, potentially overriding the CountD elt. 40f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; 41f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; 42f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; 43f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; 44f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford} 45f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford 46f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford// <3,1> or <6,7,2,3> 47f917bc0406e47866eb1f6c0378de16498018b620Richard Sandifordvoid DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 48f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford for (unsigned i = NElts/2; i != NElts; ++i) 49f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford ShuffleMask.push_back(NElts+i); 50f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford 51f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford for (unsigned i = NElts/2; i != NElts; ++i) 52f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford ShuffleMask.push_back(i); 53f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford} 54f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford 55f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford// <0,2> or <0,1,4,5> 56f917bc0406e47866eb1f6c0378de16498018b620Richard Sandifordvoid DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 57f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford for (unsigned i = 0; i != NElts/2; ++i) 58f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford ShuffleMask.push_back(i); 59f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford 60f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford for (unsigned i = 0; i != NElts/2; ++i) 61f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford ShuffleMask.push_back(NElts+i); 62f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford} 63f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford 64f917bc0406e47866eb1f6c0378de16498018b620Richard Sandifordvoid DecodePALIGNRMask(MVT VT, unsigned Imm, 65f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford SmallVectorImpl<int> &ShuffleMask) { 66f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford unsigned NumElts = VT.getVectorNumElements(); 67f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8); 68f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford 69f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford unsigned NumLanes = VT.getSizeInBits() / 128; 70f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford unsigned NumLaneElts = NumElts / NumLanes; 71f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford 724a971705bc6030dc2e4338b3cd5cffa2e0f88b7bRafael Espindola for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 734a971705bc6030dc2e4338b3cd5cffa2e0f88b7bRafael Espindola for (unsigned i = 0; i != NumLaneElts; ++i) { 746e53180db120b30f600ac31611a9dd47ef7f4921Rafael Espindola unsigned Base = i + Offset; 754a971705bc6030dc2e4338b3cd5cffa2e0f88b7bRafael Espindola // if i+offset is out of this lane then we actually need the other source 764a971705bc6030dc2e4338b3cd5cffa2e0f88b7bRafael Espindola if (Base >= NumLaneElts) Base += NumElts - NumLaneElts; 774a971705bc6030dc2e4338b3cd5cffa2e0f88b7bRafael Espindola ShuffleMask.push_back(Base + l); 784a971705bc6030dc2e4338b3cd5cffa2e0f88b7bRafael Espindola } 791d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand } 801d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand} 811d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 821d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. 831d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// VT indicates the type of the vector allowing it to handle different 841d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// datatypes and vector widths. 851d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandvoid DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 861d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NumElts = VT.getVectorNumElements(); 871d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 881d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NumLanes = VT.getSizeInBits() / 128; 891d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NumLaneElts = NumElts / NumLanes; 901d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 911d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NewImm = Imm; 921d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 931d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand for (unsigned i = 0; i != NumLaneElts; ++i) { 941d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand ShuffleMask.push_back(NewImm % NumLaneElts + l); 951d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand NewImm /= NumLaneElts; 961d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand } 971d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand if (NumLaneElts == 4) NewImm = Imm; // reload imm 981d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand } 991d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand} 1001d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 1011d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandvoid DecodePSHUFHWMask(MVT VT, unsigned Imm, 1021d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand SmallVectorImpl<int> &ShuffleMask) { 1031d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NumElts = VT.getVectorNumElements(); 1047a1ead46fb629839e5ce25574246ee743ab8b54fRichard Sandiford 1051d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand for (unsigned l = 0; l != NumElts; l += 8) { 1061d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NewImm = Imm; 1071d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand for (unsigned i = 0, e = 4; i != e; ++i) { 1081d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand ShuffleMask.push_back(l + i); 1091d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand } 1101d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand for (unsigned i = 4, e = 8; i != e; ++i) { 1111d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand ShuffleMask.push_back(l + 4 + (NewImm & 3)); 1121d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand NewImm >>= 2; 1131d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand } 1141d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand } 1151d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand} 1161d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 1171d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandvoid DecodePSHUFLWMask(MVT VT, unsigned Imm, 1181d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand SmallVectorImpl<int> &ShuffleMask) { 1191d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NumElts = VT.getVectorNumElements(); 1201d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 1211d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand for (unsigned l = 0; l != NumElts; l += 8) { 1221d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NewImm = Imm; 1231d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand for (unsigned i = 0, e = 4; i != e; ++i) { 1241d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand ShuffleMask.push_back(l + (NewImm & 3)); 1251d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand NewImm >>= 2; 1261d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand } 1271d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand for (unsigned i = 4, e = 8; i != e; ++i) { 1281d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand ShuffleMask.push_back(l + i); 1291d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand } 1301d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand } 1311d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand} 1321d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 1331d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates 1341d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// the type of the vector allowing it to handle different datatypes and vector 1351d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// widths. 1361d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandvoid DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 1371d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NumElts = VT.getVectorNumElements(); 1381d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 1391d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NumLanes = VT.getSizeInBits() / 128; 1401d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NumLaneElts = NumElts / NumLanes; 1411d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 1421d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NewImm = Imm; 1431d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 1441d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand // each half of a lane comes from different source 1457a1ead46fb629839e5ce25574246ee743ab8b54fRichard Sandiford for (unsigned s = 0; s != NumElts*2; s += NumElts) { 1461d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand for (unsigned i = 0; i != NumLaneElts/2; ++i) { 1471d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand ShuffleMask.push_back(NewImm % NumLaneElts + s + l); 1481d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand NewImm /= NumLaneElts; 1491d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand } 1501d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand } 1511d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand if (NumLaneElts == 4) NewImm = Imm; // reload imm 1521d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand } 1531d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand} 1541d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 1551d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd 1561d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// and punpckh*. VT indicates the type of the vector allowing it to handle 1571d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// different datatypes and vector widths. 1581d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandvoid DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 1591d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NumElts = VT.getVectorNumElements(); 1601d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 1611d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 1621d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand // independently on 128-bit lanes. 1631d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NumLanes = VT.getSizeInBits() / 128; 1641d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 1651d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NumLaneElts = NumElts / NumLanes; 1661d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 1671d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 1681d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) { 1691d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand ShuffleMask.push_back(i); // Reads from dest/src1 1701d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand ShuffleMask.push_back(i+NumElts); // Reads from src/src2 1711d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand } 1721d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand } 1731d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand} 1741d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 1751d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd 1761d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// and punpckl*. VT indicates the type of the vector allowing it to handle 1771d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// different datatypes and vector widths. 1781d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandvoid DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 1791d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NumElts = VT.getVectorNumElements(); 1801d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 1811d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 1821d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand // independently on 128-bit lanes. 1831d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NumLanes = VT.getSizeInBits() / 128; 1841d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 1851d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned NumLaneElts = NumElts / NumLanes; 1861d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 1871d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 1881d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) { 1891d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand ShuffleMask.push_back(i); // Reads from dest/src1 1901d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand ShuffleMask.push_back(i+NumElts); // Reads from src/src2 1911d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand } 1921d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand } 1931d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand} 1941d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 1951d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandvoid DecodeVPERM2X128Mask(MVT VT, unsigned Imm, 1961d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand SmallVectorImpl<int> &ShuffleMask) { 1971d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand if (Imm & 0x88) 1981d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand return; // Not a shuffle 1991d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 2001d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned HalfSize = VT.getVectorNumElements()/2; 2011d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand 2021d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand for (unsigned l = 0; l != 2; ++l) { 2031d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize; 2041d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i) 205 ShuffleMask.push_back(i); 206 } 207} 208 209/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD. 210/// No VT provided since it only works on 256-bit, 4 element vectors. 211void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 212 for (unsigned i = 0; i != 4; ++i) { 213 ShuffleMask.push_back((Imm >> (2*i)) & 3); 214 } 215} 216 217} // llvm namespace 218