//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Define several functions to decode x86 specific shuffle semantics into a
// generic vector mask.
126b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes// 136b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//===----------------------------------------------------------------------===// 146b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 15583b68f34fd9bdb6114fa48dadc8def2ec96edacDavid Greene#include "X86ShuffleDecode.h" 166b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 176b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//===----------------------------------------------------------------------===// 186b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes// Vector Mask Decoding 196b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//===----------------------------------------------------------------------===// 206b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 21583b68f34fd9bdb6114fa48dadc8def2ec96edacDavid Greenenamespace llvm { 226b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 23a1ffc681ed7372bd371c44a6e186291b6416fe47Craig Toppervoid DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 24ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes // Defaults the copying the dest value. 25ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes ShuffleMask.push_back(0); 26ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes ShuffleMask.push_back(1); 27ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes ShuffleMask.push_back(2); 28ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes ShuffleMask.push_back(3); 29ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes 30ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes // Decode the immediate. 
31ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes unsigned ZMask = Imm & 15; 32ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes unsigned CountD = (Imm >> 4) & 3; 33ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes unsigned CountS = (Imm >> 6) & 3; 34ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes 35ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes // CountS selects which input element to use. 36ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes unsigned InVal = 4+CountS; 37ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes // CountD specifies which element of destination to update. 38ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes ShuffleMask[CountD] = InVal; 39ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes // ZMask zaps values, potentially overriding the CountD elt. 40ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; 41ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; 42ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; 43ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; 44ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes} 45ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes 4655945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes// <3,1> or <6,7,2,3> 47a1ffc681ed7372bd371c44a6e186291b6416fe47Craig Toppervoid DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 4855945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes for (unsigned i = NElts/2; i != NElts; ++i) 4955945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes ShuffleMask.push_back(NElts+i); 506b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 5155945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes for (unsigned i = 
NElts/2; i != NElts; ++i) 5255945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes ShuffleMask.push_back(i); 536b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes} 546b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 5555945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes// <0,2> or <0,1,4,5> 56a1ffc681ed7372bd371c44a6e186291b6416fe47Craig Toppervoid DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 5755945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes for (unsigned i = 0; i != NElts/2; ++i) 5855945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes ShuffleMask.push_back(i); 596b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 6055945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes for (unsigned i = 0; i != NElts/2; ++i) 6155945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes ShuffleMask.push_back(NElts+i); 626b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes} 636b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 644aee1bb2223e59efb814a694edaecd07a3418da0Craig Toppervoid DecodePALIGNRMask(MVT VT, unsigned Imm, 654aee1bb2223e59efb814a694edaecd07a3418da0Craig Topper SmallVectorImpl<int> &ShuffleMask) { 66200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer unsigned NumElts = VT.getVectorNumElements(); 67200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8); 68200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer 69467016e58d57021b14f2ae562d221f00b07cb254Craig Topper unsigned NumLanes = VT.getSizeInBits() / 128; 70467016e58d57021b14f2ae562d221f00b07cb254Craig Topper unsigned NumLaneElts = NumElts / NumLanes; 71467016e58d57021b14f2ae562d221f00b07cb254Craig Topper 72467016e58d57021b14f2ae562d221f00b07cb254Craig Topper for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 73467016e58d57021b14f2ae562d221f00b07cb254Craig Topper for (unsigned i = 0; i != NumLaneElts; ++i) { 
74467016e58d57021b14f2ae562d221f00b07cb254Craig Topper unsigned Base = i + Offset; 75467016e58d57021b14f2ae562d221f00b07cb254Craig Topper // if i+offset is out of this lane then we actually need the other source 76467016e58d57021b14f2ae562d221f00b07cb254Craig Topper if (Base >= NumLaneElts) Base += NumElts - NumLaneElts; 77467016e58d57021b14f2ae562d221f00b07cb254Craig Topper ShuffleMask.push_back(Base + l); 78467016e58d57021b14f2ae562d221f00b07cb254Craig Topper } 79467016e58d57021b14f2ae562d221f00b07cb254Craig Topper } 80200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer} 81200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer 82d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. 83d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// VT indicates the type of the vector allowing it to handle different 84d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// datatypes and vector widths. 85d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 86d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper unsigned NumElts = VT.getVectorNumElements(); 87d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper 88d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper unsigned NumLanes = VT.getSizeInBits() / 128; 89d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper unsigned NumLaneElts = NumElts / NumLanes; 90d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper 91a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper unsigned NewImm = Imm; 92d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 93d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper for (unsigned i = 0; i != NumLaneElts; ++i) { 94d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper ShuffleMask.push_back(NewImm % NumLaneElts + l); 95d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper NewImm /= 
NumLaneElts; 96d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper } 97d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper if (NumLaneElts == 4) NewImm = Imm; // reload imm 986b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes } 996b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes} 1006b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 101d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodePSHUFHWMask(MVT VT, unsigned Imm, 102a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper SmallVectorImpl<int> &ShuffleMask) { 1036b28d356c56d656e8e4d23c71de80162bb2eba5eCraig Topper unsigned NumElts = VT.getVectorNumElements(); 104a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper 105a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper for (unsigned l = 0; l != NumElts; l += 8) { 106a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper unsigned NewImm = Imm; 107a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper for (unsigned i = 0, e = 4; i != e; ++i) { 108a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper ShuffleMask.push_back(l + i); 109a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper } 110a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper for (unsigned i = 4, e = 8; i != e; ++i) { 111a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper ShuffleMask.push_back(l + 4 + (NewImm & 3)); 112a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper NewImm >>= 2; 113a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper } 1146b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes } 1156b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes} 1166b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 117d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodePSHUFLWMask(MVT VT, unsigned Imm, 118a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper SmallVectorImpl<int> &ShuffleMask) { 1196b28d356c56d656e8e4d23c71de80162bb2eba5eCraig Topper unsigned NumElts = VT.getVectorNumElements(); 
120a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper 121a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper for (unsigned l = 0; l != NumElts; l += 8) { 122a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper unsigned NewImm = Imm; 123a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper for (unsigned i = 0, e = 4; i != e; ++i) { 124a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper ShuffleMask.push_back(l + (NewImm & 3)); 125a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper NewImm >>= 2; 126a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper } 127a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper for (unsigned i = 4, e = 8; i != e; ++i) { 128a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper ShuffleMask.push_back(l + i); 129a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper } 1306b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes } 1316b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes} 1326b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 133d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates 134d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// the type of the vector allowing it to handle different datatypes and vector 135d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// widths. 
136d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 13736e36ace77cf84d7c8326957925550624b3fc89cCraig Topper unsigned NumElts = VT.getVectorNumElements(); 13836e36ace77cf84d7c8326957925550624b3fc89cCraig Topper 13936e36ace77cf84d7c8326957925550624b3fc89cCraig Topper unsigned NumLanes = VT.getSizeInBits() / 128; 14036e36ace77cf84d7c8326957925550624b3fc89cCraig Topper unsigned NumLaneElts = NumElts / NumLanes; 14136e36ace77cf84d7c8326957925550624b3fc89cCraig Topper 142a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper unsigned NewImm = Imm; 143d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 144d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper // each half of a lane comes from different source 145d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper for (unsigned s = 0; s != NumElts*2; s += NumElts) { 146d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper for (unsigned i = 0; i != NumLaneElts/2; ++i) { 147d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper ShuffleMask.push_back(NewImm % NumLaneElts + s + l); 148d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper NewImm /= NumLaneElts; 149d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper } 15036e36ace77cf84d7c8326957925550624b3fc89cCraig Topper } 15136e36ace77cf84d7c8326957925550624b3fc89cCraig Topper if (NumLaneElts == 4) NewImm = Imm; // reload imm 1526b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes } 1536b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes} 1546b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 155d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd 156d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// and punpckh*. 
VT indicates the type of the vector allowing it to handle 157d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// different datatypes and vector widths. 158d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 159f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper unsigned NumElts = VT.getVectorNumElements(); 160f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper 161f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 162f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper // independently on 128-bit lanes. 163f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper unsigned NumLanes = VT.getSizeInBits() / 128; 164f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 165f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper unsigned NumLaneElts = NumElts / NumLanes; 166f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper 167d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 168d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) { 169f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper ShuffleMask.push_back(i); // Reads from dest/src1 170f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper ShuffleMask.push_back(i+NumElts); // Reads from src/src2 171f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper } 1726b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes } 1736b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes} 1746b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 1753d8c2ce3e44bc161118a8922e7aa412ef00f6034Craig Topper/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd 176d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// and punpckl*. 
VT indicates the type of the vector allowing it to handle 177d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// different datatypes and vector widths. 178d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 179a20244d1bab2ae6dad1c82c16670d7eb1b3a9087David Greene unsigned NumElts = VT.getVectorNumElements(); 180a20244d1bab2ae6dad1c82c16670d7eb1b3a9087David Greene 1814ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 1824ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes // independently on 128-bit lanes. 1834ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes unsigned NumLanes = VT.getSizeInBits() / 128; 1844ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 1854ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes unsigned NumLaneElts = NumElts / NumLanes; 186a20244d1bab2ae6dad1c82c16670d7eb1b3a9087David Greene 187d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 188d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) { 189f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper ShuffleMask.push_back(i); // Reads from dest/src1 190f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper ShuffleMask.push_back(i+NumElts); // Reads from src/src2 191a20244d1bab2ae6dad1c82c16670d7eb1b3a9087David Greene } 1926b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes } 1936b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes} 1946b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 195d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodeVPERM2X128Mask(MVT VT, unsigned Imm, 196a1ffc681ed7372bd371c44a6e186291b6416fe47Craig Topper SmallVectorImpl<int> &ShuffleMask) { 1972091df3d09bba0705fc62d020e5177a246d67978Craig Topper 
if (Imm & 0x88) 1982091df3d09bba0705fc62d020e5177a246d67978Craig Topper return; // Not a shuffle 1992091df3d09bba0705fc62d020e5177a246d67978Craig Topper 20053cae1362dca8aa312c3e36c10b106ea7d349f93Bruno Cardoso Lopes unsigned HalfSize = VT.getVectorNumElements()/2; 20153cae1362dca8aa312c3e36c10b106ea7d349f93Bruno Cardoso Lopes 202d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper for (unsigned l = 0; l != 2; ++l) { 203d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize; 204d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i) 205d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper ShuffleMask.push_back(i); 206d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper } 20753cae1362dca8aa312c3e36c10b106ea7d349f93Bruno Cardoso Lopes} 20853cae1362dca8aa312c3e36c10b106ea7d349f93Bruno Cardoso Lopes 209156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD. 210156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper/// No VT provided since it only works on 256-bit, 4 element vectors. 211156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Toppervoid DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 212156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper for (unsigned i = 0; i != 4; ++i) { 213156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper ShuffleMask.push_back((Imm >> (2*i)) & 3); 214156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper } 215156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper} 216156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper 217583b68f34fd9bdb6114fa48dadc8def2ec96edacDavid Greene} // llvm namespace 218