1c4db4e5105ccd82df19f141957511f735a9be2d0David Greene//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===// 26b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes// 36b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes// The LLVM Compiler Infrastructure 46b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes// 56b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes// This file is distributed under the University of Illinois Open Source 66b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes// License. See LICENSE.TXT for details. 76b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes// 86b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//===----------------------------------------------------------------------===// 96b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes// 106b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes// Define several functions to decode x86 specific shuffle semantics into a 116b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes// generic vector mask. 126b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes// 136b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//===----------------------------------------------------------------------===// 146b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 15583b68f34fd9bdb6114fa48dadc8def2ec96edacDavid Greene#include "X86ShuffleDecode.h" 1636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/CodeGen/MachineValueType.h" 176b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 186b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//===----------------------------------------------------------------------===// 196b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes// Vector Mask Decoding 206b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//===----------------------------------------------------------------------===// 216b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 22583b68f34fd9bdb6114fa48dadc8def2ec96edacDavid Greenenamespace llvm { 236b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 24a1ffc681ed7372bd371c44a6e186291b6416fe47Craig Toppervoid DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 25ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes // Defaults the copying the dest value. 26ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes ShuffleMask.push_back(0); 27ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes ShuffleMask.push_back(1); 28ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes ShuffleMask.push_back(2); 29ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes ShuffleMask.push_back(3); 30ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes 31ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes // Decode the immediate. 32ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes unsigned ZMask = Imm & 15; 33ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes unsigned CountD = (Imm >> 4) & 3; 34ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes unsigned CountS = (Imm >> 6) & 3; 35ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes 36ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes // CountS selects which input element to use. 37ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes unsigned InVal = 4+CountS; 38ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes // CountD specifies which element of destination to update. 39ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes ShuffleMask[CountD] = InVal; 40ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes // ZMask zaps values, potentially overriding the CountD elt. 41ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; 42ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; 43ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; 44ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; 45ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes} 46ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes 4755945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes// <3,1> or <6,7,2,3> 48a1ffc681ed7372bd371c44a6e186291b6416fe47Craig Toppervoid DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 4955945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes for (unsigned i = NElts/2; i != NElts; ++i) 5055945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes ShuffleMask.push_back(NElts+i); 516b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 5255945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes for (unsigned i = NElts/2; i != NElts; ++i) 5355945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes ShuffleMask.push_back(i); 546b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes} 556b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 5655945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes// <0,2> or <0,1,4,5> 57a1ffc681ed7372bd371c44a6e186291b6416fe47Craig Toppervoid DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 5855945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes for (unsigned i = 0; i != NElts/2; ++i) 5955945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes ShuffleMask.push_back(i); 606b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 6155945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes for (unsigned i = 0; i != NElts/2; ++i) 6255945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes ShuffleMask.push_back(NElts+i); 636b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes} 646b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 654aee1bb2223e59efb814a694edaecd07a3418da0Craig Toppervoid DecodePALIGNRMask(MVT VT, unsigned Imm, 664aee1bb2223e59efb814a694edaecd07a3418da0Craig Topper SmallVectorImpl<int> &ShuffleMask) { 67200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer unsigned NumElts = VT.getVectorNumElements(); 68200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8); 69200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer 70467016e58d57021b14f2ae562d221f00b07cb254Craig Topper unsigned NumLanes = VT.getSizeInBits() / 128; 71467016e58d57021b14f2ae562d221f00b07cb254Craig Topper unsigned NumLaneElts = NumElts / NumLanes; 72467016e58d57021b14f2ae562d221f00b07cb254Craig Topper 73467016e58d57021b14f2ae562d221f00b07cb254Craig Topper for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 74467016e58d57021b14f2ae562d221f00b07cb254Craig Topper for (unsigned i = 0; i != NumLaneElts; ++i) { 75467016e58d57021b14f2ae562d221f00b07cb254Craig Topper unsigned Base = i + Offset; 76467016e58d57021b14f2ae562d221f00b07cb254Craig Topper // if i+offset is out of this lane then we actually need the other source 77467016e58d57021b14f2ae562d221f00b07cb254Craig Topper if (Base >= NumLaneElts) Base += NumElts - NumLaneElts; 78467016e58d57021b14f2ae562d221f00b07cb254Craig Topper ShuffleMask.push_back(Base + l); 79467016e58d57021b14f2ae562d221f00b07cb254Craig Topper } 80467016e58d57021b14f2ae562d221f00b07cb254Craig Topper } 81200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer} 82200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer 83d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. 84d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// VT indicates the type of the vector allowing it to handle different 85d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// datatypes and vector widths. 86d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 87d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper unsigned NumElts = VT.getVectorNumElements(); 88d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper 89d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper unsigned NumLanes = VT.getSizeInBits() / 128; 90d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper unsigned NumLaneElts = NumElts / NumLanes; 91d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper 92a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper unsigned NewImm = Imm; 93d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 94d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper for (unsigned i = 0; i != NumLaneElts; ++i) { 95d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper ShuffleMask.push_back(NewImm % NumLaneElts + l); 96d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper NewImm /= NumLaneElts; 97d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper } 98d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper if (NumLaneElts == 4) NewImm = Imm; // reload imm 996b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes } 1006b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes} 1016b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 102d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodePSHUFHWMask(MVT VT, unsigned Imm, 103a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper SmallVectorImpl<int> &ShuffleMask) { 1046b28d356c56d656e8e4d23c71de80162bb2eba5eCraig Topper unsigned NumElts = VT.getVectorNumElements(); 105a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper 106a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper for (unsigned l = 0; l != NumElts; l += 8) { 107a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper unsigned NewImm = Imm; 108a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper for (unsigned i = 0, e = 4; i != e; ++i) { 109a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper ShuffleMask.push_back(l + i); 110a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper } 111a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper for (unsigned i = 4, e = 8; i != e; ++i) { 112a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper ShuffleMask.push_back(l + 4 + (NewImm & 3)); 113a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper NewImm >>= 2; 114a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper } 1156b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes } 1166b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes} 1176b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 118d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodePSHUFLWMask(MVT VT, unsigned Imm, 119a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper SmallVectorImpl<int> &ShuffleMask) { 1206b28d356c56d656e8e4d23c71de80162bb2eba5eCraig Topper unsigned NumElts = VT.getVectorNumElements(); 121a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper 122a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper for (unsigned l = 0; l != NumElts; l += 8) { 123a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper unsigned NewImm = Imm; 124a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper for (unsigned i = 0, e = 4; i != e; ++i) { 125a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper ShuffleMask.push_back(l + (NewImm & 3)); 126a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper NewImm >>= 2; 127a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper } 128a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper for (unsigned i = 4, e = 8; i != e; ++i) { 129a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper ShuffleMask.push_back(l + i); 130a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper } 1316b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes } 1326b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes} 1336b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 134d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates 135d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// the type of the vector allowing it to handle different datatypes and vector 136d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// widths. 137d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 13836e36ace77cf84d7c8326957925550624b3fc89cCraig Topper unsigned NumElts = VT.getVectorNumElements(); 13936e36ace77cf84d7c8326957925550624b3fc89cCraig Topper 14036e36ace77cf84d7c8326957925550624b3fc89cCraig Topper unsigned NumLanes = VT.getSizeInBits() / 128; 14136e36ace77cf84d7c8326957925550624b3fc89cCraig Topper unsigned NumLaneElts = NumElts / NumLanes; 14236e36ace77cf84d7c8326957925550624b3fc89cCraig Topper 143a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper unsigned NewImm = Imm; 144d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 145d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper // each half of a lane comes from different source 146d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper for (unsigned s = 0; s != NumElts*2; s += NumElts) { 147d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper for (unsigned i = 0; i != NumLaneElts/2; ++i) { 148d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper ShuffleMask.push_back(NewImm % NumLaneElts + s + l); 149d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper NewImm /= NumLaneElts; 150d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper } 15136e36ace77cf84d7c8326957925550624b3fc89cCraig Topper } 15236e36ace77cf84d7c8326957925550624b3fc89cCraig Topper if (NumLaneElts == 4) NewImm = Imm; // reload imm 1536b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes } 1546b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes} 1556b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 156d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd 157d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// and punpckh*. VT indicates the type of the vector allowing it to handle 158d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// different datatypes and vector widths. 159d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 160f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper unsigned NumElts = VT.getVectorNumElements(); 161f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper 162f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 163f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper // independently on 128-bit lanes. 164f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper unsigned NumLanes = VT.getSizeInBits() / 128; 165f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 166f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper unsigned NumLaneElts = NumElts / NumLanes; 167f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper 168d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 169d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) { 170f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper ShuffleMask.push_back(i); // Reads from dest/src1 171f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper ShuffleMask.push_back(i+NumElts); // Reads from src/src2 172f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper } 1736b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes } 1746b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes} 1756b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 1763d8c2ce3e44bc161118a8922e7aa412ef00f6034Craig Topper/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd 177d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// and punpckl*. VT indicates the type of the vector allowing it to handle 178d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// different datatypes and vector widths. 179d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 180a20244d1bab2ae6dad1c82c16670d7eb1b3a9087David Greene unsigned NumElts = VT.getVectorNumElements(); 181a20244d1bab2ae6dad1c82c16670d7eb1b3a9087David Greene 1824ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 1834ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes // independently on 128-bit lanes. 1844ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes unsigned NumLanes = VT.getSizeInBits() / 128; 1854ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 1864ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes unsigned NumLaneElts = NumElts / NumLanes; 187a20244d1bab2ae6dad1c82c16670d7eb1b3a9087David Greene 188d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 189d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) { 190f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper ShuffleMask.push_back(i); // Reads from dest/src1 191f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper ShuffleMask.push_back(i+NumElts); // Reads from src/src2 192a20244d1bab2ae6dad1c82c16670d7eb1b3a9087David Greene } 1936b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes } 1946b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes} 1956b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes 196d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodeVPERM2X128Mask(MVT VT, unsigned Imm, 197a1ffc681ed7372bd371c44a6e186291b6416fe47Craig Topper SmallVectorImpl<int> &ShuffleMask) { 1982091df3d09bba0705fc62d020e5177a246d67978Craig Topper if (Imm & 0x88) 1992091df3d09bba0705fc62d020e5177a246d67978Craig Topper return; // Not a shuffle 2002091df3d09bba0705fc62d020e5177a246d67978Craig Topper 20153cae1362dca8aa312c3e36c10b106ea7d349f93Bruno Cardoso Lopes unsigned HalfSize = VT.getVectorNumElements()/2; 20253cae1362dca8aa312c3e36c10b106ea7d349f93Bruno Cardoso Lopes 203d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper for (unsigned l = 0; l != 2; ++l) { 204d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize; 205d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i) 206d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper ShuffleMask.push_back(i); 207d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper } 20853cae1362dca8aa312c3e36c10b106ea7d349f93Bruno Cardoso Lopes} 20953cae1362dca8aa312c3e36c10b106ea7d349f93Bruno Cardoso Lopes 210156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD. 211156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper/// No VT provided since it only works on 256-bit, 4 element vectors. 212156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Toppervoid DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 213156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper for (unsigned i = 0; i != 4; ++i) { 214156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper ShuffleMask.push_back((Imm >> (2*i)) & 3); 215156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper } 216156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper} 217156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper 218583b68f34fd9bdb6114fa48dadc8def2ec96edacDavid Greene} // llvm namespace 219