//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Define several functions to decode x86 specific shuffle semantics into a
// generic vector mask.
//
//===----------------------------------------------------------------------===//
146b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
15583b68f34fd9bdb6114fa48dadc8def2ec96edacDavid Greene#include "X86ShuffleDecode.h"
1636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "llvm/CodeGen/MachineValueType.h"
176b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
//===----------------------------------------------------------------------===//
//  Vector Mask Decoding
//===----------------------------------------------------------------------===//
216b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
22583b68f34fd9bdb6114fa48dadc8def2ec96edacDavid Greenenamespace llvm {
236b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
24a1ffc681ed7372bd371c44a6e186291b6416fe47Craig Toppervoid DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
25ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  // Defaults the copying the dest value.
26ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  ShuffleMask.push_back(0);
27ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  ShuffleMask.push_back(1);
28ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  ShuffleMask.push_back(2);
29ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  ShuffleMask.push_back(3);
30ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes
31ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  // Decode the immediate.
32ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  unsigned ZMask = Imm & 15;
33ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  unsigned CountD = (Imm >> 4) & 3;
34ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  unsigned CountS = (Imm >> 6) & 3;
35ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes
36ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  // CountS selects which input element to use.
37ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  unsigned InVal = 4+CountS;
38ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  // CountD specifies which element of destination to update.
39ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  ShuffleMask[CountD] = InVal;
40ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  // ZMask zaps values, potentially overriding the CountD elt.
41ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
42ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
43ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
44ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
45ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes}
46ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes
4755945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes// <3,1> or <6,7,2,3>
48a1ffc681ed7372bd371c44a6e186291b6416fe47Craig Toppervoid DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
4955945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes  for (unsigned i = NElts/2; i != NElts; ++i)
5055945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes    ShuffleMask.push_back(NElts+i);
516b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
5255945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes  for (unsigned i = NElts/2; i != NElts; ++i)
5355945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes    ShuffleMask.push_back(i);
546b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes}
556b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
5655945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes// <0,2> or <0,1,4,5>
57a1ffc681ed7372bd371c44a6e186291b6416fe47Craig Toppervoid DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
5855945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes  for (unsigned i = 0; i != NElts/2; ++i)
5955945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes    ShuffleMask.push_back(i);
606b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
6155945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes  for (unsigned i = 0; i != NElts/2; ++i)
6255945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes    ShuffleMask.push_back(NElts+i);
636b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes}
646b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
654aee1bb2223e59efb814a694edaecd07a3418da0Craig Toppervoid DecodePALIGNRMask(MVT VT, unsigned Imm,
664aee1bb2223e59efb814a694edaecd07a3418da0Craig Topper                       SmallVectorImpl<int> &ShuffleMask) {
67200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer  unsigned NumElts = VT.getVectorNumElements();
68200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer  unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8);
69200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer
70467016e58d57021b14f2ae562d221f00b07cb254Craig Topper  unsigned NumLanes = VT.getSizeInBits() / 128;
71467016e58d57021b14f2ae562d221f00b07cb254Craig Topper  unsigned NumLaneElts = NumElts / NumLanes;
72467016e58d57021b14f2ae562d221f00b07cb254Craig Topper
73467016e58d57021b14f2ae562d221f00b07cb254Craig Topper  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
74467016e58d57021b14f2ae562d221f00b07cb254Craig Topper    for (unsigned i = 0; i != NumLaneElts; ++i) {
75467016e58d57021b14f2ae562d221f00b07cb254Craig Topper      unsigned Base = i + Offset;
76467016e58d57021b14f2ae562d221f00b07cb254Craig Topper      // if i+offset is out of this lane then we actually need the other source
77467016e58d57021b14f2ae562d221f00b07cb254Craig Topper      if (Base >= NumLaneElts) Base += NumElts - NumLaneElts;
78467016e58d57021b14f2ae562d221f00b07cb254Craig Topper      ShuffleMask.push_back(Base + l);
79467016e58d57021b14f2ae562d221f00b07cb254Craig Topper    }
80467016e58d57021b14f2ae562d221f00b07cb254Craig Topper  }
81200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer}
82200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer
83d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.
84d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// VT indicates the type of the vector allowing it to handle different
85d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// datatypes and vector widths.
86d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
87d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper  unsigned NumElts = VT.getVectorNumElements();
88d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper
89d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper  unsigned NumLanes = VT.getSizeInBits() / 128;
90d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper  unsigned NumLaneElts = NumElts / NumLanes;
91d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper
92a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper  unsigned NewImm = Imm;
93d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
94d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper    for (unsigned i = 0; i != NumLaneElts; ++i) {
95d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper      ShuffleMask.push_back(NewImm % NumLaneElts + l);
96d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper      NewImm /= NumLaneElts;
97d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper    }
98d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper    if (NumLaneElts == 4) NewImm = Imm; // reload imm
996b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes  }
1006b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes}
1016b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
102d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodePSHUFHWMask(MVT VT, unsigned Imm,
103a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper                       SmallVectorImpl<int> &ShuffleMask) {
1046b28d356c56d656e8e4d23c71de80162bb2eba5eCraig Topper  unsigned NumElts = VT.getVectorNumElements();
105a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper
106a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper  for (unsigned l = 0; l != NumElts; l += 8) {
107a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    unsigned NewImm = Imm;
108a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    for (unsigned i = 0, e = 4; i != e; ++i) {
109a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper      ShuffleMask.push_back(l + i);
110a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    }
111a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    for (unsigned i = 4, e = 8; i != e; ++i) {
112a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper      ShuffleMask.push_back(l + 4 + (NewImm & 3));
113a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper      NewImm >>= 2;
114a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    }
1156b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes  }
1166b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes}
1176b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
118d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodePSHUFLWMask(MVT VT, unsigned Imm,
119a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper                       SmallVectorImpl<int> &ShuffleMask) {
1206b28d356c56d656e8e4d23c71de80162bb2eba5eCraig Topper  unsigned NumElts = VT.getVectorNumElements();
121a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper
122a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper  for (unsigned l = 0; l != NumElts; l += 8) {
123a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    unsigned NewImm = Imm;
124a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    for (unsigned i = 0, e = 4; i != e; ++i) {
125a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper      ShuffleMask.push_back(l + (NewImm & 3));
126a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper      NewImm >>= 2;
127a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    }
128a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    for (unsigned i = 4, e = 8; i != e; ++i) {
129a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper      ShuffleMask.push_back(l + i);
130a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    }
1316b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes  }
1326b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes}
1336b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
134d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
135d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// the type of the vector allowing it to handle different datatypes and vector
136d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// widths.
137d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
13836e36ace77cf84d7c8326957925550624b3fc89cCraig Topper  unsigned NumElts = VT.getVectorNumElements();
13936e36ace77cf84d7c8326957925550624b3fc89cCraig Topper
14036e36ace77cf84d7c8326957925550624b3fc89cCraig Topper  unsigned NumLanes = VT.getSizeInBits() / 128;
14136e36ace77cf84d7c8326957925550624b3fc89cCraig Topper  unsigned NumLaneElts = NumElts / NumLanes;
14236e36ace77cf84d7c8326957925550624b3fc89cCraig Topper
143a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper  unsigned NewImm = Imm;
144d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
145d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper    // each half of a lane comes from different source
146d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper    for (unsigned s = 0; s != NumElts*2; s += NumElts) {
147d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper      for (unsigned i = 0; i != NumLaneElts/2; ++i) {
148d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper        ShuffleMask.push_back(NewImm % NumLaneElts + s + l);
149d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper        NewImm /= NumLaneElts;
150d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper      }
15136e36ace77cf84d7c8326957925550624b3fc89cCraig Topper    }
15236e36ace77cf84d7c8326957925550624b3fc89cCraig Topper    if (NumLaneElts == 4) NewImm = Imm; // reload imm
1536b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes  }
1546b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes}
1556b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
156d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
157d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// and punpckh*. VT indicates the type of the vector allowing it to handle
158d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// different datatypes and vector widths.
159d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
160f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper  unsigned NumElts = VT.getVectorNumElements();
161f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper
162f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
163f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper  // independently on 128-bit lanes.
164f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper  unsigned NumLanes = VT.getSizeInBits() / 128;
165f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper  if (NumLanes == 0 ) NumLanes = 1;  // Handle MMX
166f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper  unsigned NumLaneElts = NumElts / NumLanes;
167f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper
168d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
169d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper    for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) {
170f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper      ShuffleMask.push_back(i);          // Reads from dest/src1
171f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper      ShuffleMask.push_back(i+NumElts);  // Reads from src/src2
172f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper    }
1736b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes  }
1746b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes}
1756b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
1763d8c2ce3e44bc161118a8922e7aa412ef00f6034Craig Topper/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
177d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// and punpckl*. VT indicates the type of the vector allowing it to handle
178d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// different datatypes and vector widths.
179d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
180a20244d1bab2ae6dad1c82c16670d7eb1b3a9087David Greene  unsigned NumElts = VT.getVectorNumElements();
181a20244d1bab2ae6dad1c82c16670d7eb1b3a9087David Greene
1824ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
1834ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes  // independently on 128-bit lanes.
1844ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes  unsigned NumLanes = VT.getSizeInBits() / 128;
1854ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes  if (NumLanes == 0 ) NumLanes = 1;  // Handle MMX
1864ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes  unsigned NumLaneElts = NumElts / NumLanes;
187a20244d1bab2ae6dad1c82c16670d7eb1b3a9087David Greene
188d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
189d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper    for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) {
190f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper      ShuffleMask.push_back(i);          // Reads from dest/src1
191f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper      ShuffleMask.push_back(i+NumElts);  // Reads from src/src2
192a20244d1bab2ae6dad1c82c16670d7eb1b3a9087David Greene    }
1936b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes  }
1946b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes}
1956b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
196d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
197a1ffc681ed7372bd371c44a6e186291b6416fe47Craig Topper                          SmallVectorImpl<int> &ShuffleMask) {
1982091df3d09bba0705fc62d020e5177a246d67978Craig Topper  if (Imm & 0x88)
1992091df3d09bba0705fc62d020e5177a246d67978Craig Topper    return; // Not a shuffle
2002091df3d09bba0705fc62d020e5177a246d67978Craig Topper
20153cae1362dca8aa312c3e36c10b106ea7d349f93Bruno Cardoso Lopes  unsigned HalfSize = VT.getVectorNumElements()/2;
20253cae1362dca8aa312c3e36c10b106ea7d349f93Bruno Cardoso Lopes
203d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper  for (unsigned l = 0; l != 2; ++l) {
204d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper    unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize;
205d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper    for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i)
206d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper      ShuffleMask.push_back(i);
207d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper  }
20853cae1362dca8aa312c3e36c10b106ea7d349f93Bruno Cardoso Lopes}
20953cae1362dca8aa312c3e36c10b106ea7d349f93Bruno Cardoso Lopes
210156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
211156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper/// No VT provided since it only works on 256-bit, 4 element vectors.
212156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Toppervoid DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
213156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper  for (unsigned i = 0; i != 4; ++i) {
214156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper    ShuffleMask.push_back((Imm >> (2*i)) & 3);
215156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper  }
216156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper}
217156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper
218583b68f34fd9bdb6114fa48dadc8def2ec96edacDavid Greene} // llvm namespace
219