1c4db4e5105ccd82df19f141957511f735a9be2d0David Greene//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
26b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//
36b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//                     The LLVM Compiler Infrastructure
46b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//
56b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes// This file is distributed under the University of Illinois Open Source
66b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes// License. See LICENSE.TXT for details.
76b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//
86b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//===----------------------------------------------------------------------===//
96b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//
106b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes// Define several functions to decode x86 specific shuffle semantics into a
116b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes// generic vector mask.
126b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//
136b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//===----------------------------------------------------------------------===//
146b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
15583b68f34fd9bdb6114fa48dadc8def2ec96edacDavid Greene#include "X86ShuffleDecode.h"
166b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
176b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//===----------------------------------------------------------------------===//
186b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//  Vector Mask Decoding
196b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes//===----------------------------------------------------------------------===//
206b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
21583b68f34fd9bdb6114fa48dadc8def2ec96edacDavid Greenenamespace llvm {
226b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
23a1ffc681ed7372bd371c44a6e186291b6416fe47Craig Toppervoid DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
24ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  // Defaults the copying the dest value.
25ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  ShuffleMask.push_back(0);
26ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  ShuffleMask.push_back(1);
27ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  ShuffleMask.push_back(2);
28ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  ShuffleMask.push_back(3);
29ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes
30ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  // Decode the immediate.
31ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  unsigned ZMask = Imm & 15;
32ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  unsigned CountD = (Imm >> 4) & 3;
33ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  unsigned CountS = (Imm >> 6) & 3;
34ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes
35ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  // CountS selects which input element to use.
36ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  unsigned InVal = 4+CountS;
37ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  // CountD specifies which element of destination to update.
38ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  ShuffleMask[CountD] = InVal;
39ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  // ZMask zaps values, potentially overriding the CountD elt.
40ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
41ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
42ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
43ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes  if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
44ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes}
45ed5c711a6e1b3d03952c670c5c0c1c3ed6c63ad4Bruno Cardoso Lopes
4655945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes// <3,1> or <6,7,2,3>
47a1ffc681ed7372bd371c44a6e186291b6416fe47Craig Toppervoid DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
4855945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes  for (unsigned i = NElts/2; i != NElts; ++i)
4955945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes    ShuffleMask.push_back(NElts+i);
506b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
5155945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes  for (unsigned i = NElts/2; i != NElts; ++i)
5255945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes    ShuffleMask.push_back(i);
536b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes}
546b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
5555945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes// <0,2> or <0,1,4,5>
56a1ffc681ed7372bd371c44a6e186291b6416fe47Craig Toppervoid DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
5755945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes  for (unsigned i = 0; i != NElts/2; ++i)
5855945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes    ShuffleMask.push_back(i);
596b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
6055945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes  for (unsigned i = 0; i != NElts/2; ++i)
6155945607667e71dc1d4d32cffa60e3b817f2f3efBruno Cardoso Lopes    ShuffleMask.push_back(NElts+i);
626b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes}
636b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
644aee1bb2223e59efb814a694edaecd07a3418da0Craig Toppervoid DecodePALIGNRMask(MVT VT, unsigned Imm,
654aee1bb2223e59efb814a694edaecd07a3418da0Craig Topper                       SmallVectorImpl<int> &ShuffleMask) {
66200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer  unsigned NumElts = VT.getVectorNumElements();
67200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer  unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8);
68200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer
69467016e58d57021b14f2ae562d221f00b07cb254Craig Topper  unsigned NumLanes = VT.getSizeInBits() / 128;
70467016e58d57021b14f2ae562d221f00b07cb254Craig Topper  unsigned NumLaneElts = NumElts / NumLanes;
71467016e58d57021b14f2ae562d221f00b07cb254Craig Topper
72467016e58d57021b14f2ae562d221f00b07cb254Craig Topper  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
73467016e58d57021b14f2ae562d221f00b07cb254Craig Topper    for (unsigned i = 0; i != NumLaneElts; ++i) {
74467016e58d57021b14f2ae562d221f00b07cb254Craig Topper      unsigned Base = i + Offset;
75467016e58d57021b14f2ae562d221f00b07cb254Craig Topper      // if i+offset is out of this lane then we actually need the other source
76467016e58d57021b14f2ae562d221f00b07cb254Craig Topper      if (Base >= NumLaneElts) Base += NumElts - NumLaneElts;
77467016e58d57021b14f2ae562d221f00b07cb254Craig Topper      ShuffleMask.push_back(Base + l);
78467016e58d57021b14f2ae562d221f00b07cb254Craig Topper    }
79467016e58d57021b14f2ae562d221f00b07cb254Craig Topper  }
80200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer}
81200b306f2006533a0e7a0ca75cb3103620e7aa84Benjamin Kramer
82d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.
83d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// VT indicates the type of the vector allowing it to handle different
84d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// datatypes and vector widths.
85d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
86d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper  unsigned NumElts = VT.getVectorNumElements();
87d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper
88d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper  unsigned NumLanes = VT.getSizeInBits() / 128;
89d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper  unsigned NumLaneElts = NumElts / NumLanes;
90d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper
91a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper  unsigned NewImm = Imm;
92d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
93d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper    for (unsigned i = 0; i != NumLaneElts; ++i) {
94d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper      ShuffleMask.push_back(NewImm % NumLaneElts + l);
95d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper      NewImm /= NumLaneElts;
96d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper    }
97d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper    if (NumLaneElts == 4) NewImm = Imm; // reload imm
986b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes  }
996b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes}
1006b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
101d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodePSHUFHWMask(MVT VT, unsigned Imm,
102a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper                       SmallVectorImpl<int> &ShuffleMask) {
1036b28d356c56d656e8e4d23c71de80162bb2eba5eCraig Topper  unsigned NumElts = VT.getVectorNumElements();
104a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper
105a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper  for (unsigned l = 0; l != NumElts; l += 8) {
106a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    unsigned NewImm = Imm;
107a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    for (unsigned i = 0, e = 4; i != e; ++i) {
108a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper      ShuffleMask.push_back(l + i);
109a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    }
110a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    for (unsigned i = 4, e = 8; i != e; ++i) {
111a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper      ShuffleMask.push_back(l + 4 + (NewImm & 3));
112a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper      NewImm >>= 2;
113a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    }
1146b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes  }
1156b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes}
1166b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
117d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodePSHUFLWMask(MVT VT, unsigned Imm,
118a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper                       SmallVectorImpl<int> &ShuffleMask) {
1196b28d356c56d656e8e4d23c71de80162bb2eba5eCraig Topper  unsigned NumElts = VT.getVectorNumElements();
120a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper
121a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper  for (unsigned l = 0; l != NumElts; l += 8) {
122a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    unsigned NewImm = Imm;
123a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    for (unsigned i = 0, e = 4; i != e; ++i) {
124a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper      ShuffleMask.push_back(l + (NewImm & 3));
125a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper      NewImm >>= 2;
126a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    }
127a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    for (unsigned i = 4, e = 8; i != e; ++i) {
128a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper      ShuffleMask.push_back(l + i);
129a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper    }
1306b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes  }
1316b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes}
1326b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
133d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
134d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// the type of the vector allowing it to handle different datatypes and vector
135d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// widths.
136d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
13736e36ace77cf84d7c8326957925550624b3fc89cCraig Topper  unsigned NumElts = VT.getVectorNumElements();
13836e36ace77cf84d7c8326957925550624b3fc89cCraig Topper
13936e36ace77cf84d7c8326957925550624b3fc89cCraig Topper  unsigned NumLanes = VT.getSizeInBits() / 128;
14036e36ace77cf84d7c8326957925550624b3fc89cCraig Topper  unsigned NumLaneElts = NumElts / NumLanes;
14136e36ace77cf84d7c8326957925550624b3fc89cCraig Topper
142a9a568a79dbaf7315db863b4808d31ad9f5f91dcCraig Topper  unsigned NewImm = Imm;
143d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
144d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper    // each half of a lane comes from different source
145d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper    for (unsigned s = 0; s != NumElts*2; s += NumElts) {
146d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper      for (unsigned i = 0; i != NumLaneElts/2; ++i) {
147d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper        ShuffleMask.push_back(NewImm % NumLaneElts + s + l);
148d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper        NewImm /= NumLaneElts;
149d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper      }
15036e36ace77cf84d7c8326957925550624b3fc89cCraig Topper    }
15136e36ace77cf84d7c8326957925550624b3fc89cCraig Topper    if (NumLaneElts == 4) NewImm = Imm; // reload imm
1526b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes  }
1536b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes}
1546b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
155d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
156d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// and punpckh*. VT indicates the type of the vector allowing it to handle
157d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// different datatypes and vector widths.
158d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
159f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper  unsigned NumElts = VT.getVectorNumElements();
160f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper
161f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
162f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper  // independently on 128-bit lanes.
163f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper  unsigned NumLanes = VT.getSizeInBits() / 128;
164f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper  if (NumLanes == 0 ) NumLanes = 1;  // Handle MMX
165f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper  unsigned NumLaneElts = NumElts / NumLanes;
166f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper
167d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
168d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper    for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) {
169f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper      ShuffleMask.push_back(i);          // Reads from dest/src1
170f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper      ShuffleMask.push_back(i+NumElts);  // Reads from src/src2
171f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper    }
1726b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes  }
1736b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes}
1746b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
1753d8c2ce3e44bc161118a8922e7aa412ef00f6034Craig Topper/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
176d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// and punpckl*. VT indicates the type of the vector allowing it to handle
177d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper/// different datatypes and vector widths.
178d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
179a20244d1bab2ae6dad1c82c16670d7eb1b3a9087David Greene  unsigned NumElts = VT.getVectorNumElements();
180a20244d1bab2ae6dad1c82c16670d7eb1b3a9087David Greene
1814ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
1824ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes  // independently on 128-bit lanes.
1834ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes  unsigned NumLanes = VT.getSizeInBits() / 128;
1844ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes  if (NumLanes == 0 ) NumLanes = 1;  // Handle MMX
1854ea496846a84918c5e5cdd67ae43d6e2b0a110eaBruno Cardoso Lopes  unsigned NumLaneElts = NumElts / NumLanes;
186a20244d1bab2ae6dad1c82c16670d7eb1b3a9087David Greene
187d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
188d156dc11f9acd83e3369c069d5006a0203be13d6Craig Topper    for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) {
189f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper      ShuffleMask.push_back(i);          // Reads from dest/src1
190f7de577a08a705970f0fd8f3c1bb40f7040e4476Craig Topper      ShuffleMask.push_back(i+NumElts);  // Reads from src/src2
191a20244d1bab2ae6dad1c82c16670d7eb1b3a9087David Greene    }
1926b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes  }
1936b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes}
1946b1d0a3b366a637c0c96ea5cf7cfd8769469e6f3Bruno Cardoso Lopes
195d978c54e607fbcf426db20727d5fed71e1def2f6Craig Toppervoid DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
196a1ffc681ed7372bd371c44a6e186291b6416fe47Craig Topper                          SmallVectorImpl<int> &ShuffleMask) {
1972091df3d09bba0705fc62d020e5177a246d67978Craig Topper  if (Imm & 0x88)
1982091df3d09bba0705fc62d020e5177a246d67978Craig Topper    return; // Not a shuffle
1992091df3d09bba0705fc62d020e5177a246d67978Craig Topper
20053cae1362dca8aa312c3e36c10b106ea7d349f93Bruno Cardoso Lopes  unsigned HalfSize = VT.getVectorNumElements()/2;
20153cae1362dca8aa312c3e36c10b106ea7d349f93Bruno Cardoso Lopes
202d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper  for (unsigned l = 0; l != 2; ++l) {
203d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper    unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize;
204d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper    for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i)
205d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper      ShuffleMask.push_back(i);
206d978c54e607fbcf426db20727d5fed71e1def2f6Craig Topper  }
20753cae1362dca8aa312c3e36c10b106ea7d349f93Bruno Cardoso Lopes}
20853cae1362dca8aa312c3e36c10b106ea7d349f93Bruno Cardoso Lopes
209156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
210156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper/// No VT provided since it only works on 256-bit, 4 element vectors.
211156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Toppervoid DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
212156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper  for (unsigned i = 0; i != 4; ++i) {
213156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper    ShuffleMask.push_back((Imm >> (2*i)) & 3);
214156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper  }
215156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper}
216156f5bb56e2730013fcddf90e8cad0e5e3ee1928Craig Topper
217583b68f34fd9bdb6114fa48dadc8def2ec96edacDavid Greene} // llvm namespace
218