//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Define several functions to decode x86 specific shuffle semantics into a
// generic vector mask.
//
//===----------------------------------------------------------------------===//
141d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
151d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand#include "X86ShuffleDecode.h"
161d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
171d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand//===----------------------------------------------------------------------===//
181d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand//  Vector Mask Decoding
191d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand//===----------------------------------------------------------------------===//
201d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
211d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandnamespace llvm {
221d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
231d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandvoid DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
241d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  // Defaults the copying the dest value.
251d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  ShuffleMask.push_back(0);
261d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  ShuffleMask.push_back(1);
271d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  ShuffleMask.push_back(2);
281d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  ShuffleMask.push_back(3);
291d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
30f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  // Decode the immediate.
31f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  unsigned ZMask = Imm & 15;
32f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  unsigned CountD = (Imm >> 4) & 3;
33f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  unsigned CountS = (Imm >> 6) & 3;
34f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford
35f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  // CountS selects which input element to use.
36f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  unsigned InVal = 4+CountS;
37f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  // CountD specifies which element of destination to update.
38f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  ShuffleMask[CountD] = InVal;
39f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  // ZMask zaps values, potentially overriding the CountD elt.
40f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
41f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
42f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
43f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
44f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford}
45f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford
46f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford// <3,1> or <6,7,2,3>
47f917bc0406e47866eb1f6c0378de16498018b620Richard Sandifordvoid DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
48f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  for (unsigned i = NElts/2; i != NElts; ++i)
49f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford    ShuffleMask.push_back(NElts+i);
50f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford
51f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  for (unsigned i = NElts/2; i != NElts; ++i)
52f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford    ShuffleMask.push_back(i);
53f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford}
54f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford
55f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford// <0,2> or <0,1,4,5>
56f917bc0406e47866eb1f6c0378de16498018b620Richard Sandifordvoid DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
57f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  for (unsigned i = 0; i != NElts/2; ++i)
58f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford    ShuffleMask.push_back(i);
59f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford
60f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  for (unsigned i = 0; i != NElts/2; ++i)
61f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford    ShuffleMask.push_back(NElts+i);
62f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford}
63f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford
64f917bc0406e47866eb1f6c0378de16498018b620Richard Sandifordvoid DecodePALIGNRMask(MVT VT, unsigned Imm,
65f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford                       SmallVectorImpl<int> &ShuffleMask) {
66f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  unsigned NumElts = VT.getVectorNumElements();
67f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8);
68f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford
69f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  unsigned NumLanes = VT.getSizeInBits() / 128;
70f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford  unsigned NumLaneElts = NumElts / NumLanes;
71f917bc0406e47866eb1f6c0378de16498018b620Richard Sandiford
724a971705bc6030dc2e4338b3cd5cffa2e0f88b7bRafael Espindola  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
734a971705bc6030dc2e4338b3cd5cffa2e0f88b7bRafael Espindola    for (unsigned i = 0; i != NumLaneElts; ++i) {
746e53180db120b30f600ac31611a9dd47ef7f4921Rafael Espindola      unsigned Base = i + Offset;
754a971705bc6030dc2e4338b3cd5cffa2e0f88b7bRafael Espindola      // if i+offset is out of this lane then we actually need the other source
764a971705bc6030dc2e4338b3cd5cffa2e0f88b7bRafael Espindola      if (Base >= NumLaneElts) Base += NumElts - NumLaneElts;
774a971705bc6030dc2e4338b3cd5cffa2e0f88b7bRafael Espindola      ShuffleMask.push_back(Base + l);
784a971705bc6030dc2e4338b3cd5cffa2e0f88b7bRafael Espindola    }
791d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  }
801d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand}
811d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
821d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*.
831d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// VT indicates the type of the vector allowing it to handle different
841d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// datatypes and vector widths.
851d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandvoid DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
861d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  unsigned NumElts = VT.getVectorNumElements();
871d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
881d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  unsigned NumLanes = VT.getSizeInBits() / 128;
891d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  unsigned NumLaneElts = NumElts / NumLanes;
901d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
911d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  unsigned NewImm = Imm;
921d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
931d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    for (unsigned i = 0; i != NumLaneElts; ++i) {
941d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand      ShuffleMask.push_back(NewImm % NumLaneElts + l);
951d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand      NewImm /= NumLaneElts;
961d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    }
971d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    if (NumLaneElts == 4) NewImm = Imm; // reload imm
981d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  }
991d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand}
1001d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
1011d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandvoid DecodePSHUFHWMask(MVT VT, unsigned Imm,
1021d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand                       SmallVectorImpl<int> &ShuffleMask) {
1031d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  unsigned NumElts = VT.getVectorNumElements();
1047a1ead46fb629839e5ce25574246ee743ab8b54fRichard Sandiford
1051d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  for (unsigned l = 0; l != NumElts; l += 8) {
1061d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    unsigned NewImm = Imm;
1071d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    for (unsigned i = 0, e = 4; i != e; ++i) {
1081d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand      ShuffleMask.push_back(l + i);
1091d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    }
1101d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    for (unsigned i = 4, e = 8; i != e; ++i) {
1111d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand      ShuffleMask.push_back(l + 4 + (NewImm & 3));
1121d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand      NewImm >>= 2;
1131d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    }
1141d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  }
1151d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand}
1161d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
1171d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandvoid DecodePSHUFLWMask(MVT VT, unsigned Imm,
1181d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand                       SmallVectorImpl<int> &ShuffleMask) {
1191d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  unsigned NumElts = VT.getVectorNumElements();
1201d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
1211d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  for (unsigned l = 0; l != NumElts; l += 8) {
1221d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    unsigned NewImm = Imm;
1231d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    for (unsigned i = 0, e = 4; i != e; ++i) {
1241d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand      ShuffleMask.push_back(l + (NewImm & 3));
1251d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand      NewImm >>= 2;
1261d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    }
1271d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    for (unsigned i = 4, e = 8; i != e; ++i) {
1281d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand      ShuffleMask.push_back(l + i);
1291d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    }
1301d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  }
1311d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand}
1321d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
1331d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates
1341d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// the type of the vector allowing it to handle different datatypes and vector
1351d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// widths.
1361d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandvoid DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
1371d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  unsigned NumElts = VT.getVectorNumElements();
1381d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
1391d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  unsigned NumLanes = VT.getSizeInBits() / 128;
1401d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  unsigned NumLaneElts = NumElts / NumLanes;
1411d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
1421d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  unsigned NewImm = Imm;
1431d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
1441d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    // each half of a lane comes from different source
1457a1ead46fb629839e5ce25574246ee743ab8b54fRichard Sandiford    for (unsigned s = 0; s != NumElts*2; s += NumElts) {
1461d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand      for (unsigned i = 0; i != NumLaneElts/2; ++i) {
1471d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand        ShuffleMask.push_back(NewImm % NumLaneElts + s + l);
1481d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand        NewImm /= NumLaneElts;
1491d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand      }
1501d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    }
1511d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    if (NumLaneElts == 4) NewImm = Imm; // reload imm
1521d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  }
1531d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand}
1541d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
1551d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd
1561d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// and punpckh*. VT indicates the type of the vector allowing it to handle
1571d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// different datatypes and vector widths.
1581d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandvoid DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
1591d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  unsigned NumElts = VT.getVectorNumElements();
1601d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
1611d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
1621d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  // independently on 128-bit lanes.
1631d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  unsigned NumLanes = VT.getSizeInBits() / 128;
1641d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  if (NumLanes == 0 ) NumLanes = 1;  // Handle MMX
1651d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  unsigned NumLaneElts = NumElts / NumLanes;
1661d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
1671d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
1681d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) {
1691d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand      ShuffleMask.push_back(i);          // Reads from dest/src1
1701d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand      ShuffleMask.push_back(i+NumElts);  // Reads from src/src2
1711d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    }
1721d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  }
1731d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand}
1741d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
1751d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd
1761d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// and punpckl*. VT indicates the type of the vector allowing it to handle
1771d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand/// different datatypes and vector widths.
1781d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandvoid DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) {
1791d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  unsigned NumElts = VT.getVectorNumElements();
1801d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
1811d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
1821d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  // independently on 128-bit lanes.
1831d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  unsigned NumLanes = VT.getSizeInBits() / 128;
1841d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  if (NumLanes == 0 ) NumLanes = 1;  // Handle MMX
1851d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  unsigned NumLaneElts = NumElts / NumLanes;
1861d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
1871d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
1881d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) {
1891d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand      ShuffleMask.push_back(i);          // Reads from dest/src1
1901d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand      ShuffleMask.push_back(i+NumElts);  // Reads from src/src2
1911d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    }
1921d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  }
1931d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand}
1941d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
1951d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigandvoid DecodeVPERM2X128Mask(MVT VT, unsigned Imm,
1961d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand                          SmallVectorImpl<int> &ShuffleMask) {
1971d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  if (Imm & 0x88)
1981d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    return; // Not a shuffle
1991d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
2001d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  unsigned HalfSize = VT.getVectorNumElements()/2;
2011d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand
2021d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand  for (unsigned l = 0; l != 2; ++l) {
2031d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize;
2041d09d56fe1e3f3faadd4bf4ccf3e585ddb3c3b07Ulrich Weigand    for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i)
205      ShuffleMask.push_back(i);
206  }
207}
208
209/// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD.
210/// No VT provided since it only works on 256-bit, 4 element vectors.
211void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
212  for (unsigned i = 0; i != 4; ++i) {
213    ShuffleMask.push_back((Imm >> (2*i)) & 3);
214  }
215}
216
217} // llvm namespace
218