14ad53bdd19539e9781ed1c7644c7a3ea061028b9Chris Lattner//===-- PerfectShuffle.cpp - Perfect Shuffle Generator --------------------===//
227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner//
327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner//                     The LLVM Compiler Infrastructure
427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner//
53060910e290949a9ac5eda8726d030790c4d60ffChris Lattner// This file is distributed under the University of Illinois Open Source
63060910e290949a9ac5eda8726d030790c4d60ffChris Lattner// License. See LICENSE.TXT for details.
727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner//
827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner//===----------------------------------------------------------------------===//
927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner//
1027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner// This file computes an optimal sequence of instructions for doing all shuffles
1127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner// of two 4-element vectors.  With a release build and when configured to emit
1227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner// an altivec instruction table, this takes about 30s to run on a 2.7Ghz
1327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner// PowerPC G5.
1427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner//
1527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner//===----------------------------------------------------------------------===//
1627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
17309db81c107d72a4762e781d44b07f7fc830c055Chris Lattner#include <cassert>
18ae9f3a3b7c915f725aef5a7250e88eaeddda03c6Anton Korobeynikov#include <cstdlib>
194ffd89fa4d2788611187d1a534d2ed46adf1702cChandler Carruth#include <iomanip>
204ffd89fa4d2788611187d1a534d2ed46adf1702cChandler Carruth#include <iostream>
214ffd89fa4d2788611187d1a534d2ed46adf1702cChandler Carruth#include <vector>
2227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstruct Operator;
2327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
// Masks are 4-nibble hex numbers.  A value of 0-7 in any nibble means that the
// corresponding result element is taken from that position of the two input
// vectors (0-3 select from the LHS, 4-7 select from the RHS).  A value of 8
// means the entry is undefined.
2727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
2827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner// Mask manipulation functions.
/// MakeMask - Pack four element selectors into one mask, one nibble each.  V0
/// lands in the most significant nibble and V3 in the least significant one;
/// the result is truncated to 16 bits on return.
static inline unsigned short MakeMask(unsigned V0, unsigned V1,
                                      unsigned V2, unsigned V3) {
  // Shift the accumulated selectors up one nibble before adding the next one.
  unsigned Packed = V0;
  Packed = (Packed << 4) | V1;
  Packed = (Packed << 4) | V2;
  Packed = (Packed << 4) | V3;
  return Packed;
}
3327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
/// getMaskElt - Return the 4-bit selector stored for element Elt of Mask.
/// Element 0 lives in the most significant nibble of the low 16 bits and
/// element 3 in the least significant one.
static unsigned getMaskElt(unsigned Mask, unsigned Elt) {
  const unsigned NibbleShift = 4 * (3 - Elt);
  return 0xF & (Mask >> NibbleShift);
}
3827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
/// setMaskElt - Return a copy of Mask in which the nibble for element Elt has
/// been replaced by NewVal.  All other bits of Mask are preserved.
static unsigned setMaskElt(unsigned Mask, unsigned Elt, unsigned NewVal) {
  const unsigned Shift = 4 * (3 - Elt);
  // Knock out the old nibble first, then drop the new value into the hole.
  const unsigned Cleared = Mask & ~(0xFu << Shift);
  return Cleared | (NewVal << Shift);
}
4327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
/// isValidMask - Return true if every nibble of Mask is in the range 0-8,
/// i.e. reject masks containing any nibble with a value of 9-15.
static bool isValidMask(unsigned short Mask) {
  // A nibble is out of range exactly when its top bit is set together with
  // any of its three low bits.  Move each top bit down to bit 0 of its own
  // nibble and multiply by 7 to build the "low three bits" pattern for just
  // those nibbles (no carries: each nibble of the product is at most 7).
  const unsigned TopBits = Mask & 0x8888u;
  const unsigned LowBitsToCheck = (TopBits >> 3) * 7u;
  return (Mask & LowBitsToCheck) == 0;
}
4927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
/// hasUndefElements - Return true if at least one nibble of the mask has its
/// top bit set, which for a valid mask means the element is undef (8).
static bool hasUndefElements(unsigned short Mask) {
  const unsigned short UndefBits = Mask & 0x8888;
  return UndefBits != 0;
}
5527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
/// isOnlyLHSMask - Return true if no nibble of this mask has the "4" bit set.
/// For a valid mask that means every element is either undef or selects from
/// the LHS (0-3), never from the RHS (4-7).
static bool isOnlyLHSMask(unsigned short Mask) {
  const unsigned RHSBits = Mask & 0x4444;
  return !RHSBits;
}
6127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
6227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner/// getLHSOnlyMask - Given a mask that refers to its LHS and RHS, modify it to
6327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner/// refer to the LHS only (for when one argument value is passed into the same
6427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner/// function twice).
6564a8dddb73bba20dd24fb3a233a39cbc79040fefChris Lattner#if 0
static unsigned short getLHSOnlyMask(unsigned short Mask) {
  // Clearing the "4" bit of every nibble maps 4-7 onto 0-3 and leaves both
  // LHS selectors (0-3) and undefs (8) as they were.
  return Mask & ~0x4444;
}
6964a8dddb73bba20dd24fb3a233a39cbc79040fefChris Lattner#endif
7027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
/// getCompressedMask - Turn a 16-bit uncompressed mask (4 bits per element)
/// into its compressed form: the four elements read as the digits of a base-9
/// number, element 0 being the most significant digit.
static unsigned getCompressedMask(unsigned short Mask) {
  unsigned Compressed = 0;
  // Walk the nibbles from element 0 (bits 15-12) down to element 3 (bits 3-0),
  // accumulating them Horner-style in base 9.
  for (int Shift = 12; Shift >= 0; Shift -= 4)
    Compressed = Compressed * 9 + ((Mask >> Shift) & 0xF);
  return Compressed;
}
7727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
/// PrintMask - Write the mask to OS as "<a,b,c,d>", element 0 first.  An
/// element equal to 8 is printed as 'u'; any other value is printed as the
/// character '0' plus that value.
static void PrintMask(unsigned i, std::ostream &OS) {
  OS << "<";
  for (int Shift = 12; Shift >= 0; Shift -= 4) {
    const unsigned Elt = (i >> Shift) & 0xF;
    OS << (char)(Elt == 8 ? 'u' : ('0' + Elt));
    // Separate elements with commas, but do not emit one after the last.
    if (Shift != 0)
      OS << ",";
  }
  OS << ">";
}
8527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
/// ShuffleVal - This represents a shufflevector operation: the operator that
/// produces the value, how many instructions it costs, and the masks of the
/// values it takes as inputs.
struct ShuffleVal {
  struct Operator *Op;        // The Operation used to generate this value.
  unsigned Cost;              // Number of instrs used to generate this value.
  unsigned short Arg0, Arg1;  // Input operands for this value.

  // A fresh entry is "not computed yet": it has a huge cost, no operator and
  // zeroed operands.  Every member is initialized so that an instance with
  // automatic or dynamic storage never exposes indeterminate values.
  ShuffleVal() : Op(nullptr), Cost(1000000), Arg0(0), Arg1(0) {}
};


/// ShufTab - This is the actual shuffle table that we are trying to generate.
/// It has one entry for every possible 16-bit uncompressed mask.
static ShuffleVal ShufTab[65536];
9927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
10027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner/// TheOperators - All of the operators that this target supports.
10127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstatic std::vector<Operator*> TheOperators;
10227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
10327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner/// Operator - This is a vector operation that is available for use.
10427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstruct Operator {
105cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar  const char *Name;
10627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  unsigned short ShuffleMask;
10727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  unsigned short OpNum;
108c9d44f0d9c609f86e13fbf2e27165b663f94efe3Anton Korobeynikov  unsigned Cost;
109c9d44f0d9c609f86e13fbf2e27165b663f94efe3Anton Korobeynikov
110c9d44f0d9c609f86e13fbf2e27165b663f94efe3Anton Korobeynikov  Operator(unsigned short shufflemask, const char *name, unsigned opnum,
111c9d44f0d9c609f86e13fbf2e27165b663f94efe3Anton Korobeynikov           unsigned cost = 1)
112cddc3e03e4ec99c0268c03a126195173e519ed58Pirama Arumuga Nainar    :  Name(name), ShuffleMask(shufflemask), OpNum(opnum),Cost(cost) {
11327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    TheOperators.push_back(this);
11427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  }
11527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  ~Operator() {
11627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    assert(TheOperators.back() == this);
11727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    TheOperators.pop_back();
11827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  }
119cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
12027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  bool isOnlyLHSOperator() const {
12127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    return isOnlyLHSMask(ShuffleMask);
12227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  }
123cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
12427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  const char *getName() const { return Name; }
125c9d44f0d9c609f86e13fbf2e27165b663f94efe3Anton Korobeynikov  unsigned getCost() const { return Cost; }
126c9d44f0d9c609f86e13fbf2e27165b663f94efe3Anton Korobeynikov
12727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  unsigned short getTransformedMask(unsigned short LHSMask, unsigned RHSMask) {
12827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    // Extract the elements from LHSMask and RHSMask, as appropriate.
12927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    unsigned Result = 0;
13027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    for (unsigned i = 0; i != 4; ++i) {
13127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      unsigned SrcElt = (ShuffleMask >> (4*i)) & 0xF;
13227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      unsigned ResElt;
13327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      if (SrcElt < 4)
13427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        ResElt = getMaskElt(LHSMask, SrcElt);
13527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      else if (SrcElt < 8)
13627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        ResElt = getMaskElt(RHSMask, SrcElt-4);
13727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      else {
13827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        assert(SrcElt == 8 && "Bad src elt!");
13927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        ResElt = 8;
14027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      }
14127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      Result |= ResElt << (4*i);
14227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    }
14327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    return Result;
14427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  }
14527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner};
14627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
14727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstatic const char *getZeroCostOpName(unsigned short Op) {
14827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  if (ShufTab[Op].Arg0 == 0x0123)
14927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    return "LHS";
15027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  else if (ShufTab[Op].Arg0 == 0x4567)
15127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    return "RHS";
15227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  else {
15327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    assert(0 && "bad zero cost operation");
15427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    abort();
15527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  }
15627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner}
15727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
15827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstatic void PrintOperation(unsigned ValNo, unsigned short Vals[]) {
15927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  unsigned short ThisOp = Vals[ValNo];
16027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  std::cerr << "t" << ValNo;
16127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  PrintMask(ThisOp, std::cerr);
16227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  std::cerr << " = " << ShufTab[ThisOp].Op->getName() << "(";
163cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
16427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  if (ShufTab[ShufTab[ThisOp].Arg0].Cost == 0) {
16527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg0);
16627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    PrintMask(ShufTab[ThisOp].Arg0, std::cerr);
16727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  } else {
16827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    // Figure out what tmp # it is.
16927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    for (unsigned i = 0; ; ++i)
17027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      if (Vals[i] == ShufTab[ThisOp].Arg0) {
17127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        std::cerr << "t" << i;
17227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        break;
17327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      }
17427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  }
175cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
17627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  if (!ShufTab[Vals[ValNo]].Op->isOnlyLHSOperator()) {
17727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    std::cerr << ", ";
17827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    if (ShufTab[ShufTab[ThisOp].Arg1].Cost == 0) {
17927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg1);
18027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      PrintMask(ShufTab[ThisOp].Arg1, std::cerr);
18127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    } else {
18227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      // Figure out what tmp # it is.
18327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      for (unsigned i = 0; ; ++i)
18427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        if (Vals[i] == ShufTab[ThisOp].Arg1) {
18527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          std::cerr << "t" << i;
18627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          break;
18727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        }
18827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    }
18927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  }
19027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  std::cerr << ")  ";
19127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner}
19227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
19327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstatic unsigned getNumEntered() {
19427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  unsigned Count = 0;
19527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  for (unsigned i = 0; i != 65536; ++i)
19627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    Count += ShufTab[i].Cost < 100;
19727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  return Count;
19827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner}
19927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
200cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikovstatic void EvaluateOps(unsigned short Elt, unsigned short Vals[],
20127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner                        unsigned &NumVals) {
20227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  if (ShufTab[Elt].Cost == 0) return;
20327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
20427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  // If this value has already been evaluated, it is free.  FIXME: match undefs.
20527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  for (unsigned i = 0, e = NumVals; i != e; ++i)
20627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    if (Vals[i] == Elt) return;
207cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
20827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  // Otherwise, get the operands of the value, then add it.
20927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  unsigned Arg0 = ShufTab[Elt].Arg0, Arg1 = ShufTab[Elt].Arg1;
21027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  if (ShufTab[Arg0].Cost)
21127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    EvaluateOps(Arg0, Vals, NumVals);
21227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  if (Arg0 != Arg1 && ShufTab[Arg1].Cost)
21327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    EvaluateOps(Arg1, Vals, NumVals);
214cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
21527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  Vals[NumVals++] = Elt;
21627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner}
21727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
21827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
21927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerint main() {
22027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  // Seed the table with accesses to the LHS and RHS.
22127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  ShufTab[0x0123].Cost = 0;
222c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  ShufTab[0x0123].Op = nullptr;
22327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  ShufTab[0x0123].Arg0 = 0x0123;
22427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  ShufTab[0x4567].Cost = 0;
225c6a4f5e819217e1e12c458aed8e7b122e23a3a58Stephen Hines  ShufTab[0x4567].Op = nullptr;
22627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  ShufTab[0x4567].Arg0 = 0x4567;
227cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
22827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  // Seed the first-level of shuffles, shuffles whose inputs are the input to
22927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  // the vectorshuffle operation.
23027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  bool MadeChange = true;
23127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  unsigned OpCount = 0;
23227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  while (MadeChange) {
23327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    MadeChange = false;
23427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    ++OpCount;
23527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    std::cerr << "Starting iteration #" << OpCount << " with "
23627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner              << getNumEntered() << " entries established.\n";
237cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
23827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    // Scan the table for two reasons: First, compute the maximum cost of any
23927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    // operation left in the table.  Second, make sure that values with undefs
24027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    // have the cheapest alternative that they match.
24127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    unsigned MaxCost = ShufTab[0].Cost;
24227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    for (unsigned i = 1; i != 0x8889; ++i) {
24327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      if (!isValidMask(i)) continue;
24427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      if (ShufTab[i].Cost > MaxCost)
24527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        MaxCost = ShufTab[i].Cost;
246cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
24727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      // If this value has an undef, make it be computed the cheapest possible
24827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      // way of any of the things that it matches.
24927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      if (hasUndefElements(i)) {
25027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        // This code is a little bit tricky, so here's the idea: consider some
25127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        // permutation, like 7u4u.  To compute the lowest cost for 7u4u, we
25227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        // need to take the minimum cost of all of 7[0-8]4[0-8], 81 entries.  If
25327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        // there are 3 undefs, the number rises to 729 entries we have to scan,
25427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        // and for the 4 undef case, we have to scan the whole table.
25527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        //
25627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        // Instead of doing this huge amount of scanning, we process the table
25727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        // entries *in order*, and use the fact that 'u' is 8, larger than any
25827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        // valid index.  Given an entry like 7u4u then, we only need to scan
25927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        // 7[0-7]4u - 8 entries.  We can get away with this, because we already
26027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        // know that each of 704u, 714u, 724u, etc contain the minimum value of
26127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        // all of the 704[0-8], 714[0-8] and 724[0-8] entries respectively.
26227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        unsigned UndefIdx;
26327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        if (i & 0x8000)
26427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          UndefIdx = 0;
26527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        else if (i & 0x0800)
26627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          UndefIdx = 1;
26727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        else if (i & 0x0080)
26827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          UndefIdx = 2;
26927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        else if (i & 0x0008)
27027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          UndefIdx = 3;
27127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        else
27227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          abort();
273cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
27427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        unsigned MinVal  = i;
27527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        unsigned MinCost = ShufTab[i].Cost;
276cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
27727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        // Scan the 8 entries.
27827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        for (unsigned j = 0; j != 8; ++j) {
27927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          unsigned NewElt = setMaskElt(i, UndefIdx, j);
28027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          if (ShufTab[NewElt].Cost < MinCost) {
28127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner            MinCost = ShufTab[NewElt].Cost;
28227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner            MinVal = NewElt;
28327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          }
28427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        }
285cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
28627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        // If we found something cheaper than what was here before, use it.
28727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        if (i != MinVal) {
28827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          MadeChange = true;
28927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          ShufTab[i] = ShufTab[MinVal];
29027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        }
291cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov      }
29227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    }
293cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
29427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    for (unsigned LHS = 0; LHS != 0x8889; ++LHS) {
29527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      if (!isValidMask(LHS)) continue;
29627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      if (ShufTab[LHS].Cost > 1000) continue;
29727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
29827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      // If nothing involving this operand could possibly be cheaper than what
29927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      // we already have, don't consider it.
30027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      if (ShufTab[LHS].Cost + 1 >= MaxCost)
30127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        continue;
302cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
30327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      for (unsigned opnum = 0, e = TheOperators.size(); opnum != e; ++opnum) {
30427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        Operator *Op = TheOperators[opnum];
30527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
30627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        // Evaluate op(LHS,LHS)
30727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        unsigned ResultMask = Op->getTransformedMask(LHS, LHS);
30827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
309c9d44f0d9c609f86e13fbf2e27165b663f94efe3Anton Korobeynikov        unsigned Cost = ShufTab[LHS].Cost + Op->getCost();
31027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        if (Cost < ShufTab[ResultMask].Cost) {
31127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          ShufTab[ResultMask].Cost = Cost;
31227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          ShufTab[ResultMask].Op = Op;
31327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          ShufTab[ResultMask].Arg0 = LHS;
31427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          ShufTab[ResultMask].Arg1 = LHS;
31527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          MadeChange = true;
31627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        }
317cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
31827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        // If this is a two input instruction, include the op(x,y) cases.  If
31927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        // this is a one input instruction, skip this.
32027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        if (Op->isOnlyLHSOperator()) continue;
321cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
32227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        for (unsigned RHS = 0; RHS != 0x8889; ++RHS) {
32327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          if (!isValidMask(RHS)) continue;
32427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          if (ShufTab[RHS].Cost > 1000) continue;
325cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
32627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          // If nothing involving this operand could possibly be cheaper than
32727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          // what we already have, don't consider it.
32827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          if (ShufTab[RHS].Cost + 1 >= MaxCost)
32927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner            continue;
330cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
33127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
33227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          // Evaluate op(LHS,RHS)
33327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          unsigned ResultMask = Op->getTransformedMask(LHS, RHS);
33427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
33527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          if (ShufTab[ResultMask].Cost <= OpCount ||
33627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner              ShufTab[ResultMask].Cost <= ShufTab[LHS].Cost ||
33727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner              ShufTab[ResultMask].Cost <= ShufTab[RHS].Cost)
33827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner            continue;
339cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
34027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          // Figure out the cost to evaluate this, knowing that CSE's only need
34127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          // to be evaluated once.
34227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          unsigned short Vals[30];
34327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          unsigned NumVals = 0;
34427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          EvaluateOps(LHS, Vals, NumVals);
34527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          EvaluateOps(RHS, Vals, NumVals);
34627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
347c9d44f0d9c609f86e13fbf2e27165b663f94efe3Anton Korobeynikov          unsigned Cost = NumVals + Op->getCost();
34827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          if (Cost < ShufTab[ResultMask].Cost) {
34927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner            ShufTab[ResultMask].Cost = Cost;
35027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner            ShufTab[ResultMask].Op = Op;
35127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner            ShufTab[ResultMask].Arg0 = LHS;
35227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner            ShufTab[ResultMask].Arg1 = RHS;
35327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner            MadeChange = true;
35427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          }
35527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        }
35627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      }
35727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    }
35827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  }
359cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
36027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  std::cerr << "Finished Table has " << getNumEntered()
36127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner            << " entries established.\n";
362cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
36327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  unsigned CostArray[10] = { 0 };
36427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
36527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  // Compute a cost histogram.
36627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  for (unsigned i = 0; i != 65536; ++i) {
36727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    if (!isValidMask(i)) continue;
36827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    if (ShufTab[i].Cost > 9)
36927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      ++CostArray[9];
37027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    else
37127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      ++CostArray[ShufTab[i].Cost];
37227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  }
373cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
37427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  for (unsigned i = 0; i != 9; ++i)
37527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    if (CostArray[i])
37627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      std::cout << "// " << CostArray[i] << " entries have cost " << i << "\n";
37727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  if (CostArray[9])
37827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    std::cout << "// " << CostArray[9] << " entries have higher cost!\n";
379cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
380cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
38127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  // Build up the table to emit.
38227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  std::cout << "\n// This table is 6561*4 = 26244 bytes in size.\n";
3833033d4d3759e3552e7e9d846b02c30c916e6953eChris Lattner  std::cout << "static const unsigned PerfectShuffleTable[6561+1] = {\n";
384cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
38527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  for (unsigned i = 0; i != 0x8889; ++i) {
38627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    if (!isValidMask(i)) continue;
387cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
38827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    // CostSat - The cost of this operation saturated to two bits.
38927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    unsigned CostSat = ShufTab[i].Cost;
3908c3e8471592bf46631bff0fab554d5ac3fedcff6Chris Lattner    if (CostSat > 4) CostSat = 4;
3918c3e8471592bf46631bff0fab554d5ac3fedcff6Chris Lattner    if (CostSat == 0) CostSat = 1;
3928c3e8471592bf46631bff0fab554d5ac3fedcff6Chris Lattner    --CostSat;  // Cost is now between 0-3.
393cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
39427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    unsigned OpNum = ShufTab[i].Op ? ShufTab[i].Op->OpNum : 0;
39527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    assert(OpNum < 16 && "Too few bits to encode operation!");
396cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
39727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    unsigned LHS = getCompressedMask(ShufTab[i].Arg0);
39827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    unsigned RHS = getCompressedMask(ShufTab[i].Arg1);
399cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
40027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    // Encode this as 2 bits of saturated cost, 4 bits of opcodes, 13 bits of
40127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    // LHS, and 13 bits of RHS = 32 bits.
402195d8adac9600e90f2c2dbd9e44f62616b10df7dChris Lattner    unsigned Val = (CostSat << 30) | (OpNum << 26) | (LHS << 13) | RHS;
40327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
40441045ba731f37fb6bef980cdc63b0a8538b6c8caJim Grosbach    std::cout << "  " << std::setw(10) << Val << "U, // ";
40527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    PrintMask(i, std::cout);
40627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    std::cout << ": Cost " << ShufTab[i].Cost;
40727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    std::cout << " " << (ShufTab[i].Op ? ShufTab[i].Op->getName() : "copy");
40827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    std::cout << " ";
40927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    if (ShufTab[ShufTab[i].Arg0].Cost == 0) {
41027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      std::cout << getZeroCostOpName(ShufTab[i].Arg0);
41127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    } else {
41227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      PrintMask(ShufTab[i].Arg0, std::cout);
41327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    }
41427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
41527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    if (ShufTab[i].Op && !ShufTab[i].Op->isOnlyLHSOperator()) {
41627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      std::cout << ", ";
41727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      if (ShufTab[ShufTab[i].Arg1].Cost == 0) {
41827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        std::cout << getZeroCostOpName(ShufTab[i].Arg1);
41927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      } else {
42027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        PrintMask(ShufTab[i].Arg1, std::cout);
42127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      }
42227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    }
42327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    std::cout << "\n";
424cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov  }
42527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  std::cout << "  0\n};\n";
42627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
42727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  if (0) {
42827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    // Print out the table.
42927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    for (unsigned i = 0; i != 0x8889; ++i) {
43027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      if (!isValidMask(i)) continue;
43127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      if (ShufTab[i].Cost < 1000) {
43227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        PrintMask(i, std::cerr);
43327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        std::cerr << " - Cost " << ShufTab[i].Cost << " - ";
434cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov
43527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        unsigned short Vals[30];
43627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        unsigned NumVals = 0;
43727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        EvaluateOps(i, Vals, NumVals);
43827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
43927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        for (unsigned j = 0, e = NumVals; j != e; ++j)
44027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner          PrintOperation(j, Vals);
44127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner        std::cerr << "\n";
44227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner      }
44327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner    }
44427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  }
44527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner}
44627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
44727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
448cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner#ifdef GENERATE_ALTIVEC
44927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
45027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner///===---------------------------------------------------------------------===//
45127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner/// The altivec instruction definitions.  This is the altivec-specific part of
45227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner/// this file.
45327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner///===---------------------------------------------------------------------===//
45427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
455cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner// Note that the opcode numbers here must match those in the PPC backend.
// Opcode numbers handed to the AltiVec Operator constructors below.  The
// values are written out explicitly so the numbering stays visible and does
// not shift silently if an entry is moved.
enum {
  OP_COPY      = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
  OP_VMRGHW    = 1,
  OP_VMRGLW    = 2,
  OP_VSPLTISW0 = 3,
  OP_VSPLTISW1 = 4,
  OP_VSPLTISW2 = 5,
  OP_VSPLTISW3 = 6,
  OP_VSLDOI4   = 7,
  OP_VSLDOI8   = 8,
  OP_VSLDOI12  = 9
};
468cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner
46927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstruct vmrghw : public Operator {
470cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner  vmrghw() : Operator(0x0415, "vmrghw", OP_VMRGHW) {}
47127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner} the_vmrghw;
47227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
47327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstruct vmrglw : public Operator {
474cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner  vmrglw() : Operator(0x2637, "vmrglw", OP_VMRGLW) {}
47527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner} the_vmrglw;
47627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
47727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnertemplate<unsigned Elt>
47827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstruct vspltisw : public Operator {
479cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner  vspltisw(const char *N, unsigned Opc)
480cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner    : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {}
48127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner};
48227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
483cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattnervspltisw<0> the_vspltisw0("vspltisw0", OP_VSPLTISW0);
484cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattnervspltisw<1> the_vspltisw1("vspltisw1", OP_VSPLTISW1);
485cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattnervspltisw<2> the_vspltisw2("vspltisw2", OP_VSPLTISW2);
486cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattnervspltisw<3> the_vspltisw3("vspltisw3", OP_VSPLTISW3);
48727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
48827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnertemplate<unsigned N>
48927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstruct vsldoi : public Operator {
490cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner  vsldoi(const char *Name, unsigned Opc)
491cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner    : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) {
49227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner  }
49327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner};
49427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
495cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattnervsldoi<1> the_vsldoi1("vsldoi4" , OP_VSLDOI4);
496cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattnervsldoi<2> the_vsldoi2("vsldoi8" , OP_VSLDOI8);
497cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattnervsldoi<3> the_vsldoi3("vsldoi12", OP_VSLDOI12);
49827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner
499cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner#endif
5001c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov
// Select the NEON operator set by default, but only when no target has been
// chosen explicitly.  Building with -DGENERATE_ALTIVEC must not also enable
// the NEON section: both sections declare OP_COPY in an unnamed file-scope
// enum, so compiling them together fails with a redeclaration error.
#if !defined(GENERATE_ALTIVEC) && !defined(GENERATE_NEON)
#define GENERATE_NEON
#endif
5021c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov
5031c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov#ifdef GENERATE_NEON
// Opcode numbers handed to the NEON Operator constructors below.  The values
// are written out explicitly so the numbering stays visible and does not
// shift silently if an entry is moved.
enum {
  OP_COPY  = 0,   // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
  OP_VREV  = 1,
  OP_VDUP0 = 2,
  OP_VDUP1 = 3,
  OP_VDUP2 = 4,
  OP_VDUP3 = 5,
  OP_VEXT1 = 6,
  OP_VEXT2 = 7,
  OP_VEXT3 = 8,
  OP_VUZPL = 9,   // VUZP, left result
  OP_VUZPR = 10,  // VUZP, right result
  OP_VZIPL = 11,  // VZIP, left result
  OP_VZIPR = 12,  // VZIP, right result
  OP_VTRNL = 13,  // VTRN, left result
  OP_VTRNR = 14   // VTRN, right result
};
5211c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov
5221c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vrev : public Operator {
5232a8eb722c7bb0fac2fe09a876f3471dcb25f465eTanya Lattner  vrev() : Operator(0x1032, "vrev", OP_VREV) {}
5241c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov} the_vrev;
5251c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov
5261c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovtemplate<unsigned Elt>
5271c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vdup : public Operator {
5281c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov  vdup(const char *N, unsigned Opc)
5291c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov    : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {}
5301c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov};
5311c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov
5321c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovvdup<0> the_vdup0("vdup0", OP_VDUP0);
5331c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovvdup<1> the_vdup1("vdup1", OP_VDUP1);
5341c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovvdup<2> the_vdup2("vdup2", OP_VDUP2);
5351c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovvdup<3> the_vdup3("vdup3", OP_VDUP3);
5361c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov
5371c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovtemplate<unsigned N>
5381c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vext : public Operator {
5391c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov  vext(const char *Name, unsigned Opc)
5401c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov    : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) {
5411c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov  }
5421c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov};
5431c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov
5441c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovvext<1> the_vext1("vext1", OP_VEXT1);
5451c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovvext<2> the_vext2("vext2", OP_VEXT2);
5461c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovvext<3> the_vext3("vext3", OP_VEXT3);
5471c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov
5481c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vuzpl : public Operator {
54958cd84dc775ec4b4fb11e7d0e383449dfd81cf11Anton Korobeynikov  vuzpl() : Operator(0x0246, "vuzpl", OP_VUZPL, 2) {}
5501c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov} the_vuzpl;
5511c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov
5521c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vuzpr : public Operator {
55358cd84dc775ec4b4fb11e7d0e383449dfd81cf11Anton Korobeynikov  vuzpr() : Operator(0x1357, "vuzpr", OP_VUZPR, 2) {}
5541c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov} the_vuzpr;
5551c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov
5561c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vzipl : public Operator {
55758cd84dc775ec4b4fb11e7d0e383449dfd81cf11Anton Korobeynikov  vzipl() : Operator(0x0415, "vzipl", OP_VZIPL, 2) {}
5581c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov} the_vzipl;
5591c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov
5601c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vzipr : public Operator {
56158cd84dc775ec4b4fb11e7d0e383449dfd81cf11Anton Korobeynikov  vzipr() : Operator(0x2637, "vzipr", OP_VZIPR, 2) {}
5621c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov} the_vzipr;
5631c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov
5641c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vtrnl : public Operator {
56558cd84dc775ec4b4fb11e7d0e383449dfd81cf11Anton Korobeynikov  vtrnl() : Operator(0x0426, "vtrnl", OP_VTRNL, 2) {}
5661c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov} the_vtrnl;
5671c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov
5681c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vtrnr : public Operator {
56958cd84dc775ec4b4fb11e7d0e383449dfd81cf11Anton Korobeynikov  vtrnr() : Operator(0x1537, "vtrnr", OP_VTRNR, 2) {}
5701c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov} the_vtrnr;
5711c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov
5721c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov#endif
573