//===-- PerfectShuffle.cpp - Perfect Shuffle Generator --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file computes an optimal sequence of instructions for doing all shuffles
// of two 4-element vectors. With a release build and when configured to emit
// an AltiVec instruction table, this takes about 30s to run on a 2.7GHz
// PowerPC G5.
//
//===----------------------------------------------------------------------===//

#include <cassert>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <vector>
struct Operator;

// Masks are 4-nibble hex numbers. A value of 0-7 in any nibble means that it
// takes an element from that value of the input vectors. A value of 8 means
// the entry is undefined.

// Mask manipulation functions.

/// MakeMask - Pack four element selectors into one mask. V0 ends up in the
/// most significant nibble and V3 in the least significant one. The arguments
/// are not range checked; the combined value is truncated to 16 bits by the
/// return type.
static inline unsigned short MakeMask(unsigned V0, unsigned V1,
                                      unsigned V2, unsigned V3) {
  unsigned Packed = (V0 << (3*4)) | (V1 << (2*4)) | (V2 << (1*4)) |
                    (V3 << (0*4));
  return static_cast<unsigned short>(Packed);
}

/// getMaskElt - Return element N of the specified mask.
static unsigned getMaskElt(unsigned Mask, unsigned Elt) {
  // Element 0 lives in the most significant nibble, element 3 in the least.
  // An index above 3 would wrap the shift amount around, so reject it.
  assert(Elt < 4 && "Mask element index out of range!");
  return (Mask >> ((3-Elt)*4)) & 0xF;
}

/// setMaskElt - Return a copy of Mask in which element Elt has been replaced
/// by NewVal. All other elements are left untouched.
static unsigned setMaskElt(unsigned Mask, unsigned Elt, unsigned NewVal) {
  assert(Elt < 4 && "Mask element index out of range!");
  // A value wider than one nibble would clobber the neighbouring elements.
  assert(NewVal <= 0xF && "Mask element value does not fit in a nibble!");
  unsigned FieldShift = ((3-Elt)*4);
  return (Mask & ~(0xFu << FieldShift)) | (NewVal << FieldShift);
}

// Reject elements where the values are 9-15.
static bool isValidMask(unsigned short Mask) {
  // An element is out of range when its 0x8 bit is set together with any of
  // its three low bits. Smear every 0x8 bit over the low bits of its own
  // nibble and check whether the mask has any of those bits set.
  unsigned short UndefBits = Mask & 0x8888;
  unsigned LowBitsOfUndefs = (UndefBits >> 1) | (UndefBits >> 2) |
                             (UndefBits >> 3);
  return (Mask & LowBitsOfUndefs) == 0;
}

/// hasUndefElements - Return true if any of the elements in the mask are undefs
///
static bool hasUndefElements(unsigned short Mask) {
  return (Mask & 0x8888) != 0;
}

/// isOnlyLHSMask - Return true if this mask only refers to its LHS, not
/// including undef values.
static bool isOnlyLHSMask(unsigned short Mask) {
  // Elements 4-7 (the RHS) are the ones with the 0x4 bit set.
  return (Mask & 0x4444) == 0;
}

/// getLHSOnlyMask - Given a mask that refers to its LHS and RHS, modify it to
/// refer to the LHS only (for when one argument value is passed into the same
/// function twice).
#if 0
static unsigned short getLHSOnlyMask(unsigned short Mask) {
  return Mask & 0xBBBB;  // Keep only LHS and Undefs.
}
#endif

/// getCompressedMask - Turn a 16-bit uncompressed mask (where each elt uses 4
/// bits) into a compressed 13-bit mask, where each elt is multiplied by 9.
7327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstatic unsigned getCompressedMask(unsigned short Mask) { 74cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov return getMaskElt(Mask, 0)*9*9*9 + getMaskElt(Mask, 1)*9*9 + 7527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner getMaskElt(Mask, 2)*9 + getMaskElt(Mask, 3); 7627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner} 7727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 7827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstatic void PrintMask(unsigned i, std::ostream &OS) { 7927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner OS << "<" << (char)(getMaskElt(i, 0) == 8 ? 'u' : ('0'+getMaskElt(i, 0))) 8027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner << "," << (char)(getMaskElt(i, 1) == 8 ? 'u' : ('0'+getMaskElt(i, 1))) 8127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner << "," << (char)(getMaskElt(i, 2) == 8 ? 'u' : ('0'+getMaskElt(i, 2))) 8227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner << "," << (char)(getMaskElt(i, 3) == 8 ? 'u' : ('0'+getMaskElt(i, 3))) 8327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner << ">"; 8427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner} 8527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 8627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner/// ShuffleVal - This represents a shufflevector operation. 8727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstruct ShuffleVal { 8827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned Cost; // Number of instrs used to generate this value. 8927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner Operator *Op; // The Operation used to generate this value. 9027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned short Arg0, Arg1; // Input operands for this value. 
91cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 9227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ShuffleVal() : Cost(1000000) {} 9327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner}; 9427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 9527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 9627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner/// ShufTab - This is the actual shuffle table that we are trying to generate. 9727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner/// 9827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstatic ShuffleVal ShufTab[65536]; 9927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 10027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner/// TheOperators - All of the operators that this target supports. 10127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstatic std::vector<Operator*> TheOperators; 10227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 10327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner/// Operator - This is a vector operation that is available for use. 
10427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstruct Operator { 10527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned short ShuffleMask; 10627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned short OpNum; 10727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner const char *Name; 108c9d44f0d9c609f86e13fbf2e27165b663f94efe3Anton Korobeynikov unsigned Cost; 109c9d44f0d9c609f86e13fbf2e27165b663f94efe3Anton Korobeynikov 110c9d44f0d9c609f86e13fbf2e27165b663f94efe3Anton Korobeynikov Operator(unsigned short shufflemask, const char *name, unsigned opnum, 111c9d44f0d9c609f86e13fbf2e27165b663f94efe3Anton Korobeynikov unsigned cost = 1) 112c9d44f0d9c609f86e13fbf2e27165b663f94efe3Anton Korobeynikov : ShuffleMask(shufflemask), OpNum(opnum), Name(name), Cost(cost) { 11327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner TheOperators.push_back(this); 11427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 11527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ~Operator() { 11627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner assert(TheOperators.back() == this); 11727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner TheOperators.pop_back(); 11827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 119cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 12027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner bool isOnlyLHSOperator() const { 12127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner return isOnlyLHSMask(ShuffleMask); 12227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 123cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 12427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner const char *getName() const { return Name; } 125c9d44f0d9c609f86e13fbf2e27165b663f94efe3Anton Korobeynikov unsigned getCost() const { return Cost; } 126c9d44f0d9c609f86e13fbf2e27165b663f94efe3Anton Korobeynikov 12727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned short getTransformedMask(unsigned short LHSMask, 
unsigned RHSMask) { 12827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // Extract the elements from LHSMask and RHSMask, as appropriate. 12927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned Result = 0; 13027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner for (unsigned i = 0; i != 4; ++i) { 13127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned SrcElt = (ShuffleMask >> (4*i)) & 0xF; 13227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned ResElt; 13327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (SrcElt < 4) 13427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ResElt = getMaskElt(LHSMask, SrcElt); 13527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner else if (SrcElt < 8) 13627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ResElt = getMaskElt(RHSMask, SrcElt-4); 13727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner else { 13827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner assert(SrcElt == 8 && "Bad src elt!"); 13927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ResElt = 8; 14027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 14127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner Result |= ResElt << (4*i); 14227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 14327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner return Result; 14427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 14527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner}; 14627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 14727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstatic const char *getZeroCostOpName(unsigned short Op) { 14827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (ShufTab[Op].Arg0 == 0x0123) 14927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner return "LHS"; 15027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner else if (ShufTab[Op].Arg0 == 0x4567) 15127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner return "RHS"; 
15227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner else { 15327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner assert(0 && "bad zero cost operation"); 15427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner abort(); 15527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 15627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner} 15727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 15827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstatic void PrintOperation(unsigned ValNo, unsigned short Vals[]) { 15927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned short ThisOp = Vals[ValNo]; 16027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cerr << "t" << ValNo; 16127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner PrintMask(ThisOp, std::cerr); 16227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cerr << " = " << ShufTab[ThisOp].Op->getName() << "("; 163cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 16427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (ShufTab[ShufTab[ThisOp].Arg0].Cost == 0) { 16527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg0); 16627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner PrintMask(ShufTab[ThisOp].Arg0, std::cerr); 16727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } else { 16827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // Figure out what tmp # it is. 
16927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner for (unsigned i = 0; ; ++i) 17027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (Vals[i] == ShufTab[ThisOp].Arg0) { 17127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cerr << "t" << i; 17227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner break; 17327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 17427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 175cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 17627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (!ShufTab[Vals[ValNo]].Op->isOnlyLHSOperator()) { 17727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cerr << ", "; 17827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (ShufTab[ShufTab[ThisOp].Arg1].Cost == 0) { 17927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cerr << getZeroCostOpName(ShufTab[ThisOp].Arg1); 18027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner PrintMask(ShufTab[ThisOp].Arg1, std::cerr); 18127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } else { 18227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // Figure out what tmp # it is. 
18327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner for (unsigned i = 0; ; ++i) 18427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (Vals[i] == ShufTab[ThisOp].Arg1) { 18527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cerr << "t" << i; 18627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner break; 18727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 18827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 18927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 19027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cerr << ") "; 19127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner} 19227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 19327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstatic unsigned getNumEntered() { 19427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned Count = 0; 19527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner for (unsigned i = 0; i != 65536; ++i) 19627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner Count += ShufTab[i].Cost < 100; 19727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner return Count; 19827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner} 19927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 200cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikovstatic void EvaluateOps(unsigned short Elt, unsigned short Vals[], 20127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned &NumVals) { 20227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (ShufTab[Elt].Cost == 0) return; 20327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 20427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // If this value has already been evaluated, it is free. FIXME: match undefs. 
20527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner for (unsigned i = 0, e = NumVals; i != e; ++i) 20627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (Vals[i] == Elt) return; 207cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 20827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // Otherwise, get the operands of the value, then add it. 20927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned Arg0 = ShufTab[Elt].Arg0, Arg1 = ShufTab[Elt].Arg1; 21027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (ShufTab[Arg0].Cost) 21127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner EvaluateOps(Arg0, Vals, NumVals); 21227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (Arg0 != Arg1 && ShufTab[Arg1].Cost) 21327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner EvaluateOps(Arg1, Vals, NumVals); 214cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 21527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner Vals[NumVals++] = Elt; 21627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner} 21727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 21827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 21927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerint main() { 22027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // Seed the table with accesses to the LHS and RHS. 
22127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ShufTab[0x0123].Cost = 0; 222cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines ShufTab[0x0123].Op = nullptr; 22327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ShufTab[0x0123].Arg0 = 0x0123; 22427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ShufTab[0x4567].Cost = 0; 225cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines ShufTab[0x4567].Op = nullptr; 22627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ShufTab[0x4567].Arg0 = 0x4567; 227cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 22827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // Seed the first-level of shuffles, shuffles whose inputs are the input to 22927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // the vectorshuffle operation. 23027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner bool MadeChange = true; 23127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned OpCount = 0; 23227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner while (MadeChange) { 23327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner MadeChange = false; 23427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ++OpCount; 23527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cerr << "Starting iteration #" << OpCount << " with " 23627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner << getNumEntered() << " entries established.\n"; 237cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 23827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // Scan the table for two reasons: First, compute the maximum cost of any 23927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // operation left in the table. Second, make sure that values with undefs 24027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // have the cheapest alternative that they match. 
24127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned MaxCost = ShufTab[0].Cost; 24227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner for (unsigned i = 1; i != 0x8889; ++i) { 24327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (!isValidMask(i)) continue; 24427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (ShufTab[i].Cost > MaxCost) 24527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner MaxCost = ShufTab[i].Cost; 246cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 24727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // If this value has an undef, make it be computed the cheapest possible 24827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // way of any of the things that it matches. 24927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (hasUndefElements(i)) { 25027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // This code is a little bit tricky, so here's the idea: consider some 25127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // permutation, like 7u4u. To compute the lowest cost for 7u4u, we 25227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // need to take the minimum cost of all of 7[0-8]4[0-8], 81 entries. If 25327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // there are 3 undefs, the number rises to 729 entries we have to scan, 25427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // and for the 4 undef case, we have to scan the whole table. 25527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // 25627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // Instead of doing this huge amount of scanning, we process the table 25727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // entries *in order*, and use the fact that 'u' is 8, larger than any 25827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // valid index. Given an entry like 7u4u then, we only need to scan 25927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // 7[0-7]4u - 8 entries. 
We can get away with this, because we already 26027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // know that each of 704u, 714u, 724u, etc contain the minimum value of 26127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // all of the 704[0-8], 714[0-8] and 724[0-8] entries respectively. 26227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned UndefIdx; 26327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (i & 0x8000) 26427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner UndefIdx = 0; 26527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner else if (i & 0x0800) 26627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner UndefIdx = 1; 26727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner else if (i & 0x0080) 26827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner UndefIdx = 2; 26927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner else if (i & 0x0008) 27027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner UndefIdx = 3; 27127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner else 27227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner abort(); 273cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 27427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned MinVal = i; 27527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned MinCost = ShufTab[i].Cost; 276cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 27727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // Scan the 8 entries. 
27827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner for (unsigned j = 0; j != 8; ++j) { 27927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned NewElt = setMaskElt(i, UndefIdx, j); 28027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (ShufTab[NewElt].Cost < MinCost) { 28127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner MinCost = ShufTab[NewElt].Cost; 28227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner MinVal = NewElt; 28327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 28427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 285cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 28627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // If we found something cheaper than what was here before, use it. 28727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (i != MinVal) { 28827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner MadeChange = true; 28927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ShufTab[i] = ShufTab[MinVal]; 29027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 291cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov } 29227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 293cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 29427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner for (unsigned LHS = 0; LHS != 0x8889; ++LHS) { 29527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (!isValidMask(LHS)) continue; 29627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (ShufTab[LHS].Cost > 1000) continue; 29727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 29827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // If nothing involving this operand could possibly be cheaper than what 29927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // we already have, don't consider it. 
30027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (ShufTab[LHS].Cost + 1 >= MaxCost) 30127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner continue; 302cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 30327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner for (unsigned opnum = 0, e = TheOperators.size(); opnum != e; ++opnum) { 30427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner Operator *Op = TheOperators[opnum]; 30527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 30627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // Evaluate op(LHS,LHS) 30727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned ResultMask = Op->getTransformedMask(LHS, LHS); 30827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 309c9d44f0d9c609f86e13fbf2e27165b663f94efe3Anton Korobeynikov unsigned Cost = ShufTab[LHS].Cost + Op->getCost(); 31027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (Cost < ShufTab[ResultMask].Cost) { 31127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ShufTab[ResultMask].Cost = Cost; 31227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ShufTab[ResultMask].Op = Op; 31327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ShufTab[ResultMask].Arg0 = LHS; 31427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ShufTab[ResultMask].Arg1 = LHS; 31527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner MadeChange = true; 31627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 317cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 31827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // If this is a two input instruction, include the op(x,y) cases. If 31927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // this is a one input instruction, skip this. 
32027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (Op->isOnlyLHSOperator()) continue; 321cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 32227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner for (unsigned RHS = 0; RHS != 0x8889; ++RHS) { 32327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (!isValidMask(RHS)) continue; 32427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (ShufTab[RHS].Cost > 1000) continue; 325cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 32627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // If nothing involving this operand could possibly be cheaper than 32727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // what we already have, don't consider it. 32827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (ShufTab[RHS].Cost + 1 >= MaxCost) 32927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner continue; 330cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 33127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 33227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // Evaluate op(LHS,RHS) 33327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned ResultMask = Op->getTransformedMask(LHS, RHS); 33427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 33527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (ShufTab[ResultMask].Cost <= OpCount || 33627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ShufTab[ResultMask].Cost <= ShufTab[LHS].Cost || 33727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ShufTab[ResultMask].Cost <= ShufTab[RHS].Cost) 33827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner continue; 339cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 34027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // Figure out the cost to evaluate this, knowing that CSE's only need 34127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // to be evaluated once. 
34227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned short Vals[30]; 34327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned NumVals = 0; 34427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner EvaluateOps(LHS, Vals, NumVals); 34527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner EvaluateOps(RHS, Vals, NumVals); 34627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 347c9d44f0d9c609f86e13fbf2e27165b663f94efe3Anton Korobeynikov unsigned Cost = NumVals + Op->getCost(); 34827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (Cost < ShufTab[ResultMask].Cost) { 34927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ShufTab[ResultMask].Cost = Cost; 35027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ShufTab[ResultMask].Op = Op; 35127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ShufTab[ResultMask].Arg0 = LHS; 35227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ShufTab[ResultMask].Arg1 = RHS; 35327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner MadeChange = true; 35427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 35527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 35627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 35727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 35827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 359cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 36027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cerr << "Finished Table has " << getNumEntered() 36127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner << " entries established.\n"; 362cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 36327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned CostArray[10] = { 0 }; 36427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 36527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // Compute a cost histogram. 
36627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner for (unsigned i = 0; i != 65536; ++i) { 36727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (!isValidMask(i)) continue; 36827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (ShufTab[i].Cost > 9) 36927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ++CostArray[9]; 37027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner else 37127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner ++CostArray[ShufTab[i].Cost]; 37227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 373cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 37427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner for (unsigned i = 0; i != 9; ++i) 37527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (CostArray[i]) 37627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cout << "// " << CostArray[i] << " entries have cost " << i << "\n"; 37727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (CostArray[9]) 37827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cout << "// " << CostArray[9] << " entries have higher cost!\n"; 379cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 380cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 38127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // Build up the table to emit. 
38227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cout << "\n// This table is 6561*4 = 26244 bytes in size.\n"; 3833033d4d3759e3552e7e9d846b02c30c916e6953eChris Lattner std::cout << "static const unsigned PerfectShuffleTable[6561+1] = {\n"; 384cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 38527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner for (unsigned i = 0; i != 0x8889; ++i) { 38627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (!isValidMask(i)) continue; 387cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 38827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // CostSat - The cost of this operation saturated to two bits. 38927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned CostSat = ShufTab[i].Cost; 3908c3e8471592bf46631bff0fab554d5ac3fedcff6Chris Lattner if (CostSat > 4) CostSat = 4; 3918c3e8471592bf46631bff0fab554d5ac3fedcff6Chris Lattner if (CostSat == 0) CostSat = 1; 3928c3e8471592bf46631bff0fab554d5ac3fedcff6Chris Lattner --CostSat; // Cost is now between 0-3. 393cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 39427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned OpNum = ShufTab[i].Op ? ShufTab[i].Op->OpNum : 0; 39527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner assert(OpNum < 16 && "Too few bits to encode operation!"); 396cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 39727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned LHS = getCompressedMask(ShufTab[i].Arg0); 39827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned RHS = getCompressedMask(ShufTab[i].Arg1); 399cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 40027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // Encode this as 2 bits of saturated cost, 4 bits of opcodes, 13 bits of 40127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // LHS, and 13 bits of RHS = 32 bits. 
402195d8adac9600e90f2c2dbd9e44f62616b10df7dChris Lattner unsigned Val = (CostSat << 30) | (OpNum << 26) | (LHS << 13) | RHS; 40327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 40441045ba731f37fb6bef980cdc63b0a8538b6c8caJim Grosbach std::cout << " " << std::setw(10) << Val << "U, // "; 40527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner PrintMask(i, std::cout); 40627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cout << ": Cost " << ShufTab[i].Cost; 40727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cout << " " << (ShufTab[i].Op ? ShufTab[i].Op->getName() : "copy"); 40827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cout << " "; 40927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (ShufTab[ShufTab[i].Arg0].Cost == 0) { 41027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cout << getZeroCostOpName(ShufTab[i].Arg0); 41127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } else { 41227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner PrintMask(ShufTab[i].Arg0, std::cout); 41327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 41427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 41527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (ShufTab[i].Op && !ShufTab[i].Op->isOnlyLHSOperator()) { 41627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cout << ", "; 41727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (ShufTab[ShufTab[i].Arg1].Cost == 0) { 41827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cout << getZeroCostOpName(ShufTab[i].Arg1); 41927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } else { 42027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner PrintMask(ShufTab[i].Arg1, std::cout); 42127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 42227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 42327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cout << "\n"; 424cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov } 
42527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cout << " 0\n};\n"; 42627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 42727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (0) { 42827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner // Print out the table. 42927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner for (unsigned i = 0; i != 0x8889; ++i) { 43027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (!isValidMask(i)) continue; 43127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner if (ShufTab[i].Cost < 1000) { 43227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner PrintMask(i, std::cerr); 43327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cerr << " - Cost " << ShufTab[i].Cost << " - "; 434cb02ddeda0cbbc23075a9be0bf59ad3f7eb8f5e1Anton Korobeynikov 43527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned short Vals[30]; 43627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner unsigned NumVals = 0; 43727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner EvaluateOps(i, Vals, NumVals); 43827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 43927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner for (unsigned j = 0, e = NumVals; j != e; ++j) 44027e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner PrintOperation(j, Vals); 44127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner std::cerr << "\n"; 44227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 44327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 44427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 44527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner} 44627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 44727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 448cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner#ifdef GENERATE_ALTIVEC 44927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 45027e98aa07c836276fc7f546da62c791ad63863d1Chris 
Lattner///===---------------------------------------------------------------------===// 45127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner/// The altivec instruction definitions. This is the altivec-specific part of 45227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner/// this file. 45327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner///===---------------------------------------------------------------------===// 45427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 455cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner// Note that the opcode numbers here must match those in the PPC backend. 456cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattnerenum { 457cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 458cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner OP_VMRGHW, 459cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner OP_VMRGLW, 460cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner OP_VSPLTISW0, 461cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner OP_VSPLTISW1, 462cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner OP_VSPLTISW2, 463cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner OP_VSPLTISW3, 464cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner OP_VSLDOI4, 465cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner OP_VSLDOI8, 466d74ea2bbd8bb630331f35ead42d385249bd42af8Chris Lattner OP_VSLDOI12 467cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner}; 468cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner 46927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstruct vmrghw : public Operator { 470cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner vmrghw() : Operator(0x0415, "vmrghw", OP_VMRGHW) {} 47127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner} the_vmrghw; 47227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 47327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstruct vmrglw : public Operator { 
474cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner vmrglw() : Operator(0x2637, "vmrglw", OP_VMRGLW) {} 47527e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner} the_vmrglw; 47627e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 47727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnertemplate<unsigned Elt> 47827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstruct vspltisw : public Operator { 479cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner vspltisw(const char *N, unsigned Opc) 480cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {} 48127e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner}; 48227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 483cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattnervspltisw<0> the_vspltisw0("vspltisw0", OP_VSPLTISW0); 484cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattnervspltisw<1> the_vspltisw1("vspltisw1", OP_VSPLTISW1); 485cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattnervspltisw<2> the_vspltisw2("vspltisw2", OP_VSPLTISW2); 486cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattnervspltisw<3> the_vspltisw3("vspltisw3", OP_VSPLTISW3); 48727e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 48827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnertemplate<unsigned N> 48927e98aa07c836276fc7f546da62c791ad63863d1Chris Lattnerstruct vsldoi : public Operator { 490cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner vsldoi(const char *Name, unsigned Opc) 491cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) { 49227e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner } 49327e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner}; 49427e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 495cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattnervsldoi<1> the_vsldoi1("vsldoi4" , OP_VSLDOI4); 496cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattnervsldoi<2> the_vsldoi2("vsldoi8" , OP_VSLDOI8); 
497cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattnervsldoi<3> the_vsldoi3("vsldoi12", OP_VSLDOI12); 49827e98aa07c836276fc7f546da62c791ad63863d1Chris Lattner 499cf1f6440d9d4795d89e6ba4be8c621e31ee15a1cChris Lattner#endif 5001c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov 5011c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov#define GENERATE_NEON 5021c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov 5031c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov#ifdef GENERATE_NEON 5041c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovenum { 5051c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 5061c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov OP_VREV, 5071c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov OP_VDUP0, 5081c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov OP_VDUP1, 5091c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov OP_VDUP2, 5101c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov OP_VDUP3, 5111c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov OP_VEXT1, 5121c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov OP_VEXT2, 5131c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov OP_VEXT3, 5141c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov OP_VUZPL, // VUZP, left result 5151c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov OP_VUZPR, // VUZP, right result 5161c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov OP_VZIPL, // VZIP, left result 5171c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov OP_VZIPR, // VZIP, right result 5181c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov OP_VTRNL, // VTRN, left result 5191c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov OP_VTRNR // VTRN, right result 5201c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov}; 5211c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov 
5221c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vrev : public Operator { 5232a8eb722c7bb0fac2fe09a876f3471dcb25f465eTanya Lattner vrev() : Operator(0x1032, "vrev", OP_VREV) {} 5241c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov} the_vrev; 5251c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov 5261c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovtemplate<unsigned Elt> 5271c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vdup : public Operator { 5281c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov vdup(const char *N, unsigned Opc) 5291c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov : Operator(MakeMask(Elt, Elt, Elt, Elt), N, Opc) {} 5301c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov}; 5311c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov 5321c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovvdup<0> the_vdup0("vdup0", OP_VDUP0); 5331c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovvdup<1> the_vdup1("vdup1", OP_VDUP1); 5341c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovvdup<2> the_vdup2("vdup2", OP_VDUP2); 5351c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovvdup<3> the_vdup3("vdup3", OP_VDUP3); 5361c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov 5371c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovtemplate<unsigned N> 5381c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vext : public Operator { 5391c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov vext(const char *Name, unsigned Opc) 5401c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov : Operator(MakeMask(N&7, (N+1)&7, (N+2)&7, (N+3)&7), Name, Opc) { 5411c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov } 5421c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov}; 5431c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov 5441c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovvext<1> the_vext1("vext1", OP_VEXT1); 
5451c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovvext<2> the_vext2("vext2", OP_VEXT2); 5461c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovvext<3> the_vext3("vext3", OP_VEXT3); 5471c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov 5481c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vuzpl : public Operator { 54958cd84dc775ec4b4fb11e7d0e383449dfd81cf11Anton Korobeynikov vuzpl() : Operator(0x0246, "vuzpl", OP_VUZPL, 2) {} 5501c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov} the_vuzpl; 5511c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov 5521c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vuzpr : public Operator { 55358cd84dc775ec4b4fb11e7d0e383449dfd81cf11Anton Korobeynikov vuzpr() : Operator(0x1357, "vuzpr", OP_VUZPR, 2) {} 5541c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov} the_vuzpr; 5551c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov 5561c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vzipl : public Operator { 55758cd84dc775ec4b4fb11e7d0e383449dfd81cf11Anton Korobeynikov vzipl() : Operator(0x0415, "vzipl", OP_VZIPL, 2) {} 5581c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov} the_vzipl; 5591c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov 5601c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vzipr : public Operator { 56158cd84dc775ec4b4fb11e7d0e383449dfd81cf11Anton Korobeynikov vzipr() : Operator(0x2637, "vzipr", OP_VZIPR, 2) {} 5621c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov} the_vzipr; 5631c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov 5641c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vtrnl : public Operator { 56558cd84dc775ec4b4fb11e7d0e383449dfd81cf11Anton Korobeynikov vtrnl() : Operator(0x0426, "vtrnl", OP_VTRNL, 2) {} 5661c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov} the_vtrnl; 5671c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov 
5681c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikovstruct vtrnr : public Operator { 56958cd84dc775ec4b4fb11e7d0e383449dfd81cf11Anton Korobeynikov vtrnr() : Operator(0x1537, "vtrnr", OP_VTRNR, 2) {} 5701c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov} the_vtrnr; 5711c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov 5721c8e581832440a114c9587d41473d107de4cac74Anton Korobeynikov#endif 573