AsmMatcherEmitter.cpp revision 245f05843f7f17406f394696e7330950e6d88e6d
1//===- AsmMatcherEmitter.cpp - Generate an assembly matcher ---------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This tablegen backend emits a target specifier matcher for converting parsed
11// assembly operands in the MCInst structures.
12//
13// The input to the target specific matcher is a list of literal tokens and
14// operands. The target specific parser should generally eliminate any syntax
15// which is not relevant for matching; for example, comma tokens should have
16// already been consumed and eliminated by the parser. Most instructions will
17// end up with a single literal token (the instruction name) and some number of
18// operands.
19//
20// Some example inputs, for X86:
21//   'addl' (immediate ...) (register ...)
22//   'add' (immediate ...) (memory ...)
23//   'call' '*' %epc
24//
25// The assembly matcher is responsible for converting this input into a precise
26// machine instruction (i.e., an instruction with a well defined encoding). This
27// mapping has several properties which complicate matching:
28//
29//  - It may be ambiguous; many architectures can legally encode particular
30//    variants of an instruction in different ways (for example, using a smaller
31//    encoding for small immediates). Such ambiguities should never be
32//    arbitrarily resolved by the assembler, the assembler is always responsible
33//    for choosing the "best" available instruction.
34//
35//  - It may depend on the subtarget or the assembler context. Instructions
36//    which are invalid for the current mode, but otherwise unambiguous (e.g.,
37//    an SSE instruction in a file being assembled for i486) should be accepted
38//    and rejected by the assembler front end. However, if the proper encoding
39//    for an instruction is dependent on the assembler context then the matcher
40//    is responsible for selecting the correct machine instruction for the
41//    current mode.
42//
43// The core matching algorithm attempts to exploit the regularity in most
44// instruction sets to quickly determine the set of possibly matching
45// instructions, and the simplify the generated code. Additionally, this helps
46// to ensure that the ambiguities are intentionally resolved by the user.
47//
48// The matching is divided into two distinct phases:
49//
50//   1. Classification: Each operand is mapped to the unique set which (a)
51//      contains it, and (b) is the largest such subset for which a single
52//      instruction could match all members.
53//
54//      For register classes, we can generate these subgroups automatically. For
55//      arbitrary operands, we expect the user to define the classes and their
56//      relations to one another (for example, 8-bit signed immediates as a
57//      subset of 32-bit immediates).
58//
59//      By partitioning the operands in this way, we guarantee that for any
60//      tuple of classes, any single instruction must match either all or none
61//      of the sets of operands which could classify to that tuple.
62//
63//      In addition, the subset relation amongst classes induces a partial order
64//      on such tuples, which we use to resolve ambiguities.
65//
66//      FIXME: What do we do if a crazy case shows up where this is the wrong
67//      resolution?
68//
69//   2. The input can now be treated as a tuple of classes (static tokens are
70//      simple singleton sets). Each such tuple should generally map to a single
71//      instruction (we currently ignore cases where this isn't true, whee!!!),
72//      which we can emit a simple matcher for.
73//
74//===----------------------------------------------------------------------===//
75
76#include "AsmMatcherEmitter.h"
77#include "CodeGenTarget.h"
78#include "Record.h"
79#include "llvm/ADT/OwningPtr.h"
80#include "llvm/ADT/SmallVector.h"
81#include "llvm/ADT/StringExtras.h"
82#include "llvm/Support/CommandLine.h"
83#include "llvm/Support/Debug.h"
84#include <list>
85#include <map>
86#include <set>
87using namespace llvm;
88
89namespace {
90static cl::opt<std::string>
91MatchOneInstr("match-one-instr", cl::desc("Match only the named instruction"),
92              cl::init(""));
93}
94
95/// FlattenVariants - Flatten an .td file assembly string by selecting the
96/// variant at index \arg N.
97static std::string FlattenVariants(const std::string &AsmString,
98                                   unsigned N) {
99  StringRef Cur = AsmString;
100  std::string Res = "";
101
102  for (;;) {
103    // Find the start of the next variant string.
104    size_t VariantsStart = 0;
105    for (size_t e = Cur.size(); VariantsStart != e; ++VariantsStart)
106      if (Cur[VariantsStart] == '{' &&
107          (VariantsStart == 0 || (Cur[VariantsStart-1] != '$' &&
108                                  Cur[VariantsStart-1] != '\\')))
109        break;
110
111    // Add the prefix to the result.
112    Res += Cur.slice(0, VariantsStart);
113    if (VariantsStart == Cur.size())
114      break;
115
116    ++VariantsStart; // Skip the '{'.
117
118    // Scan to the end of the variants string.
119    size_t VariantsEnd = VariantsStart;
120    unsigned NestedBraces = 1;
121    for (size_t e = Cur.size(); VariantsEnd != e; ++VariantsEnd) {
122      if (Cur[VariantsEnd] == '}' && Cur[VariantsEnd-1] != '\\') {
123        if (--NestedBraces == 0)
124          break;
125      } else if (Cur[VariantsEnd] == '{')
126        ++NestedBraces;
127    }
128
129    // Select the Nth variant (or empty).
130    StringRef Selection = Cur.slice(VariantsStart, VariantsEnd);
131    for (unsigned i = 0; i != N; ++i)
132      Selection = Selection.split('|').second;
133    Res += Selection.split('|').first;
134
135    assert(VariantsEnd != Cur.size() &&
136           "Unterminated variants in assembly string!");
137    Cur = Cur.substr(VariantsEnd + 1);
138  }
139
140  return Res;
141}
142
143/// TokenizeAsmString - Tokenize a simplified assembly string.
144static void TokenizeAsmString(const StringRef &AsmString,
145                              SmallVectorImpl<StringRef> &Tokens) {
146  unsigned Prev = 0;
147  bool InTok = true;
148  for (unsigned i = 0, e = AsmString.size(); i != e; ++i) {
149    switch (AsmString[i]) {
150    case '[':
151    case ']':
152    case '*':
153    case '!':
154    case ' ':
155    case '\t':
156    case ',':
157      if (InTok) {
158        Tokens.push_back(AsmString.slice(Prev, i));
159        InTok = false;
160      }
161      if (!isspace(AsmString[i]) && AsmString[i] != ',')
162        Tokens.push_back(AsmString.substr(i, 1));
163      Prev = i + 1;
164      break;
165
166    case '\\':
167      if (InTok) {
168        Tokens.push_back(AsmString.slice(Prev, i));
169        InTok = false;
170      }
171      ++i;
172      assert(i != AsmString.size() && "Invalid quoted character");
173      Tokens.push_back(AsmString.substr(i, 1));
174      Prev = i + 1;
175      break;
176
177    case '$': {
178      // If this isn't "${", treat like a normal token.
179      if (i + 1 == AsmString.size() || AsmString[i + 1] != '{') {
180        if (InTok) {
181          Tokens.push_back(AsmString.slice(Prev, i));
182          InTok = false;
183        }
184        Prev = i;
185        break;
186      }
187
188      if (InTok) {
189        Tokens.push_back(AsmString.slice(Prev, i));
190        InTok = false;
191      }
192
193      StringRef::iterator End =
194        std::find(AsmString.begin() + i, AsmString.end(), '}');
195      assert(End != AsmString.end() && "Missing brace in operand reference!");
196      size_t EndPos = End - AsmString.begin();
197      Tokens.push_back(AsmString.slice(i, EndPos+1));
198      Prev = EndPos + 1;
199      i = EndPos;
200      break;
201    }
202
203    default:
204      InTok = true;
205    }
206  }
207  if (InTok && Prev != AsmString.size())
208    Tokens.push_back(AsmString.substr(Prev));
209}
210
211static bool IsAssemblerInstruction(const StringRef &Name,
212                                   const CodeGenInstruction &CGI,
213                                   const SmallVectorImpl<StringRef> &Tokens) {
214  // Ignore psuedo ops.
215  //
216  // FIXME: This is a hack.
217  if (const RecordVal *Form = CGI.TheDef->getValue("Form"))
218    if (Form->getValue()->getAsString() == "Pseudo")
219      return false;
220
221  // Ignore "PHI" node.
222  //
223  // FIXME: This is also a hack.
224  if (Name == "PHI")
225    return false;
226
227  // Ignore instructions with no .s string.
228  //
229  // FIXME: What are these?
230  if (CGI.AsmString.empty())
231    return false;
232
233  // FIXME: Hack; ignore any instructions with a newline in them.
234  if (std::find(CGI.AsmString.begin(),
235                CGI.AsmString.end(), '\n') != CGI.AsmString.end())
236    return false;
237
238  // Ignore instructions with attributes, these are always fake instructions for
239  // simplifying codegen.
240  //
241  // FIXME: Is this true?
242  //
243  // Also, we ignore instructions which reference the operand multiple times;
244  // this implies a constraint we would not currently honor. These are
245  // currently always fake instructions for simplifying codegen.
246  //
247  // FIXME: Encode this assumption in the .td, so we can error out here.
248  std::set<std::string> OperandNames;
249  for (unsigned i = 1, e = Tokens.size(); i < e; ++i) {
250    if (Tokens[i][0] == '$' &&
251        std::find(Tokens[i].begin(),
252                  Tokens[i].end(), ':') != Tokens[i].end()) {
253      DEBUG({
254          errs() << "warning: '" << Name << "': "
255                 << "ignoring instruction; operand with attribute '"
256                 << Tokens[i] << "', \n";
257        });
258      return false;
259    }
260
261    if (Tokens[i][0] == '$' && !OperandNames.insert(Tokens[i]).second) {
262      DEBUG({
263          errs() << "warning: '" << Name << "': "
264                 << "ignoring instruction; tied operand '"
265                 << Tokens[i] << "', \n";
266        });
267      return false;
268    }
269  }
270
271  return true;
272}
273
274namespace {
275
276/// ClassInfo - Helper class for storing the information about a particular
277/// class of operands which can be matched.
278struct ClassInfo {
279  enum {
280    Token, ///< The class for a particular token.
281    Register, ///< A register class.
282    User ///< A user defined class.
283  } Kind;
284
285  /// Name - The class name, suitable for use as an enum.
286  std::string Name;
287
288  /// ValueName - The name of the value this class represents; for a token this
289  /// is the literal token string, for an operand it is the TableGen class (or
290  /// empty if this is a derived class).
291  std::string ValueName;
292
293  /// PredicateMethod - The name of the operand method to test whether the
294  /// operand matches this class; this is not valid for Token kinds.
295  std::string PredicateMethod;
296
297  /// RenderMethod - The name of the operand method to add this operand to an
298  /// MCInst; this is not valid for Token kinds.
299  std::string RenderMethod;
300};
301
302/// InstructionInfo - Helper class for storing the necessary information for an
303/// instruction which is capable of being matched.
304struct InstructionInfo {
305  struct Operand {
306    /// The unique class instance this operand should match.
307    ClassInfo *Class;
308
309    /// The original operand this corresponds to, if any.
310    const CodeGenInstruction::OperandInfo *OperandInfo;
311  };
312
313  /// InstrName - The target name for this instruction.
314  std::string InstrName;
315
316  /// Instr - The instruction this matches.
317  const CodeGenInstruction *Instr;
318
319  /// AsmString - The assembly string for this instruction (with variants
320  /// removed).
321  std::string AsmString;
322
323  /// Tokens - The tokenized assembly pattern that this instruction matches.
324  SmallVector<StringRef, 4> Tokens;
325
326  /// Operands - The operands that this instruction matches.
327  SmallVector<Operand, 4> Operands;
328
329  /// ConversionFnKind - The enum value which is passed to the generated
330  /// ConvertToMCInst to convert parsed operands into an MCInst for this
331  /// function.
332  std::string ConversionFnKind;
333
334public:
335  void dump();
336};
337
338class AsmMatcherInfo {
339public:
340  /// The classes which are needed for matching.
341  std::vector<ClassInfo*> Classes;
342
343  /// The information on the instruction to match.
344  std::vector<InstructionInfo*> Instructions;
345
346private:
347  /// Map of token to class information which has already been constructed.
348  std::map<std::string, ClassInfo*> TokenClasses;
349
350  /// Map of operand name to class information which has already been
351  /// constructed.
352  std::map<std::string, ClassInfo*> OperandClasses;
353
354private:
355  /// getTokenClass - Lookup or create the class for the given token.
356  ClassInfo *getTokenClass(const StringRef &Token);
357
358  /// getOperandClass - Lookup or create the class for the given operand.
359  ClassInfo *getOperandClass(const StringRef &Token,
360                             const CodeGenInstruction::OperandInfo &OI);
361
362public:
363  /// BuildInfo - Construct the various tables used during matching.
364  void BuildInfo(CodeGenTarget &Target);
365};
366
367}
368
369void InstructionInfo::dump() {
370  errs() << InstrName << " -- " << "flattened:\"" << AsmString << '\"'
371         << ", tokens:[";
372  for (unsigned i = 0, e = Tokens.size(); i != e; ++i) {
373    errs() << Tokens[i];
374    if (i + 1 != e)
375      errs() << ", ";
376  }
377  errs() << "]\n";
378
379  for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
380    Operand &Op = Operands[i];
381    errs() << "  op[" << i << "] = ";
382    if (Op.Class->Kind == ClassInfo::Token) {
383      errs() << '\"' << Tokens[i] << "\"\n";
384      continue;
385    }
386
387    const CodeGenInstruction::OperandInfo &OI = *Op.OperandInfo;
388    errs() << OI.Name << " " << OI.Rec->getName()
389           << " (" << OI.MIOperandNo << ", " << OI.MINumOperands << ")\n";
390  }
391}
392
393static std::string getEnumNameForToken(const StringRef &Str) {
394  std::string Res;
395
396  for (StringRef::iterator it = Str.begin(), ie = Str.end(); it != ie; ++it) {
397    switch (*it) {
398    case '*': Res += "_STAR_"; break;
399    case '%': Res += "_PCT_"; break;
400    case ':': Res += "_COLON_"; break;
401
402    default:
403      if (isalnum(*it))  {
404        Res += *it;
405      } else {
406        Res += "_" + utostr((unsigned) *it) + "_";
407      }
408    }
409  }
410
411  return Res;
412}
413
414ClassInfo *AsmMatcherInfo::getTokenClass(const StringRef &Token) {
415  ClassInfo *&Entry = TokenClasses[Token];
416
417  if (!Entry) {
418    Entry = new ClassInfo();
419    Entry->Kind = ClassInfo::Token;
420    Entry->Name = "MCK_" + getEnumNameForToken(Token);
421    Entry->ValueName = Token;
422    Entry->PredicateMethod = "<invalid>";
423    Entry->RenderMethod = "<invalid>";
424    Classes.push_back(Entry);
425  }
426
427  return Entry;
428}
429
430ClassInfo *
431AsmMatcherInfo::getOperandClass(const StringRef &Token,
432                                const CodeGenInstruction::OperandInfo &OI) {
433  std::string ClassName;
434  if (OI.Rec->isSubClassOf("RegisterClass")) {
435    ClassName = "Reg";
436  } else if (OI.Rec->isSubClassOf("Operand")) {
437    // FIXME: This should not be hard coded.
438    const RecordVal *RV = OI.Rec->getValue("Type");
439
440    // FIXME: Yet another total hack.
441    if (RV->getValue()->getAsString() == "iPTR" ||
442        OI.Rec->getName() == "i8mem_NOREX" ||
443        OI.Rec->getName() == "lea32mem" ||
444        OI.Rec->getName() == "lea64mem" ||
445        OI.Rec->getName() == "i128mem" ||
446        OI.Rec->getName() == "sdmem" ||
447        OI.Rec->getName() == "ssmem" ||
448        OI.Rec->getName() == "lea64_32mem") {
449      ClassName = "Mem";
450    } else {
451      ClassName = "Imm";
452    }
453  }
454
455  ClassInfo *&Entry = OperandClasses[ClassName];
456
457  if (!Entry) {
458    Entry = new ClassInfo();
459    // FIXME: Hack.
460    if (ClassName == "Reg") {
461      Entry->Kind = ClassInfo::Register;
462    } else {
463      Entry->Kind = ClassInfo::User;
464    }
465    Entry->Name = "MCK_" + ClassName;
466    Entry->ValueName = OI.Rec->getName();
467    Entry->PredicateMethod = "is" + ClassName;
468    Entry->RenderMethod = "add" + ClassName + "Operands";
469    Classes.push_back(Entry);
470  }
471
472  return Entry;
473}
474
475void AsmMatcherInfo::BuildInfo(CodeGenTarget &Target) {
476  for (std::map<std::string, CodeGenInstruction>::const_iterator
477         it = Target.getInstructions().begin(),
478         ie = Target.getInstructions().end();
479       it != ie; ++it) {
480    const CodeGenInstruction &CGI = it->second;
481
482    if (!MatchOneInstr.empty() && it->first != MatchOneInstr)
483      continue;
484
485    OwningPtr<InstructionInfo> II(new InstructionInfo);
486
487    II->InstrName = it->first;
488    II->Instr = &it->second;
489    II->AsmString = FlattenVariants(CGI.AsmString, 0);
490
491    TokenizeAsmString(II->AsmString, II->Tokens);
492
493    // Ignore instructions which shouldn't be matched.
494    if (!IsAssemblerInstruction(it->first, CGI, II->Tokens))
495      continue;
496
497    for (unsigned i = 0, e = II->Tokens.size(); i != e; ++i) {
498      StringRef Token = II->Tokens[i];
499
500      // Check for simple tokens.
501      if (Token[0] != '$') {
502        InstructionInfo::Operand Op;
503        Op.Class = getTokenClass(Token);
504        Op.OperandInfo = 0;
505        II->Operands.push_back(Op);
506        continue;
507      }
508
509      // Otherwise this is an operand reference.
510      StringRef OperandName;
511      if (Token[1] == '{')
512        OperandName = Token.substr(2, Token.size() - 3);
513      else
514        OperandName = Token.substr(1);
515
516      // Map this token to an operand. FIXME: Move elsewhere.
517      unsigned Idx;
518      try {
519        Idx = CGI.getOperandNamed(OperandName);
520      } catch(...) {
521        errs() << "error: unable to find operand: '" << OperandName << "'!\n";
522        break;
523      }
524
525      const CodeGenInstruction::OperandInfo &OI = CGI.OperandList[Idx];
526      InstructionInfo::Operand Op;
527      Op.Class = getOperandClass(Token, OI);
528      Op.OperandInfo = &OI;
529      II->Operands.push_back(Op);
530    }
531
532    // If we broke out, ignore the instruction.
533    if (II->Operands.size() != II->Tokens.size())
534      continue;
535
536    Instructions.push_back(II.take());
537  }
538}
539
540static void ConstructConversionFunctions(CodeGenTarget &Target,
541                                         std::vector<InstructionInfo*> &Infos,
542                                         raw_ostream &OS) {
543  // Write the convert function to a separate stream, so we can drop it after
544  // the enum.
545  std::string ConvertFnBody;
546  raw_string_ostream CvtOS(ConvertFnBody);
547
548  // Function we have already generated.
549  std::set<std::string> GeneratedFns;
550
551  // Start the unified conversion function.
552
553  CvtOS << "static bool ConvertToMCInst(ConversionKind Kind, MCInst &Inst, "
554        << "unsigned Opcode,\n"
555        << "                            SmallVectorImpl<"
556        << Target.getName() << "Operand> &Operands) {\n";
557  CvtOS << "  Inst.setOpcode(Opcode);\n";
558  CvtOS << "  switch (Kind) {\n";
559  CvtOS << "  default:\n";
560
561  // Start the enum, which we will generate inline.
562
563  OS << "// Unified function for converting operants to MCInst instances.\n\n";
564  OS << "enum ConversionKind {\n";
565
566  for (std::vector<InstructionInfo*>::const_iterator it = Infos.begin(),
567         ie = Infos.end(); it != ie; ++it) {
568    InstructionInfo &II = **it;
569
570    // Order the (class) operands by the order to convert them into an MCInst.
571    SmallVector<std::pair<unsigned, unsigned>, 4> MIOperandList;
572    for (unsigned i = 0, e = II.Operands.size(); i != e; ++i) {
573      InstructionInfo::Operand &Op = II.Operands[i];
574      if (Op.OperandInfo)
575        MIOperandList.push_back(std::make_pair(Op.OperandInfo->MIOperandNo, i));
576    }
577    std::sort(MIOperandList.begin(), MIOperandList.end());
578
579    // Compute the total number of operands.
580    unsigned NumMIOperands = 0;
581    for (unsigned i = 0, e = II.Instr->OperandList.size(); i != e; ++i) {
582      const CodeGenInstruction::OperandInfo &OI = II.Instr->OperandList[i];
583      NumMIOperands = std::max(NumMIOperands,
584                               OI.MIOperandNo + OI.MINumOperands);
585    }
586
587    // Build the conversion function signature.
588    std::string Signature = "Convert";
589    unsigned CurIndex = 0;
590    for (unsigned i = 0, e = MIOperandList.size(); i != e; ++i) {
591      InstructionInfo::Operand &Op = II.Operands[MIOperandList[i].second];
592      assert(CurIndex <= Op.OperandInfo->MIOperandNo &&
593             "Duplicate match for instruction operand!");
594
595      Signature += "_";
596
597      // Skip operands which weren't matched by anything, this occurs when the
598      // .td file encodes "implicit" operands as explicit ones.
599      //
600      // FIXME: This should be removed from the MCInst structure.
601      for (; CurIndex != Op.OperandInfo->MIOperandNo; ++CurIndex)
602        Signature += "Imp";
603
604      Signature += Op.Class->Name;
605      Signature += utostr(Op.OperandInfo->MINumOperands);
606      Signature += "_" + utostr(MIOperandList[i].second);
607
608      CurIndex += Op.OperandInfo->MINumOperands;
609    }
610
611    // Add any trailing implicit operands.
612    for (; CurIndex != NumMIOperands; ++CurIndex)
613      Signature += "Imp";
614
615    II.ConversionFnKind = Signature;
616
617    // Check if we have already generated this signature.
618    if (!GeneratedFns.insert(Signature).second)
619      continue;
620
621    // If not, emit it now.
622
623    // Add to the enum list.
624    OS << "  " << Signature << ",\n";
625
626    // And to the convert function.
627    CvtOS << "  case " << Signature << ":\n";
628    CurIndex = 0;
629    for (unsigned i = 0, e = MIOperandList.size(); i != e; ++i) {
630      InstructionInfo::Operand &Op = II.Operands[MIOperandList[i].second];
631
632      // Add the implicit operands.
633      for (; CurIndex != Op.OperandInfo->MIOperandNo; ++CurIndex)
634        CvtOS << "    Inst.addOperand(MCOperand::CreateReg(0));\n";
635
636      CvtOS << "    Operands[" << MIOperandList[i].second
637         << "]." << Op.Class->RenderMethod
638         << "(Inst, " << Op.OperandInfo->MINumOperands << ");\n";
639      CurIndex += Op.OperandInfo->MINumOperands;
640    }
641
642    // And add trailing implicit operands.
643    for (; CurIndex != NumMIOperands; ++CurIndex)
644      CvtOS << "    Inst.addOperand(MCOperand::CreateReg(0));\n";
645    CvtOS << "    break;\n";
646  }
647
648  // Finish the convert function.
649
650  CvtOS << "  }\n";
651  CvtOS << "  return false;\n";
652  CvtOS << "}\n\n";
653
654  // Finish the enum, and drop the convert function after it.
655
656  OS << "  NumConversionVariants\n";
657  OS << "};\n\n";
658
659  OS << CvtOS.str();
660}
661
662/// EmitMatchClassEnumeration - Emit the enumeration for match class kinds.
663static void EmitMatchClassEnumeration(CodeGenTarget &Target,
664                                      std::vector<ClassInfo*> &Infos,
665                                      raw_ostream &OS) {
666  OS << "namespace {\n\n";
667
668  OS << "/// MatchClassKind - The kinds of classes which participate in\n"
669     << "/// instruction matching.\n";
670  OS << "enum MatchClassKind {\n";
671  OS << "  InvalidMatchClass = 0,\n";
672  for (std::vector<ClassInfo*>::iterator it = Infos.begin(),
673         ie = Infos.end(); it != ie; ++it) {
674    ClassInfo &CI = **it;
675    OS << "  " << CI.Name << ", // ";
676    if (CI.Kind == ClassInfo::Token) {
677      OS << "'" << CI.ValueName << "'\n";
678    } else if (CI.Kind == ClassInfo::Register) {
679      if (!CI.ValueName.empty())
680        OS << "register class '" << CI.ValueName << "'\n";
681      else
682        OS << "derived register class\n";
683    } else {
684      OS << "user defined class '" << CI.ValueName << "'\n";
685    }
686  }
687  OS << "  NumMatchClassKinds\n";
688  OS << "};\n\n";
689
690  OS << "}\n\n";
691}
692
693/// EmitClassifyOperand - Emit the function to classify an operand.
694static void EmitClassifyOperand(CodeGenTarget &Target,
695                                std::vector<ClassInfo*> &Infos,
696                                raw_ostream &OS) {
697  OS << "static MatchClassKind ClassifyOperand("
698     << Target.getName() << "Operand &Operand) {\n";
699  OS << "  if (Operand.isToken())\n";
700  OS << "    return MatchTokenString(Operand.getToken());\n\n";
701  for (std::vector<ClassInfo*>::iterator it = Infos.begin(),
702         ie = Infos.end(); it != ie; ++it) {
703    ClassInfo &CI = **it;
704
705    if (CI.Kind != ClassInfo::Token) {
706      OS << "  if (Operand." << CI.PredicateMethod << "())\n";
707      OS << "    return " << CI.Name << ";\n\n";
708    }
709  }
710  OS << "  return InvalidMatchClass;\n";
711  OS << "}\n\n";
712}
713
714typedef std::pair<std::string, std::string> StringPair;
715
716/// FindFirstNonCommonLetter - Find the first character in the keys of the
717/// string pairs that is not shared across the whole set of strings.  All
718/// strings are assumed to have the same length.
719static unsigned
720FindFirstNonCommonLetter(const std::vector<const StringPair*> &Matches) {
721  assert(!Matches.empty());
722  for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) {
723    // Check to see if letter i is the same across the set.
724    char Letter = Matches[0]->first[i];
725
726    for (unsigned str = 0, e = Matches.size(); str != e; ++str)
727      if (Matches[str]->first[i] != Letter)
728        return i;
729  }
730
731  return Matches[0]->first.size();
732}
733
734/// EmitStringMatcherForChar - Given a set of strings that are known to be the
735/// same length and whose characters leading up to CharNo are the same, emit
736/// code to verify that CharNo and later are the same.
737static void EmitStringMatcherForChar(const std::string &StrVariableName,
738                                  const std::vector<const StringPair*> &Matches,
739                                     unsigned CharNo, unsigned IndentCount,
740                                     raw_ostream &OS) {
741  assert(!Matches.empty() && "Must have at least one string to match!");
742  std::string Indent(IndentCount*2+4, ' ');
743
744  // If we have verified that the entire string matches, we're done: output the
745  // matching code.
746  if (CharNo == Matches[0]->first.size()) {
747    assert(Matches.size() == 1 && "Had duplicate keys to match on");
748
749    // FIXME: If Matches[0].first has embeded \n, this will be bad.
750    OS << Indent << Matches[0]->second << "\t // \"" << Matches[0]->first
751       << "\"\n";
752    return;
753  }
754
755  // Bucket the matches by the character we are comparing.
756  std::map<char, std::vector<const StringPair*> > MatchesByLetter;
757
758  for (unsigned i = 0, e = Matches.size(); i != e; ++i)
759    MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]);
760
761
762  // If we have exactly one bucket to match, see how many characters are common
763  // across the whole set and match all of them at once.
764  // length, just verify the rest of it with one if.
765  if (MatchesByLetter.size() == 1) {
766    unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches);
767    unsigned NumChars = FirstNonCommonLetter-CharNo;
768
769    if (NumChars == 1) {
770      // Do the comparison with if (Str[1] == 'f')
771      // FIXME: Need to escape general characters.
772      OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] == '"
773         << Matches[0]->first[CharNo] << "') {\n";
774    } else {
775      // Do the comparison with if (Str.substr(1,3) == "foo").
776      OS << Indent << "if (" << StrVariableName << ".substr(" << CharNo << ","
777         << NumChars << ") == \"";
778
779      // FIXME: Need to escape general strings.
780      OS << Matches[0]->first.substr(CharNo, NumChars) << "\") {\n";
781    }
782
783    EmitStringMatcherForChar(StrVariableName, Matches, FirstNonCommonLetter,
784                             IndentCount+1, OS);
785    OS << Indent << "}\n";
786    return;
787  }
788
789  // Otherwise, we have multiple possible things, emit a switch on the
790  // character.
791  OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n";
792  OS << Indent << "default: break;\n";
793
794  for (std::map<char, std::vector<const StringPair*> >::iterator LI =
795       MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) {
796    // TODO: escape hard stuff (like \n) if we ever care about it.
797    OS << Indent << "case '" << LI->first << "':\t // "
798       << LI->second.size() << " strings to match.\n";
799    EmitStringMatcherForChar(StrVariableName, LI->second, CharNo+1,
800                             IndentCount+1, OS);
801    OS << Indent << "  break;\n";
802  }
803
804  OS << Indent << "}\n";
805
806}
807
808
809/// EmitStringMatcher - Given a list of strings and code to execute when they
810/// match, output a simple switch tree to classify the input string.  If a
811/// match is found, the code in Vals[i].second is executed.  This code should do
812/// a return to avoid falling through.  If nothing matches, execution falls
813/// through.  StrVariableName is the name of teh variable to test.
814static void EmitStringMatcher(const std::string &StrVariableName,
815                              const std::vector<StringPair> &Matches,
816                              raw_ostream &OS) {
817  // First level categorization: group strings by length.
818  std::map<unsigned, std::vector<const StringPair*> > MatchesByLength;
819
820  for (unsigned i = 0, e = Matches.size(); i != e; ++i)
821    MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]);
822
823  // Output a switch statement on length and categorize the elements within each
824  // bin.
825  OS << "  switch (" << StrVariableName << ".size()) {\n";
826  OS << "  default: break;\n";
827
828
829  for (std::map<unsigned, std::vector<const StringPair*> >::iterator LI =
830       MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) {
831    OS << "  case " << LI->first << ":\t // " << LI->second.size()
832       << " strings to match.\n";
833    EmitStringMatcherForChar(StrVariableName, LI->second, 0, 0, OS);
834    OS << "    break;\n";
835  }
836
837
838  OS << "  }\n";
839}
840
841
842/// EmitMatchTokenString - Emit the function to match a token string to the
843/// appropriate match class value.
844static void EmitMatchTokenString(CodeGenTarget &Target,
845                                 std::vector<ClassInfo*> &Infos,
846                                 raw_ostream &OS) {
847  // Construct the match list.
848  std::vector<StringPair> Matches;
849  for (std::vector<ClassInfo*>::iterator it = Infos.begin(),
850         ie = Infos.end(); it != ie; ++it) {
851    ClassInfo &CI = **it;
852
853    if (CI.Kind == ClassInfo::Token)
854      Matches.push_back(StringPair(CI.ValueName, "return " + CI.Name + ";"));
855  }
856
857  OS << "static MatchClassKind MatchTokenString(const StringRef &Name) {\n";
858
859  EmitStringMatcher("Name", Matches, OS);
860
861  OS << "  return InvalidMatchClass;\n";
862  OS << "}\n\n";
863}
864
865/// EmitMatchRegisterName - Emit the function to match a string to the target
866/// specific register enum.
867static void EmitMatchRegisterName(CodeGenTarget &Target, Record *AsmParser,
868                                  raw_ostream &OS) {
869  // Construct the match list.
870  std::vector<StringPair> Matches;
871  for (unsigned i = 0, e = Target.getRegisters().size(); i != e; ++i) {
872    const CodeGenRegister &Reg = Target.getRegisters()[i];
873    if (Reg.TheDef->getValueAsString("AsmName").empty())
874      continue;
875
876    Matches.push_back(StringPair(Reg.TheDef->getValueAsString("AsmName"),
877                                 "return " + utostr(i + 1) + ";"));
878  }
879
880  OS << "unsigned " << Target.getName()
881     << AsmParser->getValueAsString("AsmParserClassName")
882     << "::MatchRegisterName(const StringRef &Name) {\n";
883
884  EmitStringMatcher("Name", Matches, OS);
885
886  OS << "  return 0;\n";
887  OS << "}\n\n";
888}
889
890void AsmMatcherEmitter::run(raw_ostream &OS) {
891  CodeGenTarget Target;
892  Record *AsmParser = Target.getAsmParser();
893  std::string ClassName = AsmParser->getValueAsString("AsmParserClassName");
894
895  EmitSourceFileHeader("Assembly Matcher Source Fragment", OS);
896
897  // Emit the function to match a register name to number.
898  EmitMatchRegisterName(Target, AsmParser, OS);
899
900  // Compute the information on the instructions to match.
901  AsmMatcherInfo Info;
902  Info.BuildInfo(Target);
903
904  DEBUG_WITH_TYPE("instruction_info", {
905      for (std::vector<InstructionInfo*>::iterator
906             it = Info.Instructions.begin(), ie = Info.Instructions.end();
907           it != ie; ++it)
908        (*it)->dump();
909    });
910
911  // FIXME: At this point we should be able to totally order Infos, if not then
912  // we have an ambiguity which the .td file should be forced to resolve.
913
914  // Generate the terminal actions to convert operands into an MCInst.
915  ConstructConversionFunctions(Target, Info.Instructions, OS);
916
917  // Emit the enumeration for classes which participate in matching.
918  EmitMatchClassEnumeration(Target, Info.Classes, OS);
919
920  // Emit the routine to match token strings to their match class.
921  EmitMatchTokenString(Target, Info.Classes, OS);
922
923  // Emit the routine to classify an operand.
924  EmitClassifyOperand(Target, Info.Classes, OS);
925
926  // Finally, build the match function.
927
928  size_t MaxNumOperands = 0;
929  for (std::vector<InstructionInfo*>::const_iterator it =
930         Info.Instructions.begin(), ie = Info.Instructions.end();
931       it != ie; ++it)
932    MaxNumOperands = std::max(MaxNumOperands, (*it)->Operands.size());
933
934  OS << "bool " << Target.getName() << ClassName
935     << "::MatchInstruction("
936     << "SmallVectorImpl<" << Target.getName() << "Operand> &Operands, "
937     << "MCInst &Inst) {\n";
938
939  // Emit the static match table; unused classes get initalized to 0 which is
940  // guaranteed to be InvalidMatchClass.
941  //
942  // FIXME: We can reduce the size of this table very easily. First, we change
943  // it so that store the kinds in separate bit-fields for each index, which
944  // only needs to be the max width used for classes at that index (we also need
945  // to reject based on this during classification). If we then make sure to
946  // order the match kinds appropriately (putting mnemonics last), then we
947  // should only end up using a few bits for each class, especially the ones
948  // following the mnemonic.
949  OS << "  static const struct MatchEntry {\n";
950  OS << "    unsigned Opcode;\n";
951  OS << "    ConversionKind ConvertFn;\n";
952  OS << "    MatchClassKind Classes[" << MaxNumOperands << "];\n";
953  OS << "  } MatchTable[" << Info.Instructions.size() << "] = {\n";
954
955  for (std::vector<InstructionInfo*>::const_iterator it =
956         Info.Instructions.begin(), ie = Info.Instructions.end();
957       it != ie; ++it) {
958    InstructionInfo &II = **it;
959
960    OS << "    { " << Target.getName() << "::" << II.InstrName
961       << ", " << II.ConversionFnKind << ", { ";
962    for (unsigned i = 0, e = II.Operands.size(); i != e; ++i) {
963      InstructionInfo::Operand &Op = II.Operands[i];
964
965      if (i) OS << ", ";
966      OS << Op.Class->Name;
967    }
968    OS << " } },\n";
969  }
970
971  OS << "  };\n\n";
972
973  // Emit code to compute the class list for this operand vector.
974  OS << "  // Eliminate obvious mismatches.\n";
975  OS << "  if (Operands.size() > " << MaxNumOperands << ")\n";
976  OS << "    return true;\n\n";
977
978  OS << "  // Compute the class list for this operand vector.\n";
979  OS << "  MatchClassKind Classes[" << MaxNumOperands << "];\n";
980  OS << "  for (unsigned i = 0, e = Operands.size(); i != e; ++i) {\n";
981  OS << "    Classes[i] = ClassifyOperand(Operands[i]);\n\n";
982
983  OS << "    // Check for invalid operands before matching.\n";
984  OS << "    if (Classes[i] == InvalidMatchClass)\n";
985  OS << "      return true;\n";
986  OS << "  }\n\n";
987
988  OS << "  // Mark unused classes.\n";
989  OS << "  for (unsigned i = Operands.size(), e = " << MaxNumOperands << "; "
990     << "i != e; ++i)\n";
991  OS << "    Classes[i] = InvalidMatchClass;\n\n";
992
993  // Emit code to search the table.
994  OS << "  // Search the table.\n";
995  OS << "  for (const MatchEntry *it = MatchTable, "
996     << "*ie = MatchTable + " << Info.Instructions.size()
997     << "; it != ie; ++it) {\n";
998  for (unsigned i = 0; i != MaxNumOperands; ++i) {
999    OS << "    if (Classes[" << i << "] != it->Classes[" << i << "])\n";
1000    OS << "      continue;\n";
1001  }
1002  OS << "\n";
1003  OS << "    return ConvertToMCInst(it->ConvertFn, Inst, "
1004     << "it->Opcode, Operands);\n";
1005  OS << "  }\n\n";
1006
1007  OS << "  return true;\n";
1008  OS << "}\n\n";
1009}
1010