AsmMatcherEmitter.cpp revision 245f05843f7f17406f394696e7330950e6d88e6d
1//===- AsmMatcherEmitter.cpp - Generate an assembly matcher ---------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This tablegen backend emits a target specifier matcher for converting parsed 11// assembly operands in the MCInst structures. 12// 13// The input to the target specific matcher is a list of literal tokens and 14// operands. The target specific parser should generally eliminate any syntax 15// which is not relevant for matching; for example, comma tokens should have 16// already been consumed and eliminated by the parser. Most instructions will 17// end up with a single literal token (the instruction name) and some number of 18// operands. 19// 20// Some example inputs, for X86: 21// 'addl' (immediate ...) (register ...) 22// 'add' (immediate ...) (memory ...) 23// 'call' '*' %epc 24// 25// The assembly matcher is responsible for converting this input into a precise 26// machine instruction (i.e., an instruction with a well defined encoding). This 27// mapping has several properties which complicate matching: 28// 29// - It may be ambiguous; many architectures can legally encode particular 30// variants of an instruction in different ways (for example, using a smaller 31// encoding for small immediates). Such ambiguities should never be 32// arbitrarily resolved by the assembler, the assembler is always responsible 33// for choosing the "best" available instruction. 34// 35// - It may depend on the subtarget or the assembler context. Instructions 36// which are invalid for the current mode, but otherwise unambiguous (e.g., 37// an SSE instruction in a file being assembled for i486) should be accepted 38// and rejected by the assembler front end. However, if the proper encoding 39// for an instruction is dependent on the assembler context then the matcher 40// is responsible for selecting the correct machine instruction for the 41// current mode. 42// 43// The core matching algorithm attempts to exploit the regularity in most 44// instruction sets to quickly determine the set of possibly matching 45// instructions, and the simplify the generated code. Additionally, this helps 46// to ensure that the ambiguities are intentionally resolved by the user. 47// 48// The matching is divided into two distinct phases: 49// 50// 1. Classification: Each operand is mapped to the unique set which (a) 51// contains it, and (b) is the largest such subset for which a single 52// instruction could match all members. 53// 54// For register classes, we can generate these subgroups automatically. For 55// arbitrary operands, we expect the user to define the classes and their 56// relations to one another (for example, 8-bit signed immediates as a 57// subset of 32-bit immediates). 58// 59// By partitioning the operands in this way, we guarantee that for any 60// tuple of classes, any single instruction must match either all or none 61// of the sets of operands which could classify to that tuple. 62// 63// In addition, the subset relation amongst classes induces a partial order 64// on such tuples, which we use to resolve ambiguities. 65// 66// FIXME: What do we do if a crazy case shows up where this is the wrong 67// resolution? 68// 69// 2. The input can now be treated as a tuple of classes (static tokens are 70// simple singleton sets). Each such tuple should generally map to a single 71// instruction (we currently ignore cases where this isn't true, whee!!!), 72// which we can emit a simple matcher for. 73// 74//===----------------------------------------------------------------------===// 75 76#include "AsmMatcherEmitter.h" 77#include "CodeGenTarget.h" 78#include "Record.h" 79#include "llvm/ADT/OwningPtr.h" 80#include "llvm/ADT/SmallVector.h" 81#include "llvm/ADT/StringExtras.h" 82#include "llvm/Support/CommandLine.h" 83#include "llvm/Support/Debug.h" 84#include <list> 85#include <map> 86#include <set> 87using namespace llvm; 88 89namespace { 90static cl::opt<std::string> 91MatchOneInstr("match-one-instr", cl::desc("Match only the named instruction"), 92 cl::init("")); 93} 94 95/// FlattenVariants - Flatten an .td file assembly string by selecting the 96/// variant at index \arg N. 97static std::string FlattenVariants(const std::string &AsmString, 98 unsigned N) { 99 StringRef Cur = AsmString; 100 std::string Res = ""; 101 102 for (;;) { 103 // Find the start of the next variant string. 104 size_t VariantsStart = 0; 105 for (size_t e = Cur.size(); VariantsStart != e; ++VariantsStart) 106 if (Cur[VariantsStart] == '{' && 107 (VariantsStart == 0 || (Cur[VariantsStart-1] != '$' && 108 Cur[VariantsStart-1] != '\\'))) 109 break; 110 111 // Add the prefix to the result. 112 Res += Cur.slice(0, VariantsStart); 113 if (VariantsStart == Cur.size()) 114 break; 115 116 ++VariantsStart; // Skip the '{'. 117 118 // Scan to the end of the variants string. 119 size_t VariantsEnd = VariantsStart; 120 unsigned NestedBraces = 1; 121 for (size_t e = Cur.size(); VariantsEnd != e; ++VariantsEnd) { 122 if (Cur[VariantsEnd] == '}' && Cur[VariantsEnd-1] != '\\') { 123 if (--NestedBraces == 0) 124 break; 125 } else if (Cur[VariantsEnd] == '{') 126 ++NestedBraces; 127 } 128 129 // Select the Nth variant (or empty). 130 StringRef Selection = Cur.slice(VariantsStart, VariantsEnd); 131 for (unsigned i = 0; i != N; ++i) 132 Selection = Selection.split('|').second; 133 Res += Selection.split('|').first; 134 135 assert(VariantsEnd != Cur.size() && 136 "Unterminated variants in assembly string!"); 137 Cur = Cur.substr(VariantsEnd + 1); 138 } 139 140 return Res; 141} 142 143/// TokenizeAsmString - Tokenize a simplified assembly string. 144static void TokenizeAsmString(const StringRef &AsmString, 145 SmallVectorImpl<StringRef> &Tokens) { 146 unsigned Prev = 0; 147 bool InTok = true; 148 for (unsigned i = 0, e = AsmString.size(); i != e; ++i) { 149 switch (AsmString[i]) { 150 case '[': 151 case ']': 152 case '*': 153 case '!': 154 case ' ': 155 case '\t': 156 case ',': 157 if (InTok) { 158 Tokens.push_back(AsmString.slice(Prev, i)); 159 InTok = false; 160 } 161 if (!isspace(AsmString[i]) && AsmString[i] != ',') 162 Tokens.push_back(AsmString.substr(i, 1)); 163 Prev = i + 1; 164 break; 165 166 case '\\': 167 if (InTok) { 168 Tokens.push_back(AsmString.slice(Prev, i)); 169 InTok = false; 170 } 171 ++i; 172 assert(i != AsmString.size() && "Invalid quoted character"); 173 Tokens.push_back(AsmString.substr(i, 1)); 174 Prev = i + 1; 175 break; 176 177 case '$': { 178 // If this isn't "${", treat like a normal token. 179 if (i + 1 == AsmString.size() || AsmString[i + 1] != '{') { 180 if (InTok) { 181 Tokens.push_back(AsmString.slice(Prev, i)); 182 InTok = false; 183 } 184 Prev = i; 185 break; 186 } 187 188 if (InTok) { 189 Tokens.push_back(AsmString.slice(Prev, i)); 190 InTok = false; 191 } 192 193 StringRef::iterator End = 194 std::find(AsmString.begin() + i, AsmString.end(), '}'); 195 assert(End != AsmString.end() && "Missing brace in operand reference!"); 196 size_t EndPos = End - AsmString.begin(); 197 Tokens.push_back(AsmString.slice(i, EndPos+1)); 198 Prev = EndPos + 1; 199 i = EndPos; 200 break; 201 } 202 203 default: 204 InTok = true; 205 } 206 } 207 if (InTok && Prev != AsmString.size()) 208 Tokens.push_back(AsmString.substr(Prev)); 209} 210 211static bool IsAssemblerInstruction(const StringRef &Name, 212 const CodeGenInstruction &CGI, 213 const SmallVectorImpl<StringRef> &Tokens) { 214 // Ignore psuedo ops. 215 // 216 // FIXME: This is a hack. 217 if (const RecordVal *Form = CGI.TheDef->getValue("Form")) 218 if (Form->getValue()->getAsString() == "Pseudo") 219 return false; 220 221 // Ignore "PHI" node. 222 // 223 // FIXME: This is also a hack. 224 if (Name == "PHI") 225 return false; 226 227 // Ignore instructions with no .s string. 228 // 229 // FIXME: What are these? 230 if (CGI.AsmString.empty()) 231 return false; 232 233 // FIXME: Hack; ignore any instructions with a newline in them. 234 if (std::find(CGI.AsmString.begin(), 235 CGI.AsmString.end(), '\n') != CGI.AsmString.end()) 236 return false; 237 238 // Ignore instructions with attributes, these are always fake instructions for 239 // simplifying codegen. 240 // 241 // FIXME: Is this true? 242 // 243 // Also, we ignore instructions which reference the operand multiple times; 244 // this implies a constraint we would not currently honor. These are 245 // currently always fake instructions for simplifying codegen. 246 // 247 // FIXME: Encode this assumption in the .td, so we can error out here. 248 std::set<std::string> OperandNames; 249 for (unsigned i = 1, e = Tokens.size(); i < e; ++i) { 250 if (Tokens[i][0] == '$' && 251 std::find(Tokens[i].begin(), 252 Tokens[i].end(), ':') != Tokens[i].end()) { 253 DEBUG({ 254 errs() << "warning: '" << Name << "': " 255 << "ignoring instruction; operand with attribute '" 256 << Tokens[i] << "', \n"; 257 }); 258 return false; 259 } 260 261 if (Tokens[i][0] == '$' && !OperandNames.insert(Tokens[i]).second) { 262 DEBUG({ 263 errs() << "warning: '" << Name << "': " 264 << "ignoring instruction; tied operand '" 265 << Tokens[i] << "', \n"; 266 }); 267 return false; 268 } 269 } 270 271 return true; 272} 273 274namespace { 275 276/// ClassInfo - Helper class for storing the information about a particular 277/// class of operands which can be matched. 278struct ClassInfo { 279 enum { 280 Token, ///< The class for a particular token. 281 Register, ///< A register class. 282 User ///< A user defined class. 283 } Kind; 284 285 /// Name - The class name, suitable for use as an enum. 286 std::string Name; 287 288 /// ValueName - The name of the value this class represents; for a token this 289 /// is the literal token string, for an operand it is the TableGen class (or 290 /// empty if this is a derived class). 291 std::string ValueName; 292 293 /// PredicateMethod - The name of the operand method to test whether the 294 /// operand matches this class; this is not valid for Token kinds. 295 std::string PredicateMethod; 296 297 /// RenderMethod - The name of the operand method to add this operand to an 298 /// MCInst; this is not valid for Token kinds. 299 std::string RenderMethod; 300}; 301 302/// InstructionInfo - Helper class for storing the necessary information for an 303/// instruction which is capable of being matched. 304struct InstructionInfo { 305 struct Operand { 306 /// The unique class instance this operand should match. 307 ClassInfo *Class; 308 309 /// The original operand this corresponds to, if any. 310 const CodeGenInstruction::OperandInfo *OperandInfo; 311 }; 312 313 /// InstrName - The target name for this instruction. 314 std::string InstrName; 315 316 /// Instr - The instruction this matches. 317 const CodeGenInstruction *Instr; 318 319 /// AsmString - The assembly string for this instruction (with variants 320 /// removed). 321 std::string AsmString; 322 323 /// Tokens - The tokenized assembly pattern that this instruction matches. 324 SmallVector<StringRef, 4> Tokens; 325 326 /// Operands - The operands that this instruction matches. 327 SmallVector<Operand, 4> Operands; 328 329 /// ConversionFnKind - The enum value which is passed to the generated 330 /// ConvertToMCInst to convert parsed operands into an MCInst for this 331 /// function. 332 std::string ConversionFnKind; 333 334public: 335 void dump(); 336}; 337 338class AsmMatcherInfo { 339public: 340 /// The classes which are needed for matching. 341 std::vector<ClassInfo*> Classes; 342 343 /// The information on the instruction to match. 344 std::vector<InstructionInfo*> Instructions; 345 346private: 347 /// Map of token to class information which has already been constructed. 348 std::map<std::string, ClassInfo*> TokenClasses; 349 350 /// Map of operand name to class information which has already been 351 /// constructed. 352 std::map<std::string, ClassInfo*> OperandClasses; 353 354private: 355 /// getTokenClass - Lookup or create the class for the given token. 356 ClassInfo *getTokenClass(const StringRef &Token); 357 358 /// getOperandClass - Lookup or create the class for the given operand. 359 ClassInfo *getOperandClass(const StringRef &Token, 360 const CodeGenInstruction::OperandInfo &OI); 361 362public: 363 /// BuildInfo - Construct the various tables used during matching. 364 void BuildInfo(CodeGenTarget &Target); 365}; 366 367} 368 369void InstructionInfo::dump() { 370 errs() << InstrName << " -- " << "flattened:\"" << AsmString << '\"' 371 << ", tokens:["; 372 for (unsigned i = 0, e = Tokens.size(); i != e; ++i) { 373 errs() << Tokens[i]; 374 if (i + 1 != e) 375 errs() << ", "; 376 } 377 errs() << "]\n"; 378 379 for (unsigned i = 0, e = Operands.size(); i != e; ++i) { 380 Operand &Op = Operands[i]; 381 errs() << " op[" << i << "] = "; 382 if (Op.Class->Kind == ClassInfo::Token) { 383 errs() << '\"' << Tokens[i] << "\"\n"; 384 continue; 385 } 386 387 const CodeGenInstruction::OperandInfo &OI = *Op.OperandInfo; 388 errs() << OI.Name << " " << OI.Rec->getName() 389 << " (" << OI.MIOperandNo << ", " << OI.MINumOperands << ")\n"; 390 } 391} 392 393static std::string getEnumNameForToken(const StringRef &Str) { 394 std::string Res; 395 396 for (StringRef::iterator it = Str.begin(), ie = Str.end(); it != ie; ++it) { 397 switch (*it) { 398 case '*': Res += "_STAR_"; break; 399 case '%': Res += "_PCT_"; break; 400 case ':': Res += "_COLON_"; break; 401 402 default: 403 if (isalnum(*it)) { 404 Res += *it; 405 } else { 406 Res += "_" + utostr((unsigned) *it) + "_"; 407 } 408 } 409 } 410 411 return Res; 412} 413 414ClassInfo *AsmMatcherInfo::getTokenClass(const StringRef &Token) { 415 ClassInfo *&Entry = TokenClasses[Token]; 416 417 if (!Entry) { 418 Entry = new ClassInfo(); 419 Entry->Kind = ClassInfo::Token; 420 Entry->Name = "MCK_" + getEnumNameForToken(Token); 421 Entry->ValueName = Token; 422 Entry->PredicateMethod = "<invalid>"; 423 Entry->RenderMethod = "<invalid>"; 424 Classes.push_back(Entry); 425 } 426 427 return Entry; 428} 429 430ClassInfo * 431AsmMatcherInfo::getOperandClass(const StringRef &Token, 432 const CodeGenInstruction::OperandInfo &OI) { 433 std::string ClassName; 434 if (OI.Rec->isSubClassOf("RegisterClass")) { 435 ClassName = "Reg"; 436 } else if (OI.Rec->isSubClassOf("Operand")) { 437 // FIXME: This should not be hard coded. 438 const RecordVal *RV = OI.Rec->getValue("Type"); 439 440 // FIXME: Yet another total hack. 441 if (RV->getValue()->getAsString() == "iPTR" || 442 OI.Rec->getName() == "i8mem_NOREX" || 443 OI.Rec->getName() == "lea32mem" || 444 OI.Rec->getName() == "lea64mem" || 445 OI.Rec->getName() == "i128mem" || 446 OI.Rec->getName() == "sdmem" || 447 OI.Rec->getName() == "ssmem" || 448 OI.Rec->getName() == "lea64_32mem") { 449 ClassName = "Mem"; 450 } else { 451 ClassName = "Imm"; 452 } 453 } 454 455 ClassInfo *&Entry = OperandClasses[ClassName]; 456 457 if (!Entry) { 458 Entry = new ClassInfo(); 459 // FIXME: Hack. 460 if (ClassName == "Reg") { 461 Entry->Kind = ClassInfo::Register; 462 } else { 463 Entry->Kind = ClassInfo::User; 464 } 465 Entry->Name = "MCK_" + ClassName; 466 Entry->ValueName = OI.Rec->getName(); 467 Entry->PredicateMethod = "is" + ClassName; 468 Entry->RenderMethod = "add" + ClassName + "Operands"; 469 Classes.push_back(Entry); 470 } 471 472 return Entry; 473} 474 475void AsmMatcherInfo::BuildInfo(CodeGenTarget &Target) { 476 for (std::map<std::string, CodeGenInstruction>::const_iterator 477 it = Target.getInstructions().begin(), 478 ie = Target.getInstructions().end(); 479 it != ie; ++it) { 480 const CodeGenInstruction &CGI = it->second; 481 482 if (!MatchOneInstr.empty() && it->first != MatchOneInstr) 483 continue; 484 485 OwningPtr<InstructionInfo> II(new InstructionInfo); 486 487 II->InstrName = it->first; 488 II->Instr = &it->second; 489 II->AsmString = FlattenVariants(CGI.AsmString, 0); 490 491 TokenizeAsmString(II->AsmString, II->Tokens); 492 493 // Ignore instructions which shouldn't be matched. 494 if (!IsAssemblerInstruction(it->first, CGI, II->Tokens)) 495 continue; 496 497 for (unsigned i = 0, e = II->Tokens.size(); i != e; ++i) { 498 StringRef Token = II->Tokens[i]; 499 500 // Check for simple tokens. 501 if (Token[0] != '$') { 502 InstructionInfo::Operand Op; 503 Op.Class = getTokenClass(Token); 504 Op.OperandInfo = 0; 505 II->Operands.push_back(Op); 506 continue; 507 } 508 509 // Otherwise this is an operand reference. 510 StringRef OperandName; 511 if (Token[1] == '{') 512 OperandName = Token.substr(2, Token.size() - 3); 513 else 514 OperandName = Token.substr(1); 515 516 // Map this token to an operand. FIXME: Move elsewhere. 517 unsigned Idx; 518 try { 519 Idx = CGI.getOperandNamed(OperandName); 520 } catch(...) { 521 errs() << "error: unable to find operand: '" << OperandName << "'!\n"; 522 break; 523 } 524 525 const CodeGenInstruction::OperandInfo &OI = CGI.OperandList[Idx]; 526 InstructionInfo::Operand Op; 527 Op.Class = getOperandClass(Token, OI); 528 Op.OperandInfo = &OI; 529 II->Operands.push_back(Op); 530 } 531 532 // If we broke out, ignore the instruction. 533 if (II->Operands.size() != II->Tokens.size()) 534 continue; 535 536 Instructions.push_back(II.take()); 537 } 538} 539 540static void ConstructConversionFunctions(CodeGenTarget &Target, 541 std::vector<InstructionInfo*> &Infos, 542 raw_ostream &OS) { 543 // Write the convert function to a separate stream, so we can drop it after 544 // the enum. 545 std::string ConvertFnBody; 546 raw_string_ostream CvtOS(ConvertFnBody); 547 548 // Function we have already generated. 549 std::set<std::string> GeneratedFns; 550 551 // Start the unified conversion function. 552 553 CvtOS << "static bool ConvertToMCInst(ConversionKind Kind, MCInst &Inst, " 554 << "unsigned Opcode,\n" 555 << " SmallVectorImpl<" 556 << Target.getName() << "Operand> &Operands) {\n"; 557 CvtOS << " Inst.setOpcode(Opcode);\n"; 558 CvtOS << " switch (Kind) {\n"; 559 CvtOS << " default:\n"; 560 561 // Start the enum, which we will generate inline. 562 563 OS << "// Unified function for converting operants to MCInst instances.\n\n"; 564 OS << "enum ConversionKind {\n"; 565 566 for (std::vector<InstructionInfo*>::const_iterator it = Infos.begin(), 567 ie = Infos.end(); it != ie; ++it) { 568 InstructionInfo &II = **it; 569 570 // Order the (class) operands by the order to convert them into an MCInst. 571 SmallVector<std::pair<unsigned, unsigned>, 4> MIOperandList; 572 for (unsigned i = 0, e = II.Operands.size(); i != e; ++i) { 573 InstructionInfo::Operand &Op = II.Operands[i]; 574 if (Op.OperandInfo) 575 MIOperandList.push_back(std::make_pair(Op.OperandInfo->MIOperandNo, i)); 576 } 577 std::sort(MIOperandList.begin(), MIOperandList.end()); 578 579 // Compute the total number of operands. 580 unsigned NumMIOperands = 0; 581 for (unsigned i = 0, e = II.Instr->OperandList.size(); i != e; ++i) { 582 const CodeGenInstruction::OperandInfo &OI = II.Instr->OperandList[i]; 583 NumMIOperands = std::max(NumMIOperands, 584 OI.MIOperandNo + OI.MINumOperands); 585 } 586 587 // Build the conversion function signature. 588 std::string Signature = "Convert"; 589 unsigned CurIndex = 0; 590 for (unsigned i = 0, e = MIOperandList.size(); i != e; ++i) { 591 InstructionInfo::Operand &Op = II.Operands[MIOperandList[i].second]; 592 assert(CurIndex <= Op.OperandInfo->MIOperandNo && 593 "Duplicate match for instruction operand!"); 594 595 Signature += "_"; 596 597 // Skip operands which weren't matched by anything, this occurs when the 598 // .td file encodes "implicit" operands as explicit ones. 599 // 600 // FIXME: This should be removed from the MCInst structure. 601 for (; CurIndex != Op.OperandInfo->MIOperandNo; ++CurIndex) 602 Signature += "Imp"; 603 604 Signature += Op.Class->Name; 605 Signature += utostr(Op.OperandInfo->MINumOperands); 606 Signature += "_" + utostr(MIOperandList[i].second); 607 608 CurIndex += Op.OperandInfo->MINumOperands; 609 } 610 611 // Add any trailing implicit operands. 612 for (; CurIndex != NumMIOperands; ++CurIndex) 613 Signature += "Imp"; 614 615 II.ConversionFnKind = Signature; 616 617 // Check if we have already generated this signature. 618 if (!GeneratedFns.insert(Signature).second) 619 continue; 620 621 // If not, emit it now. 622 623 // Add to the enum list. 624 OS << " " << Signature << ",\n"; 625 626 // And to the convert function. 627 CvtOS << " case " << Signature << ":\n"; 628 CurIndex = 0; 629 for (unsigned i = 0, e = MIOperandList.size(); i != e; ++i) { 630 InstructionInfo::Operand &Op = II.Operands[MIOperandList[i].second]; 631 632 // Add the implicit operands. 633 for (; CurIndex != Op.OperandInfo->MIOperandNo; ++CurIndex) 634 CvtOS << " Inst.addOperand(MCOperand::CreateReg(0));\n"; 635 636 CvtOS << " Operands[" << MIOperandList[i].second 637 << "]." << Op.Class->RenderMethod 638 << "(Inst, " << Op.OperandInfo->MINumOperands << ");\n"; 639 CurIndex += Op.OperandInfo->MINumOperands; 640 } 641 642 // And add trailing implicit operands. 643 for (; CurIndex != NumMIOperands; ++CurIndex) 644 CvtOS << " Inst.addOperand(MCOperand::CreateReg(0));\n"; 645 CvtOS << " break;\n"; 646 } 647 648 // Finish the convert function. 649 650 CvtOS << " }\n"; 651 CvtOS << " return false;\n"; 652 CvtOS << "}\n\n"; 653 654 // Finish the enum, and drop the convert function after it. 655 656 OS << " NumConversionVariants\n"; 657 OS << "};\n\n"; 658 659 OS << CvtOS.str(); 660} 661 662/// EmitMatchClassEnumeration - Emit the enumeration for match class kinds. 663static void EmitMatchClassEnumeration(CodeGenTarget &Target, 664 std::vector<ClassInfo*> &Infos, 665 raw_ostream &OS) { 666 OS << "namespace {\n\n"; 667 668 OS << "/// MatchClassKind - The kinds of classes which participate in\n" 669 << "/// instruction matching.\n"; 670 OS << "enum MatchClassKind {\n"; 671 OS << " InvalidMatchClass = 0,\n"; 672 for (std::vector<ClassInfo*>::iterator it = Infos.begin(), 673 ie = Infos.end(); it != ie; ++it) { 674 ClassInfo &CI = **it; 675 OS << " " << CI.Name << ", // "; 676 if (CI.Kind == ClassInfo::Token) { 677 OS << "'" << CI.ValueName << "'\n"; 678 } else if (CI.Kind == ClassInfo::Register) { 679 if (!CI.ValueName.empty()) 680 OS << "register class '" << CI.ValueName << "'\n"; 681 else 682 OS << "derived register class\n"; 683 } else { 684 OS << "user defined class '" << CI.ValueName << "'\n"; 685 } 686 } 687 OS << " NumMatchClassKinds\n"; 688 OS << "};\n\n"; 689 690 OS << "}\n\n"; 691} 692 693/// EmitClassifyOperand - Emit the function to classify an operand. 694static void EmitClassifyOperand(CodeGenTarget &Target, 695 std::vector<ClassInfo*> &Infos, 696 raw_ostream &OS) { 697 OS << "static MatchClassKind ClassifyOperand(" 698 << Target.getName() << "Operand &Operand) {\n"; 699 OS << " if (Operand.isToken())\n"; 700 OS << " return MatchTokenString(Operand.getToken());\n\n"; 701 for (std::vector<ClassInfo*>::iterator it = Infos.begin(), 702 ie = Infos.end(); it != ie; ++it) { 703 ClassInfo &CI = **it; 704 705 if (CI.Kind != ClassInfo::Token) { 706 OS << " if (Operand." << CI.PredicateMethod << "())\n"; 707 OS << " return " << CI.Name << ";\n\n"; 708 } 709 } 710 OS << " return InvalidMatchClass;\n"; 711 OS << "}\n\n"; 712} 713 714typedef std::pair<std::string, std::string> StringPair; 715 716/// FindFirstNonCommonLetter - Find the first character in the keys of the 717/// string pairs that is not shared across the whole set of strings. All 718/// strings are assumed to have the same length. 719static unsigned 720FindFirstNonCommonLetter(const std::vector<const StringPair*> &Matches) { 721 assert(!Matches.empty()); 722 for (unsigned i = 0, e = Matches[0]->first.size(); i != e; ++i) { 723 // Check to see if letter i is the same across the set. 724 char Letter = Matches[0]->first[i]; 725 726 for (unsigned str = 0, e = Matches.size(); str != e; ++str) 727 if (Matches[str]->first[i] != Letter) 728 return i; 729 } 730 731 return Matches[0]->first.size(); 732} 733 734/// EmitStringMatcherForChar - Given a set of strings that are known to be the 735/// same length and whose characters leading up to CharNo are the same, emit 736/// code to verify that CharNo and later are the same. 737static void EmitStringMatcherForChar(const std::string &StrVariableName, 738 const std::vector<const StringPair*> &Matches, 739 unsigned CharNo, unsigned IndentCount, 740 raw_ostream &OS) { 741 assert(!Matches.empty() && "Must have at least one string to match!"); 742 std::string Indent(IndentCount*2+4, ' '); 743 744 // If we have verified that the entire string matches, we're done: output the 745 // matching code. 746 if (CharNo == Matches[0]->first.size()) { 747 assert(Matches.size() == 1 && "Had duplicate keys to match on"); 748 749 // FIXME: If Matches[0].first has embeded \n, this will be bad. 750 OS << Indent << Matches[0]->second << "\t // \"" << Matches[0]->first 751 << "\"\n"; 752 return; 753 } 754 755 // Bucket the matches by the character we are comparing. 756 std::map<char, std::vector<const StringPair*> > MatchesByLetter; 757 758 for (unsigned i = 0, e = Matches.size(); i != e; ++i) 759 MatchesByLetter[Matches[i]->first[CharNo]].push_back(Matches[i]); 760 761 762 // If we have exactly one bucket to match, see how many characters are common 763 // across the whole set and match all of them at once. 764 // length, just verify the rest of it with one if. 765 if (MatchesByLetter.size() == 1) { 766 unsigned FirstNonCommonLetter = FindFirstNonCommonLetter(Matches); 767 unsigned NumChars = FirstNonCommonLetter-CharNo; 768 769 if (NumChars == 1) { 770 // Do the comparison with if (Str[1] == 'f') 771 // FIXME: Need to escape general characters. 772 OS << Indent << "if (" << StrVariableName << "[" << CharNo << "] == '" 773 << Matches[0]->first[CharNo] << "') {\n"; 774 } else { 775 // Do the comparison with if (Str.substr(1,3) == "foo"). 776 OS << Indent << "if (" << StrVariableName << ".substr(" << CharNo << "," 777 << NumChars << ") == \""; 778 779 // FIXME: Need to escape general strings. 780 OS << Matches[0]->first.substr(CharNo, NumChars) << "\") {\n"; 781 } 782 783 EmitStringMatcherForChar(StrVariableName, Matches, FirstNonCommonLetter, 784 IndentCount+1, OS); 785 OS << Indent << "}\n"; 786 return; 787 } 788 789 // Otherwise, we have multiple possible things, emit a switch on the 790 // character. 791 OS << Indent << "switch (" << StrVariableName << "[" << CharNo << "]) {\n"; 792 OS << Indent << "default: break;\n"; 793 794 for (std::map<char, std::vector<const StringPair*> >::iterator LI = 795 MatchesByLetter.begin(), E = MatchesByLetter.end(); LI != E; ++LI) { 796 // TODO: escape hard stuff (like \n) if we ever care about it. 797 OS << Indent << "case '" << LI->first << "':\t // " 798 << LI->second.size() << " strings to match.\n"; 799 EmitStringMatcherForChar(StrVariableName, LI->second, CharNo+1, 800 IndentCount+1, OS); 801 OS << Indent << " break;\n"; 802 } 803 804 OS << Indent << "}\n"; 805 806} 807 808 809/// EmitStringMatcher - Given a list of strings and code to execute when they 810/// match, output a simple switch tree to classify the input string. If a 811/// match is found, the code in Vals[i].second is executed. This code should do 812/// a return to avoid falling through. If nothing matches, execution falls 813/// through. StrVariableName is the name of teh variable to test. 814static void EmitStringMatcher(const std::string &StrVariableName, 815 const std::vector<StringPair> &Matches, 816 raw_ostream &OS) { 817 // First level categorization: group strings by length. 818 std::map<unsigned, std::vector<const StringPair*> > MatchesByLength; 819 820 for (unsigned i = 0, e = Matches.size(); i != e; ++i) 821 MatchesByLength[Matches[i].first.size()].push_back(&Matches[i]); 822 823 // Output a switch statement on length and categorize the elements within each 824 // bin. 825 OS << " switch (" << StrVariableName << ".size()) {\n"; 826 OS << " default: break;\n"; 827 828 829 for (std::map<unsigned, std::vector<const StringPair*> >::iterator LI = 830 MatchesByLength.begin(), E = MatchesByLength.end(); LI != E; ++LI) { 831 OS << " case " << LI->first << ":\t // " << LI->second.size() 832 << " strings to match.\n"; 833 EmitStringMatcherForChar(StrVariableName, LI->second, 0, 0, OS); 834 OS << " break;\n"; 835 } 836 837 838 OS << " }\n"; 839} 840 841 842/// EmitMatchTokenString - Emit the function to match a token string to the 843/// appropriate match class value. 844static void EmitMatchTokenString(CodeGenTarget &Target, 845 std::vector<ClassInfo*> &Infos, 846 raw_ostream &OS) { 847 // Construct the match list. 848 std::vector<StringPair> Matches; 849 for (std::vector<ClassInfo*>::iterator it = Infos.begin(), 850 ie = Infos.end(); it != ie; ++it) { 851 ClassInfo &CI = **it; 852 853 if (CI.Kind == ClassInfo::Token) 854 Matches.push_back(StringPair(CI.ValueName, "return " + CI.Name + ";")); 855 } 856 857 OS << "static MatchClassKind MatchTokenString(const StringRef &Name) {\n"; 858 859 EmitStringMatcher("Name", Matches, OS); 860 861 OS << " return InvalidMatchClass;\n"; 862 OS << "}\n\n"; 863} 864 865/// EmitMatchRegisterName - Emit the function to match a string to the target 866/// specific register enum. 867static void EmitMatchRegisterName(CodeGenTarget &Target, Record *AsmParser, 868 raw_ostream &OS) { 869 // Construct the match list. 870 std::vector<StringPair> Matches; 871 for (unsigned i = 0, e = Target.getRegisters().size(); i != e; ++i) { 872 const CodeGenRegister &Reg = Target.getRegisters()[i]; 873 if (Reg.TheDef->getValueAsString("AsmName").empty()) 874 continue; 875 876 Matches.push_back(StringPair(Reg.TheDef->getValueAsString("AsmName"), 877 "return " + utostr(i + 1) + ";")); 878 } 879 880 OS << "unsigned " << Target.getName() 881 << AsmParser->getValueAsString("AsmParserClassName") 882 << "::MatchRegisterName(const StringRef &Name) {\n"; 883 884 EmitStringMatcher("Name", Matches, OS); 885 886 OS << " return 0;\n"; 887 OS << "}\n\n"; 888} 889 890void AsmMatcherEmitter::run(raw_ostream &OS) { 891 CodeGenTarget Target; 892 Record *AsmParser = Target.getAsmParser(); 893 std::string ClassName = AsmParser->getValueAsString("AsmParserClassName"); 894 895 EmitSourceFileHeader("Assembly Matcher Source Fragment", OS); 896 897 // Emit the function to match a register name to number. 898 EmitMatchRegisterName(Target, AsmParser, OS); 899 900 // Compute the information on the instructions to match. 901 AsmMatcherInfo Info; 902 Info.BuildInfo(Target); 903 904 DEBUG_WITH_TYPE("instruction_info", { 905 for (std::vector<InstructionInfo*>::iterator 906 it = Info.Instructions.begin(), ie = Info.Instructions.end(); 907 it != ie; ++it) 908 (*it)->dump(); 909 }); 910 911 // FIXME: At this point we should be able to totally order Infos, if not then 912 // we have an ambiguity which the .td file should be forced to resolve. 913 914 // Generate the terminal actions to convert operands into an MCInst. 915 ConstructConversionFunctions(Target, Info.Instructions, OS); 916 917 // Emit the enumeration for classes which participate in matching. 918 EmitMatchClassEnumeration(Target, Info.Classes, OS); 919 920 // Emit the routine to match token strings to their match class. 921 EmitMatchTokenString(Target, Info.Classes, OS); 922 923 // Emit the routine to classify an operand. 924 EmitClassifyOperand(Target, Info.Classes, OS); 925 926 // Finally, build the match function. 927 928 size_t MaxNumOperands = 0; 929 for (std::vector<InstructionInfo*>::const_iterator it = 930 Info.Instructions.begin(), ie = Info.Instructions.end(); 931 it != ie; ++it) 932 MaxNumOperands = std::max(MaxNumOperands, (*it)->Operands.size()); 933 934 OS << "bool " << Target.getName() << ClassName 935 << "::MatchInstruction(" 936 << "SmallVectorImpl<" << Target.getName() << "Operand> &Operands, " 937 << "MCInst &Inst) {\n"; 938 939 // Emit the static match table; unused classes get initalized to 0 which is 940 // guaranteed to be InvalidMatchClass. 941 // 942 // FIXME: We can reduce the size of this table very easily. First, we change 943 // it so that store the kinds in separate bit-fields for each index, which 944 // only needs to be the max width used for classes at that index (we also need 945 // to reject based on this during classification). If we then make sure to 946 // order the match kinds appropriately (putting mnemonics last), then we 947 // should only end up using a few bits for each class, especially the ones 948 // following the mnemonic. 949 OS << " static const struct MatchEntry {\n"; 950 OS << " unsigned Opcode;\n"; 951 OS << " ConversionKind ConvertFn;\n"; 952 OS << " MatchClassKind Classes[" << MaxNumOperands << "];\n"; 953 OS << " } MatchTable[" << Info.Instructions.size() << "] = {\n"; 954 955 for (std::vector<InstructionInfo*>::const_iterator it = 956 Info.Instructions.begin(), ie = Info.Instructions.end(); 957 it != ie; ++it) { 958 InstructionInfo &II = **it; 959 960 OS << " { " << Target.getName() << "::" << II.InstrName 961 << ", " << II.ConversionFnKind << ", { "; 962 for (unsigned i = 0, e = II.Operands.size(); i != e; ++i) { 963 InstructionInfo::Operand &Op = II.Operands[i]; 964 965 if (i) OS << ", "; 966 OS << Op.Class->Name; 967 } 968 OS << " } },\n"; 969 } 970 971 OS << " };\n\n"; 972 973 // Emit code to compute the class list for this operand vector. 974 OS << " // Eliminate obvious mismatches.\n"; 975 OS << " if (Operands.size() > " << MaxNumOperands << ")\n"; 976 OS << " return true;\n\n"; 977 978 OS << " // Compute the class list for this operand vector.\n"; 979 OS << " MatchClassKind Classes[" << MaxNumOperands << "];\n"; 980 OS << " for (unsigned i = 0, e = Operands.size(); i != e; ++i) {\n"; 981 OS << " Classes[i] = ClassifyOperand(Operands[i]);\n\n"; 982 983 OS << " // Check for invalid operands before matching.\n"; 984 OS << " if (Classes[i] == InvalidMatchClass)\n"; 985 OS << " return true;\n"; 986 OS << " }\n\n"; 987 988 OS << " // Mark unused classes.\n"; 989 OS << " for (unsigned i = Operands.size(), e = " << MaxNumOperands << "; " 990 << "i != e; ++i)\n"; 991 OS << " Classes[i] = InvalidMatchClass;\n\n"; 992 993 // Emit code to search the table. 994 OS << " // Search the table.\n"; 995 OS << " for (const MatchEntry *it = MatchTable, " 996 << "*ie = MatchTable + " << Info.Instructions.size() 997 << "; it != ie; ++it) {\n"; 998 for (unsigned i = 0; i != MaxNumOperands; ++i) { 999 OS << " if (Classes[" << i << "] != it->Classes[" << i << "])\n"; 1000 OS << " continue;\n"; 1001 } 1002 OS << "\n"; 1003 OS << " return ConvertToMCInst(it->ConvertFn, Inst, " 1004 << "it->Opcode, Operands);\n"; 1005 OS << " }\n\n"; 1006 1007 OS << " return true;\n"; 1008 OS << "}\n\n"; 1009} 1010