EDEmitter.cpp revision 5e81716425dc3373fbc834bfa7936a5c1205579b
1//===- EDEmitter.cpp - Generate instruction descriptions for ED -*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This tablegen backend is responsible for emitting a description of each
11// instruction in a format that the enhanced disassembler can use to tokenize
12// and parse instructions.
13//
14//===----------------------------------------------------------------------===//
15
16#include "EDEmitter.h"
17
18#include "AsmWriterInst.h"
19#include "CodeGenTarget.h"
20#include "Record.h"
21
22#include "llvm/Support/ErrorHandling.h"
23#include "llvm/Support/Format.h"
24#include "llvm/Support/raw_ostream.h"
25
26#include <vector>
27#include <string>
28
29#define MAX_OPERANDS 5
30#define MAX_SYNTAXES 2
31
32using namespace llvm;
33
34///////////////////////////////////////////////////////////
35// Support classes for emitting nested C data structures //
36///////////////////////////////////////////////////////////
37
38namespace {
39
40  class EnumEmitter {
41  private:
42    std::string Name;
43    std::vector<std::string> Entries;
44  public:
45    EnumEmitter(const char *N) : Name(N) {
46    }
47    int addEntry(const char *e) {
48      Entries.push_back(std::string(e));
49      return Entries.size() - 1;
50    }
51    void emit(raw_ostream &o, unsigned int &i) {
52      o.indent(i) << "enum " << Name.c_str() << " {" << "\n";
53      i += 2;
54
55      unsigned int index = 0;
56      unsigned int numEntries = Entries.size();
57      for(index = 0; index < numEntries; ++index) {
58        o.indent(i) << Entries[index];
59        if(index < (numEntries - 1))
60          o << ",";
61        o << "\n";
62      }
63
64      i -= 2;
65      o.indent(i) << "};" << "\n";
66    }
67
68    void emitAsFlags(raw_ostream &o, unsigned int &i) {
69      o.indent(i) << "enum " << Name.c_str() << " {" << "\n";
70      i += 2;
71
72      unsigned int index = 0;
73      unsigned int numEntries = Entries.size();
74      unsigned int flag = 1;
75      for (index = 0; index < numEntries; ++index) {
76        o.indent(i) << Entries[index] << " = " << format("0x%x", flag);
77        if (index < (numEntries - 1))
78          o << ",";
79        o << "\n";
80        flag <<= 1;
81      }
82
83      i -= 2;
84      o.indent(i) << "};" << "\n";
85    }
86  };
87
88  class StructEmitter {
89  private:
90    std::string Name;
91    std::vector<std::string> MemberTypes;
92    std::vector<std::string> MemberNames;
93  public:
94    StructEmitter(const char *N) : Name(N) {
95    }
96    void addMember(const char *t, const char *n) {
97      MemberTypes.push_back(std::string(t));
98      MemberNames.push_back(std::string(n));
99    }
100    void emit(raw_ostream &o, unsigned int &i) {
101      o.indent(i) << "struct " << Name.c_str() << " {" << "\n";
102      i += 2;
103
104      unsigned int index = 0;
105      unsigned int numMembers = MemberTypes.size();
106      for (index = 0; index < numMembers; ++index) {
107        o.indent(i) << MemberTypes[index] << " " << MemberNames[index] << ";";
108        o << "\n";
109      }
110
111      i -= 2;
112      o.indent(i) << "};" << "\n";
113    }
114  };
115
116  class ConstantEmitter {
117  public:
118    virtual ~ConstantEmitter() { }
119    virtual void emit(raw_ostream &o, unsigned int &i) = 0;
120  };
121
122  class LiteralConstantEmitter : public ConstantEmitter {
123  private:
124    std::string Literal;
125  public:
126    LiteralConstantEmitter(const char *literal) : Literal(literal) {
127    }
128    LiteralConstantEmitter(int literal) {
129      char buf[256];
130      snprintf(buf, 256, "%d", literal);
131      Literal = buf;
132    }
133    void emit(raw_ostream &o, unsigned int &i) {
134      o << Literal;
135    }
136  };
137
138  class CompoundConstantEmitter : public ConstantEmitter {
139  private:
140    std::vector<ConstantEmitter*> Entries;
141  public:
142    CompoundConstantEmitter() {
143    }
144    ~CompoundConstantEmitter() {
145      unsigned int index;
146      unsigned int numEntries = Entries.size();
147      for (index = 0; index < numEntries; ++index) {
148        delete Entries[index];
149      }
150    }
151    CompoundConstantEmitter &addEntry(ConstantEmitter *e) {
152      Entries.push_back(e);
153      return *this;
154    }
155    void emit(raw_ostream &o, unsigned int &i) {
156      o << "{" << "\n";
157      i += 2;
158
159      unsigned int index;
160      unsigned int numEntries = Entries.size();
161      for (index = 0; index < numEntries; ++index) {
162        o.indent(i);
163        Entries[index]->emit(o, i);
164        if (index < (numEntries - 1))
165          o << ",";
166        o << "\n";
167      }
168
169      i -= 2;
170      o.indent(i) << "}";
171    }
172  };
173
174  class FlagsConstantEmitter : public ConstantEmitter {
175  private:
176    std::vector<std::string> Flags;
177  public:
178    FlagsConstantEmitter() {
179    }
180    FlagsConstantEmitter &addEntry(const char *f) {
181      Flags.push_back(std::string(f));
182      return *this;
183    }
184    void emit(raw_ostream &o, unsigned int &i) {
185      unsigned int index;
186      unsigned int numFlags = Flags.size();
187      if (numFlags == 0)
188        o << "0";
189
190      for (index = 0; index < numFlags; ++index) {
191        o << Flags[index].c_str();
192        if (index < (numFlags - 1))
193          o << " | ";
194      }
195    }
196  };
197}
198
199EDEmitter::EDEmitter(RecordKeeper &R) : Records(R) {
200}
201
202/// populateOperandOrder - Accepts a CodeGenInstruction and generates its
203///   AsmWriterInst for the desired assembly syntax, giving an ordered list of
204///   operands in the order they appear in the printed instruction.  Then, for
205///   each entry in that list, determines the index of the same operand in the
206///   CodeGenInstruction, and emits the resulting mapping into an array, filling
207///   in unused slots with -1.
208///
209/// @arg operandOrder - The array that will be populated with the operand
210///                     mapping.  Each entry will contain -1 (invalid index
211///                     into the operands present in the AsmString) or a number
212///                     representing an index in the operand descriptor array.
213/// @arg inst         - The instruction to use when looking up the operands
214/// @arg syntax       - The syntax to use, according to LLVM's enumeration
215void populateOperandOrder(CompoundConstantEmitter *operandOrder,
216                          const CodeGenInstruction &inst,
217                          unsigned syntax) {
218  unsigned int numArgs = 0;
219
220  AsmWriterInst awInst(inst, syntax, -1, -1);
221
222  std::vector<AsmWriterOperand>::iterator operandIterator;
223
224  for (operandIterator = awInst.Operands.begin();
225       operandIterator != awInst.Operands.end();
226       ++operandIterator) {
227    if (operandIterator->OperandType ==
228        AsmWriterOperand::isMachineInstrOperand) {
229      char buf[2];
230      snprintf(buf, sizeof(buf), "%u", operandIterator->CGIOpNo);
231      operandOrder->addEntry(new LiteralConstantEmitter(buf));
232      numArgs++;
233    }
234  }
235
236  for(; numArgs < MAX_OPERANDS; numArgs++) {
237    operandOrder->addEntry(new LiteralConstantEmitter("-1"));
238  }
239}
240
241/////////////////////////////////////////////////////
242// Support functions for handling X86 instructions //
243/////////////////////////////////////////////////////
244
245#define ADDFLAG(flag) flags->addEntry(flag)
246
247#define REG(str) if (name == str) { ADDFLAG("kOperandFlagRegister"); return 0; }
248#define MEM(str) if (name == str) { ADDFLAG("kOperandFlagMemory"); return 0; }
249#define LEA(str) if (name == str) { ADDFLAG("kOperandFlagEffectiveAddress"); \
250                                    return 0; }
251#define IMM(str) if (name == str) { ADDFLAG("kOperandFlagImmediate"); \
252                                    return 0; }
253#define PCR(str) if (name == str) { ADDFLAG("kOperandFlagMemory"); \
254                                    ADDFLAG("kOperandFlagPCRelative"); \
255                                    return 0; }
256
257/// X86FlagFromOpName - Processes the name of a single X86 operand (which is
258///   actually its type) and translates it into an operand flag
259///
260/// @arg flags    - The flags object to add the flag to
261/// @arg name     - The name of the operand
262static int X86FlagFromOpName(FlagsConstantEmitter *flags,
263                             const std::string &name) {
264  REG("GR8");
265  REG("GR8_NOREX");
266  REG("GR16");
267  REG("GR32");
268  REG("GR32_NOREX");
269  REG("GR32_TC");
270  REG("FR32");
271  REG("RFP32");
272  REG("GR64");
273  REG("GR64_TC");
274  REG("FR64");
275  REG("VR64");
276  REG("RFP64");
277  REG("RFP80");
278  REG("VR128");
279  REG("RST");
280  REG("SEGMENT_REG");
281  REG("DEBUG_REG");
282  REG("CONTROL_REG_32");
283  REG("CONTROL_REG_64");
284
285  MEM("i8mem");
286  MEM("i8mem_NOREX");
287  MEM("i16mem");
288  MEM("i32mem");
289  MEM("i32mem_TC");
290  MEM("f32mem");
291  MEM("ssmem");
292  MEM("opaque32mem");
293  MEM("opaque48mem");
294  MEM("i64mem");
295  MEM("i64mem_TC");
296  MEM("f64mem");
297  MEM("sdmem");
298  MEM("f80mem");
299  MEM("opaque80mem");
300  MEM("i128mem");
301  MEM("f128mem");
302  MEM("opaque512mem");
303
304  LEA("lea32mem");
305  LEA("lea64_32mem");
306  LEA("lea64mem");
307
308  IMM("i8imm");
309  IMM("i16imm");
310  IMM("i16i8imm");
311  IMM("i32imm");
312  IMM("i32imm_pcrel");
313  IMM("i32i8imm");
314  IMM("i64imm");
315  IMM("i64i8imm");
316  IMM("i64i32imm");
317  IMM("i64i32imm_pcrel");
318  IMM("SSECC");
319
320  PCR("brtarget8");
321  PCR("offset8");
322  PCR("offset16");
323  PCR("offset32");
324  PCR("offset64");
325  PCR("brtarget");
326
327  return 1;
328}
329
330#undef REG
331#undef MEM
332#undef LEA
333#undef IMM
334#undef PCR
335#undef ADDFLAG
336
337/// X86PopulateOperands - Handles all the operands in an X86 instruction, adding
338///   the appropriate flags to their descriptors
339///
340/// @operandFlags - A reference the array of operand flag objects
341/// @inst         - The instruction to use as a source of information
342static void X86PopulateOperands(
343  FlagsConstantEmitter *(&operandFlags)[MAX_OPERANDS],
344  const CodeGenInstruction &inst) {
345  if (!inst.TheDef->isSubClassOf("X86Inst"))
346    return;
347
348  unsigned int index;
349  unsigned int numOperands = inst.OperandList.size();
350
351  for (index = 0; index < numOperands; ++index) {
352    const CodeGenInstruction::OperandInfo &operandInfo =
353      inst.OperandList[index];
354    Record &rec = *operandInfo.Rec;
355
356    if (X86FlagFromOpName(operandFlags[index], rec.getName())) {
357      errs() << "Operand type: " << rec.getName().c_str() << "\n";
358      errs() << "Operand name: " << operandInfo.Name.c_str() << "\n";
359      errs() << "Instruction mame: " << inst.TheDef->getName().c_str() << "\n";
360      llvm_unreachable("Unhandled type");
361    }
362  }
363}
364
365/// decorate1 - Decorates a named operand with a new flag
366///
367/// @operandFlags - The array of operand flag objects, which don't have names
368/// @inst         - The CodeGenInstruction, which provides a way to translate
369///                 between names and operand indices
370/// @opName       - The name of the operand
371/// @flag         - The name of the flag to add
372static inline void decorate1(FlagsConstantEmitter *(&operandFlags)[MAX_OPERANDS],
373                             const CodeGenInstruction &inst,
374                             const char *opName,
375                             const char *opFlag) {
376  unsigned opIndex;
377
378  opIndex = inst.getOperandNamed(std::string(opName));
379
380  operandFlags[opIndex]->addEntry(opFlag);
381}
382
383#define DECORATE1(opName, opFlag) decorate1(operandFlags, inst, opName, opFlag)
384
385#define MOV(source, target) {                       \
386  instFlags.addEntry("kInstructionFlagMove");       \
387  DECORATE1(source, "kOperandFlagSource");          \
388  DECORATE1(target, "kOperandFlagTarget");          \
389}
390
391#define BRANCH(target) {                            \
392  instFlags.addEntry("kInstructionFlagBranch");     \
393  DECORATE1(target, "kOperandFlagTarget");          \
394}
395
396#define PUSH(source) {                              \
397  instFlags.addEntry("kInstructionFlagPush");       \
398  DECORATE1(source, "kOperandFlagSource");          \
399}
400
401#define POP(target) {                               \
402  instFlags.addEntry("kInstructionFlagPop");        \
403  DECORATE1(target, "kOperandFlagTarget");          \
404}
405
406#define CALL(target) {                              \
407  instFlags.addEntry("kInstructionFlagCall");       \
408  DECORATE1(target, "kOperandFlagTarget");          \
409}
410
411#define RETURN() {                                  \
412  instFlags.addEntry("kInstructionFlagReturn");     \
413}
414
415/// X86ExtractSemantics - Performs various checks on the name of an X86
416///   instruction to determine what sort of an instruction it is and then adds
417///   the appropriate flags to the instruction and its operands
418///
419/// @arg instFlags    - A reference to the flags for the instruction as a whole
420/// @arg operandFlags - A reference to the array of operand flag object pointers
421/// @arg inst         - A reference to the original instruction
422static void X86ExtractSemantics(FlagsConstantEmitter &instFlags,
423                                FlagsConstantEmitter *(&operandFlags)[MAX_OPERANDS],
424                                const CodeGenInstruction &inst) {
425  const std::string &name = inst.TheDef->getName();
426
427  if (name.find("MOV") != name.npos) {
428    if (name.find("MOV_V") != name.npos) {
429      // ignore (this is a pseudoinstruction)
430    }
431    else if (name.find("MASK") != name.npos) {
432      // ignore (this is a masking move)
433    }
434    else if (name.find("r0") != name.npos) {
435      // ignore (this is a pseudoinstruction)
436    }
437    else if (name.find("PS") != name.npos ||
438             name.find("PD") != name.npos) {
439      // ignore (this is a shuffling move)
440    }
441    else if (name.find("MOVS") != name.npos) {
442      // ignore (this is a string move)
443    }
444    else if (name.find("_F") != name.npos) {
445      // TODO handle _F moves to ST(0)
446    }
447    else if (name.find("a") != name.npos) {
448      // TODO handle moves to/from %ax
449    }
450    else if (name.find("CMOV") != name.npos) {
451      MOV("src2", "dst");
452    }
453    else if (name.find("PC") != name.npos) {
454      MOV("label", "reg")
455    }
456    else {
457      MOV("src", "dst");
458    }
459  }
460
461  if (name.find("JMP") != name.npos ||
462      name.find("J") == 0) {
463    if (name.find("FAR") != name.npos && name.find("i") != name.npos) {
464      BRANCH("off");
465    }
466    else {
467      BRANCH("dst");
468    }
469  }
470
471  if (name.find("PUSH") != name.npos) {
472    if (name.find("FS") != name.npos ||
473        name.find("GS") != name.npos) {
474      instFlags.addEntry("kInstructionFlagPush");
475      // TODO add support for fixed operands
476    }
477    else if (name.find("F") != name.npos) {
478      // ignore (this pushes onto the FP stack)
479    }
480    else if (name[name.length() - 1] == 'm') {
481      PUSH("src");
482    }
483    else if (name.find("i") != name.npos) {
484      PUSH("imm");
485    }
486    else {
487      PUSH("reg");
488    }
489  }
490
491  if (name.find("POP") != name.npos) {
492    if (name.find("POPCNT") != name.npos) {
493      // ignore (not a real pop)
494    }
495    else if (name.find("FS") != name.npos ||
496             name.find("GS") != name.npos) {
497      instFlags.addEntry("kInstructionFlagPop");
498      // TODO add support for fixed operands
499    }
500    else if (name.find("F") != name.npos) {
501      // ignore (this pops from the FP stack)
502    }
503    else if (name[name.length() - 1] == 'm') {
504      POP("dst");
505    }
506    else {
507      POP("reg");
508    }
509  }
510
511  if (name.find("CALL") != name.npos) {
512    if (name.find("ADJ") != name.npos) {
513      // ignore (not a call)
514    }
515    else if (name.find("SYSCALL") != name.npos) {
516      // ignore (doesn't go anywhere we know about)
517    }
518    else if (name.find("VMCALL") != name.npos) {
519      // ignore (rather different semantics than a regular call)
520    }
521    else if (name.find("FAR") != name.npos && name.find("i") != name.npos) {
522      CALL("off");
523    }
524    else {
525      CALL("dst");
526    }
527  }
528
529  if (name.find("RET") != name.npos) {
530    RETURN();
531  }
532}
533
534#undef MOV
535#undef BRANCH
536#undef PUSH
537#undef POP
538#undef CALL
539#undef RETURN
540
541#undef COND_DECORATE_2
542#undef COND_DECORATE_1
543#undef DECORATE1
544
545/// populateInstInfo - Fills an array of InstInfos with information about each
546///   instruction in a target
547///
548/// @arg infoArray  - The array of InstInfo objects to populate
549/// @arg target     - The CodeGenTarget to use as a source of instructions
550static void populateInstInfo(CompoundConstantEmitter &infoArray,
551                             CodeGenTarget &target) {
552  std::vector<const CodeGenInstruction*> numberedInstructions;
553  target.getInstructionsByEnumValue(numberedInstructions);
554
555  unsigned int index;
556  unsigned int numInstructions = numberedInstructions.size();
557
558  for (index = 0; index < numInstructions; ++index) {
559    const CodeGenInstruction& inst = *numberedInstructions[index];
560
561    CompoundConstantEmitter *infoStruct = new CompoundConstantEmitter;
562    infoArray.addEntry(infoStruct);
563
564    FlagsConstantEmitter *instFlags = new FlagsConstantEmitter;
565    infoStruct->addEntry(instFlags);
566
567    LiteralConstantEmitter *numOperandsEmitter =
568      new LiteralConstantEmitter(inst.OperandList.size());
569    infoStruct->addEntry(numOperandsEmitter);
570
571    CompoundConstantEmitter *operandFlagArray = new CompoundConstantEmitter;
572    infoStruct->addEntry(operandFlagArray);
573
574    FlagsConstantEmitter *operandFlags[MAX_OPERANDS];
575
576    for (unsigned operandIndex = 0; operandIndex < MAX_OPERANDS; ++operandIndex) {
577      operandFlags[operandIndex] = new FlagsConstantEmitter;
578      operandFlagArray->addEntry(operandFlags[operandIndex]);
579    }
580
581    unsigned numSyntaxes = 0;
582
583    if (target.getName() == "X86") {
584      X86PopulateOperands(operandFlags, inst);
585      X86ExtractSemantics(*instFlags, operandFlags, inst);
586      numSyntaxes = 2;
587    }
588
589    CompoundConstantEmitter *operandOrderArray = new CompoundConstantEmitter;
590    infoStruct->addEntry(operandOrderArray);
591
592    for (unsigned syntaxIndex = 0; syntaxIndex < MAX_SYNTAXES; ++syntaxIndex) {
593      CompoundConstantEmitter *operandOrder = new CompoundConstantEmitter;
594      operandOrderArray->addEntry(operandOrder);
595
596      if (syntaxIndex < numSyntaxes) {
597        populateOperandOrder(operandOrder, inst, syntaxIndex);
598      }
599      else {
600        for (unsigned operandIndex = 0;
601             operandIndex < MAX_OPERANDS;
602             ++operandIndex) {
603          operandOrder->addEntry(new LiteralConstantEmitter("-1"));
604        }
605      }
606    }
607  }
608}
609
610void EDEmitter::run(raw_ostream &o) {
611  unsigned int i = 0;
612
613  CompoundConstantEmitter infoArray;
614  CodeGenTarget target;
615
616  populateInstInfo(infoArray, target);
617
618  o << "InstInfo instInfo" << target.getName().c_str() << "[] = ";
619  infoArray.emit(o, i);
620  o << ";" << "\n";
621}
622
623void EDEmitter::runHeader(raw_ostream &o) {
624  EmitSourceFileHeader("Enhanced Disassembly Info Header", o);
625
626  o << "#ifndef EDInfo_" << "\n";
627  o << "#define EDInfo_" << "\n";
628  o << "\n";
629  o << "#include <inttypes.h>" << "\n";
630  o << "\n";
631  o << "#define MAX_OPERANDS " << format("%d", MAX_OPERANDS) << "\n";
632  o << "#define MAX_SYNTAXES " << format("%d", MAX_SYNTAXES) << "\n";
633  o << "\n";
634
635  unsigned int i = 0;
636
637  EnumEmitter operandFlags("OperandFlags");
638  operandFlags.addEntry("kOperandFlagImmediate");
639  operandFlags.addEntry("kOperandFlagRegister");
640  operandFlags.addEntry("kOperandFlagMemory");
641  operandFlags.addEntry("kOperandFlagEffectiveAddress");
642  operandFlags.addEntry("kOperandFlagPCRelative");
643  operandFlags.addEntry("kOperandFlagSource");
644  operandFlags.addEntry("kOperandFlagTarget");
645  operandFlags.emitAsFlags(o, i);
646
647  o << "\n";
648
649  EnumEmitter instructionFlags("InstructionFlags");
650  instructionFlags.addEntry("kInstructionFlagMove");
651  instructionFlags.addEntry("kInstructionFlagBranch");
652  instructionFlags.addEntry("kInstructionFlagPush");
653  instructionFlags.addEntry("kInstructionFlagPop");
654  instructionFlags.addEntry("kInstructionFlagCall");
655  instructionFlags.addEntry("kInstructionFlagReturn");
656  instructionFlags.emitAsFlags(o, i);
657
658  o << "\n";
659
660  StructEmitter instInfo("InstInfo");
661  instInfo.addMember("uint32_t", "instructionFlags");
662  instInfo.addMember("uint8_t", "numOperands");
663  instInfo.addMember("uint8_t", "operandFlags[MAX_OPERANDS]");
664  instInfo.addMember("const char", "operandOrders[MAX_SYNTAXES][MAX_OPERANDS]");
665  instInfo.emit(o, i);
666
667  o << "\n";
668  o << "#endif" << "\n";
669}
670