DisassemblerEmitter.cpp revision a5d585685493d85d5cb72b831a68ec747ae55a86
1b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul//===- DisassemblerEmitter.cpp - Generate a disassembler ------------------===//
2e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell//
3e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell//                     The LLVM Compiler Infrastructure
4e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell//
5b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul// This file is distributed under the University of Illinois Open Source
622144ab7552f0799bcfca506bf4ffa7f70a06649Gareth Hughes// License. See LICENSE.TXT for details.
7b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul//
822144ab7552f0799bcfca506bf4ffa7f70a06649Gareth Hughes//===----------------------------------------------------------------------===//
9e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell
10e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell#include "DisassemblerEmitter.h"
11e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell#include "CodeGenTarget.h"
12e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell#include "Error.h"
13e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell#include "Record.h"
14e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell#include "X86DisassemblerTables.h"
1522144ab7552f0799bcfca506bf4ffa7f70a06649Gareth Hughes#include "X86RecognizableInstr.h"
16e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell#include "ARMDecoderEmitter.h"
17e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell#include "FixedLenDecoderEmitter.h"
1822144ab7552f0799bcfca506bf4ffa7f70a06649Gareth Hughes
19e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwellusing namespace llvm;
20e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwellusing namespace llvm::X86Disassembler;
21e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell
22e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell/// DisassemblerEmitter - Contains disassembler table emitters for various
23e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell/// architectures.
24e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell
25e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell/// X86 Disassembler Emitter
26e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell///
27e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell/// *** IF YOU'RE HERE TO RESOLVE A "Primary decode conflict", LOOK DOWN NEAR
28e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell///     THE END OF THIS COMMENT!
2946b0988c673b28e072fd0cbf477632a9ab6f9f18Keith Whitwell///
30e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell/// The X86 disassembler emitter is part of the X86 Disassembler, which is
31e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell/// documented in lib/Target/X86/X86Disassembler.h.
320070d398d13759adc519f9bc764ffd39bc88890eBrian Paul///
33cd03ed4f54444d96e4e47cdb118a3dfd94d92bb0Keith Whitwell/// The emitter produces the tables that the disassembler uses to translate
34e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell/// instructions.  The emitter generates the following tables:
35cd03ed4f54444d96e4e47cdb118a3dfd94d92bb0Keith Whitwell///
36b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul/// - One table (CONTEXTS_SYM) that contains a mapping of attribute masks to
37b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   instruction contexts.  Although for each attribute there are cases where
38e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell///   that attribute determines decoding, in the majority of cases decoding is
39e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell///   the same whether or not an attribute is present.  For example, a 64-bit
40b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   instruction with an OPSIZE prefix and an XS prefix decodes the same way in
41b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   all cases as a 64-bit instruction with only OPSIZE set.  (The XS prefix
42b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   may have effects on its execution, but does not change the instruction
43b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   returned.)  This allows considerable space savings in other tables.
/// - Six tables (ONEBYTE_SYM, TWOBYTE_SYM, THREEBYTE38_SYM, THREEBYTE3A_SYM,
///   THREEBYTEA6_SYM, and THREEBYTEA7_SYM) contain the hierarchy that the
46b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   decoder traverses while decoding an instruction.  At the lowest level of
47b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   this hierarchy are instruction UIDs, 16-bit integers that can be used to
48e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell///   uniquely identify the instruction and correspond exactly to its position
49b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   in the list of CodeGenInstructions for the target.
50b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul/// - One table (INSTRUCTIONS_SYM) contains information about the operands of
51b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   each instruction and how to decode them.
52b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///
/// During table generation, there may be conflicts between instructions that
/// occupy the same space in the decode tables.  These conflicts are resolved as
/// follows in setTableFields() (X86DisassemblerTables.cpp):
56b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///
57b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul/// - If the current context is the native context for one of the instructions
58b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   (that is, the attributes specified for it in the LLVM tables specify
59b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   precisely the current context), then it has priority.
/// - If the current context isn't native for either of the instructions, then
///   the higher-priority context wins (that is, the one that is more specific).
///   That hierarchy is determined by outranks() (X86DisassemblerTables.cpp).
63b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul/// - If the current context is native for both instructions, then the table
64e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell///   emitter reports a conflict and dies.
65b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///
66b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul/// *** RESOLUTION FOR "Primary decode conflict"S
67e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell///
68b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul/// If two instructions collide, typically the solution is (in order of
69cd03ed4f54444d96e4e47cdb118a3dfd94d92bb0Keith Whitwell/// likelihood):
70b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///
71e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell/// (1) to filter out one of the instructions by editing filter()
72b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///     (X86RecognizableInstr.cpp).  This is the most common resolution, but
73e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell///     check the Intel manuals first to make sure that (2) and (3) are not the
74b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///     problem.
75b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul/// (2) to fix the tables (X86.td and its subsidiaries) so the opcodes are
76b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///     accurate.  Sometimes they are not.
77b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul/// (3) to fix the tables to reflect the actual context (for example, required
78b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///     prefixes), and possibly to add a new context by editing
79b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///     lib/Target/X86/X86DisassemblerDecoderCommon.h.  This is unlikely to be
80b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///     the cause.
81b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///
82b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul/// DisassemblerEmitter.cpp contains the implementation for the emitter,
83b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   which simply pulls out instructions from the CodeGenTarget and pushes them
84b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   into X86DisassemblerTables.
85b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul/// X86DisassemblerTables.h contains the interface for the instruction tables,
86b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   which manage and emit the structures discussed above.
87b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul/// X86DisassemblerTables.cpp contains the implementation for the instruction
88b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   tables.
89b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul/// X86ModRMFilters.h contains filters that can be used to determine which
90b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   ModR/M values are valid for a particular instruction.  These are used to
91b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   populate ModRMDecisions.
92b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul/// X86RecognizableInstr.h contains the interface for a single instruction,
93b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   which knows how to translate itself from a CodeGenInstruction and provide
94b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   the information necessary for integration into the tables.
95b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul/// X86RecognizableInstr.cpp contains the implementation for a single
96b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul///   instruction.
97b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul
98b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paulvoid DisassemblerEmitter::run(raw_ostream &OS) {
99b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul  CodeGenTarget Target(Records);
100b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul
101b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul  OS << "/*===- TableGen'erated file "
102b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul     << "---------------------------------------*- C -*-===*\n"
103b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul     << " *\n"
104b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul     << " * " << Target.getName() << " Disassembler\n"
105b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul     << " *\n"
106b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul     << " * Automatically generated file, do not edit!\n"
107b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul     << " *\n"
108b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul     << " *===---------------------------------------------------------------"
109b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul     << "-------===*/\n";
110b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul
111b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul  // X86 uses a custom disassembler.
112b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul  if (Target.getName() == "X86") {
113b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul    DisassemblerTables Tables;
114b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul
115b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul    const std::vector<const CodeGenInstruction*> &numberedInstructions =
116b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul      Target.getInstructionsByEnumValue();
117e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell
118e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell    for (unsigned i = 0, e = numberedInstructions.size(); i != e; ++i)
119e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell      RecognizableInstr::processInstr(Tables, *numberedInstructions[i], i);
120e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell
121b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul    // FIXME: As long as we are using exceptions, might as well drop this to the
122b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul    // actual conflict site.
123b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul    if (Tables.hasConflicts())
124b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul      throw TGError(Target.getTargetRecord()->getLoc(),
125b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul                    "Primary decode conflict");
126b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul
127b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul    Tables.emit(OS);
128b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul    return;
129b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul  }
130e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell
131b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul  // ARM and Thumb have a CHECK() macro to deal with DecodeStatuses.
132b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul  if (Target.getName() == "ARM" ||
133b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul      Target.getName() == "Thumb") {
134b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul    FixedLenDecoderEmitter(Records,
135b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul                           "ARM",
136b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul                           "if (!Check(S, ", ")) return MCDisassembler::Fail;",
137b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul                           "S", "MCDisassembler::Fail",
138b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul                           "  MCDisassembler::DecodeStatus S = MCDisassembler::Success;\n(void)S;").run(OS);
139b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul    return;
140b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul  }
141b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul
142e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell  FixedLenDecoderEmitter(Records, Target.getName()).run(OS);
143e3a051e0538a605551f4d58294c94f5eb00ed07fKeith Whitwell}
144b37a084357dd08573b86d6d8c5ba43d65bdc1bd7Brian Paul