/*===-- llvm-c/EnhancedDisassembly.h - Disassembler C Interface ---*- C -*-===*\
|*                                                                            *|
|*                     The LLVM Compiler Infrastructure                       *|
|*                                                                            *|
|* This file is distributed under the University of Illinois Open Source      *|
|* License. See LICENSE.TXT for details.                                      *|
|*                                                                            *|
|*===----------------------------------------------------------------------===*|
|*                                                                            *|
|* This header declares the C interface to EnhancedDisassembly.so, which      *|
|* implements a disassembler with the ability to extract operand values and   *|
|* individual tokens from assembly instructions.                              *|
|*                                                                            *|
|* The header declares additional interfaces if the host compiler supports    *|
|* the blocks API.                                                            *|
|*                                                                            *|
\*===----------------------------------------------------------------------===*/
18
19#ifndef LLVM_C_ENHANCEDDISASSEMBLY_H
20#define LLVM_C_ENHANCEDDISASSEMBLY_H
21
22#include "llvm/Support/DataTypes.h"
23
#ifdef __cplusplus
extern "C" {
#endif

/*!
 @typedef EDByteReaderCallback
 Interface to memory from which instructions may be read.
 @param byte A pointer whose target should be filled in with the data returned.
 @param address The address of the byte to be read.
 @param arg An anonymous argument for client use (the arg value handed to
   EDCreateInsts is passed through to this callback).
 @result 0 on success; -1 otherwise.
 */
typedef int (*EDByteReaderCallback)(uint8_t *byte, uint64_t address, void *arg);

/*!
 @typedef EDRegisterReaderCallback
 Interface to a register state from which register values may be read.
 @param value A pointer whose target should be filled in with the value of the
   register.
 @param regID The LLVM register identifier for the register to read.
 @param arg An anonymous argument for client use.
 @result 0 if the register could be read; -1 otherwise.
 */
typedef int (*EDRegisterReaderCallback)(uint64_t *value, unsigned regID,
                                        void *arg);

/*!
 @typedef EDAssemblySyntax_t
 An assembly syntax for use in tokenizing instructions.  The constants live in
 an anonymous enum; EDAssemblySyntax_t itself is a plain unsigned.
 */
enum {
/*! @constant kEDAssemblySyntaxX86Intel Intel syntax for i386 and x86_64. */
  kEDAssemblySyntaxX86Intel  = 0,
/*! @constant kEDAssemblySyntaxX86ATT AT&T syntax for i386 and x86_64. */
  kEDAssemblySyntaxX86ATT    = 1,
/*! @constant kEDAssemblySyntaxARMUAL ARM syntax.  NOTE(review): presumably
    ARM's Unified Assembler Language, judging by the name -- confirm. */
  kEDAssemblySyntaxARMUAL    = 2
};
typedef unsigned EDAssemblySyntax_t;

/*
 * The four handle types below are all plain void pointers, so the compiler
 * cannot diagnose passing one kind of handle where another is expected.
 */

/*!
 @typedef EDDisassemblerRef
 Encapsulates a disassembler for a single CPU architecture.
 */
typedef void *EDDisassemblerRef;

/*!
 @typedef EDInstRef
 Encapsulates a single disassembled instruction in one assembly syntax.
 */
typedef void *EDInstRef;

/*!
 @typedef EDTokenRef
 Encapsulates a token from the disassembly of an instruction.
 */
typedef void *EDTokenRef;

/*!
 @typedef EDOperandRef
 Encapsulates an operand of an instruction.
 */
typedef void *EDOperandRef;

/*!
 @functiongroup Getting a disassembler
 */

/*!
 @function EDGetDisassembler
 Gets the disassembler for a given target.
 @param disassembler A pointer whose target will be filled in with the
   disassembler.
 @param triple Identifies the target.  Example: "x86_64-apple-darwin10"
 @param syntax The assembly syntax to use when decoding instructions.
 @result 0 on success; -1 otherwise.
 */
int EDGetDisassembler(EDDisassemblerRef *disassembler,
                      const char *triple,
                      EDAssemblySyntax_t syntax);

/*!
 @functiongroup Generic architectural queries
 */

/*!
 @function EDGetRegisterName
 Gets the human-readable name for a given register.
 @param regName A pointer whose target will be pointed at the name of the
   register.  The name does not need to be deallocated by the caller.
   (The original comment broke off mid-sentence at this point; how long the
   string stays valid is not documented in this header.)
 @param disassembler The disassembler to query for the name.
 @param regID The register identifier, as returned by EDRegisterTokenValue.
 @result 0 on success; -1 otherwise.
 */
int EDGetRegisterName(const char **regName,
                      EDDisassemblerRef disassembler,
                      unsigned regID);

/*!
 @function EDRegisterIsStackPointer
 Determines if a register is one of the platform's stack-pointer registers.
 @param disassembler The disassembler to query.
 @param regID The register identifier, as returned by EDRegisterTokenValue.
 @result 1 if true; 0 otherwise.
 */
int EDRegisterIsStackPointer(EDDisassemblerRef disassembler,
                             unsigned regID);

/*!
 @function EDRegisterIsProgramCounter
 Determines if a register is one of the platform's program-counter registers.
 @param disassembler The disassembler to query.
 @param regID The register identifier, as returned by EDRegisterTokenValue.
 @result 1 if true; 0 otherwise.
 */
int EDRegisterIsProgramCounter(EDDisassemblerRef disassembler,
                               unsigned regID);

/*!
 @functiongroup Creating and querying instructions
 */

/*!
 @function EDCreateInsts
 Gets a set of contiguous instructions from a disassembler.
 @param insts A pointer to an array that will be filled in with the
   instructions.  Must have at least count entries.  Entries not filled in will
   be set to NULL.
 @param count The maximum number of instructions to fill in.
 @param disassembler The disassembler to use when decoding the instructions.
 @param byteReader The function to use when reading the instructions' machine
   code.
 @param address The address of the first byte of the first instruction.
 @param arg An anonymous argument to be passed to byteReader.
 @result The number of instructions read on success; 0 otherwise.
 */
unsigned int EDCreateInsts(EDInstRef *insts,
                           unsigned int count,
                           EDDisassemblerRef disassembler,
                           EDByteReaderCallback byteReader,
                           uint64_t address,
                           void *arg);

/*!
 @function EDReleaseInst
 Frees the memory for an instruction.  The instruction can no longer be accessed
 after this call; the tokens, operands, and instruction string obtained from it
 become invalid as well (see EDGetToken, EDGetOperand, EDGetInstString).
 @param inst The instruction to be freed.
 */
void EDReleaseInst(EDInstRef inst);

/*!
 @function EDInstByteSize
 Gets the size of an instruction's machine code.
 @param inst The instruction to be queried.
 @result The number of bytes in the instruction's machine-code representation.
   (This header does not document an error value for this function.)
 */
int EDInstByteSize(EDInstRef inst);

/*!
 @function EDGetInstString
 Gets the disassembled text equivalent of the instruction.
 @param buf A pointer whose target will be filled in with a pointer to the
   string.  (The string becomes invalid when the instruction is released.)
 @param inst The instruction to be queried.
 @result 0 on success; -1 otherwise.
 */
int EDGetInstString(const char **buf,
                    EDInstRef inst);

/*!
 @function EDInstID
 Gets the LLVM identifier for an instruction.
 @param instID A pointer whose target will be filled in with the LLVM identifier
   for the instruction.
 @param inst The instruction to be queried.
 @result 0 on success; -1 otherwise.
 */
int EDInstID(unsigned *instID, EDInstRef inst);

/*!
 @function EDInstIsBranch
 Determines whether an instruction is a branch.
 @param inst The instruction to be queried.
 @result 1 if the instruction is a branch instruction; 0 if it is some other
   type of instruction; -1 if there was an error.
 */
int EDInstIsBranch(EDInstRef inst);

/*!
 @function EDInstIsMove
 Determines whether an instruction is a move.
 @param inst The instruction to be queried.
 @result 1 if the instruction is a move instruction; 0 if it is some other
   type of instruction; -1 if there was an error.
 */
int EDInstIsMove(EDInstRef inst);

/*
 * NOTE(review): the original comments on the three functions below said the
 * returned ID was "suitable for use with EDCopyOperand", but this header
 * declares no such function.  EDGetOperand (which takes an int index) is
 * presumably what was meant -- confirm against the implementation.
 */

/*!
 @function EDBranchTargetID
 Finds the operand that holds the target of a branch instruction.
 @param inst The instruction to be queried.
 @result The ID of the branch target operand, suitable for use with
   EDGetOperand.  -1 if no such operand exists.
 */
int EDBranchTargetID(EDInstRef inst);

/*!
 @function EDMoveSourceID
 Finds the operand that holds the source of a move instruction.
 @param inst The instruction to be queried.
 @result The ID of the move source operand, suitable for use with
   EDGetOperand.  -1 if no such operand exists.
 */
int EDMoveSourceID(EDInstRef inst);

/*!
 @function EDMoveTargetID
 Finds the operand that holds the target of a move instruction.
 @param inst The instruction to be queried.
 @result The ID of the move target operand, suitable for use with
   EDGetOperand.  -1 if no such operand exists.
 */
int EDMoveTargetID(EDInstRef inst);

/*!
 @functiongroup Creating and querying tokens
 */

/*!
 @function EDNumTokens
 Counts the tokens in an instruction.
 @param inst The instruction to be queried.
 @result The number of tokens in the instruction, or -1 on error.
 */
int EDNumTokens(EDInstRef inst);

/*!
 @function EDGetToken
 Retrieves a token from an instruction.  The token is valid until the
 instruction is released.
 @param token A pointer to be filled in with the token.
 @param inst The instruction to be queried.
 @param index The index of the token in the instruction.
 @result 0 on success; -1 otherwise.
 */
int EDGetToken(EDTokenRef *token,
               EDInstRef inst,
               int index);

/*!
 @function EDGetTokenString
 Gets the disassembled text for a token.
 @param buf A pointer whose target will be filled in with a pointer to the
   string.  (The string becomes invalid when the token does; per EDGetToken, a
   token is valid until its instruction is released.)
 @param token The token to be queried.
 @result 0 on success; -1 otherwise.
 */
int EDGetTokenString(const char **buf,
                     EDTokenRef token);

/*!
 @function EDOperandIndexForToken
 Returns the index of the operand to which a token belongs.
 @param token The token to be queried.
 @result The operand index on success; -1 otherwise.
 */
int EDOperandIndexForToken(EDTokenRef token);

/*!
 @function EDTokenIsWhitespace
 Determines whether a token is whitespace.
 @param token The token to be queried.
 @result 1 if the token is whitespace; 0 if not; -1 on error.
 */
int EDTokenIsWhitespace(EDTokenRef token);

/*!
 @function EDTokenIsPunctuation
 Determines whether a token is punctuation.
 @param token The token to be queried.
 @result 1 if the token is punctuation; 0 if not; -1 on error.
 */
int EDTokenIsPunctuation(EDTokenRef token);

/*!
 @function EDTokenIsOpcode
 Determines whether a token is an opcode.
 @param token The token to be queried.
 @result 1 if the token is an opcode; 0 if not; -1 on error.
 */
int EDTokenIsOpcode(EDTokenRef token);

/*!
 @function EDTokenIsLiteral
 Determines whether a token is a numeric literal.
 @param token The token to be queried.
 @result 1 if the token is a numeric literal; 0 if not; -1 on error.
 */
int EDTokenIsLiteral(EDTokenRef token);

/*!
 @function EDTokenIsRegister
 Determines whether a token identifies a register.
 @param token The token to be queried.
 @result 1 if the token identifies a register; 0 if not; -1 on error.
 */
int EDTokenIsRegister(EDTokenRef token);

/*!
 @function EDTokenIsNegativeLiteral
 Determines whether a token is a negative signed literal.
 @param token The token to be queried.
 @result 1 if the token is a negative signed literal; 0 if not; -1 on error.
 */
int EDTokenIsNegativeLiteral(EDTokenRef token);

/*!
 @function EDLiteralTokenAbsoluteValue
 Gets the absolute value of a literal token.  The sign is reported separately
 by EDTokenIsNegativeLiteral.
 @param value A pointer whose target will be filled in with the absolute value
   of the literal.
 @param token The token to be queried.
 @result 0 on success; -1 otherwise.
 */
int EDLiteralTokenAbsoluteValue(uint64_t *value,
                                EDTokenRef token);

/*!
 @function EDRegisterTokenValue
 Gets the LLVM register identifier for a register token.
 @param registerID A pointer whose target will be filled in with the LLVM
   register identifier for the token.
 @param token The token to be queried.
 @result 0 on success; -1 otherwise.
 */
int EDRegisterTokenValue(unsigned *registerID,
                         EDTokenRef token);

/*!
 @functiongroup Creating and querying operands
 */

/*!
 @function EDNumOperands
 Counts the operands in an instruction.
 @param inst The instruction to be queried.
 @result The number of operands in the instruction, or -1 on error.
 */
int EDNumOperands(EDInstRef inst);

/*!
 @function EDGetOperand
 Retrieves an operand from an instruction.  The operand is valid until the
 instruction is released.
 @param operand A pointer to be filled in with the operand.
 @param inst The instruction to be queried.
 @param index The index of the operand in the instruction.
 @result 0 on success; -1 otherwise.
 */
int EDGetOperand(EDOperandRef *operand,
                 EDInstRef inst,
                 int index);

/*!
 @function EDOperandIsRegister
 Determines whether an operand names a register.
 @param operand The operand to be queried.
 @result 1 if the operand names a register; 0 if not; -1 on error.
 */
int EDOperandIsRegister(EDOperandRef operand);

/*!
 @function EDOperandIsImmediate
 Determines whether an operand specifies an immediate value.
 @param operand The operand to be queried.
 @result 1 if the operand specifies an immediate value; 0 if not; -1 on error.
 */
int EDOperandIsImmediate(EDOperandRef operand);

/*!
 @function EDOperandIsMemory
 Determines whether an operand specifies a location in memory.
 @param operand The operand to be queried.
 @result 1 if the operand specifies a location in memory; 0 if not; -1 on error.
 */
int EDOperandIsMemory(EDOperandRef operand);

/*!
 @function EDRegisterOperandValue
 Gets the LLVM register ID of the register named by an operand.
 @param value A pointer whose target will be filled in with the LLVM register ID
   of the register named by the operand.
 @param operand The operand to be queried.
 @result 0 on success; -1 otherwise.
 */
int EDRegisterOperandValue(unsigned *value,
                           EDOperandRef operand);

/*!
 @function EDImmediateOperandValue
 Gets the value of an immediate operand.
 @param value A pointer whose target will be filled in with the value of the
   immediate.
 @param operand The operand to be queried.
 @result 0 on success; -1 otherwise.
 */
int EDImmediateOperandValue(uint64_t *value,
                            EDOperandRef operand);

/*!
 @function EDEvaluateOperand
 Evaluates an operand using a client-supplied register state accessor.  Register
 operands are evaluated by reading the value of the register; immediate operands
 are evaluated by reporting the immediate value; memory operands are evaluated
 by computing the target address (with only those relocations applied that were
 already applied to the original bytes).
 @param result A pointer whose target is to be filled with the result of
   evaluating the operand.
 @param operand The operand to be evaluated.
 @param regReader The function to use when reading registers from the register
   state.
 @param arg An anonymous argument for client use.
 @result 0 if the operand could be evaluated; -1 otherwise.
 */
int EDEvaluateOperand(uint64_t *result,
                      EDOperandRef operand,
                      EDRegisterReaderCallback regReader,
                      void *arg);

/* Everything up to the matching #endif is declared only when the compiler
   defines __BLOCKS__. */
#ifdef __BLOCKS__

/*!
 @typedef EDByteBlock_t
 Block-based interface to memory from which instructions may be read.
 @param byte A pointer whose target should be filled in with the data returned.
 @param address The address of the byte to be read.
 @result 0 on success; -1 otherwise.
 */
typedef int (^EDByteBlock_t)(uint8_t *byte, uint64_t address);

/*!
 @typedef EDRegisterBlock_t
 Block-based interface to a register state from which register values may be
 read.
 @param value A pointer whose target should be filled in with the value of the
   register.
 @param regID The LLVM register identifier for the register to read.
 @result 0 if the register could be read; -1 otherwise.
 */
typedef int (^EDRegisterBlock_t)(uint64_t *value, unsigned regID);

/*!
 @typedef EDTokenVisitor_t
 Block-based handler for individual tokens.
 @param token The current token being read.
 @result 0 to continue; 1 to stop normally; -1 on error.
 */
typedef int (^EDTokenVisitor_t)(EDTokenRef token);

/*! @functiongroup Block-based interfaces */

/*!
 @function EDBlockCreateInsts
 Gets a set of contiguous instructions from a disassembler, using a block to
 read memory.
 @param insts A pointer to an array that will be filled in with the
   instructions.  Must have at least count entries.  Entries not filled in will
   be set to NULL.
 @param count The maximum number of instructions to fill in.  NOTE(review):
   declared as int here but as unsigned int in EDCreateInsts; the type is left
   as is because it must match the definition.
 @param disassembler The disassembler to use when decoding the instructions.
 @param byteBlock The block to use when reading the instructions' machine
   code.
 @param address The address of the first byte of the first instruction.
 @result The number of instructions read on success; 0 otherwise.
 */
unsigned int EDBlockCreateInsts(EDInstRef *insts,
                                int count,
                                EDDisassemblerRef disassembler,
                                EDByteBlock_t byteBlock,
                                uint64_t address);

/*!
 @function EDBlockEvaluateOperand
 Evaluates an operand using a block to read registers.
 @param result A pointer whose target is to be filled with the result of
   evaluating the operand.
 @param operand The operand to be evaluated.
 @param regBlock The block to use when reading registers from the register
   state.
 @result 0 if the operand could be evaluated; -1 otherwise.
 */
int EDBlockEvaluateOperand(uint64_t *result,
                           EDOperandRef operand,
                           EDRegisterBlock_t regBlock);

/*!
 @function EDBlockVisitTokens
 Visits every token with a visitor.
 @param inst The instruction with the tokens to be visited.
 @param visitor The visitor.
 @result 0 if the visit ended normally; -1 if the visitor encountered an error
   or there was some other error.
 */
int EDBlockVisitTokens(EDInstRef inst,
                       EDTokenVisitor_t visitor);

#endif /* __BLOCKS__ */

#ifdef __cplusplus
}
#endif
512
513#endif
514