ocaml.c revision 26ee41aa673b03494292229c6e4e331a668ce7b2
1/* Capstone Disassembler Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013 */
3
4#include <stdio.h>		// debug
5#include <string.h>
6#include <caml/mlvalues.h>
7#include <caml/memory.h>
8#include <caml/alloc.h>
9#include <caml/fail.h>
10
11#include "../../include/capstone.h"
12
// Number of elements in the array @a. @a must be a real array, not a
// pointer. The argument is parenthesised so that expressions such as
// casts or dereferences are indexed as a whole.
#define ARR_SIZE(a) (sizeof(a)/sizeof((a)[0]))
14
15// count the number of positive members in @oplist
16#define ARCH_LIST_COUNT(_arch, _optype) \
17static unsigned int _arch ## _list_count(_optype *list, unsigned int max) \
18{ \
19	unsigned int i; \
20	for(i = 0; i < max; i++) \
21		if (list[i].type == 0) \
22			return i; \
23	return max; \
24}
25
26ARCH_LIST_COUNT(arm, cs_arm_op)
27ARCH_LIST_COUNT(arm64, cs_arm64_op)
28ARCH_LIST_COUNT(mips, cs_mips_op)
29ARCH_LIST_COUNT(x86, cs_x86_op)
30
// Return the index of the first zero entry in @list, looking at no more
// than @max entries; return @max when all of them are non-zero.
static unsigned int list_count(unsigned int *list, unsigned int max)
{
	unsigned int n = 0;

	while (n < max && list[n] != 0)
		n++;

	return n;
}
42
43static CAMLprim value _cs_disasm(cs_arch arch, csh handle, char *code, uint64_t code_len, uint64_t addr, uint64_t count)
44{
45	CAMLparam0();
46	CAMLlocal5(list, cons, rec_insn, array, tmp);
47	CAMLlocal4(arch_info, op_info_val, tmp2, tmp3);
48	cs_insn *insn;
49
50	list = Val_emptylist;
51
52	uint64_t c = cs_disasm_dyn(handle, code, code_len, addr, count, &insn);
53	if (c) {
54		//printf("Found %lu insn, addr: %lx\n", c, addr);
55		uint64_t j;
56		for (j = c; j > 0; j--) {
57			unsigned int lcount, i;
58
59			cons = caml_alloc(2, 0);
60
61			rec_insn = caml_alloc(9, 0);
62			Store_field(rec_insn, 0, Val_int(insn[j-1].id));
63			Store_field(rec_insn, 1, Val_int(insn[j-1].address));
64			Store_field(rec_insn, 2, Val_int(insn[j-1].size));
65			Store_field(rec_insn, 3, caml_copy_string(insn[j-1].mnemonic));
66			Store_field(rec_insn, 4, caml_copy_string(insn[j-1].op_str));
67
68			lcount = list_count(insn[j-1].regs_read, ARR_SIZE(insn[j-1].regs_read));
69			if (lcount) {
70				array = caml_alloc(lcount, 0);
71				for (i = 0; i < lcount; i++) {
72					Store_field(array, i, Val_int(insn[j-1].regs_read[i]));
73				}
74			} else	// empty list
75				array = Atom(0);
76			Store_field(rec_insn, 5, array);
77
78			lcount = list_count(insn[j-1].regs_write, ARR_SIZE(insn[j-1].regs_write));
79			if (lcount) {
80				array = caml_alloc(lcount, 0);
81				for (i = 0; i < lcount; i++) {
82					Store_field(array, i, Val_int(insn[j-1].regs_write[i]));
83				}
84			} else
85				array = Atom(0);	// empty list
86			Store_field(rec_insn, 6, array);
87
88			lcount = list_count(insn[j-1].groups, ARR_SIZE(insn[j-1].groups));
89			if (lcount) {
90				array = caml_alloc(lcount, 0);
91				for (i = 0; i < lcount; i++) {
92					Store_field(array, i, Val_int(insn[j-1].groups[i]));
93				}
94			} else
95				array = Atom(0);	// empty list
96			Store_field(rec_insn, 7, array);
97
98			switch(arch) {
99				default: break;
100				case CS_ARCH_ARM:
101					arch_info = caml_alloc(1, 0);
102
103					op_info_val = caml_alloc(5, 0);
104					Store_field(op_info_val, 0, Val_int(insn[j-1].arm.cc));
105					Store_field(op_info_val, 1, Val_bool(insn[j-1].arm.update_flags));
106					Store_field(op_info_val, 2, Val_bool(insn[j-1].arm.writeback));
107					Store_field(op_info_val, 3, Val_int(insn[j-1].arm.op_count));
108
109					lcount = arm_list_count(insn[j - 1].arm.operands, ARR_SIZE(insn[j - 1].arm.operands));
110					if (lcount > 0) {
111						array = caml_alloc(lcount, 0);
112						for (i = 0; i < lcount; i++) {
113							tmp2 = caml_alloc(2, 0);
114							switch(insn[j-1].arm.operands[i].type) {
115								case ARM_OP_REG:
116									tmp = caml_alloc(1, 1);
117									Store_field(tmp, 0, Val_int(insn[j-1].arm.operands[i].reg));
118									break;
119								case ARM_OP_CIMM:
120									tmp = caml_alloc(1, 2);
121									Store_field(tmp, 0, Val_int(insn[j-1].arm.operands[i].imm));
122									break;
123								case ARM_OP_PIMM:
124									tmp = caml_alloc(1, 3);
125									Store_field(tmp, 0, Val_int(insn[j-1].arm.operands[i].imm));
126									break;
127								case ARM_OP_IMM:
128									tmp = caml_alloc(1, 4);
129									Store_field(tmp, 0, Val_int(insn[j-1].arm.operands[i].imm));
130									break;
131								case ARM_OP_FP:
132									tmp = caml_alloc(1, 5);
133									Store_field(tmp, 0, caml_copy_double(insn[j-1].arm.operands[i].fp));
134									break;
135								case ARM_OP_MEM:
136									tmp = caml_alloc(1, 6);
137									tmp3 = caml_alloc(4, 0);
138									Store_field(tmp3, 0, Val_int(insn[j-1].arm.operands[i].mem.base));
139									Store_field(tmp3, 1, Val_int(insn[j-1].arm.operands[i].mem.index));
140									Store_field(tmp3, 2, Val_int(insn[j-1].arm.operands[i].mem.scale));
141									Store_field(tmp3, 3, Val_int(insn[j-1].arm.operands[i].mem.disp));
142									Store_field(tmp, 0, tmp3);
143									break;
144								default: break;
145							}
146							tmp3 = caml_alloc(2, 0);
147							Store_field(tmp3, 0, Val_int(insn[j-1].arm.operands[i].shift.type));
148							Store_field(tmp3, 1, Val_int(insn[j-1].arm.operands[i].shift.value));
149							Store_field(tmp2, 0, tmp3);
150							Store_field(tmp2, 1, tmp);
151							Store_field(array, i, tmp2);
152						}
153					} else	// empty list
154						array = Atom(0);
155
156					Store_field(op_info_val, 4, array);
157
158					// finally, insert this into arch_info
159					Store_field(arch_info, 0, op_info_val);
160
161					Store_field(rec_insn, 8, arch_info);
162
163					break;
164				case CS_ARCH_ARM64:
165						 arch_info = caml_alloc(1, 1);
166
167						 op_info_val = caml_alloc(5, 0);
168						 Store_field(op_info_val, 0, Val_int(insn[j-1].arm64.cc));
169						 Store_field(op_info_val, 1, Val_bool(insn[j-1].arm64.update_flags));
170						 Store_field(op_info_val, 2, Val_bool(insn[j-1].arm64.writeback));
171						 Store_field(op_info_val, 3, Val_int(insn[j-1].arm64.op_count));
172
173						 lcount = arm64_list_count(insn[j - 1].arm64.operands, ARR_SIZE(insn[j - 1].arm64.operands));
174						 if (lcount > 0) {
175							 array = caml_alloc(lcount, 0);
176							 for (i = 0; i < lcount; i++) {
177								 tmp2 = caml_alloc(3, 0);
178								 switch(insn[j-1].arm64.operands[i].type) {
179									 case ARM64_OP_REG:
180										 tmp = caml_alloc(1, 1);
181										 Store_field(tmp, 0, Val_int(insn[j-1].arm64.operands[i].reg));
182										 break;
183									 case ARM64_OP_CIMM:
184										 tmp = caml_alloc(1, 2);
185										 Store_field(tmp, 0, Val_int(insn[j-1].arm64.operands[i].imm));
186										 break;
187									 case ARM64_OP_IMM:
188										 tmp = caml_alloc(1, 3);
189										 Store_field(tmp, 0, Val_int(insn[j-1].arm64.operands[i].imm));
190										 break;
191									 case ARM64_OP_FP:
192										 tmp = caml_alloc(1, 4);
193										 Store_field(tmp, 0, caml_copy_double(insn[j-1].arm64.operands[i].fp));
194										 break;
195									 case ARM64_OP_MEM:
196										 tmp = caml_alloc(1, 5);
197										 tmp3 = caml_alloc(3, 0);
198										 Store_field(tmp3, 0, Val_int(insn[j-1].arm64.operands[i].mem.base));
199										 Store_field(tmp3, 1, Val_int(insn[j-1].arm64.operands[i].mem.index));
200										 Store_field(tmp3, 2, Val_int(insn[j-1].arm64.operands[i].mem.disp));
201										 Store_field(tmp, 0, tmp3);
202										 break;
203									 default: break;
204								 }
205								 tmp3 = caml_alloc(2, 0);
206								 Store_field(tmp3, 0, Val_int(insn[j-1].arm64.operands[i].shift.type));
207								 Store_field(tmp3, 1, Val_int(insn[j-1].arm64.operands[i].shift.value));
208								 Store_field(tmp2, 0, tmp3);
209								 Store_field(tmp2, 1, Val_int(insn[j-1].arm64.operands[i].ext));
210
211								 Store_field(tmp2, 2, tmp);
212								 Store_field(array, i, tmp2);
213							 }
214						 } else		// empty array
215							 array = Atom(0);
216
217						 Store_field(op_info_val, 4, array);
218
219						 // finally, insert this into arch_info
220						 Store_field(arch_info, 0, op_info_val);
221
222						 Store_field(rec_insn, 8, arch_info);
223
224						 break;
225				case CS_ARCH_MIPS:
226						 arch_info = caml_alloc(1, 2);
227
228						 op_info_val = caml_alloc(2, 0);
229						 Store_field(op_info_val, 0, Val_int(insn[j-1].mips.op_count));
230
231						 lcount = mips_list_count(insn[j - 1].mips.operands, ARR_SIZE(insn[j - 1].mips.operands));
232						 if (lcount > 0) {
233							 array = caml_alloc(lcount, 0);
234							 for (i = 0; i < lcount; i++) {
235								 tmp2 = caml_alloc(1, 0);
236								 switch(insn[j-1].mips.operands[i].type) {
237									 case MIPS_OP_REG:
238										 tmp = caml_alloc(1, 1);
239										 Store_field(tmp, 0, Val_int(insn[j-1].mips.operands[i].reg));
240										 break;
241									 case MIPS_OP_IMM:
242										 tmp = caml_alloc(1, 2);
243										 Store_field(tmp, 0, Val_int(insn[j-1].mips.operands[i].imm));
244										 break;
245									 case MIPS_OP_MEM:
246										 tmp = caml_alloc(1, 3);
247										 tmp3 = caml_alloc(2, 0);
248										 Store_field(tmp3, 0, Val_int(insn[j-1].mips.operands[i].mem.base));
249										 Store_field(tmp3, 1, Val_int(insn[j-1].mips.operands[i].mem.disp));
250										 Store_field(tmp, 0, tmp3);
251										 break;
252									 default: break;
253								 }
254								 Store_field(tmp2, 0, tmp);
255								 Store_field(array, i, tmp2);
256							 }
257						 } else		// empty array
258							 array = Atom(0);
259
260						 Store_field(op_info_val, 1, array);
261
262						 // finally, insert this into arch_info
263						 Store_field(arch_info, 0, op_info_val);
264
265						 Store_field(rec_insn, 8, arch_info);
266
267						 break;
268				case CS_ARCH_X86:
269					arch_info = caml_alloc(1, 3);
270
271					op_info_val = caml_alloc(15, 0);
272
273					array = caml_alloc(ARR_SIZE(insn[0].x86.prefix), 0);
274					for (i = 0; i < ARR_SIZE(insn[0].x86.prefix); i++) {
275					    Store_field(array, i, Val_int(insn[j-1].x86.prefix[i]));
276					}
277					Store_field(op_info_val, 0, array);
278
279					Store_field(op_info_val, 1, Val_int(insn[j-1].x86.segment));
280
281					array = caml_alloc(ARR_SIZE(insn[0].x86.opcode), 0);
282					for (i = 0; i < ARR_SIZE(insn[0].x86.opcode); i++) {
283					    Store_field(array, i, Val_int(insn[j-1].x86.opcode[i]));
284					}
285					Store_field(op_info_val, 2, array);
286
287					Store_field(op_info_val, 3, Val_int(insn[j-1].x86.op_size));
288
289					Store_field(op_info_val, 4, Val_int(insn[j-1].x86.addr_size));
290
291					Store_field(op_info_val, 5, Val_int(insn[j-1].x86.disp_size));
292
293					Store_field(op_info_val, 6, Val_int(insn[j-1].x86.imm_size));
294
295					Store_field(op_info_val, 7, Val_int(insn[j-1].x86.modrm));
296
297					Store_field(op_info_val, 8, Val_int(insn[j-1].x86.sib));
298
299					Store_field(op_info_val, 9, Val_int(insn[j-1].x86.disp));
300
301					Store_field(op_info_val, 10, Val_int(insn[j-1].x86.sib_index));
302
303					Store_field(op_info_val, 11, Val_int(insn[j-1].x86.sib_scale));
304
305					Store_field(op_info_val, 12, Val_int(insn[j-1].x86.sib_base));
306
307					Store_field(op_info_val, 13, Val_int(insn[j-1].x86.op_count));
308
309					lcount = x86_list_count(insn[j - 1].x86.operands, ARR_SIZE(insn[j - 1].x86.operands));
310					if (lcount > 0) {
311						array = caml_alloc(lcount, 0);
312						for (i = 0; i < lcount; i++) {
313							switch(insn[j-1].x86.operands[i].type) {
314								case X86_OP_REG:
315									tmp = caml_alloc(1, 1);
316									Store_field(tmp, 0, Val_int(insn[j-1].x86.operands[i].reg));
317									break;
318								case X86_OP_IMM:
319									tmp = caml_alloc(1, 2);
320									Store_field(tmp, 0, Val_int(insn[j-1].x86.operands[i].imm));
321									break;
322								case X86_OP_FP:
323									tmp = caml_alloc(1, 3);
324									Store_field(tmp, 0, caml_copy_double(insn[j-1].x86.operands[i].fp));
325									break;
326								case X86_OP_MEM:
327									tmp = caml_alloc(1, 4);
328									tmp2 = caml_alloc(4, 0);
329									Store_field(tmp2, 0, Val_int(insn[j-1].x86.operands[i].mem.base));
330									Store_field(tmp2, 1, Val_int(insn[j-1].x86.operands[i].mem.index));
331									Store_field(tmp2, 2, Val_int(insn[j-1].x86.operands[i].mem.scale));
332									Store_field(tmp2, 3, Val_int(insn[j-1].x86.operands[i].mem.disp));
333									Store_field(tmp, 0, tmp2);
334									break;
335								default:
336									break;
337							}
338							Store_field(array, i, tmp);
339						}
340					} else
341						array = Atom(0);	// empty array
342
343					Store_field(op_info_val, 14, array);
344
345					// finally, insert this into arch_info
346					Store_field(arch_info, 0, op_info_val);
347
348					Store_field(rec_insn, 8, arch_info);
349					break;
350			}
351
352			Store_field(cons, 0, rec_insn);	// head
353			Store_field(cons, 1, list);		// tail
354			list = cons;
355		}
356
357		cs_free(insn);
358	}
359
360	cs_close(handle);
361
362    CAMLreturn(list);
363}
364
365CAMLprim value ocaml_cs_disasm_quick(value _arch, value _mode, value _code, value _addr, value _count)
366{
367	CAMLparam5(_arch, _mode, _code, _addr, _count);
368	CAMLlocal1(head);
369	csh handle;
370	cs_arch arch;
371	cs_mode mode = 0;
372	char *code;
373	uint64_t addr, count, code_len;
374
375	switch (Int_val(_arch)) {
376		case 0:
377			arch = CS_ARCH_ARM;
378			break;
379		case 1:
380			arch = CS_ARCH_ARM64;
381			break;
382		case 2:
383			arch = CS_ARCH_MIPS;
384			break;
385		case 3:
386			arch = CS_ARCH_X86;
387			break;
388		default:
389			caml_invalid_argument("Error message");
390			return Val_emptylist;
391	}
392
393	while (_mode != Val_emptylist) {
394		head = Field(_mode, 0);  /* accessing the head */
395		switch (Int_val(head)) {
396			case 0:
397				mode |= CS_MODE_LITTLE_ENDIAN;
398				break;
399			case 1:
400				mode |= CS_MODE_SYNTAX_INTEL;
401				break;
402			case 2:
403				mode |= CS_MODE_ARM;
404				break;
405			case 3:
406				mode |= CS_MODE_16;
407				break;
408			case 4:
409				mode |= CS_MODE_32;
410				break;
411			case 5:
412				mode |= CS_MODE_64;
413				break;
414			case 6:
415				mode |= CS_MODE_THUMB;
416				break;
417			case 7:
418				mode |= CS_MODE_MICRO;
419				break;
420			case 8:
421				mode |= CS_MODE_N64;
422				break;
423			case 9:
424				mode |= CS_MODE_SYNTAX_ATT;
425				break;
426			case 10:
427				mode |= CS_MODE_BIG_ENDIAN;
428				break;
429			default:
430				caml_invalid_argument("Error message");
431				return Val_emptylist;
432		}
433		_mode = Field(_mode, 1);  /* point to the tail for next loop */
434	}
435
436	if (cs_open(arch, mode, &handle) == false)
437		return Val_emptylist;
438
439	code = String_val(_code);
440	code_len = caml_string_length(_code);
441	addr = Int64_val(_addr);
442	count = Int64_val(_count);
443
444    CAMLreturn(_cs_disasm(arch, handle, code, code_len, addr, count));
445}
446
447CAMLprim value ocaml_cs_disasm_dyn(value _arch, value _handle, value _code, value _addr, value _count)
448{
449	CAMLparam5(_arch, _handle, _code, _addr, _count);
450	csh handle;
451	cs_arch arch;
452	char *code;
453	uint64_t addr, count, code_len;
454
455	handle = Int64_val(_handle);
456
457	arch = Int_val(_arch);
458	code = String_val(_code);
459	code_len = caml_string_length(_code);
460	addr = Int64_val(_addr);
461	count = Int64_val(_count);
462
463    CAMLreturn(_cs_disasm(arch, handle, code, code_len, addr, count));
464}
465
466CAMLprim value ocaml_cs_open(value _arch, value _mode)
467{
468	CAMLparam2(_arch, _mode);
469	CAMLlocal2(list, head);
470	csh handle;
471	cs_arch arch;
472	cs_mode mode = 0;
473
474	list = Val_emptylist;
475
476	switch (Int_val(_arch)) {
477		case 0:
478			arch = CS_ARCH_ARM;
479			break;
480		case 1:
481			arch = CS_ARCH_ARM64;
482			break;
483		case 2:
484			arch = CS_ARCH_MIPS;
485			break;
486		case 3:
487			arch = CS_ARCH_X86;
488			break;
489		default:
490			caml_invalid_argument("Error message");
491			return Val_emptylist;
492	}
493
494	while (_mode != Val_emptylist) {
495		head = Field(_mode, 0);  /* accessing the head */
496		switch (Int_val(head)) {
497			case 0:
498				mode |= CS_MODE_LITTLE_ENDIAN;
499				break;
500			case 1:
501				mode |= CS_MODE_SYNTAX_INTEL;
502				break;
503			case 2:
504				mode |= CS_MODE_ARM;
505				break;
506			case 3:
507				mode |= CS_MODE_16;
508				break;
509			case 4:
510				mode |= CS_MODE_32;
511				break;
512			case 5:
513				mode |= CS_MODE_64;
514				break;
515			case 6:
516				mode |= CS_MODE_THUMB;
517				break;
518			case 7:
519				mode |= CS_MODE_MICRO;
520				break;
521			case 8:
522				mode |= CS_MODE_N64;
523				break;
524			case 9:
525				mode |= CS_MODE_SYNTAX_ATT;
526				break;
527			case 10:
528				mode |= CS_MODE_BIG_ENDIAN;
529				break;
530			default:
531				caml_invalid_argument("Error message");
532				return Val_emptylist;
533		}
534		_mode = Field(_mode, 1);  /* point to the tail for next loop */
535	}
536
537	if (cs_open(arch, mode, &handle) == false)
538		CAMLreturn(Val_int(0));
539	else {
540		CAMLlocal1(result);
541		result = caml_alloc(1, 0);
542		Store_field(result, 0, caml_copy_int64(handle));
543		CAMLreturn(result);
544	}
545}
546
547CAMLprim value cs_register_name(value _arch, value _reg)
548{
549	cs_arch arch;
550
551	switch (Int_val(_arch)) {
552		case 0:
553			arch = CS_ARCH_ARM;
554			break;
555		case 1:
556			arch = CS_ARCH_ARM64;
557			break;
558		case 2:
559			arch = CS_ARCH_MIPS;
560			break;
561		case 3:
562			arch = CS_ARCH_X86;
563			break;
564		default:
565			arch = Int_val(_arch);
566			break;
567	}
568
569	char *name = cs_reg_name(arch, Int_val(_reg));
570	return caml_copy_string(name);
571}
572
573CAMLprim value cs_instruction_name(value _handle, value _insn)
574{
575	char *name = cs_insn_name(Int64_val(_handle), Int_val(_insn));
576	return caml_copy_string(name);
577}
578