ocaml.c revision 58fe9716d9c703897182aea420357db64ad6bb5d
1/* Capstone Disassembler Engine */
/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013 */
3
4#include <stdio.h>		// debug
5#include <string.h>
6#include <caml/mlvalues.h>
7#include <caml/memory.h>
8#include <caml/alloc.h>
9#include <caml/fail.h>
10
11#include "../../include/capstone.h"
12
13#define ARR_SIZE(a) (sizeof(a)/sizeof(a[0]))
14
15
// Return the number of leading non-zero entries in @list.
// Scanning stops at the first zero byte, or after @max entries when the
// buffer contains no zero at all (in which case @max is returned).
static unsigned int list_count(const uint8_t *list, unsigned int max)
{
	unsigned int i;

	for (i = 0; i < max; i++) {
		if (list[i] == 0)
			return i;
	}

	return max;
}
27
28CAMLprim value _cs_disasm(cs_arch arch, csh handle, const uint8_t * code, size_t code_len, uint64_t addr, size_t count)
29{
30	CAMLparam0();
31	CAMLlocal5(list, cons, rec_insn, array, tmp);
32	CAMLlocal4(arch_info, op_info_val, tmp2, tmp3);
33	cs_insn *insn;
34
35	list = Val_emptylist;
36
37	size_t c = cs_disasm_ex(handle, code, code_len, addr, count, &insn);
38
39	if (c) {
40		//printf("Found %lu insn, addr: %lx\n", c, addr);
41		uint64_t j;
42		for (j = c; j > 0; j--) {
43			unsigned int lcount, i;
44			cons = caml_alloc(2, 0);
45
46			rec_insn = caml_alloc(13, 0);
47			Store_field(rec_insn, 0, Val_int(insn[j-1].id));
48			Store_field(rec_insn, 1, Val_int(insn[j-1].address));
49			Store_field(rec_insn, 2, Val_int(insn[j-1].size));
50
51			Store_field(rec_insn, 4, caml_copy_string(insn[j-1].mnemonic));
52			Store_field(rec_insn, 5, caml_copy_string(insn[j-1].op_str));
53
54			// copy raw bytes of instruction
55			lcount = insn[j-1].size;
56			if (lcount) {
57				array = caml_alloc(lcount, 0);
58				for (i = 0; i < lcount; i++) {
59					Store_field(array, i, Val_int(insn[j-1].bytes[i]));
60				}
61			} else
62				array = Atom(0);	// empty list
63			Store_field(rec_insn, 3, array);
64
65
66			// copy read registers
67			lcount = (insn[j-1]).detail->regs_read_count;
68			if (lcount) {
69				array = caml_alloc(lcount, 0);
70				for (i = 0; i < lcount; i++) {
71					Store_field(array, i, Val_int(insn[j-1].detail->regs_read[i]));
72				}
73			} else
74				array = Atom(0);	// empty list
75			Store_field(rec_insn, 6, array);
76			Store_field(rec_insn, 7, Val_int(lcount));
77
78			lcount = (insn[j-1]).detail->regs_write_count;
79			if (lcount) {
80				array = caml_alloc(lcount, 0);
81				for (i = 0; i < lcount; i++) {
82					Store_field(array, i, Val_int(insn[j-1].detail->regs_write[i]));
83				}
84			} else
85				array = Atom(0);	// empty list
86			Store_field(rec_insn, 8, array);
87			Store_field(rec_insn, 9, Val_int(lcount));
88
89
90			lcount = (insn[j-1]).detail->groups_count;
91			if (lcount) {
92				array = caml_alloc(lcount, 0);
93				for (i = 0; i < lcount; i++) {
94					Store_field(array, i, Val_int(insn[j-1].detail->groups[i]));
95				}
96			} else
97				array = Atom(0);	// empty list
98			Store_field(rec_insn, 10, array);
99			Store_field(rec_insn, 11, Val_int(lcount));
100
101
102
103			if(insn[j-1].detail)
104			switch(arch) {
105				case CS_ARCH_ARM:
106					arch_info = caml_alloc(1, 0);
107
108					op_info_val = caml_alloc(5, 0);
109					Store_field(op_info_val, 0, Val_int(insn[j-1].detail->arm.cc));
110					Store_field(op_info_val, 1, Val_bool(insn[j-1].detail->arm.update_flags));
111					Store_field(op_info_val, 2, Val_bool(insn[j-1].detail->arm.writeback));
112
113					lcount = insn[j-1].detail->arm.op_count;
114
115					Store_field(op_info_val, 3, Val_int(lcount));
116					if (lcount > 0) {
117						array = caml_alloc(lcount, 0);
118						for (i = 0; i < lcount; i++) {
119							tmp2 = caml_alloc(2, 0);
120							switch(insn[j-1].detail->arm.operands[i].type) {
121								case ARM_OP_REG:
122									tmp = caml_alloc(1, 1);
123									Store_field(tmp, 0, Val_int(insn[j-1].detail->arm.operands[i].reg));
124									break;
125								case ARM_OP_CIMM:
126									tmp = caml_alloc(1, 2);
127									Store_field(tmp, 0, Val_int(insn[j-1].detail->arm.operands[i].imm));
128									break;
129								case ARM_OP_PIMM:
130									tmp = caml_alloc(1, 3);
131									Store_field(tmp, 0, Val_int(insn[j-1].detail->arm.operands[i].imm));
132									break;
133								case ARM_OP_IMM:
134									tmp = caml_alloc(1, 4);
135									Store_field(tmp, 0, Val_int(insn[j-1].detail->arm.operands[i].imm));
136									break;
137								case ARM_OP_FP:
138									tmp = caml_alloc(1, 5);
139									Store_field(tmp, 0, caml_copy_double(insn[j-1].detail->arm.operands[i].fp));
140									break;
141								case ARM_OP_MEM:
142									tmp = caml_alloc(1, 6);
143									tmp3 = caml_alloc(4, 0);
144									Store_field(tmp3, 0, Val_int(insn[j-1].detail->arm.operands[i].mem.base));
145									Store_field(tmp3, 1, Val_int(insn[j-1].detail->arm.operands[i].mem.index));
146									Store_field(tmp3, 2, Val_int(insn[j-1].detail->arm.operands[i].mem.scale));
147									Store_field(tmp3, 3, Val_int(insn[j-1].detail->arm.operands[i].mem.disp));
148									Store_field(tmp, 0, tmp3);
149									break;
150								default: break;
151							}
152							tmp3 = caml_alloc(2, 0);
153							Store_field(tmp3, 0, Val_int(insn[j-1].detail->arm.operands[i].shift.type));
154							Store_field(tmp3, 1, Val_int(insn[j-1].detail->arm.operands[i].shift.value));
155							Store_field(tmp2, 0, tmp3);
156							Store_field(tmp2, 1, tmp);
157							Store_field(array, i, tmp2);
158						}
159					} else	// empty list
160						array = Atom(0);
161
162					Store_field(op_info_val, 4, array);
163
164					// finally, insert this into arch_info
165					Store_field(arch_info, 0, op_info_val);
166
167					Store_field(rec_insn, 12, arch_info);
168
169					break;
170				case CS_ARCH_ARM64:
171						 arch_info = caml_alloc(1, 1);
172
173						 op_info_val = caml_alloc(5, 0);
174						 Store_field(op_info_val, 0, Val_int(insn[j-1].detail->arm64.cc));
175						 Store_field(op_info_val, 1, Val_bool(insn[j-1].detail->arm64.update_flags));
176						 Store_field(op_info_val, 2, Val_bool(insn[j-1].detail->arm64.writeback));
177						 lcount = insn[j-1].detail->arm64.op_count;
178
179						 Store_field(op_info_val, 3, Val_int(lcount));
180
181						 if (lcount > 0) {
182							 array = caml_alloc(lcount, 0);
183							 for (i = 0; i < lcount; i++) {
184								 tmp2 = caml_alloc(3, 0);
185								 switch(insn[j-1].detail->arm64.operands[i].type) {
186									 case ARM64_OP_REG:
187										 tmp = caml_alloc(1, 1);
188										 Store_field(tmp, 0, Val_int(insn[j-1].detail->arm64.operands[i].reg));
189										 break;
190									 case ARM64_OP_CIMM:
191										 tmp = caml_alloc(1, 2);
192										 Store_field(tmp, 0, Val_int(insn[j-1].detail->arm64.operands[i].imm));
193										 break;
194									 case ARM64_OP_IMM:
195										 tmp = caml_alloc(1, 3);
196										 Store_field(tmp, 0, Val_int(insn[j-1].detail->arm64.operands[i].imm));
197										 break;
198									 case ARM64_OP_FP:
199										 tmp = caml_alloc(1, 4);
200										 Store_field(tmp, 0, caml_copy_double(insn[j-1].detail->arm64.operands[i].fp));
201										 break;
202									 case ARM64_OP_MEM:
203										 tmp = caml_alloc(1, 5);
204										 tmp3 = caml_alloc(3, 0);
205										 Store_field(tmp3, 0, Val_int(insn[j-1].detail->arm64.operands[i].mem.base));
206										 Store_field(tmp3, 1, Val_int(insn[j-1].detail->arm64.operands[i].mem.index));
207										 Store_field(tmp3, 2, Val_int(insn[j-1].detail->arm64.operands[i].mem.disp));
208										 Store_field(tmp, 0, tmp3);
209										 break;
210									 default: break;
211								 }
212								 tmp3 = caml_alloc(2, 0);
213								 Store_field(tmp3, 0, Val_int(insn[j-1].detail->arm64.operands[i].shift.type));
214								 Store_field(tmp3, 1, Val_int(insn[j-1].detail->arm64.operands[i].shift.value));
215								 Store_field(tmp2, 0, tmp3);
216								 Store_field(tmp2, 1, Val_int(insn[j-1].detail->arm64.operands[i].ext));
217
218								 Store_field(tmp2, 2, tmp);
219								 Store_field(array, i, tmp2);
220							 }
221						 } else		// empty array
222							 array = Atom(0);
223
224						 Store_field(op_info_val, 4, array);
225
226						 // finally, insert this into arch_info
227						 Store_field(arch_info, 0, op_info_val);
228
229						 Store_field(rec_insn, 12, arch_info);
230
231						 break;
232				case CS_ARCH_MIPS:
233						 arch_info = caml_alloc(1, 2);
234
235						 op_info_val = caml_alloc(2, 0);
236
237						 lcount = insn[j-1].detail->mips.op_count;
238
239						 Store_field(op_info_val, 0, Val_int(lcount));
240
241						 if (lcount > 0) {
242							 array = caml_alloc(lcount, 0);
243							 for (i = 0; i < lcount; i++) {
244								 tmp2 = caml_alloc(1, 0);
245								 switch(insn[j-1].detail->mips.operands[i].type) {
246									 case MIPS_OP_REG:
247										 tmp = caml_alloc(1, 1);
248										 Store_field(tmp, 0, Val_int(insn[j-1].detail->mips.operands[i].reg));
249										 break;
250									 case MIPS_OP_IMM:
251										 tmp = caml_alloc(1, 2);
252										 Store_field(tmp, 0, Val_int(insn[j-1].detail->mips.operands[i].imm));
253										 break;
254									 case MIPS_OP_MEM:
255										 tmp = caml_alloc(1, 3);
256										 tmp3 = caml_alloc(2, 0);
257										 Store_field(tmp3, 0, Val_int(insn[j-1].detail->mips.operands[i].mem.base));
258										 Store_field(tmp3, 1, Val_int(insn[j-1].detail->mips.operands[i].mem.disp));
259										 Store_field(tmp, 0, tmp3);
260										 break;
261									 default: break;
262								 }
263								 Store_field(tmp2, 0, tmp);
264								 Store_field(array, i, tmp2);
265							 }
266						 } else		// empty array
267							 array = Atom(0);
268
269						 Store_field(op_info_val, 1, array);
270
271						 // finally, insert this into arch_info
272						 Store_field(arch_info, 0, op_info_val);
273
274						 Store_field(rec_insn, 12, arch_info);
275
276						 break;
277				case CS_ARCH_PPC:
278
279						 arch_info = caml_alloc(1, 3);
280
281						 op_info_val = caml_alloc(5, 0);
282
283						 Store_field(op_info_val, 0, Val_int(insn[j-1].detail->ppc.bc));
284						 Store_field(op_info_val, 1, Val_int(insn[j-1].detail->ppc.bh));
285						 Store_field(op_info_val, 2, Val_bool(insn[j-1].detail->ppc.update_cr0));
286
287						 lcount = insn[j-1].detail->ppc.op_count;
288
289						 Store_field(op_info_val, 3, Val_int(lcount));
290
291						 if (lcount > 0) {
292							 array = caml_alloc(lcount, 0);
293							 for (i = 0; i < lcount; i++) {
294								 tmp2 = caml_alloc(1, 0);
295								 switch(insn[j-1].detail->ppc.operands[i].type) {
296									 case PPC_OP_REG:
297										 tmp = caml_alloc(1, 1);
298										 Store_field(tmp, 0, Val_int(insn[j-1].detail->ppc.operands[i].reg));
299										 break;
300									 case PPC_OP_IMM:
301										 tmp = caml_alloc(1, 2);
302										 Store_field(tmp, 0, Val_int(insn[j-1].detail->ppc.operands[i].imm));
303										 break;
304									 case PPC_OP_MEM:
305										 tmp = caml_alloc(1, 3);
306										 tmp3 = caml_alloc(2, 0);
307										 Store_field(tmp3, 0, Val_int(insn[j-1].detail->ppc.operands[i].mem.base));
308										 Store_field(tmp3, 1, Val_int(insn[j-1].detail->ppc.operands[i].mem.disp));
309										 Store_field(tmp, 0, tmp3);
310										 break;
311									 default: break;
312								 }
313								 Store_field(tmp2, 0, tmp);
314								 Store_field(array, i, tmp2);
315							 }
316						 } else		// empty array
317							 array = Atom(0);
318
319						 Store_field(op_info_val, 4, array);
320
321						 // finally, insert this into arch_info
322						 Store_field(arch_info, 0, op_info_val);
323
324						 Store_field(rec_insn, 12, arch_info);
325
326						 break;
327
328				case CS_ARCH_X86:
329
330					arch_info = caml_alloc(1, 4);
331
332					op_info_val = caml_alloc(15, 0);
333
334					// fill prefix
335					lcount = list_count(insn[j-1].detail->x86.prefix, ARR_SIZE(insn[j-1].detail->x86.prefix));
336					if(lcount) {
337						array = caml_alloc(lcount, 0);
338						for (i = 0; i < lcount; i++) {
339					    		Store_field(array, i, Val_int(insn[j-1].detail->x86.prefix[i]));
340						}
341					} else
342						array = Atom(0);
343					Store_field(op_info_val, 0, array);
344
345					Store_field(op_info_val, 1, Val_int(insn[j-1].detail->x86.segment));
346
347					// fill opcode
348					lcount = list_count(insn[j-1].detail->x86.opcode, ARR_SIZE(insn[j-1].detail->x86.opcode));
349					if(lcount) {
350						array = caml_alloc(lcount, 0);
351						for (i = 0; i < lcount; i++) {
352					    		Store_field(array, i, Val_int(insn[j-1].detail->x86.opcode[i]));
353						}
354					} else
355						array = Atom(0);
356					Store_field(op_info_val, 2, array);
357					Store_field(op_info_val, 3, Val_int(insn[j-1].detail->x86.op_size));
358
359					Store_field(op_info_val, 4, Val_int(insn[j-1].detail->x86.addr_size));
360
361					Store_field(op_info_val, 5, Val_int(insn[j-1].detail->x86.disp_size));
362
363					Store_field(op_info_val, 6, Val_int(insn[j-1].detail->x86.imm_size));
364
365					Store_field(op_info_val, 7, Val_int(insn[j-1].detail->x86.modrm));
366
367					Store_field(op_info_val, 8, Val_int(insn[j-1].detail->x86.sib));
368
369					Store_field(op_info_val, 9, Val_int(insn[j-1].detail->x86.disp));
370
371					Store_field(op_info_val, 10, Val_int(insn[j-1].detail->x86.sib_index));
372
373					Store_field(op_info_val, 11, Val_int(insn[j-1].detail->x86.sib_scale));
374
375					Store_field(op_info_val, 12, Val_int(insn[j-1].detail->x86.sib_base));
376
377					lcount = insn[j-1].detail->x86.op_count;
378
379					Store_field(op_info_val, 13, Val_int(lcount));
380
381					if (lcount > 0) {
382						array = caml_alloc(lcount, 0);
383						for (i = 0; i < lcount; i++) {
384							switch(insn[j-1].detail->x86.operands[i].type) {
385								case X86_OP_REG:
386									tmp = caml_alloc(1, 1);
387									Store_field(tmp, 0, Val_int(insn[j-1].detail->x86.operands[i].reg));
388									break;
389								case X86_OP_IMM:
390									tmp = caml_alloc(1, 2);
391									Store_field(tmp, 0, Val_int(insn[j-1].detail->x86.operands[i].imm));
392									break;
393								case X86_OP_FP:
394									tmp = caml_alloc(1, 3);
395									Store_field(tmp, 0, caml_copy_double(insn[j-1].detail->x86.operands[i].fp));
396									break;
397								case X86_OP_MEM:
398									tmp = caml_alloc(1, 4);
399									tmp2 = caml_alloc(4, 0);
400									Store_field(tmp2, 0, Val_int(insn[j-1].detail->x86.operands[i].mem.base));
401									Store_field(tmp2, 1, Val_int(insn[j-1].detail->x86.operands[i].mem.index));
402									Store_field(tmp2, 2, Val_int(insn[j-1].detail->x86.operands[i].mem.scale));
403									Store_field(tmp2, 3, Val_int(insn[j-1].detail->x86.operands[i].mem.disp));
404									Store_field(tmp, 0, tmp2);
405									break;
406								default:
407									break;
408							}
409							Store_field(array, i, tmp);
410						}
411					} else
412						array = Atom(0);	// empty array
413
414					Store_field(op_info_val, 14, array);
415
416					// finally, insert this into arch_info
417					Store_field(arch_info, 0, op_info_val);
418
419					Store_field(rec_insn, 12, arch_info);
420					break;
421
422				default: break;
423			}
424
425			Store_field(cons, 0, rec_insn);	// head
426			Store_field(cons, 1, list);		// tail
427			list = cons;
428		}
429		cs_free(insn, count);
430	}
431	// do not free the handle here
432	//cs_close(&handle);
433    CAMLreturn(list);
434}
435
436CAMLprim value ocaml_cs_disasm_quick(value _arch, value _mode, value _code, value _addr, value _count)
437{
438	CAMLparam5(_arch, _mode, _code, _addr, _count);
439	CAMLlocal1(head);
440	csh handle;
441	cs_arch arch;
442	cs_mode mode = 0;
443	const uint8_t *code;
444	uint64_t addr;
445	size_t count, code_len;
446
447	switch (Int_val(_arch)) {
448		case 0:
449			arch = CS_ARCH_ARM;
450			break;
451		case 1:
452			arch = CS_ARCH_ARM64;
453			break;
454		case 2:
455			arch = CS_ARCH_MIPS;
456			break;
457		case 3:
458			arch = CS_ARCH_PPC;
459			break;
460		case 4:
461			arch = CS_ARCH_X86;
462			break;
463		default:
464			caml_invalid_argument("Error message");
465			return Val_emptylist;
466	}
467
468	while (_mode != Val_emptylist) {
469		head = Field(_mode, 0);  /* accessing the head */
470		switch (Int_val(head)) {
471			case 0:
472				mode |= CS_MODE_LITTLE_ENDIAN;
473				break;
474			case 1:
475				mode |= CS_OPT_SYNTAX_INTEL;
476				break;
477			case 2:
478				mode |= CS_MODE_ARM;
479				break;
480			case 3:
481				mode |= CS_MODE_16;
482				break;
483			case 4:
484				mode |= CS_MODE_32;
485				break;
486			case 5:
487				mode |= CS_MODE_64;
488				break;
489			case 6:
490				mode |= CS_MODE_THUMB;
491				break;
492			case 7:
493				mode |= CS_MODE_MICRO;
494				break;
495			case 8:
496				mode |= CS_MODE_N64;
497				break;
498			case 9:
499				mode |= CS_OPT_SYNTAX_ATT;
500				break;
501			case 10:
502				mode |= CS_MODE_BIG_ENDIAN;
503				break;
504			default:
505				caml_invalid_argument("Error message");
506				return Val_emptylist;
507		}
508		_mode = Field(_mode, 1);  /* point to the tail for next loop */
509	}
510
511	//CS_ERR_OK = 0,	// No error: everything was fine
512	if (cs_open(arch, mode, &handle) != 0)
513		return Val_emptylist;
514
515	if (cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON) != 0)
516		CAMLreturn(Val_int(0));
517
518	code = (uint8_t *)String_val(_code);
519	code_len = caml_string_length(_code);
520	addr = Int64_val(_addr);
521	count = Int64_val(_count);
522
523    CAMLreturn(_cs_disasm(arch, handle, code, code_len, addr, count));
524}
525
526CAMLprim value ocaml_cs_disasm_dyn(value _arch, value _handle, value _code, value _addr, value _count)
527{
528	CAMLparam5(_arch, _handle, _code, _addr, _count);
529	csh handle;
530	cs_arch arch;
531	const uint8_t *code;
532	uint64_t addr, count, code_len;
533
534	handle = Int64_val(_handle);
535
536	arch = Int_val(_arch);
537	code = (uint8_t *)String_val(_code);
538	code_len = caml_string_length(_code);
539	addr = Int64_val(_addr);
540	count = Int64_val(_count);
541
542    CAMLreturn(_cs_disasm(arch, handle, code, code_len, addr, count));
543}
544
545CAMLprim value ocaml_cs_open(value _arch, value _mode)
546{
547	CAMLparam2(_arch, _mode);
548	CAMLlocal2(list, head);
549	csh handle;
550	cs_arch arch;
551	cs_mode mode = 0;
552
553	list = Val_emptylist;
554
555	switch (Int_val(_arch)) {
556		case 0:
557			arch = CS_ARCH_ARM;
558			break;
559		case 1:
560			arch = CS_ARCH_ARM64;
561			break;
562		case 2:
563			arch = CS_ARCH_MIPS;
564			break;
565		case 3:
566			arch = CS_ARCH_PPC;
567			break;
568		case 4:
569			arch = CS_ARCH_X86;
570			break;
571		default:
572			caml_invalid_argument("Error message");
573			return Val_emptylist;
574	}
575
576
577	while (_mode != Val_emptylist) {
578		head = Field(_mode, 0);  /* accessing the head */
579		switch (Int_val(head)) {
580			case 0:
581				mode |= CS_MODE_LITTLE_ENDIAN;
582				break;
583			case 1:
584				mode |= CS_OPT_SYNTAX_INTEL;
585				break;
586			case 2:
587				mode |= CS_MODE_ARM;
588				break;
589			case 3:
590				mode |= CS_MODE_16;
591				break;
592			case 4:
593				mode |= CS_MODE_32;
594				break;
595			case 5:
596				mode |= CS_MODE_64;
597				break;
598			case 6:
599				mode |= CS_MODE_THUMB;
600				break;
601			case 7:
602				mode |= CS_MODE_MICRO;
603				break;
604			case 8:
605				mode |= CS_MODE_N64;
606				break;
607			case 9:
608				mode |= CS_OPT_SYNTAX_ATT;
609				break;
610			case 10:
611				mode |= CS_MODE_BIG_ENDIAN;
612				break;
613			default:
614				caml_invalid_argument("Error message");
615				return Val_emptylist;
616		}
617		_mode = Field(_mode, 1);  /* point to the tail for next loop */
618	}
619
620	if (cs_open(arch, mode, &handle) != 0)
621		CAMLreturn(Val_int(0));
622
623	if (cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON) != 0)
624		CAMLreturn(Val_int(0));
625
626	CAMLlocal1(result);
627	result = caml_alloc(1, 0);
628	Store_field(result, 0, caml_copy_int64(handle));
629	CAMLreturn(result);
630}
631
632CAMLprim value cs_register_name(value _handle, value _reg)
633{
634	const char *name = cs_reg_name(Int64_val(_handle), Int_val(_reg));
635	if(!name) {
636		caml_invalid_argument("invalid reg_id");
637		name = "invalid";
638	}
639	return caml_copy_string(name);
640}
641
642CAMLprim value cs_instruction_name(value _handle, value _insn)
643{
644	const char *name = cs_insn_name(Int64_val(_handle), Int_val(_insn));
645	if(!name) {
646		caml_invalid_argument("invalid insn_id");
647		name = "invalid";
648	}
649	return caml_copy_string(name);
650}
651