/* ocaml.c revision 58fe9716d9c703897182aea420357db64ad6bb5d */
1/* Capstone Disassembler Engine */ 2/* By Nguyen Anh Quynh <aquynh@gmail.com>, 2013> */ 3 4#include <stdio.h> // debug 5#include <string.h> 6#include <caml/mlvalues.h> 7#include <caml/memory.h> 8#include <caml/alloc.h> 9#include <caml/fail.h> 10 11#include "../../include/capstone.h" 12 13#define ARR_SIZE(a) (sizeof(a)/sizeof(a[0])) 14 15 16// count the number of positive members in @list 17static unsigned int list_count(uint8_t *list, unsigned int max) 18{ 19 unsigned int i; 20 21 for(i = 0; i < max; i++) 22 if (list[i] == 0) 23 return i; 24 25 return max; 26} 27 28CAMLprim value _cs_disasm(cs_arch arch, csh handle, const uint8_t * code, size_t code_len, uint64_t addr, size_t count) 29{ 30 CAMLparam0(); 31 CAMLlocal5(list, cons, rec_insn, array, tmp); 32 CAMLlocal4(arch_info, op_info_val, tmp2, tmp3); 33 cs_insn *insn; 34 35 list = Val_emptylist; 36 37 size_t c = cs_disasm_ex(handle, code, code_len, addr, count, &insn); 38 39 if (c) { 40 //printf("Found %lu insn, addr: %lx\n", c, addr); 41 uint64_t j; 42 for (j = c; j > 0; j--) { 43 unsigned int lcount, i; 44 cons = caml_alloc(2, 0); 45 46 rec_insn = caml_alloc(13, 0); 47 Store_field(rec_insn, 0, Val_int(insn[j-1].id)); 48 Store_field(rec_insn, 1, Val_int(insn[j-1].address)); 49 Store_field(rec_insn, 2, Val_int(insn[j-1].size)); 50 51 Store_field(rec_insn, 4, caml_copy_string(insn[j-1].mnemonic)); 52 Store_field(rec_insn, 5, caml_copy_string(insn[j-1].op_str)); 53 54 // copy raw bytes of instruction 55 lcount = insn[j-1].size; 56 if (lcount) { 57 array = caml_alloc(lcount, 0); 58 for (i = 0; i < lcount; i++) { 59 Store_field(array, i, Val_int(insn[j-1].bytes[i])); 60 } 61 } else 62 array = Atom(0); // empty list 63 Store_field(rec_insn, 3, array); 64 65 66 // copy read registers 67 lcount = (insn[j-1]).detail->regs_read_count; 68 if (lcount) { 69 array = caml_alloc(lcount, 0); 70 for (i = 0; i < lcount; i++) { 71 Store_field(array, i, Val_int(insn[j-1].detail->regs_read[i])); 72 } 73 } else 74 array = Atom(0); // 
empty list 75 Store_field(rec_insn, 6, array); 76 Store_field(rec_insn, 7, Val_int(lcount)); 77 78 lcount = (insn[j-1]).detail->regs_write_count; 79 if (lcount) { 80 array = caml_alloc(lcount, 0); 81 for (i = 0; i < lcount; i++) { 82 Store_field(array, i, Val_int(insn[j-1].detail->regs_write[i])); 83 } 84 } else 85 array = Atom(0); // empty list 86 Store_field(rec_insn, 8, array); 87 Store_field(rec_insn, 9, Val_int(lcount)); 88 89 90 lcount = (insn[j-1]).detail->groups_count; 91 if (lcount) { 92 array = caml_alloc(lcount, 0); 93 for (i = 0; i < lcount; i++) { 94 Store_field(array, i, Val_int(insn[j-1].detail->groups[i])); 95 } 96 } else 97 array = Atom(0); // empty list 98 Store_field(rec_insn, 10, array); 99 Store_field(rec_insn, 11, Val_int(lcount)); 100 101 102 103 if(insn[j-1].detail) 104 switch(arch) { 105 case CS_ARCH_ARM: 106 arch_info = caml_alloc(1, 0); 107 108 op_info_val = caml_alloc(5, 0); 109 Store_field(op_info_val, 0, Val_int(insn[j-1].detail->arm.cc)); 110 Store_field(op_info_val, 1, Val_bool(insn[j-1].detail->arm.update_flags)); 111 Store_field(op_info_val, 2, Val_bool(insn[j-1].detail->arm.writeback)); 112 113 lcount = insn[j-1].detail->arm.op_count; 114 115 Store_field(op_info_val, 3, Val_int(lcount)); 116 if (lcount > 0) { 117 array = caml_alloc(lcount, 0); 118 for (i = 0; i < lcount; i++) { 119 tmp2 = caml_alloc(2, 0); 120 switch(insn[j-1].detail->arm.operands[i].type) { 121 case ARM_OP_REG: 122 tmp = caml_alloc(1, 1); 123 Store_field(tmp, 0, Val_int(insn[j-1].detail->arm.operands[i].reg)); 124 break; 125 case ARM_OP_CIMM: 126 tmp = caml_alloc(1, 2); 127 Store_field(tmp, 0, Val_int(insn[j-1].detail->arm.operands[i].imm)); 128 break; 129 case ARM_OP_PIMM: 130 tmp = caml_alloc(1, 3); 131 Store_field(tmp, 0, Val_int(insn[j-1].detail->arm.operands[i].imm)); 132 break; 133 case ARM_OP_IMM: 134 tmp = caml_alloc(1, 4); 135 Store_field(tmp, 0, Val_int(insn[j-1].detail->arm.operands[i].imm)); 136 break; 137 case ARM_OP_FP: 138 tmp = caml_alloc(1, 5); 
139 Store_field(tmp, 0, caml_copy_double(insn[j-1].detail->arm.operands[i].fp)); 140 break; 141 case ARM_OP_MEM: 142 tmp = caml_alloc(1, 6); 143 tmp3 = caml_alloc(4, 0); 144 Store_field(tmp3, 0, Val_int(insn[j-1].detail->arm.operands[i].mem.base)); 145 Store_field(tmp3, 1, Val_int(insn[j-1].detail->arm.operands[i].mem.index)); 146 Store_field(tmp3, 2, Val_int(insn[j-1].detail->arm.operands[i].mem.scale)); 147 Store_field(tmp3, 3, Val_int(insn[j-1].detail->arm.operands[i].mem.disp)); 148 Store_field(tmp, 0, tmp3); 149 break; 150 default: break; 151 } 152 tmp3 = caml_alloc(2, 0); 153 Store_field(tmp3, 0, Val_int(insn[j-1].detail->arm.operands[i].shift.type)); 154 Store_field(tmp3, 1, Val_int(insn[j-1].detail->arm.operands[i].shift.value)); 155 Store_field(tmp2, 0, tmp3); 156 Store_field(tmp2, 1, tmp); 157 Store_field(array, i, tmp2); 158 } 159 } else // empty list 160 array = Atom(0); 161 162 Store_field(op_info_val, 4, array); 163 164 // finally, insert this into arch_info 165 Store_field(arch_info, 0, op_info_val); 166 167 Store_field(rec_insn, 12, arch_info); 168 169 break; 170 case CS_ARCH_ARM64: 171 arch_info = caml_alloc(1, 1); 172 173 op_info_val = caml_alloc(5, 0); 174 Store_field(op_info_val, 0, Val_int(insn[j-1].detail->arm64.cc)); 175 Store_field(op_info_val, 1, Val_bool(insn[j-1].detail->arm64.update_flags)); 176 Store_field(op_info_val, 2, Val_bool(insn[j-1].detail->arm64.writeback)); 177 lcount = insn[j-1].detail->arm64.op_count; 178 179 Store_field(op_info_val, 3, Val_int(lcount)); 180 181 if (lcount > 0) { 182 array = caml_alloc(lcount, 0); 183 for (i = 0; i < lcount; i++) { 184 tmp2 = caml_alloc(3, 0); 185 switch(insn[j-1].detail->arm64.operands[i].type) { 186 case ARM64_OP_REG: 187 tmp = caml_alloc(1, 1); 188 Store_field(tmp, 0, Val_int(insn[j-1].detail->arm64.operands[i].reg)); 189 break; 190 case ARM64_OP_CIMM: 191 tmp = caml_alloc(1, 2); 192 Store_field(tmp, 0, Val_int(insn[j-1].detail->arm64.operands[i].imm)); 193 break; 194 case ARM64_OP_IMM: 
195 tmp = caml_alloc(1, 3); 196 Store_field(tmp, 0, Val_int(insn[j-1].detail->arm64.operands[i].imm)); 197 break; 198 case ARM64_OP_FP: 199 tmp = caml_alloc(1, 4); 200 Store_field(tmp, 0, caml_copy_double(insn[j-1].detail->arm64.operands[i].fp)); 201 break; 202 case ARM64_OP_MEM: 203 tmp = caml_alloc(1, 5); 204 tmp3 = caml_alloc(3, 0); 205 Store_field(tmp3, 0, Val_int(insn[j-1].detail->arm64.operands[i].mem.base)); 206 Store_field(tmp3, 1, Val_int(insn[j-1].detail->arm64.operands[i].mem.index)); 207 Store_field(tmp3, 2, Val_int(insn[j-1].detail->arm64.operands[i].mem.disp)); 208 Store_field(tmp, 0, tmp3); 209 break; 210 default: break; 211 } 212 tmp3 = caml_alloc(2, 0); 213 Store_field(tmp3, 0, Val_int(insn[j-1].detail->arm64.operands[i].shift.type)); 214 Store_field(tmp3, 1, Val_int(insn[j-1].detail->arm64.operands[i].shift.value)); 215 Store_field(tmp2, 0, tmp3); 216 Store_field(tmp2, 1, Val_int(insn[j-1].detail->arm64.operands[i].ext)); 217 218 Store_field(tmp2, 2, tmp); 219 Store_field(array, i, tmp2); 220 } 221 } else // empty array 222 array = Atom(0); 223 224 Store_field(op_info_val, 4, array); 225 226 // finally, insert this into arch_info 227 Store_field(arch_info, 0, op_info_val); 228 229 Store_field(rec_insn, 12, arch_info); 230 231 break; 232 case CS_ARCH_MIPS: 233 arch_info = caml_alloc(1, 2); 234 235 op_info_val = caml_alloc(2, 0); 236 237 lcount = insn[j-1].detail->mips.op_count; 238 239 Store_field(op_info_val, 0, Val_int(lcount)); 240 241 if (lcount > 0) { 242 array = caml_alloc(lcount, 0); 243 for (i = 0; i < lcount; i++) { 244 tmp2 = caml_alloc(1, 0); 245 switch(insn[j-1].detail->mips.operands[i].type) { 246 case MIPS_OP_REG: 247 tmp = caml_alloc(1, 1); 248 Store_field(tmp, 0, Val_int(insn[j-1].detail->mips.operands[i].reg)); 249 break; 250 case MIPS_OP_IMM: 251 tmp = caml_alloc(1, 2); 252 Store_field(tmp, 0, Val_int(insn[j-1].detail->mips.operands[i].imm)); 253 break; 254 case MIPS_OP_MEM: 255 tmp = caml_alloc(1, 3); 256 tmp3 = caml_alloc(2, 0); 
257 Store_field(tmp3, 0, Val_int(insn[j-1].detail->mips.operands[i].mem.base)); 258 Store_field(tmp3, 1, Val_int(insn[j-1].detail->mips.operands[i].mem.disp)); 259 Store_field(tmp, 0, tmp3); 260 break; 261 default: break; 262 } 263 Store_field(tmp2, 0, tmp); 264 Store_field(array, i, tmp2); 265 } 266 } else // empty array 267 array = Atom(0); 268 269 Store_field(op_info_val, 1, array); 270 271 // finally, insert this into arch_info 272 Store_field(arch_info, 0, op_info_val); 273 274 Store_field(rec_insn, 12, arch_info); 275 276 break; 277 case CS_ARCH_PPC: 278 279 arch_info = caml_alloc(1, 3); 280 281 op_info_val = caml_alloc(5, 0); 282 283 Store_field(op_info_val, 0, Val_int(insn[j-1].detail->ppc.bc)); 284 Store_field(op_info_val, 1, Val_int(insn[j-1].detail->ppc.bh)); 285 Store_field(op_info_val, 2, Val_bool(insn[j-1].detail->ppc.update_cr0)); 286 287 lcount = insn[j-1].detail->ppc.op_count; 288 289 Store_field(op_info_val, 3, Val_int(lcount)); 290 291 if (lcount > 0) { 292 array = caml_alloc(lcount, 0); 293 for (i = 0; i < lcount; i++) { 294 tmp2 = caml_alloc(1, 0); 295 switch(insn[j-1].detail->ppc.operands[i].type) { 296 case PPC_OP_REG: 297 tmp = caml_alloc(1, 1); 298 Store_field(tmp, 0, Val_int(insn[j-1].detail->ppc.operands[i].reg)); 299 break; 300 case PPC_OP_IMM: 301 tmp = caml_alloc(1, 2); 302 Store_field(tmp, 0, Val_int(insn[j-1].detail->ppc.operands[i].imm)); 303 break; 304 case PPC_OP_MEM: 305 tmp = caml_alloc(1, 3); 306 tmp3 = caml_alloc(2, 0); 307 Store_field(tmp3, 0, Val_int(insn[j-1].detail->ppc.operands[i].mem.base)); 308 Store_field(tmp3, 1, Val_int(insn[j-1].detail->ppc.operands[i].mem.disp)); 309 Store_field(tmp, 0, tmp3); 310 break; 311 default: break; 312 } 313 Store_field(tmp2, 0, tmp); 314 Store_field(array, i, tmp2); 315 } 316 } else // empty array 317 array = Atom(0); 318 319 Store_field(op_info_val, 4, array); 320 321 // finally, insert this into arch_info 322 Store_field(arch_info, 0, op_info_val); 323 324 Store_field(rec_insn, 12, 
arch_info); 325 326 break; 327 328 case CS_ARCH_X86: 329 330 arch_info = caml_alloc(1, 4); 331 332 op_info_val = caml_alloc(15, 0); 333 334 // fill prefix 335 lcount = list_count(insn[j-1].detail->x86.prefix, ARR_SIZE(insn[j-1].detail->x86.prefix)); 336 if(lcount) { 337 array = caml_alloc(lcount, 0); 338 for (i = 0; i < lcount; i++) { 339 Store_field(array, i, Val_int(insn[j-1].detail->x86.prefix[i])); 340 } 341 } else 342 array = Atom(0); 343 Store_field(op_info_val, 0, array); 344 345 Store_field(op_info_val, 1, Val_int(insn[j-1].detail->x86.segment)); 346 347 // fill opcode 348 lcount = list_count(insn[j-1].detail->x86.opcode, ARR_SIZE(insn[j-1].detail->x86.opcode)); 349 if(lcount) { 350 array = caml_alloc(lcount, 0); 351 for (i = 0; i < lcount; i++) { 352 Store_field(array, i, Val_int(insn[j-1].detail->x86.opcode[i])); 353 } 354 } else 355 array = Atom(0); 356 Store_field(op_info_val, 2, array); 357 Store_field(op_info_val, 3, Val_int(insn[j-1].detail->x86.op_size)); 358 359 Store_field(op_info_val, 4, Val_int(insn[j-1].detail->x86.addr_size)); 360 361 Store_field(op_info_val, 5, Val_int(insn[j-1].detail->x86.disp_size)); 362 363 Store_field(op_info_val, 6, Val_int(insn[j-1].detail->x86.imm_size)); 364 365 Store_field(op_info_val, 7, Val_int(insn[j-1].detail->x86.modrm)); 366 367 Store_field(op_info_val, 8, Val_int(insn[j-1].detail->x86.sib)); 368 369 Store_field(op_info_val, 9, Val_int(insn[j-1].detail->x86.disp)); 370 371 Store_field(op_info_val, 10, Val_int(insn[j-1].detail->x86.sib_index)); 372 373 Store_field(op_info_val, 11, Val_int(insn[j-1].detail->x86.sib_scale)); 374 375 Store_field(op_info_val, 12, Val_int(insn[j-1].detail->x86.sib_base)); 376 377 lcount = insn[j-1].detail->x86.op_count; 378 379 Store_field(op_info_val, 13, Val_int(lcount)); 380 381 if (lcount > 0) { 382 array = caml_alloc(lcount, 0); 383 for (i = 0; i < lcount; i++) { 384 switch(insn[j-1].detail->x86.operands[i].type) { 385 case X86_OP_REG: 386 tmp = caml_alloc(1, 1); 387 
Store_field(tmp, 0, Val_int(insn[j-1].detail->x86.operands[i].reg)); 388 break; 389 case X86_OP_IMM: 390 tmp = caml_alloc(1, 2); 391 Store_field(tmp, 0, Val_int(insn[j-1].detail->x86.operands[i].imm)); 392 break; 393 case X86_OP_FP: 394 tmp = caml_alloc(1, 3); 395 Store_field(tmp, 0, caml_copy_double(insn[j-1].detail->x86.operands[i].fp)); 396 break; 397 case X86_OP_MEM: 398 tmp = caml_alloc(1, 4); 399 tmp2 = caml_alloc(4, 0); 400 Store_field(tmp2, 0, Val_int(insn[j-1].detail->x86.operands[i].mem.base)); 401 Store_field(tmp2, 1, Val_int(insn[j-1].detail->x86.operands[i].mem.index)); 402 Store_field(tmp2, 2, Val_int(insn[j-1].detail->x86.operands[i].mem.scale)); 403 Store_field(tmp2, 3, Val_int(insn[j-1].detail->x86.operands[i].mem.disp)); 404 Store_field(tmp, 0, tmp2); 405 break; 406 default: 407 break; 408 } 409 Store_field(array, i, tmp); 410 } 411 } else 412 array = Atom(0); // empty array 413 414 Store_field(op_info_val, 14, array); 415 416 // finally, insert this into arch_info 417 Store_field(arch_info, 0, op_info_val); 418 419 Store_field(rec_insn, 12, arch_info); 420 break; 421 422 default: break; 423 } 424 425 Store_field(cons, 0, rec_insn); // head 426 Store_field(cons, 1, list); // tail 427 list = cons; 428 } 429 cs_free(insn, count); 430 } 431 // do not free the handle here 432 //cs_close(&handle); 433 CAMLreturn(list); 434} 435 436CAMLprim value ocaml_cs_disasm_quick(value _arch, value _mode, value _code, value _addr, value _count) 437{ 438 CAMLparam5(_arch, _mode, _code, _addr, _count); 439 CAMLlocal1(head); 440 csh handle; 441 cs_arch arch; 442 cs_mode mode = 0; 443 const uint8_t *code; 444 uint64_t addr; 445 size_t count, code_len; 446 447 switch (Int_val(_arch)) { 448 case 0: 449 arch = CS_ARCH_ARM; 450 break; 451 case 1: 452 arch = CS_ARCH_ARM64; 453 break; 454 case 2: 455 arch = CS_ARCH_MIPS; 456 break; 457 case 3: 458 arch = CS_ARCH_PPC; 459 break; 460 case 4: 461 arch = CS_ARCH_X86; 462 break; 463 default: 464 caml_invalid_argument("Error 
message"); 465 return Val_emptylist; 466 } 467 468 while (_mode != Val_emptylist) { 469 head = Field(_mode, 0); /* accessing the head */ 470 switch (Int_val(head)) { 471 case 0: 472 mode |= CS_MODE_LITTLE_ENDIAN; 473 break; 474 case 1: 475 mode |= CS_OPT_SYNTAX_INTEL; 476 break; 477 case 2: 478 mode |= CS_MODE_ARM; 479 break; 480 case 3: 481 mode |= CS_MODE_16; 482 break; 483 case 4: 484 mode |= CS_MODE_32; 485 break; 486 case 5: 487 mode |= CS_MODE_64; 488 break; 489 case 6: 490 mode |= CS_MODE_THUMB; 491 break; 492 case 7: 493 mode |= CS_MODE_MICRO; 494 break; 495 case 8: 496 mode |= CS_MODE_N64; 497 break; 498 case 9: 499 mode |= CS_OPT_SYNTAX_ATT; 500 break; 501 case 10: 502 mode |= CS_MODE_BIG_ENDIAN; 503 break; 504 default: 505 caml_invalid_argument("Error message"); 506 return Val_emptylist; 507 } 508 _mode = Field(_mode, 1); /* point to the tail for next loop */ 509 } 510 511 //CS_ERR_OK = 0, // No error: everything was fine 512 if (cs_open(arch, mode, &handle) != 0) 513 return Val_emptylist; 514 515 if (cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON) != 0) 516 CAMLreturn(Val_int(0)); 517 518 code = (uint8_t *)String_val(_code); 519 code_len = caml_string_length(_code); 520 addr = Int64_val(_addr); 521 count = Int64_val(_count); 522 523 CAMLreturn(_cs_disasm(arch, handle, code, code_len, addr, count)); 524} 525 526CAMLprim value ocaml_cs_disasm_dyn(value _arch, value _handle, value _code, value _addr, value _count) 527{ 528 CAMLparam5(_arch, _handle, _code, _addr, _count); 529 csh handle; 530 cs_arch arch; 531 const uint8_t *code; 532 uint64_t addr, count, code_len; 533 534 handle = Int64_val(_handle); 535 536 arch = Int_val(_arch); 537 code = (uint8_t *)String_val(_code); 538 code_len = caml_string_length(_code); 539 addr = Int64_val(_addr); 540 count = Int64_val(_count); 541 542 CAMLreturn(_cs_disasm(arch, handle, code, code_len, addr, count)); 543} 544 545CAMLprim value ocaml_cs_open(value _arch, value _mode) 546{ 547 CAMLparam2(_arch, _mode); 548 
CAMLlocal2(list, head); 549 csh handle; 550 cs_arch arch; 551 cs_mode mode = 0; 552 553 list = Val_emptylist; 554 555 switch (Int_val(_arch)) { 556 case 0: 557 arch = CS_ARCH_ARM; 558 break; 559 case 1: 560 arch = CS_ARCH_ARM64; 561 break; 562 case 2: 563 arch = CS_ARCH_MIPS; 564 break; 565 case 3: 566 arch = CS_ARCH_PPC; 567 break; 568 case 4: 569 arch = CS_ARCH_X86; 570 break; 571 default: 572 caml_invalid_argument("Error message"); 573 return Val_emptylist; 574 } 575 576 577 while (_mode != Val_emptylist) { 578 head = Field(_mode, 0); /* accessing the head */ 579 switch (Int_val(head)) { 580 case 0: 581 mode |= CS_MODE_LITTLE_ENDIAN; 582 break; 583 case 1: 584 mode |= CS_OPT_SYNTAX_INTEL; 585 break; 586 case 2: 587 mode |= CS_MODE_ARM; 588 break; 589 case 3: 590 mode |= CS_MODE_16; 591 break; 592 case 4: 593 mode |= CS_MODE_32; 594 break; 595 case 5: 596 mode |= CS_MODE_64; 597 break; 598 case 6: 599 mode |= CS_MODE_THUMB; 600 break; 601 case 7: 602 mode |= CS_MODE_MICRO; 603 break; 604 case 8: 605 mode |= CS_MODE_N64; 606 break; 607 case 9: 608 mode |= CS_OPT_SYNTAX_ATT; 609 break; 610 case 10: 611 mode |= CS_MODE_BIG_ENDIAN; 612 break; 613 default: 614 caml_invalid_argument("Error message"); 615 return Val_emptylist; 616 } 617 _mode = Field(_mode, 1); /* point to the tail for next loop */ 618 } 619 620 if (cs_open(arch, mode, &handle) != 0) 621 CAMLreturn(Val_int(0)); 622 623 if (cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON) != 0) 624 CAMLreturn(Val_int(0)); 625 626 CAMLlocal1(result); 627 result = caml_alloc(1, 0); 628 Store_field(result, 0, caml_copy_int64(handle)); 629 CAMLreturn(result); 630} 631 632CAMLprim value cs_register_name(value _handle, value _reg) 633{ 634 const char *name = cs_reg_name(Int64_val(_handle), Int_val(_reg)); 635 if(!name) { 636 caml_invalid_argument("invalid reg_id"); 637 name = "invalid"; 638 } 639 return caml_copy_string(name); 640} 641 642CAMLprim value cs_instruction_name(value _handle, value _insn) 643{ 644 const char *name = 
cs_insn_name(Int64_val(_handle), Int_val(_insn)); 645 if(!name) { 646 caml_invalid_argument("invalid insn_id"); 647 name = "invalid"; 648 } 649 return caml_copy_string(name); 650} 651