/*--------------------------------------------------------------------*/
/*--- An example Valgrind tool.                          lk_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Lackey, an example Valgrind tool that does
   some simple program measurement and tracing.

   Copyright (C) 2002-2011 Nicholas Nethercote
      njn@valgrind.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

// This tool shows how to do some basic instrumentation.
//
// There are four kinds of instrumentation it can do.  They can be turned
// on/off independently with command line options:
//
// * --basic-counts   : do basic counts, eg. number of instructions
//                      executed, jumps executed, etc.
// * --detailed-counts: do more detailed counts:  number of loads, stores
//                      and ALU operations of different sizes.
// * --trace-mem=yes:   trace all (data) memory accesses.
// * --trace-superblocks=yes:
//                      trace all superblock entries.  Mostly of interest
//                      to the Valgrind developers.
//
// The code for each kind of instrumentation is guarded by a clo_* variable:
// clo_basic_counts, clo_detailed_counts, clo_trace_mem and clo_trace_sbs.
//
// If you want to modify any of the instrumentation code, look for the code
// that is guarded by the relevant clo_* variable (eg. clo_trace_mem).
// If you're not interested in the other kinds of instrumentation you can
// remove them.  If you want to do more complex modifications, please read
// VEX/pub/libvex_ir.h to understand the intermediate representation.
//
//
// Specific Details about --trace-mem=yes
// --------------------------------------
// Lackey's --trace-mem code is a good starting point for building Valgrind
// tools that act on memory loads and stores.  It could also be used as is,
// with its output used as input to a post-mortem processing step.  However,
// because memory traces can be very large, online analysis is generally
// better.
//
// It prints memory data access traces that look like this:
//
//   I  0023C790,2  # instruction read at 0x0023C790 of size 2
//   I  0023C792,5
//    S BE80199C,4  # data store at 0xBE80199C of size 4
//   I  0025242B,3
//    L BE801950,4  # data load at 0xBE801950 of size 4
//   I  0023D476,7
//    M 0025747C,1  # data modify at 0x0025747C of size 1
//   I  0023DC20,2
//    L 00254962,1
//    L BE801FB3,1
//   I  00252305,1
//    L 00254AEB,1
//    S 00257998,1
//
// Every instruction executed has an "instr" event representing it.
// Instructions that do memory accesses are followed by one or more "load",
// "store" or "modify" events.  Some instructions do more than one load or
// store, as in the last two examples in the above trace.
//
// Here are some examples of x86 instructions that do different combinations
// of loads, stores, and modifies.
//
//    Instruction          Memory accesses                  Event sequence
//    -----------          ---------------                  --------------
//    add %eax, %ebx       No loads or stores               instr
//
//    movl (%eax), %ebx    loads (%eax)                     instr, load
//
//    movl %eax, (%ebx)    stores (%ebx)                    instr, store
//
//    incl (%ecx)          modifies (%ecx)                  instr, modify
//
//    cmpsb                loads (%esi), loads (%edi)       instr, load, load
//
//    call*l (%edx)        loads (%edx), stores -4(%esp)    instr, load, store
//    pushl (%edx)         loads (%edx), stores -4(%esp)    instr, load, store
//    movsw                loads (%esi), stores (%edi)      instr, load, store
//
// Instructions using x86 "rep" prefixes are traced as if they are repeated
// N times.
//
// Lackey with --trace-mem gives good traces, but they are not perfect, for
// the following reasons:
//
// - It does not trace into the OS kernel, so system calls and other kernel
//   operations (eg. some scheduling and signal handling code) are ignored.
//
// - It could model loads and stores done at the system call boundary using
//   the pre_mem_read/post_mem_write events.  For example, if you call
//   fstat() you know that the passed in buffer has been written.  But it
//   currently does not do this.
//
// - Valgrind replaces some code (not much) with its own, notably parts of
//   code for scheduling operations and signal handling.  This code is not
//   traced.
//
// - There is no consideration of virtual-to-physical address mapping.
//   This may not matter for many purposes.
//
// - Valgrind modifies the instruction stream in some very minor ways.  For
//   example, on x86 the bts, btc, btr instructions are incorrectly
//   considered to always touch memory (this is a consequence of these
//   instructions being very difficult to simulate).
//
// - Valgrind tools lay out memory differently from normal programs, so the
//   addresses you get will not be typical.  Thus Lackey (and all Valgrind
//   tools) is suitable for getting relative memory traces -- eg. if you
//   want to analyse locality of memory accesses -- but is not good if
//   absolute addresses are important.
//
// Despite all these warnings, Lackey's results should be good enough for a
// wide range of purposes.  For example, Cachegrind shares all the above
// shortcomings and it is still useful.
//
// For further inspiration, you should look at cachegrind/cg_main.c which
// uses the same basic technique for tracing memory accesses, but also groups
// events together for processing into twos and threes so that fewer C calls
// are made and things run faster.
//
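// As a concrete illustration of the post-mortem processing mentioned above,
// here is a minimal sketch (hypothetical; not part of Lackey or Valgrind)
// of a standalone consumer that reads a --trace-mem trace from stdin and
// tallies the four event kinds.  It assumes each line has the form
// "<tag> <hex-addr>,<size>" with tag one of I/L/S/M, as printed by the
// trace_* helpers further down in this file:
//
//   #include <stdio.h>
//
//   int main(void)
//   {
//      char tag;
//      unsigned long addr, size;
//      unsigned long counts[4] = { 0, 0, 0, 0 };  // I, L, S, M
//
//      while (scanf(" %c %lx,%lu", &tag, &addr, &size) == 3) {
//         switch (tag) {
//            case 'I': counts[0]++; break;
//            case 'L': counts[1]++; break;
//            case 'S': counts[2]++; break;
//            case 'M': counts[3]++; break;
//            default:  return 1;    // unexpected input
//         }
//      }
//      printf("instrs %lu, loads %lu, stores %lu, modifies %lu\n",
//             counts[0], counts[1], counts[2], counts[3]);
//      return 0;
//   }
//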
//
// Specific Details about --trace-superblocks=yes
// ----------------------------------------------
// Valgrind splits code up into single entry, multiple exit blocks
// known as superblocks.  By itself, --trace-superblocks=yes just
// prints a message as each superblock is run:
//
//  SB 04013170
//  SB 04013177
//  SB 04013173
//  SB 04013177
//
// The hex number is the address of the first instruction in the
// superblock.  You can see the relationship more obviously if you use
// --trace-superblocks=yes and --trace-mem=yes together.  Then a "SB"
// message at address X is immediately followed by an "I" (instruction
// read) line for that address, as the first instruction in the block
// is executed, for example:
//
//  SB 04014073
//  I  04014073,3
//   L 7FEFFF7F8,8
//  I  04014076,4
//  I  0401407A,3
//  I  0401407D,3
//  I  04014080,3
//  I  04014083,6


#include "pub_tool_basics.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_debuginfo.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_options.h"
#include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)

/*------------------------------------------------------------*/
/*--- Command line options                                 ---*/
/*------------------------------------------------------------*/

/* Command line options controlling instrumentation kinds, as described at
 * the top of this file. */
static Bool clo_basic_counts    = True;
static Bool clo_detailed_counts = False;
static Bool clo_trace_mem       = False;
static Bool clo_trace_sbs       = False;

/* The name of the function of which the number of calls (under
 * --basic-counts=yes) is to be counted, with default.  Override with command
 * line option --fnname. */
static Char* clo_fnname = "main";

static Bool lk_process_cmd_line_option(Char* arg)
{
   if VG_STR_CLO(arg, "--fnname", clo_fnname) {}
   else if VG_BOOL_CLO(arg, "--basic-counts",      clo_basic_counts) {}
   else if VG_BOOL_CLO(arg, "--detailed-counts",   clo_detailed_counts) {}
   else if VG_BOOL_CLO(arg, "--trace-mem",         clo_trace_mem) {}
   else if VG_BOOL_CLO(arg, "--trace-superblocks", clo_trace_sbs) {}
   else
      return False;

   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);
   return True;
}
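/* For reference, the options parsed above are used on the command line
 * like this (illustrative invocations; the target program ./a.out is a
 * placeholder):
 *
 *    valgrind --tool=lackey ./a.out
 *    valgrind --tool=lackey --basic-counts=yes --fnname=main ./a.out
 *    valgrind --tool=lackey --detailed-counts=yes ./a.out
 *    valgrind --tool=lackey --trace-mem=yes ./a.out
 *    valgrind --tool=lackey --trace-superblocks=yes --trace-mem=yes ./a.out
 */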

static void lk_print_usage(void)
{
   VG_(printf)(
"    --basic-counts=no|yes       count instructions, jumps, etc. [yes]\n"
"    --detailed-counts=no|yes    count loads, stores and alu ops [no]\n"
"    --trace-mem=no|yes          trace all loads and stores [no]\n"
"    --trace-superblocks=no|yes  trace all superblock entries [no]\n"
"    --fnname=<name>             count calls to <name> (only used if\n"
"                                --basic-counts=yes)  [main]\n"
   );
}

static void lk_print_debug_usage(void)
{
   VG_(printf)(
"    (none)\n"
   );
}

/*------------------------------------------------------------*/
/*--- Stuff for --basic-counts                             ---*/
/*------------------------------------------------------------*/

/* Nb: use ULongs because the numbers can get very big */
static ULong n_func_calls    = 0;
static ULong n_SBs_entered   = 0;
static ULong n_SBs_completed = 0;
static ULong n_IRStmts       = 0;
static ULong n_guest_instrs  = 0;
static ULong n_Jccs          = 0;   // conditional jumps
static ULong n_Jccs_untaken  = 0;
static ULong n_IJccs         = 0;   // condition-inverted conditional jumps
static ULong n_IJccs_untaken = 0;

static void add_one_func_call(void)
{
   n_func_calls++;
}

static void add_one_SB_entered(void)
{
   n_SBs_entered++;
}

static void add_one_SB_completed(void)
{
   n_SBs_completed++;
}

static void add_one_IRStmt(void)
{
   n_IRStmts++;
}

static void add_one_guest_instr(void)
{
   n_guest_instrs++;
}

static void add_one_Jcc(void)
{
   n_Jccs++;
}

static void add_one_Jcc_untaken(void)
{
   n_Jccs_untaken++;
}

static void add_one_inverted_Jcc(void)
{
   n_IJccs++;
}

static void add_one_inverted_Jcc_untaken(void)
{
   n_IJccs_untaken++;
}

/*------------------------------------------------------------*/
/*--- Stuff for --detailed-counts                          ---*/
/*------------------------------------------------------------*/

/* --- Operations --- */

typedef enum { OpLoad=0, OpStore=1, OpAlu=2 } Op;

#define N_OPS 3


/* --- Types --- */

#define N_TYPES 10

static Int type2index ( IRType ty )
{
   switch (ty) {
      case Ity_I1:   return 0;
      case Ity_I8:   return 1;
      case Ity_I16:  return 2;
      case Ity_I32:  return 3;
      case Ity_I64:  return 4;
      case Ity_I128: return 5;
      case Ity_F32:  return 6;
      case Ity_F64:  return 7;
      case Ity_F128: return 8;
      case Ity_V128: return 9;
      default: tl_assert(0);
   }
}

static HChar* nameOfTypeIndex ( Int i )
{
   switch (i) {
      case 0: return "I1";
      case 1: return "I8";
      case 2: return "I16";
      case 3: return "I32";
      case 4: return "I64";
      case 5: return "I128";
      case 6: return "F32";
      case 7: return "F64";
      case 8: return "F128";
      case 9: return "V128";
      default: tl_assert(0);
   }
}


/* --- Counts --- */

static ULong detailCounts[N_OPS][N_TYPES];

/* The helper that is called from the instrumented code. */
static VG_REGPARM(1)
void increment_detail(ULong* detail)
{
   (*detail)++;
}

/* A helper that adds the instrumentation for a detail. */
static void instrument_detail(IRSB* sb, Op op, IRType type)
{
   IRDirty* di;
   IRExpr** argv;
   const UInt typeIx = type2index(type);

   tl_assert(op < N_OPS);
   tl_assert(typeIx < N_TYPES);

   argv = mkIRExprVec_1( mkIRExpr_HWord( (HWord)&detailCounts[op][typeIx] ) );
   di = unsafeIRDirty_0_N( 1, "increment_detail",
                              VG_(fnptr_to_fnentry)( &increment_detail ),
                              argv);
   addStmtToIRSB( sb, IRStmt_Dirty(di) );
}

/* Summarize and print the details. */
static void print_details ( void )
{
   Int typeIx;
   VG_(umsg)("   Type        Loads       Stores       AluOps\n");
   VG_(umsg)("   -------------------------------------------\n");
   for (typeIx = 0; typeIx < N_TYPES; typeIx++) {
      VG_(umsg)("   %4s %'12llu %'12llu %'12llu\n",
                nameOfTypeIndex( typeIx ),
                detailCounts[OpLoad ][typeIx],
                detailCounts[OpStore][typeIx],
                detailCounts[OpAlu  ][typeIx]
      );
   }
}


/*------------------------------------------------------------*/
/*--- Stuff for --trace-mem                                ---*/
/*------------------------------------------------------------*/

#define MAX_DSIZE    512

typedef
   IRExpr
   IRAtom;

typedef
   enum { Event_Ir, Event_Dr, Event_Dw, Event_Dm }
   EventKind;

typedef
   struct {
      EventKind ekind;
      IRAtom*   addr;
      Int       size;
   }
   Event;

/* Up to this many unnotified events are allowed.  Must be at least two,
   so that reads and writes to the same address can be merged into a modify.
   Beyond that, larger numbers just potentially induce more spilling due to
   extending live ranges of address temporaries. */
#define N_EVENTS 4

/* Maintain an ordered list of memory events which are outstanding, in
   the sense that no IR has yet been generated to do the relevant
   helper calls.  The SB is scanned top to bottom and memory events
   are added to the end of the list, merging with the most recent
   event in the list where possible (a Dw immediately following a Dr
   and having the same size and EA can be merged into a Dm).

   This merging is done so that for architectures which have
   load-op-store instructions (x86, amd64), the instr is treated as if
   it makes just one memory reference (a modify), rather than two (a
   read followed by a write at the same address).

   At various points the list will need to be flushed, that is, IR
   generated from it.  That must happen before any possible exit from
   the block (the end, or an IRStmt_Exit).  Flushing also takes place
   when there is no space to add a new event.

   If we require the simulation statistics to be up to date with
   respect to possible memory exceptions, then the list would have to
   be flushed before each memory reference.  That's a pain so we don't
   bother.

   Flushing the list consists of walking it start to end and emitting
   instrumentation IR for each event, in the order in which they
   appear. */

static Event events[N_EVENTS];
static Int   events_used = 0;
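
// To illustrate the merging described above, here is a hypothetical
// walk-through (schematic, not generated output) of instrumenting the
// x86 instruction "incl (%ecx)", whose IR contains a load of (%ecx)
// followed by a store to the same address:
//
//    addEvent_Ir(..)           events = [ {Ir, pc, 2} ]
//    addEvent_Dr(.., a, 4)     events = [ {Ir, pc, 2}, {Dr, a, 4} ]
//    addEvent_Dw(.., a, 4)     events = [ {Ir, pc, 2}, {Dm, a, 4} ]
//
// The Dw is not appended; it merges with the pending Dr of the same
// address and size, so flushEvents() later emits a single "M" line
// rather than an "L"/"S" pair.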


static VG_REGPARM(2) void trace_instr(Addr addr, SizeT size)
{
   VG_(printf)("I  %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_load(Addr addr, SizeT size)
{
   VG_(printf)(" L %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_store(Addr addr, SizeT size)
{
   VG_(printf)(" S %08lx,%lu\n", addr, size);
}

static VG_REGPARM(2) void trace_modify(Addr addr, SizeT size)
{
   VG_(printf)(" M %08lx,%lu\n", addr, size);
}


static void flushEvents(IRSB* sb)
{
   Int        i;
   Char*      helperName;
   void*      helperAddr;
   IRExpr**   argv;
   IRDirty*   di;
   Event*     ev;

   for (i = 0; i < events_used; i++) {

      ev = &events[i];

      // Decide on helper fn to call and args to pass it.
      switch (ev->ekind) {
         case Event_Ir: helperName = "trace_instr";
                        helperAddr =  trace_instr;  break;

         case Event_Dr: helperName = "trace_load";
                        helperAddr =  trace_load;   break;

         case Event_Dw: helperName = "trace_store";
                        helperAddr =  trace_store;  break;

         case Event_Dm: helperName = "trace_modify";
                        helperAddr =  trace_modify; break;
         default:
            tl_assert(0);
      }

      // Add the helper.
      argv = mkIRExprVec_2( ev->addr, mkIRExpr_HWord( ev->size ) );
      di   = unsafeIRDirty_0_N( /*regparms*/2,
                                helperName,
                                VG_(fnptr_to_fnentry)( helperAddr ),
                                argv );
      addStmtToIRSB( sb, IRStmt_Dirty(di) );
   }

   events_used = 0;
}

// WARNING: If you aren't interested in instruction reads, you can omit the
// code that adds calls to trace_instr() in flushEvents().  However, you
// must still call this function, addEvent_Ir() -- it is necessary to add
// the Ir events to the events list so that merging of paired load/store
// events into modify events works correctly.
static void addEvent_Ir ( IRSB* sb, IRAtom* iaddr, UInt isize )
{
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert( (VG_MIN_INSTR_SZB <= isize && isize <= VG_MAX_INSTR_SZB)
            || VG_CLREQ_SZB == isize );
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Ir;
   evt->addr  = iaddr;
   evt->size  = isize;
   events_used++;
}

static
void addEvent_Dr ( IRSB* sb, IRAtom* daddr, Int dsize )
{
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert(isIRAtom(daddr));
   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Dr;
   evt->addr  = daddr;
   evt->size  = dsize;
   events_used++;
}

static
void addEvent_Dw ( IRSB* sb, IRAtom* daddr, Int dsize )
{
   Event* lastEvt;
   Event* evt;
   tl_assert(clo_trace_mem);
   tl_assert(isIRAtom(daddr));
   tl_assert(dsize >= 1 && dsize <= MAX_DSIZE);

   // Is it possible to merge this write with the preceding read?  Only
   // look at the last event if there is one; computing &events[-1] on an
   // empty list would be undefined behaviour.
   if (events_used > 0) {
      lastEvt = &events[events_used-1];
      if (lastEvt->ekind == Event_Dr
          && lastEvt->size  == dsize
          && eqIRAtom(lastEvt->addr, daddr))
      {
         lastEvt->ekind = Event_Dm;
         return;
      }
   }

   // No.  Add as normal.
   if (events_used == N_EVENTS)
      flushEvents(sb);
   tl_assert(events_used >= 0 && events_used < N_EVENTS);
   evt = &events[events_used];
   evt->ekind = Event_Dw;
   evt->size  = dsize;
   evt->addr  = daddr;
   events_used++;
}


/*------------------------------------------------------------*/
/*--- Stuff for --trace-superblocks                        ---*/
/*------------------------------------------------------------*/

static void trace_superblock(Addr addr)
{
   VG_(printf)("SB %08lx\n", addr);
}


/*------------------------------------------------------------*/
/*--- Basic tool functions                                 ---*/
/*------------------------------------------------------------*/

static void lk_post_clo_init(void)
{
   Int op, tyIx;

   if (clo_detailed_counts) {
      for (op = 0; op < N_OPS; op++)
         for (tyIx = 0; tyIx < N_TYPES; tyIx++)
            detailCounts[op][tyIx] = 0;
   }
}

static
IRSB* lk_instrument ( VgCallbackClosure* closure,
                      IRSB* sbIn,
                      VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy )
{
   IRDirty*   di;
   Int        i;
   IRSB*      sbOut;
   Char       fnname[100];
   IRType     type;
   IRTypeEnv* tyenv = sbIn->tyenv;
   Addr       iaddr = 0, dst;
   UInt       ilen = 0;
   Bool       condition_inverted = False;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Set up SB */
   sbOut = deepCopyIRSBExceptStmts(sbIn);

   // Copy verbatim any IR preamble preceding the first IMark
   i = 0;
   while (i < sbIn->stmts_used && sbIn->stmts[i]->tag != Ist_IMark) {
      addStmtToIRSB( sbOut, sbIn->stmts[i] );
      i++;
   }

   if (clo_basic_counts) {
      /* Count this superblock. */
      di = unsafeIRDirty_0_N( 0, "add_one_SB_entered",
                                 VG_(fnptr_to_fnentry)( &add_one_SB_entered ),
                                 mkIRExprVec_0() );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_sbs) {
      /* Print this superblock's address. */
      di = unsafeIRDirty_0_N(
              0, "trace_superblock",
              VG_(fnptr_to_fnentry)( &trace_superblock ),
              mkIRExprVec_1( mkIRExpr_HWord( vge->base[0] ) )
           );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_mem) {
      events_used = 0;
   }

   for (/*use current i*/; i < sbIn->stmts_used; i++) {
      IRStmt* st = sbIn->stmts[i];
      if (!st || st->tag == Ist_NoOp) continue;

      if (clo_basic_counts) {
         /* Count one VEX statement. */
         di = unsafeIRDirty_0_N( 0, "add_one_IRStmt",
                                    VG_(fnptr_to_fnentry)( &add_one_IRStmt ),
                                    mkIRExprVec_0() );
         addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
      }

      switch (st->tag) {
         case Ist_NoOp:
         case Ist_AbiHint:
         case Ist_Put:
         case Ist_PutI:
         case Ist_MBE:
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_IMark:
            if (clo_basic_counts) {
               /* Needed to be able to check for inverted condition in
                  Ist_Exit */
               iaddr = st->Ist.IMark.addr;
               ilen  = st->Ist.IMark.len;

               /* Count guest instruction. */
               di = unsafeIRDirty_0_N( 0, "add_one_guest_instr",
                                          VG_(fnptr_to_fnentry)(
                                             &add_one_guest_instr ),
                                          mkIRExprVec_0() );
               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );

               /* An unconditional branch to a known destination in the
                * guest's instructions can be represented, in the IRSB to
                * instrument, by the VEX statements that are the
                * translation of that known destination.  This feature is
                * called 'SB chasing' and can be influenced by command
                * line option --vex-guest-chase-thresh.
                *
                * To get an accurate count of the calls to a specific
                * function, taking SB chasing into account, we need to
                * check for each guest instruction (Ist_IMark) if it is
                * the entry point of a function.
                */
               tl_assert(clo_fnname);
               tl_assert(clo_fnname[0]);
               if (VG_(get_fnname_if_entry)(st->Ist.IMark.addr,
                                            fnname, sizeof(fnname))
                   && 0 == VG_(strcmp)(fnname, clo_fnname)) {
                  di = unsafeIRDirty_0_N(
                          0, "add_one_func_call",
                          VG_(fnptr_to_fnentry)( &add_one_func_call ),
                          mkIRExprVec_0() );
                  addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
               }
            }
            if (clo_trace_mem) {
               // WARNING: do not remove this function call, even if you
               // aren't interested in instruction reads.  See the comment
               // above the function itself for more detail.
               addEvent_Ir( sbOut, mkIRExpr_HWord( (HWord)st->Ist.IMark.addr ),
                            st->Ist.IMark.len );
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_WrTmp:
            // Add a call to trace_load() if --trace-mem=yes.
            if (clo_trace_mem) {
               IRExpr* data = st->Ist.WrTmp.data;
               if (data->tag == Iex_Load) {
                  addEvent_Dr( sbOut, data->Iex.Load.addr,
                               sizeofIRType(data->Iex.Load.ty) );
               }
            }
            if (clo_detailed_counts) {
               IRExpr* expr = st->Ist.WrTmp.data;
               type = typeOfIRExpr(sbOut->tyenv, expr);
               tl_assert(type != Ity_INVALID);
               switch (expr->tag) {
                  case Iex_Load:
                     instrument_detail( sbOut, OpLoad, type );
                     break;
                  case Iex_Unop:
                  case Iex_Binop:
                  case Iex_Triop:
                  case Iex_Qop:
                  case Iex_Mux0X:
                     instrument_detail( sbOut, OpAlu, type );
                     break;
                  default:
                     break;
               }
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_Store:
            if (clo_trace_mem) {
               IRExpr* data = st->Ist.Store.data;
               addEvent_Dw( sbOut, st->Ist.Store.addr,
                            sizeofIRType(typeOfIRExpr(tyenv, data)) );
            }
            if (clo_detailed_counts) {
               type = typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data);
               tl_assert(type != Ity_INVALID);
               instrument_detail( sbOut, OpStore, type );
            }
            addStmtToIRSB( sbOut, st );
            break;

         case Ist_Dirty: {
            if (clo_trace_mem) {
               Int      dsize;
               IRDirty* d = st->Ist.Dirty.details;
               if (d->mFx != Ifx_None) {
                  // This dirty helper accesses memory.  Collect the details.
                  tl_assert(d->mAddr != NULL);
                  tl_assert(d->mSize != 0);
                  dsize = d->mSize;
                  if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify)
                     addEvent_Dr( sbOut, d->mAddr, dsize );
                  if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify)
                     addEvent_Dw( sbOut, d->mAddr, dsize );
               } else {
                  tl_assert(d->mAddr == NULL);
                  tl_assert(d->mSize == 0);
               }
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_CAS: {
            /* We treat it as a read and a write of the location.  I
               think that is the same behaviour as it was before IRCAS
               was introduced, since prior to that point, the Vex
               front ends would translate a lock-prefixed instruction
               into a (normal) read followed by a (normal) write. */
            Int    dataSize;
            IRType dataTy;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataTy   = typeOfIRExpr(tyenv, cas->dataLo);
            dataSize = sizeofIRType(dataTy);
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            if (clo_trace_mem) {
               addEvent_Dr( sbOut, cas->addr, dataSize );
               addEvent_Dw( sbOut, cas->addr, dataSize );
            }
            if (clo_detailed_counts) {
               instrument_detail( sbOut, OpLoad, dataTy );
               if (cas->dataHi != NULL) /* dcas */
                  instrument_detail( sbOut, OpLoad, dataTy );
               instrument_detail( sbOut, OpStore, dataTy );
               if (cas->dataHi != NULL) /* dcas */
                  instrument_detail( sbOut, OpStore, dataTy );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_LLSC: {
            IRType dataTy;
            if (st->Ist.LLSC.storedata == NULL) {
               /* LL */
               dataTy = typeOfIRTemp(tyenv, st->Ist.LLSC.result);
               if (clo_trace_mem)
                  addEvent_Dr( sbOut, st->Ist.LLSC.addr,
                               sizeofIRType(dataTy) );
               if (clo_detailed_counts)
                  instrument_detail( sbOut, OpLoad, dataTy );
            } else {
               /* SC */
               dataTy = typeOfIRExpr(tyenv, st->Ist.LLSC.storedata);
               if (clo_trace_mem)
                  addEvent_Dw( sbOut, st->Ist.LLSC.addr,
                               sizeofIRType(dataTy) );
               if (clo_detailed_counts)
                  instrument_detail( sbOut, OpStore, dataTy );
            }
            addStmtToIRSB( sbOut, st );
            break;
         }

         case Ist_Exit:
            if (clo_basic_counts) {
               // The condition of a branch was inverted by VEX if a taken
               // branch is in fact a fall-through according to client
               // addresses.
               tl_assert(iaddr != 0);
               dst = (sizeof(Addr) == 4) ? st->Ist.Exit.dst->Ico.U32 :
                                           st->Ist.Exit.dst->Ico.U64;
               condition_inverted = (dst == iaddr + ilen);

               /* Count Jcc */
               if (!condition_inverted)
                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc",
                                             VG_(fnptr_to_fnentry)(
                                                &add_one_Jcc ),
                                             mkIRExprVec_0() );
               else
                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc",
                                             VG_(fnptr_to_fnentry)(
                                                &add_one_inverted_Jcc ),
                                             mkIRExprVec_0() );

               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
            }
            if (clo_trace_mem) {
               flushEvents(sbOut);
            }

            addStmtToIRSB( sbOut, st );      // Original statement

            if (clo_basic_counts) {
               /* Count non-taken Jcc */
               if (!condition_inverted)
                  di = unsafeIRDirty_0_N( 0, "add_one_Jcc_untaken",
                                             VG_(fnptr_to_fnentry)(
                                                &add_one_Jcc_untaken ),
                                             mkIRExprVec_0() );
               else
                  di = unsafeIRDirty_0_N( 0, "add_one_inverted_Jcc_untaken",
                                             VG_(fnptr_to_fnentry)(
                                                &add_one_inverted_Jcc_untaken ),
                                             mkIRExprVec_0() );

               addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
            }
            break;

         default:
            tl_assert(0);
      }
   }

   if (clo_basic_counts) {
      /* Count this superblock as completed. */
      di = unsafeIRDirty_0_N( 0, "add_one_SB_completed",
                                 VG_(fnptr_to_fnentry)( &add_one_SB_completed ),
                                 mkIRExprVec_0() );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }

   if (clo_trace_mem) {
      /* At the end of the sbIn.  Flush outstandings. */
      flushEvents(sbOut);
   }

   return sbOut;
}

static void lk_fini(Int exitcode)
{
   char percentify_buf[5]; /* Two digits, '%' and 0. */
   const int percentify_size = sizeof(percentify_buf) - 1;
   const int percentify_decs = 0;

   tl_assert(clo_fnname);
   tl_assert(clo_fnname[0]);

   if (clo_basic_counts) {
      ULong total_Jccs = n_Jccs + n_IJccs;
      ULong taken_Jccs = (n_Jccs - n_Jccs_untaken) + n_IJccs_untaken;

      VG_(umsg)("Counted %'llu call%s to %s()\n",
                n_func_calls, ( n_func_calls==1 ? "" : "s" ), clo_fnname);

      VG_(umsg)("\n");
      VG_(umsg)("Jccs:\n");
      VG_(umsg)("  total:         %'llu\n", total_Jccs);
      VG_(percentify)(taken_Jccs, (total_Jccs ? total_Jccs : 1),
                      percentify_decs, percentify_size, percentify_buf);
      VG_(umsg)("  taken:         %'llu (%s)\n",
                taken_Jccs, percentify_buf);

      VG_(umsg)("\n");
      VG_(umsg)("Executed:\n");
      VG_(umsg)("  SBs entered:   %'llu\n", n_SBs_entered);
      VG_(umsg)("  SBs completed: %'llu\n", n_SBs_completed);
      VG_(umsg)("  guest instrs:  %'llu\n", n_guest_instrs);
      VG_(umsg)("  IRStmts:       %'llu\n", n_IRStmts);

      VG_(umsg)("\n");
      VG_(umsg)("Ratios:\n");
      tl_assert(n_SBs_entered); // Paranoia time.
      VG_(umsg)("  guest instrs : SB entered  = %'llu : 10\n",
                10 * n_guest_instrs / n_SBs_entered);
      VG_(umsg)("       IRStmts : SB entered  = %'llu : 10\n",
                10 * n_IRStmts / n_SBs_entered);
      tl_assert(n_guest_instrs); // Paranoia time.
      VG_(umsg)("       IRStmts : guest instr = %'llu : 10\n",
                10 * n_IRStmts / n_guest_instrs);
   }

   if (clo_detailed_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("IR-level counts by type:\n");
      print_details();
   }

   if (clo_basic_counts) {
      VG_(umsg)("\n");
      VG_(umsg)("Exit code:       %d\n", exitcode);
   }
}

static void lk_pre_clo_init(void)
{
   VG_(details_name)            ("Lackey");
   VG_(details_version)         (NULL);
   VG_(details_description)     ("an example Valgrind tool");
   VG_(details_copyright_author)(
      "Copyright (C) 2002-2011, and GNU GPL'd, by Nicholas Nethercote.");
   VG_(details_bug_reports_to)  (VG_BUGS_TO);
   VG_(details_avg_translation_sizeB) ( 200 );

   VG_(basic_tool_funcs)          (lk_post_clo_init,
                                   lk_instrument,
                                   lk_fini);
   VG_(needs_command_line_options)(lk_process_cmd_line_option,
                                   lk_print_usage,
                                   lk_print_debug_usage);
}

VG_DETERMINE_INTERFACE_VERSION(lk_pre_clo_init)

/*--------------------------------------------------------------------*/
/*--- end                                                lk_main.c ---*/
/*--------------------------------------------------------------------*/