1//--------------------------------------------------------------------*/
2//--- BBV: a SimPoint basic block vector generator      bbv_main.c ---*/
3//--------------------------------------------------------------------*/
4
5/*
6   This file is part of BBV, a Valgrind tool for generating SimPoint
7   basic block vectors.
8
9   Copyright (C) 2006-2011 Vince Weaver
10      vince _at_ csl.cornell.edu
11
12   pcfile code is Copyright (C) 2006-2011 Oriol Prat
13      oriol.prat _at _ bsc.es
14
15   This program is free software; you can redistribute it and/or
16   modify it under the terms of the GNU General Public License as
17   published by the Free Software Foundation; either version 2 of the
18   License, or (at your option) any later version.
19
20   This program is distributed in the hope that it will be useful, but
21   WITHOUT ANY WARRANTY; without even the implied warranty of
22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23   General Public License for more details.
24
25   You should have received a copy of the GNU General Public License
26   along with this program; if not, write to the Free Software
27   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28   02111-1307, USA.
29
30   The GNU General Public License is contained in the file COPYING.
31*/
32
33
34#include "pub_tool_basics.h"
35#include "pub_tool_tooliface.h"
36#include "pub_tool_options.h"    /* command line options */
37
38#include "pub_tool_vki.h"        /* vki_stat */
39#include "pub_tool_libcbase.h"   /* VG_(strlen) */
40#include "pub_tool_libcfile.h"   /* VG_(write) */
41#include "pub_tool_libcprint.h"  /* VG_(printf) */
42#include "pub_tool_libcassert.h" /* VG_(exit) */
43#include "pub_tool_mallocfree.h" /* plain_free */
44#include "pub_tool_machine.h"    /* VG_(fnptr_to_fnentry) */
45#include "pub_tool_debuginfo.h"  /* VG_(get_fnname) */
46
47#include "pub_tool_oset.h"       /* ordered set stuff */
48
   /* Instruction special cases.  These are bit flags OR-ed together  */
   /*   in the value returned by get_inst_type().                     */
#define REP_INSTRUCTION   0x1
#define FLDCW_INSTRUCTION 0x2

   /* Interval variables.  interval_size is the number of retired     */
   /*   instructions a thread must exceed before handle_overflow()    */
   /*   emits a vector; it can be changed with --interval-size.       */
#define DEFAULT_GRAIN_SIZE 100000000  /* 100 million by default */
static Int interval_size=DEFAULT_GRAIN_SIZE;

   /* Filenames.  The clo_* values are the templates given on the     */
   /*   command line (or the defaults below); pc_out_file and         */
   /*   bb_out_file hold the results of VG_(expand_file_name).        */
   /*   NOTE(review): "%p" is presumably replaced with the process    */
   /*   id by VG_(expand_file_name) -- confirm.                       */
static UChar *clo_bb_out_file="bb.out.%p";
static UChar *clo_pc_out_file="pc.out.%p";
static UChar *pc_out_file=NULL;
static UChar *bb_out_file=NULL;


   /* Output parameters.  instr_count_only suppresses the per-interval */
   /*   vectors; generate_pc_file is set when --pc-out-file is given.  */
static Bool instr_count_only=False;
static Bool generate_pc_file=False;

   /* Write buffer: scratch space shared by every routine that        */
   /*   formats a line before writing it out.                         */
static UChar buf[1024];

   /* Global values */
static OSet* instr_info_table;  /* table that holds the basic block info */
static Int block_num=1;         /* global next block number */
static Int current_thread=0;    /* thread id set by bbv_thread_called() */
static Int allocated_threads=1; /* entries in bbv_thread and in each   */
                                /*   BB_info inst_counter array        */
struct thread_info *bbv_thread=NULL;  /* per-thread state, indexed by id */
77
   /* Per-thread variables.  One entry per thread id lives in the     */
   /*   bbv_thread array; entries are initialised by                  */
   /*   allocate_new_thread().                                        */
struct thread_info {
   ULong dyn_instr;         /* Current retired instruction count; reduced */
                            /*   by interval_size in handle_overflow()    */
   ULong total_instr;       /* Total retired instruction count   */
   Addr last_rep_addr;      /* rep counting values: address of the last   */
                            /*   rep instruction seen                     */
   ULong rep_count;         /* rep callbacks since the last close_out_reps */
   ULong global_rep_count;  /* sum of every rep_count closed out so far   */
   ULong unique_rep_count;  /* number of times close_out_reps() has run   */
   ULong fldcw_count;       /* fldcw count */
   Int bbtrace_fd;          /* file descriptor; -1 until the file is open */
};
89
/* Size in bytes of the fn_name buffer stored with each basic block.  */
#define FUNCTION_NAME_LENGTH 20

/* One entry of instr_info_table, created by bbv_instrument() the     */
/* first time a block starting at a given address is translated.      */
struct BB_info {
   Addr       BB_addr;           /* used as key, must be first           */
   Int        n_instrs;          /* instructions in the basic block      */
                                 /*   (bbv_instrument always stores 1)   */
   Int        block_num;         /* unique block identifier              */
   Int        *inst_counter;     /* per-thread instruction counts for    */
                                 /*   the current interval, indexed by   */
                                 /*   thread id                          */
   Bool       is_entry;          /* is this block a function entry point */
   UChar      fn_name[FUNCTION_NAME_LENGTH];  /* Function block is in    */
};
100
101
102   /* dump the optional PC file, which contains basic block number to */
103   /*   instruction address and function name mappings                */
104static void dumpPcFile(void)
105{
106   struct BB_info   *bb_elem;
107   Int              pctrace_fd;
108   SysRes           sres;
109
110   pc_out_file =
111          VG_(expand_file_name)("--pc-out-file", clo_pc_out_file);
112
113   sres = VG_(open)(pc_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
114                              VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IWGRP);
115   if (sr_isError(sres)) {
116      VG_(umsg)("Error: cannot create pc file %s\n", pc_out_file);
117      VG_(exit)(1);
118   } else {
119      pctrace_fd = sr_Res(sres);
120   }
121
122      /* Loop through the table, printing the number, address, */
123      /*    and function name for each basic block             */
124   VG_(OSetGen_ResetIter)(instr_info_table);
125   while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) {
126      VG_(write)(pctrace_fd,"F",1);
127      VG_(sprintf)( buf,":%d:%x:%s\n",
128                       bb_elem->block_num,
129                       (Int)bb_elem->BB_addr,
130                       bb_elem->fn_name);
131      VG_(write)(pctrace_fd, (void*)buf, VG_(strlen)(buf));
132   }
133
134   VG_(close)(pctrace_fd);
135}
136
137static Int open_tracefile(Int thread_num)
138{
139   SysRes  sres;
140   UChar temp_string[2048];
141
142      /* For thread 1, don't append any thread number  */
143      /* This lets the single-thread case not have any */
144      /* extra values appended to the file name.       */
145   if (thread_num==1) {
146      VG_(strncpy)(temp_string,bb_out_file,2047);
147   }
148   else {
149      VG_(sprintf)(temp_string,"%s.%d",bb_out_file,thread_num);
150   }
151
152   sres = VG_(open)(temp_string, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
153                              VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IWGRP);
154
155   if (sr_isError(sres)) {
156      VG_(umsg)("Error: cannot create bb file %s\n",temp_string);
157      VG_(exit)(1);
158   }
159
160   return sr_Res(sres);
161}
162
163static void handle_overflow(void)
164{
165   struct BB_info *bb_elem;
166
167   if (bbv_thread[current_thread].dyn_instr > interval_size) {
168
169      if (!instr_count_only) {
170
171            /* If our output fd hasn't been opened, open it */
172         if (bbv_thread[current_thread].bbtrace_fd < 0) {
173            bbv_thread[current_thread].bbtrace_fd=open_tracefile(current_thread);
174         }
175
176           /* put an entry to the bb.out file */
177
178         VG_(write)(bbv_thread[current_thread].bbtrace_fd,"T",1);
179
180         VG_(OSetGen_ResetIter)(instr_info_table);
181         while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) {
182            if ( bb_elem->inst_counter[current_thread] != 0 ) {
183               VG_(sprintf)( buf,":%d:%d   ",
184                         bb_elem->block_num,
185                         bb_elem->inst_counter[current_thread]);
186               VG_(write)(bbv_thread[current_thread].bbtrace_fd,
187                          (void*)buf, VG_(strlen)(buf));
188               bb_elem->inst_counter[current_thread] = 0;
189            }
190         }
191
192         VG_(write)(bbv_thread[current_thread].bbtrace_fd,"\n",1);
193      }
194
195      bbv_thread[current_thread].dyn_instr -= interval_size;
196   }
197}
198
199
200static void close_out_reps(void)
201{
202   bbv_thread[current_thread].global_rep_count+=bbv_thread[current_thread].rep_count;
203   bbv_thread[current_thread].unique_rep_count++;
204   bbv_thread[current_thread].rep_count=0;
205}
206
207   /* Generic function to get called each instruction */
208static VG_REGPARM(1) void per_instruction_BBV(struct BB_info *bbInfo)
209{
210   Int n_instrs=1;
211
212   tl_assert(bbInfo);
213
214      /* we finished rep but didn't clear out count */
215   if (bbv_thread[current_thread].rep_count) {
216      n_instrs++;
217      close_out_reps();
218   }
219
220   bbInfo->inst_counter[current_thread]+=n_instrs;
221
222   bbv_thread[current_thread].total_instr+=n_instrs;
223   bbv_thread[current_thread].dyn_instr +=n_instrs;
224
225   handle_overflow();
226}
227
228   /* Function to get called if instruction has a rep prefix */
229static VG_REGPARM(1) void per_instruction_BBV_rep(Addr addr)
230{
231      /* handle back-to-back rep instructions */
232   if (bbv_thread[current_thread].last_rep_addr!=addr) {
233      if (bbv_thread[current_thread].rep_count) {
234         close_out_reps();
235         bbv_thread[current_thread].total_instr++;
236         bbv_thread[current_thread].dyn_instr++;
237      }
238      bbv_thread[current_thread].last_rep_addr=addr;
239   }
240
241   bbv_thread[current_thread].rep_count++;
242
243}
244
245   /* Function to call if our instruction has a fldcw instruction */
246static VG_REGPARM(1) void per_instruction_BBV_fldcw(struct BB_info *bbInfo)
247{
248   Int n_instrs=1;
249
250   tl_assert(bbInfo);
251
252      /* we finished rep but didn't clear out count */
253   if (bbv_thread[current_thread].rep_count) {
254      n_instrs++;
255      close_out_reps();
256   }
257
258      /* count fldcw instructions */
259   bbv_thread[current_thread].fldcw_count++;
260
261   bbInfo->inst_counter[current_thread]+=n_instrs;
262
263   bbv_thread[current_thread].total_instr+=n_instrs;
264   bbv_thread[current_thread].dyn_instr +=n_instrs;
265
266   handle_overflow();
267}
268
269   /* Check if the instruction pointed to is one that needs */
270   /*   special handling.  If so, set a bit in the return   */
271   /*   value indicating what type.                         */
272static Int get_inst_type(Int len, Addr addr)
273{
274   int result=0;
275
276#if defined(VGA_x86) || defined(VGA_amd64)
277
278   unsigned char *inst_pointer;
279   unsigned char inst_byte;
280   int i,possible_rep;
281
282   /* rep prefixed instructions are counted as one instruction on */
283   /*     x86 processors and must be handled as a special case    */
284
285   /* Also, the rep prefix is re-used as part of the opcode for   */
286   /*     SSE instructions.  So we need to specifically check for */
287   /*     the following: movs, cmps, scas, lods, stos, ins, outs  */
288
289   inst_pointer=(unsigned char *)addr;
290   i=0;
291   inst_byte=0;
292   possible_rep=0;
293
294   while (i<len) {
295
296      inst_byte=*inst_pointer;
297
298      if ( (inst_byte == 0x67) ||            /* size override prefix */
299           (inst_byte == 0x66) ||            /* size override prefix */
300           (inst_byte == 0x48) ) {           /* 64-bit prefix */
301      } else if ( (inst_byte == 0xf2) ||     /* rep prefix    */
302                  (inst_byte == 0xf3) ) {    /* repne prefix  */
303         possible_rep=1;
304      } else {
305         break;                              /* other byte, exit */
306      }
307
308      i++;
309      inst_pointer++;
310   }
311
312   if ( possible_rep &&
313        ( ( (inst_byte >= 0xa4) &&     /* movs,cmps,scas */
314            (inst_byte <= 0xaf) ) ||   /* lods,stos      */
315          ( (inst_byte >= 0x6c) &&
316            (inst_byte <= 0x6f) ) ) ) {  /* ins,outs       */
317
318      result|=REP_INSTRUCTION;
319   }
320
321   /* fldcw instructions are double-counted by the hardware       */
322   /*     performance counters on pentium 4 processors so it is   */
323   /*     useful to have that count when doing validation work.   */
324
325   inst_pointer=(unsigned char *)addr;
326   if (len>1) {
327         /* FLDCW detection */
328         /* opcode is 0xd9/5, ie 1101 1001 oo10 1mmm */
329      if ((*inst_pointer==0xd9) &&
330          (*(inst_pointer+1)<0xb0) &&  /* need this case of fldz, etc, count */
331          ( (*(inst_pointer+1) & 0x38) == 0x28)) {
332         result|=FLDCW_INSTRUCTION;
333      }
334   }
335
336#endif
337   return result;
338}
339
340
341
342   /* Our instrumentation function       */
343   /*    sbIn = super block to translate */
344   /*    layout = guest layout           */
345   /*    gWordTy = size of guest word    */
346   /*    hWordTy = size of host word     */
347static IRSB* bbv_instrument ( VgCallbackClosure* closure,
348                             IRSB* sbIn, VexGuestLayout* layout,
349                             VexGuestExtents* vge,
350                             IRType gWordTy, IRType hWordTy )
351{
352   Int      i,n_instrs=1;
353   IRSB     *sbOut;
354   IRStmt   *st;
355   struct BB_info  *bbInfo;
356   Addr64   origAddr,ourAddr;
357   IRDirty  *di;
358   IRExpr   **argv, *arg1;
359   Int      regparms,opcode_type;
360
361      /* We don't handle a host/guest word size mismatch */
362   if (gWordTy != hWordTy) {
363      VG_(tool_panic)("host/guest word size mismatch");
364   }
365
366      /* Set up SB */
367   sbOut = deepCopyIRSBExceptStmts(sbIn);
368
369      /* Copy verbatim any IR preamble preceding the first IMark */
370   i = 0;
371   while ( (i < sbIn->stmts_used) && (sbIn->stmts[i]->tag!=Ist_IMark)) {
372      addStmtToIRSB( sbOut, sbIn->stmts[i] );
373      i++;
374   }
375
376      /* Get the first statement */
377   tl_assert(sbIn->stmts_used > 0);
378   st = sbIn->stmts[i];
379
380      /* double check we are at a Mark statement */
381   tl_assert(Ist_IMark == st->tag);
382
383   origAddr=st->Ist.IMark.addr;
384
385      /* Get the BB_info */
386   bbInfo = VG_(OSetGen_Lookup)(instr_info_table, &origAddr);
387
388   if (bbInfo==NULL) {
389
390         /* BB never translated before (at this address, at least;          */
391         /* could have been unloaded and then reloaded elsewhere in memory) */
392
393         /* allocate and initialize a new basic block structure */
394      bbInfo=VG_(OSetGen_AllocNode)(instr_info_table, sizeof(struct BB_info));
395      bbInfo->BB_addr = origAddr;
396      bbInfo->n_instrs = n_instrs;
397      bbInfo->inst_counter=VG_(calloc)("bbv_instrument",
398                                       allocated_threads,
399                                       sizeof(Int));
400
401         /* assign a unique block number */
402      bbInfo->block_num=block_num;
403      block_num++;
404         /* get function name and entry point information */
405      VG_(get_fnname)(origAddr,bbInfo->fn_name,FUNCTION_NAME_LENGTH);
406      bbInfo->is_entry=VG_(get_fnname_if_entry)(origAddr, bbInfo->fn_name,
407                                                FUNCTION_NAME_LENGTH);
408         /* insert structure into table */
409      VG_(OSetGen_Insert)( instr_info_table, bbInfo );
410   }
411
412      /* Iterate through the basic block, putting the original   */
413      /* instructions in place, plus putting a call to updateBBV */
414      /* for each original instruction                           */
415
416      /* This is less efficient than only instrumenting the BB   */
417      /* But it gives proper results given the fact that         */
418      /* valgrind uses superblocks (not basic blocks) by default */
419
420
421   while(i < sbIn->stmts_used) {
422      st=sbIn->stmts[i];
423
424      if (st->tag == Ist_IMark) {
425
426         ourAddr = st->Ist.IMark.addr;
427
428         opcode_type=get_inst_type(st->Ist.IMark.len,ourAddr);
429
430         regparms=1;
431         arg1= mkIRExpr_HWord( (HWord)bbInfo);
432         argv= mkIRExprVec_1(arg1);
433
434
435         if (opcode_type&REP_INSTRUCTION) {
436            arg1= mkIRExpr_HWord(ourAddr);
437            argv= mkIRExprVec_1(arg1);
438            di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV_rep",
439                                VG_(fnptr_to_fnentry)( &per_instruction_BBV_rep ),
440                                argv);
441         }
442         else if (opcode_type&FLDCW_INSTRUCTION) {
443            di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV_fldcw",
444                                VG_(fnptr_to_fnentry)( &per_instruction_BBV_fldcw ),
445                                argv);
446         }
447         else {
448         di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV",
449                                VG_(fnptr_to_fnentry)( &per_instruction_BBV ),
450                                argv);
451         }
452
453
454            /* Insert our call */
455         addStmtToIRSB( sbOut,  IRStmt_Dirty(di));
456      }
457
458         /* Insert the original instruction */
459      addStmtToIRSB( sbOut, st );
460
461      i++;
462   }
463
464   return sbOut;
465}
466
467static struct thread_info *allocate_new_thread(struct thread_info *old,
468                                     Int old_number, Int new_number)
469{
470   struct thread_info *temp;
471   struct BB_info   *bb_elem;
472   Int i;
473
474   temp=VG_(realloc)("bbv_main.c allocate_threads",
475                     old,
476                     new_number*sizeof(struct thread_info));
477
478      /* init the new thread */
479      /* We loop in case the new thread is not contiguous */
480   for(i=old_number;i<new_number;i++) {
481      temp[i].last_rep_addr=0;
482      temp[i].dyn_instr=0;
483      temp[i].total_instr=0;
484      temp[i].global_rep_count=0;
485      temp[i].unique_rep_count=0;
486      temp[i].rep_count=0;
487      temp[i].fldcw_count=0;
488      temp[i].bbtrace_fd=-1;
489   }
490      /* expand the inst_counter on all allocated basic blocks */
491   VG_(OSetGen_ResetIter)(instr_info_table);
492   while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) {
493      bb_elem->inst_counter =
494                    VG_(realloc)("bbv_main.c inst_counter",
495                                 bb_elem->inst_counter,
496                                 new_number*sizeof(Int));
497      for(i=old_number;i<new_number;i++) {
498         bb_elem->inst_counter[i]=0;
499      }
500   }
501
502   return temp;
503}
504
505static void bbv_thread_called ( ThreadId tid, ULong nDisp )
506{
507   if (tid >= allocated_threads) {
508      bbv_thread=allocate_new_thread(bbv_thread,allocated_threads,tid+1);
509      allocated_threads=tid+1;
510   }
511   current_thread=tid;
512}
513
514
515
516
517/*--------------------------------------------------------------------*/
518/*--- Setup                                                        ---*/
519/*--------------------------------------------------------------------*/
520
521static void bbv_post_clo_init(void)
522{
523   bb_out_file =
524          VG_(expand_file_name)("--bb-out-file", clo_bb_out_file);
525
526      /* Try a closer approximation of basic blocks  */
527      /* This is the same as the command line option */
528      /* --vex-guest-chase-thresh=0                  */
529   VG_(clo_vex_control).guest_chase_thresh = 0;
530}
531
532   /* Parse the command line options */
533static Bool bbv_process_cmd_line_option(Char* arg)
534{
535   if VG_INT_CLO       (arg, "--interval-size",    interval_size) {}
536   else if VG_STR_CLO  (arg, "--bb-out-file",      clo_bb_out_file) {}
537   else if VG_STR_CLO  (arg, "--pc-out-file",      clo_pc_out_file) {
538      generate_pc_file = True;
539   }
540   else if VG_BOOL_CLO (arg, "--instr-count-only", instr_count_only) {}
541   else {
542      return False;
543   }
544
545   return True;
546}
547
548static void bbv_print_usage(void)
549{
550   VG_(printf)(
551"   --bb-out-file=<file>       filename for BBV info\n"
552"   --pc-out-file=<file>       filename for BB addresses and function names\n"
553"   --interval-size=<num>      interval size\n"
554"   --instr-count-only=yes|no  only print total instruction count\n"
555   );
556}
557
558static void bbv_print_debug_usage(void)
559{
560   VG_(printf)("    (none)\n");
561}
562
563static void bbv_fini(Int exitcode)
564{
565   Int i;
566
567   if (generate_pc_file) {
568      dumpPcFile();
569   }
570
571   for(i=0;i<allocated_threads;i++) {
572
573      if (bbv_thread[i].total_instr!=0) {
574
575         VG_(sprintf)(buf,"\n\n"
576                          "# Thread %d\n"
577                          "#   Total intervals: %d (Interval Size %d)\n"
578                          "#   Total instructions: %lld\n"
579                          "#   Total reps: %lld\n"
580                          "#   Unique reps: %lld\n"
581                          "#   Total fldcw instructions: %lld\n\n",
582                i,
583                (Int)(bbv_thread[i].total_instr/(ULong)interval_size),
584                interval_size,
585                bbv_thread[i].total_instr,
586                bbv_thread[i].global_rep_count,
587                bbv_thread[i].unique_rep_count,
588                bbv_thread[i].fldcw_count);
589
590            /* Print results to display */
591         VG_(umsg)("%s\n", buf);
592
593            /* open the output file if it hasn't already */
594         if (bbv_thread[i].bbtrace_fd < 0) {
595            bbv_thread[i].bbtrace_fd=open_tracefile(i);
596         }
597            /* Also print to results file */
598         VG_(write)(bbv_thread[i].bbtrace_fd,(void*)buf,VG_(strlen)(buf));
599         VG_(close)(bbv_thread[i].bbtrace_fd);
600      }
601   }
602}
603
604static void bbv_pre_clo_init(void)
605{
606   VG_(details_name)            ("exp-bbv");
607   VG_(details_version)         (NULL);
608   VG_(details_description)     ("a SimPoint basic block vector generator");
609   VG_(details_copyright_author)(
610      "Copyright (C) 2006-2011 Vince Weaver");
611   VG_(details_bug_reports_to)  (VG_BUGS_TO);
612
613   VG_(basic_tool_funcs)          (bbv_post_clo_init,
614                                   bbv_instrument,
615                                   bbv_fini);
616
617   VG_(needs_command_line_options)(bbv_process_cmd_line_option,
618                                   bbv_print_usage,
619                                   bbv_print_debug_usage);
620
621   VG_(track_start_client_code)( bbv_thread_called );
622
623
624   instr_info_table = VG_(OSetGen_Create)(/*keyOff*/0,
625                                          NULL,
626                                          VG_(malloc), "bbv.1", VG_(free));
627
628   bbv_thread=allocate_new_thread(bbv_thread,0,allocated_threads);
629}
630
631VG_DETERMINE_INTERFACE_VERSION(bbv_pre_clo_init)
632
633/*--------------------------------------------------------------------*/
634/*--- end                                                          ---*/
635/*--------------------------------------------------------------------*/
636