1/* -*- mode: C; c-basic-offset: 3; -*- */
2
3//--------------------------------------------------------------------*/
4//--- BBV: a SimPoint basic block vector generator      bbv_main.c ---*/
5//--------------------------------------------------------------------*/
6
7/*
8   This file is part of BBV, a Valgrind tool for generating SimPoint
9   basic block vectors.
10
11   Copyright (C) 2006-2013 Vince Weaver
12      vince _at_ csl.cornell.edu
13
14   pcfile code is Copyright (C) 2006-2013 Oriol Prat
15      oriol.prat _at _ bsc.es
16
17   This program is free software; you can redistribute it and/or
18   modify it under the terms of the GNU General Public License as
19   published by the Free Software Foundation; either version 2 of the
20   License, or (at your option) any later version.
21
22   This program is distributed in the hope that it will be useful, but
23   WITHOUT ANY WARRANTY; without even the implied warranty of
24   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25   General Public License for more details.
26
27   You should have received a copy of the GNU General Public License
28   along with this program; if not, write to the Free Software
29   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
30   02111-1307, USA.
31
32   The GNU General Public License is contained in the file COPYING.
33*/
34
35
36#include "pub_tool_basics.h"
37#include "pub_tool_tooliface.h"
38#include "pub_tool_options.h"    /* command line options */
39
40#include "pub_tool_vki.h"        /* VKI_O_CREAT */
41#include "pub_tool_libcbase.h"   /* VG_(strlen) */
42#include "pub_tool_libcprint.h"  /* VG_(printf) */
43#include "pub_tool_libcassert.h" /* VG_(exit) */
44#include "pub_tool_mallocfree.h" /* VG_(malloc) */
45#include "pub_tool_machine.h"    /* VG_(fnptr_to_fnentry) */
46#include "pub_tool_debuginfo.h"  /* VG_(get_fnname) */
47
48#include "pub_tool_oset.h"       /* ordered set stuff */
49
50   /* instruction special cases */
51#define REP_INSTRUCTION   0x1
52#define FLDCW_INSTRUCTION 0x2
53
54   /* interval variables */
55#define DEFAULT_GRAIN_SIZE 100000000  /* 100 million by default */
56static Int interval_size=DEFAULT_GRAIN_SIZE;
57
58   /* filenames */
59static const HChar *clo_bb_out_file="bb.out.%p";
60static const HChar *clo_pc_out_file="pc.out.%p";
61static HChar *pc_out_file=NULL;
62static HChar *bb_out_file=NULL;
63
64
65   /* output parameters */
66static Bool instr_count_only=False;
67static Bool generate_pc_file=False;
68
69   /* Global values */
70static OSet* instr_info_table;  /* table that holds the basic block info */
71static Int block_num=1;         /* global next block number */
72static Int current_thread=0;
73static Int allocated_threads=1;
74struct thread_info *bbv_thread=NULL;
75
76   /* Per-thread variables */
struct thread_info {
   ULong dyn_instr;         /* Instructions counted toward the current    */
                            /*   interval; handle_overflow subtracts      */
                            /*   interval_size once that is exceeded      */
   ULong total_instr;       /* Total retired instruction count, reported  */
                            /*   by bbv_fini                              */
   Addr last_rep_addr;      /* Address given to the most recent           */
                            /*   per_instruction_BBV_rep call             */
   ULong rep_count;         /* rep callbacks seen since the last          */
                            /*   close_out_reps                           */
   ULong global_rep_count;  /* Sum of all rep_count values folded in by   */
                            /*   close_out_reps                           */
   ULong unique_rep_count;  /* Number of times close_out_reps has run     */
   ULong fldcw_count;       /* fldcw count */
   VgFile *bbtrace_fp;      /* Output file; NULL until open_tracefile     */
                            /*   is called for this thread                */
};
87
   /* One record per translated block, stored in instr_info_table and */
   /*   looked up by the address of the block's first instruction     */
struct BB_info {
   Addr       BB_addr;           /* used as key, must be first (the table */
                                 /*   is created with key offset 0)       */
   Int        n_instrs;          /* instructions in the basic block; set  */
                                 /*   when the record is created in       */
                                 /*   bbv_instrument                      */
   Int        block_num;         /* unique block identifier              */
   Int        *inst_counter;     /* one counter per thread, indexed by    */
                                 /*   thread number; incremented by the   */
                                 /*   per-instruction callbacks and       */
                                 /*   zeroed when an interval is written  */
   Bool       is_entry;          /* is this block a function entry point */
   const HChar *fn_name;         /* Function block is in (a private copy  */
                                 /*   made with VG_(strdup))              */
};
96
97
98   /* dump the optional PC file, which contains basic block number to */
99   /*   instruction address and function name mappings                */
100static void dumpPcFile(void)
101{
102   struct BB_info   *bb_elem;
103   VgFile *fp;
104
105   pc_out_file =
106          VG_(expand_file_name)("--pc-out-file", clo_pc_out_file);
107
108   fp = VG_(fopen)(pc_out_file, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
109                   VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IWGRP);
110   if (fp == NULL) {
111      VG_(umsg)("Error: cannot create pc file %s\n", pc_out_file);
112      VG_(exit)(1);
113   }
114
115      /* Loop through the table, printing the number, address, */
116      /*    and function name for each basic block             */
117   VG_(OSetGen_ResetIter)(instr_info_table);
118   while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) {
119      VG_(fprintf)( fp, "F:%d:%x:%s\n", bb_elem->block_num,
120                    (Int)bb_elem->BB_addr, bb_elem->fn_name);
121   }
122
123   VG_(fclose)(fp);
124}
125
126static VgFile *open_tracefile(Int thread_num)
127{
128   VgFile *fp;
129   // Allocate a buffer large enough for the general case "%s.%d" below
130   HChar temp_string[VG_(strlen)(bb_out_file) + 1 + 10 + 1];
131
132      /* For thread 1, don't append any thread number  */
133      /* This lets the single-thread case not have any */
134      /* extra values appended to the file name.       */
135   if (thread_num==1) {
136      VG_(strcpy)(temp_string, bb_out_file);
137   }
138   else {
139      VG_(sprintf)(temp_string,"%s.%d",bb_out_file,thread_num);
140   }
141
142   fp = VG_(fopen)(temp_string, VKI_O_CREAT|VKI_O_TRUNC|VKI_O_WRONLY,
143                   VKI_S_IRUSR|VKI_S_IWUSR|VKI_S_IRGRP|VKI_S_IWGRP);
144
145   if (fp == NULL) {
146      VG_(umsg)("Error: cannot create bb file %s\n",temp_string);
147      VG_(exit)(1);
148   }
149
150   return fp;
151}
152
153static void handle_overflow(void)
154{
155   struct BB_info *bb_elem;
156
157   if (bbv_thread[current_thread].dyn_instr > interval_size) {
158
159      if (!instr_count_only) {
160
161            /* If our output file hasn't been opened, open it */
162         if (bbv_thread[current_thread].bbtrace_fp == NULL) {
163            bbv_thread[current_thread].bbtrace_fp=open_tracefile(current_thread);
164         }
165
166           /* put an entry to the bb.out file */
167
168         VG_(fprintf)(bbv_thread[current_thread].bbtrace_fp, "T");
169
170         VG_(OSetGen_ResetIter)(instr_info_table);
171         while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) {
172            if ( bb_elem->inst_counter[current_thread] != 0 ) {
173               VG_(fprintf)(bbv_thread[current_thread].bbtrace_fp, ":%d:%d   ",
174                            bb_elem->block_num,
175                            bb_elem->inst_counter[current_thread]);
176               bb_elem->inst_counter[current_thread] = 0;
177            }
178         }
179
180         VG_(fprintf)(bbv_thread[current_thread].bbtrace_fp, "\n");
181      }
182
183      bbv_thread[current_thread].dyn_instr -= interval_size;
184   }
185}
186
187
188static void close_out_reps(void)
189{
190   bbv_thread[current_thread].global_rep_count+=bbv_thread[current_thread].rep_count;
191   bbv_thread[current_thread].unique_rep_count++;
192   bbv_thread[current_thread].rep_count=0;
193}
194
195   /* Generic function to get called each instruction */
196static VG_REGPARM(1) void per_instruction_BBV(struct BB_info *bbInfo)
197{
198   Int n_instrs=1;
199
200   tl_assert(bbInfo);
201
202      /* we finished rep but didn't clear out count */
203   if (bbv_thread[current_thread].rep_count) {
204      n_instrs++;
205      close_out_reps();
206   }
207
208   bbInfo->inst_counter[current_thread]+=n_instrs;
209
210   bbv_thread[current_thread].total_instr+=n_instrs;
211   bbv_thread[current_thread].dyn_instr +=n_instrs;
212
213   handle_overflow();
214}
215
216   /* Function to get called if instruction has a rep prefix */
217static VG_REGPARM(1) void per_instruction_BBV_rep(Addr addr)
218{
219      /* handle back-to-back rep instructions */
220   if (bbv_thread[current_thread].last_rep_addr!=addr) {
221      if (bbv_thread[current_thread].rep_count) {
222         close_out_reps();
223         bbv_thread[current_thread].total_instr++;
224         bbv_thread[current_thread].dyn_instr++;
225      }
226      bbv_thread[current_thread].last_rep_addr=addr;
227   }
228
229   bbv_thread[current_thread].rep_count++;
230
231}
232
233   /* Function to call if our instruction has a fldcw instruction */
234static VG_REGPARM(1) void per_instruction_BBV_fldcw(struct BB_info *bbInfo)
235{
236   Int n_instrs=1;
237
238   tl_assert(bbInfo);
239
240      /* we finished rep but didn't clear out count */
241   if (bbv_thread[current_thread].rep_count) {
242      n_instrs++;
243      close_out_reps();
244   }
245
246      /* count fldcw instructions */
247   bbv_thread[current_thread].fldcw_count++;
248
249   bbInfo->inst_counter[current_thread]+=n_instrs;
250
251   bbv_thread[current_thread].total_instr+=n_instrs;
252   bbv_thread[current_thread].dyn_instr +=n_instrs;
253
254   handle_overflow();
255}
256
257   /* Check if the instruction pointed to is one that needs */
258   /*   special handling.  If so, set a bit in the return   */
259   /*   value indicating what type.                         */
260static Int get_inst_type(UInt len, Addr addr)
261{
262   int result=0;
263
264#if defined(VGA_x86) || defined(VGA_amd64)
265
266   UChar *inst_pointer;
267   UChar  inst_byte;
268   int i,possible_rep;
269
270   /* rep prefixed instructions are counted as one instruction on */
271   /*     x86 processors and must be handled as a special case    */
272
273   /* Also, the rep prefix is re-used as part of the opcode for   */
274   /*     SSE instructions.  So we need to specifically check for */
275   /*     the following: movs, cmps, scas, lods, stos, ins, outs  */
276
277   inst_pointer=(UChar *)addr;
278   i=0;
279   inst_byte=0;
280   possible_rep=0;
281
282   while (i<len) {
283
284      inst_byte=*inst_pointer;
285
286      if ( (inst_byte == 0x67) ||            /* size override prefix */
287           (inst_byte == 0x66) ||            /* size override prefix */
288           (inst_byte == 0x48) ) {           /* 64-bit prefix */
289      } else if ( (inst_byte == 0xf2) ||     /* rep prefix    */
290                  (inst_byte == 0xf3) ) {    /* repne prefix  */
291         possible_rep=1;
292      } else {
293         break;                              /* other byte, exit */
294      }
295
296      i++;
297      inst_pointer++;
298   }
299
300   if ( possible_rep &&
301        ( ( (inst_byte >= 0xa4) &&     /* movs,cmps,scas */
302            (inst_byte <= 0xaf) ) ||   /* lods,stos      */
303          ( (inst_byte >= 0x6c) &&
304            (inst_byte <= 0x6f) ) ) ) {  /* ins,outs       */
305
306      result|=REP_INSTRUCTION;
307   }
308
309   /* fldcw instructions are double-counted by the hardware       */
310   /*     performance counters on pentium 4 processors so it is   */
311   /*     useful to have that count when doing validation work.   */
312
313   inst_pointer=(UChar *)addr;
314   if (len>1) {
315         /* FLDCW detection */
316         /* opcode is 0xd9/5, ie 1101 1001 oo10 1mmm */
317      if ((*inst_pointer==0xd9) &&
318          (*(inst_pointer+1)<0xb0) &&  /* need this case of fldz, etc, count */
319          ( (*(inst_pointer+1) & 0x38) == 0x28)) {
320         result|=FLDCW_INSTRUCTION;
321      }
322   }
323
324#endif
325   return result;
326}
327
328
329
330   /* Our instrumentation function       */
331   /*    sbIn = super block to translate */
332   /*    layout = guest layout           */
333   /*    gWordTy = size of guest word    */
334   /*    hWordTy = size of host word     */
335static IRSB* bbv_instrument ( VgCallbackClosure* closure,
336                              IRSB* sbIn, const VexGuestLayout* layout,
337                              const VexGuestExtents* vge,
338                              const VexArchInfo* archinfo_host,
339                              IRType gWordTy, IRType hWordTy )
340{
341   Int      i,n_instrs=1;
342   IRSB     *sbOut;
343   IRStmt   *st;
344   struct BB_info  *bbInfo;
345   Addr     origAddr,ourAddr;
346   IRDirty  *di;
347   IRExpr   **argv, *arg1;
348   Int      regparms,opcode_type;
349
350      /* We don't handle a host/guest word size mismatch */
351   if (gWordTy != hWordTy) {
352      VG_(tool_panic)("host/guest word size mismatch");
353   }
354
355      /* Set up SB */
356   sbOut = deepCopyIRSBExceptStmts(sbIn);
357
358      /* Copy verbatim any IR preamble preceding the first IMark */
359   i = 0;
360   while ( (i < sbIn->stmts_used) && (sbIn->stmts[i]->tag!=Ist_IMark)) {
361      addStmtToIRSB( sbOut, sbIn->stmts[i] );
362      i++;
363   }
364
365      /* Get the first statement */
366   tl_assert(sbIn->stmts_used > 0);
367   st = sbIn->stmts[i];
368
369      /* double check we are at a Mark statement */
370   tl_assert(Ist_IMark == st->tag);
371
372   origAddr=st->Ist.IMark.addr;
373
374      /* Get the BB_info */
375   bbInfo = VG_(OSetGen_Lookup)(instr_info_table, &origAddr);
376
377   if (bbInfo==NULL) {
378
379         /* BB never translated before (at this address, at least;          */
380         /* could have been unloaded and then reloaded elsewhere in memory) */
381
382         /* allocate and initialize a new basic block structure */
383      bbInfo=VG_(OSetGen_AllocNode)(instr_info_table, sizeof(struct BB_info));
384      bbInfo->BB_addr = origAddr;
385      bbInfo->n_instrs = n_instrs;
386      bbInfo->inst_counter=VG_(calloc)("bbv_instrument",
387                                       allocated_threads,
388                                       sizeof(Int));
389
390         /* assign a unique block number */
391      bbInfo->block_num=block_num;
392      block_num++;
393         /* get function name and entry point information */
394      const HChar *fn_name;
395      VG_(get_fnname)(origAddr, &fn_name);
396      bbInfo->is_entry=VG_(get_fnname_if_entry)(origAddr, &fn_name);
397      bbInfo->fn_name =VG_(strdup)("bbv_strings", fn_name);
398         /* insert structure into table */
399      VG_(OSetGen_Insert)( instr_info_table, bbInfo );
400   }
401
402      /* Iterate through the basic block, putting the original   */
403      /* instructions in place, plus putting a call to updateBBV */
404      /* for each original instruction                           */
405
406      /* This is less efficient than only instrumenting the BB   */
407      /* But it gives proper results given the fact that         */
408      /* valgrind uses superblocks (not basic blocks) by default */
409
410
411   while(i < sbIn->stmts_used) {
412      st=sbIn->stmts[i];
413
414      if (st->tag == Ist_IMark) {
415
416         ourAddr = st->Ist.IMark.addr;
417
418         opcode_type=get_inst_type(st->Ist.IMark.len,ourAddr);
419
420         regparms=1;
421         arg1= mkIRExpr_HWord( (HWord)bbInfo);
422         argv= mkIRExprVec_1(arg1);
423
424
425         if (opcode_type&REP_INSTRUCTION) {
426            arg1= mkIRExpr_HWord(ourAddr);
427            argv= mkIRExprVec_1(arg1);
428            di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV_rep",
429                                VG_(fnptr_to_fnentry)( &per_instruction_BBV_rep ),
430                                argv);
431         }
432         else if (opcode_type&FLDCW_INSTRUCTION) {
433            di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV_fldcw",
434                                VG_(fnptr_to_fnentry)( &per_instruction_BBV_fldcw ),
435                                argv);
436         }
437         else {
438         di= unsafeIRDirty_0_N( regparms, "per_instruction_BBV",
439                                VG_(fnptr_to_fnentry)( &per_instruction_BBV ),
440                                argv);
441         }
442
443
444            /* Insert our call */
445         addStmtToIRSB( sbOut,  IRStmt_Dirty(di));
446      }
447
448         /* Insert the original instruction */
449      addStmtToIRSB( sbOut, st );
450
451      i++;
452   }
453
454   return sbOut;
455}
456
457static struct thread_info *allocate_new_thread(struct thread_info *old,
458                                     Int old_number, Int new_number)
459{
460   struct thread_info *temp;
461   struct BB_info   *bb_elem;
462   Int i;
463
464   temp=VG_(realloc)("bbv_main.c allocate_threads",
465                     old,
466                     new_number*sizeof(struct thread_info));
467
468      /* init the new thread */
469      /* We loop in case the new thread is not contiguous */
470   for(i=old_number;i<new_number;i++) {
471      temp[i].last_rep_addr=0;
472      temp[i].dyn_instr=0;
473      temp[i].total_instr=0;
474      temp[i].global_rep_count=0;
475      temp[i].unique_rep_count=0;
476      temp[i].rep_count=0;
477      temp[i].fldcw_count=0;
478      temp[i].bbtrace_fp=NULL;
479   }
480      /* expand the inst_counter on all allocated basic blocks */
481   VG_(OSetGen_ResetIter)(instr_info_table);
482   while ( (bb_elem = VG_(OSetGen_Next)(instr_info_table)) ) {
483      bb_elem->inst_counter =
484                    VG_(realloc)("bbv_main.c inst_counter",
485                                 bb_elem->inst_counter,
486                                 new_number*sizeof(Int));
487      for(i=old_number;i<new_number;i++) {
488         bb_elem->inst_counter[i]=0;
489      }
490   }
491
492   return temp;
493}
494
495static void bbv_thread_called ( ThreadId tid, ULong nDisp )
496{
497   if (tid >= allocated_threads) {
498      bbv_thread=allocate_new_thread(bbv_thread,allocated_threads,tid+1);
499      allocated_threads=tid+1;
500   }
501   current_thread=tid;
502}
503
504
505
506
507/*--------------------------------------------------------------------*/
508/*--- Setup                                                        ---*/
509/*--------------------------------------------------------------------*/
510
511static void bbv_post_clo_init(void)
512{
513   bb_out_file =
514          VG_(expand_file_name)("--bb-out-file", clo_bb_out_file);
515
516      /* Try a closer approximation of basic blocks  */
517      /* This is the same as the command line option */
518      /* --vex-guest-chase-thresh=0                  */
519   VG_(clo_vex_control).guest_chase_thresh = 0;
520}
521
522   /* Parse the command line options */
523static Bool bbv_process_cmd_line_option(const HChar* arg)
524{
525   if VG_INT_CLO       (arg, "--interval-size",    interval_size) {}
526   else if VG_STR_CLO  (arg, "--bb-out-file",      clo_bb_out_file) {}
527   else if VG_STR_CLO  (arg, "--pc-out-file",      clo_pc_out_file) {
528      generate_pc_file = True;
529   }
530   else if VG_BOOL_CLO (arg, "--instr-count-only", instr_count_only) {}
531   else {
532      return False;
533   }
534
535   return True;
536}
537
538static void bbv_print_usage(void)
539{
540   VG_(printf)(
541"   --bb-out-file=<file>       filename for BBV info\n"
542"   --pc-out-file=<file>       filename for BB addresses and function names\n"
543"   --interval-size=<num>      interval size\n"
544"   --instr-count-only=yes|no  only print total instruction count\n"
545   );
546}
547
548static void bbv_print_debug_usage(void)
549{
550   VG_(printf)("    (none)\n");
551}
552
553static void bbv_fini(Int exitcode)
554{
555   Int i;
556
557   if (generate_pc_file) {
558      dumpPcFile();
559   }
560
561   for(i=0;i<allocated_threads;i++) {
562
563      if (bbv_thread[i].total_instr!=0) {
564         HChar buf[500];  // large enough
565         VG_(sprintf)(buf,"\n\n"
566                          "# Thread %d\n"
567                          "#   Total intervals: %d (Interval Size %d)\n"
568                          "#   Total instructions: %lld\n"
569                          "#   Total reps: %lld\n"
570                          "#   Unique reps: %lld\n"
571                          "#   Total fldcw instructions: %lld\n\n",
572                i,
573                (Int)(bbv_thread[i].total_instr/(ULong)interval_size),
574                interval_size,
575                bbv_thread[i].total_instr,
576                bbv_thread[i].global_rep_count,
577                bbv_thread[i].unique_rep_count,
578                bbv_thread[i].fldcw_count);
579
580            /* Print results to display */
581         VG_(umsg)("%s\n", buf);
582
583            /* open the output file if it hasn't already */
584         if (bbv_thread[i].bbtrace_fp == NULL) {
585            bbv_thread[i].bbtrace_fp=open_tracefile(i);
586         }
587            /* Also print to results file */
588         VG_(fprintf)(bbv_thread[i].bbtrace_fp, "%s", buf);
589         VG_(fclose)(bbv_thread[i].bbtrace_fp);
590      }
591   }
592}
593
594static void bbv_pre_clo_init(void)
595{
596   VG_(details_name)            ("exp-bbv");
597   VG_(details_version)         (NULL);
598   VG_(details_description)     ("a SimPoint basic block vector generator");
599   VG_(details_copyright_author)(
600      "Copyright (C) 2006-2013 Vince Weaver");
601   VG_(details_bug_reports_to)  (VG_BUGS_TO);
602
603   VG_(basic_tool_funcs)          (bbv_post_clo_init,
604                                   bbv_instrument,
605                                   bbv_fini);
606
607   VG_(needs_command_line_options)(bbv_process_cmd_line_option,
608                                   bbv_print_usage,
609                                   bbv_print_debug_usage);
610
611   VG_(track_start_client_code)( bbv_thread_called );
612
613
614   instr_info_table = VG_(OSetGen_Create)(/*keyOff*/0,
615                                          NULL,
616                                          VG_(malloc), "bbv.1", VG_(free));
617
618   bbv_thread=allocate_new_thread(bbv_thread,0,allocated_threads);
619}
620
621VG_DETERMINE_INTERFACE_VERSION(bbv_pre_clo_init)
622
623/*--------------------------------------------------------------------*/
624/*--- end                                                          ---*/
625/*--------------------------------------------------------------------*/
626