1/*--------------------------------------------------------------------*/
2/*--- Callgrind                                                    ---*/
3/*---                                                       dump.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Callgrind, a Valgrind tool for call tracing.
8
9   Copyright (C) 2002-2017, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
10
11   This program is free software; you can redistribute it and/or
12   modify it under the terms of the GNU General Public License as
13   published by the Free Software Foundation; either version 2 of the
14   License, or (at your option) any later version.
15
16   This program is distributed in the hope that it will be useful, but
17   WITHOUT ANY WARRANTY; without even the implied warranty of
18   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19   General Public License for more details.
20
21   You should have received a copy of the GNU General Public License
22   along with this program; if not, write to the Free Software
23   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24   02111-1307, USA.
25
26   The GNU General Public License is contained in the file COPYING.
27*/
28
29#include "config.h"
30#include "global.h"
31
32#include "pub_tool_threadstate.h"
33#include "pub_tool_libcfile.h"
34
35
/* Dump Part Counter; its value is returned by CLG_(get_dump_counter)() */
static Int out_counter = 0;

/* NOTE(review): presumably the output file name and a flag telling whether
 * dump support was already set up; neither is used in this part of the
 * file -- confirm against the code further below.
 */
static HChar* out_file = 0;
static Bool dumps_initialized = False;

/* Command */
static HChar *cmdbuf;

/* Total reads/writes/misses sum over all dumps and threads.
 * Updated during CC traversal at dump time.
 */
FullCost CLG_(total_cost) = 0;

/* Cost sum of the running dump: fprint_fcost() adds every cost it
 * writes out to this sum.
 */
static FullCost dump_total_cost = 0;

/* Event mapping passed to fprint_cost() when costs are written
 * (see fprint_fcost() and fprint_jcc()).
 */
EventMapping* CLG_(dumpmap) = 0;
52
53Int CLG_(get_dump_counter)(void)
54{
55  return out_counter;
56}
57
58/*------------------------------------------------------------*/
59/*--- Output file related stuff                            ---*/
60/*------------------------------------------------------------*/
61
/* Boolean dumping array: one flag per object, file, function and context,
 * all living in one allocation (see init_dump_array()). The print_*
 * functions below use the flags to remember which names were already
 * written when string compression is active.
 */
static Bool* dump_array = 0;
static Int   dump_array_size = 0;  /* number of flags in dump_array */
/* Sub-ranges of dump_array, set up by init_dump_array() */
static Bool* obj_dumped = 0;       /* indexed by obj_node number */
static Bool* file_dumped = 0;      /* indexed by file_node number */
static Bool* fn_dumped = 0;        /* indexed by fn_node number */
static Bool* cxt_dumped = 0;       /* indexed by context base_number (+ rec_index) */
69
70static
71void reset_dump_array(void)
72{
73    int i;
74
75    CLG_ASSERT(dump_array != 0);
76
77    for(i=0;i<dump_array_size;i++)
78	dump_array[i] = False;
79}
80
81static
82void init_dump_array(void)
83{
84    dump_array_size = CLG_(stat).distinct_objs +
85      CLG_(stat).distinct_files +
86      CLG_(stat).distinct_fns +
87      CLG_(stat).context_counter;
88    CLG_ASSERT(dump_array == 0);
89    dump_array = (Bool*) CLG_MALLOC("cl.dump.ida.1",
90                                    dump_array_size * sizeof(Bool));
91    obj_dumped  = dump_array;
92    file_dumped = obj_dumped + CLG_(stat).distinct_objs;
93    fn_dumped   = file_dumped + CLG_(stat).distinct_files;
94    cxt_dumped  = fn_dumped + CLG_(stat).distinct_fns;
95
96    reset_dump_array();
97
98    CLG_DEBUG(1, "  init_dump_array: size %d\n", dump_array_size);
99}
100
101static __inline__
102void free_dump_array(void)
103{
104    CLG_ASSERT(dump_array != 0);
105    VG_(free)(dump_array);
106
107    dump_array = 0;
108    obj_dumped = 0;
109    file_dumped = 0;
110    fn_dumped = 0;
111    cxt_dumped = 0;
112}
113
114
115/* Initialize to an invalid position */
116static __inline__
117void init_fpos(FnPos* p)
118 {
119    p->file = 0;
120    p->fn = 0;
121    p->obj = 0;
122    p->cxt = 0;
123    p->rec_index = 0;
124}
125
126
127static void print_obj(VgFile *fp, const HChar* prefix, obj_node* obj)
128{
129    if (CLG_(clo).compress_strings) {
130	CLG_ASSERT(obj_dumped != 0);
131	if (obj_dumped[obj->number])
132            VG_(fprintf)(fp, "%s(%u)\n", prefix, obj->number);
133	else {
134            VG_(fprintf)(fp, "%s(%u) %s\n", prefix, obj->number, obj->name);
135	}
136    }
137    else
138        VG_(fprintf)(fp, "%s%s\n", prefix, obj->name);
139
140#if 0
141    /* add mapping parameters the first time a object is dumped
142     * format: mp=0xSTART SIZE 0xOFFSET */
143    if (!obj_dumped[obj->number]) {
144	obj_dumped[obj->number];
145	VG_(fprintf)(fp, "mp=%p %p %p\n",
146		     pos->obj->start, pos->obj->size, pos->obj->offset);
147    }
148#else
149    obj_dumped[obj->number] = True;
150#endif
151}
152
153static void print_file(VgFile *fp, const char *prefix, const file_node* file)
154{
155    if (CLG_(clo).compress_strings) {
156	CLG_ASSERT(file_dumped != 0);
157	if (file_dumped[file->number])
158            VG_(fprintf)(fp, "%s(%u)\n", prefix, file->number);
159	else {
160            VG_(fprintf)(fp, "%s(%u) %s\n", prefix, file->number, file->name);
161	    file_dumped[file->number] = True;
162	}
163    }
164    else
165        VG_(fprintf)(fp, "%s%s\n", prefix, file->name);
166}
167
168/*
169 * tag can be "fn", "cfn", "jfn"
170 */
171static void print_fn(VgFile *fp, const HChar* tag, const fn_node* fn)
172{
173    VG_(fprintf)(fp, "%s=",tag);
174    if (CLG_(clo).compress_strings) {
175	CLG_ASSERT(fn_dumped != 0);
176	if (fn_dumped[fn->number])
177	    VG_(fprintf)(fp, "(%u)\n", fn->number);
178	else {
179	    VG_(fprintf)(fp, "(%u) %s\n", fn->number, fn->name);
180	    fn_dumped[fn->number] = True;
181	}
182    }
183    else
184        VG_(fprintf)(fp, "%s\n", fn->name);
185}
186
/*
 * Print the name of context <cxt> at recursion level <rec_index> as
 * "<tag>=..." line(s). The printed name consists of the names of all
 * functions of the context, joined by "'"; for rec_index > 0, the
 * number (rec_index + 1) is appended after the first function name.
 *
 * Three output modes, depending on the command line options:
 * - compress_strings and compress_mangled: every function of the context
 *   gets the number of its pure context defined first; the name of the
 *   full context is then composed of these numbers
 * - compress_strings only: the full name is written with a number the
 *   first time, and only the number afterwards
 * - no compression: the full name is written every time
 */
static void print_mangled_fn(VgFile *fp, const HChar* tag,
			     Context* cxt, int rec_index)
{
    int i;

    if (CLG_(clo).compress_strings && CLG_(clo).compress_mangled) {

	int n;
	Context* last;

	CLG_ASSERT(cxt_dumped != 0);
	/* already defined: the number alone is enough */
	if (cxt_dumped[cxt->base_number+rec_index]) {
            VG_(fprintf)(fp, "%s=(%u)\n",
			     tag, cxt->base_number + rec_index);
	    return;
	}

	last = 0;
	/* make sure that for all context parts compressed data is written */
	for(i=cxt->size;i>0;i--) {
	    CLG_ASSERT(cxt->fn[i-1]->pure_cxt != 0);
	    n = cxt->fn[i-1]->pure_cxt->base_number;
	    if (cxt_dumped[n]) continue;
	    VG_(fprintf)(fp, "%s=(%d) %s\n",
			     tag, n, cxt->fn[i-1]->name);

	    cxt_dumped[n] = True;
	    /* remember the pure context defined last */
	    last = cxt->fn[i-1]->pure_cxt;
	}
	/* If the last context was the context to print, we are finished */
	if ((last == cxt) && (rec_index == 0)) return;

	/* define the number of the full context as composition of the
	 * numbers of the pure contexts of its functions */
	VG_(fprintf)(fp, "%s=(%u) (%u)", tag,
			 cxt->base_number + rec_index,
			 cxt->fn[0]->pure_cxt->base_number);
	if (rec_index >0)
	    VG_(fprintf)(fp, "'%d", rec_index +1);
	for(i=1;i<cxt->size;i++)
	    VG_(fprintf)(fp, "'(%u)",
			      cxt->fn[i]->pure_cxt->base_number);
	VG_(fprintf)(fp, "\n");

	cxt_dumped[cxt->base_number+rec_index] = True;
	return;
    }


    /* no compression of the name parts: write "<tag>=[(nr) ]<full name>" */
    VG_(fprintf)(fp, "%s=", tag);
    if (CLG_(clo).compress_strings) {
	CLG_ASSERT(cxt_dumped != 0);
	if (cxt_dumped[cxt->base_number+rec_index]) {
	    /* already defined: number only */
	    VG_(fprintf)(fp, "(%u)\n", cxt->base_number + rec_index);
	    return;
	}
	else {
	    /* first use: the number is followed by the full name below */
	    VG_(fprintf)(fp, "(%u) ", cxt->base_number + rec_index);
	    cxt_dumped[cxt->base_number+rec_index] = True;
	}
    }

    /* full name: fn[0]['<rec_index+1>]'fn[1]'fn[2]... */
    VG_(fprintf)(fp, "%s", cxt->fn[0]->name);
    if (rec_index >0)
	VG_(fprintf)(fp, "'%d", rec_index +1);
    for(i=1;i<cxt->size;i++)
	VG_(fprintf)(fp, "'%s", cxt->fn[i]->name);

    VG_(fprintf)(fp, "\n");
}
255
256
257
/**
 * Print function position of the BBCC, but only print info differing to
 * the <last> position, update <last>
 * Return True if something changes.
 *
 * Without name mangling, recursion level ("rec=") and calling function
 * ("frfn=") are written as separate lines; with name mangling, both are
 * part of the function name written by print_mangled_fn().
 * Object ("ob=") and file ("fl=") are taken from the first function of
 * the context of the BBCC.
 */
static Bool print_fn_pos(VgFile *fp, FnPos* last, BBCC* bbcc)
{
    Bool res = False;

    CLG_ASSERT(bbcc && bbcc->cxt);

    CLG_DEBUGIF(3) {
	CLG_DEBUG(2, "+ print_fn_pos: ");
	CLG_(print_cxt)(16, bbcc->cxt, bbcc->rec_index);
    }

    if (!CLG_(clo).mangle_names) {
	if (last->rec_index != bbcc->rec_index) {
	    VG_(fprintf)(fp, "rec=%u\n\n", bbcc->rec_index);
	    last->rec_index = bbcc->rec_index;
	    last->cxt = 0; /* reprint context */
	    res = True;
	}

	if (last->cxt != bbcc->cxt) {
	    /* the calling function is the 2nd function of a context,
	     * if the context has more than one function */
	    fn_node* last_from = (last->cxt && last->cxt->size >1) ?
				 last->cxt->fn[1] : 0;
	    fn_node* curr_from = (bbcc->cxt->size >1) ?
				 bbcc->cxt->fn[1] : 0;
	    if (curr_from == 0) {
		if (last_from != 0) {
		    /* switch back to no context */
		    VG_(fprintf)(fp, "frfn=(spontaneous)\n");
		    res = True;
		}
	    }
	    else if (last_from != curr_from) {
		print_fn(fp, "frfn", curr_from);
		res = True;
	    }
	    last->cxt = bbcc->cxt;
	}
    }

    /* object of the function changed? */
    if (last->obj != bbcc->cxt->fn[0]->file->obj) {
	print_obj(fp, "ob=", bbcc->cxt->fn[0]->file->obj);
	last->obj = bbcc->cxt->fn[0]->file->obj;
	res = True;
    }

    /* file of the function changed? */
    if (last->file != bbcc->cxt->fn[0]->file) {
        print_file(fp, "fl=", bbcc->cxt->fn[0]->file);
	last->file = bbcc->cxt->fn[0]->file;
	res = True;
    }

    if (!CLG_(clo).mangle_names) {
	/* plain function name, only if the function changed */
	if (last->fn != bbcc->cxt->fn[0]) {
	    print_fn(fp, "fn", bbcc->cxt->fn[0]);
	    last->fn = bbcc->cxt->fn[0];
	    res = True;
	}
    }
    else {
	/* Print mangled name if context or rec_index changes */
	if ((last->rec_index != bbcc->rec_index) ||
	    (last->cxt != bbcc->cxt)) {

	    print_mangled_fn(fp, "fn", bbcc->cxt, bbcc->rec_index);
	    last->fn = bbcc->cxt->fn[0];
	    last->rec_index = bbcc->rec_index;
	    res = True;
	}
    }

    /* in mangle mode, the context of <last> is only updated here */
    last->cxt = bbcc->cxt;

    CLG_DEBUG(2, "- print_fn_pos: %s\n", res ? "changed" : "");

    return res;
}
339
/* the debug lookup cache is useful if BBCC for same BB are
 * dumped directly in a row. This is a direct mapped cache:
 * get_debug_pos() stores the result for an address at index
 * (address % DEBUG_CACHE_SIZE), overwriting any previous entry there.
 */
#define DEBUG_CACHE_SIZE 1777

static Addr       debug_cache_addr[DEBUG_CACHE_SIZE]; /* address of the entry */
static file_node* debug_cache_file[DEBUG_CACHE_SIZE]; /* file found for address */
static int        debug_cache_line[DEBUG_CACHE_SIZE]; /* line found for address */
static Bool       debug_cache_info[DEBUG_CACHE_SIZE]; /* was debug info found? */
349
350static __inline__
351void init_debug_cache(void)
352{
353    int i;
354    for(i=0;i<DEBUG_CACHE_SIZE;i++) {
355	debug_cache_addr[i] = 0;
356	debug_cache_file[i] = 0;
357	debug_cache_line[i] = 0;
358	debug_cache_info[i] = 0;
359    }
360}
361
/*
 * Fill the position <p> for the instruction at address <addr>, which
 * belongs to the basic block of <bbcc>.
 * File and line are taken from the debug lookup cache if the cache slot
 * of <addr> holds this address; otherwise they are looked up via
 * VG_(get_filename_linenum)() and stored into the cache slot.
 * Without debug info, the file is set to the file node for "???" and
 * the line to 0.
 *
 * Returns True if debug info (file and line) was found for <addr>.
 */
static /* __inline__ */
Bool get_debug_pos(BBCC* bbcc, Addr addr, AddrPos* p)
{
    const HChar *file, *dir;
    Bool found_file_line;

    /* slot of this address in the direct mapped cache */
    int cachepos = addr % DEBUG_CACHE_SIZE;

    if (debug_cache_addr[cachepos] == addr) {
	/* cache hit */
	p->line = debug_cache_line[cachepos];
	p->file = debug_cache_file[cachepos];
	found_file_line = debug_cache_info[cachepos];
    }
    else {
	/* cache miss: do the lookup and replace the cache entry */
	found_file_line = VG_(get_filename_linenum)(addr,
						    &file,
						    &dir,
						    &(p->line));
	if (!found_file_line) {
            file = "???";
	    p->line = 0;
	}
	/* NOTE(review): <dir> is only written by VG_(get_filename_linenum);
	 * this relies on it being set even when the lookup fails -- confirm. */
	p->file    = CLG_(get_file_node)(bbcc->bb->obj, dir, file);

	debug_cache_info[cachepos] = found_file_line;
	debug_cache_addr[cachepos] = addr;
	debug_cache_line[cachepos] = p->line;
	debug_cache_file[cachepos] = p->file;
    }

    /* The instruction address is stored reduced by the offset of the
     * object of the BB; the BB address stored is the offset of the BB */
    p->addr = addr - bbcc->bb->obj->offset;
    p->bb_addr = bbcc->bb->offset;

    CLG_DEBUG(3, "  get_debug_pos(%#lx): BB %#lx, fn '%s', file '%s', line %u\n",
	     addr, bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name,
	     p->file->name, p->line);

    return found_file_line;
}
402
403
404/* copy file position and init cost */
405static void init_apos(AddrPos* p, Addr addr, Addr bbaddr, file_node* file)
406{
407    p->addr    = addr;
408    p->bb_addr = bbaddr;
409    p->file    = file;
410    p->line    = 0;
411}
412
413static void copy_apos(AddrPos* dst, AddrPos* src)
414{
415    dst->addr    = src->addr;
416    dst->bb_addr = src->bb_addr;
417    dst->file    = src->file;
418    dst->line    = src->line;
419}
420
421/* copy file position and init cost */
422static void init_fcost(AddrCost* c, Addr addr, Addr bbaddr, file_node* file)
423{
424    init_apos( &(c->p), addr, bbaddr, file);
425    /* FIXME: This is a memory leak as a AddrCost is inited multiple times */
426    c->cost = CLG_(get_eventset_cost)( CLG_(sets).full );
427    CLG_(init_cost)( CLG_(sets).full, c->cost );
428}
429
430
431/**
432 * print position change inside of a BB (last -> curr)
433 * this doesn't update last to curr!
434 */
435static void fprint_apos(VgFile *fp, AddrPos* curr, AddrPos* last,
436                        file_node* func_file)
437{
438    CLG_ASSERT(curr->file != 0);
439    CLG_DEBUG(2, "    print_apos(file '%s', line %u, bb %#lx, addr %#lx) fnFile '%s'\n",
440	     curr->file->name, curr->line, curr->bb_addr, curr->addr,
441	     func_file->name);
442
443    if (curr->file != last->file) {
444
445	/* if we switch back to orig file, use fe=... */
446	if (curr->file == func_file)
447            print_file(fp, "fe=", curr->file);
448	else
449            print_file(fp, "fi=", curr->file);
450    }
451
452    if (CLG_(clo).dump_bbs) {
453	if (curr->line != last->line) {
454	    VG_(fprintf)(fp, "ln=%u\n", curr->line);
455	}
456    }
457}
458
459
460
461/**
462 * Print a position.
463 * This prints out differences if allowed
464 *
465 * This doesn't set last to curr afterwards!
466 */
467static
468void fprint_pos(VgFile *fp, const AddrPos* curr, const AddrPos* last)
469{
470    if (0) //CLG_(clo).dump_bbs)
471	VG_(fprintf)(fp, "%lu ", curr->addr - curr->bb_addr);
472    else {
473	if (CLG_(clo).dump_instr) {
474	    int diff = curr->addr - last->addr;
475	    if ( CLG_(clo).compress_pos && (last->addr >0) &&
476		 (diff > -100) && (diff < 100)) {
477		if (diff >0)
478		    VG_(fprintf)(fp, "+%d ", diff);
479		else if (diff==0)
480		    VG_(fprintf)(fp, "* ");
481	        else
482		    VG_(fprintf)(fp, "%d ", diff);
483	    }
484	    else
485		VG_(fprintf)(fp, "%#lx ", curr->addr);
486	}
487
488	if (CLG_(clo).dump_bb) {
489	    int diff = curr->bb_addr - last->bb_addr;
490	    if ( CLG_(clo).compress_pos && (last->bb_addr >0) &&
491		 (diff > -100) && (diff < 100)) {
492		if (diff >0)
493		    VG_(fprintf)(fp, "+%d ", diff);
494		else if (diff==0)
495		    VG_(fprintf)(fp, "* ");
496	        else
497		    VG_(fprintf)(fp, "%d ", diff);
498	    }
499	    else
500		VG_(fprintf)(fp, "%#lx ", curr->bb_addr);
501	}
502
503	if (CLG_(clo).dump_line) {
504	    int diff = curr->line - last->line;
505	    if ( CLG_(clo).compress_pos && (last->line >0) &&
506		 (diff > -100) && (diff < 100)) {
507
508		if (diff >0)
509		    VG_(fprintf)(fp, "+%d ", diff);
510		else if (diff==0)
511		    VG_(fprintf)(fp, "* ");
512	        else
513		    VG_(fprintf)(fp, "%d ", diff);
514	    }
515	    else
516		VG_(fprintf)(fp, "%u ", curr->line);
517	}
518    }
519}
520
521
522/**
523 * Print events.
524 */
525
526static
527void fprint_cost(VgFile *fp, const EventMapping* es, const ULong* cost)
528{
529  HChar *mcost = CLG_(mappingcost_as_string)(es, cost);
530  VG_(fprintf)(fp, "%s\n", mcost);
531  CLG_FREE(mcost);
532}
533
534
535
536/* Write the cost of a source line; only that parts of the source
537 * position are written that changed relative to last written position.
538 * funcPos is the source position of the first line of actual function.
539 * Something is written only if cost != 0; returns True in this case.
540 */
541static void fprint_fcost(VgFile *fp, AddrCost* c, AddrPos* last)
542{
543  CLG_DEBUGIF(3) {
544    CLG_DEBUG(2, "   print_fcost(file '%s', line %u, bb %#lx, addr %#lx):\n",
545	     c->p.file->name, c->p.line, c->p.bb_addr, c->p.addr);
546    CLG_(print_cost)(-5, CLG_(sets).full, c->cost);
547  }
548
549  fprint_pos(fp, &(c->p), last);
550  copy_apos( last, &(c->p) ); /* update last to current position */
551
552  fprint_cost(fp, CLG_(dumpmap), c->cost);
553
554  /* add cost to total */
555  CLG_(add_and_zero_cost)( CLG_(sets).full, dump_total_cost, c->cost );
556}
557
558
/* Write out the calls from jcc (at pos)
 *
 * <jcc> is either a followed (conditional) jump or a call arc. <curr> is
 * the source position of the jump/call, <last> the position written last,
 * and <ecounter> the execution count printed for conditional jumps.
 *
 * The call counter of the jCC is reset after writing; for call arcs,
 * the cost of the jCC is reset, too.
 */
static void fprint_jcc(VgFile *fp, jCC* jcc, AddrPos* curr, AddrPos* last,
                       ULong ecounter)
{
    /* position of the jump/call target; static storage, refilled
     * by get_debug_pos() on every call */
    static AddrPos target;
    file_node* file;
    obj_node*  obj;

    CLG_DEBUGIF(2) {
      CLG_DEBUG(2, "   fprint_jcc (jkind %d)\n", (Int)jcc->jmpkind);
      CLG_(print_jcc)(-10, jcc);
    }

    CLG_ASSERT(jcc->to !=0);
    CLG_ASSERT(jcc->from !=0);

    /* the target position is the start of the target BB */
    if (!get_debug_pos(jcc->to, bb_addr(jcc->to->bb), &target)) {
	/* if we don't have debug info, don't switch to file "???" */
	target.file = last->file;
    }

    if ((jcc->jmpkind == jk_CondJump) || (jcc->jmpkind == jk_Jump)) {

      /* this is a JCC for a followed conditional or boring jump. */
      CLG_ASSERT(CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost));

      /* objects among jumps should be the same.
       * Otherwise this jump would have been changed to a call
       *  (see setup_bbcc)
       */
      CLG_ASSERT(jcc->from->bb->obj == jcc->to->bb->obj);

	/* only print if target position info is useful */
	if (!CLG_(clo).dump_instr && !CLG_(clo).dump_bb && target.line==0) {
	  jcc->call_counter = 0;
	  return;
	}

	/* Different files/functions are possible e.g. with longjmp's
	 * which change the stack, and thus context
	 */
	if (last->file != target.file) {
            print_file(fp, "jfi=", target.file);
	}

	if (jcc->from->cxt != jcc->to->cxt) {
	    if (CLG_(clo).mangle_names)
		print_mangled_fn(fp, "jfn",
				 jcc->to->cxt, jcc->to->rec_index);
	    else
		print_fn(fp, "jfn", jcc->to->cxt->fn[0]);
	}

	if (jcc->jmpkind == jk_CondJump) {
	    /* format: jcnd=<followed>/<executions> <target> */
	    VG_(fprintf)(fp, "jcnd=%llu/%llu ",
			 jcc->call_counter, ecounter);
	}
	else {
	    /* format: jump=<jump count> <target> */
	    VG_(fprintf)(fp, "jump=%llu ",
			 jcc->call_counter);
	}

	/* target position ends the jump line; the source position of
	 * the jump is written as separate line without any cost */
	fprint_pos(fp, &target, last);
	VG_(fprintf)(fp, "\n");
	fprint_pos(fp, curr, last);
	VG_(fprintf)(fp, "\n");

	jcc->call_counter = 0;
	return;
    }

    /* not a jump: write the arc as call.
     * The called file is the file of the called function; the called
     * object is the object of the target BB. */
    file = jcc->to->cxt->fn[0]->file;
    obj  = jcc->to->bb->obj;

    /* object of called position different to object of this function?*/
    if (jcc->from->cxt->fn[0]->file->obj != obj) {
	print_obj(fp, "cob=", obj);
    }

    /* file of called position different to current file? */
    if (last->file != file) {
        print_file(fp, "cfi=", file);
    }

    if (CLG_(clo).mangle_names)
	print_mangled_fn(fp, "cfn", jcc->to->cxt, jcc->to->rec_index);
    else
	print_fn(fp, "cfn", jcc->to->cxt->fn[0]);

    /* the "calls=" line and the cost line are only written for a jCC
     * with cost; the specifications above are written in any case */
    if (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost)) {
        /* format: calls=<call count> <target>, followed by a line
         * with the source position and the cost of the call */
        VG_(fprintf)(fp, "calls=%llu ",
		   jcc->call_counter);

	fprint_pos(fp, &target, last);
        VG_(fprintf)(fp, "\n");
	fprint_pos(fp, curr, last);
	fprint_cost(fp, CLG_(dumpmap), jcc->cost);

	CLG_(init_cost)( CLG_(sets).full, jcc->cost );

	jcc->call_counter = 0;
    }
}
665
666
667
/* Cost summation of functions. We use alternately ccSum[0/1], thus
 * ccSum[currSum] for recently read lines with same line number.
 * fprint_bbcc() sums up cost in ccSum[currSum], and uses the other
 * entry for the position of the instruction currently looked at.
 */
static AddrCost ccSum[2];
static int currSum;   /* index of the entry currently summed into: 0 or 1 */
673
/*
 * Print all costs of a BBCC:
 * - FCCs of instructions
 * - JCCs of the unique jump of this BB
 * returns True if something was written
 *
 * The cost of consecutive instructions is summed up in ccSum[currSum]
 * as long as file and line stay the same (with dumping of BBs or
 * instruction addresses, every instruction is handled on its own).
 * <last> is the position written last, and is updated when cost is
 * written. Cost summed up but not written at return stays in
 * ccSum[currSum].
 * The execution counters of the BBCC are reset at the end.
 */
static Bool fprint_bbcc(VgFile *fp, BBCC* bbcc, AddrPos* last)
{
  InstrInfo* instr_info;
  ULong ecounter;
  Bool something_written = False;
  jCC* jcc;
  AddrCost *currCost, *newCost;
  Int jcc_count = 0, instr, i, jmp;
  BB* bb = bbcc->bb;

  CLG_ASSERT(bbcc->cxt != 0);
  CLG_DEBUGIF(1) {
    VG_(printf)("+ fprint_bbcc (Instr %u): ", bb->instr_count);
    CLG_(print_bbcc)(15, bbcc);
  }

  /* currCost: cost summed up so far; newCost: buffer for the position
   * of the next instruction */
  CLG_ASSERT(currSum == 0 || currSum == 1);
  currCost = &(ccSum[currSum]);
  newCost  = &(ccSum[1-currSum]);

  /* ecounter is the execution count of the current instruction: it
   * starts with the sum for the BBCC and is reduced at every side exit */
  ecounter = bbcc->ecounter_sum;
  jmp = 0;
  instr_info = &(bb->instr[0]);
  for(instr=0; instr<bb->instr_count; instr++, instr_info++) {

    /* get debug info of current instruction address and dump cost
     * if CLG_(clo).dump_bbs or file/line has changed
     */
    if (!get_debug_pos(bbcc, bb_addr(bb) + instr_info->instr_offset,
		       &(newCost->p))) {
      /* if we don't have debug info, don't switch to file "???" */
      newCost->p.file = bbcc->cxt->fn[0]->file;
    }

    if (CLG_(clo).dump_bbs || CLG_(clo).dump_instr ||
	(newCost->p.line != currCost->p.line) ||
	(newCost->p.file != currCost->p.file)) {

      /* position changed: write out the cost summed up so far */
      if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
	something_written = True;

	fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file);
	fprint_fcost(fp, currCost, last);
      }

      /* switch buffers */
      currSum = 1 - currSum;
      currCost = &(ccSum[currSum]);
      newCost  = &(ccSum[1-currSum]);
    }

    /* add line cost to current cost sum */
    (*CLG_(cachesim).add_icost)(currCost->cost, bbcc, instr_info, ecounter);

    /* print jcc's if there are: only jumps */
    if (bb->jmp[jmp].instr == instr) {
	/* count the jCCs to print: jumps which were followed, or
	 * jCCs with cost */
	jcc_count=0;
	for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from)
	    if (((jcc->jmpkind != jk_Call) && (jcc->call_counter >0)) ||
		(!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))
	      jcc_count++;

	if (jcc_count>0) {
	    if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
		/* no need to switch buffers, as position is the same */
		fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file);
		fprint_fcost(fp, currCost, last);
	    }
	    /* the source position of the jCCs is this instruction */
	    get_debug_pos(bbcc, bb_addr(bb)+instr_info->instr_offset, &(currCost->p));
	    fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file);
	    something_written = True;
	    for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) {
		if (((jcc->jmpkind != jk_Call) && (jcc->call_counter >0)) ||
		    (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))
		    fprint_jcc(fp, jcc, &(currCost->p), last, ecounter);
	    }
	}
    }

    /* update execution counter */
    if (jmp < bb->cjmp_count)
	if (bb->jmp[jmp].instr == instr) {
	    /* executions leaving at this side exit do not reach the
	     * following instructions */
	    ecounter -= bbcc->jmp[jmp].ecounter;
	    jmp++;
	}
  }

  /* jCCs at end? If yes, dump cumulated line info first */
  jcc_count = 0;
  for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) {
      /* yes, if JCC only counts jmp arcs or cost >0 */
      if ( ((jcc->jmpkind != jk_Call) && (jcc->call_counter >0)) ||
	   (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))
	  jcc_count++;
  }

  if ( (bbcc->skipped &&
	!CLG_(is_zero_cost)(CLG_(sets).full, bbcc->skipped)) ||
       (jcc_count>0) ) {

    if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
      /* no need to switch buffers, as position is the same */
      fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file);
      fprint_fcost(fp, currCost, last);
    }

    /* skipped cost and jCCs at the end get the position of
     * bb_jmpaddr(bb) as source position */
    get_debug_pos(bbcc, bb_jmpaddr(bb), &(currCost->p));
    fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file);
    something_written = True;

    /* first, print skipped costs for calls */
    if (bbcc->skipped && !CLG_(is_zero_cost)( CLG_(sets).full,
					     bbcc->skipped )) {
      CLG_(add_and_zero_cost)( CLG_(sets).full,
			      currCost->cost, bbcc->skipped );
#if 0
      VG_(fprintf)(fp, "# Skipped\n");
#endif
      fprint_fcost(fp, currCost, last);
    }

    if (jcc_count > 0)
	for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) {
	    CLG_ASSERT(jcc->jmp == jmp);
	    if ( ((jcc->jmpkind != jk_Call) && (jcc->call_counter >0)) ||
		 (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))

		fprint_jcc(fp, jcc, &(currCost->p), last, ecounter);
	}
  }

  /* when dumping BBs or BB addresses, cost is not carried over to the
   * next BBCC: write out what is left */
  if (CLG_(clo).dump_bbs || CLG_(clo).dump_bb) {
    if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
      something_written = True;

      fprint_apos(fp, &(currCost->p), last, bbcc->cxt->fn[0]->file);
      fprint_fcost(fp, currCost, last);
    }
    if (CLG_(clo).dump_bbs) VG_(fprintf)(fp, "\n");

    /* when every cost was immediately written, we must have done so,
     * as this function is only called when there's cost in a BBCC
     */
    CLG_ASSERT(something_written);
  }

  /* reset the execution counters of the BBCC; there is one jump
   * entry more than there are side exits (cjmp_count) */
  bbcc->ecounter_sum = 0;
  for(i=0; i<=bbcc->bb->cjmp_count; i++)
    bbcc->jmp[i].ecounter = 0;
  bbcc->ret_counter = 0;

  CLG_DEBUG(1, "- fprint_bbcc: JCCs %d\n", jcc_count);

  return something_written;
}
835
836/* order by
837 *  recursion,
838 *  from->bb->obj, from->bb->fn
839 *  obj, fn[0]->file, fn
840 *  address
841 */
842static int my_cmp(BBCC** pbbcc1, BBCC** pbbcc2)
843{
844#if 0
845    return (*pbbcc1)->bb->offset - (*pbbcc2)->bb->offset;
846#else
847    BBCC *bbcc1 = *pbbcc1;
848    BBCC *bbcc2 = *pbbcc2;
849    Context* cxt1 = bbcc1->cxt;
850    Context* cxt2 = bbcc2->cxt;
851    int off = 1;
852
853    if (cxt1->fn[0]->file->obj != cxt2->fn[0]->file->obj)
854	return cxt1->fn[0]->file->obj - cxt2->fn[0]->file->obj;
855
856    if (cxt1->fn[0]->file != cxt2->fn[0]->file)
857	return cxt1->fn[0]->file - cxt2->fn[0]->file;
858
859    if (cxt1->fn[0] != cxt2->fn[0])
860	return cxt1->fn[0] - cxt2->fn[0];
861
862    if (bbcc1->rec_index != bbcc2->rec_index)
863	return bbcc1->rec_index - bbcc2->rec_index;
864
865    while((off < cxt1->size) && (off < cxt2->size)) {
866	fn_node* ffn1 = cxt1->fn[off];
867	fn_node* ffn2 = cxt2->fn[off];
868	if (ffn1->file->obj != ffn2->file->obj)
869	    return ffn1->file->obj - ffn2->file->obj;
870	if (ffn1 != ffn2)
871	    return ffn1 - ffn2;
872	off++;
873    }
874    if      (cxt1->size > cxt2->size) return 1;
875    else if (cxt1->size < cxt2->size) return -1;
876
877    return bbcc1->bb->offset - bbcc2->bb->offset;
878#endif
879}
880
881
882
883
884
885/* modified version of:
886 *
887 * qsort -- qsort interface implemented by faster quicksort.
888 * J. L. Bentley and M. D. McIlroy, SPE 23 (1993) 1249-1265.
889 * Copyright 1993, John Wiley.
890*/
891
892static __inline__
893void swap(BBCC** a, BBCC** b)
894{
895    BBCC* t;
896    t = *a; *a = *b; *b = t;
897}
898
/* Minimum of x and y. Note that the arguments may be evaluated twice. */
#define min(x, y) ((x)<=(y) ? (x) : (y))
900
901static
902BBCC** med3(BBCC **a, BBCC **b, BBCC **c, int (*cmp)(BBCC**,BBCC**))
903{	return cmp(a, b) < 0 ?
904		  (cmp(b, c) < 0 ? b : cmp(a, c) < 0 ? c : a)
905		: (cmp(b, c) > 0 ? b : cmp(a, c) > 0 ? c : a);
906}
907
/* Start of the array currently sorted; only used to print array
 * indices in debug output. Set by prepare_dump() before sorting. */
static BBCC** qsort_start = 0;

/* Sort the <n> BBCC pointers starting at <a> in place, in ascending
 * order according to the comparison function <cmp>.
 *
 * Arrays with less than 7 entries are sorted by insertion sort.
 * Otherwise, the array is partitioned around a pivot value: entries
 * comparing equal to the pivot are collected at both ends of the array
 * while partitioning and moved to the middle afterwards, and the parts
 * with smaller and larger entries are sorted recursively.
 */
static void CLG_(qsort)(BBCC **a, int n, int (*cmp)(BBCC**,BBCC**))
{
	BBCC **pa, **pb, **pc, **pd, **pl, **pm, **pn, **pv;
	int s, r;
	BBCC* v;

	CLG_DEBUG(8, "  qsort(%ld,%ld)\n", a-qsort_start + 0L, n + 0L);

	if (n < 7) {	 /* Insertion sort on smallest arrays */
		for (pm = a+1; pm < a+n; pm++)
			for (pl = pm; pl > a && cmp(pl-1, pl) > 0; pl --)
				swap(pl, pl-1);

		CLG_DEBUGIF(8) {
		    for (pm = a; pm < a+n; pm++) {
			VG_(printf)("   %3ld BB %#lx, ",
                                    pm - qsort_start + 0L,
				    bb_addr((*pm)->bb));
			CLG_(print_cxt)(9, (*pm)->cxt, (*pm)->rec_index);
		    }
		}
		return;
	}
	/* choose the pivot position pm */
	pm = a + n/2;    /* Small arrays, middle element */
	if (n > 7) {
		pl = a;
		pn = a + (n-1);
		if (n > 40) {    /* Big arrays, pseudomedian of 9 */
			s = n/8;
			pl = med3(pl, pl+s, pl+2*s, cmp);
			pm = med3(pm-s, pm, pm+s, cmp);
			pn = med3(pn-2*s, pn-s, pn, cmp);
		}
		pm = med3(pl, pm, pn, cmp); /* Mid-size, med of 3 */
	}


	/* The pivot value is copied, as array entries get swapped around.
	 * While partitioning, the array consists of these parts:
	 *   [a,pa)   equal to pivot      [pa,pb)  smaller than pivot
	 *   [pb,pc]  not looked at yet
	 *   (pc,pd]  larger than pivot   (pd,a+n) equal to pivot
	 */
	v = *pm;
	pv = &v;
	pa = pb = a;
	pc = pd = a + (n-1);
	for (;;) {
		while ((pb <= pc) && ((r=cmp(pb, pv)) <= 0)) {
		    if (r==0) {
			/* same as pivot, to start */
			swap(pa,pb); pa++;
		    }
		    pb ++;
		}
		while ((pb <= pc) && ((r=cmp(pc, pv)) >= 0)) {
		    if (r==0) {
			/* same as pivot, to end */
			swap(pc,pd); pd--;
		    }
		    pc --;
		}
		if (pb > pc) { break; }
		/* *pb is larger and *pc is smaller than the pivot */
		swap(pb, pc);
		pb ++;
		pc --;
	}
	/* now pb is the last entry of the part with smaller entries,
	 * and pc the first entry of the part with larger entries */
	pb--;
	pc++;

	/* put pivot from start into middle */
	if ((s = pa-a)>0) { for(r=0;r<s;r++) swap(a+r, pb+1-s+r); }
	/* put pivot from end into middle */
	if ((s = a+n-1-pd)>0) { for(r=0;r<s;r++) swap(pc+r, a+n-s+r); }

	CLG_DEBUGIF(8) {
	  VG_(printf)("   PV BB %#lx, ", bb_addr((*pv)->bb));
	    CLG_(print_cxt)(9, (*pv)->cxt, (*pv)->rec_index);

	    s = pb-pa+1;
	    VG_(printf)("    Lower %ld - %ld:\n",
                        a-qsort_start + 0L,
                        a+s-1-qsort_start + 0L);
	    for (r=0;r<s;r++) {
		pm = a+r;
		VG_(printf)("     %3ld BB %#lx, ",
			    pm-qsort_start + 0L,
                            bb_addr((*pm)->bb));
		CLG_(print_cxt)(9, (*pm)->cxt, (*pm)->rec_index);
	    }

	    s = pd-pc+1;
	    VG_(printf)("    Upper %ld - %ld:\n",
			a+n-s-qsort_start + 0L,
                        a+n-1-qsort_start + 0L);
	    for (r=0;r<s;r++) {
		pm = a+n-s+r;
		VG_(printf)("     %3ld BB %#lx, ",
			    pm-qsort_start + 0L,
                            bb_addr((*pm)->bb));
		CLG_(print_cxt)(9, (*pm)->cxt, (*pm)->rec_index);
	    }
	}

	/* sort the smaller entries, now at the start of the array, and
	 * the larger entries, now at its end; the entries equal to the
	 * pivot in between are at their final place */
	if ((s = pb+1-pa) > 1) CLG_(qsort)(a,     s, cmp);
	if ((s = pd+1-pc) > 1) CLG_(qsort)(a+n-s, s, cmp);
}
1011
1012
/* Helpers for prepare_dump */

/* Number of BBCCs to dump; counted by hash_addCount() and cs_addCount() */
static Int    prepare_count;
/* Next free entry of the array being filled by hash_addPtr() and
 * cs_addPtr() */
static BBCC** prepare_ptr;
1017
1018
1019static void hash_addCount(BBCC* bbcc)
1020{
1021  if ((bbcc->ecounter_sum > 0) || (bbcc->ret_counter>0))
1022    prepare_count++;
1023}
1024
1025static void hash_addPtr(BBCC* bbcc)
1026{
1027  if ((bbcc->ecounter_sum == 0) &&
1028      (bbcc->ret_counter == 0)) return;
1029
1030  *prepare_ptr = bbcc;
1031  prepare_ptr++;
1032}
1033
1034
1035static void cs_addCount(thread_info* ti)
1036{
1037  Int i;
1038  BBCC* bbcc;
1039
1040  /* add BBCCs with active call in call stack of current thread.
1041   * update cost sums for active calls
1042   */
1043
1044  for(i = 0; i < CLG_(current_call_stack).sp; i++) {
1045    call_entry* e = &(CLG_(current_call_stack).entry[i]);
1046    if (e->jcc == 0) continue;
1047
1048    CLG_(add_diff_cost_lz)( CLG_(sets).full, &(e->jcc->cost),
1049			   e->enter_cost, CLG_(current_state).cost);
1050    bbcc = e->jcc->from;
1051
1052    CLG_DEBUG(1, " [%2d] (tid %u), added active: %s\n",
1053	     i,CLG_(current_tid),bbcc->cxt->fn[0]->name);
1054
1055    if (bbcc->ecounter_sum>0 || bbcc->ret_counter>0) {
1056      /* already counted */
1057      continue;
1058    }
1059    prepare_count++;
1060  }
1061}
1062
1063static void cs_addPtr(thread_info* ti)
1064{
1065  Int i;
1066  BBCC* bbcc;
1067
1068  /* add BBCCs with active call in call stack of current thread.
1069   * update cost sums for active calls
1070   */
1071
1072  for(i = 0; i < CLG_(current_call_stack).sp; i++) {
1073    call_entry* e = &(CLG_(current_call_stack).entry[i]);
1074    if (e->jcc == 0) continue;
1075
1076    bbcc = e->jcc->from;
1077
1078    if (bbcc->ecounter_sum>0 || bbcc->ret_counter>0) {
1079      /* already counted */
1080      continue;
1081    }
1082
1083    *prepare_ptr = bbcc;
1084    prepare_ptr++;
1085  }
1086}
1087
1088
1089/**
1090 * Put all BBCCs with costs into a sorted array.
1091 * The returned arrays ends with a null pointer.
1092 * Must be freed after dumping.
1093 */
1094static
1095BBCC** prepare_dump(void)
1096{
1097    BBCC **array;
1098
1099    prepare_count = 0;
1100
1101    /* if we do not separate among threads, this gives all */
1102    /* count number of BBCCs with >0 executions */
1103    CLG_(forall_bbccs)(hash_addCount);
1104
1105    /* even if we do not separate among threads,
1106     * call stacks are separated */
1107    if (CLG_(clo).separate_threads)
1108      cs_addCount(0);
1109    else
1110      CLG_(forall_threads)(cs_addCount);
1111
1112    CLG_DEBUG(0, "prepare_dump: %d BBCCs\n", prepare_count);
1113
1114    /* allocate bbcc array, insert BBCCs and sort */
1115    prepare_ptr = array =
1116      (BBCC**) CLG_MALLOC("cl.dump.pd.1",
1117                          (prepare_count+1) * sizeof(BBCC*));
1118
1119    CLG_(forall_bbccs)(hash_addPtr);
1120
1121    if (CLG_(clo).separate_threads)
1122      cs_addPtr(0);
1123    else
1124      CLG_(forall_threads)(cs_addPtr);
1125
1126    CLG_ASSERT(array + prepare_count == prepare_ptr);
1127
1128    /* end mark */
1129    *prepare_ptr = 0;
1130
1131    CLG_DEBUG(0,"             BBCCs inserted\n");
1132
1133    qsort_start = array;
1134    CLG_(qsort)(array, prepare_count, my_cmp);
1135
1136    CLG_DEBUG(0,"             BBCCs sorted\n");
1137
1138    return array;
1139}
1140
1141
1142
1143
1144static void fprint_cost_ln(VgFile *fp, const HChar* prefix,
1145			   const EventMapping* em, const ULong* cost)
1146{
1147    HChar *mcost = CLG_(mappingcost_as_string)(em, cost);
1148    VG_(fprintf)(fp, "%s%s\n", prefix, mcost);
1149    CLG_FREE(mcost);
1150}
1151
1152static ULong bbs_done = 0;
1153static HChar* filename = 0;
1154
1155static
1156void file_err(void)
1157{
1158   VG_(message)(Vg_UserMsg,
1159                "Error: can not open cache simulation output file `%s'\n",
1160                filename );
1161   VG_(exit)(1);
1162}
1163
1164/**
1165 * Create a new dump file and write header.
1166 *
1167 * Naming: <CLG_(clo).filename_base>.<pid>[.<part>][-<tid>]
1168 *         <part> is skipped for final dump (trigger==0)
1169 *         <tid>  is skipped for thread 1 with CLG_(clo).separate_threads=no
1170 *
1171 * Returns the file descriptor, and -1 on error (no write permission)
1172 */
1173static VgFile *new_dumpfile(int tid, const HChar* trigger)
1174{
1175    Bool appending = False;
1176    int i;
1177    FullCost sum = 0;
1178    VgFile *fp;
1179
1180    CLG_ASSERT(dumps_initialized);
1181    CLG_ASSERT(filename != 0);
1182
1183    if (!CLG_(clo).combine_dumps) {
1184	i = VG_(sprintf)(filename, "%s", out_file);
1185
1186	if (trigger)
1187	    i += VG_(sprintf)(filename+i, ".%d", out_counter);
1188
1189	if (CLG_(clo).separate_threads)
1190	    VG_(sprintf)(filename+i, "-%02d", tid);
1191
1192	fp = VG_(fopen)(filename, VKI_O_WRONLY|VKI_O_TRUNC, 0);
1193    }
1194    else {
1195	VG_(sprintf)(filename, "%s", out_file);
1196        fp = VG_(fopen)(filename, VKI_O_WRONLY|VKI_O_APPEND, 0);
1197	if (fp && out_counter>1)
1198	    appending = True;
1199    }
1200
1201    if (fp == NULL) {
1202	fp = VG_(fopen)(filename, VKI_O_CREAT|VKI_O_WRONLY,
1203                        VKI_S_IRUSR|VKI_S_IWUSR);
1204	if (fp == NULL) {
1205	    /* If the file can not be opened for whatever reason (conflict
1206	       between multiple supervised processes?), give up now. */
1207	    file_err();
1208	}
1209    }
1210
1211    CLG_DEBUG(2, "  new_dumpfile '%s'\n", filename);
1212
1213    if (!appending)
1214	reset_dump_array();
1215
1216
1217    if (!appending) {
1218	/* callgrind format specification, has to be on 1st line */
1219	VG_(fprintf)(fp, "# callgrind format\n");
1220
1221	/* version */
1222	VG_(fprintf)(fp, "version: 1\n");
1223
1224	/* creator */
1225	VG_(fprintf)(fp, "creator: callgrind-" VERSION "\n");
1226
1227	/* "pid:" line */
1228	VG_(fprintf)(fp, "pid: %d\n", VG_(getpid)());
1229
1230	/* "cmd:" line */
1231	VG_(fprintf)(fp, "cmd: %s", cmdbuf);
1232    }
1233
1234    VG_(fprintf)(fp, "\npart: %d\n", out_counter);
1235    if (CLG_(clo).separate_threads) {
1236	VG_(fprintf)(fp, "thread: %d\n", tid);
1237    }
1238
1239    /* "desc:" lines */
1240    if (!appending) {
1241        VG_(fprintf)(fp, "\n");
1242
1243#if 0
1244	/* Global options changing the tracing behaviour */
1245	VG_(fprintf)(fp, "\ndesc: Option: --skip-plt=%s\n",
1246		     CLG_(clo).skip_plt ? "yes" : "no");
1247	VG_(fprintf)(fp, "desc: Option: --collect-jumps=%s\n",
1248		     CLG_(clo).collect_jumps ? "yes" : "no");
1249	VG_(fprintf)(fp, "desc: Option: --separate-recs=%d\n",
1250		     CLG_(clo).separate_recursions);
1251	VG_(fprintf)(fp, "desc: Option: --separate-callers=%d\n",
1252		     CLG_(clo).separate_callers);
1253
1254	VG_(fprintf)(fp, "desc: Option: --dump-bbs=%s\n",
1255		     CLG_(clo).dump_bbs ? "yes" : "no");
1256	VG_(fprintf)(fp, "desc: Option: --separate-threads=%s\n",
1257		     CLG_(clo).separate_threads ? "yes" : "no");
1258#endif
1259
1260	(*CLG_(cachesim).dump_desc)(fp);
1261    }
1262
1263    VG_(fprintf)(fp, "\ndesc: Timerange: Basic block %llu - %llu\n",
1264		 bbs_done, CLG_(stat).bb_executions);
1265
1266    VG_(fprintf)(fp, "desc: Trigger: %s\n",
1267		 trigger ? trigger : "Program termination");
1268
1269#if 0
1270   /* Output function specific config
1271    * FIXME */
1272   for (i = 0; i < N_FNCONFIG_ENTRIES; i++) {
1273       fnc = fnc_table[i];
1274       while (fnc) {
1275	   if (fnc->skip) {
1276	       VG_(fprintf)(fp, "desc: Option: --fn-skip=%s\n", fnc->name);
1277	   }
1278	   if (fnc->dump_at_enter) {
1279	       VG_(fprintf)(fp, "desc: Option: --fn-dump-at-enter=%s\n",
1280			    fnc->name);
1281	   }
1282	   if (fnc->dump_at_leave) {
1283	       VG_(fprintf)(fp, "desc: Option: --fn-dump-at-leave=%s\n",
1284			    fnc->name);
1285	   }
1286	   if (fnc->separate_callers != CLG_(clo).separate_callers) {
1287	       VG_(fprintf)(fp, "desc: Option: --separate-callers%d=%s\n",
1288			    fnc->separate_callers, fnc->name);
1289	   }
1290	   if (fnc->separate_recursions != CLG_(clo).separate_recursions) {
1291	       VG_(fprintf)(fp, "desc: Option: --separate-recs%d=%s\n",
1292			    fnc->separate_recursions, fnc->name);
1293	   }
1294	   fnc = fnc->next;
1295       }
1296   }
1297#endif
1298
1299   /* "positions:" line */
1300   VG_(fprintf)(fp, "\npositions:%s%s%s\n",
1301		CLG_(clo).dump_instr ? " instr" : "",
1302		CLG_(clo).dump_bb    ? " bb" : "",
1303		CLG_(clo).dump_line  ? " line" : "");
1304
1305   /* "events:" line */
1306   HChar *evmap = CLG_(eventmapping_as_string)(CLG_(dumpmap));
1307   VG_(fprintf)(fp, "events: %s\n", evmap);
1308   VG_(free)(evmap);
1309
1310   /* summary lines */
1311   sum = CLG_(get_eventset_cost)( CLG_(sets).full );
1312   CLG_(zero_cost)(CLG_(sets).full, sum);
1313   if (CLG_(clo).separate_threads) {
1314     thread_info* ti = CLG_(get_current_thread)();
1315     CLG_(add_diff_cost)(CLG_(sets).full, sum, ti->lastdump_cost,
1316			   ti->states.entry[0]->cost);
1317   }
1318   else {
1319     /* This function is called once for thread 1, where
1320      * all costs are summed up when not dumping separate per thread.
1321      * But this is not true for summary: we need to add all threads.
1322      */
1323     int t;
1324     thread_info** thr = CLG_(get_threads)();
1325     for(t=1;t<VG_N_THREADS;t++) {
1326       if (!thr[t]) continue;
1327       CLG_(add_diff_cost)(CLG_(sets).full, sum,
1328			  thr[t]->lastdump_cost,
1329			  thr[t]->states.entry[0]->cost);
1330     }
1331   }
1332   fprint_cost_ln(fp, "summary: ", CLG_(dumpmap), sum);
1333
1334   /* all dumped cost will be added to total_fcc */
1335   CLG_(init_cost_lz)( CLG_(sets).full, &dump_total_cost );
1336
1337   VG_(fprintf)(fp, "\n\n");
1338
1339   if (VG_(clo_verbosity) > 1)
1340       VG_(message)(Vg_DebugMsg, "Dump to %s\n", filename);
1341
1342   return fp;
1343}
1344
1345
1346static void close_dumpfile(VgFile *fp)
1347{
1348    if (fp == NULL) return;
1349
1350    fprint_cost_ln(fp, "totals: ", CLG_(dumpmap),
1351		   dump_total_cost);
1352    //fprint_fcc_ln(fp, "summary: ", &dump_total_fcc);
1353    CLG_(add_cost_lz)(CLG_(sets).full,
1354		     &CLG_(total_cost), dump_total_cost);
1355
1356    VG_(fclose)(fp);
1357
1358    if (filename[0] == '.') {
1359	if (-1 == VG_(rename) (filename, filename+1)) {
1360	    /* Can not rename to correct file name: give out warning */
1361	    VG_(message)(Vg_DebugMsg, "Warning: Can not rename .%s to %s\n",
1362			 filename, filename);
1363       }
1364   }
1365}
1366
1367
1368/* Helper for print_bbccs */
1369
1370static const HChar* print_trigger;
1371
1372static void print_bbccs_of_thread(thread_info* ti)
1373{
1374  BBCC **p, **array;
1375  FnPos lastFnPos;
1376  AddrPos lastAPos;
1377
1378  CLG_DEBUG(1, "+ print_bbccs(tid %u)\n", CLG_(current_tid));
1379
1380  VgFile *print_fp = new_dumpfile(CLG_(current_tid), print_trigger);
1381  if (print_fp == NULL) {
1382    CLG_DEBUG(1, "- print_bbccs(tid %u): No output...\n", CLG_(current_tid));
1383    return;
1384  }
1385
1386  p = array = prepare_dump();
1387  init_fpos(&lastFnPos);
1388  init_apos(&lastAPos, 0, 0, 0);
1389
1390  while(1) {
1391
1392    /* on context/function change, print old cost buffer before */
1393    if (lastFnPos.cxt && ((*p==0) ||
1394			 (lastFnPos.cxt != (*p)->cxt) ||
1395			 (lastFnPos.rec_index != (*p)->rec_index))) {
1396      if (!CLG_(is_zero_cost)( CLG_(sets).full, ccSum[currSum].cost )) {
1397	/* no need to switch buffers, as position is the same */
1398	fprint_apos(print_fp, &(ccSum[currSum].p), &lastAPos,
1399		    lastFnPos.cxt->fn[0]->file);
1400	fprint_fcost(print_fp, &ccSum[currSum], &lastAPos);
1401      }
1402
1403      if (ccSum[currSum].p.file != lastFnPos.cxt->fn[0]->file) {
1404	/* switch back to file of function */
1405	print_file(print_fp, "fe=", lastFnPos.cxt->fn[0]->file);
1406      }
1407      VG_(fprintf)(print_fp, "\n");
1408    }
1409
1410    if (*p == 0) break;
1411
1412    if (print_fn_pos(print_fp, &lastFnPos, *p)) {
1413
1414      /* new function */
1415      init_apos(&lastAPos, 0, 0, (*p)->cxt->fn[0]->file);
1416      init_fcost(&ccSum[0], 0, 0, 0);
1417      init_fcost(&ccSum[1], 0, 0, 0);
1418      currSum = 0;
1419    }
1420
1421    if (CLG_(clo).dump_bbs) {
1422	/* FIXME: Specify Object of BB if different to object of fn */
1423        int i;
1424	ULong ecounter = (*p)->ecounter_sum;
1425        VG_(fprintf)(print_fp, "bb=%#lx ", (UWord)(*p)->bb->offset);
1426	for(i = 0; i<(*p)->bb->cjmp_count;i++) {
1427	    VG_(fprintf)(print_fp, "%u %llu ",
1428				(*p)->bb->jmp[i].instr,
1429				ecounter);
1430	    ecounter -= (*p)->jmp[i].ecounter;
1431	}
1432	VG_(fprintf)(print_fp, "%u %llu\n",
1433		     (*p)->bb->instr_count,
1434		     ecounter);
1435    }
1436
1437    fprint_bbcc(print_fp, *p, &lastAPos);
1438
1439    p++;
1440  }
1441
1442  close_dumpfile(print_fp);
1443  VG_(free)(array);
1444
1445  /* set counters of last dump */
1446  CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost,
1447		  CLG_(current_state).cost );
1448
1449  CLG_DEBUG(1, "- print_bbccs(tid %u)\n", CLG_(current_tid));
1450}
1451
1452
1453static void print_bbccs(const HChar* trigger, Bool only_current_thread)
1454{
1455  init_dump_array();
1456  init_debug_cache();
1457
1458  print_trigger = trigger;
1459
1460  if (!CLG_(clo).separate_threads) {
1461    /* All BBCC/JCC costs is stored for thread 1 */
1462    Int orig_tid = CLG_(current_tid);
1463
1464    CLG_(switch_thread)(1);
1465    print_bbccs_of_thread( CLG_(get_current_thread)() );
1466    CLG_(switch_thread)(orig_tid);
1467  }
1468  else if (only_current_thread)
1469    print_bbccs_of_thread( CLG_(get_current_thread)() );
1470  else
1471    CLG_(forall_threads)(print_bbccs_of_thread);
1472
1473  free_dump_array();
1474}
1475
1476
1477void CLG_(dump_profile)(const HChar* trigger, Bool only_current_thread)
1478{
1479   CLG_DEBUG(2, "+ dump_profile(Trigger '%s')\n",
1480	    trigger ? trigger : "Prg.Term.");
1481
1482   CLG_(init_dumps)();
1483
1484   if (VG_(clo_verbosity) > 1)
1485       VG_(message)(Vg_DebugMsg, "Start dumping at BB %llu (%s)...\n",
1486		    CLG_(stat).bb_executions,
1487		    trigger ? trigger : "Prg.Term.");
1488
1489   out_counter++;
1490
1491   print_bbccs(trigger, only_current_thread);
1492
1493   bbs_done = CLG_(stat).bb_executions++;
1494
1495   if (VG_(clo_verbosity) > 1)
1496     VG_(message)(Vg_DebugMsg, "Dumping done.\n");
1497}
1498
1499/* Copy command to cmd buffer. We want to original command line
1500 * (can change at runtime)
1501 */
1502static
1503void init_cmdbuf(void)
1504{
1505  SizeT size;
1506  Int i,j;
1507
1508  /* Pass #1: How many bytes do we need? */
1509  size  = 1;  // leading ' '
1510  size += VG_(strlen)( VG_(args_the_exename) );
1511  for (i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
1512     const HChar *arg = *(HChar**)VG_(indexXA)( VG_(args_for_client), i );
1513     size += 1;   // separator ' '
1514     // escape NL in arguments to not break dump format
1515     for(j=0; arg[j]; j++)
1516       switch(arg[j]) {
1517       case '\n':
1518       case '\\':
1519	 size++; // fall through
1520       default:
1521	 size++;
1522       }
1523  }
1524
1525  cmdbuf = CLG_MALLOC("cl.dump.ic.1", size + 1);  // +1 for '\0'
1526
1527  /* Pass #2: Build up the string */
1528  size = VG_(sprintf)(cmdbuf, " %s", VG_(args_the_exename));
1529
1530  for(i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
1531     const HChar *arg = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
1532     cmdbuf[size++] = ' ';
1533     for(j=0; arg[j]; j++)
1534       switch(arg[j]) {
1535       case '\n':
1536	 cmdbuf[size++] = '\\';
1537	 cmdbuf[size++] = 'n';
1538	 break;
1539       case '\\':
1540	 cmdbuf[size++] = '\\';
1541	 cmdbuf[size++] = '\\';
1542	 break;
1543       default:
1544	 cmdbuf[size++] = arg[j];
1545	 break;
1546       }
1547  }
1548  cmdbuf[size] = '\0';
1549}
1550
1551/*
1552 * Set up file names for dump output: <out_file>.
1553 * <out_file> is derived from the output format string, which defaults
1554 * to "callgrind.out.%p", where %p is replaced with the PID.
1555 * For the final file name, on intermediate dumps a counter is appended,
1556 * and further, if separate dumps per thread are requested, the thread ID.
1557 *
1558 * <out_file> always starts with a full absolute path.
1559 * If the output format string represents a relative path, the current
1560 * working directory at program start is used.
1561 *
1562 * This function has to be called every time a profile dump is generated
1563 * to be able to react on PID changes.
1564 */
1565void CLG_(init_dumps)()
1566{
1567   SysRes res;
1568
1569   static int thisPID = 0;
1570   int currentPID = VG_(getpid)();
1571   if (currentPID == thisPID) {
1572       /* already initialized, and no PID change */
1573       CLG_ASSERT(out_file != 0);
1574       return;
1575   }
1576   thisPID = currentPID;
1577
1578   if (!CLG_(clo).out_format)
1579     CLG_(clo).out_format = DEFAULT_OUTFORMAT;
1580
1581   /* If a file name was already set, clean up before */
1582   if (out_file) {
1583       VG_(free)(out_file);
1584       VG_(free)(filename);
1585       out_counter = 0;
1586   }
1587
1588   // Setup output filename.
1589   out_file =
1590       VG_(expand_file_name)("--callgrind-out-file", CLG_(clo).out_format);
1591
1592   /* allocate space big enough for final filenames */
1593   filename = (HChar*) CLG_MALLOC("cl.dump.init_dumps.2",
1594                                 VG_(strlen)(out_file)+32);
1595
1596   /* Make sure the output base file can be written.
1597    * This is used for the dump at program termination.
1598    * We stop with an error here if we can not create the
1599    * file: This is probably because of missing rights,
1600    * and trace parts wouldn't be allowed to be written, too.
1601    */
1602    VG_(strcpy)(filename, out_file);
1603    res = VG_(open)(filename, VKI_O_WRONLY|VKI_O_TRUNC, 0);
1604    if (sr_isError(res)) {
1605	res = VG_(open)(filename, VKI_O_CREAT|VKI_O_WRONLY,
1606		       VKI_S_IRUSR|VKI_S_IWUSR);
1607	if (sr_isError(res)) {
1608	    file_err();
1609	}
1610    }
1611    if (!sr_isError(res)) VG_(close)( (Int)sr_Res(res) );
1612
1613    if (!dumps_initialized)
1614	init_cmdbuf();
1615
1616    dumps_initialized = True;
1617}
1618