1/*--------------------------------------------------------------------*/
2/*--- Callgrind                                                    ---*/
3/*---                                                       dump.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Callgrind, a Valgrind tool for call tracing.
8
9   Copyright (C) 2002-2013, Josef Weidendorfer (Josef.Weidendorfer@gmx.de)
10
11   This program is free software; you can redistribute it and/or
12   modify it under the terms of the GNU General Public License as
13   published by the Free Software Foundation; either version 2 of the
14   License, or (at your option) any later version.
15
16   This program is distributed in the hope that it will be useful, but
17   WITHOUT ANY WARRANTY; without even the implied warranty of
18   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19   General Public License for more details.
20
21   You should have received a copy of the GNU General Public License
22   along with this program; if not, write to the Free Software
23   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24   02111-1307, USA.
25
26   The GNU General Public License is contained in the file COPYING.
27*/
28
29#include "config.h"
30#include "global.h"
31
32#include "pub_tool_threadstate.h"
33#include "pub_tool_libcfile.h"
34
35
/* Dump Part Counter; its current value is returned by
 * CLG_(get_dump_counter)() */
static Int out_counter = 0;

/* Output file name and output directory, as returned by
 * CLG_(get_out_file)() and CLG_(get_out_directory)() once
 * CLG_(init_dumps)() has been called.
 */
static HChar* out_file = 0;
static HChar* out_directory = 0;
/* NOTE(review): presumably set by CLG_(init_dumps)(), which is not
 * visible in this part of the file - confirm */
static Bool dumps_initialized = False;

/* Command */
static HChar cmdbuf[BUF_LEN];

/* Total reads/writes/misses sum over all dumps and threads.
 * Updated during CC traversal at dump time.
 */
FullCost CLG_(total_cost) = 0;
/* Total that fprint_fcost() adds the cost of every written line to */
static FullCost dump_total_cost = 0;

/* Event mapping handed to fprint_cost() whenever costs are written */
EventMapping* CLG_(dumpmap) = 0;

/* Temporary output buffer for
 *  print_fn_pos, fprint_apos, fprint_fcost, fprint_jcc,
 *  fprint_fcc_ln, dump_run_info, dump_state_info
 */
static HChar outbuf[FILENAME_LEN + FN_NAME_LEN + OBJ_NAME_LEN + COSTS_LEN];
59
60Int CLG_(get_dump_counter)(void)
61{
62  return out_counter;
63}
64
65HChar* CLG_(get_out_file)()
66{
67    CLG_(init_dumps)();
68    return out_file;
69}
70
71HChar* CLG_(get_out_directory)()
72{
73    CLG_(init_dumps)();
74    return out_directory;
75}
76
77/*------------------------------------------------------------*/
78/*--- Output file related stuff                            ---*/
79/*------------------------------------------------------------*/
80
/* Boolean dumping array */
/* A single allocation (dump_array, dump_array_size entries) holds all
 * "already dumped" flags. init_dump_array() points the four names below
 * at consecutive sections of it; they are indexed with obj->number,
 * file->number, fn->number and context base_number (+ rec_index).
 */
static Bool* dump_array = 0;
static Int   dump_array_size = 0;
static Bool* obj_dumped = 0;
static Bool* file_dumped = 0;
static Bool* fn_dumped = 0;
static Bool* cxt_dumped = 0;
88
89static
90void reset_dump_array(void)
91{
92    int i;
93
94    CLG_ASSERT(dump_array != 0);
95
96    for(i=0;i<dump_array_size;i++)
97	dump_array[i] = False;
98}
99
100static
101void init_dump_array(void)
102{
103    dump_array_size = CLG_(stat).distinct_objs +
104      CLG_(stat).distinct_files +
105      CLG_(stat).distinct_fns +
106      CLG_(stat).context_counter;
107    CLG_ASSERT(dump_array == 0);
108    dump_array = (Bool*) CLG_MALLOC("cl.dump.ida.1",
109                                    dump_array_size * sizeof(Bool));
110    obj_dumped  = dump_array;
111    file_dumped = obj_dumped + CLG_(stat).distinct_objs;
112    fn_dumped   = file_dumped + CLG_(stat).distinct_files;
113    cxt_dumped  = fn_dumped + CLG_(stat).distinct_fns;
114
115    reset_dump_array();
116
117    CLG_DEBUG(1, "  init_dump_array: size %d\n", dump_array_size);
118}
119
120static __inline__
121void free_dump_array(void)
122{
123    CLG_ASSERT(dump_array != 0);
124    VG_(free)(dump_array);
125
126    dump_array = 0;
127    obj_dumped = 0;
128    file_dumped = 0;
129    fn_dumped = 0;
130    cxt_dumped = 0;
131}
132
133
134/* Initialize to an invalid position */
135static __inline__
136void init_fpos(FnPos* p)
137 {
138    p->file = 0;
139    p->fn = 0;
140    p->obj = 0;
141    p->cxt = 0;
142    p->rec_index = 0;
143}
144
145
#if 0
/* Disabled variant: hands every chunk directly to VG_(write) */
static __inline__
static void my_fwrite(Int fd, const HChar* buf, Int len)
{
	VG_(write)(fd, buf, len);
}
#else

/* Buffered variant: state shared by fwrite_flush() and my_fwrite() */

/* capacity of fwrite_buf */
#define FWRITE_BUFSIZE 32000
/* chunks longer than this are passed to VG_(write) without buffering */
#define FWRITE_THROUGH 10000
static HChar fwrite_buf[FWRITE_BUFSIZE];
/* number of bytes currently pending in fwrite_buf */
static Int fwrite_pos;
/* descriptor the pending bytes belong to; -1 until my_fwrite() sets it */
static Int fwrite_fd = -1;
159
160static __inline__
161void fwrite_flush(void)
162{
163    if ((fwrite_fd>=0) && (fwrite_pos>0))
164	VG_(write)(fwrite_fd, fwrite_buf, fwrite_pos);
165    fwrite_pos = 0;
166}
167
168static void my_fwrite(Int fd, const HChar* buf, Int len)
169{
170    if (fwrite_fd != fd) {
171	fwrite_flush();
172	fwrite_fd = fd;
173    }
174    if (len > FWRITE_THROUGH) {
175	fwrite_flush();
176	VG_(write)(fd, buf, len);
177	return;
178    }
179    if (FWRITE_BUFSIZE - fwrite_pos <= len) fwrite_flush();
180    VG_(strncpy)(fwrite_buf + fwrite_pos, buf, len);
181    fwrite_pos += len;
182}
183#endif
184
185
186static void print_obj(HChar* buf, obj_node* obj)
187{
188    //int n;
189
190    if (CLG_(clo).compress_strings) {
191	CLG_ASSERT(obj_dumped != 0);
192	if (obj_dumped[obj->number])
193	    /*n =*/ VG_(sprintf)(buf, "(%d)\n", obj->number);
194	else {
195	    /*n =*/ VG_(sprintf)(buf, "(%d) %s\n",
196			     obj->number, obj->name);
197	}
198    }
199    else
200	/*n =*/ VG_(sprintf)(buf, "%s\n", obj->name);
201
202#if 0
203    /* add mapping parameters the first time a object is dumped
204     * format: mp=0xSTART SIZE 0xOFFSET */
205    if (!obj_dumped[obj->number]) {
206	obj_dumped[obj->number];
207	VG_(sprintf)(buf+n, "mp=%p %p %p\n",
208		     pos->obj->start, pos->obj->size, pos->obj->offset);
209    }
210#else
211    obj_dumped[obj->number] = True;
212#endif
213}
214
215static void print_file(HChar* buf, file_node* file)
216{
217    if (CLG_(clo).compress_strings) {
218	CLG_ASSERT(file_dumped != 0);
219	if (file_dumped[file->number])
220	    VG_(sprintf)(buf, "(%d)\n", file->number);
221	else {
222	    VG_(sprintf)(buf, "(%d) %s\n",
223			 file->number, file->name);
224	    file_dumped[file->number] = True;
225	}
226    }
227    else
228	VG_(sprintf)(buf, "%s\n", file->name);
229}
230
231/*
232 * tag can be "fn", "cfn", "jfn"
233 */
234static void print_fn(Int fd, HChar* buf, const HChar* tag, fn_node* fn)
235{
236    int p;
237    p = VG_(sprintf)(buf, "%s=",tag);
238    if (CLG_(clo).compress_strings) {
239	CLG_ASSERT(fn_dumped != 0);
240	if (fn_dumped[fn->number])
241	    p += VG_(sprintf)(buf+p, "(%d)\n", fn->number);
242	else {
243	    p += VG_(sprintf)(buf+p, "(%d) %s\n",
244			      fn->number, fn->name);
245	    fn_dumped[fn->number] = True;
246	}
247    }
248    else
249	p += VG_(sprintf)(buf+p, "%s\n", fn->name);
250
251    my_fwrite(fd, buf, p);
252}
253
254static void print_mangled_fn(Int fd, HChar* buf, const HChar* tag,
255			     Context* cxt, int rec_index)
256{
257    int p, i;
258
259    if (CLG_(clo).compress_strings && CLG_(clo).compress_mangled) {
260
261	int n;
262	Context* last;
263
264	CLG_ASSERT(cxt_dumped != 0);
265	if (cxt_dumped[cxt->base_number+rec_index]) {
266	    p = VG_(sprintf)(buf, "%s=(%d)\n",
267			     tag, cxt->base_number + rec_index);
268	    my_fwrite(fd, buf, p);
269	    return;
270	}
271
272	last = 0;
273	/* make sure that for all context parts compressed data is written */
274	for(i=cxt->size;i>0;i--) {
275	    CLG_ASSERT(cxt->fn[i-1]->pure_cxt != 0);
276	    n = cxt->fn[i-1]->pure_cxt->base_number;
277	    if (cxt_dumped[n]) continue;
278	    p = VG_(sprintf)(buf, "%s=(%d) %s\n",
279			     tag, n, cxt->fn[i-1]->name);
280	    my_fwrite(fd, buf, p);
281
282	    cxt_dumped[n] = True;
283	    last = cxt->fn[i-1]->pure_cxt;
284	}
285	/* If the last context was the context to print, we are finished */
286	if ((last == cxt) && (rec_index == 0)) return;
287
288	p = VG_(sprintf)(buf, "%s=(%d) (%d)", tag,
289			 cxt->base_number + rec_index,
290			 cxt->fn[0]->pure_cxt->base_number);
291	if (rec_index >0)
292	    p += VG_(sprintf)(buf+p, "'%d", rec_index +1);
293	for(i=1;i<cxt->size;i++)
294	    p += VG_(sprintf)(buf+p, "'(%d)",
295			      cxt->fn[i]->pure_cxt->base_number);
296	p += VG_(sprintf)(buf+p, "\n");
297	my_fwrite(fd, buf, p);
298
299	cxt_dumped[cxt->base_number+rec_index] = True;
300	return;
301    }
302
303
304    p = VG_(sprintf)(buf, "%s=", tag);
305    if (CLG_(clo).compress_strings) {
306	CLG_ASSERT(cxt_dumped != 0);
307	if (cxt_dumped[cxt->base_number+rec_index]) {
308	    p += VG_(sprintf)(buf+p, "(%d)\n", cxt->base_number + rec_index);
309	    my_fwrite(fd, buf, p);
310	    return;
311	}
312	else {
313	    p += VG_(sprintf)(buf+p, "(%d) ", cxt->base_number + rec_index);
314	    cxt_dumped[cxt->base_number+rec_index] = True;
315	}
316    }
317
318    p += VG_(sprintf)(buf+p, "%s", cxt->fn[0]->name);
319    if (rec_index >0)
320	p += VG_(sprintf)(buf+p, "'%d", rec_index +1);
321    for(i=1;i<cxt->size;i++)
322	p += VG_(sprintf)(buf+p, "'%s", cxt->fn[i]->name);
323
324    p += VG_(sprintf)(buf+p, "\n");
325    my_fwrite(fd, buf, p);
326}
327
328
329
330/**
331 * Print function position of the BBCC, but only print info differing to
332 * the <last> position, update <last>
333 * Return True if something changes.
334 */
335static Bool print_fn_pos(int fd, FnPos* last, BBCC* bbcc)
336{
337    Bool res = False;
338
339    CLG_ASSERT(bbcc && bbcc->cxt);
340
341    CLG_DEBUGIF(3) {
342	CLG_DEBUG(2, "+ print_fn_pos: ");
343	CLG_(print_cxt)(16, bbcc->cxt, bbcc->rec_index);
344    }
345
346    if (!CLG_(clo).mangle_names) {
347	if (last->rec_index != bbcc->rec_index) {
348	    VG_(sprintf)(outbuf, "rec=%d\n\n", bbcc->rec_index);
349	    my_fwrite(fd, outbuf, VG_(strlen)(outbuf));
350	    last->rec_index = bbcc->rec_index;
351	    last->cxt = 0; /* reprint context */
352	    res = True;
353	}
354
355	if (last->cxt != bbcc->cxt) {
356	    fn_node* last_from = (last->cxt && last->cxt->size >1) ?
357				 last->cxt->fn[1] : 0;
358	    fn_node* curr_from = (bbcc->cxt->size >1) ?
359				 bbcc->cxt->fn[1] : 0;
360	    if (curr_from == 0) {
361		if (last_from != 0) {
362		    /* switch back to no context */
363		    VG_(sprintf)(outbuf, "frfn=(spontaneous)\n");
364		    my_fwrite(fd, outbuf, VG_(strlen)(outbuf));
365		    res = True;
366		}
367	    }
368	    else if (last_from != curr_from) {
369		print_fn(fd,outbuf,"frfn", curr_from);
370		res = True;
371	    }
372	    last->cxt = bbcc->cxt;
373	}
374    }
375
376    if (last->obj != bbcc->cxt->fn[0]->file->obj) {
377	VG_(sprintf)(outbuf, "ob=");
378	print_obj(outbuf+3, bbcc->cxt->fn[0]->file->obj);
379	my_fwrite(fd, outbuf, VG_(strlen)(outbuf));
380	last->obj = bbcc->cxt->fn[0]->file->obj;
381	res = True;
382    }
383
384    if (last->file != bbcc->cxt->fn[0]->file) {
385	VG_(sprintf)(outbuf, "fl=");
386	print_file(outbuf+3, bbcc->cxt->fn[0]->file);
387	my_fwrite(fd, outbuf, VG_(strlen)(outbuf));
388	last->file = bbcc->cxt->fn[0]->file;
389	res = True;
390    }
391
392    if (!CLG_(clo).mangle_names) {
393	if (last->fn != bbcc->cxt->fn[0]) {
394	    print_fn(fd,outbuf, "fn", bbcc->cxt->fn[0]);
395	    last->fn = bbcc->cxt->fn[0];
396	    res = True;
397	}
398    }
399    else {
400	/* Print mangled name if context or rec_index changes */
401	if ((last->rec_index != bbcc->rec_index) ||
402	    (last->cxt != bbcc->cxt)) {
403
404	    print_mangled_fn(fd, outbuf, "fn", bbcc->cxt, bbcc->rec_index);
405	    last->fn = bbcc->cxt->fn[0];
406	    last->rec_index = bbcc->rec_index;
407	    res = True;
408	}
409    }
410
411    last->cxt = bbcc->cxt;
412
413    CLG_DEBUG(2, "- print_fn_pos: %s\n", res ? "changed" : "");
414
415    return res;
416}
417
/* The debug lookup cache is useful if BBCCs for the same BB are
 * dumped directly in a row. This is a direct mapped cache:
 * get_debug_pos() uses slot (addr % DEBUG_CACHE_SIZE) for an address.
 */
#define DEBUG_CACHE_SIZE 1777

/* per slot: the address looked up, the file node and line found for it,
 * and the result (found or not) of the debug info lookup */
static Addr       debug_cache_addr[DEBUG_CACHE_SIZE];
static file_node* debug_cache_file[DEBUG_CACHE_SIZE];
static int        debug_cache_line[DEBUG_CACHE_SIZE];
static Bool       debug_cache_info[DEBUG_CACHE_SIZE];
427
428static __inline__
429void init_debug_cache(void)
430{
431    int i;
432    for(i=0;i<DEBUG_CACHE_SIZE;i++) {
433	debug_cache_addr[i] = 0;
434	debug_cache_file[i] = 0;
435	debug_cache_line[i] = 0;
436	debug_cache_info[i] = 0;
437    }
438}
439
440static /* __inline__ */
441Bool get_debug_pos(BBCC* bbcc, Addr addr, AddrPos* p)
442{
443    HChar file[FILENAME_LEN];
444    HChar dir[FILENAME_LEN];
445    Bool found_file_line, found_dirname;
446
447    int cachepos = addr % DEBUG_CACHE_SIZE;
448
449    if (debug_cache_addr[cachepos] == addr) {
450	p->line = debug_cache_line[cachepos];
451	p->file = debug_cache_file[cachepos];
452	found_file_line = debug_cache_info[cachepos];
453    }
454    else {
455	found_file_line = VG_(get_filename_linenum)(addr,
456						    file, FILENAME_LEN,
457						    dir, FILENAME_LEN,
458						    &found_dirname,
459						    &(p->line));
460	if (!found_file_line) {
461	    VG_(strcpy)(file, "???");
462	    p->line = 0;
463	}
464	if (found_dirname) {
465	    // +1 for the '/'.
466	    CLG_ASSERT(VG_(strlen)(dir) + VG_(strlen)(file) + 1 < FILENAME_LEN);
467	    VG_(strcat)(dir, "/");     // Append '/'
468	    VG_(strcat)(dir, file);    // Append file to dir
469	    VG_(strcpy)(file, dir);    // Move dir+file to file
470	}
471	p->file    = CLG_(get_file_node)(bbcc->bb->obj, file);
472
473	debug_cache_info[cachepos] = found_file_line;
474	debug_cache_addr[cachepos] = addr;
475	debug_cache_line[cachepos] = p->line;
476	debug_cache_file[cachepos] = p->file;
477    }
478
479    /* Address offset from bbcc start address */
480    p->addr = addr - bbcc->bb->obj->offset;
481    p->bb_addr = bbcc->bb->offset;
482
483    CLG_DEBUG(3, "  get_debug_pos(%#lx): BB %#lx, fn '%s', file '%s', line %u\n",
484	     addr, bb_addr(bbcc->bb), bbcc->cxt->fn[0]->name,
485	     p->file->name, p->line);
486
487    return found_file_line;
488}
489
490
491/* copy file position and init cost */
492static void init_apos(AddrPos* p, Addr addr, Addr bbaddr, file_node* file)
493{
494    p->addr    = addr;
495    p->bb_addr = bbaddr;
496    p->file    = file;
497    p->line    = 0;
498}
499
500static void copy_apos(AddrPos* dst, AddrPos* src)
501{
502    dst->addr    = src->addr;
503    dst->bb_addr = src->bb_addr;
504    dst->file    = src->file;
505    dst->line    = src->line;
506}
507
508/* copy file position and init cost */
509static void init_fcost(AddrCost* c, Addr addr, Addr bbaddr, file_node* file)
510{
511    init_apos( &(c->p), addr, bbaddr, file);
512    /* FIXME: This is a memory leak as a AddrCost is inited multiple times */
513    c->cost = CLG_(get_eventset_cost)( CLG_(sets).full );
514    CLG_(init_cost)( CLG_(sets).full, c->cost );
515}
516
517
518/**
519 * print position change inside of a BB (last -> curr)
520 * this doesn't update last to curr!
521 */
522static void fprint_apos(Int fd, AddrPos* curr, AddrPos* last, file_node* func_file)
523{
524    CLG_ASSERT(curr->file != 0);
525    CLG_DEBUG(2, "    print_apos(file '%s', line %d, bb %#lx, addr %#lx) fnFile '%s'\n",
526	     curr->file->name, curr->line, curr->bb_addr, curr->addr,
527	     func_file->name);
528
529    if (curr->file != last->file) {
530
531	/* if we switch back to orig file, use fe=... */
532	if (curr->file == func_file)
533	    VG_(sprintf)(outbuf, "fe=");
534	else
535	    VG_(sprintf)(outbuf, "fi=");
536	print_file(outbuf+3, curr->file);
537	my_fwrite(fd, outbuf, VG_(strlen)(outbuf));
538    }
539
540    if (CLG_(clo).dump_bbs) {
541	if (curr->line != last->line) {
542	    VG_(sprintf)(outbuf, "ln=%d\n", curr->line);
543	    my_fwrite(fd, outbuf, VG_(strlen)(outbuf));
544	}
545    }
546}
547
548
549
550/**
551 * Print a position.
552 * This prints out differences if allowed
553 *
554 * This doesn't set last to curr afterwards!
555 */
556static
557void fprint_pos(Int fd, AddrPos* curr, AddrPos* last)
558{
559    if (0) //CLG_(clo).dump_bbs)
560	VG_(sprintf)(outbuf, "%lu ", curr->addr - curr->bb_addr);
561    else {
562	int p = 0;
563	if (CLG_(clo).dump_instr) {
564	    int diff = curr->addr - last->addr;
565	    if ( CLG_(clo).compress_pos && (last->addr >0) &&
566		 (diff > -100) && (diff < 100)) {
567		if (diff >0)
568		    p = VG_(sprintf)(outbuf, "+%d ", diff);
569		else if (diff==0)
570		    p = VG_(sprintf)(outbuf, "* ");
571	        else
572		    p = VG_(sprintf)(outbuf, "%d ", diff);
573	    }
574	    else
575		p = VG_(sprintf)(outbuf, "%#lx ", curr->addr);
576	}
577
578	if (CLG_(clo).dump_bb) {
579	    int diff = curr->bb_addr - last->bb_addr;
580	    if ( CLG_(clo).compress_pos && (last->bb_addr >0) &&
581		 (diff > -100) && (diff < 100)) {
582		if (diff >0)
583		    p += VG_(sprintf)(outbuf+p, "+%d ", diff);
584		else if (diff==0)
585		    p += VG_(sprintf)(outbuf+p, "* ");
586	        else
587		    p += VG_(sprintf)(outbuf+p, "%d ", diff);
588	    }
589	    else
590		p += VG_(sprintf)(outbuf+p, "%#lx ", curr->bb_addr);
591	}
592
593	if (CLG_(clo).dump_line) {
594	    int diff = curr->line - last->line;
595	    if ( CLG_(clo).compress_pos && (last->line >0) &&
596		 (diff > -100) && (diff < 100)) {
597
598		if (diff >0)
599		    VG_(sprintf)(outbuf+p, "+%d ", diff);
600		else if (diff==0)
601		    VG_(sprintf)(outbuf+p, "* ");
602	        else
603		    VG_(sprintf)(outbuf+p, "%d ", diff);
604	    }
605	    else
606		VG_(sprintf)(outbuf+p, "%u ", curr->line);
607	}
608    }
609    my_fwrite(fd, outbuf, VG_(strlen)(outbuf));
610}
611
612
613/**
614 * Print events.
615 */
616
617static
618void fprint_cost(int fd, EventMapping* es, ULong* cost)
619{
620  int p = CLG_(sprint_mappingcost)(outbuf, es, cost);
621  VG_(sprintf)(outbuf+p, "\n");
622  my_fwrite(fd, outbuf, VG_(strlen)(outbuf));
623  return;
624}
625
626
627
628/* Write the cost of a source line; only that parts of the source
629 * position are written that changed relative to last written position.
630 * funcPos is the source position of the first line of actual function.
631 * Something is written only if cost != 0; returns True in this case.
632 */
633static void fprint_fcost(Int fd, AddrCost* c, AddrPos* last)
634{
635  CLG_DEBUGIF(3) {
636    CLG_DEBUG(2, "   print_fcost(file '%s', line %d, bb %#lx, addr %#lx):\n",
637	     c->p.file->name, c->p.line, c->p.bb_addr, c->p.addr);
638    CLG_(print_cost)(-5, CLG_(sets).full, c->cost);
639  }
640
641  fprint_pos(fd, &(c->p), last);
642  copy_apos( last, &(c->p) ); /* update last to current position */
643
644  fprint_cost(fd, CLG_(dumpmap), c->cost);
645
646  /* add cost to total */
647  CLG_(add_and_zero_cost)( CLG_(sets).full, dump_total_cost, c->cost );
648}
649
650
/* Write out the calls from jcc (at pos)
 *
 * fd:       descriptor the output is written to
 * jcc:      the jump/call cost center to write; its call counter (and,
 *           for calls, its cost) is reset after writing
 * curr:     source position of the jump/call instruction
 * last:     position written last; used for relative positions and to
 *           decide whether a target file has to be given (not updated
 *           here)
 * ecounter: execution count, written as 2nd number of "jcnd=" lines
 *
 * Jumps (jk_CondJump / jk_Jump) are written as "jcnd=" / "jump=" lines,
 * everything else as call ("cob=" / "cfi=" / "cfn=" / "calls=").
 */
static void fprint_jcc(Int fd, jCC* jcc, AddrPos* curr, AddrPos* last, ULong ecounter)
{
    /* position of the jump/call target */
    static AddrPos target;
    file_node* file;
    obj_node*  obj;

    CLG_DEBUGIF(2) {
      CLG_DEBUG(2, "   fprint_jcc (jkind %d)\n", jcc->jmpkind);
      CLG_(print_jcc)(-10, jcc);
    }

    CLG_ASSERT(jcc->to !=0);
    CLG_ASSERT(jcc->from !=0);

    if (!get_debug_pos(jcc->to, bb_addr(jcc->to->bb), &target)) {
	/* if we don't have debug info, don't switch to file "???" */
	target.file = last->file;
    }

    if ((jcc->jmpkind == jk_CondJump) || (jcc->jmpkind == jk_Jump)) {

      /* this is a JCC for a followed conditional or boring jump. */
      CLG_ASSERT(CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost));

      /* objects among jumps should be the same.
       * Otherwise this jump would have been changed to a call
       *  (see setup_bbcc)
       */
      CLG_ASSERT(jcc->from->bb->obj == jcc->to->bb->obj);

	/* only print if target position info is useful */
	if (!CLG_(clo).dump_instr && !CLG_(clo).dump_bb && target.line==0) {
	  jcc->call_counter = 0;
	  return;
	}

	/* Different files/functions are possible e.g. with longjmp's
	 * which change the stack, and thus context
	 */
	if (last->file != target.file) {
	    VG_(sprintf)(outbuf, "jfi=");
	    print_file(outbuf+4, target.file);
	    my_fwrite(fd, outbuf, VG_(strlen)(outbuf));
	}

	if (jcc->from->cxt != jcc->to->cxt) {
	    if (CLG_(clo).mangle_names)
		print_mangled_fn(fd, outbuf, "jfn",
				 jcc->to->cxt, jcc->to->rec_index);
	    else
		print_fn(fd, outbuf, "jfn", jcc->to->cxt->fn[0]);
	}

	if (jcc->jmpkind == jk_CondJump) {
	    /* format: jcnd=<followed>/<executions> <target> */
	    VG_(sprintf)(outbuf, "jcnd=%llu/%llu ",
			 jcc->call_counter, ecounter);
	}
	else {
	    /* format: jump=<jump count> <target> */
	    VG_(sprintf)(outbuf, "jump=%llu ",
			 jcc->call_counter);
	}
	my_fwrite(fd, outbuf, VG_(strlen)(outbuf));

	/* the target position ends the jump line; a line with only the
	 * position of the jump instruction follows */
	fprint_pos(fd, &target, last);
	my_fwrite(fd, "\n", 1);
	fprint_pos(fd, curr, last);
	my_fwrite(fd, "\n", 1);

	jcc->call_counter = 0;
	return;
    }

    /* from here on: the jcc is written as call */
    file = jcc->to->cxt->fn[0]->file;
    obj  = jcc->to->bb->obj;

    /* object of called position different to object of this function?*/
    if (jcc->from->cxt->fn[0]->file->obj != obj) {
	VG_(sprintf)(outbuf, "cob=");
	print_obj(outbuf+4, obj);
	my_fwrite(fd, outbuf, VG_(strlen)(outbuf));
    }

    /* file of called position different to current file? */
    if (last->file != file) {
	VG_(sprintf)(outbuf, "cfi=");
	print_file(outbuf+4, file);
	my_fwrite(fd, outbuf, VG_(strlen)(outbuf));
    }

    if (CLG_(clo).mangle_names)
	print_mangled_fn(fd, outbuf, "cfn", jcc->to->cxt, jcc->to->rec_index);
    else
	print_fn(fd, outbuf, "cfn", jcc->to->cxt->fn[0]);

    /* the "calls=" line and the cost line are only written (and the
     * counters only reset) if there is cost for this call */
    if (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost)) {
      VG_(sprintf)(outbuf, "calls=%llu ",
		   jcc->call_counter);
	my_fwrite(fd, outbuf, VG_(strlen)(outbuf));

	fprint_pos(fd, &target, last);
	my_fwrite(fd, "\n", 1);
	/* cost line of the call: position of the call, then the cost */
	fprint_pos(fd, curr, last);
	fprint_cost(fd, CLG_(dumpmap), jcc->cost);

	CLG_(init_cost)( CLG_(sets).full, jcc->cost );

	jcc->call_counter = 0;
    }
}
764
765
766
/* Cost summation of functions. We use alternately ccSum[0/1], thus
 * ccSum[currSum] for recently read lines with same line number.
 * fprint_bbcc() accumulates into ccSum[currSum] and uses the other
 * entry to look up the position of the next instruction.
 */
static AddrCost ccSum[2];
static int currSum;
772
/*
 * Print all costs of a BBCC:
 * - FCCs of instructions
 * - JCCs of the unique jump of this BB
 * returns True if something was written
 *
 * fd:   descriptor the output is written to
 * bbcc: cost center to write; its execution counters are reset at the
 *       end
 * last: position written last; updated whenever a cost line is written
 *
 * Instruction costs are summed up in ccSum[currSum] as long as the
 * source position stays the same. The sum stays in ccSum[currSum] when
 * this function returns, unless the dump_bbs / dump_bb options force
 * it to be written.
 */
static Bool fprint_bbcc(Int fd, BBCC* bbcc, AddrPos* last)
{
  InstrInfo* instr_info;
  ULong ecounter;
  Bool something_written = False;
  jCC* jcc;
  AddrCost *currCost, *newCost;
  Int jcc_count = 0, instr, i, jmp;
  BB* bb = bbcc->bb;

  CLG_ASSERT(bbcc->cxt != 0);
  CLG_DEBUGIF(1) {
    VG_(printf)("+ fprint_bbcc (Instr %d): ", bb->instr_count);
    CLG_(print_bbcc)(15, bbcc);
  }

  CLG_ASSERT(currSum == 0 || currSum == 1);
  /* currCost: running sum; newCost: gets the position of the next instr */
  currCost = &(ccSum[currSum]);
  newCost  = &(ccSum[1-currSum]);

  /* execution count for the current instruction; decreased at every
   * jump inside of the BB (see end of loop) */
  ecounter = bbcc->ecounter_sum;
  jmp = 0;
  instr_info = &(bb->instr[0]);
  for(instr=0; instr<bb->instr_count; instr++, instr_info++) {

    /* get debug info of current instruction address and dump cost
     * if CLG_(clo).dump_bbs or file/line has changed
     */
    if (!get_debug_pos(bbcc, bb_addr(bb) + instr_info->instr_offset,
		       &(newCost->p))) {
      /* if we don't have debug info, don't switch to file "???" */
      newCost->p.file = bbcc->cxt->fn[0]->file;
    }

    if (CLG_(clo).dump_bbs || CLG_(clo).dump_instr ||
	(newCost->p.line != currCost->p.line) ||
	(newCost->p.file != currCost->p.file)) {

      /* position changes: write out the cost summed up so far */
      if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
	something_written = True;

	fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
	fprint_fcost(fd, currCost, last);
      }

      /* switch buffers */
      currSum = 1 - currSum;
      currCost = &(ccSum[currSum]);
      newCost  = &(ccSum[1-currSum]);
    }

    /* add line cost to current cost sum */
    (*CLG_(cachesim).add_icost)(currCost->cost, bbcc, instr_info, ecounter);

    /* print jcc's if there are: only jumps */
    if (bb->jmp[jmp].instr == instr) {
	/* count the JCCs worth writing: jumps which were taken, or JCCs
	 * with cost */
	jcc_count=0;
	for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from)
	    if (((jcc->jmpkind != jk_Call) && (jcc->call_counter >0)) ||
		(!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))
	      jcc_count++;

	if (jcc_count>0) {
	    if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
		/* no need to switch buffers, as position is the same */
		fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
		fprint_fcost(fd, currCost, last);
	    }
	    get_debug_pos(bbcc, bb_addr(bb)+instr_info->instr_offset, &(currCost->p));
	    fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
	    something_written = True;
	    for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) {
		if (((jcc->jmpkind != jk_Call) && (jcc->call_counter >0)) ||
		    (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))
		    fprint_jcc(fd, jcc, &(currCost->p), last, ecounter);
	    }
	}
    }

    /* update execution counter */
    if (jmp < bb->cjmp_count)
	if (bb->jmp[jmp].instr == instr) {
	    ecounter -= bbcc->jmp[jmp].ecounter;
	    jmp++;
	}
  }

  /* jCCs at end? If yes, dump cumulated line info first */
  jcc_count = 0;
  for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) {
      /* yes, if JCC only counts jmp arcs or cost >0 */
      if ( ((jcc->jmpkind != jk_Call) && (jcc->call_counter >0)) ||
	   (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))
	  jcc_count++;
  }

  if ( (bbcc->skipped &&
	!CLG_(is_zero_cost)(CLG_(sets).full, bbcc->skipped)) ||
       (jcc_count>0) ) {

    if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
      /* no need to switch buffers, as position is the same */
      fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
      fprint_fcost(fd, currCost, last);
    }

    /* the following output is attributed to the jump address of the BB */
    get_debug_pos(bbcc, bb_jmpaddr(bb), &(currCost->p));
    fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
    something_written = True;

    /* first, print skipped costs for calls */
    if (bbcc->skipped && !CLG_(is_zero_cost)( CLG_(sets).full,
					     bbcc->skipped )) {
      CLG_(add_and_zero_cost)( CLG_(sets).full,
			      currCost->cost, bbcc->skipped );
#if 0
      VG_(sprintf)(outbuf, "# Skipped\n");
      my_fwrite(fd, outbuf, VG_(strlen)(outbuf));
#endif
      fprint_fcost(fd, currCost, last);
    }

    if (jcc_count > 0)
	for(jcc=bbcc->jmp[jmp].jcc_list; jcc; jcc=jcc->next_from) {
	    CLG_ASSERT(jcc->jmp == jmp);
	    if ( ((jcc->jmpkind != jk_Call) && (jcc->call_counter >0)) ||
		 (!CLG_(is_zero_cost)( CLG_(sets).full, jcc->cost )))

		fprint_jcc(fd, jcc, &(currCost->p), last, ecounter);
	}
  }

  if (CLG_(clo).dump_bbs || CLG_(clo).dump_bb) {
    /* with these options, no cost sum is carried over to the next BBCC */
    if (!CLG_(is_zero_cost)( CLG_(sets).full, currCost->cost )) {
      something_written = True;

      fprint_apos(fd, &(currCost->p), last, bbcc->cxt->fn[0]->file);
      fprint_fcost(fd, currCost, last);
    }
    if (CLG_(clo).dump_bbs) my_fwrite(fd, "\n", 1);

    /* when every cost was immediately written, we must have done so,
     * as this function is only called when there's cost in a BBCC
     */
    CLG_ASSERT(something_written);
  }

  /* reset the execution counters of the BBCC */
  bbcc->ecounter_sum = 0;
  for(i=0; i<=bbcc->bb->cjmp_count; i++)
    bbcc->jmp[i].ecounter = 0;
  bbcc->ret_counter = 0;

  CLG_DEBUG(1, "- fprint_bbcc: JCCs %d\n", jcc_count);

  return something_written;
}
935
936/* order by
937 *  recursion,
938 *  from->bb->obj, from->bb->fn
939 *  obj, fn[0]->file, fn
940 *  address
941 */
942static int my_cmp(BBCC** pbbcc1, BBCC** pbbcc2)
943{
944#if 0
945    return (*pbbcc1)->bb->offset - (*pbbcc2)->bb->offset;
946#else
947    BBCC *bbcc1 = *pbbcc1;
948    BBCC *bbcc2 = *pbbcc2;
949    Context* cxt1 = bbcc1->cxt;
950    Context* cxt2 = bbcc2->cxt;
951    int off = 1;
952
953    if (cxt1->fn[0]->file->obj != cxt2->fn[0]->file->obj)
954	return cxt1->fn[0]->file->obj - cxt2->fn[0]->file->obj;
955
956    if (cxt1->fn[0]->file != cxt2->fn[0]->file)
957	return cxt1->fn[0]->file - cxt2->fn[0]->file;
958
959    if (cxt1->fn[0] != cxt2->fn[0])
960	return cxt1->fn[0] - cxt2->fn[0];
961
962    if (bbcc1->rec_index != bbcc2->rec_index)
963	return bbcc1->rec_index - bbcc2->rec_index;
964
965    while((off < cxt1->size) && (off < cxt2->size)) {
966	fn_node* ffn1 = cxt1->fn[off];
967	fn_node* ffn2 = cxt2->fn[off];
968	if (ffn1->file->obj != ffn2->file->obj)
969	    return ffn1->file->obj - ffn2->file->obj;
970	if (ffn1 != ffn2)
971	    return ffn1 - ffn2;
972	off++;
973    }
974    if      (cxt1->size > cxt2->size) return 1;
975    else if (cxt1->size < cxt2->size) return -1;
976
977    return bbcc1->bb->offset - bbcc2->bb->offset;
978#endif
979}
980
981
982
983
984
985/* modified version of:
986 *
987 * qsort -- qsort interface implemented by faster quicksort.
988 * J. L. Bentley and M. D. McIlroy, SPE 23 (1993) 1249-1265.
989 * Copyright 1993, John Wiley.
990*/
991
992static __inline__
993void swapfunc(BBCC** a, BBCC** b, int n)
994{
995    while(n>0) {
996	BBCC* t = *a; *a = *b; *b = t;
997	a++, b++;
998	n--;
999    }
1000}
1001
1002static __inline__
1003void swap(BBCC** a, BBCC** b)
1004{
1005    BBCC* t;
1006    t = *a; *a = *b; *b = t;
1007}
1008
/* Smaller one of two values (<x> if both are equal).
 * The chosen argument is evaluated twice: no side effects in arguments!
 */
#define min(x, y) ((x)<=(y) ? (x) : (y))
1010
1011static
1012BBCC** med3(BBCC **a, BBCC **b, BBCC **c, int (*cmp)(BBCC**,BBCC**))
1013{	return cmp(a, b) < 0 ?
1014		  (cmp(b, c) < 0 ? b : cmp(a, c) < 0 ? c : a)
1015		: (cmp(b, c) > 0 ? b : cmp(a, c) > 0 ? c : a);
1016}
1017
/* Only used in debug output, to print positions relative to it.
 * NOTE(review): presumably set to the array start by the caller of
 * qsort() - not visible in this part of the file */
static BBCC** qsort_start = 0;

/* Sort the <n> BBCC pointers starting at <a> in place, in ascending
 * order according to <cmp> (which returns <0, 0, >0 like my_cmp).
 *
 * Arrays with less than 7 entries are sorted by insertion sort.
 * Otherwise a pivot is chosen, the array is partitioned into entries
 * lower than, equal to and greater than the pivot, and the lower and
 * upper parts are sorted recursively.
 */
static void qsort(BBCC **a, int n, int (*cmp)(BBCC**,BBCC**))
{
	BBCC **pa, **pb, **pc, **pd, **pl, **pm, **pn, **pv;
	int s, r;
	BBCC* v;

	CLG_DEBUG(8, "  qsort(%ld,%ld)\n", a-qsort_start + 0L, n + 0L);

	if (n < 7) {	 /* Insertion sort on smallest arrays */
		for (pm = a+1; pm < a+n; pm++)
			for (pl = pm; pl > a && cmp(pl-1, pl) > 0; pl --)
				swap(pl, pl-1);

		CLG_DEBUGIF(8) {
		    for (pm = a; pm < a+n; pm++) {
			VG_(printf)("   %3ld BB %#lx, ",
                                    pm - qsort_start + 0L,
				    bb_addr((*pm)->bb));
			CLG_(print_cxt)(9, (*pm)->cxt, (*pm)->rec_index);
		    }
		}
		return;
	}
	pm = a + n/2;    /* Small arrays, middle element */
	if (n > 7) {
		pl = a;
		pn = a + (n-1);
		if (n > 40) {    /* Big arrays, pseudomedian of 9 */
			s = n/8;
			pl = med3(pl, pl+s, pl+2*s, cmp);
			pm = med3(pm-s, pm, pm+s, cmp);
			pn = med3(pn-2*s, pn-s, pn, cmp);
		}
		pm = med3(pl, pm, pn, cmp); /* Mid-size, med of 3 */
	}


	/* The pivot is copied into v, and compared against via pv, as the
	 * entry at pm can be moved by the swaps below.
	 * [a,pa) and (pd,a+n) collect entries equal to the pivot;
	 * pb runs upwards over entries <= pivot, pc downwards over
	 * entries >= pivot.
	 */
	v = *pm;
	pv = &v;
	pa = pb = a;
	pc = pd = a + (n-1);
	for (;;) {
		while ((pb <= pc) && ((r=cmp(pb, pv)) <= 0)) {
		    if (r==0) {
			/* same as pivot, to start */
			swap(pa,pb); pa++;
		    }
		    pb ++;
		}
		while ((pb <= pc) && ((r=cmp(pc, pv)) >= 0)) {
		    if (r==0) {
			/* same as pivot, to end */
			swap(pc,pd); pd--;
		    }
		    pc --;
		}
		if (pb > pc) { break; }
		/* *pb > pivot and *pc < pivot: exchange them */
		swap(pb, pc);
		pb ++;
		pc --;
	}
	/* now pb is the last entry not greater than the pivot, and pc the
	 * first entry not lower than the pivot */
	pb--;
	pc++;

	/* put pivot from start into middle */
	if ((s = pa-a)>0) { for(r=0;r<s;r++) swap(a+r, pb+1-s+r); }
	/* put pivot from end into middle */
	if ((s = a+n-1-pd)>0) { for(r=0;r<s;r++) swap(pc+r, a+n-s+r); }

	CLG_DEBUGIF(8) {
	  VG_(printf)("   PV BB %#lx, ", bb_addr((*pv)->bb));
	    CLG_(print_cxt)(9, (*pv)->cxt, (*pv)->rec_index);

	    s = pb-pa+1;
	    VG_(printf)("    Lower %ld - %ld:\n",
                        a-qsort_start + 0L,
                        a+s-1-qsort_start + 0L);
	    for (r=0;r<s;r++) {
		pm = a+r;
		VG_(printf)("     %3ld BB %#lx, ",
			    pm-qsort_start + 0L,
                            bb_addr((*pm)->bb));
		CLG_(print_cxt)(9, (*pm)->cxt, (*pm)->rec_index);
	    }

	    s = pd-pc+1;
	    VG_(printf)("    Upper %ld - %ld:\n",
			a+n-s-qsort_start + 0L,
                        a+n-1-qsort_start + 0L);
	    for (r=0;r<s;r++) {
		pm = a+n-s+r;
		VG_(printf)("     %3ld BB %#lx, ",
			    pm-qsort_start + 0L,
                            bb_addr((*pm)->bb));
		CLG_(print_cxt)(9, (*pm)->cxt, (*pm)->rec_index);
	    }
	}

	/* sort the part lower than the pivot (at the start) and the part
	 * greater than the pivot (at the end), if more than one entry */
	if ((s = pb+1-pa) > 1) qsort(a,     s, cmp);
	if ((s = pd+1-pc) > 1) qsort(a+n-s, s, cmp);
}
1121
1122
1123/* Helpers for prepare_dump */
1124
1125static Int    prepare_count;
1126static BBCC** prepare_ptr;
1127
1128
1129static void hash_addCount(BBCC* bbcc)
1130{
1131  if ((bbcc->ecounter_sum > 0) || (bbcc->ret_counter>0))
1132    prepare_count++;
1133}
1134
1135static void hash_addPtr(BBCC* bbcc)
1136{
1137  if ((bbcc->ecounter_sum == 0) &&
1138      (bbcc->ret_counter == 0)) return;
1139
1140  *prepare_ptr = bbcc;
1141  prepare_ptr++;
1142}
1143
1144
1145static void cs_addCount(thread_info* ti)
1146{
1147  Int i;
1148  BBCC* bbcc;
1149
1150  /* add BBCCs with active call in call stack of current thread.
1151   * update cost sums for active calls
1152   */
1153
1154  for(i = 0; i < CLG_(current_call_stack).sp; i++) {
1155    call_entry* e = &(CLG_(current_call_stack).entry[i]);
1156    if (e->jcc == 0) continue;
1157
1158    CLG_(add_diff_cost_lz)( CLG_(sets).full, &(e->jcc->cost),
1159			   e->enter_cost, CLG_(current_state).cost);
1160    bbcc = e->jcc->from;
1161
1162    CLG_DEBUG(1, " [%2d] (tid %d), added active: %s\n",
1163	     i,CLG_(current_tid),bbcc->cxt->fn[0]->name);
1164
1165    if (bbcc->ecounter_sum>0 || bbcc->ret_counter>0) {
1166      /* already counted */
1167      continue;
1168    }
1169    prepare_count++;
1170  }
1171}
1172
1173static void cs_addPtr(thread_info* ti)
1174{
1175  Int i;
1176  BBCC* bbcc;
1177
1178  /* add BBCCs with active call in call stack of current thread.
1179   * update cost sums for active calls
1180   */
1181
1182  for(i = 0; i < CLG_(current_call_stack).sp; i++) {
1183    call_entry* e = &(CLG_(current_call_stack).entry[i]);
1184    if (e->jcc == 0) continue;
1185
1186    bbcc = e->jcc->from;
1187
1188    if (bbcc->ecounter_sum>0 || bbcc->ret_counter>0) {
1189      /* already counted */
1190      continue;
1191    }
1192
1193    *prepare_ptr = bbcc;
1194    prepare_ptr++;
1195  }
1196}
1197
1198
1199/**
1200 * Put all BBCCs with costs into a sorted array.
1201 * The returned arrays ends with a null pointer.
1202 * Must be freed after dumping.
1203 */
1204static
1205BBCC** prepare_dump(void)
1206{
1207    BBCC **array;
1208
1209    prepare_count = 0;
1210
1211    /* if we do not separate among threads, this gives all */
1212    /* count number of BBCCs with >0 executions */
1213    CLG_(forall_bbccs)(hash_addCount);
1214
1215    /* even if we do not separate among threads,
1216     * call stacks are separated */
1217    if (CLG_(clo).separate_threads)
1218      cs_addCount(0);
1219    else
1220      CLG_(forall_threads)(cs_addCount);
1221
1222    CLG_DEBUG(0, "prepare_dump: %d BBCCs\n", prepare_count);
1223
1224    /* allocate bbcc array, insert BBCCs and sort */
1225    prepare_ptr = array =
1226      (BBCC**) CLG_MALLOC("cl.dump.pd.1",
1227                          (prepare_count+1) * sizeof(BBCC*));
1228
1229    CLG_(forall_bbccs)(hash_addPtr);
1230
1231    if (CLG_(clo).separate_threads)
1232      cs_addPtr(0);
1233    else
1234      CLG_(forall_threads)(cs_addPtr);
1235
1236    CLG_ASSERT(array + prepare_count == prepare_ptr);
1237
1238    /* end mark */
1239    *prepare_ptr = 0;
1240
1241    CLG_DEBUG(0,"             BBCCs inserted\n");
1242
1243    qsort_start = array;
1244    qsort(array, prepare_count, my_cmp);
1245
1246    CLG_DEBUG(0,"             BBCCs sorted\n");
1247
1248    return array;
1249}
1250
1251
1252
1253
1254static void fprint_cost_ln(int fd, const HChar* prefix,
1255			   EventMapping* em, ULong* cost)
1256{
1257    int p;
1258
1259    p = VG_(sprintf)(outbuf, "%s", prefix);
1260    p += CLG_(sprint_mappingcost)(outbuf + p, em, cost);
1261    VG_(sprintf)(outbuf + p, "\n");
1262    my_fwrite(fd, outbuf, VG_(strlen)(outbuf));
1263}
1264
1265static ULong bbs_done = 0;
1266static HChar* filename = 0;
1267
1268static
1269void file_err(void)
1270{
1271   VG_(message)(Vg_UserMsg,
1272                "Error: can not open cache simulation output file `%s'\n",
1273                filename );
1274   VG_(exit)(1);
1275}
1276
1277/**
1278 * Create a new dump file and write header.
1279 *
1280 * Naming: <CLG_(clo).filename_base>.<pid>[.<part>][-<tid>]
1281 *         <part> is skipped for final dump (trigger==0)
1282 *         <tid>  is skipped for thread 1 with CLG_(clo).separate_threads=no
1283 *
1284 * Returns the file descriptor, and -1 on error (no write permission)
1285 */
1286static int new_dumpfile(HChar buf[BUF_LEN], int tid, const HChar* trigger)
1287{
1288    Bool appending = False;
1289    int i, fd;
1290    FullCost sum = 0;
1291    SysRes res;
1292
1293    CLG_ASSERT(dumps_initialized);
1294    CLG_ASSERT(filename != 0);
1295
1296    if (!CLG_(clo).combine_dumps) {
1297	i = VG_(sprintf)(filename, "%s", out_file);
1298
1299	if (trigger)
1300	    i += VG_(sprintf)(filename+i, ".%d", out_counter);
1301
1302	if (CLG_(clo).separate_threads)
1303	    VG_(sprintf)(filename+i, "-%02d", tid);
1304
1305	res = VG_(open)(filename, VKI_O_WRONLY|VKI_O_TRUNC, 0);
1306    }
1307    else {
1308	VG_(sprintf)(filename, "%s", out_file);
1309        res = VG_(open)(filename, VKI_O_WRONLY|VKI_O_APPEND, 0);
1310	if (!sr_isError(res) && out_counter>1)
1311	    appending = True;
1312    }
1313
1314    if (sr_isError(res)) {
1315	res = VG_(open)(filename, VKI_O_CREAT|VKI_O_WRONLY,
1316			VKI_S_IRUSR|VKI_S_IWUSR);
1317	if (sr_isError(res)) {
1318	    /* If the file can not be opened for whatever reason (conflict
1319	       between multiple supervised processes?), give up now. */
1320	    file_err();
1321	}
1322    }
1323    fd = (Int) sr_Res(res);
1324
1325    CLG_DEBUG(2, "  new_dumpfile '%s'\n", filename);
1326
1327    if (!appending)
1328	reset_dump_array();
1329
1330
1331    if (!appending) {
1332	/* version */
1333	VG_(sprintf)(buf, "version: 1\n");
1334	my_fwrite(fd, buf, VG_(strlen)(buf));
1335
1336	/* creator */
1337	VG_(sprintf)(buf, "creator: callgrind-" VERSION "\n");
1338	my_fwrite(fd, buf, VG_(strlen)(buf));
1339
1340	/* "pid:" line */
1341	VG_(sprintf)(buf, "pid: %d\n", VG_(getpid)());
1342	my_fwrite(fd, buf, VG_(strlen)(buf));
1343
1344	/* "cmd:" line */
1345	VG_(strcpy)(buf, "cmd: ");
1346	my_fwrite(fd, buf, VG_(strlen)(buf));
1347	my_fwrite(fd, cmdbuf, VG_(strlen)(cmdbuf));
1348    }
1349
1350    VG_(sprintf)(buf, "\npart: %d\n", out_counter);
1351    my_fwrite(fd, buf, VG_(strlen)(buf));
1352    if (CLG_(clo).separate_threads) {
1353	VG_(sprintf)(buf, "thread: %d\n", tid);
1354	my_fwrite(fd, buf, VG_(strlen)(buf));
1355    }
1356
1357    /* "desc:" lines */
1358    if (!appending) {
1359	my_fwrite(fd, "\n", 1);
1360
1361#if 0
1362	/* Global options changing the tracing behaviour */
1363	VG_(sprintf)(buf, "\ndesc: Option: --skip-plt=%s\n",
1364		     CLG_(clo).skip_plt ? "yes" : "no");
1365	my_fwrite(fd, buf, VG_(strlen)(buf));
1366	VG_(sprintf)(buf, "desc: Option: --collect-jumps=%s\n",
1367		     CLG_(clo).collect_jumps ? "yes" : "no");
1368	my_fwrite(fd, buf, VG_(strlen)(buf));
1369	VG_(sprintf)(buf, "desc: Option: --separate-recs=%d\n",
1370		     CLG_(clo).separate_recursions);
1371	my_fwrite(fd, buf, VG_(strlen)(buf));
1372	VG_(sprintf)(buf, "desc: Option: --separate-callers=%d\n",
1373		     CLG_(clo).separate_callers);
1374	my_fwrite(fd, buf, VG_(strlen)(buf));
1375
1376	VG_(sprintf)(buf, "desc: Option: --dump-bbs=%s\n",
1377		     CLG_(clo).dump_bbs ? "yes" : "no");
1378	my_fwrite(fd, buf, VG_(strlen)(buf));
1379	VG_(sprintf)(buf, "desc: Option: --separate-threads=%s\n",
1380		     CLG_(clo).separate_threads ? "yes" : "no");
1381	my_fwrite(fd, buf, VG_(strlen)(buf));
1382#endif
1383
1384	(*CLG_(cachesim).getdesc)(buf);
1385	my_fwrite(fd, buf, VG_(strlen)(buf));
1386    }
1387
1388    VG_(sprintf)(buf, "\ndesc: Timerange: Basic block %llu - %llu\n",
1389		 bbs_done, CLG_(stat).bb_executions);
1390
1391    my_fwrite(fd, buf, VG_(strlen)(buf));
1392    VG_(sprintf)(buf, "desc: Trigger: %s\n",
1393		 trigger ? trigger : "Program termination");
1394    my_fwrite(fd, buf, VG_(strlen)(buf));
1395
1396#if 0
1397   /* Output function specific config
1398    * FIXME */
1399   for (i = 0; i < N_FNCONFIG_ENTRIES; i++) {
1400       fnc = fnc_table[i];
1401       while (fnc) {
1402	   if (fnc->skip) {
1403	       VG_(sprintf)(buf, "desc: Option: --fn-skip=%s\n", fnc->name);
1404	       my_fwrite(fd, buf, VG_(strlen)(buf));
1405	   }
1406	   if (fnc->dump_at_enter) {
1407	       VG_(sprintf)(buf, "desc: Option: --fn-dump-at-enter=%s\n",
1408			    fnc->name);
1409	       my_fwrite(fd, buf, VG_(strlen)(buf));
1410	   }
1411	   if (fnc->dump_at_leave) {
1412	       VG_(sprintf)(buf, "desc: Option: --fn-dump-at-leave=%s\n",
1413			    fnc->name);
1414	       my_fwrite(fd, buf, VG_(strlen)(buf));
1415	   }
1416	   if (fnc->separate_callers != CLG_(clo).separate_callers) {
1417	       VG_(sprintf)(buf, "desc: Option: --separate-callers%d=%s\n",
1418			    fnc->separate_callers, fnc->name);
1419	       my_fwrite(fd, buf, VG_(strlen)(buf));
1420	   }
1421	   if (fnc->separate_recursions != CLG_(clo).separate_recursions) {
1422	       VG_(sprintf)(buf, "desc: Option: --separate-recs%d=%s\n",
1423			    fnc->separate_recursions, fnc->name);
1424	       my_fwrite(fd, buf, VG_(strlen)(buf));
1425	   }
1426	   fnc = fnc->next;
1427       }
1428   }
1429#endif
1430
1431   /* "positions:" line */
1432   VG_(sprintf)(buf, "\npositions:%s%s%s\n",
1433		CLG_(clo).dump_instr ? " instr" : "",
1434		CLG_(clo).dump_bb    ? " bb" : "",
1435		CLG_(clo).dump_line  ? " line" : "");
1436   my_fwrite(fd, buf, VG_(strlen)(buf));
1437
1438   /* "events:" line */
1439   i = VG_(sprintf)(buf, "events: ");
1440   CLG_(sprint_eventmapping)(buf+i, CLG_(dumpmap));
1441   my_fwrite(fd, buf, VG_(strlen)(buf));
1442   my_fwrite(fd, "\n", 1);
1443
1444   /* summary lines */
1445   sum = CLG_(get_eventset_cost)( CLG_(sets).full );
1446   CLG_(zero_cost)(CLG_(sets).full, sum);
1447   if (CLG_(clo).separate_threads) {
1448     thread_info* ti = CLG_(get_current_thread)();
1449     CLG_(add_diff_cost)(CLG_(sets).full, sum, ti->lastdump_cost,
1450			   ti->states.entry[0]->cost);
1451   }
1452   else {
1453     /* This function is called once for thread 1, where
1454      * all costs are summed up when not dumping separate per thread.
1455      * But this is not true for summary: we need to add all threads.
1456      */
1457     int t;
1458     thread_info** thr = CLG_(get_threads)();
1459     for(t=1;t<VG_N_THREADS;t++) {
1460       if (!thr[t]) continue;
1461       CLG_(add_diff_cost)(CLG_(sets).full, sum,
1462			  thr[t]->lastdump_cost,
1463			  thr[t]->states.entry[0]->cost);
1464     }
1465   }
1466   fprint_cost_ln(fd, "summary: ", CLG_(dumpmap), sum);
1467
1468   /* all dumped cost will be added to total_fcc */
1469   CLG_(init_cost_lz)( CLG_(sets).full, &dump_total_cost );
1470
1471   my_fwrite(fd, "\n\n",2);
1472
1473   if (VG_(clo_verbosity) > 1)
1474       VG_(message)(Vg_DebugMsg, "Dump to %s\n", filename);
1475
1476   return fd;
1477}
1478
1479
1480static void close_dumpfile(int fd)
1481{
1482    if (fd <0) return;
1483
1484    fprint_cost_ln(fd, "totals: ", CLG_(dumpmap),
1485		   dump_total_cost);
1486    //fprint_fcc_ln(fd, "summary: ", &dump_total_fcc);
1487    CLG_(add_cost_lz)(CLG_(sets).full,
1488		     &CLG_(total_cost), dump_total_cost);
1489
1490    fwrite_flush();
1491    VG_(close)(fd);
1492
1493    if (filename[0] == '.') {
1494	if (-1 == VG_(rename) (filename, filename+1)) {
1495	    /* Can not rename to correct file name: give out warning */
1496	    VG_(message)(Vg_DebugMsg, "Warning: Can not rename .%s to %s\n",
1497			 filename, filename);
1498       }
1499   }
1500}
1501
1502
1503/* Helper for print_bbccs */
1504
1505static Int   print_fd;
1506static const HChar* print_trigger;
1507static HChar print_buf[BUF_LEN];
1508
1509static void print_bbccs_of_thread(thread_info* ti)
1510{
1511  BBCC **p, **array;
1512  FnPos lastFnPos;
1513  AddrPos lastAPos;
1514
1515  CLG_DEBUG(1, "+ print_bbccs(tid %d)\n", CLG_(current_tid));
1516
1517  print_fd = new_dumpfile(print_buf, CLG_(current_tid), print_trigger);
1518  if (print_fd <0) {
1519    CLG_DEBUG(1, "- print_bbccs(tid %d): No output...\n", CLG_(current_tid));
1520    return;
1521  }
1522
1523  p = array = prepare_dump();
1524  init_fpos(&lastFnPos);
1525  init_apos(&lastAPos, 0, 0, 0);
1526
1527  if (p) while(1) {
1528
1529    /* on context/function change, print old cost buffer before */
1530    if (lastFnPos.cxt && ((*p==0) ||
1531			 (lastFnPos.cxt != (*p)->cxt) ||
1532			 (lastFnPos.rec_index != (*p)->rec_index))) {
1533      if (!CLG_(is_zero_cost)( CLG_(sets).full, ccSum[currSum].cost )) {
1534	/* no need to switch buffers, as position is the same */
1535	fprint_apos(print_fd, &(ccSum[currSum].p), &lastAPos,
1536		    lastFnPos.cxt->fn[0]->file);
1537	fprint_fcost(print_fd, &ccSum[currSum], &lastAPos);
1538      }
1539
1540      if (ccSum[currSum].p.file != lastFnPos.cxt->fn[0]->file) {
1541	/* switch back to file of function */
1542	VG_(sprintf)(print_buf, "fe=");
1543	print_file(print_buf+3, lastFnPos.cxt->fn[0]->file);
1544	my_fwrite(print_fd, print_buf, VG_(strlen)(print_buf));
1545      }
1546      my_fwrite(print_fd, "\n", 1);
1547    }
1548
1549    if (*p == 0) break;
1550
1551    if (print_fn_pos(print_fd, &lastFnPos, *p)) {
1552
1553      /* new function */
1554      init_apos(&lastAPos, 0, 0, (*p)->cxt->fn[0]->file);
1555      init_fcost(&ccSum[0], 0, 0, 0);
1556      init_fcost(&ccSum[1], 0, 0, 0);
1557      currSum = 0;
1558    }
1559
1560    if (CLG_(clo).dump_bbs) {
1561	/* FIXME: Specify Object of BB if different to object of fn */
1562	int i, pos = 0;
1563	ULong ecounter = (*p)->ecounter_sum;
1564	pos = VG_(sprintf)(print_buf, "bb=%#lx ", (*p)->bb->offset);
1565	for(i = 0; i<(*p)->bb->cjmp_count;i++) {
1566	    pos += VG_(sprintf)(print_buf+pos, "%d %llu ",
1567				(*p)->bb->jmp[i].instr,
1568				ecounter);
1569	    ecounter -= (*p)->jmp[i].ecounter;
1570	}
1571	VG_(sprintf)(print_buf+pos, "%d %llu\n",
1572		     (*p)->bb->instr_count,
1573		     ecounter);
1574	my_fwrite(print_fd, print_buf, VG_(strlen)(print_buf));
1575    }
1576
1577    fprint_bbcc(print_fd, *p, &lastAPos);
1578
1579    p++;
1580  }
1581
1582  close_dumpfile(print_fd);
1583  if (array) VG_(free)(array);
1584
1585  /* set counters of last dump */
1586  CLG_(copy_cost)( CLG_(sets).full, ti->lastdump_cost,
1587		  CLG_(current_state).cost );
1588
1589  CLG_DEBUG(1, "- print_bbccs(tid %d)\n", CLG_(current_tid));
1590}
1591
1592
1593static void print_bbccs(const HChar* trigger, Bool only_current_thread)
1594{
1595  init_dump_array();
1596  init_debug_cache();
1597
1598  print_fd = -1;
1599  print_trigger = trigger;
1600
1601  if (!CLG_(clo).separate_threads) {
1602    /* All BBCC/JCC costs is stored for thread 1 */
1603    Int orig_tid = CLG_(current_tid);
1604
1605    CLG_(switch_thread)(1);
1606    print_bbccs_of_thread( CLG_(get_current_thread)() );
1607    CLG_(switch_thread)(orig_tid);
1608  }
1609  else if (only_current_thread)
1610    print_bbccs_of_thread( CLG_(get_current_thread)() );
1611  else
1612    CLG_(forall_threads)(print_bbccs_of_thread);
1613
1614  free_dump_array();
1615}
1616
1617
1618void CLG_(dump_profile)(const HChar* trigger, Bool only_current_thread)
1619{
1620   CLG_DEBUG(2, "+ dump_profile(Trigger '%s')\n",
1621	    trigger ? trigger : "Prg.Term.");
1622
1623   CLG_(init_dumps)();
1624
1625   if (VG_(clo_verbosity) > 1)
1626       VG_(message)(Vg_DebugMsg, "Start dumping at BB %llu (%s)...\n",
1627		    CLG_(stat).bb_executions,
1628		    trigger ? trigger : "Prg.Term.");
1629
1630   out_counter++;
1631
1632   print_bbccs(trigger, only_current_thread);
1633
1634   bbs_done = CLG_(stat).bb_executions++;
1635
1636   if (VG_(clo_verbosity) > 1)
1637     VG_(message)(Vg_DebugMsg, "Dumping done.\n");
1638}
1639
1640/* Copy command to cmd buffer. We want to original command line
1641 * (can change at runtime)
1642 */
1643static
1644void init_cmdbuf(void)
1645{
1646  Int i,j,size = 0;
1647  HChar* argv;
1648
1649  if (VG_(args_the_exename)) {
1650      CLG_ASSERT( VG_(strlen)( VG_(args_the_exename) ) < BUF_LEN-1);
1651      size = VG_(sprintf)(cmdbuf, " %s", VG_(args_the_exename));
1652  }
1653
1654  for(i = 0; i < VG_(sizeXA)( VG_(args_for_client) ); i++) {
1655      argv = * (HChar**) VG_(indexXA)( VG_(args_for_client), i );
1656      if (!argv) continue;
1657      if ((size>0) && (size < BUF_LEN)) cmdbuf[size++] = ' ';
1658      for(j=0;argv[j]!=0;j++)
1659	  if (size < BUF_LEN) cmdbuf[size++] = argv[j];
1660  }
1661
1662  if (size >= BUF_LEN) size = BUF_LEN-1;
1663  cmdbuf[size] = 0;
1664}
1665
1666/*
1667 * Set up file names for dump output: <out_directory>, <out_file>.
1668 * <out_file> is derived from the output format string, which defaults
1669 * to "callgrind.out.%p", where %p is replaced with the PID.
1670 * For the final file name, on intermediate dumps a counter is appended,
1671 * and further, if separate dumps per thread are requested, the thread ID.
1672 *
1673 * <out_file> always starts with a full absolute path.
1674 * If the output format string represents a relative path, the current
1675 * working directory at program start is used.
1676 *
1677 * This function has to be called every time a profile dump is generated
1678 * to be able to react on PID changes.
1679 */
1680void CLG_(init_dumps)()
1681{
1682   Int lastSlash, i;
1683   SysRes res;
1684
1685   static int thisPID = 0;
1686   int currentPID = VG_(getpid)();
1687   if (currentPID == thisPID) {
1688       /* already initialized, and no PID change */
1689       CLG_ASSERT(out_file != 0);
1690       return;
1691   }
1692   thisPID = currentPID;
1693
1694   if (!CLG_(clo).out_format)
1695     CLG_(clo).out_format = DEFAULT_OUTFORMAT;
1696
1697   /* If a file name was already set, clean up before */
1698   if (out_file) {
1699       VG_(free)(out_file);
1700       VG_(free)(out_directory);
1701       VG_(free)(filename);
1702       out_counter = 0;
1703   }
1704
1705   // Setup output filename.
1706   out_file =
1707       VG_(expand_file_name)("--callgrind-out-file", CLG_(clo).out_format);
1708
1709   /* get base directory for dump/command/result files */
1710   CLG_ASSERT(out_file[0] == '/');
1711   lastSlash = 0;
1712   i = 1;
1713   while(out_file[i]) {
1714       if (out_file[i] == '/') lastSlash = i;
1715       i++;
1716   }
1717   i = lastSlash;
1718   out_directory = (HChar*) CLG_MALLOC("cl.dump.init_dumps.1", i+1);
1719   VG_(strncpy)(out_directory, out_file, i);
1720   out_directory[i] = 0;
1721
1722   /* allocate space big enough for final filenames */
1723   filename = (HChar*) CLG_MALLOC("cl.dump.init_dumps.2",
1724                                 VG_(strlen)(out_file)+32);
1725   CLG_ASSERT(filename != 0);
1726
1727   /* Make sure the output base file can be written.
1728    * This is used for the dump at program termination.
1729    * We stop with an error here if we can not create the
1730    * file: This is probably because of missing rights,
1731    * and trace parts wouldn't be allowed to be written, too.
1732    */
1733    VG_(strcpy)(filename, out_file);
1734    res = VG_(open)(filename, VKI_O_WRONLY|VKI_O_TRUNC, 0);
1735    if (sr_isError(res)) {
1736	res = VG_(open)(filename, VKI_O_CREAT|VKI_O_WRONLY,
1737		       VKI_S_IRUSR|VKI_S_IWUSR);
1738	if (sr_isError(res)) {
1739	    file_err();
1740	}
1741    }
1742    if (!sr_isError(res)) VG_(close)( (Int)sr_Res(res) );
1743
1744    if (!dumps_initialized)
1745	init_cmdbuf();
1746
1747    dumps_initialized = True;
1748}
1749