opannotate.cpp revision cc2ee177dbb3befca43e36cfc56778b006c3d050
1/**
2 * @file opannotate.cpp
3 * Implement opannotate utility
4 *
5 * @remark Copyright 2003 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author John Levon
9 * @author Philippe Elie
10 */
11
12#include <iostream>
13#include <sstream>
14#include <algorithm>
15#include <iomanip>
16#include <fstream>
17#include <utility>
18
19#include "op_exception.h"
20#include "op_header.h"
21#include "profile.h"
22#include "populate.h"
23#include "op_sample_file.h"
24#include "cverb.h"
25#include "string_manip.h"
26#include "demangle_symbol.h"
27#include "child_reader.h"
28#include "op_file.h"
29#include "file_manip.h"
30#include "arrange_profiles.h"
31#include "opannotate_options.h"
32#include "profile_container.h"
33#include "symbol_sort.h"
34#include "image_errors.h"
35
36using namespace std;
37using namespace options;
38
39namespace {
40
41size_t nr_events;
42
43scoped_ptr<profile_container> samples;
44
45/// how opannotate was invoked
46string cmdline;
47
48/// empty annotation fill string
49string annotation_fill;
50
51/// string used as start / end comment to annotate source
52string const begin_comment("/* ");
53string const in_comment(" * ");
54string const end_comment(" */");
55
56/// field width for the sample count
57unsigned int const count_width = 6;
58
59string get_annotation_fill()
60{
61	string str;
62
63	for (size_t i = 0; i < nr_events; ++i) {
64		str += string(count_width, ' ') + ' ';
65		str += string(percent_width, ' ');
66	}
67
68	for (size_t i = 1; i < nr_events; ++i) {
69		str += "  ";
70	}
71
72	str += " :";
73	return str;
74}
75
76
77symbol_entry const * find_symbol(string const & image_name,
78				 string const & str_vma)
79{
80	// do not use the bfd equivalent:
81	//  - it does not skip space at begin
82	//  - we does not need cross architecture compile so the native
83	// strtoull must work, assuming unsigned long long can contain a vma
84	// and on 32/64 bits box bfd_vma is 64 bits
85	bfd_vma vma = strtoull(str_vma.c_str(), NULL, 16);
86
87	return samples->find_symbol(image_name, vma);
88}
89
90
91void output_info(ostream & out)
92{
93	out << begin_comment << '\n';
94
95	out << in_comment << "Command line: " << cmdline << '\n'
96	    << in_comment << '\n';
97
98	out << in_comment << "Interpretation of command line:" << '\n';
99
100	if (!assembly) {
101		out << in_comment
102		    << "Output annotated source file with samples" << '\n';
103
104		if (options::threshold != 0) {
105			out << in_comment
106			    << "Output files where samples count reach "
107			    << options::threshold << "% of the samples\n";
108		} else {
109			out << in_comment << "Output all files" << '\n';
110		}
111	} else {
112		out << in_comment
113		    << "Output annotated assembly listing with samples"
114		    << '\n';
115
116		if (!objdump_params.empty()) {
117			out << in_comment << "Passing the following "
118				"additional arguments to objdump ; \"";
119			for (size_t i = 0 ; i < objdump_params.size() ; ++i)
120				out << objdump_params[i] << " ";
121			out << "\"" << '\n';
122		}
123	}
124
125	out << in_comment << '\n';
126
127	out << in_comment << classes.cpuinfo << endl;
128	if (!classes.event.empty())
129		out << in_comment << classes.event << endl;
130
131	for (size_t i = 0; i < classes.v.size(); ++i)
132		out << in_comment << classes.v[i].longname << endl;
133
134	out << end_comment << '\n';
135}
136
137
138string count_str(count_array_t const & count,
139		   count_array_t const & total)
140{
141	ostringstream os;
142	for (size_t i = 0; i < nr_events; ++i) {
143		os << setw(count_width) << count[i] << ' ';
144
145		os << format_percent(op_ratio(count[i], total[i]) * 100.0,
146				    percent_int_width, percent_fract_width);
147	}
148	return os.str();
149}
150
151
152string asm_line_annotation(symbol_entry const * last_symbol,
153			   string const & value)
154{
155	// do not use the bfd equivalent:
156	//  - it does not skip space at begin
157	//  - we does not need cross architecture compile so the native
158	// strtoull must work, assuming unsigned long long can contain a vma
159	// and on 32/64 bits box bfd_vma is 64 bits
160	// gcc 2.91.66 workaround
161	bfd_vma vma = 0;
162	vma = strtoull(value.c_str(), NULL, 16);
163
164	string str;
165
166	sample_entry const * sample = samples->find_sample(last_symbol, vma);
167	if (sample) {
168		str += count_str(sample->counts, samples->samples_count());
169		for (size_t i = 1; i < nr_events; ++i)
170			str += "  ";
171		str += " :";
172	} else {
173		str = annotation_fill;
174	}
175
176	return str;
177}
178
179
180string symbol_annotation(symbol_entry const * symbol)
181{
182	if (!symbol)
183		return string();
184
185	string annot = count_str(symbol->sample.counts,
186	                         samples->samples_count());
187	if (annot.empty())
188		return string();
189
190	string const & symname = symbol_names.demangle(symbol->name);
191
192	string str = " ";
193	str += begin_comment + symname + " total: ";
194	str += count_str(symbol->sample.counts, samples->samples_count());
195	str += end_comment;
196	return str;
197}
198
199
/// Tell whether a line of objdump output names a symbol. Such lines have
/// the form "08030434 <symbol_name>:". The test is strict to avoid taking
/// a source line for a symbol line: the character at pos must be a blank,
/// the next one '<' and the last character of the line ':'.
/// @param str  line to test
/// @param pos  index of the first character after the address
bool is_symbol_line(string const & str, string::size_type pos)
{
	bool const blank_then_bracket =
		str[pos] == ' ' && str[pos + 1] == '<';

	// the last character is only looked at when the first test passed,
	// which implies a non empty line
	return blank_then_bracket && str[str.length() - 1] == ':';
}
210
211
212symbol_entry const * output_objdump_asm_line(symbol_entry const * last_symbol,
213		string const & app_name, string const & str,
214		symbol_collection const & symbols,
215		bool & do_output)
216{
217	// output of objdump is a human readable form and can contain some
218	// ambiguity so this code is dirty. It is also optimized a little bit
219	// so it is difficult to simplify it without breaking something ...
220
221	// line of interest are: "[:space:]*[:xdigit:]?[ :]", the last char of
222	// this regexp dis-ambiguate between a symbol line and an asm line. If
223	// source contain line of this form an ambiguity occur and we rely on
224	// the robustness of this code.
225
226	size_t pos = 0;
227	while (pos < str.length() && isspace(str[pos]))
228		++pos;
229
230	if (pos == str.length() || !isxdigit(str[pos])) {
231		if (do_output) {
232			cout << annotation_fill << str << '\n';
233			return last_symbol;
234		}
235	}
236
237	while (pos < str.length() && isxdigit(str[pos]))
238		++pos;
239
240	if (pos == str.length() || (!isspace(str[pos]) && str[pos] != ':')) {
241		if (do_output) {
242			cout << annotation_fill << str << '\n';
243			return last_symbol;
244		}
245	}
246
247	if (is_symbol_line(str, pos)) {
248		last_symbol = find_symbol(app_name, str);
249
250		// ! complexity: linear in number of symbol must use sorted
251		// by address vector and lower_bound ?
252		// Note this use a pointer comparison. It work because symbols
253		// pointer are unique
254		if (find(symbols.begin(), symbols.end(), last_symbol)
255			!= symbols.end()) {
256			do_output = true;
257		} else {
258			do_output = false;
259		}
260
261		if (do_output)
262			cout << str << symbol_annotation(last_symbol) << '\n';
263
264	} else {
265		// not a symbol, probably an asm line.
266		if (do_output)
267			cout << asm_line_annotation(last_symbol, str)
268			     << str << '\n';
269	}
270
271	return last_symbol;
272}
273
274
275void do_one_output_objdump(symbol_collection const & symbols,
276			   string const & app_name, bfd_vma start, bfd_vma end)
277{
278	vector<string> args;
279
280	args.push_back("-d");
281	args.push_back("--no-show-raw-insn");
282	if (source)
283		args.push_back("-S");
284
285	if (start || end != ~(bfd_vma)0) {
286		ostringstream arg1, arg2;
287		arg1 << "--start-address=" << start;
288		arg2 << "--stop-address=" << end;
289		args.push_back(arg1.str());
290		args.push_back(arg2.str());
291	}
292
293	if (!objdump_params.empty()) {
294		for (size_t i = 0 ; i < objdump_params.size() ; ++i)
295			args.push_back(objdump_params[i]);
296	}
297
298	args.push_back(app_name);
299	child_reader reader("objdump", args);
300	if (reader.error()) {
301		cerr << "An error occur during the execution of objdump:\n\n";
302		cerr << reader.error_str() << endl;
303		return;
304	}
305
306	// to filter output of symbols (filter based on command line options)
307	bool do_output = true;
308
309	symbol_entry const * last_symbol = 0;
310	string str;
311	while (reader.getline(str)) {
312		last_symbol = output_objdump_asm_line(last_symbol, app_name,
313					str, symbols, do_output);
314	}
315
316	// objdump always returns SUCCESS so we must rely on the stderr state
317	// of objdump. If objdump error message is cryptic our own error
318	// message will be probably also cryptic
319	ostringstream std_err;
320	ostringstream std_out;
321	reader.get_data(std_out, std_err);
322	if (std_err.str().length()) {
323		cerr << "An error occur during the execution of objdump:\n\n";
324		cerr << std_err.str() << endl;
325		return ;
326	}
327
328	// force error code to be acquired
329	reader.terminate_process();
330
331	// required because if objdump stop by signal all above things suceeed
332	// (signal error message are not output through stdout/stderr)
333	if (reader.error()) {
334		cerr << "An error occur during the execution of objdump:\n\n";
335		cerr << reader.error_str() << endl;
336		return;
337	}
338}
339
340
341void output_objdump_asm(symbol_collection const & symbols,
342			string const & app_name)
343{
344	// this is only an optimisation, we can either filter output by
345	// directly calling objdump and rely on the symbol filtering or
346	// we can call objdump with the right parameter to just disassemble
347	// the needed part. This is a real win only when calling objdump
348	// a medium number of times, I dunno if the used threshold is optimal
349	// but it is a conservative value.
350	size_t const max_objdump_exec = 50;
351	if (symbols.size() <= max_objdump_exec) {
352		symbol_collection::const_iterator cit = symbols.begin();
353		symbol_collection::const_iterator end = symbols.end();
354		for (; cit != end; ++cit) {
355			bfd_vma start = (*cit)->sample.vma;
356			bfd_vma end  = start + (*cit)->size;
357			do_one_output_objdump(symbols, app_name, start, end);
358		}
359	} else {
360		do_one_output_objdump(symbols, app_name, 0, ~bfd_vma(0));
361	}
362}
363
364
365bool output_asm(string const & app_name)
366{
367	profile_container::symbol_choice choice;
368	choice.threshold = options::threshold;
369	choice.image_name = app_name;
370	choice.match_image = true;
371	symbol_collection symbols = samples->select_symbols(choice);
372
373	if (!symbols.empty()) {
374		sort_options options;
375		options.add_sort_option(sort_options::sample);
376		options.sort(symbols, false, false);
377
378		output_info(cout);
379
380		output_objdump_asm(symbols, app_name);
381
382		return true;
383	}
384
385	return false;
386}
387
388
389string const source_line_annotation(debug_name_id filename, size_t linenr)
390{
391	string str;
392
393	count_array_t counts = samples->samples_count(filename, linenr);
394	if (!counts.zero()) {
395		str += count_str(counts, samples->samples_count());
396		for (size_t i = 1; i < nr_events; ++i)
397			str += "  ";
398		str += " :";
399	} else {
400		str = annotation_fill;
401	}
402
403	return str;
404}
405
406
407string source_symbol_annotation(debug_name_id filename, size_t linenr)
408{
409	symbol_entry const * symbol = samples->find_symbol(filename, linenr);
410
411	return symbol_annotation(symbol);
412}
413
414
415void output_per_file_info(ostream & out, debug_name_id filename,
416			  count_array_t const & total_file_count)
417{
418	out << begin_comment << '\n'
419	     << in_comment << "Total samples for file : "
420	     << '"' << debug_names.name(filename) << '"'
421	     << '\n';
422	out << in_comment << '\n' << in_comment
423	    << count_str(total_file_count, samples->samples_count())
424	    << '\n';
425	out << end_comment << '\n' << '\n';
426}
427
428
429string const line0_info(debug_name_id filename)
430{
431	string annotation = source_line_annotation(filename, 0);
432	if (trim(annotation, " \t:").empty())
433		return string();
434
435	string str = "<credited to line zero> ";
436	str += annotation;
437	return str;
438}
439
440
441void do_output_one_file(ostream & out, istream & in, debug_name_id filename,
442                        bool header)
443{
444	count_array_t count = samples->samples_count(filename);
445
446	if (header) {
447		output_per_file_info(out, filename, count);
448		out << line0_info(filename) << '\n';
449	}
450
451
452	if (in) {
453		string str;
454
455		for (size_t linenr = 1 ; getline(in, str) ; ++linenr) {
456			out << source_line_annotation(filename, linenr) << str
457			    << source_symbol_annotation(filename, linenr)
458			    << '\n';
459		}
460
461	} else {
462		// FIXME : we have no input file : we just outputfooter
463		// so on user can known total nr of samples for this source
464		// later we must add code that iterate through symbol in this
465		// file to output one annotation for each symbol. To do this we
466		// need a select_symbol(filename); in profile_container which
467		// fall back to the implementation in symbol_container
468		// using a lazilly build symbol_map sorted by filename
469		// (necessary functors already exist in symbol_functors.h)
470	}
471
472	if (!header) {
473		output_per_file_info(out, filename, count);
474		out << line0_info(filename) << '\n';
475	}
476}
477
478
479void output_one_file(istream & in, debug_name_id filename,
480                     string const & source)
481{
482	if (output_dir.empty()) {
483		do_output_one_file(cout, in, filename, true);
484		return;
485	}
486
487	string const out_file = op_realpath(output_dir + source);
488
489	/* Just because you're paranoid doesn't mean they're not out to
490	 * get you ...
491	 *
492	 * This is just a lame final safety check. If we found the
493	 * source, then "source" should be canonical already, and
494	 * can't escape from the output dir. We can't use op_realpath()
495	 * alone as that needs the file to exist already.
496	 *
497	 * Let's not complain again if we couldn't find the file anyway.
498	 */
499	if (out_file.find("/../") != string::npos) {
500		if (in) {
501			cerr << "refusing to create non-canonical filename "
502			     << out_file  << endl;
503		}
504		return;
505	} else if (!is_prefix(out_file, output_dir)) {
506		if (in) {
507			cerr << "refusing to create file " << out_file
508			     << " outside of output directory " << output_dir
509			     << endl;
510		}
511		return;
512	}
513
514	if (is_files_identical(out_file, source)) {
515		cerr << "input and output files are identical: "
516		     << out_file << endl;
517		return;
518	}
519
520	if (create_path(out_file.c_str())) {
521		cerr << "unable to create file: "
522		     << '"' << op_dirname(out_file) << '"' << endl;
523		return;
524	}
525
526	ofstream out(out_file.c_str());
527	if (!out) {
528		cerr << "unable to open output file "
529		     << '"' << out_file << '"' << endl;
530	} else {
531		do_output_one_file(out, in, filename, false);
532		output_info(out);
533	}
534}
535
536
537/* Locate a source file from debug info, which may be relative */
538string const locate_source_file(debug_name_id filename_id)
539{
540	string const origfile = debug_names.name(filename_id);
541	string file = origfile;
542
543	if (file.empty())
544		return file;
545
546	/* Allow absolute paths to be relocated to a different directory */
547	if (file[0] == '/') {
548		vector<string>::const_iterator cit = base_dirs.begin();
549		vector<string>::const_iterator end = base_dirs.end();
550		for (; cit != end; ++cit) {
551			string path = op_realpath(*cit);
552
553			if (is_prefix(file, path)) {
554				file = file.substr(path.length());
555				break;
556			}
557		}
558	}
559
560	vector<string>::const_iterator cit = search_dirs.begin();
561	vector<string>::const_iterator end = search_dirs.end();
562
563	for (; cit != end; ++cit) {
564		string const absfile = op_realpath(*cit + "/" + file);
565
566		if (op_file_readable(absfile)) {
567			return absfile;
568		}
569	}
570
571	/* We didn't find a relocated absolute file, or a relative file,
572	 * assume the original is correct, accounting for the
573	 * possibility it's relative the cwd
574	 */
575	return op_realpath(origfile);
576}
577
578
579void output_source(path_filter const & filter)
580{
581	bool const separate_file = !output_dir.empty();
582
583	if (!separate_file)
584		output_info(cout);
585
586	vector<debug_name_id> filenames =
587		samples->select_filename(options::threshold);
588
589	for (size_t i = 0 ; i < filenames.size() ; ++i) {
590		string const & source = locate_source_file(filenames[i]);
591
592		if (!filter.match(source))
593			continue;
594
595		ifstream in(source.c_str());
596
597		// it is common to have empty filename due to the lack
598		// of debug info (eg _init function) so warn only
599		// if the filename is non empty. The case: no debug
600		// info at all has already been checked.
601		if ((!in) && source.length()) {
602			cerr << "opannotate (warning): unable to open for "
603			     "reading: " << source << endl;
604		}
605
606		if (source.length()) {
607			output_one_file(in, filenames[i], source);
608		}
609	}
610}
611
612
613bool annotate_source(list<string> const & images)
614{
615	annotation_fill = get_annotation_fill();
616
617	if (!output_dir.empty()) {
618
619		if (create_path(output_dir.c_str())) {
620			cerr << "unable to create " << output_dir
621			     << " directory: " << endl;
622			return false;
623		}
624
625		// Make sure we have an absolute path.
626		output_dir = op_realpath(output_dir);
627		if (output_dir.length() &&
628		    output_dir[output_dir.length() - 1] != '/')
629			output_dir += '/';
630
631		/* Don't let the user stomp on their sources */
632		if (output_dir == "/") {
633			cerr << "Output path of / would over-write the "
634				"source files" << endl;
635			return false;
636		}
637	}
638
639	if (assembly) {
640		bool some_output = false;
641
642		list<string>::const_iterator it = images.begin();
643		list<string>::const_iterator const end = images.end();
644
645		for (; it != end; ++it) {
646			if (output_asm(*it)) {
647				some_output = true;
648			}
649		}
650
651		if (!some_output) {
652			// It's the only case we must care since we know the
653			// selected image set is not empty
654			cerr << "selected image set doesn't contain any of "
655			     << "the selected symbol\n";
656		}
657	} else {
658		output_source(file_filter);
659	}
660
661	return true;
662}
663
664
665int opannotate(options::spec const & spec)
666{
667	handle_options(spec);
668
669	nr_events = classes.v.size();
670
671	samples.reset(new profile_container(true, true));
672
673	list<string> images;
674
675	list<inverted_profile> iprofiles
676		= invert_profiles(options::archive_path, classes,
677				  options::extra_found_images);
678
679	report_image_errors(iprofiles);
680
681	list<inverted_profile>::iterator it = iprofiles.begin();
682	list<inverted_profile>::iterator const end = iprofiles.end();
683
684	bool debug_info = false;
685	for (; it != end; ++it) {
686		bool tmp = false;
687		populate_for_image(options::archive_path, *samples, *it,
688		                   options::symbol_filter, &tmp);
689		images.push_back(it->image);
690		if (tmp)
691			debug_info = true;
692	}
693
694	if (!debug_info && !options::assembly) {
695		cerr << "no debug information available for any binary "
696		     << "selected and --assembly not requested\n";
697		exit(EXIT_FAILURE);
698	}
699
700	annotate_source(images);
701
702	return 0;
703}
704
705} // anonymous namespace
706
707
708int main(int argc, char const * argv[])
709{
710	// set the invocation, for the file headers later
711	for (int i = 0 ; i < argc ; ++i)
712		cmdline += string(argv[i]) + " ";
713
714	return run_pp_tool(argc, argv, opannotate);
715}
716