opannotate.cpp revision cc2ee177dbb3befca43e36cfc56778b006c3d050
1/** 2 * @file opannotate.cpp 3 * Implement opannotate utility 4 * 5 * @remark Copyright 2003 OProfile authors 6 * @remark Read the file COPYING 7 * 8 * @author John Levon 9 * @author Philippe Elie 10 */ 11 12#include <iostream> 13#include <sstream> 14#include <algorithm> 15#include <iomanip> 16#include <fstream> 17#include <utility> 18 19#include "op_exception.h" 20#include "op_header.h" 21#include "profile.h" 22#include "populate.h" 23#include "op_sample_file.h" 24#include "cverb.h" 25#include "string_manip.h" 26#include "demangle_symbol.h" 27#include "child_reader.h" 28#include "op_file.h" 29#include "file_manip.h" 30#include "arrange_profiles.h" 31#include "opannotate_options.h" 32#include "profile_container.h" 33#include "symbol_sort.h" 34#include "image_errors.h" 35 36using namespace std; 37using namespace options; 38 39namespace { 40 41size_t nr_events; 42 43scoped_ptr<profile_container> samples; 44 45/// how opannotate was invoked 46string cmdline; 47 48/// empty annotation fill string 49string annotation_fill; 50 51/// string used as start / end comment to annotate source 52string const begin_comment("/* "); 53string const in_comment(" * "); 54string const end_comment(" */"); 55 56/// field width for the sample count 57unsigned int const count_width = 6; 58 59string get_annotation_fill() 60{ 61 string str; 62 63 for (size_t i = 0; i < nr_events; ++i) { 64 str += string(count_width, ' ') + ' '; 65 str += string(percent_width, ' '); 66 } 67 68 for (size_t i = 1; i < nr_events; ++i) { 69 str += " "; 70 } 71 72 str += " :"; 73 return str; 74} 75 76 77symbol_entry const * find_symbol(string const & image_name, 78 string const & str_vma) 79{ 80 // do not use the bfd equivalent: 81 // - it does not skip space at begin 82 // - we does not need cross architecture compile so the native 83 // strtoull must work, assuming unsigned long long can contain a vma 84 // and on 32/64 bits box bfd_vma is 64 bits 85 bfd_vma vma = strtoull(str_vma.c_str(), NULL, 16); 86 87 return samples->find_symbol(image_name, vma); 88} 89 90 91void output_info(ostream & out) 92{ 93 out << begin_comment << '\n'; 94 95 out << in_comment << "Command line: " << cmdline << '\n' 96 << in_comment << '\n'; 97 98 out << in_comment << "Interpretation of command line:" << '\n'; 99 100 if (!assembly) { 101 out << in_comment 102 << "Output annotated source file with samples" << '\n'; 103 104 if (options::threshold != 0) { 105 out << in_comment 106 << "Output files where samples count reach " 107 << options::threshold << "% of the samples\n"; 108 } else { 109 out << in_comment << "Output all files" << '\n'; 110 } 111 } else { 112 out << in_comment 113 << "Output annotated assembly listing with samples" 114 << '\n'; 115 116 if (!objdump_params.empty()) { 117 out << in_comment << "Passing the following " 118 "additional arguments to objdump ; \""; 119 for (size_t i = 0 ; i < objdump_params.size() ; ++i) 120 out << objdump_params[i] << " "; 121 out << "\"" << '\n'; 122 } 123 } 124 125 out << in_comment << '\n'; 126 127 out << in_comment << classes.cpuinfo << endl; 128 if (!classes.event.empty()) 129 out << in_comment << classes.event << endl; 130 131 for (size_t i = 0; i < classes.v.size(); ++i) 132 out << in_comment << classes.v[i].longname << endl; 133 134 out << end_comment << '\n'; 135} 136 137 138string count_str(count_array_t const & count, 139 count_array_t const & total) 140{ 141 ostringstream os; 142 for (size_t i = 0; i < nr_events; ++i) { 143 os << setw(count_width) << count[i] << ' '; 144 145 os << format_percent(op_ratio(count[i], total[i]) * 100.0, 146 percent_int_width, percent_fract_width); 147 } 148 return os.str(); 149} 150 151 152string asm_line_annotation(symbol_entry const * last_symbol, 153 string const & value) 154{ 155 // do not use the bfd equivalent: 156 // - it does not skip space at begin 157 // - we does not need cross architecture compile so the native 158 // strtoull must work, assuming unsigned long long can contain a vma 159 // and on 32/64 bits box bfd_vma is 64 bits 160 // gcc 2.91.66 workaround 161 bfd_vma vma = 0; 162 vma = strtoull(value.c_str(), NULL, 16); 163 164 string str; 165 166 sample_entry const * sample = samples->find_sample(last_symbol, vma); 167 if (sample) { 168 str += count_str(sample->counts, samples->samples_count()); 169 for (size_t i = 1; i < nr_events; ++i) 170 str += " "; 171 str += " :"; 172 } else { 173 str = annotation_fill; 174 } 175 176 return str; 177} 178 179 180string symbol_annotation(symbol_entry const * symbol) 181{ 182 if (!symbol) 183 return string(); 184 185 string annot = count_str(symbol->sample.counts, 186 samples->samples_count()); 187 if (annot.empty()) 188 return string(); 189 190 string const & symname = symbol_names.demangle(symbol->name); 191 192 string str = " "; 193 str += begin_comment + symname + " total: "; 194 str += count_str(symbol->sample.counts, samples->samples_count()); 195 str += end_comment; 196 return str; 197} 198 199 200/// return true if this line contains a symbol name in objdump formatting 201/// symbol are on the form 08030434 <symbol_name>: we need to be strict 202/// here to avoid any interpretation of a source line as a symbol line 203bool is_symbol_line(string const & str, string::size_type pos) 204{ 205 if (str[pos] != ' ' || str[pos + 1] != '<') 206 return false; 207 208 return str[str.length() - 1] == ':'; 209} 210 211 212symbol_entry const * output_objdump_asm_line(symbol_entry const * last_symbol, 213 string const & app_name, string const & str, 214 symbol_collection const & symbols, 215 bool & do_output) 216{ 217 // output of objdump is a human readable form and can contain some 218 // ambiguity so this code is dirty. It is also optimized a little bit 219 // so it is difficult to simplify it without breaking something ... 220 221 // line of interest are: "[:space:]*[:xdigit:]?[ :]", the last char of 222 // this regexp dis-ambiguate between a symbol line and an asm line. If 223 // source contain line of this form an ambiguity occur and we rely on 224 // the robustness of this code. 225 226 size_t pos = 0; 227 while (pos < str.length() && isspace(str[pos])) 228 ++pos; 229 230 if (pos == str.length() || !isxdigit(str[pos])) { 231 if (do_output) { 232 cout << annotation_fill << str << '\n'; 233 return last_symbol; 234 } 235 } 236 237 while (pos < str.length() && isxdigit(str[pos])) 238 ++pos; 239 240 if (pos == str.length() || (!isspace(str[pos]) && str[pos] != ':')) { 241 if (do_output) { 242 cout << annotation_fill << str << '\n'; 243 return last_symbol; 244 } 245 } 246 247 if (is_symbol_line(str, pos)) { 248 last_symbol = find_symbol(app_name, str); 249 250 // ! complexity: linear in number of symbol must use sorted 251 // by address vector and lower_bound ? 252 // Note this use a pointer comparison. It work because symbols 253 // pointer are unique 254 if (find(symbols.begin(), symbols.end(), last_symbol) 255 != symbols.end()) { 256 do_output = true; 257 } else { 258 do_output = false; 259 } 260 261 if (do_output) 262 cout << str << symbol_annotation(last_symbol) << '\n'; 263 264 } else { 265 // not a symbol, probably an asm line. 266 if (do_output) 267 cout << asm_line_annotation(last_symbol, str) 268 << str << '\n'; 269 } 270 271 return last_symbol; 272} 273 274 275void do_one_output_objdump(symbol_collection const & symbols, 276 string const & app_name, bfd_vma start, bfd_vma end) 277{ 278 vector<string> args; 279 280 args.push_back("-d"); 281 args.push_back("--no-show-raw-insn"); 282 if (source) 283 args.push_back("-S"); 284 285 if (start || end != ~(bfd_vma)0) { 286 ostringstream arg1, arg2; 287 arg1 << "--start-address=" << start; 288 arg2 << "--stop-address=" << end; 289 args.push_back(arg1.str()); 290 args.push_back(arg2.str()); 291 } 292 293 if (!objdump_params.empty()) { 294 for (size_t i = 0 ; i < objdump_params.size() ; ++i) 295 args.push_back(objdump_params[i]); 296 } 297 298 args.push_back(app_name); 299 child_reader reader("objdump", args); 300 if (reader.error()) { 301 cerr << "An error occur during the execution of objdump:\n\n"; 302 cerr << reader.error_str() << endl; 303 return; 304 } 305 306 // to filter output of symbols (filter based on command line options) 307 bool do_output = true; 308 309 symbol_entry const * last_symbol = 0; 310 string str; 311 while (reader.getline(str)) { 312 last_symbol = output_objdump_asm_line(last_symbol, app_name, 313 str, symbols, do_output); 314 } 315 316 // objdump always returns SUCCESS so we must rely on the stderr state 317 // of objdump. If objdump error message is cryptic our own error 318 // message will be probably also cryptic 319 ostringstream std_err; 320 ostringstream std_out; 321 reader.get_data(std_out, std_err); 322 if (std_err.str().length()) { 323 cerr << "An error occur during the execution of objdump:\n\n"; 324 cerr << std_err.str() << endl; 325 return ; 326 } 327 328 // force error code to be acquired 329 reader.terminate_process(); 330 331 // required because if objdump stop by signal all above things suceeed 332 // (signal error message are not output through stdout/stderr) 333 if (reader.error()) { 334 cerr << "An error occur during the execution of objdump:\n\n"; 335 cerr << reader.error_str() << endl; 336 return; 337 } 338} 339 340 341void output_objdump_asm(symbol_collection const & symbols, 342 string const & app_name) 343{ 344 // this is only an optimisation, we can either filter output by 345 // directly calling objdump and rely on the symbol filtering or 346 // we can call objdump with the right parameter to just disassemble 347 // the needed part. This is a real win only when calling objdump 348 // a medium number of times, I dunno if the used threshold is optimal 349 // but it is a conservative value. 350 size_t const max_objdump_exec = 50; 351 if (symbols.size() <= max_objdump_exec) { 352 symbol_collection::const_iterator cit = symbols.begin(); 353 symbol_collection::const_iterator end = symbols.end(); 354 for (; cit != end; ++cit) { 355 bfd_vma start = (*cit)->sample.vma; 356 bfd_vma end = start + (*cit)->size; 357 do_one_output_objdump(symbols, app_name, start, end); 358 } 359 } else { 360 do_one_output_objdump(symbols, app_name, 0, ~bfd_vma(0)); 361 } 362} 363 364 365bool output_asm(string const & app_name) 366{ 367 profile_container::symbol_choice choice; 368 choice.threshold = options::threshold; 369 choice.image_name = app_name; 370 choice.match_image = true; 371 symbol_collection symbols = samples->select_symbols(choice); 372 373 if (!symbols.empty()) { 374 sort_options options; 375 options.add_sort_option(sort_options::sample); 376 options.sort(symbols, false, false); 377 378 output_info(cout); 379 380 output_objdump_asm(symbols, app_name); 381 382 return true; 383 } 384 385 return false; 386} 387 388 389string const source_line_annotation(debug_name_id filename, size_t linenr) 390{ 391 string str; 392 393 count_array_t counts = samples->samples_count(filename, linenr); 394 if (!counts.zero()) { 395 str += count_str(counts, samples->samples_count()); 396 for (size_t i = 1; i < nr_events; ++i) 397 str += " "; 398 str += " :"; 399 } else { 400 str = annotation_fill; 401 } 402 403 return str; 404} 405 406 407string source_symbol_annotation(debug_name_id filename, size_t linenr) 408{ 409 symbol_entry const * symbol = samples->find_symbol(filename, linenr); 410 411 return symbol_annotation(symbol); 412} 413 414 415void output_per_file_info(ostream & out, debug_name_id filename, 416 count_array_t const & total_file_count) 417{ 418 out << begin_comment << '\n' 419 << in_comment << "Total samples for file : " 420 << '"' << debug_names.name(filename) << '"' 421 << '\n'; 422 out << in_comment << '\n' << in_comment 423 << count_str(total_file_count, samples->samples_count()) 424 << '\n'; 425 out << end_comment << '\n' << '\n'; 426} 427 428 429string const line0_info(debug_name_id filename) 430{ 431 string annotation = source_line_annotation(filename, 0); 432 if (trim(annotation, " \t:").empty()) 433 return string(); 434 435 string str = "<credited to line zero> "; 436 str += annotation; 437 return str; 438} 439 440 441void do_output_one_file(ostream & out, istream & in, debug_name_id filename, 442 bool header) 443{ 444 count_array_t count = samples->samples_count(filename); 445 446 if (header) { 447 output_per_file_info(out, filename, count); 448 out << line0_info(filename) << '\n'; 449 } 450 451 452 if (in) { 453 string str; 454 455 for (size_t linenr = 1 ; getline(in, str) ; ++linenr) { 456 out << source_line_annotation(filename, linenr) << str 457 << source_symbol_annotation(filename, linenr) 458 << '\n'; 459 } 460 461 } else { 462 // FIXME : we have no input file : we just outputfooter 463 // so on user can known total nr of samples for this source 464 // later we must add code that iterate through symbol in this 465 // file to output one annotation for each symbol. To do this we 466 // need a select_symbol(filename); in profile_container which 467 // fall back to the implementation in symbol_container 468 // using a lazilly build symbol_map sorted by filename 469 // (necessary functors already exist in symbol_functors.h) 470 } 471 472 if (!header) { 473 output_per_file_info(out, filename, count); 474 out << line0_info(filename) << '\n'; 475 } 476} 477 478 479void output_one_file(istream & in, debug_name_id filename, 480 string const & source) 481{ 482 if (output_dir.empty()) { 483 do_output_one_file(cout, in, filename, true); 484 return; 485 } 486 487 string const out_file = op_realpath(output_dir + source); 488 489 /* Just because you're paranoid doesn't mean they're not out to 490 * get you ... 491 * 492 * This is just a lame final safety check. If we found the 493 * source, then "source" should be canonical already, and 494 * can't escape from the output dir. We can't use op_realpath() 495 * alone as that needs the file to exist already. 496 * 497 * Let's not complain again if we couldn't find the file anyway. 498 */ 499 if (out_file.find("/../") != string::npos) { 500 if (in) { 501 cerr << "refusing to create non-canonical filename " 502 << out_file << endl; 503 } 504 return; 505 } else if (!is_prefix(out_file, output_dir)) { 506 if (in) { 507 cerr << "refusing to create file " << out_file 508 << " outside of output directory " << output_dir 509 << endl; 510 } 511 return; 512 } 513 514 if (is_files_identical(out_file, source)) { 515 cerr << "input and output files are identical: " 516 << out_file << endl; 517 return; 518 } 519 520 if (create_path(out_file.c_str())) { 521 cerr << "unable to create file: " 522 << '"' << op_dirname(out_file) << '"' << endl; 523 return; 524 } 525 526 ofstream out(out_file.c_str()); 527 if (!out) { 528 cerr << "unable to open output file " 529 << '"' << out_file << '"' << endl; 530 } else { 531 do_output_one_file(out, in, filename, false); 532 output_info(out); 533 } 534} 535 536 537/* Locate a source file from debug info, which may be relative */ 538string const locate_source_file(debug_name_id filename_id) 539{ 540 string const origfile = debug_names.name(filename_id); 541 string file = origfile; 542 543 if (file.empty()) 544 return file; 545 546 /* Allow absolute paths to be relocated to a different directory */ 547 if (file[0] == '/') { 548 vector<string>::const_iterator cit = base_dirs.begin(); 549 vector<string>::const_iterator end = base_dirs.end(); 550 for (; cit != end; ++cit) { 551 string path = op_realpath(*cit); 552 553 if (is_prefix(file, path)) { 554 file = file.substr(path.length()); 555 break; 556 } 557 } 558 } 559 560 vector<string>::const_iterator cit = search_dirs.begin(); 561 vector<string>::const_iterator end = search_dirs.end(); 562 563 for (; cit != end; ++cit) { 564 string const absfile = op_realpath(*cit + "/" + file); 565 566 if (op_file_readable(absfile)) { 567 return absfile; 568 } 569 } 570 571 /* We didn't find a relocated absolute file, or a relative file, 572 * assume the original is correct, accounting for the 573 * possibility it's relative the cwd 574 */ 575 return op_realpath(origfile); 576} 577 578 579void output_source(path_filter const & filter) 580{ 581 bool const separate_file = !output_dir.empty(); 582 583 if (!separate_file) 584 output_info(cout); 585 586 vector<debug_name_id> filenames = 587 samples->select_filename(options::threshold); 588 589 for (size_t i = 0 ; i < filenames.size() ; ++i) { 590 string const & source = locate_source_file(filenames[i]); 591 592 if (!filter.match(source)) 593 continue; 594 595 ifstream in(source.c_str()); 596 597 // it is common to have empty filename due to the lack 598 // of debug info (eg _init function) so warn only 599 // if the filename is non empty. The case: no debug 600 // info at all has already been checked. 601 if ((!in) && source.length()) { 602 cerr << "opannotate (warning): unable to open for " 603 "reading: " << source << endl; 604 } 605 606 if (source.length()) { 607 output_one_file(in, filenames[i], source); 608 } 609 } 610} 611 612 613bool annotate_source(list<string> const & images) 614{ 615 annotation_fill = get_annotation_fill(); 616 617 if (!output_dir.empty()) { 618 619 if (create_path(output_dir.c_str())) { 620 cerr << "unable to create " << output_dir 621 << " directory: " << endl; 622 return false; 623 } 624 625 // Make sure we have an absolute path. 626 output_dir = op_realpath(output_dir); 627 if (output_dir.length() && 628 output_dir[output_dir.length() - 1] != '/') 629 output_dir += '/'; 630 631 /* Don't let the user stomp on their sources */ 632 if (output_dir == "/") { 633 cerr << "Output path of / would over-write the " 634 "source files" << endl; 635 return false; 636 } 637 } 638 639 if (assembly) { 640 bool some_output = false; 641 642 list<string>::const_iterator it = images.begin(); 643 list<string>::const_iterator const end = images.end(); 644 645 for (; it != end; ++it) { 646 if (output_asm(*it)) { 647 some_output = true; 648 } 649 } 650 651 if (!some_output) { 652 // It's the only case we must care since we know the 653 // selected image set is not empty 654 cerr << "selected image set doesn't contain any of " 655 << "the selected symbol\n"; 656 } 657 } else { 658 output_source(file_filter); 659 } 660 661 return true; 662} 663 664 665int opannotate(options::spec const & spec) 666{ 667 handle_options(spec); 668 669 nr_events = classes.v.size(); 670 671 samples.reset(new profile_container(true, true)); 672 673 list<string> images; 674 675 list<inverted_profile> iprofiles 676 = invert_profiles(options::archive_path, classes, 677 options::extra_found_images); 678 679 report_image_errors(iprofiles); 680 681 list<inverted_profile>::iterator it = iprofiles.begin(); 682 list<inverted_profile>::iterator const end = iprofiles.end(); 683 684 bool debug_info = false; 685 for (; it != end; ++it) { 686 bool tmp = false; 687 populate_for_image(options::archive_path, *samples, *it, 688 options::symbol_filter, &tmp); 689 images.push_back(it->image); 690 if (tmp) 691 debug_info = true; 692 } 693 694 if (!debug_info && !options::assembly) { 695 cerr << "no debug information available for any binary " 696 << "selected and --assembly not requested\n"; 697 exit(EXIT_FAILURE); 698 } 699 700 annotate_source(images); 701 702 return 0; 703} 704 705} // anonymous namespace 706 707 708int main(int argc, char const * argv[]) 709{ 710 // set the invocation, for the file headers later 711 for (int i = 0 ; i < argc ; ++i) 712 cmdline += string(argv[i]) + " "; 713 714 return run_pp_tool(argc, argv, opannotate); 715} 716