bfd_support.cpp revision cc2ee177dbb3befca43e36cfc56778b006c3d050
1/** 2 * @file bfd_support.cpp 3 * BFD muck we have to deal with. 4 * 5 * @remark Copyright 2005 OProfile authors 6 * @remark Read the file COPYING 7 * 8 * @author John Levon 9 */ 10 11#include "bfd_support.h" 12 13#include "op_bfd.h" 14#include "op_fileio.h" 15#include "string_manip.h" 16#include "cverb.h" 17 18#include <iostream> 19#include <fstream> 20#include <sstream> 21#include <string> 22 23using namespace std; 24 25extern verbose vbfd; 26 27namespace { 28 29 30void check_format(string const & file, bfd ** ibfd) 31{ 32 if (!bfd_check_format_matches(*ibfd, bfd_object, NULL)) { 33 cverb << vbfd << "BFD format failure for " << file << endl; 34 bfd_close(*ibfd); 35 *ibfd = NULL; 36 } 37} 38 39 40bool separate_debug_file_exists(string const & name, unsigned long const crc) 41{ 42 unsigned long file_crc = 0; 43 // The size of 2 * 1024 elements for the buffer is arbitrary. 44 char buffer[2 * 1024]; 45 46 ifstream file(name.c_str()); 47 if (!file) 48 return false; 49 50 cverb << vbfd << "found " << name; 51 while (file) { 52 file.read(buffer, sizeof(buffer)); 53 file_crc = calc_crc32(file_crc, 54 reinterpret_cast<unsigned char *>(&buffer[0]), 55 file.gcount()); 56 } 57 cverb << vbfd << " with crc32 = " << hex << file_crc << endl; 58 return crc == file_crc; 59} 60 61 62bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32) 63{ 64 asection * sect; 65 66 cverb << vbfd << "fetching .gnu_debuglink section" << endl; 67 sect = bfd_get_section_by_name(ibfd, ".gnu_debuglink"); 68 69 if (sect == NULL) 70 return false; 71 72 bfd_size_type debuglink_size = bfd_section_size(ibfd, sect); 73 char contents[debuglink_size]; 74 cverb << vbfd 75 << ".gnu_debuglink section has size " << debuglink_size << endl; 76 77 bfd_get_section_contents(ibfd, sect, 78 reinterpret_cast<unsigned char *>(contents), 79 static_cast<file_ptr>(0), debuglink_size); 80 81 /* CRC value is stored after the filename, aligned up to 4 bytes. */ 82 size_t filename_len = strlen(contents); 83 size_t crc_offset = filename_len + 1; 84 crc_offset = (crc_offset + 3) & ~3; 85 86 crc32 = bfd_get_32(ibfd, 87 reinterpret_cast<bfd_byte *>(contents + crc_offset)); 88 filename = string(contents, filename_len); 89 cverb << vbfd << ".gnu_debuglink filename is " << filename << endl; 90 return true; 91} 92 93 94/** 95 * With Objective C, we'll get strings like: 96 * 97 * _i_GSUnicodeString__rangeOfCharacterSetFromSet_options_range 98 * 99 * for the symbol name, and: 100 * -[GSUnicodeString rangeOfCharacterFromSet:options:range:] 101 * 102 * for the function name, so we have to do some looser matching 103 * than for other languages (unfortunately, it's not possible 104 * to demangle Objective C symbols). 105 */ 106bool objc_match(string const & sym, string const & method) 107{ 108 if (method.length() < 3) 109 return false; 110 111 string mangled; 112 113 if (is_prefix(method, "-[")) { 114 mangled += "_i_"; 115 } else if (is_prefix(method, "+[")) { 116 mangled += "_c_"; 117 } else { 118 return false; 119 } 120 121 string::const_iterator it = method.begin() + 2; 122 string::const_iterator const end = method.end(); 123 124 bool found_paren = false; 125 126 for (; it != end; ++it) { 127 switch (*it) { 128 case ' ': 129 mangled += '_'; 130 if (!found_paren) 131 mangled += '_'; 132 break; 133 case ':': 134 mangled += '_'; 135 break; 136 case ')': 137 case ']': 138 break; 139 case '(': 140 found_paren = true; 141 mangled += '_'; 142 break; 143 default: 144 mangled += *it; 145 } 146 } 147 148 return sym == mangled; 149} 150 151 152/* 153 * With a binary image where some objects are missing debug 154 * info, we can end up attributing to a completely different 155 * function (#484660): bfd_nearest_line() will happily move from one 156 * symbol to the nearest one it can find with debug information. 157 * To mitigate this problem, we check that the symbol name 158 * matches the returned function name. 159 * 160 * However, this check fails in some cases it shouldn't: 161 * Objective C, and C++ static inline functions (as discussed in 162 * GCC bugzilla #11774). So, we have a looser check that 163 * accepts merely a substring, plus some magic for Objective C. 164 * 165 * If even the loose check fails, then we give up. 166 */ 167bool is_correct_function(string const & function, string const & name) 168{ 169 if (name == function) 170 return true; 171 172 if (objc_match(name, function)) 173 return true; 174 175 // warn the user if we had to use the loose check 176 if (name.find(function) != string::npos) { 177 static bool warned = false; 178 if (!warned) { 179 cerr << "warning: some functions compiled without " 180 << "debug information may have incorrect source " 181 << "line attributions" << endl; 182 warned = true; 183 } 184 cverb << vbfd << "is_correct_function(" << function << ", " 185 << name << ") fuzzy match." << endl; 186 return true; 187 } 188 189 return false; 190} 191 192 193/* 194 * binutils 2.12 and below have a small bug where functions without a 195 * debug entry at the prologue start do not give a useful line number 196 * from bfd_find_nearest_line(). This can happen with certain gcc 197 * versions such as 2.95. 198 * 199 * We work around this problem by scanning forward for a vma with valid 200 * linenr info, if we can't get a valid line number. Problem uncovered 201 * by Norbert Kaufmann. The work-around decreases, on the tincas 202 * application, the number of failure to retrieve linenr info from 835 203 * to 173. Most of the remaining are c++ inline functions mainly from 204 * the STL library. Fix #529622 205 */ 206void fixup_linenr(bfd * abfd, asection * section, asymbol ** syms, 207 string const & name, bfd_vma pc, 208 char const ** filename, unsigned int * line) 209{ 210 char const * cfilename; 211 char const * function; 212 unsigned int linenr; 213 214 // FIXME: looking at debug info for all gcc version shows than 215 // the same problems can -perhaps- occur for epilog code: find a 216 // samples files with samples in epilog and try opreport -l -g 217 // on it, check it also with opannotate. 218 219 // first restrict the search on a sensible range of vma, 16 is 220 // an intuitive value based on epilog code look 221 size_t max_search = 16; 222 size_t section_size = bfd_section_size(abfd, section); 223 if (pc + max_search > section_size) 224 max_search = section_size - pc; 225 226 for (size_t i = 1; i < max_search; ++i) { 227 bool ret = bfd_find_nearest_line(abfd, section, syms, pc + i, 228 &cfilename, &function, 229 &linenr); 230 231 if (ret && cfilename && function && linenr != 0 232 && is_correct_function(function, name)) { 233 *filename = cfilename; 234 *line = linenr; 235 return; 236 } 237 } 238} 239 240 241} // namespace anon 242 243 244bfd * open_bfd(string const & file) 245{ 246 /* bfd keeps its own reference to the filename char *, 247 * so it must have a lifetime longer than the ibfd */ 248 bfd * ibfd = bfd_openr(file.c_str(), NULL); 249 if (!ibfd) { 250 cverb << vbfd << "bfd_openr failed for " << file << endl; 251 return NULL; 252 } 253 254 check_format(file, &ibfd); 255 256 return ibfd; 257} 258 259 260bfd * fdopen_bfd(string const & file, int fd) 261{ 262 /* bfd keeps its own reference to the filename char *, 263 * so it must have a lifetime longer than the ibfd */ 264 bfd * ibfd = bfd_fdopenr(file.c_str(), NULL, fd); 265 if (!ibfd) { 266 cverb << vbfd << "bfd_openr failed for " << file << endl; 267 return NULL; 268 } 269 270 check_format(file, &ibfd); 271 272 return ibfd; 273} 274 275 276bool find_separate_debug_file(bfd * ibfd, string const & dir_in, 277 string const & global_in, string & filename) 278{ 279 string dir(dir_in); 280 string global(global_in); 281 string basename; 282 unsigned long crc32; 283 284 if (!get_debug_link_info(ibfd, basename, crc32)) 285 return false; 286 287 if (dir.size() > 0 && dir.at(dir.size() - 1) != '/') 288 dir += '/'; 289 290 if (global.size() > 0 && global.at(global.size() - 1) != '/') 291 global += '/'; 292 293 cverb << vbfd << "looking for debugging file " << basename 294 << " with crc32 = " << hex << crc32 << endl; 295 296 string first_try(dir + basename); 297 string second_try(dir + ".debug/" + basename); 298 299 if (dir.size() > 0 && dir[0] == '/') 300 dir = dir.substr(1); 301 302 string third_try(global + dir + basename); 303 304 if (separate_debug_file_exists(first_try, crc32)) 305 filename = first_try; 306 else if (separate_debug_file_exists(second_try, crc32)) 307 filename = second_try; 308 else if (separate_debug_file_exists(third_try, crc32)) 309 filename = third_try; 310 else 311 return false; 312 313 return true; 314} 315 316 317bool interesting_symbol(asymbol * sym) 318{ 319 // #717720 some binutils are miscompiled by gcc 2.95, one of the 320 // typical symptom can be catched here. 321 if (!sym->section) { 322 ostringstream os; 323 os << "Your version of binutils seems to have a bug.\n" 324 << "Read http://oprofile.sf.net/faq/#binutilsbug\n"; 325 throw op_runtime_error(os.str()); 326 } 327 328 if (!(sym->section->flags & SEC_CODE)) 329 return false; 330 331 // returning true for fix up in op_bfd_symbol() 332 if (!sym->name || sym->name[0] == '\0') 333 return true; 334 335 // C++ exception stuff 336 if (sym->name[0] == '.' && sym->name[1] == 'L') 337 return false; 338 339 /* This case cannot be moved to boring_symbol(), 340 * because that's only used for duplicate VMAs, 341 * and sometimes this symbol appears at an address 342 * different from all other symbols. 343 */ 344 if (!strcmp("gcc2_compiled.", sym->name)) 345 return false; 346 347 return true; 348} 349 350 351bool boring_symbol(op_bfd_symbol const & first, op_bfd_symbol const & second) 352{ 353 if (first.name() == "Letext") 354 return true; 355 else if (second.name() == "Letext") 356 return false; 357 358 if (first.name().substr(0, 2) == "??") 359 return true; 360 else if (second.name().substr(0, 2) == "??") 361 return false; 362 363 if (first.hidden() && !second.hidden()) 364 return true; 365 else if (!first.hidden() && second.hidden()) 366 return false; 367 368 if (first.name()[0] == '_' && second.name()[0] != '_') 369 return true; 370 else if (first.name()[0] != '_' && second.name()[0] == '_') 371 return false; 372 373 if (first.weak() && !second.weak()) 374 return true; 375 else if (!first.weak() && second.weak()) 376 return false; 377 378 return false; 379} 380 381 382bool bfd_info::has_debug_info() const 383{ 384 if (!valid()) 385 return false; 386 387 for (asection const * sect = abfd->sections; sect; sect = sect->next) { 388 if (sect->flags & SEC_DEBUGGING) 389 return true; 390 } 391 392 return false; 393} 394 395 396bfd_info::~bfd_info() 397{ 398 free(synth_syms); 399 close(); 400} 401 402 403void bfd_info::close() 404{ 405 if (abfd) 406 bfd_close(abfd); 407} 408 409 410#if SYNTHESIZE_SYMBOLS 411bool bfd_info::get_synth_symbols() 412{ 413 extern const bfd_target bfd_elf64_powerpc_vec; 414 extern const bfd_target bfd_elf64_powerpcle_vec; 415 bool is_elf64_powerpc_target = (abfd->xvec == &bfd_elf64_powerpc_vec) 416 || (abfd->xvec == &bfd_elf64_powerpcle_vec); 417 418 if (!is_elf64_powerpc_target) 419 return false; 420 421 void * buf; 422 uint tmp; 423 long nr_mini_syms = bfd_read_minisymbols(abfd, 0, &buf, &tmp); 424 if (nr_mini_syms < 1) 425 return false; 426 427 asymbol ** mini_syms = (asymbol **)buf; 428 buf = NULL; 429 430 long nr_synth_syms = bfd_get_synthetic_symtab(abfd, nr_mini_syms, 431 mini_syms, 0, 432 NULL, &synth_syms); 433 434 if (nr_synth_syms < 0) { 435 free(mini_syms); 436 return false; 437 } 438 439 cverb << vbfd << "mini_syms: " << dec << nr_mini_syms << hex << endl; 440 cverb << vbfd << "synth_syms: " << dec << nr_synth_syms << hex << endl; 441 442 nr_syms = nr_mini_syms + nr_synth_syms; 443 syms.reset(new asymbol *[nr_syms + 1]); 444 445 for (size_t i = 0; i < (size_t)nr_mini_syms; ++i) 446 syms[i] = mini_syms[i]; 447 448 449 for (size_t i = 0; i < (size_t)nr_synth_syms; ++i) 450 syms[nr_mini_syms + i] = synth_syms + i; 451 452 453 free(mini_syms); 454 455 // bfd_canonicalize_symtab does this, so shall we 456 syms[nr_syms] = NULL; 457 458 return true; 459} 460#else 461bool bfd_info::get_synth_symbols() 462{ 463 return false; 464} 465#endif /* SYNTHESIZE_SYMBOLS */ 466 467 468void bfd_info::get_symbols() 469{ 470 if (!abfd) 471 return; 472 473 cverb << vbfd << "bfd_info::get_symbols() for " 474 << bfd_get_filename(abfd) << endl; 475 476 if (get_synth_symbols()) 477 return; 478 479 if (bfd_get_file_flags(abfd) & HAS_SYMS) 480 nr_syms = bfd_get_symtab_upper_bound(abfd); 481 482 cverb << vbfd << "bfd_get_symtab_upper_bound: " << dec 483 << nr_syms << hex << endl; 484 485 nr_syms /= sizeof(asymbol *); 486 487 if (nr_syms < 1) 488 return; 489 490 syms.reset(new asymbol *[nr_syms]); 491 492 nr_syms = bfd_canonicalize_symtab(abfd, syms.get()); 493 494 cverb << vbfd << "bfd_canonicalize_symtab: " << dec 495 << nr_syms << hex << endl; 496} 497 498 499linenr_info const 500find_nearest_line(bfd_info const & b, op_bfd_symbol const & sym, 501 unsigned int offset) 502{ 503 char const * function = ""; 504 char const * cfilename = ""; 505 unsigned int linenr = 0; 506 linenr_info info; 507 bfd * abfd; 508 asymbol ** syms; 509 asection * section; 510 bfd_vma pc; 511 bool ret; 512 513 if (!b.valid()) 514 goto fail; 515 516 // take care about artificial symbol 517 if (!sym.symbol()) 518 goto fail; 519 520 abfd = b.abfd; 521 syms = b.syms.get(); 522 section = sym.symbol()->section; 523 pc = (sym.value() + offset) - sym.filepos(); 524 525 if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0) 526 goto fail; 527 528 if (pc >= bfd_section_size(abfd, section)) 529 goto fail; 530 531 ret = bfd_find_nearest_line(abfd, section, syms, pc, &cfilename, 532 &function, &linenr); 533 534 if (!ret || !cfilename) 535 goto fail; 536 537 if (!is_correct_function(function, sym.name())) 538 goto fail; 539 540 if (linenr == 0) { 541 fixup_linenr(abfd, section, syms, sym.name(), pc, &cfilename, 542 &linenr); 543 } 544 545 info.found = true; 546 info.filename = cfilename; 547 info.line = linenr; 548 return info; 549 550fail: 551 info.found = false; 552 // some stl lacks string::clear() 553 info.filename.erase(info.filename.begin(), info.filename.end()); 554 info.line = 0; 555 return info; 556} 557