bfd_support.cpp revision 8cfa702f803c5ef6a2b062a489a1b2cf66b45b5e
1/**
2 * @file bfd_support.cpp
3 * BFD muck we have to deal with.
4 *
5 * @remark Copyright 2005 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author John Levon
9 */
10
11#include "bfd_support.h"
12
13#include "op_bfd.h"
14#include "op_fileio.h"
15#include "op_config.h"
16#include "string_manip.h"
17#include "file_manip.h"
18#include "cverb.h"
19#include "locate_images.h"
20
21#include <cstdlib>
22#include <cstring>
23
24#include <iostream>
25#include <fstream>
26#include <sstream>
27#include <string>
28#include <cstring>
29#include <cstdlib>
30
31using namespace std;
32
33extern verbose vbfd;
34
35namespace {
36
37
38void check_format(string const & file, bfd ** ibfd)
39{
40	if (!bfd_check_format_matches(*ibfd, bfd_object, NULL)) {
41		cverb << vbfd << "BFD format failure for " << file << endl;
42		bfd_close(*ibfd);
43		*ibfd = NULL;
44	}
45}
46
47
48bool separate_debug_file_exists(string & name, unsigned long const crc,
49                                extra_images const & extra)
50{
51	unsigned long file_crc = 0;
52	// The size of 2 * 1024 elements for the buffer is arbitrary.
53	char buffer[2 * 1024];
54
55	image_error img_ok;
56	string const image_path = extra.find_image_path(name, img_ok, true);
57
58	if (img_ok != image_ok)
59		return false;
60
61	name = image_path;
62
63	ifstream file(image_path.c_str());
64	if (!file)
65		return false;
66
67	cverb << vbfd << "found " << name;
68	while (file) {
69		file.read(buffer, sizeof(buffer));
70		file_crc = calc_crc32(file_crc,
71				      reinterpret_cast<unsigned char *>(&buffer[0]),
72				      file.gcount());
73	}
74	cverb << vbfd << " with crc32 = " << hex << file_crc << endl;
75	return crc == file_crc;
76}
77
78
79bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32)
80{
81	asection * sect;
82
83	cverb << vbfd << "fetching .gnu_debuglink section" << endl;
84	sect = bfd_get_section_by_name(ibfd, ".gnu_debuglink");
85
86	if (sect == NULL)
87		return false;
88
89	bfd_size_type debuglink_size = bfd_section_size(ibfd, sect);
90	char contents[debuglink_size];
91	cverb << vbfd
92	      << ".gnu_debuglink section has size " << debuglink_size << endl;
93
94	if (!bfd_get_section_contents(ibfd, sect,
95				 reinterpret_cast<unsigned char *>(contents),
96				 static_cast<file_ptr>(0), debuglink_size)) {
97		bfd_perror("bfd_get_section_contents:get_debug:");
98		exit(2);
99	}
100
101	/* CRC value is stored after the filename, aligned up to 4 bytes. */
102	size_t filename_len = strlen(contents);
103	size_t crc_offset = filename_len + 1;
104	crc_offset = (crc_offset + 3) & ~3;
105
106	crc32 = bfd_get_32(ibfd,
107			       reinterpret_cast<bfd_byte *>(contents + crc_offset));
108	filename = string(contents, filename_len);
109	cverb << vbfd << ".gnu_debuglink filename is " << filename << endl;
110	return true;
111}
112
113
114/**
115 * With Objective C, we'll get strings like:
116 *
117 * _i_GSUnicodeString__rangeOfCharacterSetFromSet_options_range
118 *
119 * for the symbol name, and:
120 * -[GSUnicodeString rangeOfCharacterFromSet:options:range:]
121 *
122 * for the function name, so we have to do some looser matching
123 * than for other languages (unfortunately, it's not possible
124 * to demangle Objective C symbols).
125 */
126bool objc_match(string const & sym, string const & method)
127{
128	if (method.length() < 3)
129		return false;
130
131	string mangled;
132
133	if (is_prefix(method, "-[")) {
134		mangled += "_i_";
135	} else if (is_prefix(method, "+[")) {
136		mangled += "_c_";
137	} else {
138		return false;
139	}
140
141	string::const_iterator it = method.begin() + 2;
142	string::const_iterator const end = method.end();
143
144	bool found_paren = false;
145
146	for (; it != end; ++it) {
147		switch (*it) {
148		case ' ':
149			mangled += '_';
150			if (!found_paren)
151				mangled += '_';
152			break;
153		case ':':
154			mangled += '_';
155			break;
156		case ')':
157		case ']':
158			break;
159		case '(':
160			found_paren = true;
161			mangled += '_';
162			break;
163		default:
164			mangled += *it;
165		}
166	}
167
168	return sym == mangled;
169}
170
171
172/*
173 * With a binary image where some objects are missing debug
174 * info, we can end up attributing to a completely different
175 * function (#484660): bfd_nearest_line() will happily move from one
176 * symbol to the nearest one it can find with debug information.
177 * To mitigate this problem, we check that the symbol name
178 * matches the returned function name.
179 *
180 * However, this check fails in some cases it shouldn't:
181 * Objective C, and C++ static inline functions (as discussed in
182 * GCC bugzilla #11774). So, we have a looser check that
183 * accepts merely a substring, plus some magic for Objective C.
184 *
185 * If even the loose check fails, then we give up.
186 */
187bool is_correct_function(string const & function, string const & name)
188{
189	if (name == function)
190		return true;
191
192	if (objc_match(name, function))
193		return true;
194
195	// warn the user if we had to use the loose check
196	if (name.find(function) != string::npos) {
197		static bool warned = false;
198		if (!warned) {
199			cerr << "warning: some functions compiled without "
200			     << "debug information may have incorrect source "
201			     << "line attributions" << endl;
202				warned = true;
203		}
204		cverb << vbfd << "is_correct_function(" << function << ", "
205		      << name << ") fuzzy match." << endl;
206		return true;
207	}
208
209	return false;
210}
211
212
213/*
214 * binutils 2.12 and below have a small bug where functions without a
215 * debug entry at the prologue start do not give a useful line number
216 * from bfd_find_nearest_line(). This can happen with certain gcc
217 * versions such as 2.95.
218 *
219 * We work around this problem by scanning forward for a vma with valid
220 * linenr info, if we can't get a valid line number.  Problem uncovered
221 * by Norbert Kaufmann. The work-around decreases, on the tincas
222 * application, the number of failure to retrieve linenr info from 835
223 * to 173. Most of the remaining are c++ inline functions mainly from
224 * the STL library. Fix #529622
225 */
226void fixup_linenr(bfd * abfd, asection * section, asymbol ** syms,
227		  string const & name, bfd_vma pc,
228                  char const ** filename, unsigned int * line)
229{
230	char const * cfilename;
231	char const * function;
232	unsigned int linenr;
233
234	// FIXME: looking at debug info for all gcc version shows than
235	// the same problems can -perhaps- occur for epilog code: find a
236	// samples files with samples in epilog and try opreport -l -g
237	// on it, check it also with opannotate.
238
239	// first restrict the search on a sensible range of vma, 16 is
240	// an intuitive value based on epilog code look
241	size_t max_search = 16;
242	size_t section_size = bfd_section_size(abfd, section);
243	if (pc + max_search > section_size)
244		max_search = section_size - pc;
245
246	for (size_t i = 1; i < max_search; ++i) {
247		bool ret = bfd_find_nearest_line(abfd, section, syms, pc + i,
248						 &cfilename, &function,
249						 &linenr);
250
251		if (ret && cfilename && function && linenr != 0
252		    && is_correct_function(function, name)) {
253			*filename = cfilename;
254			*line = linenr;
255			return;
256		}
257	}
258}
259
260
261} // namespace anon
262
263
264bfd * open_bfd(string const & file)
265{
266	/* bfd keeps its own reference to the filename char *,
267	 * so it must have a lifetime longer than the ibfd */
268	bfd * ibfd = bfd_openr(file.c_str(), NULL);
269	if (!ibfd) {
270		cverb << vbfd << "bfd_openr failed for " << file << endl;
271		return NULL;
272	}
273
274	check_format(file, &ibfd);
275
276	return ibfd;
277}
278
279
280bfd * fdopen_bfd(string const & file, int fd)
281{
282	/* bfd keeps its own reference to the filename char *,
283	 * so it must have a lifetime longer than the ibfd */
284	bfd * ibfd = bfd_fdopenr(file.c_str(), NULL, fd);
285	if (!ibfd) {
286		cverb << vbfd << "bfd_openr failed for " << file << endl;
287		return NULL;
288	}
289
290	check_format(file, &ibfd);
291
292	return ibfd;
293}
294
295
296bool find_separate_debug_file(bfd * ibfd, string const & filepath_in,
297                              string & debug_filename, extra_images const & extra)
298{
299	string filepath(filepath_in);
300	string basename;
301	unsigned long crc32;
302
303	if (!get_debug_link_info(ibfd, basename, crc32))
304		return false;
305
306	// Work out the image file's directory prefix
307	string filedir = op_dirname(filepath);
308	// Make sure it starts with /
309	if (filedir.size() > 0 && filedir.at(filedir.size() - 1) != '/')
310		filedir += '/';
311
312	string first_try(filedir + ".debug/" + basename);
313	string second_try(DEBUGDIR + filedir + basename);
314	string third_try(filedir + basename);
315
316	cverb << vbfd << "looking for debugging file " << basename
317	      << " with crc32 = " << hex << crc32 << endl;
318
319	if (separate_debug_file_exists(first_try, crc32, extra))
320		debug_filename = first_try;
321	else if (separate_debug_file_exists(second_try, crc32, extra))
322		debug_filename = second_try;
323	else if (separate_debug_file_exists(third_try, crc32, extra))
324		debug_filename = third_try;
325	else
326		return false;
327
328	return true;
329}
330
331
332bool interesting_symbol(asymbol * sym)
333{
334	// #717720 some binutils are miscompiled by gcc 2.95, one of the
335	// typical symptom can be catched here.
336	if (!sym->section) {
337		ostringstream os;
338		os << "Your version of binutils seems to have a bug.\n"
339		   << "Read http://oprofile.sf.net/faq/#binutilsbug\n";
340		throw op_runtime_error(os.str());
341	}
342
343	if (!(sym->section->flags & SEC_CODE))
344		return false;
345
346	// returning true for fix up in op_bfd_symbol()
347	if (!sym->name || sym->name[0] == '\0')
348		return true;
349	/* ARM assembler internal mapping symbols aren't interesting */
350	if ((strcmp("$a", sym->name) == 0) ||
351	    (strcmp("$t", sym->name) == 0) ||
352	    (strcmp("$d", sym->name) == 0))
353		return false;
354
355	// C++ exception stuff
356	if (sym->name[0] == '.' && sym->name[1] == 'L')
357		return false;
358
359	/* This case cannot be moved to boring_symbol(),
360	 * because that's only used for duplicate VMAs,
361	 * and sometimes this symbol appears at an address
362	 * different from all other symbols.
363	 */
364	if (!strcmp("gcc2_compiled.", sym->name))
365		return false;
366
367        if (sym->flags & BSF_SECTION_SYM)
368                return false;
369
370	if (!(sym->section->flags & SEC_LOAD))
371		return false;
372
373	return true;
374}
375
376
377bool boring_symbol(op_bfd_symbol const & first, op_bfd_symbol const & second)
378{
379	if (first.name() == "Letext")
380		return true;
381	else if (second.name() == "Letext")
382		return false;
383
384	if (first.name().substr(0, 2) == "??")
385		return true;
386	else if (second.name().substr(0, 2) == "??")
387		return false;
388
389	if (first.hidden() && !second.hidden())
390		return true;
391	else if (!first.hidden() && second.hidden())
392		return false;
393
394	if (first.name()[0] == '_' && second.name()[0] != '_')
395		return true;
396	else if (first.name()[0] != '_' && second.name()[0] == '_')
397		return false;
398
399	if (first.weak() && !second.weak())
400		return true;
401	else if (!first.weak() && second.weak())
402		return false;
403
404	return false;
405}
406
407
408bool bfd_info::has_debug_info() const
409{
410	if (!valid())
411		return false;
412
413	for (asection const * sect = abfd->sections; sect; sect = sect->next) {
414		if (sect->flags & SEC_DEBUGGING)
415			return true;
416	}
417
418	return false;
419}
420
421
422bfd_info::~bfd_info()
423{
424	free(synth_syms);
425	close();
426}
427
428
429void bfd_info::close()
430{
431	if (abfd)
432		bfd_close(abfd);
433}
434
435void bfd_info::translate_debuginfo_syms(asymbol ** dbg_syms, long nr_dbg_syms)
436{
437	bfd_section ** image_sect;
438	unsigned int img_sect_cnt = 0;
439	bfd * image_bfd = image_bfd_info->abfd;
440
441	image_sect = (bfd_section **) malloc(image_bfd->section_count * (sizeof(bfd_section *)));
442
443	for (bfd_section * sect = image_bfd->sections;
444	     sect && img_sect_cnt < image_bfd->section_count;
445	     sect = sect->next) {
446		// A comment section marks the end of the needed sections
447		if (strstr(sect->name, ".comment") == sect->name)
448			break;
449		image_sect[sect->index] = sect;
450		img_sect_cnt++;
451	}
452
453	asymbol * sym = dbg_syms[0];
454	for (int i = 0; i < nr_dbg_syms; sym = dbg_syms[++i]) {
455		if (sym->section->owner && sym->section->owner == abfd) {
456			if ((unsigned int)sym->section->index < img_sect_cnt) {
457				sym->section = image_sect[sym->section->index];
458				sym->the_bfd = image_bfd;
459			}
460		}
461	}
462	free(image_sect);
463}
464
465#if SYNTHESIZE_SYMBOLS
466bool bfd_info::get_synth_symbols()
467{
468	extern const bfd_target bfd_elf64_powerpc_vec;
469	extern const bfd_target bfd_elf64_powerpcle_vec;
470	bool is_elf64_powerpc_target = (abfd->xvec == &bfd_elf64_powerpc_vec)
471		|| (abfd->xvec == &bfd_elf64_powerpcle_vec);
472
473	if (!is_elf64_powerpc_target)
474		return false;
475
476	void * buf;
477	uint tmp;
478	long nr_mini_syms = bfd_read_minisymbols(abfd, 0, &buf, &tmp);
479	if (nr_mini_syms < 1)
480		return false;
481
482	asymbol ** mini_syms = (asymbol **)buf;
483	buf = NULL;
484	bfd * synth_bfd;
485
486	/* For ppc64, a debuginfo file by itself does not hold enough symbol
487	 * information for us to properly attribute samples to symbols.  If
488	 * the image file's bfd has no symbols (as in a super-stripped library),
489	 * then we need to do the extra processing in translate_debuginfo_syms.
490	 */
491	if (image_bfd_info && image_bfd_info->nr_syms == 0) {
492		translate_debuginfo_syms(mini_syms, nr_mini_syms);
493		synth_bfd = image_bfd_info->abfd;
494	} else
495		synth_bfd = abfd;
496
497	long nr_synth_syms = bfd_get_synthetic_symtab(synth_bfd,
498	                                              nr_mini_syms,
499	                                              mini_syms, 0,
500	                                              NULL, &synth_syms);
501
502	if (nr_synth_syms < 0) {
503		free(mini_syms);
504		return false;
505	}
506
507	cverb << vbfd << "mini_syms: " << dec << nr_mini_syms << hex << endl;
508	cverb << vbfd << "synth_syms: " << dec << nr_synth_syms << hex << endl;
509
510	nr_syms = nr_mini_syms + nr_synth_syms;
511	syms.reset(new asymbol *[nr_syms + 1]);
512
513	for (size_t i = 0; i < (size_t)nr_mini_syms; ++i)
514		syms[i] = mini_syms[i];
515
516
517	for (size_t i = 0; i < (size_t)nr_synth_syms; ++i)
518		syms[nr_mini_syms + i] = synth_syms + i;
519
520
521	free(mini_syms);
522
523	// bfd_canonicalize_symtab does this, so shall we
524	syms[nr_syms] = NULL;
525
526	return true;
527}
528#else
529bool bfd_info::get_synth_symbols()
530{
531	return false;
532}
533#endif /* SYNTHESIZE_SYMBOLS */
534
535
536void bfd_info::get_symbols()
537{
538	if (!abfd)
539		return;
540
541	cverb << vbfd << "bfd_info::get_symbols() for "
542	      << bfd_get_filename(abfd) << endl;
543
544	if (get_synth_symbols())
545		return;
546
547	if (bfd_get_file_flags(abfd) & HAS_SYMS)
548		nr_syms = bfd_get_symtab_upper_bound(abfd);
549
550	cverb << vbfd << "bfd_get_symtab_upper_bound: " << dec
551	      << nr_syms << hex << endl;
552
553	nr_syms /= sizeof(asymbol *);
554
555	if (nr_syms < 1)
556		return;
557
558	syms.reset(new asymbol *[nr_syms]);
559
560	nr_syms = bfd_canonicalize_symtab(abfd, syms.get());
561
562	if (image_bfd_info)
563		translate_debuginfo_syms(syms.get(), nr_syms);
564
565	cverb << vbfd << "bfd_canonicalize_symtab: " << dec
566	      << nr_syms << hex << endl;
567}
568
569
570linenr_info const
571find_nearest_line(bfd_info const & b, op_bfd_symbol const & sym,
572                  bfd_vma offset, bool anon_obj)
573{
574	char const * function = "";
575	char const * cfilename = "";
576	unsigned int linenr = 0;
577	linenr_info info;
578	bfd * abfd;
579	asymbol ** syms;
580	asection * section;
581	bfd_vma pc;
582	bool ret;
583
584	if (!b.valid())
585		goto fail;
586
587	// take care about artificial symbol
588	if (!sym.symbol())
589		goto fail;
590
591	abfd = b.abfd;
592	syms = b.syms.get();
593	if (!syms)
594		goto fail;
595	section = sym.symbol()->section;
596	if (anon_obj)
597		pc = offset - sym.symbol()->section->vma;
598	else
599		pc = (sym.value() + offset) - sym.filepos();
600
601	if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0)
602		goto fail;
603
604	if (pc >= bfd_section_size(abfd, section))
605		goto fail;
606
607	ret = bfd_find_nearest_line(abfd, section, syms, pc, &cfilename,
608	                                 &function, &linenr);
609
610	if (!ret || !cfilename || !function)
611		goto fail;
612
613	/*
614	 * is_correct_function does not handle the case of static inlines,
615	 * but if the linenr is non-zero in the inline case, it is the correct
616	 * line number.
617	 */
618	if (linenr == 0 && !is_correct_function(function, sym.name()))
619		goto fail;
620
621	if (linenr == 0) {
622		fixup_linenr(abfd, section, syms, sym.name(), pc, &cfilename,
623		             &linenr);
624	}
625
626	info.found = true;
627	info.filename = cfilename;
628	info.line = linenr;
629	return info;
630
631fail:
632	info.found = false;
633	// some stl lacks string::clear()
634	info.filename.erase(info.filename.begin(), info.filename.end());
635	info.line = 0;
636	return info;
637}
638