bfd_support.cpp revision cc2ee177dbb3befca43e36cfc56778b006c3d050
1/**
2 * @file bfd_support.cpp
3 * BFD muck we have to deal with.
4 *
5 * @remark Copyright 2005 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author John Levon
9 */
10
11#include "bfd_support.h"
12
13#include "op_bfd.h"
14#include "op_fileio.h"
15#include "string_manip.h"
16#include "cverb.h"
17
18#include <iostream>
19#include <fstream>
20#include <sstream>
21#include <string>
22
23using namespace std;
24
25extern verbose vbfd;
26
27namespace {
28
29
30void check_format(string const & file, bfd ** ibfd)
31{
32	if (!bfd_check_format_matches(*ibfd, bfd_object, NULL)) {
33		cverb << vbfd << "BFD format failure for " << file << endl;
34		bfd_close(*ibfd);
35		*ibfd = NULL;
36	}
37}
38
39
40bool separate_debug_file_exists(string const & name, unsigned long const crc)
41{
42	unsigned long file_crc = 0;
43	// The size of 2 * 1024 elements for the buffer is arbitrary.
44	char buffer[2 * 1024];
45
46	ifstream file(name.c_str());
47	if (!file)
48		return false;
49
50	cverb << vbfd << "found " << name;
51	while (file) {
52		file.read(buffer, sizeof(buffer));
53		file_crc = calc_crc32(file_crc,
54				      reinterpret_cast<unsigned char *>(&buffer[0]),
55				      file.gcount());
56	}
57	cverb << vbfd << " with crc32 = " << hex << file_crc << endl;
58	return crc == file_crc;
59}
60
61
62bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32)
63{
64	asection * sect;
65
66	cverb << vbfd << "fetching .gnu_debuglink section" << endl;
67	sect = bfd_get_section_by_name(ibfd, ".gnu_debuglink");
68
69	if (sect == NULL)
70		return false;
71
72	bfd_size_type debuglink_size = bfd_section_size(ibfd, sect);
73	char contents[debuglink_size];
74	cverb << vbfd
75	      << ".gnu_debuglink section has size " << debuglink_size << endl;
76
77	bfd_get_section_contents(ibfd, sect,
78				 reinterpret_cast<unsigned char *>(contents),
79				 static_cast<file_ptr>(0), debuglink_size);
80
81	/* CRC value is stored after the filename, aligned up to 4 bytes. */
82	size_t filename_len = strlen(contents);
83	size_t crc_offset = filename_len + 1;
84	crc_offset = (crc_offset + 3) & ~3;
85
86	crc32 = bfd_get_32(ibfd,
87			       reinterpret_cast<bfd_byte *>(contents + crc_offset));
88	filename = string(contents, filename_len);
89	cverb << vbfd << ".gnu_debuglink filename is " << filename << endl;
90	return true;
91}
92
93
94/**
95 * With Objective C, we'll get strings like:
96 *
97 * _i_GSUnicodeString__rangeOfCharacterSetFromSet_options_range
98 *
99 * for the symbol name, and:
100 * -[GSUnicodeString rangeOfCharacterFromSet:options:range:]
101 *
102 * for the function name, so we have to do some looser matching
103 * than for other languages (unfortunately, it's not possible
104 * to demangle Objective C symbols).
105 */
106bool objc_match(string const & sym, string const & method)
107{
108	if (method.length() < 3)
109		return false;
110
111	string mangled;
112
113	if (is_prefix(method, "-[")) {
114		mangled += "_i_";
115	} else if (is_prefix(method, "+[")) {
116		mangled += "_c_";
117	} else {
118		return false;
119	}
120
121	string::const_iterator it = method.begin() + 2;
122	string::const_iterator const end = method.end();
123
124	bool found_paren = false;
125
126	for (; it != end; ++it) {
127		switch (*it) {
128		case ' ':
129			mangled += '_';
130			if (!found_paren)
131				mangled += '_';
132			break;
133		case ':':
134			mangled += '_';
135			break;
136		case ')':
137		case ']':
138			break;
139		case '(':
140			found_paren = true;
141			mangled += '_';
142			break;
143		default:
144			mangled += *it;
145		}
146	}
147
148	return sym == mangled;
149}
150
151
152/*
153 * With a binary image where some objects are missing debug
154 * info, we can end up attributing to a completely different
155 * function (#484660): bfd_nearest_line() will happily move from one
156 * symbol to the nearest one it can find with debug information.
157 * To mitigate this problem, we check that the symbol name
158 * matches the returned function name.
159 *
160 * However, this check fails in some cases it shouldn't:
161 * Objective C, and C++ static inline functions (as discussed in
162 * GCC bugzilla #11774). So, we have a looser check that
163 * accepts merely a substring, plus some magic for Objective C.
164 *
165 * If even the loose check fails, then we give up.
166 */
167bool is_correct_function(string const & function, string const & name)
168{
169	if (name == function)
170		return true;
171
172	if (objc_match(name, function))
173		return true;
174
175	// warn the user if we had to use the loose check
176	if (name.find(function) != string::npos) {
177		static bool warned = false;
178		if (!warned) {
179			cerr << "warning: some functions compiled without "
180			     << "debug information may have incorrect source "
181			     << "line attributions" << endl;
182				warned = true;
183		}
184		cverb << vbfd << "is_correct_function(" << function << ", "
185		      << name << ") fuzzy match." << endl;
186		return true;
187	}
188
189	return false;
190}
191
192
193/*
194 * binutils 2.12 and below have a small bug where functions without a
195 * debug entry at the prologue start do not give a useful line number
196 * from bfd_find_nearest_line(). This can happen with certain gcc
197 * versions such as 2.95.
198 *
199 * We work around this problem by scanning forward for a vma with valid
200 * linenr info, if we can't get a valid line number.  Problem uncovered
201 * by Norbert Kaufmann. The work-around decreases, on the tincas
202 * application, the number of failure to retrieve linenr info from 835
203 * to 173. Most of the remaining are c++ inline functions mainly from
204 * the STL library. Fix #529622
205 */
206void fixup_linenr(bfd * abfd, asection * section, asymbol ** syms,
207		  string const & name, bfd_vma pc,
208                  char const ** filename, unsigned int * line)
209{
210	char const * cfilename;
211	char const * function;
212	unsigned int linenr;
213
214	// FIXME: looking at debug info for all gcc version shows than
215	// the same problems can -perhaps- occur for epilog code: find a
216	// samples files with samples in epilog and try opreport -l -g
217	// on it, check it also with opannotate.
218
219	// first restrict the search on a sensible range of vma, 16 is
220	// an intuitive value based on epilog code look
221	size_t max_search = 16;
222	size_t section_size = bfd_section_size(abfd, section);
223	if (pc + max_search > section_size)
224		max_search = section_size - pc;
225
226	for (size_t i = 1; i < max_search; ++i) {
227		bool ret = bfd_find_nearest_line(abfd, section, syms, pc + i,
228						 &cfilename, &function,
229						 &linenr);
230
231		if (ret && cfilename && function && linenr != 0
232		    && is_correct_function(function, name)) {
233			*filename = cfilename;
234			*line = linenr;
235			return;
236		}
237	}
238}
239
240
241} // namespace anon
242
243
244bfd * open_bfd(string const & file)
245{
246	/* bfd keeps its own reference to the filename char *,
247	 * so it must have a lifetime longer than the ibfd */
248	bfd * ibfd = bfd_openr(file.c_str(), NULL);
249	if (!ibfd) {
250		cverb << vbfd << "bfd_openr failed for " << file << endl;
251		return NULL;
252	}
253
254	check_format(file, &ibfd);
255
256	return ibfd;
257}
258
259
260bfd * fdopen_bfd(string const & file, int fd)
261{
262	/* bfd keeps its own reference to the filename char *,
263	 * so it must have a lifetime longer than the ibfd */
264	bfd * ibfd = bfd_fdopenr(file.c_str(), NULL, fd);
265	if (!ibfd) {
266		cverb << vbfd << "bfd_openr failed for " << file << endl;
267		return NULL;
268	}
269
270	check_format(file, &ibfd);
271
272	return ibfd;
273}
274
275
276bool find_separate_debug_file(bfd * ibfd, string const & dir_in,
277                              string const & global_in, string & filename)
278{
279	string dir(dir_in);
280	string global(global_in);
281	string basename;
282	unsigned long crc32;
283
284	if (!get_debug_link_info(ibfd, basename, crc32))
285		return false;
286
287	if (dir.size() > 0 && dir.at(dir.size() - 1) != '/')
288		dir += '/';
289
290	if (global.size() > 0 && global.at(global.size() - 1) != '/')
291		global += '/';
292
293	cverb << vbfd << "looking for debugging file " << basename
294	      << " with crc32 = " << hex << crc32 << endl;
295
296	string first_try(dir + basename);
297	string second_try(dir + ".debug/" + basename);
298
299	if (dir.size() > 0 && dir[0] == '/')
300		dir = dir.substr(1);
301
302	string third_try(global + dir + basename);
303
304	if (separate_debug_file_exists(first_try, crc32))
305		filename = first_try;
306	else if (separate_debug_file_exists(second_try, crc32))
307		filename = second_try;
308	else if (separate_debug_file_exists(third_try, crc32))
309		filename = third_try;
310	else
311		return false;
312
313	return true;
314}
315
316
317bool interesting_symbol(asymbol * sym)
318{
319	// #717720 some binutils are miscompiled by gcc 2.95, one of the
320	// typical symptom can be catched here.
321	if (!sym->section) {
322		ostringstream os;
323		os << "Your version of binutils seems to have a bug.\n"
324		   << "Read http://oprofile.sf.net/faq/#binutilsbug\n";
325		throw op_runtime_error(os.str());
326	}
327
328	if (!(sym->section->flags & SEC_CODE))
329		return false;
330
331	// returning true for fix up in op_bfd_symbol()
332	if (!sym->name || sym->name[0] == '\0')
333		return true;
334
335	// C++ exception stuff
336	if (sym->name[0] == '.' && sym->name[1] == 'L')
337		return false;
338
339	/* This case cannot be moved to boring_symbol(),
340	 * because that's only used for duplicate VMAs,
341	 * and sometimes this symbol appears at an address
342	 * different from all other symbols.
343	 */
344	if (!strcmp("gcc2_compiled.", sym->name))
345		return false;
346
347	return true;
348}
349
350
351bool boring_symbol(op_bfd_symbol const & first, op_bfd_symbol const & second)
352{
353	if (first.name() == "Letext")
354		return true;
355	else if (second.name() == "Letext")
356		return false;
357
358	if (first.name().substr(0, 2) == "??")
359		return true;
360	else if (second.name().substr(0, 2) == "??")
361		return false;
362
363	if (first.hidden() && !second.hidden())
364		return true;
365	else if (!first.hidden() && second.hidden())
366		return false;
367
368	if (first.name()[0] == '_' && second.name()[0] != '_')
369		return true;
370	else if (first.name()[0] != '_' && second.name()[0] == '_')
371		return false;
372
373	if (first.weak() && !second.weak())
374		return true;
375	else if (!first.weak() && second.weak())
376		return false;
377
378	return false;
379}
380
381
382bool bfd_info::has_debug_info() const
383{
384	if (!valid())
385		return false;
386
387	for (asection const * sect = abfd->sections; sect; sect = sect->next) {
388		if (sect->flags & SEC_DEBUGGING)
389			return true;
390	}
391
392	return false;
393}
394
395
/// Release the synthetic symbol storage, then close the BFD handle.
bfd_info::~bfd_info()
{
	// synth_syms is the buffer handed back by bfd_get_synthetic_symtab()
	// in get_synth_symbols(); free() accepts NULL if it was never set.
	free(synth_syms);
	// close() is a no-op when abfd is NULL.
	close();
}
401
402
403void bfd_info::close()
404{
405	if (abfd)
406		bfd_close(abfd);
407}
408
409
410#if SYNTHESIZE_SYMBOLS
411bool bfd_info::get_synth_symbols()
412{
413	extern const bfd_target bfd_elf64_powerpc_vec;
414	extern const bfd_target bfd_elf64_powerpcle_vec;
415	bool is_elf64_powerpc_target = (abfd->xvec == &bfd_elf64_powerpc_vec)
416		|| (abfd->xvec == &bfd_elf64_powerpcle_vec);
417
418	if (!is_elf64_powerpc_target)
419		return false;
420
421	void * buf;
422	uint tmp;
423	long nr_mini_syms = bfd_read_minisymbols(abfd, 0, &buf, &tmp);
424	if (nr_mini_syms < 1)
425		return false;
426
427	asymbol ** mini_syms = (asymbol **)buf;
428	buf = NULL;
429
430	long nr_synth_syms = bfd_get_synthetic_symtab(abfd, nr_mini_syms,
431	                                              mini_syms, 0,
432	                                              NULL, &synth_syms);
433
434	if (nr_synth_syms < 0) {
435		free(mini_syms);
436		return false;
437	}
438
439	cverb << vbfd << "mini_syms: " << dec << nr_mini_syms << hex << endl;
440	cverb << vbfd << "synth_syms: " << dec << nr_synth_syms << hex << endl;
441
442	nr_syms = nr_mini_syms + nr_synth_syms;
443	syms.reset(new asymbol *[nr_syms + 1]);
444
445	for (size_t i = 0; i < (size_t)nr_mini_syms; ++i)
446		syms[i] = mini_syms[i];
447
448
449	for (size_t i = 0; i < (size_t)nr_synth_syms; ++i)
450		syms[nr_mini_syms + i] = synth_syms + i;
451
452
453	free(mini_syms);
454
455	// bfd_canonicalize_symtab does this, so shall we
456	syms[nr_syms] = NULL;
457
458	return true;
459}
460#else
461bool bfd_info::get_synth_symbols()
462{
463	return false;
464}
465#endif /* SYNTHESIZE_SYMBOLS */
466
467
468void bfd_info::get_symbols()
469{
470	if (!abfd)
471		return;
472
473	cverb << vbfd << "bfd_info::get_symbols() for "
474	      << bfd_get_filename(abfd) << endl;
475
476	if (get_synth_symbols())
477		return;
478
479	if (bfd_get_file_flags(abfd) & HAS_SYMS)
480		nr_syms = bfd_get_symtab_upper_bound(abfd);
481
482	cverb << vbfd << "bfd_get_symtab_upper_bound: " << dec
483	      << nr_syms << hex << endl;
484
485	nr_syms /= sizeof(asymbol *);
486
487	if (nr_syms < 1)
488		return;
489
490	syms.reset(new asymbol *[nr_syms]);
491
492	nr_syms = bfd_canonicalize_symtab(abfd, syms.get());
493
494	cverb << vbfd << "bfd_canonicalize_symtab: " << dec
495	      << nr_syms << hex << endl;
496}
497
498
499linenr_info const
500find_nearest_line(bfd_info const & b, op_bfd_symbol const & sym,
501                  unsigned int offset)
502{
503	char const * function = "";
504	char const * cfilename = "";
505	unsigned int linenr = 0;
506	linenr_info info;
507	bfd * abfd;
508	asymbol ** syms;
509	asection * section;
510	bfd_vma pc;
511	bool ret;
512
513	if (!b.valid())
514		goto fail;
515
516	// take care about artificial symbol
517	if (!sym.symbol())
518		goto fail;
519
520	abfd = b.abfd;
521	syms = b.syms.get();
522	section = sym.symbol()->section;
523	pc = (sym.value() + offset) - sym.filepos();
524
525	if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0)
526		goto fail;
527
528	if (pc >= bfd_section_size(abfd, section))
529		goto fail;
530
531	ret = bfd_find_nearest_line(abfd, section, syms, pc, &cfilename,
532	                                 &function, &linenr);
533
534	if (!ret || !cfilename)
535		goto fail;
536
537	if (!is_correct_function(function, sym.name()))
538		goto fail;
539
540	if (linenr == 0) {
541		fixup_linenr(abfd, section, syms, sym.name(), pc, &cfilename,
542		             &linenr);
543	}
544
545	info.found = true;
546	info.filename = cfilename;
547	info.line = linenr;
548	return info;
549
550fail:
551	info.found = false;
552	// some stl lacks string::clear()
553	info.filename.erase(info.filename.begin(), info.filename.end());
554	info.line = 0;
555	return info;
556}
557