1/**
2 * @file bfd_support.cpp
3 * BFD muck we have to deal with.
4 *
5 * @remark Copyright 2005 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author John Levon
9 */
10
11#include "bfd_support.h"
12
13#include "op_bfd.h"
14#include "op_fileio.h"
15#include "op_config.h"
16#include "string_manip.h"
17#include "file_manip.h"
18#include "cverb.h"
19#include "locate_images.h"
20
21#include <cstdlib>
22#include <cstring>
23#include <cassert>
24#include <iostream>
25#include <fstream>
26#include <sstream>
27#include <string>
28#include <cstring>
29#include <cstdlib>
30
31using namespace std;
32
33extern verbose vbfd;
34
35namespace {
36
37
38void check_format(string const & file, bfd ** ibfd)
39{
40	if (!bfd_check_format_matches(*ibfd, bfd_object, NULL)) {
41		cverb << vbfd << "BFD format failure for " << file << endl;
42		bfd_close(*ibfd);
43		*ibfd = NULL;
44	}
45}
46
47
48bool separate_debug_file_exists(string & name, unsigned long const crc,
49                                extra_images const & extra)
50{
51	unsigned long file_crc = 0;
52	// The size of 2 * 1024 elements for the buffer is arbitrary.
53	char buffer[2 * 1024];
54
55	image_error img_ok;
56	string const image_path = extra.find_image_path(name, img_ok, true);
57
58	if (img_ok != image_ok)
59		return false;
60
61	name = image_path;
62
63	ifstream file(image_path.c_str());
64	if (!file)
65		return false;
66
67	cverb << vbfd << "found " << name;
68	while (file) {
69		file.read(buffer, sizeof(buffer));
70		file_crc = calc_crc32(file_crc,
71				      reinterpret_cast<unsigned char *>(&buffer[0]),
72				      file.gcount());
73	}
74	cverb << vbfd << " with crc32 = " << hex << file_crc << endl;
75	return crc == file_crc;
76}
77
78
79bool get_debug_link_info(bfd * ibfd, string & filename, unsigned long & crc32)
80{
81	asection * sect;
82
83	cverb << vbfd << "fetching .gnu_debuglink section" << endl;
84	sect = bfd_get_section_by_name(ibfd, ".gnu_debuglink");
85
86	if (sect == NULL)
87		return false;
88
89	bfd_size_type debuglink_size = bfd_section_size(ibfd, sect);
90	char contents[debuglink_size];
91	cverb << vbfd
92	      << ".gnu_debuglink section has size " << debuglink_size << endl;
93
94	if (!bfd_get_section_contents(ibfd, sect,
95				 reinterpret_cast<unsigned char *>(contents),
96				 static_cast<file_ptr>(0), debuglink_size)) {
97		bfd_perror("bfd_get_section_contents:get_debug:");
98		exit(2);
99	}
100
101	/* CRC value is stored after the filename, aligned up to 4 bytes. */
102	size_t filename_len = strlen(contents);
103	size_t crc_offset = filename_len + 1;
104	crc_offset = (crc_offset + 3) & ~3;
105
106	crc32 = bfd_get_32(ibfd,
107			       reinterpret_cast<bfd_byte *>(contents + crc_offset));
108	filename = string(contents, filename_len);
109	cverb << vbfd << ".gnu_debuglink filename is " << filename << endl;
110	return true;
111}
112
113
114/**
115 * With Objective C, we'll get strings like:
116 *
117 * _i_GSUnicodeString__rangeOfCharacterSetFromSet_options_range
118 *
119 * for the symbol name, and:
120 * -[GSUnicodeString rangeOfCharacterFromSet:options:range:]
121 *
122 * for the function name, so we have to do some looser matching
123 * than for other languages (unfortunately, it's not possible
124 * to demangle Objective C symbols).
125 */
126bool objc_match(string const & sym, string const & method)
127{
128	if (method.length() < 3)
129		return false;
130
131	string mangled;
132
133	if (is_prefix(method, "-[")) {
134		mangled += "_i_";
135	} else if (is_prefix(method, "+[")) {
136		mangled += "_c_";
137	} else {
138		return false;
139	}
140
141	string::const_iterator it = method.begin() + 2;
142	string::const_iterator const end = method.end();
143
144	bool found_paren = false;
145
146	for (; it != end; ++it) {
147		switch (*it) {
148		case ' ':
149			mangled += '_';
150			if (!found_paren)
151				mangled += '_';
152			break;
153		case ':':
154			mangled += '_';
155			break;
156		case ')':
157		case ']':
158			break;
159		case '(':
160			found_paren = true;
161			mangled += '_';
162			break;
163		default:
164			mangled += *it;
165		}
166	}
167
168	return sym == mangled;
169}
170
171
172/*
173 * With a binary image where some objects are missing debug
174 * info, we can end up attributing to a completely different
175 * function (#484660): bfd_nearest_line() will happily move from one
176 * symbol to the nearest one it can find with debug information.
177 * To mitigate this problem, we check that the symbol name
178 * matches the returned function name.
179 *
180 * However, this check fails in some cases it shouldn't:
181 * Objective C, and C++ static inline functions (as discussed in
182 * GCC bugzilla #11774). So, we have a looser check that
183 * accepts merely a substring, plus some magic for Objective C.
184 *
185 * If even the loose check fails, then we give up.
186 */
187bool is_correct_function(string const & function, string const & name)
188{
189	if (name == function)
190		return true;
191
192	if (objc_match(name, function))
193		return true;
194
195	// warn the user if we had to use the loose check
196	if (name.find(function) != string::npos) {
197		static bool warned = false;
198		if (!warned) {
199			cerr << "warning: some functions compiled without "
200			     << "debug information may have incorrect source "
201			     << "line attributions" << endl;
202				warned = true;
203		}
204		cverb << vbfd << "is_correct_function(" << function << ", "
205		      << name << ") fuzzy match." << endl;
206		return true;
207	}
208
209	return false;
210}
211
212
213/*
214 * binutils 2.12 and below have a small bug where functions without a
215 * debug entry at the prologue start do not give a useful line number
216 * from bfd_find_nearest_line(). This can happen with certain gcc
217 * versions such as 2.95.
218 *
219 * We work around this problem by scanning forward for a vma with valid
220 * linenr info, if we can't get a valid line number.  Problem uncovered
221 * by Norbert Kaufmann. The work-around decreases, on the tincas
222 * application, the number of failure to retrieve linenr info from 835
223 * to 173. Most of the remaining are c++ inline functions mainly from
224 * the STL library. Fix #529622
225 */
226void fixup_linenr(bfd * abfd, asection * section, asymbol ** syms,
227		  string const & name, bfd_vma pc,
228                  char const ** filename, unsigned int * line)
229{
230	char const * cfilename;
231	char const * function;
232	unsigned int linenr;
233
234	// FIXME: looking at debug info for all gcc version shows than
235	// the same problems can -perhaps- occur for epilog code: find a
236	// samples files with samples in epilog and try opreport -l -g
237	// on it, check it also with opannotate.
238
239	// first restrict the search on a sensible range of vma, 16 is
240	// an intuitive value based on epilog code look
241	size_t max_search = 16;
242	size_t section_size = bfd_section_size(abfd, section);
243	if (pc + max_search > section_size)
244		max_search = section_size - pc;
245
246	for (size_t i = 1; i < max_search; ++i) {
247		bool ret = bfd_find_nearest_line(abfd, section, syms, pc + i,
248						 &cfilename, &function,
249						 &linenr);
250
251		if (ret && cfilename && function && linenr != 0
252		    && is_correct_function(function, name)) {
253			*filename = cfilename;
254			*line = linenr;
255			return;
256		}
257	}
258}
259
260
261} // namespace anon
262
263
264bfd * open_bfd(string const & file)
265{
266	/* bfd keeps its own reference to the filename char *,
267	 * so it must have a lifetime longer than the ibfd */
268	bfd * ibfd = bfd_openr(file.c_str(), NULL);
269	if (!ibfd) {
270		cverb << vbfd << "bfd_openr failed for " << file << endl;
271		return NULL;
272	}
273
274	check_format(file, &ibfd);
275
276	return ibfd;
277}
278
279
280bfd * fdopen_bfd(string const & file, int fd)
281{
282	/* bfd keeps its own reference to the filename char *,
283	 * so it must have a lifetime longer than the ibfd */
284	bfd * ibfd = bfd_fdopenr(file.c_str(), NULL, fd);
285	if (!ibfd) {
286		cverb << vbfd << "bfd_openr failed for " << file << endl;
287		return NULL;
288	}
289
290	check_format(file, &ibfd);
291
292	return ibfd;
293}
294
295
296bool find_separate_debug_file(bfd * ibfd, string const & filepath_in,
297                              string & debug_filename, extra_images const & extra)
298{
299	string filepath(filepath_in);
300	string basename;
301	unsigned long crc32;
302
303	if (!get_debug_link_info(ibfd, basename, crc32))
304		return false;
305
306	// Work out the image file's directory prefix
307	string filedir = op_dirname(filepath);
308	// Make sure it starts with /
309	if (filedir.size() > 0 && filedir.at(filedir.size() - 1) != '/')
310		filedir += '/';
311
312	string first_try(filedir + ".debug/" + basename);
313	string second_try(DEBUGDIR + filedir + basename);
314	string third_try(filedir + basename);
315
316	cverb << vbfd << "looking for debugging file " << basename
317	      << " with crc32 = " << hex << crc32 << endl;
318
319	if (separate_debug_file_exists(first_try, crc32, extra))
320		debug_filename = first_try;
321	else if (separate_debug_file_exists(second_try, crc32, extra))
322		debug_filename = second_try;
323	else if (separate_debug_file_exists(third_try, crc32, extra))
324		debug_filename = third_try;
325	else
326		return false;
327
328	return true;
329}
330
331
332bool interesting_symbol(asymbol * sym)
333{
334	// #717720 some binutils are miscompiled by gcc 2.95, one of the
335	// typical symptom can be catched here.
336	if (!sym->section) {
337		ostringstream os;
338		os << "Your version of binutils seems to have a bug.\n"
339		   << "Read http://oprofile.sf.net/faq/#binutilsbug\n";
340		throw op_runtime_error(os.str());
341	}
342
343	if (!(sym->section->flags & SEC_CODE))
344		return false;
345
346	// returning true for fix up in op_bfd_symbol()
347	if (!sym->name || sym->name[0] == '\0')
348		return true;
349	/* ARM assembler internal mapping symbols aren't interesting */
350	if ((strcmp("$a", sym->name) == 0) ||
351	    (strcmp("$t", sym->name) == 0) ||
352	    (strcmp("$d", sym->name) == 0))
353		return false;
354
355	// C++ exception stuff
356	if (sym->name[0] == '.' && sym->name[1] == 'L')
357		return false;
358
359	/* This case cannot be moved to boring_symbol(),
360	 * because that's only used for duplicate VMAs,
361	 * and sometimes this symbol appears at an address
362	 * different from all other symbols.
363	 */
364	if (!strcmp("gcc2_compiled.", sym->name))
365		return false;
366
367        if (sym->flags & BSF_SECTION_SYM)
368                return false;
369
370	if (!(sym->section->flags & SEC_LOAD))
371		return false;
372
373	return true;
374}
375
376
377bool boring_symbol(op_bfd_symbol const & first, op_bfd_symbol const & second)
378{
379	if (first.name() == "Letext")
380		return true;
381	else if (second.name() == "Letext")
382		return false;
383
384	if (first.name().substr(0, 2) == "??")
385		return true;
386	else if (second.name().substr(0, 2) == "??")
387		return false;
388
389	if (first.hidden() && !second.hidden())
390		return true;
391	else if (!first.hidden() && second.hidden())
392		return false;
393
394	if (first.name()[0] == '_' && second.name()[0] != '_')
395		return true;
396	else if (first.name()[0] != '_' && second.name()[0] == '_')
397		return false;
398
399	if (first.weak() && !second.weak())
400		return true;
401	else if (!first.weak() && second.weak())
402		return false;
403
404	return false;
405}
406
407
408bool bfd_info::has_debug_info() const
409{
410	if (!valid())
411		return false;
412
413	for (asection const * sect = abfd->sections; sect; sect = sect->next) {
414		if (sect->flags & SEC_DEBUGGING)
415			return true;
416	}
417
418	return false;
419}
420
421
422bfd_info::~bfd_info()
423{
424	free(synth_syms);
425	close();
426}
427
428
429void bfd_info::close()
430{
431	if (abfd)
432		bfd_close(abfd);
433}
434
435/**
436 * This function is only called when processing symbols retrieved from a
437 * debuginfo file that is separate from the actual runtime binary image.
438 * Separate debuginfo files may be needed in two different cases:
439 *   1) the real image is completely stripped, where there is no symbol
440	information at all
441 *   2) the real image has debuginfo stripped, and the user is requesting "-g"
442 *   (src file/line num info)
443 * After all symbols are gathered up, there will be some filtering/removal of
444 * unnecessary symbols.  In particular, the bfd_info::interesting_symbol()
445 * function filters out symbols whose section's flag value does not include
446 * SEC_LOAD.  This filtering is required, so it must be retained.  However,
447 * we run into a problem with symbols from debuginfo files, since the
448 * section flag does NOT include SEC_LOAD.  To solve this problem, the
449 * translate_debuginfo_syms function maps the debuginfo symbol's sections to
450 * that of their corresponding real image.
451*/
452void bfd_info::translate_debuginfo_syms(asymbol ** dbg_syms, long nr_dbg_syms)
453{
454	unsigned int img_sect_cnt = 0;
455	bfd * image_bfd = image_bfd_info->abfd;
456	multimap<string, bfd_section *> image_sections;
457
458	for (bfd_section * sect = image_bfd->sections;
459	     sect && img_sect_cnt < image_bfd->section_count;
460	     sect = sect->next) {
461		// A comment section marks the end of the needed sections
462		if (strstr(sect->name, ".comment") == sect->name)
463			break;
464		image_sections.insert(pair<string, bfd_section *>(sect->name, sect));
465		img_sect_cnt++;
466	}
467
468	asymbol * sym = dbg_syms[0];
469	string prev_sect_name = "";
470	bfd_section * matched_section = NULL;
471	for (int i = 0; i < nr_dbg_syms; sym = dbg_syms[++i]) {
472		bool section_switch;
473
474		if (strcmp(prev_sect_name.c_str(), sym->section->name)) {
475			section_switch = true;
476			prev_sect_name = sym->section->name;
477		} else {
478			section_switch = false;
479		}
480		if (sym->section->owner && sym->section->owner == abfd) {
481			if (section_switch ) {
482				matched_section = NULL;
483				multimap<string, bfd_section *>::iterator it;
484				pair<multimap<string, bfd_section *>::iterator,
485				     multimap<string, bfd_section *>::iterator> range;
486
487				range = image_sections.equal_range(sym->section->name);
488				for (it = range.first; it != range.second; it++) {
489					if ((*it).second->vma == sym->section->vma) {
490						matched_section = (*it).second;
491						break;
492					}
493				}
494			}
495			if (matched_section) {
496				sym->section = matched_section;
497				sym->the_bfd = image_bfd;
498			}
499		}
500	}
501}
502
503#if SYNTHESIZE_SYMBOLS
504bool bfd_info::get_synth_symbols()
505{
506	extern const bfd_target bfd_elf64_powerpc_vec;
507	extern const bfd_target bfd_elf64_powerpcle_vec;
508	bool is_elf64_powerpc_target = (abfd->xvec == &bfd_elf64_powerpc_vec)
509		|| (abfd->xvec == &bfd_elf64_powerpcle_vec);
510
511	if (!is_elf64_powerpc_target)
512		return false;
513
514	void * buf;
515	uint tmp;
516	long nr_mini_syms = bfd_read_minisymbols(abfd, 0, &buf, &tmp);
517	if (nr_mini_syms < 1)
518		return false;
519
520	asymbol ** mini_syms = (asymbol **)buf;
521	buf = NULL;
522	bfd * synth_bfd;
523
524	/* For ppc64, a debuginfo file by itself does not hold enough symbol
525	 * information for us to properly attribute samples to symbols.  If
526	 * the image file's bfd has no symbols (as in a super-stripped library),
527	 * then we need to do the extra processing in translate_debuginfo_syms.
528	 */
529	if (image_bfd_info && image_bfd_info->nr_syms == 0) {
530		translate_debuginfo_syms(mini_syms, nr_mini_syms);
531		synth_bfd = image_bfd_info->abfd;
532	} else
533		synth_bfd = abfd;
534
535	long nr_synth_syms = bfd_get_synthetic_symtab(synth_bfd,
536	                                              nr_mini_syms,
537	                                              mini_syms, 0,
538	                                              NULL, &synth_syms);
539
540	if (nr_synth_syms < 0) {
541		free(mini_syms);
542		return false;
543	}
544
545	cverb << vbfd << "mini_syms: " << dec << nr_mini_syms << hex << endl;
546	cverb << vbfd << "synth_syms: " << dec << nr_synth_syms << hex << endl;
547
548	nr_syms = nr_mini_syms + nr_synth_syms;
549	syms.reset(new asymbol *[nr_syms + 1]);
550
551	for (size_t i = 0; i < (size_t)nr_mini_syms; ++i)
552		syms[i] = mini_syms[i];
553
554
555	for (size_t i = 0; i < (size_t)nr_synth_syms; ++i)
556		syms[nr_mini_syms + i] = synth_syms + i;
557
558
559	free(mini_syms);
560
561	// bfd_canonicalize_symtab does this, so shall we
562	syms[nr_syms] = NULL;
563
564	return true;
565}
566#else
567bool bfd_info::get_synth_symbols()
568{
569	return false;
570}
571#endif /* SYNTHESIZE_SYMBOLS */
572
573
574void bfd_info::get_symbols()
575{
576	if (!abfd)
577		return;
578
579	cverb << vbfd << "bfd_info::get_symbols() for "
580	      << bfd_get_filename(abfd) << endl;
581
582	if (get_synth_symbols())
583		return;
584
585	if (bfd_get_file_flags(abfd) & HAS_SYMS)
586		nr_syms = bfd_get_symtab_upper_bound(abfd);
587
588	cverb << vbfd << "bfd_get_symtab_upper_bound: " << dec
589	      << nr_syms << hex << endl;
590
591	nr_syms /= sizeof(asymbol *);
592
593	if (nr_syms < 1)
594		return;
595
596	syms.reset(new asymbol *[nr_syms]);
597
598	nr_syms = bfd_canonicalize_symtab(abfd, syms.get());
599
600	if (image_bfd_info)
601		translate_debuginfo_syms(syms.get(), nr_syms);
602
603	cverb << vbfd << "bfd_canonicalize_symtab: " << dec
604	      << nr_syms << hex << endl;
605}
606
607
608linenr_info const
609find_nearest_line(bfd_info const & b, op_bfd_symbol const & sym,
610                  bfd_vma offset, bool anon_obj)
611{
612	char const * function = "";
613	char const * cfilename = "";
614	unsigned int linenr = 0;
615	linenr_info info;
616	bfd * abfd;
617	asymbol ** syms;
618	asection * section;
619	bfd_vma pc;
620	bool ret;
621
622	if (!b.valid())
623		goto fail;
624
625	// take care about artificial symbol
626	if (!sym.symbol())
627		goto fail;
628
629	abfd = b.abfd;
630	syms = b.syms.get();
631	if (!syms)
632		goto fail;
633	section = sym.symbol()->section;
634	if (anon_obj)
635		pc = offset - sym.symbol()->section->vma;
636	else
637		pc = (sym.value() + offset) - sym.filepos();
638
639	if ((bfd_get_section_flags(abfd, section) & SEC_ALLOC) == 0)
640		goto fail;
641
642	if (pc >= bfd_section_size(abfd, section))
643		goto fail;
644
645	ret = bfd_find_nearest_line(abfd, section, syms, pc, &cfilename,
646	                                 &function, &linenr);
647
648	if (!ret || !cfilename || !function)
649		goto fail;
650
651	/*
652	 * is_correct_function does not handle the case of static inlines,
653	 * but if the linenr is non-zero in the inline case, it is the correct
654	 * line number.
655	 */
656	if (linenr == 0 && !is_correct_function(function, sym.name()))
657		goto fail;
658
659	if (linenr == 0) {
660		fixup_linenr(abfd, section, syms, sym.name(), pc, &cfilename,
661		             &linenr);
662	}
663
664	info.found = true;
665	info.filename = cfilename;
666	info.line = linenr;
667	return info;
668
669fail:
670	info.found = false;
671	// some stl lacks string::clear()
672	info.filename.erase(info.filename.begin(), info.filename.end());
673	info.line = 0;
674	return info;
675}
676