1/**
2 * @file jitsymbol.c
3 * Handle symbols found in jitted code dump
4 *
5 * @remark Copyright 2007 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author Jens Wilke
9 * @Modifications Maynard Johnson
10 * @Modifications Philippe Elie
11 * @Modifications Daniel Hansel
12 *
13 * Copyright IBM Corporation 2007
14 *
15 */
16
17#include "opjitconv.h"
18#include "opd_printf.h"
19#include "op_libiberty.h"
20#include "op_types.h"
21
22#include <stddef.h>
23#include <stdlib.h>
24#include <stdint.h>
25#include <stdio.h>
26#include <string.h>
27#include <unistd.h>
28#include <limits.h>
29
30typedef int (*compare_symbol)(void const *, void const *);
31
32
33/* count the entries in the jitentry_list */
34static u32 count_entries(void)
35{
36	struct jitentry const * entry;
37	u32 cnt = 0;
38	for (entry = jitentry_list; entry; entry = entry->next)
39		cnt++;
40	return cnt;
41}
42
43
44static void fill_entry_array(struct jitentry * entries[])
45{
46	int i = 0;
47	struct jitentry * entry;
48	for (entry = jitentry_list; entry; entry = entry->next)
49		entries[i++] = entry;
50}
51
52
53/* create an array pointing to the jitentry structures which is sorted
54 * according to the comparator rule given by parameter compar
55 */
56static struct jitentry ** create_sorted_array(compare_symbol compare)
57{
58	struct jitentry ** array =
59		xmalloc(sizeof(struct jitentry *) * entry_count);
60	fill_entry_array(array);
61	qsort(array, entry_count, sizeof(struct jitentry *), compare);
62	return array;
63}
64
65
66/* comparator method for qsort which sorts jitentries by symbol_name */
67static int cmp_symbolname(void const * a, void const * b)
68{
69	struct jitentry * a0 = *(struct jitentry **) a;
70	struct jitentry * b0 = *(struct jitentry **) b;
71	return strcmp(a0->symbol_name, b0->symbol_name);
72}
73
74
75/* comparator method for qsort which sorts jitentries by address */
76static int cmp_address(void const * a, void const * b)
77{
78	struct jitentry * a0 = *(struct jitentry **) a;
79	struct jitentry * b0 = *(struct jitentry **) b;
80	if (a0->vma < b0->vma)
81		return -1;
82	if (a0->vma == b0->vma)
83		return 0;
84	return 1;
85}
86
87
88/* resort address_ascending array */
89static void resort_address(void)
90{
91	u32 i;
92
93	qsort(entries_address_ascending, entry_count,
94	      sizeof(struct jitentry *), cmp_address);
95
96	// lower entry_count if entries are invalidated
97	for (i = 0; i < entry_count; ++i) {
98		if (entries_address_ascending[i]->vma)
99			break;
100	}
101
102	if (i) {
103		entry_count -= i;
104		memmove(&entries_address_ascending[0],
105			&entries_address_ascending[i],
106			sizeof(struct jitentry *) * entry_count);
107	}
108}
109
110
111/* Copy address_ascending array to entries_symbols_ascending and resort it.  */
112static void resort_symbol(void)
113{
114	memcpy(entries_symbols_ascending, entries_address_ascending,
115	       sizeof(struct jitentry *) * entry_count);
116	qsort(entries_symbols_ascending, entry_count,
117	      sizeof(struct jitentry *), cmp_symbolname);
118}
119
120/* allocate, populate and sort the jitentry arrays */
121void create_arrays(void)
122{
123	max_entry_count = entry_count = count_entries();
124	entries_symbols_ascending = create_sorted_array(cmp_symbolname);
125	entries_address_ascending = create_sorted_array(cmp_address);
126}
127
128
129/* add a new create jitentry to the array. mallocs new arrays if space is
130 * needed */
131static void insert_entry(struct jitentry * entry)
132{
133	if (entry_count == max_entry_count) {
134		if (max_entry_count < UINT32_MAX - 18)
135			max_entry_count += 18;
136		else if (max_entry_count < UINT32_MAX)
137			max_entry_count += 1;
138		else {
139			fprintf(stderr, "Amount of JIT dump file entries is too large.\n");
140			exit(EXIT_FAILURE);
141		}
142		entries_symbols_ascending = (struct jitentry **)
143			xrealloc(entries_symbols_ascending,
144				 sizeof(struct jitentry *) * max_entry_count);
145		entries_address_ascending = (struct jitentry **)
146			xrealloc(entries_address_ascending,
147				 sizeof(struct jitentry *) * max_entry_count);
148	}
149	entries_address_ascending[entry_count++] = entry;
150}
151
152
153/* add a suffix to the name to differenciate it */
154static char * replacement_name(char * s, int i)
155{
156	int cnt = 1;
157	int j = i;
158	char * res;
159
160	while ((j = j/10))
161		cnt++;
162	cnt += 2 + strlen(s);
163	res = xmalloc(cnt);
164	snprintf(res, cnt, "%s~%i", s, i);
165	return res;
166}
167
168
169/*
170 * Mark the entry so it is not included in the ELF file. We do this by
171 * writing a 0 address as magic vma and sorting
172 * it out later
173 */
174static void invalidate_entry(struct jitentry * e)
175{
176	verbprintf(debug, "invalidate_entry: addr=%llx, name=%s\n",
177		   e->vma, e->symbol_name);
178	e->vma = 0;
179}
180
181
182/*
183 * Invalidate all symbols that are not alive at sampling start time.
184 */
185static void invalidate_earlybirds(unsigned long long start_time)
186{
187	u32 i;
188	int flag;
189	struct jitentry * a;
190
191	flag = 0;
192	for (i = 0; i < entry_count; i++) {
193		a = entries_address_ascending[i];
194		if (a->life_end < start_time) {
195			invalidate_entry(a);
196			flag = 1;
197		}
198	}
199	if (flag) {
200		resort_address();
201		resort_symbol();
202	}
203}
204
205
206/* select the symbol with the longest life time in the index range */
207static int select_one(int start_idx, int end_idx)
208{
209	int i;
210	int candidate = OP_JIT_CONV_FAIL;
211	unsigned long long lifetime = 0;
212	unsigned long long x;
213	struct jitentry const * e;
214
215	for (i = start_idx; i <= end_idx; i++) {
216		e = entries_address_ascending[i];
217		x = e->life_end - e->life_start;
218		if (candidate == -1 || x > lifetime) {
219			candidate = i;
220			lifetime = x;
221		}
222	}
223	return candidate;
224}
225
226
227/*
228 * We decided to keep one entry in favor of the other. Instead of dropping
229 * the overlapping entry we split or truncate it to not overlap any more.
230 *
231 * Looking on the address regions, we may have the following situation:
232 *
233 *  split:     |------------|
234 *  keep:          |---|
235 *
236 * The split entry may be splitted in a left part and a right part. E.g.:
237 *
238 *  split0/1:  |--|     |---|
239 *  keep:          |---|
240 *
241 * However, both parts may or may not exist.
242 */
243static void split_entry(struct jitentry * split, struct jitentry const * keep)
244{
245	unsigned long long start_addr_keep = keep->vma;
246	unsigned long long end_addr_keep = keep->vma + keep->code_size;
247	unsigned long long end_addr_split = split->vma + split->code_size;
248	unsigned long long start_addr_split = split->vma;
249
250	// do we need a right part?
251	if (end_addr_split > end_addr_keep) {
252		struct jitentry * new_entry =
253			xcalloc(1, sizeof(struct jitentry));
254		char * s = NULL;
255
256		/* Check for max. length to avoid possible integer overflow. */
257		if (strlen(split->symbol_name) > SIZE_MAX - 3) {
258			fprintf(stderr, "Length of symbol name is too large.\n");
259			exit(EXIT_FAILURE);
260		} else {
261			s = xmalloc(strlen(split->symbol_name) + 3);
262			strcpy(s, split->symbol_name);
263			strcat(s, "#1");
264		}
265
266		new_entry->vma = end_addr_keep;
267		new_entry->code_size = end_addr_split - end_addr_keep;
268		new_entry->symbol_name = s;
269		new_entry->sym_name_malloced = 1;
270		new_entry->life_start = split->life_start;
271		new_entry->life_end = split->life_end;
272		// the right part does not have an associated code, because we
273		// don't know whether the split part begins at an opcode
274		new_entry->code = NULL;
275		verbprintf(debug, "split right (new) name=%s, start=%llx,"
276			   " end=%llx\n", new_entry->symbol_name,
277			   new_entry->vma,
278			   new_entry->vma + new_entry->code_size);
279		insert_entry(new_entry);
280	}
281	// do we need a left part?
282	if (start_addr_split < start_addr_keep) {
283		char * s = NULL;
284
285		/* Check for max. length to avoid possible integer overflow. */
286		if (strlen(split->symbol_name) > SIZE_MAX - 3) {
287			fprintf(stderr, "Length of symbol name is too large.\n");
288			exit(EXIT_FAILURE);
289		} else {
290			s = xmalloc(strlen(split->symbol_name) + 3);
291			strcpy(s, split->symbol_name);
292			strcat(s, "#0");
293		}
294
295		split->code_size = start_addr_keep - start_addr_split;
296		if (split->sym_name_malloced)
297			free(split->symbol_name);
298		split->symbol_name = s;
299		split->sym_name_malloced = 1;
300		verbprintf(debug, "split left name=%s, start=%llx, end=%llx\n",
301			   split->symbol_name, split->vma,
302			   split->vma + split->code_size);
303	} else {
304		invalidate_entry(split);
305	}
306}
307
308
309/*
310 * Scans through the index range and splits/truncates entries that overlap
311 * with the one indexed by keep_idx. Returns the total lifetime of the symbols
312 * found to overlap.
313 * Returns ULONG_MAX on error.
314 */
315static unsigned long long eliminate_overlaps(int start_idx, int end_idx,
316					 int keep_idx)
317{
318	unsigned long long retval;
319	struct jitentry const * keep = entries_address_ascending[keep_idx];
320	struct jitentry * e;
321	unsigned long long start_addr_keep = keep->vma;
322	unsigned long long end_addr_keep = keep->vma + keep->code_size;
323	unsigned long long start_addr_entry, end_addr_entry;
324	int i;
325	unsigned long long min_start = keep->life_start;
326	unsigned long long max_end = keep->life_end;
327
328	for (i = start_idx; i <= end_idx; i++) {
329		if (i == keep_idx)
330			continue;
331		e = entries_address_ascending[i];
332		start_addr_entry = e->vma;
333		end_addr_entry = e->vma + e->code_size;
334		if (debug) {
335			if (!(start_addr_entry <= end_addr_entry)) {
336				verbprintf(debug, "assert failed:"
337					   " start_addr_entry <="
338					   " end_addr_entry\n");
339				retval = ULONG_MAX;
340				goto out;
341			}
342			if (!(start_addr_keep <= end_addr_keep)) {
343				verbprintf(debug, "assert failed: "
344					   "start_addr_keep <="
345					   " end_addr_keep\n");
346				retval = ULONG_MAX;
347				goto out;
348			}
349		}
350		if (start_addr_entry < end_addr_keep &&
351		    end_addr_entry > start_addr_keep) {
352			if (e->life_start < min_start)
353				min_start = e->life_start;
354			if (e->life_end > max_end)
355				max_end = e->life_end;
356			split_entry(e, keep);
357		}
358	}
359	retval = max_end - min_start;
360out:
361	return retval;
362}
363
364
365/*
366 * Within the index range (into the array entries_address_ascending), find the
367 * symbol with the maximal lifetime and split/truncate all symbols that overlap
368 * with it (i.e. that there won't be any overlaps anymore).
369 */
370static int handle_overlap_region(int start_idx, int end_idx)
371{
372	int rc = OP_JIT_CONV_OK;
373	int idx;
374	struct jitentry * e;
375	int cnt;
376	char * name;
377	int i, j;
378	unsigned long long totaltime;
379
380	if (debug) {
381		for (i = start_idx; i <= end_idx; i++) {
382			e = entries_address_ascending[i];
383			verbprintf(debug, "overlap idx=%i, name=%s, "
384				   "start=%llx, end=%llx, life_start=%lli, "
385				   "life_end=%lli, lifetime=%lli\n",
386				   i, e->symbol_name, e->vma,
387				   e->vma + e->code_size, e->life_start,
388				   e->life_end, e->life_end - e->life_start);
389		}
390	}
391	idx = select_one(start_idx, end_idx);
392	totaltime = eliminate_overlaps(start_idx, end_idx, idx);
393	if (totaltime == ULONG_MAX) {
394		rc = OP_JIT_CONV_FAIL;
395		goto out;
396	}
397	e = entries_address_ascending[idx];
398
399	cnt = 1;
400	j = (e->life_end - e->life_start) * 100 / totaltime;
401	while ((j = j/10))
402		cnt++;
403
404	// Mark symbol name with a %% to indicate the overlap.
405	cnt += strlen(e->symbol_name) + 2 + 1;
406	name = xmalloc(cnt);
407	snprintf(name, cnt, "%s%%%llu", e->symbol_name,
408		 (e->life_end - e->life_start) * 100 / totaltime);
409	if (e->sym_name_malloced)
410		free(e->symbol_name);
411	e->symbol_name = name;
412	e->sym_name_malloced = 1;
413	verbprintf(debug, "selected idx=%i, name=%s\n", idx, e->symbol_name);
414out:
415	return rc;
416}
417
418
419/*
420 * one scan through the symbols to find overlaps.
421 * return 1 if an overlap is found. this is repeated until no more overlap
422 * is there.
423 * Process: There may be more than two overlapping symbols with each other.
424 * The index range of symbols found to overlap are passed to
425 * handle_overlap_region.
426 */
427static int scan_overlaps(void)
428{
429	int i, j;
430	unsigned long long end_addr, end_addr2;
431	struct jitentry const * a;
432	int flag = 0;
433	// entry_count can be incremented by split_entry() during the loop,
434	// save the inital value as loop count
435	int loop_count = entry_count;
436
437	verbprintf(debug,"count=%i, scan overlaps...\n", entry_count);
438	i = 0;
439	end_addr = 0;
440	for (j = 1; j < loop_count; j++) {
441		/**
442		 * Take a symbol [i] and look for the next symbol(s) [j] that are overlapping
443		 * symbol [i]. If a symbol [j] is found that is not overlapping symbol [i] the
444		 * symbols [i]..[j-1] are handled to be not overlapping anymore.
445		 * See descriptions of handle_overlap_region() and eliminate_overlaps() for
446		 * further details of this handling.
447		 * E.g. possible cases to be handled could be:
448		 *
449		 *   sym1 |-----------|
450		 *   sym2     |---|
451		 *
452		 *   sym1 |--------|
453		 *   sym2     |--------|
454		 *
455		 *   sym1 |--------|
456		 *   sym2     |-------|
457		 *   sym3        |---------|
458		 *
459		 * But in the following case
460		 *
461		 *   sym1 |--------|
462		 *   sym2      |-------------|
463		 *   sym3              |----------|
464		 *
465		 * sym3 would not overlap with sym1. Therefore handle_overlap_regio() would
466		 * only be called for sym1 up to sym2.
467		 */
468		a = entries_address_ascending[j - 1];
469		end_addr2 = a->vma + a->code_size;
470		if (end_addr2 > end_addr)
471			end_addr = end_addr2;
472		a = entries_address_ascending[j];
473		if (end_addr <= a->vma) {
474			if (i != j - 1) {
475				if (handle_overlap_region(i, j - 1) ==
476				    OP_JIT_CONV_FAIL) {
477					flag = OP_JIT_CONV_FAIL;
478					goto out;
479				}
480				flag = 1;
481			}
482			i = j;
483		}
484	}
485	if (i != j - 1) {
486		if (handle_overlap_region(i, j - 1) == OP_JIT_CONV_FAIL)
487			flag = OP_JIT_CONV_FAIL;
488		else
489			flag = 1;
490	}
491out:
492	return flag;
493}
494
495
496/* search for symbols that have overlapping address ranges and decide for
497 * one */
498int resolve_overlaps(unsigned long long start_time)
499{
500	int rc = OP_JIT_CONV_OK;
501	int cnt = 0;
502
503	invalidate_earlybirds(start_time);
504	while ((rc = scan_overlaps()) && rc != OP_JIT_CONV_FAIL) {
505		resort_address();
506		if (cnt == 0) {
507			verbprintf(debug, "WARNING: overlaps detected. "
508				   "Removing overlapping JIT methods\n");
509		}
510		cnt++;
511	}
512	if (cnt > 0 && rc != OP_JIT_CONV_FAIL)
513		resort_symbol();
514	return rc;
515}
516
517
518/*
519 * scan through the sorted array and replace identical symbol names by unique
520 * ones by adding a counter value.
521 */
522void disambiguate_symbol_names(void)
523{
524	u32 j;
525	int cnt, rep_cnt;
526	struct jitentry * a;
527	struct jitentry * b;
528
529	rep_cnt = 0;
530	for (j = 1; j < entry_count; j++) {
531		a = entries_symbols_ascending[j - 1];
532		cnt = 1;
533		do {
534			b = entries_symbols_ascending[j];
535			if (strcmp(a->symbol_name, b->symbol_name) == 0) {
536				if (b->sym_name_malloced)
537					free(b->symbol_name);
538				b->symbol_name =
539					replacement_name(a->symbol_name, cnt);
540				b->sym_name_malloced = 1;
541				j++;
542				cnt++;
543				rep_cnt++;
544			} else {
545				break;
546			}
547		} while (j < entry_count);
548	}
549	/* recurse to avoid that the added suffix also creates a collision */
550	if (rep_cnt) {
551		qsort(entries_symbols_ascending, entry_count,
552		      sizeof(struct jitentry *), cmp_symbolname);
553		disambiguate_symbol_names();
554	}
555}
556