op_alloc_counter.c revision 84862f9f1c18e2906bd17871ed0970751de7eeae
/**
 * @file op_alloc_counter.c
 * hardware counter allocation
 *
 * You can have silliness here.
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 */

#include <stdlib.h>
#include <ctype.h>
#include <dirent.h>

#include "op_events.h"
#include "op_libiberty.h"


typedef struct counter_arc_head {
	/** the head of the list of counters allowed for this event */
	struct list_head next;
} counter_arc_head;


typedef struct counter_arc {
	/** counter nr */
	int counter;
	/** the next counter allowed for this event */
	struct list_head next;
} counter_arc;
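
/* Illustrative note (not part of the original source): ctr_arc[i].next, as
 * filled in by build_counter_arc() below, heads a circular list of
 * counter_arc nodes, one node per hardware counter on which event i is
 * allowed to run.
 */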


/**
 * @param pev  an array of events
 * @param nr_events  number of entries in pev
 *
 * build an array of counter lists, one list of allowed counters per event:
 * counter_arc_head[i] is the list of counters allowed for pev[i]
 * The returned pointer is an array of nr_events entries.
 */
static counter_arc_head *
build_counter_arc(struct op_event const * pev[], int nr_events)
{
	counter_arc_head * ctr_arc;
	int i;

	ctr_arc = xmalloc(nr_events * sizeof(*ctr_arc));

	for (i = 0; i < nr_events; ++i) {
		int j;
		u32 mask = pev[i]->counter_mask;

		list_init(&ctr_arc[i].next);
		for (j = 0; mask; ++j) {
			if (mask & (1 << j)) {
				counter_arc * arc =
					xmalloc(sizeof(counter_arc));
				arc->counter = j;
				/* we loop in increasing counter order and the
				 * allocation does a left-to-right tree walk,
				 * so add at the tail to ensure counters are
				 * allocated in increasing order: not required,
				 * but a bit less surprising when debugging
				 */
				list_add_tail(&arc->next, &ctr_arc[i].next);
				mask &= ~(1 << j);
			}
		}
	}

	return ctr_arc;
}
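
/* Illustrative example (not part of the original source): for two events
 * whose counter_mask values are 0x3 and 0x4, build_counter_arc() returns
 *
 *	ctr_arc[0]: 0 -> 1	(event 0 may run on counter 0 or 1)
 *	ctr_arc[1]: 2		(event 1 may run only on counter 2)
 *
 * i.e. one list of candidate counters per event, in increasing order.
 */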


/**
 * @param ctr_arc  the array to deallocate
 * @param nr_events  number of entries in the array
 *
 * deallocate all resources previously allocated by build_counter_arc()
 */
static void delete_counter_arc(counter_arc_head * ctr_arc, int nr_events)
{
	int i;
	for (i = 0; i < nr_events; ++i) {
		struct list_head * pos, * pos2;
		list_for_each_safe(pos, pos2, &ctr_arc[i].next) {
			counter_arc * arc = list_entry(pos, counter_arc, next);
			list_del(&arc->next);
			free(arc);
		}
	}
	free(ctr_arc);
}


/**
 * @param ctr_arc  tree description, ctr_arc[i] is the i-th level of the tree
 * @param max_depth  number of entries in the ctr_arc array == depth of tree
 * @param depth  current level we are exploring
 * @param allocated_mask  bitmask of counters already allocated
 * @param counter_map  array of counter number mappings, returned results go
 *   here
 *
 * return non-zero on success, in which case counter_map is set to the
 * counter mapping numbers.
 *
 * The solution is searched for through simple backtracking, recursively
 * exploring all possible solutions until one is found; pruning is done in
 * O(1) by tracking a bitmask of already allocated counters. Nodes are walked
 * in pre-order, left to right.
 *
 * In the case of extended events (which require no physical counters), the
 * associated counter_map entry will be -1.
 *
 * Possible improvement if necessary: partition counters into classes, where
 * two counters belong to the same class if they allow exactly the same set of
 * events. A variant of the backtracking algorithm can then work on classes of
 * counters rather than on individual counters (this is not an improvement if
 * each counter goes into its own class).
 */
static int
allocate_counter(counter_arc_head const * ctr_arc, int max_depth, int depth,
		 u32 allocated_mask, size_t * counter_map)
{
	struct list_head * pos;

	if (depth == max_depth)
		return 1;

	/* If this event has no allowed counter (an extended event using no
	 * physical counter), map it to -1 and go on with the next event. */
	if ((&ctr_arc[depth].next)->next == &ctr_arc[depth].next) {
		counter_map[depth] = -1;
		if (allocate_counter(ctr_arc, max_depth, depth + 1,
		                     allocated_mask,
		                     counter_map))
			return 1;
	} else {
		list_for_each(pos, &ctr_arc[depth].next) {
			counter_arc const * arc = list_entry(pos, counter_arc, next);

			if (allocated_mask & (1 << arc->counter))
				continue;

			counter_map[depth] = arc->counter;

			if (allocate_counter(ctr_arc, max_depth, depth + 1,
					     allocated_mask | (1 << arc->counter),
					     counter_map))
				return 1;
		}
	}

	return 0;
}
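
/* Illustrative walk-through (not from the original source): suppose three
 * events with allowed counters {0,1}, {0} and {0,1,2}.  The search first
 * tries counter 0 for event 0, then finds no free counter for event 1 and
 * backtracks; it then places event 0 on counter 1, event 1 on counter 0 and
 * event 2 on counter 2, returning counter_map = { 1, 0, 2 }.
 */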

/* determine which directories are counter directories
 */
static int perfcounterdir(const struct dirent * entry)
{
	return (isdigit(entry->d_name[0]));
}


/**
 * @param mask  pointer where to place the bit mask of unavailable counters
 *
 * return >= 0  the number of counters that are available
 *        < 0   could not determine the number of counters
 *
 */
static int op_get_counter_mask(u32 * mask)
{
	struct dirent **counterlist;
	int count, i;
	/* assume nothing is available */
	u32 available=0;

	count = scandir("/dev/oprofile", &counterlist, perfcounterdir,
			alphasort);
	if (count < 0)
		/* unable to determine bit mask */
		return -1;
	/* convert to bit map (0 where counter exists) */
	for (i=0; i<count; ++i) {
		available |= 1 << atoi(counterlist[i]->d_name);
		free(counterlist[i]);
	}
	*mask=~available;
	free(counterlist);
	return count;
}
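
/* Worked example (added for illustration, not from the original source): if
 * /dev/oprofile contains the counter directories 0, 1, 2 and 3, then
 * available == 0xf, *mask == ~0xf (counters 4 and up flagged unavailable)
 * and the function returns 4.
 */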

size_t * map_event_to_counter(struct op_event const * pev[], int nr_events,
                              op_cpu cpu_type)
{
	counter_arc_head * ctr_arc;
	size_t * counter_map;
	int i, nr_counters, nr_pmc_events;
	op_cpu curr_cpu_type;
	u32 unavailable_counters = 0;

	/* Either ophelp or one of the libop tests may invoke this
	 * function with a non-native cpu_type.  If so, we should not
	 * call op_get_counter_mask because that will look for real counter
	 * information in oprofilefs.
	 */
	curr_cpu_type = op_get_cpu_type();
	if (cpu_type != curr_cpu_type)
		nr_counters = op_get_nr_counters(cpu_type);
	else
		nr_counters = op_get_counter_mask(&unavailable_counters);

	/* no counters found: probably perfmon is managing the perfmon hw */
	if (nr_counters <= 0) {
		nr_counters = op_get_nr_counters(cpu_type);
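		/* mark every counter numbered >= nr_counters as already
		 * allocated, so allocate_counter() will never pick them */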
		unavailable_counters = (~0) << nr_counters;
	}

	/* Check to see if we have enough physical counters to map events */
	for (i = 0, nr_pmc_events = 0; i < nr_events; i++)
		if (pev[i]->ext == NULL)
			if (++nr_pmc_events > nr_counters)
				return 0;

	ctr_arc = build_counter_arc(pev, nr_events);

	counter_map = xmalloc(nr_events * sizeof(size_t));

	if (!allocate_counter(ctr_arc, nr_events, 0, unavailable_counters,
			      counter_map)) {
		free(counter_map);
		counter_map = 0;
	}

	delete_counter_arc(ctr_arc, nr_events);
	return counter_map;
}
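
/* Hedged usage sketch (added for illustration; the call shown is an
 * assumption about a typical caller, not code from this file):
 *
 *	size_t * map = map_event_to_counter(pev, nr_events, cpu_type);
 *	if (!map) {
 *		// the events cannot all be mapped onto available counters
 *	} else {
 *		// map[i] is the counter assigned to pev[i], or (size_t)-1
 *		// for an extended event that needs no physical counter
 *		free(map);
 *	}
 */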