1/**
2 * @file profile_spec.cpp
3 * Contains a PP profile specification
4 *
5 * @remark Copyright 2003 OProfile authors
6 * @remark Read the file COPYING
7 *
8 * @author Philippe Elie
9 */
10
11#include <algorithm>
12#include <set>
13#include <sstream>
14#include <iterator>
15#include <iostream>
16#include <dirent.h>
17
18#include "file_manip.h"
19#include "op_config.h"
20#include "profile_spec.h"
21#include "string_manip.h"
22#include "glob_filter.h"
23#include "locate_images.h"
24#include "op_exception.h"
25#include "op_header.h"
26#include "op_fileio.h"
27
28using namespace std;
29
30namespace {
31
32// PP:3.7, full path, or relative path. If we can't find it,
33// we should maintain the original to maintain the wordexp etc.
34string const fixup_image_spec(string const & str, extra_images const & extra)
35{
36	// On error find_image_path() return str, so if an occur we will
37	// use the provided image_name not the fixed one.
38	image_error error;
39	return extra.find_image_path(str, error, true);
40}
41
42void fixup_image_spec(vector<string> & images, extra_images const & extra)
43{
44	vector<string>::iterator it = images.begin();
45	vector<string>::iterator const end = images.end();
46
47	for (; it != end; ++it)
48		*it = fixup_image_spec(*it, extra);
49}
50
51}  // anon namespace
52
53
54profile_spec::profile_spec()
55	:
56	extra_found_images()
57{
58	parse_table["archive"] = &profile_spec::parse_archive_path;
59	parse_table["session"] = &profile_spec::parse_session;
60	parse_table["session-exclude"] =
61		&profile_spec::parse_session_exclude;
62	parse_table["image"] = &profile_spec::parse_image;
63	parse_table["image-exclude"] = &profile_spec::parse_image_exclude;
64	parse_table["lib-image"] = &profile_spec::parse_lib_image;
65	parse_table["event"] = &profile_spec::parse_event;
66	parse_table["count"] = &profile_spec::parse_count;
67	parse_table["unit-mask"] = &profile_spec::parse_unitmask;
68	parse_table["tid"] = &profile_spec::parse_tid;
69	parse_table["tgid"] = &profile_spec::parse_tgid;
70	parse_table["cpu"] = &profile_spec::parse_cpu;
71}
72
73
74void profile_spec::parse(string const & tag_value)
75{
76	string value;
77	action_t action = get_handler(tag_value, value);
78	if (!action) {
79		throw invalid_argument("profile_spec::parse(): not "
80				       "a valid tag \"" + tag_value + "\"");
81	}
82
83	(this->*action)(value);
84}
85
86
87bool profile_spec::is_valid_tag(string const & tag_value)
88{
89	string value;
90	return get_handler(tag_value, value);
91}
92
93
94void profile_spec::set_image_or_lib_name(string const & str)
95{
96	/* FIXME: what does spec say about this being allowed to be
97	 * a comma list or not ? */
98	image_or_lib_image.push_back(fixup_image_spec(str, extra_found_images));
99}
100
101
102void profile_spec::parse_archive_path(string const & str)
103{
104	archive_path = op_realpath(str);
105}
106
107
108string profile_spec::get_archive_path() const
109{
110	return archive_path;
111}
112
113
114void profile_spec::parse_session(string const & str)
115{
116	session = separate_token(str, ',');
117}
118
119
120void profile_spec::parse_session_exclude(string const & str)
121{
122	session_exclude = separate_token(str, ',');
123}
124
125
126void profile_spec::parse_image(string const & str)
127{
128	image = separate_token(str, ',');
129	fixup_image_spec(image, extra_found_images);
130}
131
132
133void profile_spec::parse_image_exclude(string const & str)
134{
135	image_exclude = separate_token(str, ',');
136	fixup_image_spec(image_exclude, extra_found_images);
137}
138
139
140void profile_spec::parse_lib_image(string const & str)
141{
142	lib_image = separate_token(str, ',');
143	fixup_image_spec(lib_image, extra_found_images);
144}
145
146
147void profile_spec::parse_event(string const & str)
148{
149	event.set(str);
150}
151
152
153void profile_spec::parse_count(string const & str)
154{
155	count.set(str);
156}
157
158
159void profile_spec::parse_unitmask(string const & str)
160{
161	unitmask.set(str);
162}
163
164
165void profile_spec::parse_tid(string const & str)
166{
167	tid.set(str);
168}
169
170
171void profile_spec::parse_tgid(string const & str)
172{
173	tgid.set(str);
174}
175
176
177void profile_spec::parse_cpu(string const & str)
178{
179	cpu.set(str);
180}
181
182
183profile_spec::action_t
184profile_spec::get_handler(string const & tag_value, string & value)
185{
186	string::size_type pos = tag_value.find_first_of(':');
187	if (pos == string::npos)
188		return 0;
189
190	string tag(tag_value.substr(0, pos));
191	value = tag_value.substr(pos + 1);
192
193	parse_table_t::const_iterator it = parse_table.find(tag);
194	if (it == parse_table.end())
195		return 0;
196
197	return it->second;
198}
199
200
201namespace {
202
203/// return true if the value from the profile spec may match the comma
204/// list
205template<typename T>
206bool comma_match(comma_list<T> const & cl, generic_spec<T> const & value)
207{
208	// if the profile spec is "all" we match the sample file
209	if (!cl.is_set())
210		return true;
211
212	// an "all" sample file should never match specified profile
213	// spec values
214	if (!value.is_set())
215		return false;
216
217	// now match each profile spec value against the sample file
218	return cl.match(value.value());
219}
220
221}
222
223
224bool profile_spec::match(filename_spec const & spec) const
225{
226	bool matched_by_image_or_lib_image = false;
227
228	// We need the true image name not the one based on the sample
229	// filename for the benefit of module which have /oprofile in their
230	// sample filename. This allow to specify profile spec based on the
231	// real name of the image, e.g. 'binary:*oprofile.ko'
232	string simage = fixup_image_spec(spec.image, extra_found_images);
233	string slib_image = fixup_image_spec(spec.lib_image,
234					     extra_found_images);
235
236	// PP:3.19
237	if (!image_or_lib_image.empty()) {
238		glob_filter filter(image_or_lib_image, image_exclude);
239		if (filter.match(simage) || filter.match(slib_image))
240			matched_by_image_or_lib_image = true;
241	}
242
243	if (!matched_by_image_or_lib_image) {
244		// PP:3.7 3.8
245		if (!image.empty()) {
246			glob_filter filter(image, image_exclude);
247			if (!filter.match(simage))
248				return false;
249		} else if (!image_or_lib_image.empty()) {
250			// image.empty() means match all except if user
251			// specified image_or_lib_image
252			return false;
253		}
254
255		// PP:3.9 3.10
256		if (!lib_image.empty()) {
257			glob_filter filter(lib_image, image_exclude);
258			if (!filter.match(slib_image))
259				return false;
260		} else if (image.empty() && !image_or_lib_image.empty()) {
261			// lib_image empty means match all except if user
262			// specified image_or_lib_image *or* we already
263			// matched this spec through image
264			return false;
265		}
266	}
267
268	if (!matched_by_image_or_lib_image) {
269		// if we don't match by image_or_lib_image we must try to
270		// exclude from spec, exclusion from image_or_lib_image has
271		// been handled above
272		vector<string> empty;
273		glob_filter filter(empty, image_exclude);
274		if (!filter.match(simage))
275			return false;
276		if (!spec.lib_image.empty() && !filter.match(slib_image))
277			return false;
278	}
279
280	if (!event.match(spec.event))
281		return false;
282
283	if (!count.match(spec.count))
284		return false;
285
286	if (!unitmask.match(spec.unitmask))
287		return false;
288
289	if (!comma_match(cpu, spec.cpu))
290		return false;
291
292	if (!comma_match(tid, spec.tid))
293		return false;
294
295	if (!comma_match(tgid, spec.tgid))
296		return false;
297
298	return true;
299}
300
301
302profile_spec profile_spec::create(list<string> const & args,
303                                  vector<string> const & image_path,
304				  string const & root_path)
305{
306	profile_spec spec;
307	set<string> tag_seen;
308	vector<string> temp_image_or_lib;
309
310	list<string>::const_iterator it = args.begin();
311	list<string>::const_iterator end = args.end();
312
313	for (; it != end; ++it) {
314		if (spec.is_valid_tag(*it)) {
315			if (tag_seen.find(*it) != tag_seen.end()) {
316				throw op_runtime_error("tag specified "
317				       "more than once: " + *it);
318			}
319			tag_seen.insert(*it);
320			spec.parse(*it);
321		} else {
322			string const file = op_realpath(*it);
323			temp_image_or_lib.push_back(file);
324		}
325	}
326
327	// PP:3.5 no session given means use the current session.
328	if (spec.session.empty())
329		spec.session.push_back("current");
330
331	bool ok = true;
332	vector<string>::const_iterator ip_it = image_path.begin();
333	for ( ; ip_it != image_path.end(); ++ip_it) {
334		if (!is_directory(spec.get_archive_path() + "/" + *ip_it)) {
335			cerr << spec.get_archive_path() + "/" + *ip_it << " isn't a valid directory\n";
336			ok = false;
337		}
338	}
339	if (!ok)
340		throw op_runtime_error("invalid --image-path= options");
341
342	spec.extra_found_images.populate(image_path, spec.get_archive_path(),
343					 root_path);
344	vector<string>::const_iterator im = temp_image_or_lib.begin();
345	vector<string>::const_iterator last = temp_image_or_lib.end();
346	for (; im != last; ++im)
347		spec.set_image_or_lib_name(*im);
348
349	return spec;
350}
351
352namespace {
353
/**
 * Compute the sessions to process.
 *
 * Starts from a copy of session, or from the single entry "current"
 * when session is empty. For each entry of session_exclude, in order,
 * the first equal entry still in the result (if any) is removed.
 */
vector<string> filter_session(vector<string> const & session,
			      vector<string> const & session_exclude)
{
	vector<string> kept(session.begin(), session.end());
	if (kept.empty())
		kept.push_back("current");

	vector<string>::const_iterator excl = session_exclude.begin();
	for (; excl != session_exclude.end(); ++excl) {
		// FIXME: would we use fnmatch on each item, are we allowed
		// to --session=current* ?
		vector<string>::iterator const hit =
			find(kept.begin(), kept.end(), *excl);
		if (hit != kept.end())
			kept.erase(hit);
	}

	return kept;
}
374
375static bool invalid_sample_file;
376bool valid_candidate(string const & base_dir, string const & filename,
377                     profile_spec const & spec, bool exclude_dependent,
378                     bool exclude_cg)
379{
380	if (exclude_cg && filename.find("{cg}") != string::npos)
381		return false;
382
383	// strip out non sample files
384	string const & sub = filename.substr(base_dir.size(), string::npos);
385	if (!is_prefix(sub, "/{root}/") && !is_prefix(sub, "/{kern}/"))
386		return false;
387
388	/* When overflows occur in the oprofile kernel driver's sample
389	 * buffers (caused by too high of a sampling rate), it's possible
390	 * for samples to be mis-attributed.  A common scenario is that,
391	 * while profiling process 'abc' running binary 'xzy', the task
392	 * switch for 'abc' gets dropped somehow.  Then, samples are taken
393	 * for the 'xyz' binary.  In the attempt to attribute the samples to
394	 * the associated binary, the oprofile kernel code examines the
395	 * the memory mappings for the last process for which it recorded
396	 * a task switch.  When profiling at a very high rate, the oprofile
397	 * daemon is often the process that is mistakenly examined.  Then the
398	 * sample from binary 'xyz' is matched to some file that's open in
399	 * oprofiled's memory space.  Because oprofiled has many sample files
400	 * open at any given time, there's a good chance the sample's VMA is
401	 * contained within one of those sample files.  So, once finding this
402	 * bogus match, the oprofile kernel records a cookie switch for the
403	 * sample file.  This scenario is made even more likely if a high
404	 * sampling rate (e.g., profiling on several events) is paired with
405	 * callgraph data collection.
406	 *
407	 * When the daemon processes this sample data from the kernel, it
408	 * creates a sample file for the sample file, resulting in something
409	 * of the form:
410	 *    <session-dir>/[blah]<session-dir>/[blah]
411	 *
412	 * When the sample data is post-processed, the sample file is parsed to
413	 * try to determine the name of the binary, but it gets horribly confused.
414	 * At best, the post-processing tool will spit out some warning messages,
415	 * such as:
416	 * warning:
417	 * /lib64/libdl-2.9.so/CYCLES.10000.0.all.all.all/{dep}/{root}/var/lib/oprofile/samples/current/{root}/lib64/libdl-2.9.so/{dep}/{root}/lib64/libdl-2.9.so/PM_RUN_CYC_GRP12.10000.0.all.all.all
418	 * could not be found.
419	 *
420	 * At worst, the parsing may result in an "invalid argument" runtime error
421	 * because of the inability to parse a sample file whose name contains that
422	 * of another sample file.  This typically seems to happen when callgraph
423	 * data is being collected.
424	 *
425	 * The next several lines of code checks if the passed filename
426	 * contains <session-dir>/samples; if so, we discard it as an
427	 * invalid sample file.
428	 */
429
430	unsigned int j = base_dir.rfind('/');
431	string session_samples_dir = base_dir.substr(0, j);
432	if (sub.find(session_samples_dir) != string::npos) {
433		invalid_sample_file = true;
434		return false;
435	}
436
437	// strip out generated JIT object files for samples of anonymous regions
438	if (is_jit_sample(sub))
439		return false;
440
441	filename_spec file_spec(filename, spec.extra_found_images);
442	if (spec.match(file_spec)) {
443		if (exclude_dependent && file_spec.is_dependent())
444			return false;
445		return true;
446	}
447
448	return false;
449}
450
451
452/**
453 * Print a warning message if we detect any sample buffer overflows
454 * occurred in the kernel driver.
455 */
456void warn_if_kern_buffs_overflow(string const & session_samples_dir)
457{
458	DIR * dir;
459	struct dirent * dirent;
460	string stats_path;
461	int ret = 0;
462
463	stats_path = session_samples_dir + "stats/";
464	ret = op_read_int_from_file((stats_path + "event_lost_overflow").
465				    c_str(), 0);
466
467	if (!(dir = opendir(stats_path.c_str()))) {
468		ret = -1;
469		goto done;
470	}
471
472	while ((dirent = readdir(dir)) && !ret) {
473		int cpu_nr;
474		string path;
475		if (sscanf(dirent->d_name, "cpu%d", &cpu_nr) != 1)
476			continue;
477		path = stats_path + dirent->d_name + "/";
478		ret = op_read_int_from_file((path + "sample_lost_overflow").
479					    c_str(), 0);
480	}
481	closedir(dir);
482
483 done:
484	if (ret > 0) {
485		cerr << "WARNING! The OProfile kernel driver reports sample "
486		     << "buffer overflows." << endl;
487		cerr << "Such overflows can result in incorrect sample attribution"
488		     << ", invalid sample" << endl
489		     <<	"files and other symptoms.  "
490		     << "See the oprofiled.log for details." << endl;
491		cerr << "You should adjust your sampling frequency to eliminate"
492		     << " (or at least minimize)" << endl
493		     <<	"these overflows." << endl;
494	}
495}
496
497
498}  // anonymous namespace
499
500
501list<string> profile_spec::generate_file_list(bool exclude_dependent,
502  bool exclude_cg) const
503{
504	// FIXME: isn't remove_duplicates faster than doing this, then copy() ?
505	set<string> unique_files;
506
507	vector<string> sessions = filter_session(session, session_exclude);
508
509	if (sessions.empty()) {
510		ostringstream os;
511		os << "No session given\n"
512		   << "included session was:\n";
513		copy(session.begin(), session.end(),
514		     ostream_iterator<string>(os, "\n"));
515		os << "excluded session was:\n";
516		copy(session_exclude.begin(), session_exclude.end(),
517		     ostream_iterator<string>(os, "\n"));
518		throw invalid_argument(os.str());
519	}
520
521	bool found_file = false;
522
523	vector<string>::const_iterator cit = sessions.begin();
524	vector<string>::const_iterator end = sessions.end();
525
526	for (; cit != end; ++cit) {
527		if (cit->empty())
528			continue;
529
530		string base_dir;
531		invalid_sample_file = false;
532		if ((*cit)[0] != '.' && (*cit)[0] != '/')
533			base_dir = archive_path + op_samples_dir;
534		base_dir += *cit;
535
536		base_dir = op_realpath(base_dir);
537
538		list<string> files;
539		create_file_list(files, base_dir, "*", true);
540
541		if (!files.empty()) {
542			found_file = true;
543			warn_if_kern_buffs_overflow(base_dir + "/");
544		}
545
546		list<string>::const_iterator it = files.begin();
547		list<string>::const_iterator fend = files.end();
548		for (; it != fend; ++it) {
549			if (valid_candidate(base_dir, *it, *this,
550			    exclude_dependent, exclude_cg)) {
551				unique_files.insert(*it);
552			}
553		}
554		if (invalid_sample_file) {
555			cerr << "Warning: Invalid sample files found in "
556			     << base_dir << endl;
557			cerr << "This problem can be caused by too high of a sampling rate."
558			     << endl;
559		}
560	}
561
562	if (!found_file) {
563		ostringstream os;
564		os  << "No sample file found: try running opcontrol --dump\n"
565		    << "or specify a session containing sample files\n";
566		throw op_fatal_error(os.str());
567	}
568
569	list<string> result;
570	copy(unique_files.begin(), unique_files.end(), back_inserter(result));
571
572	return result;
573}
574