#include <math.h>
#include "json.h"
#include "idletime.h"

static volatile struct idle_prof_common ipc;

/*
 * Get the time to complete a unit of work on a particular cpu.
 * The minimum time over CALIBRATE_RUNS runs is used.
 */
static double calibrate_unit(unsigned char *data)
{
	unsigned long t, i, j, k;
	struct timeval tps;
	double tunit = 0.0;

	for (i = 0; i < CALIBRATE_RUNS; i++) {

		fio_gettime(&tps, NULL);
		/* scale for less variance */
		for (j = 0; j < CALIBRATE_SCALE; j++) {
			/* unit of work */
			for (k = 0; k < page_size; k++) {
				data[(k + j) % page_size] = k % 256;
				/*
				 * We won't see STOP here. The check only
				 * exists to match the same statement in the
				 * profiling loop, so the calibrated unit of
				 * work includes its cost.
				 */
				if (ipc.status == IDLE_PROF_STATUS_PROF_STOP)
					return 0.0;
			}
		}

		t = utime_since_now(&tps);
		if (!t)
			continue;

		/* get the minimum time to complete CALIBRATE_SCALE units */
		if ((i == 0) || ((double)t < tunit))
			tunit = (double)t;
	}

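	/*
	 * utime_since_now() reports microseconds, so this is the per-unit
	 * time in usecs, taken from the fastest calibration run.
	 */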
	return tunit / CALIBRATE_SCALE;
}

static void free_cpu_affinity(struct idle_prof_thread *ipt)
{
#if defined(FIO_HAVE_CPU_AFFINITY)
	fio_cpuset_exit(&ipt->cpu_mask);
#endif
}

static int set_cpu_affinity(struct idle_prof_thread *ipt)
{
#if defined(FIO_HAVE_CPU_AFFINITY)
	if (fio_cpuset_init(&ipt->cpu_mask)) {
		log_err("fio: cpuset init failed\n");
		return -1;
	}

	fio_cpu_set(&ipt->cpu_mask, ipt->cpu);

	if (fio_setaffinity(gettid(), ipt->cpu_mask)) {
		log_err("fio: fio_setaffinity failed\n");
		fio_cpuset_exit(&ipt->cpu_mask);
		return -1;
	}

	return 0;
#else
	log_err("fio: fio_setaffinity not supported\n");
	return -1;
#endif
}

static void *idle_prof_thread_fn(void *data)
{
	int retval;
	unsigned long j, k;
	struct idle_prof_thread *ipt = data;

	/* wait until all threads are spawned */
	pthread_mutex_lock(&ipt->init_lock);

	/* exit if any other thread failed to start */
	if (ipc.status == IDLE_PROF_STATUS_ABORT) {
		pthread_mutex_unlock(&ipt->init_lock);
		return NULL;
	}

	retval = set_cpu_affinity(ipt);
	if (retval == -1) {
		ipt->state = TD_EXITED;
		pthread_mutex_unlock(&ipt->init_lock);
		return NULL;
	}

	ipt->cali_time = calibrate_unit(ipt->data);

	/* delay setting the IDLE scheduling class until now for better calibration accuracy */
#if defined(CONFIG_SCHED_IDLE)
	if ((retval = fio_set_sched_idle()))
		log_err("fio: fio_set_sched_idle failed\n");
#else
	retval = -1;
	log_err("fio: fio_set_sched_idle not supported\n");
#endif
	if (retval == -1) {
		ipt->state = TD_EXITED;
		pthread_mutex_unlock(&ipt->init_lock);
		goto do_exit;
	}

	ipt->state = TD_INITIALIZED;

	/* signal the main thread that calibration is done */
	pthread_cond_signal(&ipt->cond);
	pthread_mutex_unlock(&ipt->init_lock);

	/* wait for calibration on all other threads to finish */
	pthread_mutex_lock(&ipt->start_lock);

	/* exit if other threads failed to initialize */
	if (ipc.status == IDLE_PROF_STATUS_ABORT) {
		pthread_mutex_unlock(&ipt->start_lock);
		goto do_exit;
	}

	/* exit if we are doing calibration only */
	if (ipc.status == IDLE_PROF_STATUS_CALI_STOP) {
		pthread_mutex_unlock(&ipt->start_lock);
		goto do_exit;
	}

	fio_gettime(&ipt->tps, NULL);
	ipt->state = TD_RUNNING;

	j = 0;
	while (1) {
		for (k = 0; k < page_size; k++) {
			ipt->data[(k + j) % page_size] = k % 256;
			if (ipc.status == IDLE_PROF_STATUS_PROF_STOP) {
				fio_gettime(&ipt->tpe, NULL);
				goto idle_prof_done;
			}
		}
		j++;
	}

idle_prof_done:

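	/* count completed full passes plus the fraction of the last pass */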
	ipt->loops = j + (double) k / page_size;
	ipt->state = TD_EXITED;
	pthread_mutex_unlock(&ipt->start_lock);

do_exit:
	free_cpu_affinity(ipt);
	return NULL;
}

/* calculate the mean and standard deviation of the time to complete a unit of work */
static void calibration_stats(void)
{
	int i;
	double sum = 0.0, var = 0.0;
	struct idle_prof_thread *ipt;

	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		sum += ipt->cali_time;
	}

	ipc.cali_mean = sum / ipc.nr_cpus;

	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		var += pow(ipt->cali_time - ipc.cali_mean, 2);
	}

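	/* sample standard deviation across CPUs, hence the n - 1 divisor */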
	ipc.cali_stddev = sqrt(var / (ipc.nr_cpus - 1));
}

void fio_idle_prof_init(void)
{
	int i, ret;
	struct timeval tp;
	struct timespec ts;
	pthread_attr_t tattr;
	struct idle_prof_thread *ipt;

	ipc.nr_cpus = cpus_online();
	ipc.status = IDLE_PROF_STATUS_OK;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	if ((ret = pthread_attr_init(&tattr))) {
		log_err("fio: pthread_attr_init %s\n", strerror(ret));
		return;
	}
	if ((ret = pthread_attr_setscope(&tattr, PTHREAD_SCOPE_SYSTEM))) {
		log_err("fio: pthread_attr_setscope %s\n", strerror(ret));
		return;
	}

	ipc.ipts = malloc(ipc.nr_cpus * sizeof(struct idle_prof_thread));
	if (!ipc.ipts) {
		log_err("fio: malloc failed\n");
		return;
	}

	ipc.buf = malloc(ipc.nr_cpus * page_size);
	if (!ipc.buf) {
		log_err("fio: malloc failed\n");
		free(ipc.ipts);
		return;
	}

	/*
	 * profiling aborts on any single thread failure since the
	 * result won't be accurate if any cpu is not used.
	 */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];

		ipt->cpu = i;
		ipt->state = TD_NOT_CREATED;
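		/*
		 * each thread gets its own page-sized slice of the shared
		 * buffer, presumably to keep the per-cpu work off shared
		 * cache lines.
		 */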
		ipt->data = (unsigned char *)(ipc.buf + page_size * i);

		if ((ret = pthread_mutex_init(&ipt->init_lock, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_mutex_init %s\n", strerror(ret));
			break;
		}

		if ((ret = pthread_mutex_init(&ipt->start_lock, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_mutex_init %s\n", strerror(ret));
			break;
		}

		if ((ret = pthread_cond_init(&ipt->cond, NULL))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_cond_init %s\n", strerror(ret));
			break;
		}

		/* make sure all threads are spawned before they start */
		pthread_mutex_lock(&ipt->init_lock);

		/* make sure all threads finish init before profiling starts */
		pthread_mutex_lock(&ipt->start_lock);

		if ((ret = pthread_create(&ipt->thread, &tattr, idle_prof_thread_fn, ipt))) {
			ipc.status = IDLE_PROF_STATUS_ABORT;
			log_err("fio: pthread_create %s\n", strerror(ret));
			break;
		} else
			ipt->state = TD_CREATED;

		if ((ret = pthread_detach(ipt->thread))) {
			/* log error and let the thread spin */
			log_err("fio: pthread_detach %s\n", strerror(ret));
		}
	}

	/*
	 * let good threads continue so that they can exit
	 * if errors occurred on other threads earlier.
	 */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_unlock(&ipt->init_lock);
	}

	if (ipc.status == IDLE_PROF_STATUS_ABORT)
		return;

	/* wait for calibration to finish */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_lock(&ipt->init_lock);
		while ((ipt->state != TD_EXITED) &&
		       (ipt->state != TD_INITIALIZED)) {
			fio_gettime(&tp, NULL);
			ts.tv_sec = tp.tv_sec + 1;
			ts.tv_nsec = tp.tv_usec * 1000;
			pthread_cond_timedwait(&ipt->cond, &ipt->init_lock, &ts);
		}
		pthread_mutex_unlock(&ipt->init_lock);

		/*
		 * if any thread failed to initialize, the other threads
		 * will be aborted later, after fio_idle_prof_start.
		 */
		if (ipt->state == TD_EXITED)
			ipc.status = IDLE_PROF_STATUS_ABORT;
	}

	if (ipc.status != IDLE_PROF_STATUS_ABORT)
		calibration_stats();
	else
		ipc.cali_mean = ipc.cali_stddev = 0.0;

	if (ipc.opt == IDLE_PROF_OPT_CALI)
		ipc.status = IDLE_PROF_STATUS_CALI_STOP;
}

void fio_idle_prof_start(void)
{
	int i;
	struct idle_prof_thread *ipt;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	/* unlock regardless of whether abort is set */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_unlock(&ipt->start_lock);
	}
}

void fio_idle_prof_stop(void)
{
	int i;
	uint64_t runt;
	struct timeval tp;
	struct timespec ts;
	struct idle_prof_thread *ipt;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return;

	if (ipc.opt == IDLE_PROF_OPT_CALI)
		return;

	ipc.status = IDLE_PROF_STATUS_PROF_STOP;

	/* wait for all threads to exit from profiling */
	for (i = 0; i < ipc.nr_cpus; i++) {
		ipt = &ipc.ipts[i];
		pthread_mutex_lock(&ipt->start_lock);
		while ((ipt->state != TD_EXITED) &&
		       (ipt->state != TD_NOT_CREATED)) {
			fio_gettime(&tp, NULL);
			ts.tv_sec = tp.tv_sec + 1;
			ts.tv_nsec = tp.tv_usec * 1000;
			/* timed wait in case a signal is not received */
			pthread_cond_timedwait(&ipt->cond, &ipt->start_lock, &ts);
		}
		pthread_mutex_unlock(&ipt->start_lock);

		/* calculate idleness */
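		/*
		 * idleness is the fraction of the profiling window the
		 * SCHED_IDLE thread spent on unit work: loops completed
		 * times the calibrated mean time per loop, divided by the
		 * elapsed wall-clock time (runt, in usecs).
		 */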
		if (ipc.cali_mean != 0.0) {
			runt = utime_since(&ipt->tps, &ipt->tpe);
			if (runt)
				ipt->idleness = ipt->loops * ipc.cali_mean / runt;
			else
				ipt->idleness = 0.0;
		} else
			ipt->idleness = 0.0;
	}

	/*
	 * memory allocations are freed via an explicit fio_idle_prof_cleanup
	 * call after the profiling stats have been collected by the caller.
	 */
}

/*
 * return the system-wide idle percentage when cpu is -1;
 * otherwise return the idle percentage of the given cpu.
 */
static double fio_idle_prof_cpu_stat(int cpu)
{
	int i, nr_cpus = ipc.nr_cpus;
	struct idle_prof_thread *ipt;
	double p = 0.0;

	if (ipc.opt == IDLE_PROF_OPT_NONE)
		return 0.0;

	if ((cpu >= nr_cpus) || (cpu < -1)) {
		log_err("fio: idle profiling invalid cpu index\n");
		return 0.0;
	}

	if (cpu == -1) {
		for (i = 0; i < nr_cpus; i++) {
			ipt = &ipc.ipts[i];
			p += ipt->idleness;
		}
		p /= nr_cpus;
	} else {
		ipt = &ipc.ipts[cpu];
		p = ipt->idleness;
	}

	return p * 100.0;
}

static void fio_idle_prof_cleanup(void)
{
	if (ipc.ipts) {
		free(ipc.ipts);
		ipc.ipts = NULL;
	}

	if (ipc.buf) {
		free(ipc.buf);
		ipc.buf = NULL;
	}
}

int fio_idle_prof_parse_opt(const char *args)
{
	ipc.opt = IDLE_PROF_OPT_NONE; /* default */

	if (!args) {
		log_err("fio: empty idle-prof option string\n");
		return -1;
	}

#if defined(FIO_HAVE_CPU_AFFINITY) && defined(CONFIG_SCHED_IDLE)
	if (strcmp("calibrate", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_CALI;
		fio_idle_prof_init();
		fio_idle_prof_start();
		fio_idle_prof_stop();
		show_idle_prof_stats(FIO_OUTPUT_NORMAL, NULL);
		return 1;
	} else if (strcmp("system", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_SYSTEM;
		return 0;
	} else if (strcmp("percpu", args) == 0) {
		ipc.opt = IDLE_PROF_OPT_PERCPU;
		return 0;
	} else {
		log_err("fio: incorrect idle-prof option: %s\n", args);
		return -1;
	}
#else
	log_err("fio: idle-prof not supported on this platform\n");
	return -1;
#endif
}

void show_idle_prof_stats(int output, struct json_object *parent)
{
	int i, nr_cpus = ipc.nr_cpus;
	struct json_object *tmp;
	char s[MAX_CPU_STR_LEN];

	if (output == FIO_OUTPUT_NORMAL) {
		if (ipc.opt > IDLE_PROF_OPT_CALI)
			log_info("\nCPU idleness:\n");
		else if (ipc.opt == IDLE_PROF_OPT_CALI)
			log_info("CPU idleness:\n");

		if (ipc.opt >= IDLE_PROF_OPT_SYSTEM)
			log_info("  system: %3.2f%%\n", fio_idle_prof_cpu_stat(-1));

		if (ipc.opt == IDLE_PROF_OPT_PERCPU) {
			log_info("  percpu: %3.2f%%", fio_idle_prof_cpu_stat(0));
			for (i = 1; i < nr_cpus; i++)
				log_info(", %3.2f%%", fio_idle_prof_cpu_stat(i));
			log_info("\n");
		}

		if (ipc.opt >= IDLE_PROF_OPT_CALI) {
			log_info("  unit work: mean=%3.2fus,", ipc.cali_mean);
			log_info(" stddev=%3.2f\n", ipc.cali_stddev);
		}

		/* dynamic mem allocations can now be freed */
		if (ipc.opt != IDLE_PROF_OPT_NONE)
			fio_idle_prof_cleanup();

		return;
	}

	if ((ipc.opt != IDLE_PROF_OPT_NONE) && (output == FIO_OUTPUT_JSON)) {
		if (!parent)
			return;

		tmp = json_create_object();
		if (!tmp)
			return;

		json_object_add_value_object(parent, "cpu_idleness", tmp);
		json_object_add_value_float(tmp, "system", fio_idle_prof_cpu_stat(-1));

		if (ipc.opt == IDLE_PROF_OPT_PERCPU) {
			for (i = 0; i < nr_cpus; i++) {
				snprintf(s, MAX_CPU_STR_LEN, "cpu-%d", i);
				json_object_add_value_float(tmp, s, fio_idle_prof_cpu_stat(i));
			}
		}

		json_object_add_value_float(tmp, "unit_mean", ipc.cali_mean);
		json_object_add_value_float(tmp, "unit_stddev", ipc.cali_stddev);

		fio_idle_prof_cleanup();
	}
}