/*
 * Clock functions
 */

#include <unistd.h>
#include <math.h>
#include <sys/time.h>
#include <time.h>

#include "fio.h"
#include "smalloc.h"

#include "hash.h"
#include "os/os.h"

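/*
 * Calibration state for converting raw CPU clock cycles to microseconds.
 * inv_cycles_per_usec is a fixed-point reciprocal scaled by 2^24 (16777216),
 * so usecs = (cycles * inv_cycles_per_usec) / 2^24 as long as the multiply
 * cannot overflow 64 bits; max_cycles_for_mult marks that overflow bound.
 * For example, at 3000 cycles/usec the reciprocal is 16777216 / 3000 = 5592,
 * and t * 5592 / 16777216 approximates t / 3000.
 */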
#if defined(ARCH_HAVE_CPU_CLOCK) && !defined(ARCH_CPU_CLOCK_CYCLES_PER_USEC)
static unsigned long cycles_per_usec;
static unsigned long inv_cycles_per_usec;
static uint64_t max_cycles_for_mult;
#endif
#ifdef ARCH_CPU_CLOCK_WRAPS
static unsigned long long cycles_start, cycles_wrap;
#endif
int tsc_reliable = 0;

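/*
 * Per-thread state for the CPU clock source: the last cycle count read,
 * whether that reading is valid, and whether a wrap warning has been issued.
 */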
struct tv_valid {
	uint64_t last_cycles;
	int last_tv_valid;
	int warned;
};
#ifdef ARCH_HAVE_CPU_CLOCK
#ifdef CONFIG_TLS_THREAD
static __thread struct tv_valid static_tv_valid;
#else
static pthread_key_t tv_tls_key;
#endif
#endif

enum fio_cs fio_clock_source = FIO_PREFERRED_CLOCK_SOURCE;
int fio_clock_source_set = 0;
static enum fio_cs fio_clock_source_inited = CS_INVAL;

#ifdef FIO_DEBUG_TIME

#define HASH_BITS	8
#define HASH_SIZE	(1 << HASH_BITS)

static struct flist_head hash[HASH_SIZE];
static int gtod_inited;

struct gtod_log {
	struct flist_head list;
	void *caller;
	unsigned long calls;
};

static struct gtod_log *find_hash(void *caller)
{
	unsigned long h = hash_ptr(caller, HASH_BITS);
	struct flist_head *entry;

	flist_for_each(entry, &hash[h]) {
		struct gtod_log *log = flist_entry(entry, struct gtod_log,
									list);

		if (log->caller == caller)
			return log;
	}

	return NULL;
}

static void inc_caller(void *caller)
{
	struct gtod_log *log = find_hash(caller);

	if (!log) {
		unsigned long h;

		log = malloc(sizeof(*log));
		INIT_FLIST_HEAD(&log->list);
		log->caller = caller;
		log->calls = 0;

		h = hash_ptr(caller, HASH_BITS);
		flist_add_tail(&log->list, &hash[h]);
	}

	log->calls++;
}

static void gtod_log_caller(void *caller)
{
	if (gtod_inited)
		inc_caller(caller);
}

static void fio_exit fio_dump_gtod(void)
{
	unsigned long total_calls = 0;
	int i;

	for (i = 0; i < HASH_SIZE; i++) {
		struct flist_head *entry;
		struct gtod_log *log;

		flist_for_each(entry, &hash[i]) {
			log = flist_entry(entry, struct gtod_log, list);

			printf("function %p, calls %lu\n", log->caller,
								log->calls);
			total_calls += log->calls;
		}
	}

	printf("Total %lu gettimeofday\n", total_calls);
}

static void fio_init gtod_init(void)
{
	int i;

	for (i = 0; i < HASH_SIZE; i++)
		INIT_FLIST_HEAD(&hash[i]);

	gtod_inited = 1;
}

#endif /* FIO_DEBUG_TIME */

#ifdef CONFIG_CLOCK_GETTIME
static int fill_clock_gettime(struct timespec *ts)
{
#ifdef CONFIG_CLOCK_MONOTONIC
	return clock_gettime(CLOCK_MONOTONIC, ts);
#else
	return clock_gettime(CLOCK_REALTIME, ts);
#endif
}
#endif

static void __fio_gettime(struct timeval *tp)
{
	switch (fio_clock_source) {
#ifdef CONFIG_GETTIMEOFDAY
	case CS_GTOD:
		gettimeofday(tp, NULL);
		break;
#endif
#ifdef CONFIG_CLOCK_GETTIME
	case CS_CGETTIME: {
		struct timespec ts;

		if (fill_clock_gettime(&ts) < 0) {
			log_err("fio: clock_gettime failed\n");
			assert(0);
		}

		tp->tv_sec = ts.tv_sec;
		tp->tv_usec = ts.tv_nsec / 1000;
		break;
		}
#endif
#ifdef ARCH_HAVE_CPU_CLOCK
	case CS_CPUCLOCK: {
		uint64_t usecs, t;
		struct tv_valid *tv;

#ifdef CONFIG_TLS_THREAD
		tv = &static_tv_valid;
#else
		tv = pthread_getspecific(tv_tls_key);
#endif

		t = get_cpu_clock();
#ifdef ARCH_CPU_CLOCK_WRAPS
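		/*
		 * For clocks that can wrap: a reading below the baseline taken
		 * at calibration time is treated as one wrap of the counter. A
		 * second apparent wrap cannot be represented, so warn once.
		 */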
		if (t < cycles_start && !cycles_wrap)
			cycles_wrap = 1;
		else if (cycles_wrap && t >= cycles_start && !tv->warned) {
			log_err("fio: double CPU clock wrap\n");
			tv->warned = 1;
		}

		t -= cycles_start;
#endif
		tv->last_cycles = t;
		tv->last_tv_valid = 1;
#ifdef ARCH_CPU_CLOCK_CYCLES_PER_USEC
		usecs = t / ARCH_CPU_CLOCK_CYCLES_PER_USEC;
#else
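		/*
		 * Prefer the 2^24 fixed-point multiply while the cycle count
		 * is small enough that it cannot overflow 64 bits; fall back
		 * to a full division for larger values.
		 */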
		if (t < max_cycles_for_mult)
			usecs = (t * inv_cycles_per_usec) / 16777216UL;
		else
			usecs = t / cycles_per_usec;
#endif
		tp->tv_sec = usecs / 1000000;
		tp->tv_usec = usecs % 1000000;
		break;
		}
#endif
	default:
		log_err("fio: invalid clock source %d\n", fio_clock_source);
		break;
	}
}

#ifdef FIO_DEBUG_TIME
void fio_gettime(struct timeval *tp, void *caller)
#else
void fio_gettime(struct timeval *tp, void fio_unused *caller)
#endif
{
#ifdef FIO_DEBUG_TIME
	if (!caller)
		caller = __builtin_return_address(0);

	gtod_log_caller(caller);
#endif
	if (fio_unlikely(fio_gettime_offload(tp)))
		return;

	__fio_gettime(tp);
}

#if defined(ARCH_HAVE_CPU_CLOCK) && !defined(ARCH_CPU_CLOCK_CYCLES_PER_USEC)
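/*
 * Time a roughly 1280 usec interval against the reference clock source and
 * divide the elapsed CPU cycles by 128 (rounding up), so the value returned
 * is approximately ten times the true cycles-per-usec rate. That extra factor
 * of ten is divided back out in calibrate_cpu_clock().
 */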
static unsigned long get_cycles_per_usec(void)
{
	struct timeval s, e;
	uint64_t c_s, c_e;
	enum fio_cs old_cs = fio_clock_source;

#ifdef CONFIG_CLOCK_GETTIME
	fio_clock_source = CS_CGETTIME;
#else
	fio_clock_source = CS_GTOD;
#endif
	__fio_gettime(&s);

	c_s = get_cpu_clock();
	do {
		uint64_t elapsed;

		__fio_gettime(&e);

		elapsed = utime_since(&s, &e);
		if (elapsed >= 1280) {
			c_e = get_cpu_clock();
			break;
		}
	} while (1);

	fio_clock_source = old_cs;
	return (c_e - c_s + 127) >> 7;
}

#define NR_TIME_ITERS	50

static int calibrate_cpu_clock(void)
{
	double delta, mean, S;
	uint64_t minc, maxc, avg, cycles[NR_TIME_ITERS];
	int i, samples;

	cycles[0] = get_cycles_per_usec();
	S = delta = mean = 0.0;
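	/*
	 * Welford's online algorithm: accumulate the running mean and the
	 * sum of squared deviations (S) in a single pass over the samples.
	 */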
	for (i = 0; i < NR_TIME_ITERS; i++) {
		cycles[i] = get_cycles_per_usec();
		delta = cycles[i] - mean;
		if (delta) {
			mean += delta / (i + 1.0);
			S += delta * (cycles[i] - mean);
		}
	}

	/*
	 * The most common platform clock breakage is returning zero
	 * indefinitely. Check for that and return failure.
	 */
	if (!cycles[0] && !cycles[NR_TIME_ITERS - 1])
		return 1;

	S = sqrt(S / (NR_TIME_ITERS - 1.0));

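	/*
	 * Average only the samples within one standard deviation of the mean,
	 * so that occasional outliers do not skew the calibration.
	 */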
	minc = -1ULL;
	maxc = samples = avg = 0;
	for (i = 0; i < NR_TIME_ITERS; i++) {
		double this = cycles[i];

		minc = min(cycles[i], minc);
		maxc = max(cycles[i], maxc);

		if ((fmax(this, mean) - fmin(this, mean)) > S)
			continue;
		samples++;
		avg += this;
	}

	S /= (double) NR_TIME_ITERS;
	mean /= 10.0;

	for (i = 0; i < NR_TIME_ITERS; i++)
		dprint(FD_TIME, "cycles[%d]=%llu\n", i,
					(unsigned long long) cycles[i] / 10);

	avg /= samples;
	avg = (avg + 5) / 10;
	minc /= 10;
	maxc /= 10;
	dprint(FD_TIME, "avg: %llu\n", (unsigned long long) avg);
	dprint(FD_TIME, "min=%llu, max=%llu, mean=%f, S=%f\n",
			(unsigned long long) minc,
			(unsigned long long) maxc, mean, S);

	cycles_per_usec = avg;
	inv_cycles_per_usec = 16777216UL / cycles_per_usec;
	max_cycles_for_mult = ~0ULL / inv_cycles_per_usec;
	dprint(FD_TIME, "inv_cycles_per_usec=%lu\n", inv_cycles_per_usec);
#ifdef ARCH_CPU_CLOCK_WRAPS
	cycles_start = get_cpu_clock();
	dprint(FD_TIME, "cycles_start=%llu\n", cycles_start);
#endif
	return 0;
}
#else
static int calibrate_cpu_clock(void)
{
#ifdef ARCH_CPU_CLOCK_CYCLES_PER_USEC
	return 0;
#else
	return 1;
#endif
}
#endif // ARCH_HAVE_CPU_CLOCK

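/*
 * When compiler-level TLS is unavailable, per-thread tv_valid state hangs off
 * a pthread key instead: each thread allocates its own record in
 * fio_local_clock_init(), and the key destructor frees it on thread exit.
 */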
#ifndef CONFIG_TLS_THREAD
void fio_local_clock_init(int is_thread)
{
	struct tv_valid *t;

	t = calloc(1, sizeof(*t));
	if (pthread_setspecific(tv_tls_key, t)) {
		log_err("fio: can't set TLS key\n");
		assert(0);
	}
}

static void kill_tv_tls_key(void *data)
{
	free(data);
}
#else
void fio_local_clock_init(int is_thread)
{
}
#endif

void fio_clock_init(void)
{
	if (fio_clock_source == fio_clock_source_inited)
		return;

#ifndef CONFIG_TLS_THREAD
	if (pthread_key_create(&tv_tls_key, kill_tv_tls_key))
		log_err("fio: can't create TLS key\n");
#endif

	fio_clock_source_inited = fio_clock_source;

	if (calibrate_cpu_clock())
		tsc_reliable = 0;

	/*
	 * If the arch sets tsc_reliable != 0, then it must be good enough
	 * to use as THE clock source. For x86 CPUs, this means the TSC
	 * runs at a constant rate and is synced across CPU cores.
	 */
	if (tsc_reliable) {
		if (!fio_clock_source_set && !fio_monotonic_clocktest(0))
			fio_clock_source = CS_CPUCLOCK;
	} else if (fio_clock_source == CS_CPUCLOCK)
		log_info("fio: clocksource=cpu may not be reliable\n");
}

uint64_t utime_since(const struct timeval *s, const struct timeval *e)
{
	long sec, usec;
	uint64_t ret;

	sec = e->tv_sec - s->tv_sec;
	usec = e->tv_usec - s->tv_usec;
	if (sec > 0 && usec < 0) {
		sec--;
		usec += 1000000;
	}

	/*
	 * time warp bug on some kernels?
	 */
	if (sec < 0 || (sec == 0 && usec < 0))
		return 0;

	ret = sec * 1000000ULL + usec;

	return ret;
}

uint64_t utime_since_now(const struct timeval *s)
{
	struct timeval t;

	fio_gettime(&t, NULL);
	return utime_since(s, &t);
}

uint64_t mtime_since(const struct timeval *s, const struct timeval *e)
{
	long sec, usec, ret;

	sec = e->tv_sec - s->tv_sec;
	usec = e->tv_usec - s->tv_usec;
	if (sec > 0 && usec < 0) {
		sec--;
		usec += 1000000;
	}

	if (sec < 0 || (sec == 0 && usec < 0))
		return 0;

	sec *= 1000UL;
	usec /= 1000UL;
	ret = sec + usec;

	return ret;
}

uint64_t mtime_since_now(const struct timeval *s)
{
	struct timeval t;
	void *p = __builtin_return_address(0);

	fio_gettime(&t, p);
	return mtime_since(s, &t);
}

uint64_t time_since_now(const struct timeval *s)
{
	return mtime_since_now(s) / 1000;
}

#if defined(FIO_HAVE_CPU_AFFINITY) && defined(ARCH_HAVE_CPU_CLOCK) && \
    defined(CONFIG_SFAA)

#define CLOCK_ENTRIES_DEBUG	100000
#define CLOCK_ENTRIES_TEST	10000

struct clock_entry {
	uint32_t seq;
	uint32_t cpu;
	uint64_t tsc;
};

struct clock_thread {
	pthread_t thread;
	int cpu;
	int debug;
	pthread_mutex_t lock;
	pthread_mutex_t started;
	unsigned long nr_entries;
	uint32_t *seq;
	struct clock_entry *entries;
};

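/*
 * __sync_fetch_and_add() returns the value before the increment, so adding
 * one yields the new, post-increment sequence number.
 */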
static inline uint32_t atomic32_inc_return(uint32_t *seq)
{
	return 1 + __sync_fetch_and_add(seq, 1);
}

static void *clock_thread_fn(void *data)
{
	struct clock_thread *t = data;
	struct clock_entry *c;
	os_cpu_mask_t cpu_mask;
	uint32_t last_seq;
	int i;

	if (fio_cpuset_init(&cpu_mask)) {
		int __err = errno;

		log_err("clock cpuset init failed: %s\n", strerror(__err));
		goto err_out;
	}

	fio_cpu_set(&cpu_mask, t->cpu);

	if (fio_setaffinity(gettid(), cpu_mask) == -1) {
		int __err = errno;

		log_err("clock setaffinity failed: %s\n", strerror(__err));
		goto err;
	}

	pthread_mutex_lock(&t->lock);
	pthread_mutex_unlock(&t->started);

	last_seq = 0;
	c = &t->entries[0];
	for (i = 0; i < t->nr_entries; i++, c++) {
		uint32_t seq;
		uint64_t tsc;

		c->cpu = t->cpu;
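		/*
		 * Grab a fresh global sequence number, read the TSC, and
		 * retry if another thread advanced the sequence in between.
		 * Each recorded (seq, tsc) pair then reflects a consistent
		 * global ordering of samples across CPUs.
		 */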
		do {
			seq = atomic32_inc_return(t->seq);
			if (seq < last_seq)
				break;
			tsc = get_cpu_clock();
		} while (seq != *t->seq);

		c->seq = seq;
		c->tsc = tsc;
		/* remember the previous sequence so the wrap check can fire */
		last_seq = seq;
	}

	if (t->debug) {
		unsigned long long clocks;

		clocks = t->entries[i - 1].tsc - t->entries[0].tsc;
		log_info("cs: cpu%3d: %llu clocks seen\n", t->cpu, clocks);
	}

	/*
	 * The most common platform clock breakage is returning zero
	 * indefinitely. Check for that and return failure.
	 */
	if (!t->entries[i - 1].tsc && !t->entries[0].tsc)
		goto err;

	fio_cpuset_exit(&cpu_mask);
	return NULL;
err:
	fio_cpuset_exit(&cpu_mask);
err_out:
	return (void *) 1;
}

static int clock_cmp(const void *p1, const void *p2)
{
	const struct clock_entry *c1 = p1;
	const struct clock_entry *c2 = p2;

	if (c1->seq == c2->seq)
		log_err("cs: bug in atomic sequence!\n");

	return c1->seq - c2->seq;
}

int fio_monotonic_clocktest(int debug)
{
	struct clock_thread *cthreads;
	unsigned int nr_cpus = cpus_online();
	struct clock_entry *entries;
	unsigned long nr_entries, tentries, failed = 0;
	struct clock_entry *prev, *this;
	uint32_t seq = 0;
	unsigned int i;

	if (debug) {
		log_info("cs: reliable_tsc: %s\n", tsc_reliable ? "yes" : "no");

#ifdef FIO_INC_DEBUG
		fio_debug |= 1U << FD_TIME;
#endif
		nr_entries = CLOCK_ENTRIES_DEBUG;
	} else
		nr_entries = CLOCK_ENTRIES_TEST;

	calibrate_cpu_clock();

	if (debug) {
#ifdef FIO_INC_DEBUG
		fio_debug &= ~(1U << FD_TIME);
#endif
	}

	cthreads = malloc(nr_cpus * sizeof(struct clock_thread));
	tentries = nr_entries * nr_cpus;
	entries = malloc(tentries * sizeof(struct clock_entry));

	if (debug)
		log_info("cs: Testing %u CPUs\n", nr_cpus);

	for (i = 0; i < nr_cpus; i++) {
		struct clock_thread *t = &cthreads[i];

		t->cpu = i;
		t->debug = debug;
		t->seq = &seq;
		t->nr_entries = nr_entries;
		t->entries = &entries[i * nr_entries];
		pthread_mutex_init(&t->lock, NULL);
		pthread_mutex_init(&t->started, NULL);
		pthread_mutex_lock(&t->lock);
		if (pthread_create(&t->thread, NULL, clock_thread_fn, t)) {
			failed++;
			nr_cpus = i;
			break;
		}
	}

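	/*
	 * Workers unlock their 'started' mutex on the way into the sampling
	 * loop; releasing each per-thread 'lock' below is what lets them
	 * proceed.
	 */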
	for (i = 0; i < nr_cpus; i++) {
		struct clock_thread *t = &cthreads[i];

		pthread_mutex_lock(&t->started);
	}

	for (i = 0; i < nr_cpus; i++) {
		struct clock_thread *t = &cthreads[i];

		pthread_mutex_unlock(&t->lock);
	}

	for (i = 0; i < nr_cpus; i++) {
		struct clock_thread *t = &cthreads[i];
		void *ret;

		pthread_join(t->thread, &ret);
		if (ret)
			failed++;
	}
	free(cthreads);

	if (failed) {
		if (debug)
			log_err("Clocksource test: %lu threads failed\n", failed);
		goto err;
	}

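	/*
	 * Sort every sample by its global sequence number. If the TSC ever
	 * runs backwards between consecutive sequence numbers, the CPU clocks
	 * are not synchronized across cores and the test fails.
	 */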
	qsort(entries, tentries, sizeof(struct clock_entry), clock_cmp);

	for (failed = i = 0; i < tentries; i++) {
		this = &entries[i];

		if (!i) {
			prev = this;
			continue;
		}

		if (prev->tsc > this->tsc) {
			uint64_t diff = prev->tsc - this->tsc;

			if (!debug) {
				failed++;
				break;
			}

			log_info("cs: CPU clock mismatch (diff=%llu):\n",
						(unsigned long long) diff);
			log_info("\t CPU%3u: TSC=%llu, SEQ=%u\n", prev->cpu, (unsigned long long) prev->tsc, prev->seq);
			log_info("\t CPU%3u: TSC=%llu, SEQ=%u\n", this->cpu, (unsigned long long) this->tsc, this->seq);
			failed++;
		}

		prev = this;
	}

	if (debug) {
		if (failed)
			log_info("cs: Failed: %lu\n", failed);
		else
			log_info("cs: Pass!\n");
	}
err:
	free(entries);
	return !!failed;
}

#else /* defined(FIO_HAVE_CPU_AFFINITY) && defined(ARCH_HAVE_CPU_CLOCK) && defined(CONFIG_SFAA) */

int fio_monotonic_clocktest(int debug)
{
	if (debug)
		log_info("cs: current platform does not support CPU clocks\n");
	return 1;
}

#endif
