1/*
2 * kernel/sched/debug.c
3 *
4 * Print the CFS rbtree
5 *
6 * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 as
10 * published by the Free Software Foundation.
11 */
12
13#include <linux/proc_fs.h>
14#include <linux/sched.h>
15#include <linux/seq_file.h>
16#include <linux/kallsyms.h>
17#include <linux/utsname.h>
18#include <linux/mempolicy.h>
19
20#include "sched.h"
21
/*
 * Taken with interrupts disabled by print_cpu() around its calls to
 * print_cfs_stats(), print_rt_stats() and print_rq().
 * NOTE(review): presumably this is also what serializes users of the
 * static group_path buffer -- confirm.
 */
static DEFINE_SPINLOCK(sched_debug_lock);
23
/*
 * Output helper shared by the /proc/sched_debug code and the console
 * dump: a non-NULL seq_file receives the text through seq_printf(),
 * a NULL one sends it to printk() instead.
 */
#define SEQ_printf(m, ...)			\
 do {						\
	if (!(m))				\
		printk(__VA_ARGS__);		\
	else					\
		seq_printf(m, __VA_ARGS__);	\
 } while (0)
35
/*
 * Helpers for printing nanosecond values as "<quotient>.<remainder>"
 * with a divisor of 1000000.
 *
 * nsec_high() returns the quotient: @nsec is interpreted as a signed
 * quantity, its magnitude is divided, and the sign is put back on the
 * result.
 */
static long long nsec_high(unsigned long long nsec)
{
	int negative = (long long)nsec < 0;

	if (negative)
		nsec = -nsec;

	do_div(nsec, 1000000);

	return negative ? -nsec : nsec;
}
50
/*
 * Remainder counterpart of nsec_high(): returns what do_div() leaves over
 * when the magnitude of @nsec (taken as a signed value) is divided by
 * 1000000.
 */
static unsigned long nsec_low(unsigned long long nsec)
{
	unsigned long long magnitude = nsec;

	if ((long long)nsec < 0)
		magnitude = -nsec;

	return do_div(magnitude, 1000000);
}
58
/*
 * Expands to TWO comma-separated arguments, nsec_high(x) and nsec_low(x),
 * meant to feed a "%Ld.%06ld"-style pair of conversions.  Note that @x is
 * evaluated twice.
 */
#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
60
#ifdef CONFIG_FAIR_GROUP_SCHED
/*
 * Print the scheduling-entity statistics of task group @tg on @cpu.
 *
 * @m:   seq_file to print into, or NULL to print via printk()
 * @cpu: cpu whose entity of @tg (tg->se[cpu]) is dumped
 * @tg:  task group to report on
 *
 * When @tg has no entity on @cpu only the runqueue's runnable average
 * sum and period are printed.
 */
static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
{
	struct sched_entity *se = tg->se[cpu];

/*
 * P() prints an integer field, PN() a nanosecond field split by SPLIT_NS().
 * Both label the value with the stringified argument, so the expression
 * text (including the local variable names) is part of the output.
 */
#define P(F) \
	SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)F)
#define PN(F) \
	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))

	/* No entity for this group on this cpu: report the rq averages only. */
	if (!se) {
		struct sched_avg *avg = &cpu_rq(cpu)->avg;
		P(avg->runnable_avg_sum);
		P(avg->runnable_avg_period);
		return;
	}


	PN(se->exec_start);
	PN(se->vruntime);
	PN(se->sum_exec_runtime);
#ifdef CONFIG_SCHEDSTATS
	PN(se->statistics.wait_start);
	PN(se->statistics.sleep_start);
	PN(se->statistics.block_start);
	PN(se->statistics.sleep_max);
	PN(se->statistics.block_max);
	PN(se->statistics.exec_max);
	PN(se->statistics.slice_max);
	PN(se->statistics.wait_max);
	PN(se->statistics.wait_sum);
	P(se->statistics.wait_count);
#endif
	P(se->load.weight);
#ifdef CONFIG_SMP
	P(se->avg.runnable_avg_sum);
	P(se->avg.runnable_avg_period);
	P(se->avg.load_avg_contrib);
	P(se->avg.decay_count);
#endif
#undef PN
#undef P
}
#endif
105
#ifdef CONFIG_CGROUP_SCHED
/* Scratch buffer handed to the path helpers by task_group_path(). */
static char group_path[PATH_MAX];

/*
 * Return a printable path for @tg.
 *
 * autogroup_path() gets the first chance to fill group_path; when it
 * reports zero, whatever cgroup_path() returns for the group's cgroup is
 * handed back instead.  Both helpers are given the same static buffer.
 */
static char *task_group_path(struct task_group *tg)
{
	if (!autogroup_path(tg, group_path, PATH_MAX))
		return cgroup_path(tg->css.cgroup, group_path, PATH_MAX);

	return group_path;
}
#endif
117
118static void
119print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
120{
121	if (rq->curr == p)
122		SEQ_printf(m, "R");
123	else
124		SEQ_printf(m, " ");
125
126	SEQ_printf(m, "%15s %5d %9Ld.%06ld %9Ld %5d ",
127		p->comm, task_pid_nr(p),
128		SPLIT_NS(p->se.vruntime),
129		(long long)(p->nvcsw + p->nivcsw),
130		p->prio);
131#ifdef CONFIG_SCHEDSTATS
132	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
133		SPLIT_NS(p->se.vruntime),
134		SPLIT_NS(p->se.sum_exec_runtime),
135		SPLIT_NS(p->se.statistics.sum_sleep_runtime));
136#else
137	SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld",
138		0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
139#endif
140#ifdef CONFIG_NUMA_BALANCING
141	SEQ_printf(m, " %d", task_node(p));
142#endif
143#ifdef CONFIG_CGROUP_SCHED
144	SEQ_printf(m, " %s", task_group_path(task_group(p)));
145#endif
146
147	SEQ_printf(m, "\n");
148}
149
150static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
151{
152	struct task_struct *g, *p;
153
154	SEQ_printf(m,
155	"\nrunnable tasks:\n"
156	"            task   PID         tree-key  switches  prio"
157	"     exec-runtime         sum-exec        sum-sleep\n"
158	"------------------------------------------------------"
159	"----------------------------------------------------\n");
160
161	rcu_read_lock();
162	for_each_process_thread(g, p) {
163		if (task_cpu(p) != rq_cpu)
164			continue;
165
166		print_task(m, rq, p);
167	}
168	rcu_read_unlock();
169}
170
/*
 * Print the state of one CFS runqueue.
 *
 * @m:      seq_file to print into, or NULL to print via printk()
 * @cpu:    cpu the runqueue belongs to
 * @cfs_rq: runqueue to dump
 *
 * The vruntime figures are sampled while holding the rq lock of @cpu and
 * printed after it has been dropped; every other field is read without
 * taking that lock here.
 */
void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
{
	/* MIN_/max_vruntime stay at -1 when there is no first/last entity */
	s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
		spread, rq0_min_vruntime, spread0;
	struct rq *rq = cpu_rq(cpu);
	struct sched_entity *last;
	unsigned long flags;

#ifdef CONFIG_FAIR_GROUP_SCHED
	SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg));
#else
	SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu);
#endif
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
			SPLIT_NS(cfs_rq->exec_clock));

	/* Snapshot the vruntime values under this cpu's rq lock. */
	raw_spin_lock_irqsave(&rq->lock, flags);
	if (cfs_rq->rb_leftmost)
		MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
	last = __pick_last_entity(cfs_rq);
	if (last)
		max_vruntime = last->vruntime;
	min_vruntime = cfs_rq->min_vruntime;
	/* NOTE(review): cpu 0's value is read while holding only @cpu's lock */
	rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
	raw_spin_unlock_irqrestore(&rq->lock, flags);
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
			SPLIT_NS(MIN_vruntime));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
			SPLIT_NS(min_vruntime));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "max_vruntime",
			SPLIT_NS(max_vruntime));
	/* spread: distance between the last and the first entity */
	spread = max_vruntime - MIN_vruntime;
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread",
			SPLIT_NS(spread));
	/* spread0: this queue's min_vruntime relative to cpu 0's */
	spread0 = min_vruntime - rq0_min_vruntime;
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
			SPLIT_NS(spread0));
	SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
			cfs_rq->nr_spread_over);
	SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
	/* NOTE(review): print_cpu() prints rq->load.weight with %lu, this uses %ld */
	SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
#ifdef CONFIG_SMP
	SEQ_printf(m, "  .%-30s: %ld\n", "runnable_load_avg",
			cfs_rq->runnable_load_avg);
	SEQ_printf(m, "  .%-30s: %ld\n", "blocked_load_avg",
			cfs_rq->blocked_load_avg);
#ifdef CONFIG_FAIR_GROUP_SCHED
	SEQ_printf(m, "  .%-30s: %ld\n", "tg_load_contrib",
			cfs_rq->tg_load_contrib);
	SEQ_printf(m, "  .%-30s: %d\n", "tg_runnable_contrib",
			cfs_rq->tg_runnable_contrib);
	SEQ_printf(m, "  .%-30s: %ld\n", "tg_load_avg",
			atomic_long_read(&cfs_rq->tg->load_avg));
	SEQ_printf(m, "  .%-30s: %d\n", "tg->runnable_avg",
			atomic_read(&cfs_rq->tg->runnable_avg));
#endif
#endif
#ifdef CONFIG_CFS_BANDWIDTH
	SEQ_printf(m, "  .%-30s: %d\n", "tg->cfs_bandwidth.timer_active",
			cfs_rq->tg->cfs_bandwidth.timer_active);
	SEQ_printf(m, "  .%-30s: %d\n", "throttled",
			cfs_rq->throttled);
	SEQ_printf(m, "  .%-30s: %d\n", "throttle_count",
			cfs_rq->throttle_count);
#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
	/* Finish with the per-group entity statistics for this cpu. */
	print_cfs_group_stats(m, cpu, cfs_rq->tg);
#endif
}
241
242void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
243{
244#ifdef CONFIG_RT_GROUP_SCHED
245	SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg));
246#else
247	SEQ_printf(m, "\nrt_rq[%d]:\n", cpu);
248#endif
249
250#define P(x) \
251	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
252#define PN(x) \
253	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))
254
255	P(rt_nr_running);
256	P(rt_throttled);
257	PN(rt_time);
258	PN(rt_runtime);
259
260#undef PN
261#undef P
262}
263
/*
 * NOTE(review): no use of this symbol is visible in this file; confirm
 * whether the declaration is still needed.
 */
extern __read_mostly int sched_clock_running;
265
266static void print_cpu(struct seq_file *m, int cpu)
267{
268	struct rq *rq = cpu_rq(cpu);
269	unsigned long flags;
270
271#ifdef CONFIG_X86
272	{
273		unsigned int freq = cpu_khz ? : 1;
274
275		SEQ_printf(m, "cpu#%d, %u.%03u MHz\n",
276			   cpu, freq / 1000, (freq % 1000));
277	}
278#else
279	SEQ_printf(m, "cpu#%d\n", cpu);
280#endif
281
282#define P(x)								\
283do {									\
284	if (sizeof(rq->x) == 4)						\
285		SEQ_printf(m, "  .%-30s: %ld\n", #x, (long)(rq->x));	\
286	else								\
287		SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rq->x));\
288} while (0)
289
290#define PN(x) \
291	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))
292
293	P(nr_running);
294	SEQ_printf(m, "  .%-30s: %lu\n", "load",
295		   rq->load.weight);
296	P(nr_switches);
297	P(nr_load_updates);
298	P(nr_uninterruptible);
299	PN(next_balance);
300	SEQ_printf(m, "  .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
301	PN(clock);
302	P(cpu_load[0]);
303	P(cpu_load[1]);
304	P(cpu_load[2]);
305	P(cpu_load[3]);
306	P(cpu_load[4]);
307#undef P
308#undef PN
309
310#ifdef CONFIG_SCHEDSTATS
311#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, rq->n);
312#define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
313
314	P(yld_count);
315
316	P(sched_count);
317	P(sched_goidle);
318#ifdef CONFIG_SMP
319	P64(avg_idle);
320	P64(max_idle_balance_cost);
321#endif
322
323	P(ttwu_count);
324	P(ttwu_local);
325
326#undef P
327#undef P64
328#endif
329	spin_lock_irqsave(&sched_debug_lock, flags);
330	print_cfs_stats(m, cpu);
331	print_rt_stats(m, cpu);
332
333	print_rq(m, rq, cpu);
334	spin_unlock_irqrestore(&sched_debug_lock, flags);
335	SEQ_printf(m, "\n");
336}
337
/*
 * Human-readable names printed next to sysctl_sched_tunable_scaling by
 * sched_debug_header(); the table is indexed directly by that value.
 */
static const char * const sched_tunable_scaling_names[] = {
	"none",
	"logarithmic",
	"linear"
};
343
/*
 * Print the global header of the scheduler debug dump: version banner,
 * current clock readings, jiffies and the sysctl_sched tunables.
 *
 * @m: seq_file to print into, or NULL to print via printk()
 */
static void sched_debug_header(struct seq_file *m)
{
	u64 ktime, sched_clk, cpu_clk;
	unsigned long flags;

	/* Read the three clocks back to back with local interrupts off. */
	local_irq_save(flags);
	ktime = ktime_to_ns(ktime_get());
	sched_clk = sched_clock();
	cpu_clk = local_clock();
	local_irq_restore(flags);

	/* Only the part of the version string before its first space is shown. */
	SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n",
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);

/*
 * P()/PN() label each value with the stringified argument, so the local
 * variable names above are part of the output.
 */
#define P(x) \
	SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
	SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
	PN(ktime);
	PN(sched_clk);
	PN(cpu_clk);
	P(jiffies);
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
	P(sched_clock_stable());
#endif
#undef PN
#undef P

	SEQ_printf(m, "\n");
	SEQ_printf(m, "sysctl_sched\n");

/* Same printers again, indented and dot-prefixed for the sysctl section. */
#define P(x) \
	SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
	SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
	PN(sysctl_sched_latency);
	PN(sysctl_sched_min_granularity);
	PN(sysctl_sched_wakeup_granularity);
	P(sysctl_sched_child_runs_first);
	P(sysctl_sched_features);
#undef PN
#undef P

	/*
	 * NOTE(review): the names table is indexed without a range check
	 * here; presumably the sysctl value is bounded elsewhere -- confirm.
	 */
	SEQ_printf(m, "  .%-40s: %d (%s)\n",
		"sysctl_sched_tunable_scaling",
		sysctl_sched_tunable_scaling,
		sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
	SEQ_printf(m, "\n");
}
395
/*
 * seq_file ->show callback.
 *
 * @v is the cookie produced by sched_debug_start(): subtracting 2 gives
 * -1 for the header position and the cpu number otherwise.  Always
 * returns 0.
 */
static int sched_debug_show(struct seq_file *m, void *v)
{
	int cpu = (unsigned long)(v - 2);

	if (cpu == -1)
		sched_debug_header(m);
	else
		print_cpu(m, cpu);

	return 0;
}
407
408void sysrq_sched_debug_show(void)
409{
410	int cpu;
411
412	sched_debug_header(NULL);
413	for_each_online_cpu(cpu)
414		print_cpu(NULL, cpu);
415
416}
417
418/*
 * This iterator needs some explanation.
420 * It returns 1 for the header position.
421 * This means 2 is cpu 0.
422 * In a hotplugged system some cpus, including cpu 0, may be missing so we have
423 * to use cpumask_* to iterate over the cpus.
424 */
425static void *sched_debug_start(struct seq_file *file, loff_t *offset)
426{
427	unsigned long n = *offset;
428
429	if (n == 0)
430		return (void *) 1;
431
432	n--;
433
434	if (n > 0)
435		n = cpumask_next(n - 1, cpu_online_mask);
436	else
437		n = cpumask_first(cpu_online_mask);
438
439	*offset = n + 1;
440
441	if (n < nr_cpu_ids)
442		return (void *)(unsigned long)(n + 2);
443	return NULL;
444}
445
446static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset)
447{
448	(*offset)++;
449	return sched_debug_start(file, offset);
450}
451
/*
 * seq_file ->stop callback.  Intentionally empty: sched_debug_start()
 * acquires nothing that would have to be released here.
 */
static void sched_debug_stop(struct seq_file *file, void *data)
{
}
455
/* Iterator callbacks handed to seq_open() by sched_debug_open(). */
static const struct seq_operations sched_debug_sops = {
	.start = sched_debug_start,
	.next = sched_debug_next,
	.stop = sched_debug_stop,
	.show = sched_debug_show,
};
462
/*
 * ->release handler of the sched_debug proc file: tears down the seq_file
 * state set up by sched_debug_open() and hands seq_release()'s result
 * back to the caller instead of discarding it.
 */
static int sched_debug_release(struct inode *inode, struct file *file)
{
	return seq_release(inode, file);
}
469
470static int sched_debug_open(struct inode *inode, struct file *filp)
471{
472	int ret = 0;
473
474	ret = seq_open(filp, &sched_debug_sops);
475
476	return ret;
477}
478
/*
 * File operations of the "sched_debug" proc entry created by
 * init_sched_debug_procfs(): open/release are local wrappers, reading and
 * seeking go straight to the seq_file helpers.
 */
static const struct file_operations sched_debug_fops = {
	.open		= sched_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= sched_debug_release,
};
485
486static int __init init_sched_debug_procfs(void)
487{
488	struct proc_dir_entry *pe;
489
490	pe = proc_create("sched_debug", 0444, NULL, &sched_debug_fops);
491	if (!pe)
492		return -ENOMEM;
493	return 0;
494}
495
496__initcall(init_sched_debug_procfs);
497
/*
 * Per-task printers; they expect a seq_file named 'm' and, for the
 * unprefixed variants, a task pointer named 'p' at the point of use.
 *
 *   __P(F)  - print expression F as an integer
 *   P(F)    - print p->F as an integer
 *   __PN(F) - print expression F as a split nanosecond value
 *   PN(F)   - print p->F as a split nanosecond value
 *
 * sched_show_numa() uses P() from here; proc_sched_show_task() repeats
 * these definitions verbatim and #undef's them when it is done.
 */
#define __P(F) \
	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
#define P(F) \
	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
#define __PN(F) \
	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
#define PN(F) \
	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))
506
507
/*
 * Print the NUMA balancing state of task @p into @m.  Compiles to an
 * empty function without CONFIG_NUMA_BALANCING.
 *
 * Side effect: reading the report resets p->numa_pages_migrated to 0.
 */
static void sched_show_numa(struct task_struct *p, struct seq_file *m)
{
#ifdef CONFIG_NUMA_BALANCING
	struct mempolicy *pol;
	int node, i;

	if (p->mm)
		P(mm->numa_scan_seq);

	/*
	 * Grab the task's memory policy under task_lock().  Only a policy
	 * with MPOL_F_MORON set is kept; any other is treated as "none".
	 * NOTE(review): mpol_get()/mpol_put() are called even when pol is
	 * NULL -- presumably they tolerate that; confirm.
	 */
	task_lock(p);
	pol = p->mempolicy;
	if (pol && !(pol->flags & MPOL_F_MORON))
		pol = NULL;
	mpol_get(pol);
	task_unlock(p);

	/* xchg() returns the current count and zeroes it in one step. */
	SEQ_printf(m, "numa_migrations, %ld\n", xchg(&p->numa_pages_migrated, 0));

	/* Two rows (i = 0, 1) per online node. */
	for_each_online_node(node) {
		for (i = 0; i < 2; i++) {
			/* stays -1 when the task has no fault table */
			unsigned long nr_faults = -1;
			int cpu_current, home_node;

			if (p->numa_faults_memory)
				nr_faults = p->numa_faults_memory[2*node + i];

			/*
			 * Row 0: is this the node the task is on?
			 * Row 1: is this node part of the kept policy's mask?
			 */
			cpu_current = !i ? (task_node(p) == node) :
				(pol && node_isset(node, pol->v.nodes));

			home_node = (p->numa_preferred_nid == node);

			SEQ_printf(m, "numa_faults_memory, %d, %d, %d, %d, %ld\n",
				i, node, cpu_current, home_node, nr_faults);
		}
	}

	mpol_put(pol);
#endif
}
547
/*
 * Print the per-task scheduler report for @p.
 *
 * @p: task to describe
 * @m: seq_file to print into (NULL sends the text to printk())
 *
 * Output: a heading with name, pid and thread count, the scheduling
 * entity's timing fields, the schedstat counters and derived averages
 * (when CONFIG_SCHEDSTATS is set), context-switch counts, load/policy/
 * priority, a clock-delta measurement and finally the NUMA section.
 */
void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
{
	unsigned long nr_switches;

	SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr(p),
						get_nr_threads(p));
	SEQ_printf(m,
		"---------------------------------------------------------"
		"----------\n");
/*
 * Same text as the file-scope definitions that precede sched_show_numa();
 * all four are #undef'd further down in this function.
 */
#define __P(F) \
	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)F)
#define P(F) \
	SEQ_printf(m, "%-45s:%21Ld\n", #F, (long long)p->F)
#define __PN(F) \
	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)F))
#define PN(F) \
	SEQ_printf(m, "%-45s:%14Ld.%06ld\n", #F, SPLIT_NS((long long)p->F))

	PN(se.exec_start);
	PN(se.vruntime);
	PN(se.sum_exec_runtime);

	/* total context switches: voluntary plus involuntary */
	nr_switches = p->nvcsw + p->nivcsw;

#ifdef CONFIG_SCHEDSTATS
	PN(se.statistics.wait_start);
	PN(se.statistics.sleep_start);
	PN(se.statistics.block_start);
	PN(se.statistics.sleep_max);
	PN(se.statistics.block_max);
	PN(se.statistics.exec_max);
	PN(se.statistics.slice_max);
	PN(se.statistics.wait_max);
	PN(se.statistics.wait_sum);
	P(se.statistics.wait_count);
	PN(se.statistics.iowait_sum);
	P(se.statistics.iowait_count);
	P(se.nr_migrations);
	P(se.statistics.nr_migrations_cold);
	P(se.statistics.nr_failed_migrations_affine);
	P(se.statistics.nr_failed_migrations_running);
	P(se.statistics.nr_failed_migrations_hot);
	P(se.statistics.nr_forced_migrations);
	P(se.statistics.nr_wakeups);
	P(se.statistics.nr_wakeups_sync);
	P(se.statistics.nr_wakeups_migrate);
	P(se.statistics.nr_wakeups_local);
	P(se.statistics.nr_wakeups_remote);
	P(se.statistics.nr_wakeups_affine);
	P(se.statistics.nr_wakeups_affine_attempts);
	P(se.statistics.nr_wakeups_passive);
	P(se.statistics.nr_wakeups_idle);

	{
		u64 avg_atom, avg_per_cpu;

		/* runtime per context switch; -1 when there were none */
		avg_atom = p->se.sum_exec_runtime;
		if (nr_switches)
			avg_atom = div64_ul(avg_atom, nr_switches);
		else
			avg_atom = -1LL;

		/* runtime per migration; -1 when there were none */
		avg_per_cpu = p->se.sum_exec_runtime;
		if (p->se.nr_migrations) {
			avg_per_cpu = div64_u64(avg_per_cpu,
						p->se.nr_migrations);
		} else {
			avg_per_cpu = -1LL;
		}

		__PN(avg_atom);
		__PN(avg_per_cpu);
	}
#endif
	__P(nr_switches);
	SEQ_printf(m, "%-45s:%21Ld\n",
		   "nr_voluntary_switches", (long long)p->nvcsw);
	SEQ_printf(m, "%-45s:%21Ld\n",
		   "nr_involuntary_switches", (long long)p->nivcsw);

	P(se.load.weight);
#ifdef CONFIG_SMP
	P(se.avg.runnable_avg_sum);
	P(se.avg.runnable_avg_period);
	P(se.avg.load_avg_contrib);
	P(se.avg.decay_count);
#endif
	P(policy);
	P(prio);
#undef PN
#undef __PN
#undef P
#undef __P

	{
		unsigned int this_cpu = raw_smp_processor_id();
		u64 t0, t1;

		/* difference between two back-to-back cpu_clock() reads */
		t0 = cpu_clock(this_cpu);
		t1 = cpu_clock(this_cpu);
		SEQ_printf(m, "%-45s:%21Ld\n",
			   "clock-delta", (long long)(t1-t0));
	}

	sched_show_numa(p, m);
}
654
/*
 * Reset the schedstat counters of @p by zeroing p->se.statistics.
 * Does nothing when CONFIG_SCHEDSTATS is not set.
 */
void proc_sched_set_task(struct task_struct *p)
{
#ifdef CONFIG_SCHEDSTATS
	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
#endif
}
661