#include <linux/cpufreq.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/tsacct_kern.h>
#include <linux/kernel_stat.h>
#include <linux/static_key.h>
#include <linux/context_tracking.h>
#include "sched.h"


#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
 * There are no locks covering percpu hardirq/softirq time.
 * They are only modified in vtime_account, on the corresponding CPU
 * with interrupts disabled, so writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * This may result in another CPU reading this CPU's irq time and
 * racing with irq/vtime_account on this CPU. We would either get the
 * old or the new value, with a side effect of accounting a slice of
 * irq time to the wrong task when an irq is in progress while we read
 * rq->clock. That is a worthwhile compromise in place of having locks
 * on each irq in account_system_time.
 */
DEFINE_PER_CPU(u64, cpu_hardirq_time);
DEFINE_PER_CPU(u64, cpu_softirq_time);

static DEFINE_PER_CPU(u64, irq_start_time);
static int sched_clock_irqtime;

void enable_sched_clock_irqtime(void)
{
	sched_clock_irqtime = 1;
}

void disable_sched_clock_irqtime(void)
{
	sched_clock_irqtime = 0;
}

#ifndef CONFIG_64BIT
DEFINE_PER_CPU(seqcount_t, irq_time_seq);
#endif /* CONFIG_64BIT */
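/*
 * On 32-bit kernels a plain u64 load can tear, so the per-cpu irq time
 * above is published under this seqcount by irq_time_write_begin()/
 * irq_time_write_end() (used below, defined outside this file).  A
 * minimal sketch of a tear-safe reader, with an illustrative name:
 *
 *	static inline u64 irq_time_read_example(int cpu)
 *	{
 *		unsigned int seq;
 *		u64 irq_time;
 *
 *		do {
 *			seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
 *			irq_time = per_cpu(cpu_softirq_time, cpu) +
 *				   per_cpu(cpu_hardirq_time, cpu);
 *		} while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
 *
 *		return irq_time;
 *	}
 *
 * On 64-bit kernels the loads are already atomic and the seqcount is
 * not needed.
 */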

/*
 * Called before incrementing preempt_count on {soft,}irq_enter
 * and before decrementing preempt_count on {soft,}irq_exit.
 */
void irqtime_account_irq(struct task_struct *curr)
{
	unsigned long flags;
	s64 delta;
	int cpu;

	if (!sched_clock_irqtime)
		return;

	local_irq_save(flags);

	cpu = smp_processor_id();
	delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
	__this_cpu_add(irq_start_time, delta);

	irq_time_write_begin();
	/*
	 * We do not account for softirq time from ksoftirqd here.
	 * We want to continue accounting softirq time to the ksoftirqd
	 * thread in that case, so as not to confuse the scheduler with a
	 * special task that does not consume any time but still wants to run.
	 */
	if (hardirq_count())
		__this_cpu_add(cpu_hardirq_time, delta);
	else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
		__this_cpu_add(cpu_softirq_time, delta);

	irq_time_write_end();
	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);
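/*
 * Worked example of the bookkeeping above: when a hardirq runs for
 * ~30us on this CPU, irqtime_account_irq() is invoked on irq entry and
 * again on irq exit (see the comment above the function).  The entry
 * call runs before hardirq_count() is raised, so it only advances
 * irq_start_time; the exit call still sees hardirq_count() elevated and
 * adds the ~30us delta to cpu_hardirq_time instead of charging it to
 * whichever task was interrupted.  The 30us figure is illustrative.
 */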

static int irqtime_account_hi_update(void)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	unsigned long flags;
	u64 latest_ns;
	int ret = 0;

	local_irq_save(flags);
	latest_ns = this_cpu_read(cpu_hardirq_time);
	if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
		ret = 1;
	local_irq_restore(flags);
	return ret;
}

static int irqtime_account_si_update(void)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	unsigned long flags;
	u64 latest_ns;
	int ret = 0;

	local_irq_save(flags);
	latest_ns = this_cpu_read(cpu_softirq_time);
	if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
		ret = 1;
	local_irq_restore(flags);
	return ret;
}
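/*
 * Worked example for the two helpers above, assuming HZ=100 so one
 * cputime unit is 10ms: cpu_hardirq_time is a raw nanosecond total,
 * while cpustat[CPUTIME_IRQ] only grows in whole cputime units.  If
 * 26ms of hardirq time have accumulated but cpustat only accounts for
 * 10ms so far, the converted total (2 units) exceeds cpustat (1 unit),
 * irqtime_account_hi_update() returns 1, and the caller
 * (irqtime_account_process_tick() below) charges the pending tick to
 * CPUTIME_IRQ; the leftover 6ms is folded in once another full unit
 * accumulates.
 */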

#else /* CONFIG_IRQ_TIME_ACCOUNTING */

#define sched_clock_irqtime	(0)

#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */

static inline void task_group_account_field(struct task_struct *p, int index,
					    u64 tmp)
{
	/*
	 * Since all updates are sure to touch the root cgroup, we
	 * go ahead and touch it first. If the root cgroup
	 * is the only cgroup, then nothing else should be necessary.
	 */
	__this_cpu_add(kernel_cpustat.cpustat[index], tmp);

	cpuacct_account_field(p, index, tmp);
}
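/*
 * For example, accounting one jiffy of user time for a task in the
 * cpuacct cgroup "foo" bumps this CPU's kernel_cpustat (the root-level
 * numbers exported via /proc/stat) directly above, and then lets
 * cpuacct_account_field() charge "foo" and its non-root ancestors; the
 * root itself is already covered, which is the point of the comment
 * above.  The cgroup name is purely illustrative.
 */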

/*
 * Account user cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in user space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_user_time(struct task_struct *p, cputime_t cputime,
		       cputime_t cputime_scaled)
{
	int index;

	/* Add user time to process. */
	p->utime += cputime;
	p->utimescaled += cputime_scaled;
	account_group_user_time(p, cputime);

	index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

	/* Add user time to cpustat. */
	task_group_account_field(p, index, (__force u64) cputime);

	/* Account for user time used */
	acct_account_cputime(p);

#ifdef CONFIG_CPU_FREQ_STAT
	/* Account power usage for user time */
	acct_update_power(p, cputime);
#endif
}

/*
 * Account guest cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in virtual machine since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
static void account_guest_time(struct task_struct *p, cputime_t cputime,
			       cputime_t cputime_scaled)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	/* Add guest time to process. */
	p->utime += cputime;
	p->utimescaled += cputime_scaled;
	account_group_user_time(p, cputime);
	p->gtime += cputime;

	/* Add guest time to cpustat. */
	if (task_nice(p) > 0) {
		cpustat[CPUTIME_NICE] += (__force u64) cputime;
		cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
	} else {
		cpustat[CPUTIME_USER] += (__force u64) cputime;
		cpustat[CPUTIME_GUEST] += (__force u64) cputime;
	}
}

/*
 * Account system cpu time to a process and desired cpustat field
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 * @index: index of the cpustat field to update
 */
static inline
void __account_system_time(struct task_struct *p, cputime_t cputime,
			cputime_t cputime_scaled, int index)
{
	/* Add system time to process. */
	p->stime += cputime;
	p->stimescaled += cputime_scaled;
	account_group_system_time(p, cputime);

	/* Add system time to cpustat. */
	task_group_account_field(p, index, (__force u64) cputime);

	/* Account for system time used */
	acct_account_cputime(p);

#ifdef CONFIG_CPU_FREQ_STAT
	/* Account power usage for system time */
	acct_update_power(p, cputime);
#endif
}

/*
 * Account system cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_system_time(struct task_struct *p, int hardirq_offset,
			 cputime_t cputime, cputime_t cputime_scaled)
{
	int index;

	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
		account_guest_time(p, cputime, cputime_scaled);
		return;
	}

	if (hardirq_count() - hardirq_offset)
		index = CPUTIME_IRQ;
	else if (in_serving_softirq())
		index = CPUTIME_SOFTIRQ;
	else
		index = CPUTIME_SYSTEM;

	__account_system_time(p, cputime, cputime_scaled, index);
}
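/*
 * Example of the demultiplexing above for one tick's worth of kernel
 * time: a task running guest code with PF_VCPU set (e.g. a KVM vcpu
 * thread), ticked outside of interrupt context, is charged as guest
 * time; the same tick taken while servicing a hardirq beyond
 * hardirq_offset lands in CPUTIME_IRQ, one taken while serving a
 * softirq lands in CPUTIME_SOFTIRQ, and anything else is plain
 * CPUTIME_SYSTEM.  The KVM vcpu thread is an illustrative caller, not
 * the only one.
 */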

/*
 * Account for involuntary wait time.
 * @cputime: the cpu time spent in involuntary wait
 */
void account_steal_time(cputime_t cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	cpustat[CPUTIME_STEAL] += (__force u64) cputime;
}

/*
 * Account for idle time.
 * @cputime: the cpu time spent in idle wait
 */
void account_idle_time(cputime_t cputime)
{
	u64 *cpustat = kcpustat_this_cpu->cpustat;
	struct rq *rq = this_rq();

	if (atomic_read(&rq->nr_iowait) > 0)
		cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
	else
		cpustat[CPUTIME_IDLE] += (__force u64) cputime;
}

static __always_inline bool steal_account_process_tick(void)
{
#ifdef CONFIG_PARAVIRT
	if (static_key_false(&paravirt_steal_enabled)) {
		u64 steal;
		cputime_t steal_ct;

		steal = paravirt_steal_clock(smp_processor_id());
		steal -= this_rq()->prev_steal_time;

		/*
		 * cputime_t may be less precise than nsecs (e.g. if it's
		 * based on jiffies). Let's cast the result to cputime
		 * granularity and account the rest on the next rounds.
		 */
		steal_ct = nsecs_to_cputime(steal);
		this_rq()->prev_steal_time += cputime_to_nsecs(steal_ct);

		account_steal_time(steal_ct);
		return steal_ct;
	}
#endif
	return false;
}
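/*
 * Rounding example for the carry logic above, assuming HZ=100 so one
 * cputime unit is 10ms: if the hypervisor reports 47ms of new steal
 * time, nsecs_to_cputime() yields 4 units, 40ms are folded into
 * prev_steal_time and accounted as steal, and the remaining 7ms stay
 * pending until enough further steal time accumulates to make another
 * whole unit.
 */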

/*
 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 * tasks (sum on group iteration) belonging to @tsk's group.
 */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
	struct signal_struct *sig = tsk->signal;
	cputime_t utime, stime;
	struct task_struct *t;
	unsigned int seq, nextseq;
	unsigned long flags;

	rcu_read_lock();
	/* Attempt a lockless read on the first round. */
	nextseq = 0;
	do {
		seq = nextseq;
		flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq);
		times->utime = sig->utime;
		times->stime = sig->stime;
		times->sum_exec_runtime = sig->sum_sched_runtime;

		for_each_thread(tsk, t) {
			task_cputime(t, &utime, &stime);
			times->utime += utime;
			times->stime += stime;
			times->sum_exec_runtime += task_sched_runtime(t);
		}
		/* If lockless access failed, take the lock. */
		nextseq = 1;
	} while (need_seqretry(&sig->stats_lock, seq));
	done_seqretry_irqrestore(&sig->stats_lock, seq, flags);
	rcu_read_unlock();
}
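/*
 * The seq/nextseq dance above implements "try lockless, then lock":
 * round one runs under read_seqbegin_or_lock_irqsave() with an even
 * sequence, so a concurrent writer on sig->stats_lock forces one retry,
 * and the retry (nextseq = 1) takes the lock outright so the sum cannot
 * be livelocked by a stream of updaters.  The adjusted-time helpers
 * below (thread_group_cputime_adjusted()) are a typical caller.
 */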

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Account a tick to a process and cpustat
 * @p: the process that the cpu time gets accounted to
 * @user_tick: is the tick from userspace
 * @rq: the pointer to rq
 * @ticks: number of ticks to account
 *
 * Tick demultiplexing follows the order
 * - pending hardirq update
 * - pending softirq update
 * - user_time
 * - idle_time
 * - system time
 *   - check for guest_time
 *   - else account as system_time
 *
 * The check for hardirq is done for both system and user time, as there
 * is no timer going off while we are in a hardirq, so we may never get
 * the opportunity to update it solely in system time.
 * p->stime and friends are only updated on system time and not on irq
 * or softirq time, as those no longer count in task exec_runtime.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
					 struct rq *rq, int ticks)
{
	cputime_t scaled = cputime_to_scaled(cputime_one_jiffy);
	u64 cputime = (__force u64) cputime_one_jiffy;
	u64 *cpustat = kcpustat_this_cpu->cpustat;

	if (steal_account_process_tick())
		return;

	cputime *= ticks;
	scaled *= ticks;

	if (irqtime_account_hi_update()) {
		cpustat[CPUTIME_IRQ] += cputime;
	} else if (irqtime_account_si_update()) {
		cpustat[CPUTIME_SOFTIRQ] += cputime;
	} else if (this_cpu_ksoftirqd() == p) {
		/*
		 * ksoftirqd time does not get accounted in cpu_softirq_time,
		 * so we have to handle it separately here.
		 * Also, p->stime needs to be updated for ksoftirqd.
		 */
		__account_system_time(p, cputime, scaled, CPUTIME_SOFTIRQ);
	} else if (user_tick) {
		account_user_time(p, cputime, scaled);
	} else if (p == rq->idle) {
		account_idle_time(cputime);
	} else if (p->flags & PF_VCPU) { /* System time or guest time */
		account_guest_time(p, cputime, scaled);
	} else {
		__account_system_time(p, cputime, scaled, CPUTIME_SYSTEM);
	}
}
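/*
 * Worked example of the cascade above for a single user tick: if no
 * pending hardirq or softirq time is waiting to be folded into cpustat
 * and the current task is not ksoftirqd, the tick is charged as user
 * time.  If instead enough unaccounted hardirq time has accumulated to
 * make irqtime_account_hi_update() return 1, the whole tick is charged
 * to CPUTIME_IRQ even though it interrupted user code; that is the
 * trade-off described in the comment above this function.
 */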

static void irqtime_account_idle_ticks(int ticks)
{
	struct rq *rq = this_rq();

	irqtime_account_process_tick(current, 0, rq, ticks);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static inline void irqtime_account_idle_ticks(int ticks) {}
static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
						struct rq *rq, int nr_ticks) {}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING

#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
void vtime_common_task_switch(struct task_struct *prev)
{
	if (is_idle_task(prev))
		vtime_account_idle(prev);
	else
		vtime_account_system(prev);

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	vtime_account_user(prev);
#endif
	arch_vtime_task_switch(prev);
}
#endif

/*
 * Archs that account the whole time spent in the idle task
 * (outside irq) as idle time can rely on this and just implement
 * vtime_account_system() and vtime_account_idle(). Archs that
 * attach a different meaning to idle time (s390 only includes the
 * time spent by the CPU when it's in low power mode) must override
 * vtime_account().
 */
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_common_account_irq_enter(struct task_struct *tsk)
{
	if (!in_interrupt()) {
		/*
		 * If we interrupted userspace, context_tracking_in_user()
		 * is 1 because context tracking doesn't hook into irq
		 * entry/exit. This way we know whether we need to flush
		 * user time on kernel entry.
		 */
		if (context_tracking_in_user()) {
			vtime_account_user(tsk);
			return;
		}

		if (is_idle_task(tsk)) {
			vtime_account_idle(tsk);
			return;
		}
	}
	vtime_account_system(tsk);
}
EXPORT_SYMBOL_GPL(vtime_common_account_irq_enter);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */
#endif /* CONFIG_VIRT_CPU_ACCOUNTING */


#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	*ut = p->utime;
	*st = p->stime;
}

void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);

	*ut = cputime.utime;
	*st = cputime.stime;
}
#else /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/*
 * Account a single tick of cpu time.
 * @p: the process that the cpu time gets accounted to
 * @user_tick: indicates if the tick is a user or a system tick
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
	cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
	struct rq *rq = this_rq();

	if (vtime_accounting_enabled())
		return;

	if (sched_clock_irqtime) {
		irqtime_account_process_tick(p, user_tick, rq, 1);
		return;
	}

	if (steal_account_process_tick())
		return;

	if (user_tick)
		account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
	else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
		account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
				    one_jiffy_scaled);
	else
		account_idle_time(cputime_one_jiffy);
}

/*
 * Account multiple ticks of steal time.
 * @ticks: number of stolen ticks
 */
void account_steal_ticks(unsigned long ticks)
{
	account_steal_time(jiffies_to_cputime(ticks));
}

/*
 * Account multiple ticks of idle time.
 * @ticks: number of idle ticks
 */
void account_idle_ticks(unsigned long ticks)
{
	if (sched_clock_irqtime) {
		irqtime_account_idle_ticks(ticks);
		return;
	}

	account_idle_time(jiffies_to_cputime(ticks));
}

/*
 * Perform (stime * rtime) / total, but avoid multiplication overflow
 * by losing precision when the numbers are big.
 */
static cputime_t scale_stime(u64 stime, u64 rtime, u64 total)
{
	u64 scaled;

	for (;;) {
		/* Make sure "rtime" is the bigger of stime/rtime */
		if (stime > rtime)
			swap(rtime, stime);

		/* Make sure 'total' fits in 32 bits */
		if (total >> 32)
			goto drop_precision;

		/* Does rtime (and thus stime) fit in 32 bits? */
		if (!(rtime >> 32))
			break;

		/* Can we just balance rtime/stime rather than dropping bits? */
		if (stime >> 31)
			goto drop_precision;

		/* We can grow stime and shrink rtime and try to make them both fit */
		stime <<= 1;
		rtime >>= 1;
		continue;

drop_precision:
		/* We drop from rtime, it has more bits than stime */
		rtime >>= 1;
		total >>= 1;
	}

	/*
	 * Make sure gcc understands that this is a 32x32->64 multiply,
	 * followed by a 64/32->64 divide.
	 */
	scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total);
	return (__force cputime_t) scaled;
}
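/*
 * Worked example of scale_stime(): with stime = 3, rtime = 12 and
 * total = 4 (all small enough that no precision is dropped), the result
 * is 3 * 12 / 4 = 9, i.e. stime keeps its 3/4 share of the larger
 * scheduler-measured runtime.  With huge inputs, e.g. rtime around
 * 2^40, the loop halves rtime and total (or doubles stime) until the
 * 32x32->64 multiply cannot overflow, trading a few low bits of
 * precision for correctness.
 */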

/*
 * Atomically advance counter to the new value. Interrupts, vcpu
 * scheduling, and scaling inaccuracies can cause cputime_advance
 * to be occasionally called with a new value smaller than counter.
 * Let's enforce atomicity.
 *
 * Normally a caller will only go through this loop once, or not
 * at all in case a previous caller updated counter the same jiffy.
 */
static void cputime_advance(cputime_t *counter, cputime_t new)
{
	cputime_t old;

	while (new > (old = ACCESS_ONCE(*counter)))
		cmpxchg_cputime(counter, old, new);
}
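/*
 * Example of why the cmpxchg loop matters: if two CPUs compute adjusted
 * stime values of 100 and 98 for the same task and race here, the
 * counter ends up at 100 regardless of ordering; the slower writer's 98
 * fails the "new > old" test (or its cmpxchg fails and the reread value
 * is already 100) and is simply dropped, so the value userspace sees
 * never goes backwards.
 */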

/*
 * Adjust tick based cputime random precision against scheduler
 * runtime accounting.
 */
static void cputime_adjust(struct task_cputime *curr,
			   struct cputime *prev,
			   cputime_t *ut, cputime_t *st)
{
	cputime_t rtime, stime, utime;

	/*
	 * Tick based cputime accounting depends on whether the random
	 * scheduling timeslices of a task happen to be interrupted by the
	 * timer or not. Depending on these circumstances, the number of
	 * these interrupts may over- or under-estimate the real user and
	 * system cputime, matching it only with a variable precision.
	 *
	 * Fix this by scaling these tick based values against the total
	 * runtime accounted by the CFS scheduler.
	 */
	rtime = nsecs_to_cputime(curr->sum_exec_runtime);

	/*
	 * Update userspace visible utime/stime values only if actual execution
	 * time is bigger than already exported. Note that it can happen that we
	 * provided bigger values due to scaling inaccuracy on big numbers.
	 */
	if (prev->stime + prev->utime >= rtime)
		goto out;

	stime = curr->stime;
	utime = curr->utime;

	if (utime == 0) {
		stime = rtime;
	} else if (stime == 0) {
		utime = rtime;
	} else {
		cputime_t total = stime + utime;

		stime = scale_stime((__force u64)stime,
				    (__force u64)rtime, (__force u64)total);
		utime = rtime - stime;
	}

	cputime_advance(&prev->stime, stime);
	cputime_advance(&prev->utime, utime);

out:
	*ut = prev->utime;
	*st = prev->stime;
}
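/*
 * Worked example of cputime_adjust(): suppose the tick samples charged
 * 30ms of utime and 10ms of stime to a task, but the scheduler measured
 * 60ms of actual runtime.  scale_stime() keeps the 1:3 stime:utime
 * ratio, so prev advances to stime = 15ms and utime = 45ms, and their
 * sum equals the precisely accounted rtime.  On a later call where
 * rtime has not grown past prev->utime + prev->stime, the function
 * returns the previously exported values unchanged.
 */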

void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct task_cputime cputime = {
		.sum_exec_runtime = p->se.sum_exec_runtime,
	};

	task_cputime(p, &cputime.utime, &cputime.stime);
	cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}

void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
	struct task_cputime cputime;

	thread_group_cputime(p, &cputime);
	cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static unsigned long long vtime_delta(struct task_struct *tsk)
{
	unsigned long long clock;

	clock = local_clock();
	if (clock < tsk->vtime_snap)
		return 0;

	return clock - tsk->vtime_snap;
}

static cputime_t get_vtime_delta(struct task_struct *tsk)
{
	unsigned long long delta = vtime_delta(tsk);

	WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING);
	tsk->vtime_snap += delta;

	/* CHECKME: always safe to convert nsecs to cputime? */
	return nsecs_to_cputime(delta);
}

static void __vtime_account_system(struct task_struct *tsk)
{
	cputime_t delta_cpu = get_vtime_delta(tsk);

	account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
}

void vtime_account_system(struct task_struct *tsk)
{
	write_seqlock(&tsk->vtime_seqlock);
	__vtime_account_system(tsk);
	write_sequnlock(&tsk->vtime_seqlock);
}

void vtime_gen_account_irq_exit(struct task_struct *tsk)
{
	write_seqlock(&tsk->vtime_seqlock);
	__vtime_account_system(tsk);
	if (context_tracking_in_user())
		tsk->vtime_snap_whence = VTIME_USER;
	write_sequnlock(&tsk->vtime_seqlock);
}

void vtime_account_user(struct task_struct *tsk)
{
	cputime_t delta_cpu;

	write_seqlock(&tsk->vtime_seqlock);
	delta_cpu = get_vtime_delta(tsk);
	tsk->vtime_snap_whence = VTIME_SYS;
	account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
	write_sequnlock(&tsk->vtime_seqlock);
}

void vtime_user_enter(struct task_struct *tsk)
{
	write_seqlock(&tsk->vtime_seqlock);
	__vtime_account_system(tsk);
	tsk->vtime_snap_whence = VTIME_USER;
	write_sequnlock(&tsk->vtime_seqlock);
}

void vtime_guest_enter(struct task_struct *tsk)
{
	/*
	 * The flags must be updated under the lock, together with
	 * the vtime_snap flush and update.
	 * That enforces the right ordering and update sequence
	 * synchronization against the reader (task_gtime()),
	 * which can thus safely catch up with a tickless delta.
	 */
	write_seqlock(&tsk->vtime_seqlock);
	__vtime_account_system(tsk);
	current->flags |= PF_VCPU;
	write_sequnlock(&tsk->vtime_seqlock);
}
EXPORT_SYMBOL_GPL(vtime_guest_enter);

void vtime_guest_exit(struct task_struct *tsk)
{
	write_seqlock(&tsk->vtime_seqlock);
	__vtime_account_system(tsk);
	current->flags &= ~PF_VCPU;
	write_sequnlock(&tsk->vtime_seqlock);
}
EXPORT_SYMBOL_GPL(vtime_guest_exit);

void vtime_account_idle(struct task_struct *tsk)
{
	cputime_t delta_cpu = get_vtime_delta(tsk);

	account_idle_time(delta_cpu);
}

void arch_vtime_task_switch(struct task_struct *prev)
{
	write_seqlock(&prev->vtime_seqlock);
	prev->vtime_snap_whence = VTIME_SLEEPING;
	write_sequnlock(&prev->vtime_seqlock);

	write_seqlock(&current->vtime_seqlock);
	current->vtime_snap_whence = VTIME_SYS;
	current->vtime_snap = sched_clock_cpu(smp_processor_id());
	write_sequnlock(&current->vtime_seqlock);
}

void vtime_init_idle(struct task_struct *t, int cpu)
{
	unsigned long flags;

	write_seqlock_irqsave(&t->vtime_seqlock, flags);
	t->vtime_snap_whence = VTIME_SYS;
	t->vtime_snap = sched_clock_cpu(cpu);
	write_sequnlock_irqrestore(&t->vtime_seqlock, flags);
}

cputime_t task_gtime(struct task_struct *t)
{
	unsigned int seq;
	cputime_t gtime;

	do {
		seq = read_seqbegin(&t->vtime_seqlock);

		gtime = t->gtime;
		if (t->flags & PF_VCPU)
			gtime += vtime_delta(t);

	} while (read_seqretry(&t->vtime_seqlock, seq));

	return gtime;
}
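/*
 * Read-side example: when a vcpu task has been running guest code
 * ticklessly for 5ms since vtime_snap, a concurrent task_gtime() call
 * reads t->gtime plus the pending vtime_delta() of ~5ms under the
 * seqlock, so the reported guest time includes the not-yet-flushed
 * delta.  If vtime_guest_exit() races with the read, the seqretry loop
 * simply runs once more with the updated snapshot.  The 5ms figure is
 * illustrative.
 */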

/*
 * Fetch cputime raw values from fields of task_struct and
 * add up the pending nohz execution time since the last
 * cputime snapshot.
 */
static void
fetch_task_cputime(struct task_struct *t,
		   cputime_t *u_dst, cputime_t *s_dst,
		   cputime_t *u_src, cputime_t *s_src,
		   cputime_t *udelta, cputime_t *sdelta)
{
	unsigned int seq;
	unsigned long long delta;

	do {
		*udelta = 0;
		*sdelta = 0;

		seq = read_seqbegin(&t->vtime_seqlock);

		if (u_dst)
			*u_dst = *u_src;
		if (s_dst)
			*s_dst = *s_src;

		/* Task is sleeping, nothing to add */
		if (t->vtime_snap_whence == VTIME_SLEEPING ||
		    is_idle_task(t))
			continue;

		delta = vtime_delta(t);

		/*
		 * Task runs either in user or kernel space, add pending nohz time to
		 * the right place.
		 */
		if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) {
			*udelta = delta;
		} else {
			if (t->vtime_snap_whence == VTIME_SYS)
				*sdelta = delta;
		}
	} while (read_seqretry(&t->vtime_seqlock, seq));
}


void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime)
{
	cputime_t udelta, sdelta;

	fetch_task_cputime(t, utime, stime, &t->utime,
			   &t->stime, &udelta, &sdelta);
	if (utime)
		*utime += udelta;
	if (stime)
		*stime += sdelta;
}

void task_cputime_scaled(struct task_struct *t,
			 cputime_t *utimescaled, cputime_t *stimescaled)
{
	cputime_t udelta, sdelta;

	fetch_task_cputime(t, utimescaled, stimescaled,
			   &t->utimescaled, &t->stimescaled, &udelta, &sdelta);
	if (utimescaled)
		*utimescaled += cputime_to_scaled(udelta);
	if (stimescaled)
		*stimescaled += cputime_to_scaled(sdelta);
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */