cputime.c revision 6fac4829ce0ef9b7f24369086ce5f0e9f38d37bc
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/tsacct_kern.h>
#include <linux/kernel_stat.h>
#include <linux/static_key.h>
#include <linux/context_tracking.h>
#include "sched.h"

#ifdef CONFIG_IRQ_TIME_ACCOUNTING

/*
 * There are no locks covering percpu hardirq/softirq time.
 * They are only modified in vtime_account, on the corresponding CPU
 * with interrupts disabled. So, writes are safe.
 * They are read and saved off onto struct rq in update_rq_clock().
 * This may result in another CPU reading this CPU's irq time and racing
 * with irq/vtime_account on this CPU. We would either get the old or the
 * new value, with a side effect of accounting a slice of irq time to the
 * wrong task when an irq is in progress while we read rq->clock. That is
 * a worthy compromise in place of having locks on each irq in
 * account_system_time.
 */
DEFINE_PER_CPU(u64, cpu_hardirq_time);
DEFINE_PER_CPU(u64, cpu_softirq_time);

static DEFINE_PER_CPU(u64, irq_start_time);
static int sched_clock_irqtime;

void enable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 1;
}

void disable_sched_clock_irqtime(void)
{
        sched_clock_irqtime = 0;
}

#ifndef CONFIG_64BIT
DEFINE_PER_CPU(seqcount_t, irq_time_seq);
#endif /* CONFIG_64BIT */

/*
 * Called before incrementing preempt_count on {soft,}irq_enter
 * and before decrementing preempt_count on {soft,}irq_exit.
 */
void irqtime_account_irq(struct task_struct *curr)
{
        unsigned long flags;
        s64 delta;
        int cpu;

        if (!sched_clock_irqtime)
                return;

        local_irq_save(flags);

        cpu = smp_processor_id();
        delta = sched_clock_cpu(cpu) - __this_cpu_read(irq_start_time);
        __this_cpu_add(irq_start_time, delta);

        irq_time_write_begin();
        /*
         * We do not account for softirq time from ksoftirqd here.
         * We want to continue accounting softirq time to the ksoftirqd
         * thread in that case, so as not to confuse the scheduler with a
         * special task that does not consume any time but still wants to run.
         */
        if (hardirq_count())
                __this_cpu_add(cpu_hardirq_time, delta);
        else if (in_serving_softirq() && curr != this_cpu_ksoftirqd())
                __this_cpu_add(cpu_softirq_time, delta);

        irq_time_write_end();
        local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(irqtime_account_irq);
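
/*
 * For reference: the matching reader is irq_time_read() in sched.h, used by
 * update_rq_clock(). On 32-bit kernels it retries under irq_time_seq so a
 * remote reader never sees a torn 64-bit value. A sketch of that helper
 * (it lives in sched.h, not in this file) looks roughly like:
 *
 *      static inline u64 irq_time_read(int cpu)
 *      {
 *              u64 irq_time;
 *              unsigned seq;
 *
 *              do {
 *                      seq = read_seqcount_begin(&per_cpu(irq_time_seq, cpu));
 *                      irq_time = per_cpu(cpu_softirq_time, cpu) +
 *                                 per_cpu(cpu_hardirq_time, cpu);
 *              } while (read_seqcount_retry(&per_cpu(irq_time_seq, cpu), seq));
 *
 *              return irq_time;
 *      }
 *
 * On 64-bit kernels the loads are atomic and the seqcount is compiled out.
 */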

static int irqtime_account_hi_update(void)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        unsigned long flags;
        u64 latest_ns;
        int ret = 0;

        local_irq_save(flags);
        latest_ns = this_cpu_read(cpu_hardirq_time);
        if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_IRQ])
                ret = 1;
        local_irq_restore(flags);
        return ret;
}

static int irqtime_account_si_update(void)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        unsigned long flags;
        u64 latest_ns;
        int ret = 0;

        local_irq_save(flags);
        latest_ns = this_cpu_read(cpu_softirq_time);
        if (nsecs_to_cputime64(latest_ns) > cpustat[CPUTIME_SOFTIRQ])
                ret = 1;
        local_irq_restore(flags);
        return ret;
}

#else /* CONFIG_IRQ_TIME_ACCOUNTING */

#define sched_clock_irqtime     (0)

#endif /* !CONFIG_IRQ_TIME_ACCOUNTING */

static inline void task_group_account_field(struct task_struct *p, int index,
                                            u64 tmp)
{
#ifdef CONFIG_CGROUP_CPUACCT
        struct kernel_cpustat *kcpustat;
        struct cpuacct *ca;
#endif
        /*
         * Since all updates are sure to touch the root cgroup, we
         * get ourselves ahead and touch it first. If the root cgroup
         * is the only cgroup, then nothing else should be necessary.
         */
        __get_cpu_var(kernel_cpustat).cpustat[index] += tmp;

#ifdef CONFIG_CGROUP_CPUACCT
        if (unlikely(!cpuacct_subsys.active))
                return;

        rcu_read_lock();
        ca = task_ca(p);
        while (ca && (ca != &root_cpuacct)) {
                kcpustat = this_cpu_ptr(ca->cpustat);
                kcpustat->cpustat[index] += tmp;
                ca = parent_ca(ca);
        }
        rcu_read_unlock();
#endif
}

/*
 * Account user cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in user space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_user_time(struct task_struct *p, cputime_t cputime,
                       cputime_t cputime_scaled)
{
        int index;

        /* Add user time to process. */
        p->utime += cputime;
        p->utimescaled += cputime_scaled;
        account_group_user_time(p, cputime);

        index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;

        /* Add user time to cpustat. */
        task_group_account_field(p, index, (__force u64) cputime);

        /* Account for user time used */
        acct_account_cputime(p);
}

/*
 * Account guest cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in virtual machine since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
static void account_guest_time(struct task_struct *p, cputime_t cputime,
                               cputime_t cputime_scaled)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        /* Add guest time to process. */
        p->utime += cputime;
        p->utimescaled += cputime_scaled;
        account_group_user_time(p, cputime);
        p->gtime += cputime;

        /* Add guest time to cpustat. */
        if (TASK_NICE(p) > 0) {
                cpustat[CPUTIME_NICE] += (__force u64) cputime;
                cpustat[CPUTIME_GUEST_NICE] += (__force u64) cputime;
        } else {
                cpustat[CPUTIME_USER] += (__force u64) cputime;
                cpustat[CPUTIME_GUEST] += (__force u64) cputime;
        }
}
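
/*
 * Note: account_guest_time() is reached from account_system_time() below
 * when PF_VCPU is set. KVM brackets guest execution with that flag; a sketch
 * of the callers (kvm_guest_enter()/kvm_guest_exit(), not code from this
 * file):
 *
 *      current->flags |= PF_VCPU;      // in kvm_guest_enter()
 *      ... run the vcpu ...
 *      current->flags &= ~PF_VCPU;     // in kvm_guest_exit()
 */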

/*
 * Account system cpu time to a process and desired cpustat field
 * @p: the process that the cpu time gets accounted to
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 * @index: index of the cpustat field that has to be updated
 */
static inline
void __account_system_time(struct task_struct *p, cputime_t cputime,
                           cputime_t cputime_scaled, int index)
{
        /* Add system time to process. */
        p->stime += cputime;
        p->stimescaled += cputime_scaled;
        account_group_system_time(p, cputime);

        /* Add system time to cpustat. */
        task_group_account_field(p, index, (__force u64) cputime);

        /* Account for system time used */
        acct_account_cputime(p);
}

/*
 * Account system cpu time to a process.
 * @p: the process that the cpu time gets accounted to
 * @hardirq_offset: the offset to subtract from hardirq_count()
 * @cputime: the cpu time spent in kernel space since the last update
 * @cputime_scaled: cputime scaled by cpu frequency
 */
void account_system_time(struct task_struct *p, int hardirq_offset,
                         cputime_t cputime, cputime_t cputime_scaled)
{
        int index;

        if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
                account_guest_time(p, cputime, cputime_scaled);
                return;
        }

        if (hardirq_count() - hardirq_offset)
                index = CPUTIME_IRQ;
        else if (in_serving_softirq())
                index = CPUTIME_SOFTIRQ;
        else
                index = CPUTIME_SYSTEM;

        __account_system_time(p, cputime, cputime_scaled, index);
}

/*
 * Account for involuntary wait time.
 * @cputime: the cpu time spent in involuntary wait
 */
void account_steal_time(cputime_t cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        cpustat[CPUTIME_STEAL] += (__force u64) cputime;
}

/*
 * Account for idle time.
 * @cputime: the cpu time spent in idle wait
 */
void account_idle_time(cputime_t cputime)
{
        u64 *cpustat = kcpustat_this_cpu->cpustat;
        struct rq *rq = this_rq();

        if (atomic_read(&rq->nr_iowait) > 0)
                cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
        else
                cpustat[CPUTIME_IDLE] += (__force u64) cputime;
}

static __always_inline bool steal_account_process_tick(void)
{
#ifdef CONFIG_PARAVIRT
        if (static_key_false(&paravirt_steal_enabled)) {
                u64 steal, st = 0;

                steal = paravirt_steal_clock(smp_processor_id());
                steal -= this_rq()->prev_steal_time;

                st = steal_ticks(steal);
                this_rq()->prev_steal_time += st * TICK_NSEC;

                account_steal_time(st);
                return st;
        }
#endif
        return false;
}
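
/*
 * steal_ticks() is a sched.h helper that converts a nanosecond steal delta
 * into whole ticks; roughly (a sketch, not code from this file):
 *
 *      static inline u64 steal_ticks(u64 steal)
 *      {
 *              if (unlikely(steal > NSEC_PER_SEC))
 *                      return div_u64(steal, TICK_NSEC);
 *
 *              return __iter_div_u64_rem(steal, TICK_NSEC, &steal);
 *      }
 *
 * Only st * TICK_NSEC is added back to rq->prev_steal_time above, so the
 * sub-tick remainder carries over to the next sample.
 */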

/*
 * Accumulate raw cputime values of dead tasks (sig->[us]time) and live
 * tasks (sum on group iteration) belonging to @tsk's group.
 */
void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
{
        struct signal_struct *sig = tsk->signal;
        cputime_t utime, stime;
        struct task_struct *t;

        times->utime = sig->utime;
        times->stime = sig->stime;
        times->sum_exec_runtime = sig->sum_sched_runtime;

        rcu_read_lock();
        /* make sure we can trust tsk->thread_group list */
        if (!likely(pid_alive(tsk)))
                goto out;

        t = tsk;
        do {
                task_cputime(t, &utime, &stime);
                times->utime += utime;
                times->stime += stime;
                times->sum_exec_runtime += task_sched_runtime(t);
        } while_each_thread(tsk, t);
out:
        rcu_read_unlock();
}

#ifdef CONFIG_IRQ_TIME_ACCOUNTING
/*
 * Account a tick to a process and cpustat
 * @p: the process that the cpu time gets accounted to
 * @user_tick: whether the tick is from userspace
 * @rq: the pointer to rq
 *
 * Tick demultiplexing follows the order
 * - pending hardirq update
 * - pending softirq update
 * - user_time
 * - idle_time
 * - system time
 *   - check for guest_time
 *   - else account as system_time
 *
 * The check for hardirq is done for both system and user time, as there is
 * no timer going off while we are on hardirq and hence we may never get an
 * opportunity to update it solely in system time.
 * p->stime and friends are only updated on system time, not on irq/softirq
 * time, as those do not count in task exec_runtime any more.
 */
static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                         struct rq *rq)
{
        cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
        u64 *cpustat = kcpustat_this_cpu->cpustat;

        if (steal_account_process_tick())
                return;

        if (irqtime_account_hi_update()) {
                cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy;
        } else if (irqtime_account_si_update()) {
                cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy;
        } else if (this_cpu_ksoftirqd() == p) {
                /*
                 * ksoftirqd time does not get accounted in cpu_softirq_time,
                 * so we have to handle it separately here.
                 * p->stime also needs to be updated for ksoftirqd.
                 */
                __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
                                      CPUTIME_SOFTIRQ);
        } else if (user_tick) {
                account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
        } else if (p == rq->idle) {
                account_idle_time(cputime_one_jiffy);
        } else if (p->flags & PF_VCPU) { /* System time or guest time */
                account_guest_time(p, cputime_one_jiffy, one_jiffy_scaled);
        } else {
                __account_system_time(p, cputime_one_jiffy, one_jiffy_scaled,
                                      CPUTIME_SYSTEM);
        }
}

static void irqtime_account_idle_ticks(int ticks)
{
        int i;
        struct rq *rq = this_rq();

        for (i = 0; i < ticks; i++)
                irqtime_account_process_tick(current, 0, rq);
}
#else /* CONFIG_IRQ_TIME_ACCOUNTING */
static inline void irqtime_account_idle_ticks(int ticks) {}
static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                                                struct rq *rq) {}
#endif /* CONFIG_IRQ_TIME_ACCOUNTING */

#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
/*
 * Account a single tick of cpu time.
 * @p: the process that the cpu time gets accounted to
 * @user_tick: indicates if the tick is a user or a system tick
 */
void account_process_tick(struct task_struct *p, int user_tick)
{
        cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy);
        struct rq *rq = this_rq();

        if (vtime_accounting_enabled())
                return;

        if (sched_clock_irqtime) {
                irqtime_account_process_tick(p, user_tick, rq);
                return;
        }

        if (steal_account_process_tick())
                return;

        if (user_tick)
                account_user_time(p, cputime_one_jiffy, one_jiffy_scaled);
        else if ((p != rq->idle) || (irq_count() != HARDIRQ_OFFSET))
                account_system_time(p, HARDIRQ_OFFSET, cputime_one_jiffy,
                                    one_jiffy_scaled);
        else
                account_idle_time(cputime_one_jiffy);
}
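
/*
 * account_process_tick() is driven from the periodic timer interrupt;
 * update_process_times() in kernel/timer.c invokes it roughly like this
 * (a sketch of the caller, not code from this file):
 *
 *      void update_process_times(int user_tick)
 *      {
 *              struct task_struct *p = current;
 *
 *              account_process_tick(p, user_tick);
 *              run_local_timers();
 *              ...
 *      }
 */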

/*
 * Account multiple ticks of steal time.
 * @p: the process from which the cpu time has been stolen
 * @ticks: number of stolen ticks
 */
void account_steal_ticks(unsigned long ticks)
{
        account_steal_time(jiffies_to_cputime(ticks));
}

/*
 * Account multiple ticks of idle time.
 * @ticks: number of idle ticks
 */
void account_idle_ticks(unsigned long ticks)
{
        if (sched_clock_irqtime) {
                irqtime_account_idle_ticks(ticks);
                return;
        }

        account_idle_time(jiffies_to_cputime(ticks));
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */

/*
 * Use precise platform statistics if available:
 */
#ifdef CONFIG_VIRT_CPU_ACCOUNTING
void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        *ut = p->utime;
        *st = p->stime;
}

void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        struct task_cputime cputime;

        thread_group_cputime(p, &cputime);

        *ut = cputime.utime;
        *st = cputime.stime;
}

void vtime_account_system_irqsafe(struct task_struct *tsk)
{
        unsigned long flags;

        local_irq_save(flags);
        vtime_account_system(tsk);
        local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe);

#ifndef __ARCH_HAS_VTIME_TASK_SWITCH
void vtime_task_switch(struct task_struct *prev)
{
        if (!vtime_accounting_enabled())
                return;

        if (is_idle_task(prev))
                vtime_account_idle(prev);
        else
                vtime_account_system(prev);

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        vtime_account_user(prev);
#endif
        arch_vtime_task_switch(prev);
}
#endif

/*
 * Archs that account the whole time spent in the idle task
 * (outside irq) as idle time can rely on this and just implement
 * vtime_account_system() and vtime_account_idle(). Archs that
 * give idle time another meaning (s390 only includes the time
 * spent by the CPU when it's in low power mode) must override
 * vtime_account().
 */
#ifndef __ARCH_HAS_VTIME_ACCOUNT
void vtime_account(struct task_struct *tsk)
{
        if (!vtime_accounting_enabled())
                return;

        if (!in_interrupt()) {
                /*
                 * If we interrupted user, context_tracking_in_user()
                 * is 1 because context tracking doesn't hook
                 * on irq entry/exit. This way we know if
                 * we need to flush user time on kernel entry.
                 */
                if (context_tracking_in_user()) {
                        vtime_account_user(tsk);
                        return;
                }

                if (is_idle_task(tsk)) {
                        vtime_account_idle(tsk);
                        return;
                }
        }
        vtime_account_system(tsk);
}
EXPORT_SYMBOL_GPL(vtime_account);
#endif /* __ARCH_HAS_VTIME_ACCOUNT */

#else /* !CONFIG_VIRT_CPU_ACCOUNTING */

static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
{
        u64 temp = (__force u64) rtime;

        temp *= (__force u64) utime;

        if (sizeof(cputime_t) == 4)
                temp = div_u64(temp, (__force u32) total);
        else
                temp = div64_u64(temp, (__force u64) total);

        return (__force cputime_t) temp;
}
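
/*
 * Worked example (hypothetical numbers): with tick-sampled utime = 2 and
 * stime = 2 (total = 4) but a CFS runtime worth rtime = 6 ticks,
 * scale_utime() yields utime = 6 * 2 / 4 = 3, and cputime_adjust() below
 * derives stime = rtime - utime = 3, preserving the 50/50 user/system split
 * while matching the scheduler's more precise total.
 */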

/*
 * Adjust tick based cputime random precision against scheduler
 * runtime accounting.
 */
static void cputime_adjust(struct task_cputime *curr,
                           struct cputime *prev,
                           cputime_t *ut, cputime_t *st)
{
        cputime_t rtime, utime, total;

        utime = curr->utime;
        total = utime + curr->stime;

        /*
         * Tick based cputime accounting depends on whether a task's random
         * scheduling timeslices happen to be interrupted by the timer.
         * Depending on these circumstances, the number of these interrupts
         * may over- or under-estimate the real user and system cputime,
         * matching it with variable precision.
         *
         * Fix this by scaling these tick based values against the total
         * runtime accounted by the CFS scheduler.
         */
        rtime = nsecs_to_cputime(curr->sum_exec_runtime);

        if (total)
                utime = scale_utime(utime, rtime, total);
        else
                utime = rtime;

        /*
         * If the tick based count grows faster than the scheduler one,
         * the result of the scaling may go backward.
         * Let's enforce monotonicity.
         */
        prev->utime = max(prev->utime, utime);
        prev->stime = max(prev->stime, rtime - prev->utime);

        *ut = prev->utime;
        *st = prev->stime;
}

void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        struct task_cputime cputime = {
                .sum_exec_runtime = p->se.sum_exec_runtime,
        };

        task_cputime(p, &cputime.utime, &cputime.stime);
        cputime_adjust(&cputime, &p->prev_cputime, ut, st);
}

/*
 * Must be called with siglock held.
 */
void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st)
{
        struct task_cputime cputime;

        thread_group_cputime(p, &cputime);
        cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st);
}
#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */

#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
static DEFINE_PER_CPU(unsigned long long, cputime_snap);

static cputime_t get_vtime_delta(void)
{
        unsigned long long delta;

        delta = sched_clock() - __this_cpu_read(cputime_snap);
        __this_cpu_add(cputime_snap, delta);

        /* CHECKME: always safe to convert nsecs to cputime? */
        return nsecs_to_cputime(delta);
}

void vtime_account_system(struct task_struct *tsk)
{
        cputime_t delta_cpu;

        if (!vtime_accounting_enabled())
                return;

        delta_cpu = get_vtime_delta();
        account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu));
}

void vtime_account_user(struct task_struct *tsk)
{
        cputime_t delta_cpu;

        if (!vtime_accounting_enabled())
                return;

        delta_cpu = get_vtime_delta();

        account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu));
}

void vtime_account_idle(struct task_struct *tsk)
{
        cputime_t delta_cpu = get_vtime_delta();

        account_idle_time(delta_cpu);
}

bool vtime_accounting_enabled(void)
{
        return context_tracking_active();
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */
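
/*
 * vtime_accounting_enabled() keys full dynticks cputime accounting off the
 * context tracking subsystem. Around this era the helpers in
 * <linux/context_tracking.h> read per-cpu state, roughly (a sketch, not
 * code from this file):
 *
 *      static inline bool context_tracking_in_user(void)
 *      {
 *              return __this_cpu_read(context_tracking.state) == IN_USER;
 *      }
 *
 *      static inline bool context_tracking_active(void)
 *      {
 *              return __this_cpu_read(context_tracking.active);
 *      }
 */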