tick-broadcast.c revision a382bf934449ddeb625167537ae81daa0211b477
1/*
2 * linux/kernel/time/tick-broadcast.c
3 *
4 * This file contains functions which emulate a local clock-event
5 * device via a broadcast event source.
6 *
7 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
8 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
9 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
10 *
11 * This code is licensed under the GPL version 2. For details see
12 * kernel-base/COPYING.
13 */
14#include <linux/cpu.h>
15#include <linux/err.h>
16#include <linux/hrtimer.h>
17#include <linux/interrupt.h>
18#include <linux/percpu.h>
19#include <linux/profile.h>
20#include <linux/sched.h>
21#include <linux/smp.h>
22
23#include "tick-internal.h"
24
25/*
26 * Broadcast support for broken x86 hardware, where the local apic
27 * timer stops in C3 state.
28 */
29
30static struct tick_device tick_broadcast_device;
31/* FIXME: Use cpumask_var_t. */
32static DECLARE_BITMAP(tick_broadcast_mask, NR_CPUS);
33static DECLARE_BITMAP(tmpmask, NR_CPUS);
34static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
35static int tick_broadcast_force;
36
37#ifdef CONFIG_TICK_ONESHOT
38static void tick_broadcast_clear_oneshot(int cpu);
39#else
40static inline void tick_broadcast_clear_oneshot(int cpu) { }
41#endif
42
43/*
44 * Debugging: see timer_list.c
45 */
46struct tick_device *tick_get_broadcast_device(void)
47{
48	return &tick_broadcast_device;
49}
50
51struct cpumask *tick_get_broadcast_mask(void)
52{
53	return to_cpumask(tick_broadcast_mask);
54}
55
/*
 * Put the broadcast device into periodic mode with the broadcast
 * handler selected. A NULL device is silently ignored.
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (!bc)
		return;

	tick_setup_periodic(bc, 1);
}
64
65/*
66 * Check, if the device can be utilized as broadcast device:
67 */
68int tick_check_broadcast_device(struct clock_event_device *dev)
69{
70	if ((tick_broadcast_device.evtdev &&
71	     tick_broadcast_device.evtdev->rating >= dev->rating) ||
72	     (dev->features & CLOCK_EVT_FEAT_C3STOP))
73		return 0;
74
75	clockevents_exchange_device(tick_broadcast_device.evtdev, dev);
76	tick_broadcast_device.evtdev = dev;
77	if (!cpumask_empty(tick_get_broadcast_mask()))
78		tick_broadcast_start_periodic(dev);
79	return 1;
80}
81
82/*
83 * Check, if the device is the broadcast device
84 */
85int tick_is_broadcast_device(struct clock_event_device *dev)
86{
87	return (dev && tick_broadcast_device.evtdev == dev);
88}
89
/*
 * Fallback broadcast function, installed when no real broadcast
 * function is available. It only complains (once) and does not
 * deliver anything to the cpus in @mask.
 */
static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}
94
95static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
96{
97	if (!dev->broadcast)
98		dev->broadcast = tick_broadcast;
99	if (!dev->broadcast) {
100		pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
101			     dev->name);
102		dev->broadcast = err_broadcast;
103	}
104}
105
106/*
107 * Check, if the device is disfunctional and a place holder, which
108 * needs to be handled by the broadcast device.
109 */
110int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
111{
112	unsigned long flags;
113	int ret = 0;
114
115	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
116
117	/*
118	 * Devices might be registered with both periodic and oneshot
119	 * mode disabled. This signals, that the device needs to be
120	 * operated from the broadcast device and is a placeholder for
121	 * the cpu local device.
122	 */
123	if (!tick_device_is_functional(dev)) {
124		dev->event_handler = tick_handle_periodic;
125		tick_device_setup_broadcast_func(dev);
126		cpumask_set_cpu(cpu, tick_get_broadcast_mask());
127		tick_broadcast_start_periodic(tick_broadcast_device.evtdev);
128		ret = 1;
129	} else {
130		/*
131		 * When the new device is not affected by the stop
132		 * feature and the cpu is marked in the broadcast mask
133		 * then clear the broadcast bit.
134		 */
135		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) {
136			int cpu = smp_processor_id();
137			cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
138			tick_broadcast_clear_oneshot(cpu);
139		} else {
140			tick_device_setup_broadcast_func(dev);
141		}
142	}
143	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
144	return ret;
145}
146
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
/*
 * Run the event handler of the clock event device of the current cpu.
 *
 * Returns 0 on success, -ENODEV when the cpu has no device and
 * -EINVAL when the device has no event handler.
 */
int tick_receive_broadcast(void)
{
	struct clock_event_device *evt;

	evt = this_cpu_ptr(&tick_cpu_device)->evtdev;
	if (!evt)
		return -ENODEV;
	if (!evt->event_handler)
		return -EINVAL;

	evt->event_handler(evt);
	return 0;
}
#endif
163
164/*
165 * Broadcast the event to the cpus, which are set in the mask (mangled).
166 */
167static void tick_do_broadcast(struct cpumask *mask)
168{
169	int cpu = smp_processor_id();
170	struct tick_device *td;
171
172	/*
173	 * Check, if the current cpu is in the mask
174	 */
175	if (cpumask_test_cpu(cpu, mask)) {
176		cpumask_clear_cpu(cpu, mask);
177		td = &per_cpu(tick_cpu_device, cpu);
178		td->evtdev->event_handler(td->evtdev);
179	}
180
181	if (!cpumask_empty(mask)) {
182		/*
183		 * It might be necessary to actually check whether the devices
184		 * have different broadcast functions. For now, just use the
185		 * one of the first device. This works as long as we have this
186		 * misfeature only on x86 (lapic)
187		 */
188		td = &per_cpu(tick_cpu_device, cpumask_first(mask));
189		td->evtdev->broadcast(mask);
190	}
191}
192
193/*
194 * Periodic broadcast:
195 * - invoke the broadcast handlers
196 */
197static void tick_do_periodic_broadcast(void)
198{
199	raw_spin_lock(&tick_broadcast_lock);
200
201	cpumask_and(to_cpumask(tmpmask),
202		    cpu_online_mask, tick_get_broadcast_mask());
203	tick_do_broadcast(to_cpumask(tmpmask));
204
205	raw_spin_unlock(&tick_broadcast_lock);
206}
207
208/*
209 * Event handler for periodic broadcast ticks
210 */
211static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
212{
213	ktime_t next;
214
215	tick_do_periodic_broadcast();
216
217	/*
218	 * The device is in periodic mode. No reprogramming necessary:
219	 */
220	if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
221		return;
222
223	/*
224	 * Setup the next period for devices, which do not have
225	 * periodic mode. We read dev->next_event first and add to it
226	 * when the event already expired. clockevents_program_event()
227	 * sets dev->next_event only when the event is really
228	 * programmed to the device.
229	 */
230	for (next = dev->next_event; ;) {
231		next = ktime_add(next, tick_period);
232
233		if (!clockevents_program_event(dev, next, false))
234			return;
235		tick_do_periodic_broadcast();
236	}
237}
238
239/*
240 * Powerstate information: The system enters/leaves a state, where
241 * affected devices might stop
242 */
243static void tick_do_broadcast_on_off(unsigned long *reason)
244{
245	struct clock_event_device *bc, *dev;
246	struct tick_device *td;
247	unsigned long flags;
248	int cpu, bc_stopped;
249
250	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
251
252	cpu = smp_processor_id();
253	td = &per_cpu(tick_cpu_device, cpu);
254	dev = td->evtdev;
255	bc = tick_broadcast_device.evtdev;
256
257	/*
258	 * Is the device not affected by the powerstate ?
259	 */
260	if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
261		goto out;
262
263	if (!tick_device_is_functional(dev))
264		goto out;
265
266	bc_stopped = cpumask_empty(tick_get_broadcast_mask());
267
268	switch (*reason) {
269	case CLOCK_EVT_NOTIFY_BROADCAST_ON:
270	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
271		if (!cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
272			cpumask_set_cpu(cpu, tick_get_broadcast_mask());
273			if (tick_broadcast_device.mode ==
274			    TICKDEV_MODE_PERIODIC)
275				clockevents_shutdown(dev);
276		}
277		if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
278			tick_broadcast_force = 1;
279		break;
280	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
281		if (!tick_broadcast_force &&
282		    cpumask_test_cpu(cpu, tick_get_broadcast_mask())) {
283			cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
284			if (tick_broadcast_device.mode ==
285			    TICKDEV_MODE_PERIODIC)
286				tick_setup_periodic(dev, 0);
287		}
288		break;
289	}
290
291	if (cpumask_empty(tick_get_broadcast_mask())) {
292		if (!bc_stopped)
293			clockevents_shutdown(bc);
294	} else if (bc_stopped) {
295		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
296			tick_broadcast_start_periodic(bc);
297		else
298			tick_broadcast_setup_oneshot(bc);
299	}
300out:
301	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
302}
303
304/*
305 * Powerstate information: The system enters/leaves a state, where
306 * affected devices might stop.
307 */
308void tick_broadcast_on_off(unsigned long reason, int *oncpu)
309{
310	if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
311		printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
312		       "offline CPU #%d\n", *oncpu);
313	else
314		tick_do_broadcast_on_off(&reason);
315}
316
317/*
318 * Set the periodic handler depending on broadcast on/off
319 */
320void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
321{
322	if (!broadcast)
323		dev->event_handler = tick_handle_periodic;
324	else
325		dev->event_handler = tick_handle_periodic_broadcast;
326}
327
328/*
329 * Remove a CPU from broadcasting
330 */
331void tick_shutdown_broadcast(unsigned int *cpup)
332{
333	struct clock_event_device *bc;
334	unsigned long flags;
335	unsigned int cpu = *cpup;
336
337	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
338
339	bc = tick_broadcast_device.evtdev;
340	cpumask_clear_cpu(cpu, tick_get_broadcast_mask());
341
342	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
343		if (bc && cpumask_empty(tick_get_broadcast_mask()))
344			clockevents_shutdown(bc);
345	}
346
347	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
348}
349
350void tick_suspend_broadcast(void)
351{
352	struct clock_event_device *bc;
353	unsigned long flags;
354
355	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
356
357	bc = tick_broadcast_device.evtdev;
358	if (bc)
359		clockevents_shutdown(bc);
360
361	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
362}
363
364int tick_resume_broadcast(void)
365{
366	struct clock_event_device *bc;
367	unsigned long flags;
368	int broadcast = 0;
369
370	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
371
372	bc = tick_broadcast_device.evtdev;
373
374	if (bc) {
375		clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);
376
377		switch (tick_broadcast_device.mode) {
378		case TICKDEV_MODE_PERIODIC:
379			if (!cpumask_empty(tick_get_broadcast_mask()))
380				tick_broadcast_start_periodic(bc);
381			broadcast = cpumask_test_cpu(smp_processor_id(),
382						     tick_get_broadcast_mask());
383			break;
384		case TICKDEV_MODE_ONESHOT:
385			if (!cpumask_empty(tick_get_broadcast_mask()))
386				broadcast = tick_resume_broadcast_oneshot(bc);
387			break;
388		}
389	}
390	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
391
392	return broadcast;
393}
394
395
396#ifdef CONFIG_TICK_ONESHOT
397
398/* FIXME: use cpumask_var_t. */
399static DECLARE_BITMAP(tick_broadcast_oneshot_mask, NR_CPUS);
400
401/*
402 * Exposed for debugging: see timer_list.c
403 */
404struct cpumask *tick_get_broadcast_oneshot_mask(void)
405{
406	return to_cpumask(tick_broadcast_oneshot_mask);
407}
408
409static int tick_broadcast_set_event(ktime_t expires, int force)
410{
411	struct clock_event_device *bc = tick_broadcast_device.evtdev;
412
413	if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
414		clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
415
416	return clockevents_program_event(bc, expires, force);
417}
418
419int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
420{
421	clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
422	return 0;
423}
424
425/*
426 * Called from irq_enter() when idle was interrupted to reenable the
427 * per cpu device.
428 */
429void tick_check_oneshot_broadcast(int cpu)
430{
431	if (cpumask_test_cpu(cpu, to_cpumask(tick_broadcast_oneshot_mask))) {
432		struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
433
434		clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_ONESHOT);
435	}
436}
437
438/*
439 * Handle oneshot mode broadcasting
440 */
441static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
442{
443	struct tick_device *td;
444	ktime_t now, next_event;
445	int cpu;
446
447	raw_spin_lock(&tick_broadcast_lock);
448again:
449	dev->next_event.tv64 = KTIME_MAX;
450	next_event.tv64 = KTIME_MAX;
451	cpumask_clear(to_cpumask(tmpmask));
452	now = ktime_get();
453	/* Find all expired events */
454	for_each_cpu(cpu, tick_get_broadcast_oneshot_mask()) {
455		td = &per_cpu(tick_cpu_device, cpu);
456		if (td->evtdev->next_event.tv64 <= now.tv64)
457			cpumask_set_cpu(cpu, to_cpumask(tmpmask));
458		else if (td->evtdev->next_event.tv64 < next_event.tv64)
459			next_event.tv64 = td->evtdev->next_event.tv64;
460	}
461
462	/*
463	 * Wakeup the cpus which have an expired event.
464	 */
465	tick_do_broadcast(to_cpumask(tmpmask));
466
467	/*
468	 * Two reasons for reprogram:
469	 *
470	 * - The global event did not expire any CPU local
471	 * events. This happens in dyntick mode, as the maximum PIT
472	 * delta is quite small.
473	 *
474	 * - There are pending events on sleeping CPUs which were not
475	 * in the event mask
476	 */
477	if (next_event.tv64 != KTIME_MAX) {
478		/*
479		 * Rearm the broadcast device. If event expired,
480		 * repeat the above
481		 */
482		if (tick_broadcast_set_event(next_event, 0))
483			goto again;
484	}
485	raw_spin_unlock(&tick_broadcast_lock);
486}
487
488/*
489 * Powerstate information: The system enters/leaves a state, where
490 * affected devices might stop
491 */
492void tick_broadcast_oneshot_control(unsigned long reason)
493{
494	struct clock_event_device *bc, *dev;
495	struct tick_device *td;
496	unsigned long flags;
497	int cpu;
498
499	/*
500	 * Periodic mode does not care about the enter/exit of power
501	 * states
502	 */
503	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
504		return;
505
506	/*
507	 * We are called with preemtion disabled from the depth of the
508	 * idle code, so we can't be moved away.
509	 */
510	cpu = smp_processor_id();
511	td = &per_cpu(tick_cpu_device, cpu);
512	dev = td->evtdev;
513
514	if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
515		return;
516
517	bc = tick_broadcast_device.evtdev;
518
519	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
520	if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
521		if (!cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
522			cpumask_set_cpu(cpu, tick_get_broadcast_oneshot_mask());
523			clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
524			if (dev->next_event.tv64 < bc->next_event.tv64)
525				tick_broadcast_set_event(dev->next_event, 1);
526		}
527	} else {
528		if (cpumask_test_cpu(cpu, tick_get_broadcast_oneshot_mask())) {
529			cpumask_clear_cpu(cpu,
530					  tick_get_broadcast_oneshot_mask());
531			clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
532			if (dev->next_event.tv64 != KTIME_MAX)
533				tick_program_event(dev->next_event, 1);
534		}
535	}
536	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
537}
538
/*
 * Reset the one shot broadcast for a cpu by clearing its bit in the
 * oneshot broadcast mask.
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
	struct cpumask *oneshot_mask = tick_get_broadcast_oneshot_mask();

	cpumask_clear_cpu(cpu, oneshot_mask);
}
548
549static void tick_broadcast_init_next_event(struct cpumask *mask,
550					   ktime_t expires)
551{
552	struct tick_device *td;
553	int cpu;
554
555	for_each_cpu(cpu, mask) {
556		td = &per_cpu(tick_cpu_device, cpu);
557		if (td->evtdev)
558			td->evtdev->next_event = expires;
559	}
560}
561
562/**
563 * tick_broadcast_setup_oneshot - setup the broadcast device
564 */
565void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
566{
567	int cpu = smp_processor_id();
568
569	/* Set it up only once ! */
570	if (bc->event_handler != tick_handle_oneshot_broadcast) {
571		int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
572
573		bc->event_handler = tick_handle_oneshot_broadcast;
574
575		/* Take the do_timer update */
576		if (!tick_nohz_extended_cpu(cpu))
577			tick_do_timer_cpu = cpu;
578
579		/*
580		 * We must be careful here. There might be other CPUs
581		 * waiting for periodic broadcast. We need to set the
582		 * oneshot_mask bits for those and program the
583		 * broadcast device to fire.
584		 */
585		cpumask_copy(to_cpumask(tmpmask), tick_get_broadcast_mask());
586		cpumask_clear_cpu(cpu, to_cpumask(tmpmask));
587		cpumask_or(tick_get_broadcast_oneshot_mask(),
588			   tick_get_broadcast_oneshot_mask(),
589			   to_cpumask(tmpmask));
590
591		if (was_periodic && !cpumask_empty(to_cpumask(tmpmask))) {
592			clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
593			tick_broadcast_init_next_event(to_cpumask(tmpmask),
594						       tick_next_period);
595			tick_broadcast_set_event(tick_next_period, 1);
596		} else
597			bc->next_event.tv64 = KTIME_MAX;
598	} else {
599		/*
600		 * The first cpu which switches to oneshot mode sets
601		 * the bit for all other cpus which are in the general
602		 * (periodic) broadcast mask. So the bit is set and
603		 * would prevent the first broadcast enter after this
604		 * to program the bc device.
605		 */
606		tick_broadcast_clear_oneshot(cpu);
607	}
608}
609
610/*
611 * Select oneshot operating mode for the broadcast device
612 */
613void tick_broadcast_switch_to_oneshot(void)
614{
615	struct clock_event_device *bc;
616	unsigned long flags;
617
618	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
619
620	tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
621	bc = tick_broadcast_device.evtdev;
622	if (bc)
623		tick_broadcast_setup_oneshot(bc);
624
625	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
626}
627
628
629/*
630 * Remove a dead CPU from broadcasting
631 */
632void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
633{
634	unsigned long flags;
635	unsigned int cpu = *cpup;
636
637	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
638
639	/*
640	 * Clear the broadcast mask flag for the dead cpu, but do not
641	 * stop the broadcast device!
642	 */
643	cpumask_clear_cpu(cpu, tick_get_broadcast_oneshot_mask());
644
645	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
646}
647
648/*
649 * Check, whether the broadcast device is in one shot mode
650 */
651int tick_broadcast_oneshot_active(void)
652{
653	return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
654}
655
656/*
657 * Check whether the broadcast device supports oneshot.
658 */
659bool tick_broadcast_oneshot_available(void)
660{
661	struct clock_event_device *bc = tick_broadcast_device.evtdev;
662
663	return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
664}
665
666#endif
667