* 'timers-for-linus-migration' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
timers: Logic to move non pinned timers
timers: /proc/sys sysctl hook to enable timer migration
timers: Identifying the existing pinned timers
timers: Framework for identifying pinned timers
timers: allow deferrable timers for intervals tv2-tv5 to be deferred
Fix up conflicts in kernel/sched.c and kernel/timer.c manually
uv_set_scir_bits(bits);
/* enable next timer period */
- mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL);
+ mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL);
}
static void __cpuinit uv_heartbeat_enable(int cpu)
#endif
#endif
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+extern ktime_t clockevents_get_next_event(int cpu);
+#else
+static inline ktime_t clockevents_get_next_event(int cpu)
+{
+ return (ktime_t) { .tv64 = KTIME_MAX };
+}
+#endif
* Mode arguments of xxx_hrtimer functions:
*/
enum hrtimer_mode {
- HRTIMER_MODE_ABS, /* Time value is absolute */
- HRTIMER_MODE_REL, /* Time value is relative to now */
+ HRTIMER_MODE_ABS = 0x0, /* Time value is absolute */
+ HRTIMER_MODE_REL = 0x1, /* Time value is relative to now */
+ HRTIMER_MODE_PINNED = 0x02, /* Timer is bound to CPU */
+ HRTIMER_MODE_ABS_PINNED = 0x02,
+ HRTIMER_MODE_REL_PINNED = 0x03,
};
/*
extern cpumask_var_t nohz_cpu_mask;
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
extern int select_nohz_load_balancer(int cpu);
+extern int get_nohz_load_balancer(void);
#else
static inline int select_nohz_load_balancer(int cpu)
{
extern unsigned int sysctl_sched_features;
extern unsigned int sysctl_sched_migration_cost;
extern unsigned int sysctl_sched_nr_migrate;
+extern unsigned int sysctl_timer_migration;
int sched_nr_latency_handler(struct ctl_table *table, int write,
struct file *file, void __user *buffer, size_t *length,
loff_t *ppos);
#endif
+#ifdef CONFIG_SCHED_DEBUG
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+ return sysctl_timer_migration;
+}
+#else
+static inline unsigned int get_sysctl_timer_migration(void)
+{
+ return 1;
+}
+#endif
extern unsigned int sysctl_sched_rt_period;
extern int sysctl_sched_rt_runtime;
extern int del_timer(struct timer_list * timer);
extern int mod_timer(struct timer_list *timer, unsigned long expires);
extern int mod_timer_pending(struct timer_list *timer, unsigned long expires);
+extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires);
+#define TIMER_NOT_PINNED 0
+#define TIMER_PINNED 1
/*
* The jiffies value which is added to now, when there is no timer
* in the timer wheel:
#include <linux/seq_file.h>
#include <linux/err.h>
#include <linux/debugobjects.h>
+#include <linux/sched.h>
+#include <linux/timer.h>
#include <asm/uaccess.h>
* Switch the timer base to the current CPU when possible.
*/
static inline struct hrtimer_clock_base *
-switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base)
+switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
+ int pinned)
{
struct hrtimer_clock_base *new_base;
struct hrtimer_cpu_base *new_cpu_base;
+ int cpu, preferred_cpu = -1;
+
+ cpu = smp_processor_id();
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+ if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+ preferred_cpu = get_nohz_load_balancer();
+ if (preferred_cpu >= 0)
+ cpu = preferred_cpu;
+ }
+#endif
- new_cpu_base = &__get_cpu_var(hrtimer_bases);
+again:
+ new_cpu_base = &per_cpu(hrtimer_bases, cpu);
new_base = &new_cpu_base->clock_base[base->index];
if (base != new_base) {
timer->base = NULL;
spin_unlock(&base->cpu_base->lock);
spin_lock(&new_base->cpu_base->lock);
+
+ /* Optimized away for NOHZ=n SMP=n */
+ if (cpu == preferred_cpu) {
+ /* Calculate clock monotonic expiry time */
+#ifdef CONFIG_HIGH_RES_TIMERS
+ ktime_t expires = ktime_sub(hrtimer_get_expires(timer),
+ new_base->offset);
+#else
+ ktime_t expires = hrtimer_get_expires(timer);
+#endif
+
+ /*
+ * Get the next event on target cpu from the
+ * clock events layer.
+ * This covers the highres=off nohz=on case as well.
+ */
+ ktime_t next = clockevents_get_next_event(cpu);
+
+ ktime_t delta = ktime_sub(expires, next);
+
+ /*
+ * We do not migrate the timer when it is expiring
+ * before the next event on the target cpu because
+ * we cannot reprogram the target cpu hardware and
+ * we would cause it to fire late.
+ */
+ if (delta.tv64 < 0) {
+ cpu = smp_processor_id();
+ spin_unlock(&new_base->cpu_base->lock);
+ spin_lock(&base->cpu_base->lock);
+ timer->base = base;
+ goto again;
+ }
+ }
timer->base = new_base;
}
return new_base;
return base;
}
-# define switch_hrtimer_base(t, b) (b)
+# define switch_hrtimer_base(t, b, p) (b)
#endif /* !CONFIG_SMP */
ret = remove_hrtimer(timer, base);
/* Switch the timer base, if necessary: */
- new_base = switch_hrtimer_base(timer, base);
+ new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
- if (mode == HRTIMER_MODE_REL) {
+ if (mode & HRTIMER_MODE_REL) {
tim = ktime_add_safe(tim, new_base->get_time());
/*
* CONFIG_TIME_LOW_RES is a temporary way for architectures
hard = hrtimer_get_expires(&rt_b->rt_period_timer);
delta = ktime_to_ns(ktime_sub(hard, soft));
__hrtimer_start_range_ns(&rt_b->rt_period_timer, soft, delta,
- HRTIMER_MODE_ABS, 0);
+ HRTIMER_MODE_ABS_PINNED, 0);
}
spin_unlock(&rt_b->rt_runtime_lock);
}
static void hrtick_start(struct rq *rq, u64 delay)
{
__hrtimer_start_range_ns(&rq->hrtick_timer, ns_to_ktime(delay), 0,
- HRTIMER_MODE_REL, 0);
+ HRTIMER_MODE_REL_PINNED, 0);
}
static inline void init_hrtick(void)
.load_balancer = ATOMIC_INIT(-1),
};
+int get_nohz_load_balancer(void)
+{
+ return atomic_read(&nohz.load_balancer);
+}
+
#if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
/**
* lowest_flag_domain - Return lowest sched_domain containing flag.
}
#endif /* CONFIG_SMP */
+const_debug unsigned int sysctl_timer_migration = 1;
+
int in_sched_functions(unsigned long addr)
{
return in_lock_functions(addr) ||
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "timer_migration",
+ .data = &sysctl_timer_migration,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
#endif
{
.ctl_name = CTL_UNNUMBERED,
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/sysdev.h>
+#include <linux/tick.h>
/* The registered clock event devices */
static LIST_HEAD(clockevent_devices);
spin_unlock(&clockevents_lock);
}
EXPORT_SYMBOL_GPL(clockevents_notify);
+
+ktime_t clockevents_get_next_event(int cpu)
+{
+ struct tick_device *td;
+ struct clock_event_device *dev;
+
+ td = &per_cpu(tick_cpu_device, cpu);
+ dev = td->evtdev;
+
+ return dev->next_event;
+}
#endif
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
hrtimer_start(&ts->sched_timer, expires,
- HRTIMER_MODE_ABS);
+ HRTIMER_MODE_ABS_PINNED);
/* Check, if the timer was already in the past */
if (hrtimer_active(&ts->sched_timer))
goto out;
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
hrtimer_start_expires(&ts->sched_timer,
- HRTIMER_MODE_ABS);
+ HRTIMER_MODE_ABS_PINNED);
/* Check, if the timer was already in the past */
if (hrtimer_active(&ts->sched_timer))
break;
for (;;) {
hrtimer_forward(&ts->sched_timer, now, tick_period);
- hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS);
+ hrtimer_start_expires(&ts->sched_timer,
+ HRTIMER_MODE_ABS_PINNED);
/* Check, if the timer was already in the past */
if (hrtimer_active(&ts->sched_timer))
break;
#include <linux/tick.h>
#include <linux/kallsyms.h>
#include <linux/perf_counter.h>
+#include <linux/sched.h>
#include <asm/uaccess.h>
#include <asm/unistd.h>
}
static inline int
-__mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only)
+__mod_timer(struct timer_list *timer, unsigned long expires,
+ bool pending_only, int pinned)
{
struct tvec_base *base, *new_base;
unsigned long flags;
- int ret;
-
- ret = 0;
+ int ret = 0 , cpu;
timer_stats_timer_set_start_info(timer);
BUG_ON(!timer->function);
new_base = __get_cpu_var(tvec_bases);
+ cpu = smp_processor_id();
+
+#if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP)
+ if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) {
+ int preferred_cpu = get_nohz_load_balancer();
+
+ if (preferred_cpu >= 0)
+ cpu = preferred_cpu;
+ }
+#endif
+ new_base = per_cpu(tvec_bases, cpu);
+
if (base != new_base) {
/*
* We are trying to schedule the timer on the local CPU.
*/
int mod_timer_pending(struct timer_list *timer, unsigned long expires)
{
- return __mod_timer(timer, expires, true);
+ return __mod_timer(timer, expires, true, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer_pending);
if (timer->expires == expires && timer_pending(timer))
return 1;
- return __mod_timer(timer, expires, false);
+ return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
}
EXPORT_SYMBOL(mod_timer);
+/**
+ * mod_timer_pinned - modify a timer's timeout
+ * @timer: the timer to be modified
+ * @expires: new timeout in jiffies
+ *
+ * mod_timer_pinned() is a way to update the expire field of an
+ * active timer (if the timer is inactive it will be activated)
+ * and not allow the timer to be migrated to a different CPU.
+ *
+ * mod_timer_pinned(timer, expires) is equivalent to:
+ *
+ * del_timer(timer); timer->expires = expires; add_timer(timer);
+ */
+int mod_timer_pinned(struct timer_list *timer, unsigned long expires)
+{
+ if (timer->expires == expires && timer_pending(timer))
+ return 1;
+
+ return __mod_timer(timer, expires, false, TIMER_PINNED);
+}
+EXPORT_SYMBOL(mod_timer_pinned);
+
/**
* add_timer - start a timer
* @timer: the timer to be added
index = slot = timer_jiffies & TVN_MASK;
do {
list_for_each_entry(nte, varp->vec + slot, entry) {
+ if (tbase_get_deferrable(nte->base))
+ continue;
+
found = 1;
if (time_before(nte->expires, expires))
expires = nte->expires;
expire = timeout + jiffies;
setup_timer_on_stack(&timer, process_timeout, (unsigned long)current);
- __mod_timer(&timer, expire, false);
+ __mod_timer(&timer, expire, false, TIMER_NOT_PINNED);
schedule();
del_singleshot_timer_sync(&timer);
hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
hrtimer->function = stack_trace_timer_fn;
- hrtimer_start(hrtimer, ns_to_ktime(sample_period), HRTIMER_MODE_REL);
+ hrtimer_start(hrtimer, ns_to_ktime(sample_period),
+ HRTIMER_MODE_REL_PINNED);
}
static void start_stack_timers(void)