__enqueue_entity(cfs_rq, se);
}
-static void update_avg(u64 *avg, u64 sample)
-{
- s64 diff = sample - *avg;
- *avg += diff >> 3;
-}
-
-static void update_avg_stats(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
- if (!se->last_wakeup)
- return;
-
- update_avg(&se->avg_overlap, se->sum_exec_runtime - se->last_wakeup);
- se->last_wakeup = 0;
-}
-
static void
dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
{
update_stats_dequeue(cfs_rq, se);
if (sleep) {
- update_avg_stats(cfs_rq, se);
#ifdef CONFIG_SCHEDSTATS
if (entity_is_task(se)) {
struct task_struct *tsk = task_of(se);
hrtick_start(rq, delta, requeue);
}
}
-#else
+#else /* !CONFIG_SCHED_HRTICK */
static inline void
hrtick_start_fair(struct rq *rq, struct task_struct *p)
{
}
return cpu;
}
-#else
+#else /* !ARCH_HAS_SCHED_WAKE_IDLE */
static inline int wake_idle(int cpu, struct task_struct *p)
{
return cpu;
static const struct sched_class fair_sched_class;
#ifdef CONFIG_FAIR_GROUP_SCHED
-static unsigned long effective_load(struct task_group *tg, long wl, int cpu)
+/*
+ * effective_load() calculates the load change as seen from the root_task_group
+ *
+ * Adding load to a group doesn't make a group heavier, but can cause movement
+ * of group shares between cpus. Assuming the shares were perfectly aligned, one
+ * can calculate the shift in shares.
+ *
+ * The problem is that perfectly aligning the shares is rather expensive, hence
+ * we try to avoid doing that too often - see update_shares(), which ratelimits
+ * this change.
+ *
+ * We compensate for this by not only taking the current delta into account, but
+ * also considering the delta between when the shares were last adjusted and
+ * now.
+ *
+ * We still saw a performance dip; some tracing showed us that when
+ * balancing between cgroup:/ and cgroup:/foo the number of affine
+ * wakeups increased significantly. Therefore try to bias the error
+ * in the direction of failing the affine wakeup.
+ *
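+ * As a rough sketch of the per-level step below (using the
+ * aligned-shares approximation s = S*rw/\Sum rw, with S the group's
+ * shares, s this cpu's share and rw this cpu's runqueue weight):
+ * adding wl on this cpu and wg group-wide moves this cpu's share to
+ *
+ *   s' = S*(rw + wl) / (\Sum rw + wg) = s*(S*(rw + wl)) / (S*rw + s*wg)
+ *
+ * so the load change seen one level up is
+ *
+ *   wl' = s' - s = s*(a - b) / b, with a = S*(rw + wl), b = S*rw + s*wg
+ *
+ * which is what the loop below computes for each hierarchy level.
+ *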
+ */
+static long effective_load(struct task_group *tg, int cpu,
+ long wl, long wg)
{
struct sched_entity *se = tg->se[cpu];
- long wg = wl;
+ long more_w;
+
+ if (!tg->parent)
+ return wl;
+
+ /*
+ * By not taking the decrease of shares on the other cpu into
+ * account our error leans towards reducing the affine wakeups.
+ */
+ if (!wl && sched_feat(ASYM_EFF_LOAD))
+ return wl;
+
+ /*
+ * In addition to the requested increment, also add the weight
+ * delta accumulated since the shares were last updated.
+ */
+ more_w = se->my_q->load.weight - se->my_q->rq_weight;
+ wl += more_w;
+ wg += more_w;
for_each_sched_entity(se) {
#define D(n) (likely(n) ? (n) : 1)
S = se->my_q->tg->shares;
s = se->my_q->shares;
- rw = se->my_q->load.weight;
+ rw = se->my_q->rq_weight;
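+ /*
+ * rq_weight is the runqueue weight the shares were last
+ * computed against (cf. more_w above); using it keeps s
+ * and rw consistent in the sums below.
+ */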
a = S*(rw + wl);
b = S*rw + s*wg;
wl = s*(a-b)/D(b);
+ /*
+ * Assume the group is already running and will
+ * thus already be accounted for in the weight.
+ *
+ * That is, moving shares between CPUs does not
+ * alter the group weight.
+ */
wg = 0;
#undef D
}
return wl;
}
-static unsigned long task_load_sub(struct task_struct *p)
-{
- return effective_load(task_group(p), -(long)p->se.load.weight, task_cpu(p));
-}
-
-static unsigned long task_load_add(struct task_struct *p, int cpu)
-{
- return effective_load(task_group(p), p->se.load.weight, cpu);
-}
-
#else
-static unsigned long task_load_sub(struct task_struct *p)
+static inline unsigned long effective_load(struct task_group *tg, int cpu,
+ unsigned long wl, unsigned long wg)
{
- return -p->se.load.weight;
-}
-
-static unsigned long task_load_add(struct task_struct *p, int cpu)
-{
- return p->se.load.weight;
+ return wl;
}
#endif
unsigned int imbalance)
{
struct task_struct *curr = this_rq->curr;
+ struct task_group *tg;
unsigned long tl = this_load;
unsigned long tl_per_task;
+ unsigned long weight;
int balanced;
if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
* effect of the currently running task from the load
* of the current CPU:
*/
- if (sync)
- tl += task_load_sub(current);
+ if (sync) {
+ tg = task_group(current);
+ weight = current->se.load.weight;
+ tl += effective_load(tg, this_cpu, -weight, -weight);
+ load += effective_load(tg, prev_cpu, 0, -weight);
+ }
+
+ tg = task_group(p);
+ weight = p->se.load.weight;
+
- balanced = 100*(tl + task_load_add(p, this_cpu)) <= imbalance*load;
+ balanced = 100*(tl + effective_load(tg, this_cpu, weight, weight)) <=
+ imbalance*(load + effective_load(tg, prev_cpu, 0, weight));
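+ /*
+ * The condition above reads: with the wakee's weight added here
+ * (and, for a sync wakeup, the waker's weight removed), this cpu's
+ * load must not exceed imbalance percent of prev_cpu's load, both
+ * sides evaluated through effective_load() so the resulting
+ * group-share shifts are taken into account.
+ */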
/*
* If the currently running task will sleep within
* a reasonable amount of time then attract this newly
* woken task:
*/
- if (sync && balanced && curr->sched_class == &fair_sched_class) {
+ if (sync && balanced) {
if (curr->se.avg_overlap < sysctl_sched_migration_cost &&
- p->se.avg_overlap < sysctl_sched_migration_cost)
+ p->se.avg_overlap < sysctl_sched_migration_cost)
return 1;
}
return;
}
- se->last_wakeup = se->sum_exec_runtime;
if (unlikely(se == pse))
return;
struct cfs_rq *busiest_cfs_rq = tg->cfs_rq[busiest_cpu];
unsigned long busiest_h_load = busiest_cfs_rq->h_load;
unsigned long busiest_weight = busiest_cfs_rq->load.weight;
- long rem_load, moved_load;
+ u64 rem_load, moved_load;
/*
* empty group
if (!busiest_cfs_rq->task_weight)
continue;
- rem_load = rem_load_move * busiest_weight;
- rem_load /= busiest_h_load + 1;
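+ /*
+ * Convert the root-level load left to move into this group's
+ * local weight units; do the product in 64 bits as it can
+ * overflow an unsigned long on 32-bit.
+ */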
+ rem_load = (u64)rem_load_move * busiest_weight;
+ rem_load = div_u64(rem_load, busiest_h_load + 1);
moved_load = __load_balance_fair(this_rq, this_cpu, busiest,
rem_load, sd, idle, all_pinned, this_best_prio,
continue;
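+ /*
+ * Scale the group-local load we actually moved back up into
+ * the root-level load units that rem_load_move is tracked in.
+ */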
moved_load *= busiest_h_load;
- moved_load /= busiest_weight + 1;
+ moved_load = div_u64(moved_load, busiest_weight + 1);
rem_load_move -= moved_load;
if (rem_load_move < 0)
return 0;
}
-#endif
+#endif /* CONFIG_SMP */
/*
* scheduler tick hitting a task of our scheduling class: