sched: Fix race in migrate_swap_stop()

[~andy/linux] / kernel / sched / sched.h
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h

index b3c5653e1dca6ee3a965d83022e8e9b1905465b3..ffc708717b70d059daf1ad3d6f54a307b0e16eb9 100644 (file)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -6,6 +6,7 @@
  #include <linux/spinlock.h>
  #include <linux/stop_machine.h>
  #include <linux/tick.h>
+#include <linux/slab.h>
  
  #include "cpupri.h"
  #include "cpuacct.h"
@@ -408,6 +409,10 @@ struct rq {
          * remote CPUs use both these fields when doing load calculation.
          */
         unsigned int nr_running;
+#ifdef CONFIG_NUMA_BALANCING
+       unsigned int nr_numa_running;
+       unsigned int nr_preferred_running;
+#endif
         #define CPU_LOAD_IDX_MAX 5
         unsigned long cpu_load[CPU_LOAD_IDX_MAX];
         unsigned long last_load_update_tick;
@@ -476,6 +481,9 @@ struct rq {
         u64 age_stamp;
         u64 idle_stamp;
         u64 avg_idle;
+
+       /* This is used to determine avg_idle's max value */
+       u64 max_idle_balance_cost;
  #endif
  
  #ifdef CONFIG_IRQ_TIME_ACCOUNTING
@@ -552,6 +560,12 @@ static inline u64 rq_clock_task(struct rq *rq)
         return rq->clock_task;
  }
  
+#ifdef CONFIG_NUMA_BALANCING   /* prototypes below are only declared when CONFIG_NUMA_BALANCING is defined */
+extern void sched_setnuma(struct task_struct *p, int node);    /* defined outside this header */
+extern int migrate_task_to(struct task_struct *p, int cpu);    /* int result; meaning not visible here -- presumably 0 or -errno, confirm */
+extern int migrate_swap(struct task_struct *, struct task_struct *);   /* takes two tasks; parameter names are omitted in this prototype */
+#endif /* CONFIG_NUMA_BALANCING */
+
  #ifdef CONFIG_SMP
  
  #define rcu_dereference_check_sched_domain(p) \
@@ -593,9 +607,22 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
         return hsd;
  }
  
+static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
+{      /* Return the first domain visited by for_each_domain(@cpu) whose flags intersect @flag. */
+       struct sched_domain *sd;        /* cursor assigned by for_each_domain() */
+
+       for_each_domain(cpu, sd) {      /* NOTE(review): iteration order and locking rules come from the macro, defined elsewhere */
+               if (sd->flags & flag)   /* any bit of @flag set is enough to match */
+                       break;
+       }
+
+       return sd;      /* on no match this is whatever the macro leaves in sd -- presumably NULL, confirm */
+}
+
  DECLARE_PER_CPU(struct sched_domain *, sd_llc);
  DECLARE_PER_CPU(int, sd_llc_size);
  DECLARE_PER_CPU(int, sd_llc_id);
+DECLARE_PER_CPU(struct sched_domain *, sd_numa);
  
  struct sched_group_power {
         atomic_t ref;
@@ -605,6 +632,7 @@ struct sched_group_power {
          */
         unsigned int power, power_orig;
         unsigned long next_update;
+       int imbalance; /* XXX unrelated to power but shared group state */
         /*
          * Number of busy cpus in this group.
          */
@@ -719,6 +747,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
          */
         smp_wmb();
         task_thread_info(p)->cpu = cpu;
+       p->wake_cpu = cpu;
  #endif
  }
  
@@ -974,7 +1003,7 @@ struct sched_class {
         void (*put_prev_task) (struct rq *rq, struct task_struct *p);
  
  #ifdef CONFIG_SMP
-       int  (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
+       int  (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
         void (*migrate_task_rq)(struct task_struct *p, int next_cpu);
  
         void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
@@ -1220,6 +1249,24 @@ static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
         lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
  }
  
+static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
+{      /* Acquire both spinlocks, lower address first, so any two callers take a given pair in the same order. */
+       if (l1 > l2)            /* order the pair by address */
+               swap(l1, l2);   /* l1/l2 are by-value parameters, so only the local copies are exchanged */
+
+       spin_lock(l1);          /* lower-addressed lock first */
+       spin_lock_nested(l2, SINGLE_DEPTH_NESTING);     /* second lock taken with the _nested variant (lockdep nesting annotation) */
+}      /* NOTE(review): l1 == l2 is not checked; callers presumably pass two distinct locks -- confirm */
+
+static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
+{      /* Acquire both raw spinlocks, lower address first, so any two callers take a given pair in the same order. */
+       if (l1 > l2)            /* order the pair by address */
+               swap(l1, l2);   /* l1/l2 are by-value parameters, so only the local copies are exchanged */
+
+       raw_spin_lock(l1);      /* lower-addressed lock first */
+       raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING); /* second lock taken with the _nested variant (lockdep nesting annotation) */
+}      /* NOTE(review): l1 == l2 is not checked; callers presumably pass two distinct locks -- confirm */
+
  /*
   * double_rq_lock - safely lock two runqueues
   *