Merge branch 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6

[~andy/linux] / kernel / sched.c
diff --git a/kernel/sched.c b/kernel/sched.c

index 5edc549edae8cbb917b9556cea97cfb5e4d745de..f06950c8a6ce17b95624bccc63ecfb399d2eb40c 100644 (file)
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -668,6 +668,8 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
   */
  unsigned int sysctl_sched_rt_period = 1000000;
  
+static __read_mostly int scheduler_running; /* 0 until the last statement of sched_init(); cpu_clock() returns 0 while it is unset */
+
  /*
   * part of the period that we allow rt tasks to run in us.
   * default: 0.95s
@@ -689,14 +691,16 @@ unsigned long long cpu_clock(int cpu)
         unsigned long flags;
         struct rq *rq;
  
-       local_irq_save(flags);
-       rq = cpu_rq(cpu);
         /*
          * Only call sched_clock() if the scheduler has already been
          * initialized (some code might call cpu_clock() very early):
          */
-       if (rq->idle)
-               update_rq_clock(rq);
+       if (unlikely(!scheduler_running))
+               return 0;
+
+       local_irq_save(flags);
+       rq = cpu_rq(cpu);
+       update_rq_clock(rq);
         now = rq->clock;
         local_irq_restore(flags);
  
@@ -1831,6 +1835,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
         long old_state;
         struct rq *rq;
  
+       smp_wmb();
         rq = task_rq_lock(p, &flags);
         old_state = p->state;
         if (!(old_state & state))
@@ -3766,7 +3771,7 @@ void scheduler_tick(void)
  
  #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
  
-void add_preempt_count(int val)
+void __kprobes add_preempt_count(int val)
  {
         /*
          * Underflow?
@@ -3782,7 +3787,7 @@ void add_preempt_count(int val)
  }
  EXPORT_SYMBOL(add_preempt_count);
  
-void sub_preempt_count(int val)
+void __kprobes sub_preempt_count(int val)
  {
         /*
          * Underflow?
@@ -3884,7 +3889,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
  asmlinkage void __sched schedule(void)
  {
         struct task_struct *prev, *next;
-       long *switch_count;
+       unsigned long *switch_count;
         struct rq *rq;
         int cpu;
  
@@ -4584,6 +4589,15 @@ recheck:
                         return -EPERM;
         }
  
+#ifdef CONFIG_RT_GROUP_SCHED
+       /*
+        * Do not allow realtime tasks into groups that have no runtime
+        * assigned.
+        */
+       if (rt_policy(policy) && task_group(p)->rt_runtime == 0)
+               return -EPERM;
+#endif
+
         retval = security_task_setscheduler(p, policy, param);
         if (retval)
                 return retval;
@@ -7274,6 +7288,8 @@ void __init sched_init(void)
          * During early bootup we pretend to be a normal task:
          */
         current->sched_class = &fair_sched_class;
+
+       scheduler_running = 1;
  }
  
  #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
@@ -7559,57 +7575,29 @@ static int load_balance_monitor(void *unused)
  }
  #endif /* CONFIG_SMP */
  
-static void free_sched_group(struct task_group *tg)
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void free_fair_sched_group(struct task_group *tg)
  {
         int i;
  
         for_each_possible_cpu(i) {
-#ifdef CONFIG_FAIR_GROUP_SCHED
                 if (tg->cfs_rq)
                         kfree(tg->cfs_rq[i]);
                 if (tg->se)
                         kfree(tg->se[i]);
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-               if (tg->rt_rq)
-                       kfree(tg->rt_rq[i]);
-               if (tg->rt_se)
-                       kfree(tg->rt_se[i]);
-#endif
         }
  
-#ifdef CONFIG_FAIR_GROUP_SCHED
         kfree(tg->cfs_rq);
         kfree(tg->se);
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-       kfree(tg->rt_rq);
-       kfree(tg->rt_se);
-#endif
-       kfree(tg);
  }
  
-/* allocate runqueue etc for a new task group */
-struct task_group *sched_create_group(void)
+static int alloc_fair_sched_group(struct task_group *tg)
  {
-       struct task_group *tg;
-#ifdef CONFIG_FAIR_GROUP_SCHED
         struct cfs_rq *cfs_rq;
         struct sched_entity *se;
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-       struct rt_rq *rt_rq;
-       struct sched_rt_entity *rt_se;
-#endif
         struct rq *rq;
-       unsigned long flags;
         int i;
  
-       tg = kzalloc(sizeof(*tg), GFP_KERNEL);
-       if (!tg)
-               return ERR_PTR(-ENOMEM);
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
         tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL);
         if (!tg->cfs_rq)
                 goto err;
@@ -7618,23 +7606,10 @@ struct task_group *sched_create_group(void)
                 goto err;
  
         tg->shares = NICE_0_LOAD;
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
-       tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
-       if (!tg->rt_rq)
-               goto err;
-       tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL);
-       if (!tg->rt_se)
-               goto err;
-
-       tg->rt_runtime = 0;
-#endif
  
         for_each_possible_cpu(i) {
                 rq = cpu_rq(i);
  
-#ifdef CONFIG_FAIR_GROUP_SCHED
                 cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
                                 GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
                 if (!cfs_rq)
@@ -7646,9 +7621,78 @@ struct task_group *sched_create_group(void)
                         goto err;
  
                 init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
+       }
+
+       return 1;
+
+ err:
+       return 0;
+}
+
+static inline void register_fair_sched_group(struct task_group *tg, int cpu)
+{      /* RCU-add tg's cfs_rq for @cpu onto that cpu's rq->leaf_cfs_rq_list. */
+       list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list,
+                       &cpu_rq(cpu)->leaf_cfs_rq_list);
+}      /* the callers visible in this patch hold task_group_lock around the call */
+
+static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
+{      /* RCU-delete tg's cfs_rq for @cpu from whichever leaf list it is linked on. */
+       list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list);
+}      /* the callers visible in this patch hold task_group_lock around the call */
+#else /* !CONFIG_FAIR_GROUP_SCHED */
+static inline void free_fair_sched_group(struct task_group *tg)
+{      /* stub: intentionally empty when CONFIG_FAIR_GROUP_SCHED is off */
+}
+
+static inline int alloc_fair_sched_group(struct task_group *tg)
+{      /* stub: always returns 1, the value sched_create_group() accepts as success */
+       return 1;
+}
+
+static inline void register_fair_sched_group(struct task_group *tg, int cpu)
+{      /* stub: intentionally empty when CONFIG_FAIR_GROUP_SCHED is off */
+}
+
+static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
+{      /* stub: intentionally empty when CONFIG_FAIR_GROUP_SCHED is off */
+}
  #endif
  
  #ifdef CONFIG_RT_GROUP_SCHED
+static void free_rt_sched_group(struct task_group *tg)
+{      /* Free tg's per-cpu rt_rq and rt_se objects, then the two pointer arrays. */
+       int i;
+
+       for_each_possible_cpu(i) {
+               if (tg->rt_rq)  /* the array pointer may be NULL; do not index it then */
+                       kfree(tg->rt_rq[i]);
+               if (tg->rt_se)  /* same guard for the rt_se array */
+                       kfree(tg->rt_se[i]);
+       }
+
+       kfree(tg->rt_rq);
+       kfree(tg->rt_se);
+}      /* tg itself is not freed here; free_sched_group() does that */
+
+static int alloc_rt_sched_group(struct task_group *tg)
+{
+       struct rt_rq *rt_rq;
+       struct sched_rt_entity *rt_se;
+       struct rq *rq;
+       int i;
+
+       tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
+       if (!tg->rt_rq)
+               goto err;
+       tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL);
+       if (!tg->rt_se)
+               goto err;
+
+       tg->rt_runtime = 0;
+
+       for_each_possible_cpu(i) {
+               rq = cpu_rq(i);
+
                 rt_rq = kmalloc_node(sizeof(struct rt_rq),
                                 GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
                 if (!rt_rq)
@@ -7660,20 +7704,71 @@ struct task_group *sched_create_group(void)
                         goto err;
  
                 init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0);
-#endif
         }
  
+       return 1;
+
+ err:
+       return 0;
+}
+
+static inline void register_rt_sched_group(struct task_group *tg, int cpu)
+{      /* RCU-add tg's rt_rq for @cpu onto that cpu's rq->leaf_rt_rq_list. */
+       list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list,
+                       &cpu_rq(cpu)->leaf_rt_rq_list);
+}      /* sched_create_group() calls this with task_group_lock held */
+
+static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
+{      /* RCU-delete tg's rt_rq for @cpu from whichever leaf list it is linked on. */
+       list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list);
+}      /* sched_destroy_group() calls this with task_group_lock held */
+#else /* !CONFIG_RT_GROUP_SCHED */
+static inline void free_rt_sched_group(struct task_group *tg)
+{      /* stub: intentionally empty when CONFIG_RT_GROUP_SCHED is off */
+}
+
+static inline int alloc_rt_sched_group(struct task_group *tg)
+{      /* stub: always returns 1, the value sched_create_group() accepts as success */
+       return 1;
+}
+
+static inline void register_rt_sched_group(struct task_group *tg, int cpu)
+{      /* stub: intentionally empty when CONFIG_RT_GROUP_SCHED is off */
+}
+
+static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
+{      /* stub: intentionally empty when CONFIG_RT_GROUP_SCHED is off */
+}
+#endif /* CONFIG_RT_GROUP_SCHED */
+
+static void free_sched_group(struct task_group *tg)
+{      /* Release the fair-group and rt-group state of tg, then tg itself. */
+       free_fair_sched_group(tg);
+       free_rt_sched_group(tg);
+       kfree(tg);      /* last: the two helpers above still dereference tg */
+}
+
+/* allocate runqueue etc for a new task group */
+struct task_group *sched_create_group(void)
+{
+       struct task_group *tg;
+       unsigned long flags;
+       int i;
+
+       tg = kzalloc(sizeof(*tg), GFP_KERNEL);
+       if (!tg)
+               return ERR_PTR(-ENOMEM);
+
+       if (!alloc_fair_sched_group(tg))
+               goto err;
+
+       if (!alloc_rt_sched_group(tg))
+               goto err;
+
         spin_lock_irqsave(&task_group_lock, flags);
         for_each_possible_cpu(i) {
-               rq = cpu_rq(i);
-#ifdef CONFIG_FAIR_GROUP_SCHED
-               cfs_rq = tg->cfs_rq[i];
-               list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-               rt_rq = tg->rt_rq[i];
-               list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
-#endif
+               register_fair_sched_group(tg, i);
+               register_rt_sched_group(tg, i);
         }
         list_add_rcu(&tg->list, &task_groups);
         spin_unlock_irqrestore(&task_group_lock, flags);
@@ -7700,12 +7795,8 @@ void sched_destroy_group(struct task_group *tg)
  
         spin_lock_irqsave(&task_group_lock, flags);
         for_each_possible_cpu(i) {
-#ifdef CONFIG_FAIR_GROUP_SCHED
-               list_del_rcu(&tg->cfs_rq[i]->leaf_cfs_rq_list);
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-               list_del_rcu(&tg->rt_rq[i]->leaf_rt_rq_list);
-#endif
+               unregister_fair_sched_group(tg, i);
+               unregister_rt_sched_group(tg, i);
         }
         list_del_rcu(&tg->list);
         spin_unlock_irqrestore(&task_group_lock, flags);
@@ -7780,8 +7871,6 @@ static DEFINE_MUTEX(shares_mutex);
  int sched_group_set_shares(struct task_group *tg, unsigned long shares)
  {
         int i;
-       struct cfs_rq *cfs_rq;
-       struct rq *rq;
         unsigned long flags;
  
         mutex_lock(&shares_mutex);
@@ -7797,10 +7886,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
          * by taking it off the rq->leaf_cfs_rq_list on each cpu.
          */
         spin_lock_irqsave(&task_group_lock, flags);
-       for_each_possible_cpu(i) {
-               cfs_rq = tg->cfs_rq[i];
-               list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
-       }
+       for_each_possible_cpu(i)
+               unregister_fair_sched_group(tg, i);
         spin_unlock_irqrestore(&task_group_lock, flags);
  
         /* wait for any ongoing reference to this group to finish */
@@ -7822,11 +7909,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
          * each cpu's rq->leaf_cfs_rq_list.
          */
         spin_lock_irqsave(&task_group_lock, flags);
-       for_each_possible_cpu(i) {
-               rq = cpu_rq(i);
-               cfs_rq = tg->cfs_rq[i];
-               list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
-       }
+       for_each_possible_cpu(i)
+               register_fair_sched_group(tg, i);
         spin_unlock_irqrestore(&task_group_lock, flags);
  done:
         mutex_unlock(&shares_mutex);
@@ -7960,9 +8044,15 @@ static int
  cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
                       struct task_struct *tsk)
  {
+#ifdef CONFIG_RT_GROUP_SCHED
+       /* Don't accept realtime tasks when there is no way for them to run */
+       if (rt_task(tsk) && cgroup_tg(cgrp)->rt_runtime == 0)
+               return -EINVAL;
+#else /* !CONFIG_RT_GROUP_SCHED */
         /* We don't support RT-tasks being in separate groups */
         if (tsk->sched_class != &fair_sched_class)
                 return -EINVAL;
+#endif /* CONFIG_RT_GROUP_SCHED */
  
         return 0;
  }