Pileus Git - ~andy/linux/blobdiff - kernel/sched.c
Merge branch 'for-linus' of git://oss.sgi.com:8090/xfs/xfs-2.6
[~andy/linux] / kernel / sched.c
index 5edc549edae8cbb917b9556cea97cfb5e4d745de..f06950c8a6ce17b95624bccc63ecfb399d2eb40c 100644 (file)
@@ -668,6 +668,8 @@ const_debug unsigned int sysctl_sched_nr_migrate = 32;
  */
 unsigned int sysctl_sched_rt_period = 1000000;
 
+static __read_mostly int scheduler_running;
+
 /*
  * part of the period that we allow rt tasks to run in us.
  * default: 0.95s
@@ -689,14 +691,16 @@ unsigned long long cpu_clock(int cpu)
        unsigned long flags;
        struct rq *rq;
 
-       local_irq_save(flags);
-       rq = cpu_rq(cpu);
        /*
         * Only call sched_clock() if the scheduler has already been
         * initialized (some code might call cpu_clock() very early):
         */
-       if (rq->idle)
-               update_rq_clock(rq);
+       if (unlikely(!scheduler_running))
+               return 0;
+
+       local_irq_save(flags);
+       rq = cpu_rq(cpu);
+       update_rq_clock(rq);
        now = rq->clock;
        local_irq_restore(flags);
 
@@ -1831,6 +1835,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
        long old_state;
        struct rq *rq;
 
+       smp_wmb();
        rq = task_rq_lock(p, &flags);
        old_state = p->state;
        if (!(old_state & state))
@@ -3766,7 +3771,7 @@ void scheduler_tick(void)
 
 #if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
 
-void add_preempt_count(int val)
+void __kprobes add_preempt_count(int val)
 {
        /*
         * Underflow?
@@ -3782,7 +3787,7 @@ void add_preempt_count(int val)
 }
 EXPORT_SYMBOL(add_preempt_count);
 
-void sub_preempt_count(int val)
+void __kprobes sub_preempt_count(int val)
 {
        /*
         * Underflow?
@@ -3884,7 +3889,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev)
 asmlinkage void __sched schedule(void)
 {
        struct task_struct *prev, *next;
-       long *switch_count;
+       unsigned long *switch_count;
        struct rq *rq;
        int cpu;
 
@@ -4584,6 +4589,15 @@ recheck:
                        return -EPERM;
        }
 
+#ifdef CONFIG_RT_GROUP_SCHED
+       /*
+        * Do not allow realtime tasks into groups that have no runtime
+        * assigned.
+        */
+       if (rt_policy(policy) && task_group(p)->rt_runtime == 0)
+               return -EPERM;
+#endif
+
        retval = security_task_setscheduler(p, policy, param);
        if (retval)
                return retval;
@@ -7274,6 +7288,8 @@ void __init sched_init(void)
         * During early bootup we pretend to be a normal task:
         */
        current->sched_class = &fair_sched_class;
+
+       scheduler_running = 1;
 }
 
 #ifdef CONFIG_DEBUG_SPINLOCK_SLEEP
@@ -7559,57 +7575,29 @@ static int load_balance_monitor(void *unused)
 }
 #endif /* CONFIG_SMP */
 
-static void free_sched_group(struct task_group *tg)
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void free_fair_sched_group(struct task_group *tg)
 {
        int i;
 
        for_each_possible_cpu(i) {
-#ifdef CONFIG_FAIR_GROUP_SCHED
                if (tg->cfs_rq)
                        kfree(tg->cfs_rq[i]);
                if (tg->se)
                        kfree(tg->se[i]);
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-               if (tg->rt_rq)
-                       kfree(tg->rt_rq[i]);
-               if (tg->rt_se)
-                       kfree(tg->rt_se[i]);
-#endif
        }
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
        kfree(tg->cfs_rq);
        kfree(tg->se);
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-       kfree(tg->rt_rq);
-       kfree(tg->rt_se);
-#endif
-       kfree(tg);
 }
 
-/* allocate runqueue etc for a new task group */
-struct task_group *sched_create_group(void)
+static int alloc_fair_sched_group(struct task_group *tg)
 {
-       struct task_group *tg;
-#ifdef CONFIG_FAIR_GROUP_SCHED
        struct cfs_rq *cfs_rq;
        struct sched_entity *se;
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-       struct rt_rq *rt_rq;
-       struct sched_rt_entity *rt_se;
-#endif
        struct rq *rq;
-       unsigned long flags;
        int i;
 
-       tg = kzalloc(sizeof(*tg), GFP_KERNEL);
-       if (!tg)
-               return ERR_PTR(-ENOMEM);
-
-#ifdef CONFIG_FAIR_GROUP_SCHED
        tg->cfs_rq = kzalloc(sizeof(cfs_rq) * NR_CPUS, GFP_KERNEL);
        if (!tg->cfs_rq)
                goto err;
@@ -7618,23 +7606,10 @@ struct task_group *sched_create_group(void)
                goto err;
 
        tg->shares = NICE_0_LOAD;
-#endif
-
-#ifdef CONFIG_RT_GROUP_SCHED
-       tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
-       if (!tg->rt_rq)
-               goto err;
-       tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL);
-       if (!tg->rt_se)
-               goto err;
-
-       tg->rt_runtime = 0;
-#endif
 
        for_each_possible_cpu(i) {
                rq = cpu_rq(i);
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
                cfs_rq = kmalloc_node(sizeof(struct cfs_rq),
                                GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
                if (!cfs_rq)
@@ -7646,9 +7621,78 @@ struct task_group *sched_create_group(void)
                        goto err;
 
                init_tg_cfs_entry(rq, tg, cfs_rq, se, i, 0);
+       }
+
+       return 1;
+
+ err:
+       return 0;
+}
+
+static inline void register_fair_sched_group(struct task_group *tg, int cpu)
+{
+       list_add_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list,
+                       &cpu_rq(cpu)->leaf_cfs_rq_list);
+}
+
+static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
+{
+       list_del_rcu(&tg->cfs_rq[cpu]->leaf_cfs_rq_list);
+}
+#else
+static inline void free_fair_sched_group(struct task_group *tg)
+{
+}
+
+static inline int alloc_fair_sched_group(struct task_group *tg)
+{
+       return 1;
+}
+
+static inline void register_fair_sched_group(struct task_group *tg, int cpu)
+{
+}
+
+static inline void unregister_fair_sched_group(struct task_group *tg, int cpu)
+{
+}
 #endif
 
 #ifdef CONFIG_RT_GROUP_SCHED
+static void free_rt_sched_group(struct task_group *tg)
+{
+       int i;
+
+       for_each_possible_cpu(i) {
+               if (tg->rt_rq)
+                       kfree(tg->rt_rq[i]);
+               if (tg->rt_se)
+                       kfree(tg->rt_se[i]);
+       }
+
+       kfree(tg->rt_rq);
+       kfree(tg->rt_se);
+}
+
+static int alloc_rt_sched_group(struct task_group *tg)
+{
+       struct rt_rq *rt_rq;
+       struct sched_rt_entity *rt_se;
+       struct rq *rq;
+       int i;
+
+       tg->rt_rq = kzalloc(sizeof(rt_rq) * NR_CPUS, GFP_KERNEL);
+       if (!tg->rt_rq)
+               goto err;
+       tg->rt_se = kzalloc(sizeof(rt_se) * NR_CPUS, GFP_KERNEL);
+       if (!tg->rt_se)
+               goto err;
+
+       tg->rt_runtime = 0;
+
+       for_each_possible_cpu(i) {
+               rq = cpu_rq(i);
+
                rt_rq = kmalloc_node(sizeof(struct rt_rq),
                                GFP_KERNEL|__GFP_ZERO, cpu_to_node(i));
                if (!rt_rq)
@@ -7660,20 +7704,71 @@ struct task_group *sched_create_group(void)
                        goto err;
 
                init_tg_rt_entry(rq, tg, rt_rq, rt_se, i, 0);
-#endif
        }
 
+       return 1;
+
+ err:
+       return 0;
+}
+
+static inline void register_rt_sched_group(struct task_group *tg, int cpu)
+{
+       list_add_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list,
+                       &cpu_rq(cpu)->leaf_rt_rq_list);
+}
+
+static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
+{
+       list_del_rcu(&tg->rt_rq[cpu]->leaf_rt_rq_list);
+}
+#else
+static inline void free_rt_sched_group(struct task_group *tg)
+{
+}
+
+static inline int alloc_rt_sched_group(struct task_group *tg)
+{
+       return 1;
+}
+
+static inline void register_rt_sched_group(struct task_group *tg, int cpu)
+{
+}
+
+static inline void unregister_rt_sched_group(struct task_group *tg, int cpu)
+{
+}
+#endif
+
+static void free_sched_group(struct task_group *tg)
+{
+       free_fair_sched_group(tg);
+       free_rt_sched_group(tg);
+       kfree(tg);
+}
+
+/* allocate runqueue etc for a new task group */
+struct task_group *sched_create_group(void)
+{
+       struct task_group *tg;
+       unsigned long flags;
+       int i;
+
+       tg = kzalloc(sizeof(*tg), GFP_KERNEL);
+       if (!tg)
+               return ERR_PTR(-ENOMEM);
+
+       if (!alloc_fair_sched_group(tg))
+               goto err;
+
+       if (!alloc_rt_sched_group(tg))
+               goto err;
+
        spin_lock_irqsave(&task_group_lock, flags);
        for_each_possible_cpu(i) {
-               rq = cpu_rq(i);
-#ifdef CONFIG_FAIR_GROUP_SCHED
-               cfs_rq = tg->cfs_rq[i];
-               list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-               rt_rq = tg->rt_rq[i];
-               list_add_rcu(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
-#endif
+               register_fair_sched_group(tg, i);
+               register_rt_sched_group(tg, i);
        }
        list_add_rcu(&tg->list, &task_groups);
        spin_unlock_irqrestore(&task_group_lock, flags);
@@ -7700,12 +7795,8 @@ void sched_destroy_group(struct task_group *tg)
 
        spin_lock_irqsave(&task_group_lock, flags);
        for_each_possible_cpu(i) {
-#ifdef CONFIG_FAIR_GROUP_SCHED
-               list_del_rcu(&tg->cfs_rq[i]->leaf_cfs_rq_list);
-#endif
-#ifdef CONFIG_RT_GROUP_SCHED
-               list_del_rcu(&tg->rt_rq[i]->leaf_rt_rq_list);
-#endif
+               unregister_fair_sched_group(tg, i);
+               unregister_rt_sched_group(tg, i);
        }
        list_del_rcu(&tg->list);
        spin_unlock_irqrestore(&task_group_lock, flags);
@@ -7780,8 +7871,6 @@ static DEFINE_MUTEX(shares_mutex);
 int sched_group_set_shares(struct task_group *tg, unsigned long shares)
 {
        int i;
-       struct cfs_rq *cfs_rq;
-       struct rq *rq;
        unsigned long flags;
 
        mutex_lock(&shares_mutex);
@@ -7797,10 +7886,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
         * by taking it off the rq->leaf_cfs_rq_list on each cpu.
         */
        spin_lock_irqsave(&task_group_lock, flags);
-       for_each_possible_cpu(i) {
-               cfs_rq = tg->cfs_rq[i];
-               list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
-       }
+       for_each_possible_cpu(i)
+               unregister_fair_sched_group(tg, i);
        spin_unlock_irqrestore(&task_group_lock, flags);
 
        /* wait for any ongoing reference to this group to finish */
@@ -7822,11 +7909,8 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
         * each cpu's rq->leaf_cfs_rq_list.
         */
        spin_lock_irqsave(&task_group_lock, flags);
-       for_each_possible_cpu(i) {
-               rq = cpu_rq(i);
-               cfs_rq = tg->cfs_rq[i];
-               list_add_rcu(&cfs_rq->leaf_cfs_rq_list, &rq->leaf_cfs_rq_list);
-       }
+       for_each_possible_cpu(i)
+               register_fair_sched_group(tg, i);
        spin_unlock_irqrestore(&task_group_lock, flags);
 done:
        mutex_unlock(&shares_mutex);
@@ -7960,9 +8044,15 @@ static int
 cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp,
                      struct task_struct *tsk)
 {
+#ifdef CONFIG_RT_GROUP_SCHED
+       /* Don't accept realtime tasks when there is no way for them to run */
+       if (rt_task(tsk) && cgroup_tg(cgrp)->rt_runtime == 0)
+               return -EINVAL;
+#else
        /* We don't support RT-tasks being in separate groups */
        if (tsk->sched_class != &fair_sched_class)
                return -EINVAL;
+#endif
 
        return 0;
 }