sched/balancing: Periodically decay max cost of idle balance

author Jason Low <jason.low2@hp.com>
Fri, 13 Sep 2013 18:26:53 +0000 (11:26 -0700)
committer Ingo Molnar <mingo@kernel.org>
Fri, 20 Sep 2013 10:03:46 +0000 (12:03 +0200)
diff --git a/arch/metag/include/asm/topology.h b/arch/metag/include/asm/topology.h

index db192924f4b0cadb09cbecdf77f4d782aab526a0..8e9c0b3b9691f3746f0ce7efec4acffd0ebdfe68 100644 (file)
--- a/arch/metag/include/asm/topology.h
+++ b/arch/metag/include/asm/topology.h
@@ -27,6 +27,7 @@
         .balance_interval       = 1,                    \
         .nr_balance_failed      = 0,                    \
         .max_newidle_lb_cost    = 0,                    \
+       .next_decay_max_lb_cost = jiffies,              \
  }
  
  #define cpu_to_node(cpu)       ((void)(cpu), 0)
diff --git a/include/linux/sched.h b/include/linux/sched.h

index be078ff9157f22de1393ad5d05affd0ab086f0cd..b5344de1658ba17deda686faf7b1317e09445c69 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -810,7 +810,10 @@ struct sched_domain {
         unsigned int nr_balance_failed; /* initialise to 0 */
  
         u64 last_update;
+
+       /* idle_balance() stats */
         u64 max_newidle_lb_cost;
+       unsigned long next_decay_max_lb_cost;
  
  #ifdef CONFIG_SCHEDSTATS
         /* load_balance() stats */
diff --git a/include/linux/topology.h b/include/linux/topology.h

index e2a2c3da29292127b634dcb0de89388da08d4094..12ae6ce997d6be9f658fb2956a957966267028b5 100644 (file)
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -107,6 +107,7 @@ int arch_update_cpu_topology(void);
         .balance_interval       = 1,                                    \
         .smt_gain               = 1178, /* 15% */                       \
         .max_newidle_lb_cost    = 0,                                    \
+       .next_decay_max_lb_cost = jiffies,                              \
  }
  #endif
  #endif /* CONFIG_SCHED_SMT */
@@ -137,6 +138,7 @@ int arch_update_cpu_topology(void);
         .last_balance           = jiffies,                              \
         .balance_interval       = 1,                                    \
         .max_newidle_lb_cost    = 0,                                    \
+       .next_decay_max_lb_cost = jiffies,                              \
  }
  #endif
  #endif /* CONFIG_SCHED_MC */
@@ -169,6 +171,7 @@ int arch_update_cpu_topology(void);
         .last_balance           = jiffies,                              \
         .balance_interval       = 1,                                    \
         .max_newidle_lb_cost    = 0,                                    \
+       .next_decay_max_lb_cost = jiffies,                              \
  }
  #endif
  
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index ffc99d8f0a95119f1d6ac8e910b32bc25adf058d..2b89cd244b0d75e2e08b2b3d0450d2c8d1f5319a 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5681,15 +5681,39 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle)
         /* Earliest time when we have to do rebalance again */
         unsigned long next_balance = jiffies + 60*HZ;
         int update_next_balance = 0;
-       int need_serialize;
+       int need_serialize, need_decay = 0;
+       u64 max_cost = 0;
  
         update_blocked_averages(cpu);
  
         rcu_read_lock();
         for_each_domain(cpu, sd) {
+               /*
+                * Decay the newidle max times here because this is a regular
+                * visit to all the domains. Decay ~1% per second.
+                */
+               if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
+                       sd->max_newidle_lb_cost =
+                               (sd->max_newidle_lb_cost * 253) / 256;
+                       sd->next_decay_max_lb_cost = jiffies + HZ;
+                       need_decay = 1;
+               }
+               max_cost += sd->max_newidle_lb_cost;
+
                 if (!(sd->flags & SD_LOAD_BALANCE))
                         continue;
  
+               /*
+                * Stop the load balance at this level. There is another
+                * CPU in our sched group which is doing load balancing more
+                * actively.
+                */
+               if (!continue_balancing) {
+                       if (need_decay)
+                               continue;
+                       break;
+               }
+
                 interval = sd->balance_interval;
                 if (idle != CPU_IDLE)
                         interval *= sd->busy_factor;
@@ -5723,14 +5747,14 @@ out:
                         next_balance = sd->last_balance + interval;
                         update_next_balance = 1;
                 }
-
+       }
+       if (need_decay) {
                 /*
-                * Stop the load balance at this level. There is another
-                * CPU in our sched group which is doing load balancing more
-                * actively.
+                * Ensure the rq-wide value also decays but keep it at a
+                * reasonable floor to avoid funnies with rq->avg_idle.
                  */
-               if (!continue_balancing)
-                       break;
+               rq->max_idle_balance_cost =
+                       max((u64)sysctl_sched_migration_cost, max_cost);
         }
         rcu_read_unlock();
author	Jason Low <jason.low2@hp.com>
	Fri, 13 Sep 2013 18:26:53 +0000 (11:26 -0700)
committer	Ingo Molnar <mingo@kernel.org>
	Fri, 20 Sep 2013 10:03:46 +0000 (12:03 +0200)
arch/metag/include/asm/topology.h		patch | blob | history
include/linux/sched.h		patch | blob | history
include/linux/topology.h		patch | blob | history
kernel/sched/fair.c		patch | blob | history