Pileus Git - ~andy/linux/blobdiff - arch/powerpc/mm/numa.c
powerpc/pseries: Use stop machine to update cpu maps
[~andy/linux] / arch / powerpc / mm / numa.c
index bba87ca2b4d78a1a8fbedd70e6122d583d365d79..e8d1aeb6348cde24bcf93b65e6a6e54874ad0d76 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/pfn.h>
 #include <linux/cpuset.h>
 #include <linux/node.h>
+#include <linux/stop_machine.h>
 #include <asm/sparsemem.h>
 #include <asm/prom.h>
 #include <asm/smp.h>
@@ -79,7 +80,7 @@ static void __init setup_node_to_cpumask_map(void)
        dbg("Node to cpumask map for %d nodes\n", nr_node_ids);
 }
 
-static int __cpuinit fake_numa_create_new_node(unsigned long end_pfn,
+static int __init fake_numa_create_new_node(unsigned long end_pfn,
                                                unsigned int *nid)
 {
        unsigned long long mem;
@@ -201,7 +202,7 @@ int __node_distance(int a, int b)
        int distance = LOCAL_DISTANCE;
 
        if (!form1_affinity)
-               return distance;
+               return ((a == b) ? LOCAL_DISTANCE : REMOTE_DISTANCE);
 
        for (i = 0; i < distance_ref_points_depth; i++) {
                if (distance_lookup_table[a][i] == distance_lookup_table[b][i])
@@ -291,9 +292,7 @@ EXPORT_SYMBOL_GPL(of_node_to_nid);
 static int __init find_min_common_depth(void)
 {
        int depth;
-       struct device_node *chosen;
        struct device_node *root;
-       const char *vec5;
 
        if (firmware_has_feature(FW_FEATURE_OPAL))
                root = of_find_node_by_path("/ibm,opal");
@@ -325,24 +324,10 @@ static int __init find_min_common_depth(void)
 
        distance_ref_points_depth /= sizeof(int);
 
-#define VEC5_AFFINITY_BYTE     5
-#define VEC5_AFFINITY          0x80
-
-       if (firmware_has_feature(FW_FEATURE_OPAL))
+       if (firmware_has_feature(FW_FEATURE_OPAL) ||
+           firmware_has_feature(FW_FEATURE_TYPE1_AFFINITY)) {
+               dbg("Using form 1 affinity\n");
                form1_affinity = 1;
-       else {
-               chosen = of_find_node_by_path("/chosen");
-               if (chosen) {
-                       vec5 = of_get_property(chosen,
-                                              "ibm,architecture-vec-5", NULL);
-                       if (vec5 && (vec5[VEC5_AFFINITY_BYTE] &
-                                                       VEC5_AFFINITY)) {
-                               dbg("Using form 1 affinity\n");
-                               form1_affinity = 1;
-                       }
-
-                       of_node_put(chosen);
-               }
        }
 
        if (form1_affinity) {
@@ -1270,10 +1255,18 @@ u64 memory_hotplug_max(void)
 
 /* Virtual Processor Home Node (VPHN) support */
 #ifdef CONFIG_PPC_SPLPAR
+struct topology_update_data {
+       struct topology_update_data *next;
+       unsigned int cpu;
+       int old_nid;
+       int new_nid;
+};
+
 static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
 static cpumask_t cpu_associativity_changes_mask;
 static int vphn_enabled;
-static void set_topology_timer(void);
+static int prrn_enabled;
+static void reset_topology_timer(void);
 
 /*
  * Store the current values of the associativity change counters in the
@@ -1309,11 +1302,9 @@ static void setup_cpu_associativity_change_counters(void)
  */
 static int update_cpu_associativity_changes_mask(void)
 {
-       int cpu, nr_cpus = 0;
+       int cpu;
        cpumask_t *changes = &cpu_associativity_changes_mask;
 
-       cpumask_clear(changes);
-
        for_each_possible_cpu(cpu) {
                int i, changed = 0;
                u8 *counts = vphn_cpu_change_counts[cpu];
@@ -1327,11 +1318,10 @@ static int update_cpu_associativity_changes_mask(void)
                }
                if (changed) {
                        cpumask_set_cpu(cpu, changes);
-                       nr_cpus++;
                }
        }
 
-       return nr_cpus;
+       return cpumask_weight(changes);
 }
 
 /*
@@ -1422,41 +1412,80 @@ static long vphn_get_associativity(unsigned long cpu,
        return rc;
 }
 
+/*
+ * Update the CPU maps and sysfs entries for a single CPU when its NUMA
+ * characteristics change. This function doesn't perform any locking and is
+ * only safe to call from stop_machine().
+ */
+static int update_cpu_topology(void *data)
+{
+       struct topology_update_data *update;
+       unsigned long cpu;
+
+       if (!data)
+               return -EINVAL;
+
+       cpu = get_cpu();
+
+       for (update = data; update; update = update->next) {
+               if (cpu != update->cpu)
+                       continue;
+
+               unregister_cpu_under_node(update->cpu, update->old_nid);
+               unmap_cpu_from_node(update->cpu);
+               map_cpu_to_node(update->cpu, update->new_nid);
+               register_cpu_under_node(update->cpu, update->new_nid);
+       }
+
+       return 0;
+}
+
 /*
  * Update the node maps and sysfs entries for each cpu whose home node
  * has changed. Returns 1 when the topology has changed, and 0 otherwise.
  */
 int arch_update_cpu_topology(void)
 {
-       int cpu, nid, old_nid, changed = 0;
+       unsigned int cpu, changed = 0;
+       struct topology_update_data *updates, *ud;
        unsigned int associativity[VPHN_ASSOC_BUFSIZE] = {0};
        struct device *dev;
+       int weight, i = 0;
+
+       weight = cpumask_weight(&cpu_associativity_changes_mask);
+       if (!weight)
+               return 0;
 
-       for_each_cpu(cpu,&cpu_associativity_changes_mask) {
+       updates = kzalloc(weight * (sizeof(*updates)), GFP_KERNEL);
+       if (!updates)
+               return 0;
+
+       for_each_cpu(cpu, &cpu_associativity_changes_mask) {
+               ud = &updates[i++];
+               ud->cpu = cpu;
                vphn_get_associativity(cpu, associativity);
-               nid = associativity_to_nid(associativity);
+               ud->new_nid = associativity_to_nid(associativity);
 
-               if (nid < 0 || !node_online(nid))
-                       nid = first_online_node;
+               if (ud->new_nid < 0 || !node_online(ud->new_nid))
+                       ud->new_nid = first_online_node;
 
-               old_nid = numa_cpu_lookup_table[cpu];
+               ud->old_nid = numa_cpu_lookup_table[cpu];
 
-               /* Disable hotplug while we update the cpu
-                * masks and sysfs.
-                */
-               get_online_cpus();
-               unregister_cpu_under_node(cpu, old_nid);
-               unmap_cpu_from_node(cpu);
-               map_cpu_to_node(cpu, nid);
-               register_cpu_under_node(cpu, nid);
-               put_online_cpus();
-
-               dev = get_cpu_device(cpu);
+               if (i < weight)
+                       ud->next = &updates[i];
+       }
+
+       stop_machine(update_cpu_topology, &updates[0], cpu_online_mask);
+
+       for (ud = &updates[0]; ud; ud = ud->next) {
+               dev = get_cpu_device(ud->cpu);
                if (dev)
                        kobject_uevent(&dev->kobj, KOBJ_CHANGE);
+               cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
                changed = 1;
        }
 
+       kfree(updates);
        return changed;
 }
 
@@ -1473,37 +1502,80 @@ void topology_schedule_update(void)
 
 static void topology_timer_fn(unsigned long ignored)
 {
-       if (!vphn_enabled)
-               return;
-       if (update_cpu_associativity_changes_mask() > 0)
+       if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
                topology_schedule_update();
-       set_topology_timer();
+       else if (vphn_enabled) {
+               if (update_cpu_associativity_changes_mask() > 0)
+                       topology_schedule_update();
+               reset_topology_timer();
+       }
 }
 static struct timer_list topology_timer =
        TIMER_INITIALIZER(topology_timer_fn, 0, 0);
 
-static void set_topology_timer(void)
+static void reset_topology_timer(void)
 {
        topology_timer.data = 0;
        topology_timer.expires = jiffies + 60 * HZ;
-       add_timer(&topology_timer);
+       mod_timer(&topology_timer, topology_timer.expires);
+}
+
+static void stage_topology_update(int core_id)
+{
+       cpumask_or(&cpu_associativity_changes_mask,
+               &cpu_associativity_changes_mask, cpu_sibling_mask(core_id));
+       reset_topology_timer();
 }
 
+static int dt_update_callback(struct notifier_block *nb,
+                               unsigned long action, void *data)
+{
+       struct of_prop_reconfig *update;
+       int rc = NOTIFY_DONE;
+
+       switch (action) {
+       case OF_RECONFIG_UPDATE_PROPERTY:
+               update = (struct of_prop_reconfig *)data;
+               if (!of_prop_cmp(update->dn->type, "cpu") &&
+                   !of_prop_cmp(update->prop->name, "ibm,associativity")) {
+                       u32 core_id;
+                       of_property_read_u32(update->dn, "reg", &core_id);
+                       stage_topology_update(core_id);
+                       rc = NOTIFY_OK;
+               }
+               break;
+       }
+
+       return rc;
+}
+
+static struct notifier_block dt_update_nb = {
+       .notifier_call = dt_update_callback,
+};
+
 /*
- * Start polling for VPHN associativity changes.
+ * Start polling for associativity changes.
  */
 int start_topology_update(void)
 {
        int rc = 0;
 
-       /* Disabled until races with load balancing are fixed */
-       if (0 && firmware_has_feature(FW_FEATURE_VPHN) &&
-           get_lppaca()->shared_proc) {
-               vphn_enabled = 1;
-               setup_cpu_associativity_change_counters();
-               init_timer_deferrable(&topology_timer);
-               set_topology_timer();
-               rc = 1;
+       if (firmware_has_feature(FW_FEATURE_PRRN)) {
+               if (!prrn_enabled) {
+                       prrn_enabled = 1;
+                       vphn_enabled = 0;
+                       rc = of_reconfig_notifier_register(&dt_update_nb);
+               }
+       } else if (0 && firmware_has_feature(FW_FEATURE_VPHN) &&
+                  get_lppaca()->shared_proc) {
+               /* Disabled until races with load balancing are fixed */
+               if (!vphn_enabled) {
+                       prrn_enabled = 0;
+                       vphn_enabled = 1;
+                       setup_cpu_associativity_change_counters();
+                       init_timer_deferrable(&topology_timer);
+                       reset_topology_timer();
+               }
        }
 
        return rc;
@@ -1515,7 +1587,16 @@ __initcall(start_topology_update);
  */
 int stop_topology_update(void)
 {
-       vphn_enabled = 0;
-       return del_timer_sync(&topology_timer);
+       int rc = 0;
+
+       if (prrn_enabled) {
+               prrn_enabled = 0;
+               rc = of_reconfig_notifier_unregister(&dt_update_nb);
+       } else if (vphn_enabled) {
+               vphn_enabled = 0;
+               rc = del_timer_sync(&topology_timer);
+       }
+
+       return rc;
 }
 #endif /* CONFIG_PPC_SPLPAR */