mm: numa: Do not group on RO pages

author Peter Zijlstra <peterz@infradead.org>

Mon, 7 Oct 2013 10:29:24 +0000 (11:29 +0100)

committer Ingo Molnar <mingo@kernel.org>

Wed, 9 Oct 2013 12:47:53 +0000 (14:47 +0200)
author Peter Zijlstra <peterz@infradead.org>
Mon, 7 Oct 2013 10:29:24 +0000 (11:29 +0100)
committer Ingo Molnar <mingo@kernel.org>
Wed, 9 Oct 2013 12:47:53 +0000 (14:47 +0200)
diff --git a/include/linux/sched.h b/include/linux/sched.h

index b0b343b1ba64e8ddc4eb2eb16e8219df34a404f5..ff543851a18a888e2d44ae5b784f8fea91761767 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1450,13 +1450,16 @@ struct task_struct {
  /* Future-safe accessor for struct task_struct's cpus_allowed. */
  #define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
  
+#define TNF_MIGRATED   0x01
+#define TNF_NO_GROUP   0x02
+
  #ifdef CONFIG_NUMA_BALANCING
-extern void task_numa_fault(int last_node, int node, int pages, bool migrated);
+extern void task_numa_fault(int last_node, int node, int pages, int flags);
  extern pid_t task_numa_group_id(struct task_struct *p);
  extern void set_numabalancing_state(bool enabled);
  #else
  static inline void task_numa_fault(int last_node, int node, int pages,
-                                  bool migrated)
+                                  int flags)
  {
  }
  static inline pid_t task_numa_group_id(struct task_struct *p)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 5bd309c035c7f89fb5f8127188dc77c855cb3baf..35661b8afb4e042b16ad79fc9b718df74089c94d 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1361,9 +1361,10 @@ void task_numa_free(struct task_struct *p)
  /*
   * Got a PROT_NONE fault for a page on @node.
   */
-void task_numa_fault(int last_cpupid, int node, int pages, bool migrated)
+void task_numa_fault(int last_cpupid, int node, int pages, int flags)
  {
         struct task_struct *p = current;
+       bool migrated = flags & TNF_MIGRATED;
         int priv;
  
         if (!numabalancing_enabled)
@@ -1394,7 +1395,7 @@ void task_numa_fault(int last_cpupid, int node, int pages, bool migrated)
                 priv = 1;
         } else {
                 priv = cpupid_match_pid(p, last_cpupid);
-               if (!priv)
+               if (!priv && !(flags & TNF_NO_GROUP))
                         task_numa_group(p, last_cpupid);
         }
  
diff --git a/mm/huge_memory.c b/mm/huge_memory.c

index becf92ca54f3891930f47edb6c8c70462598270d..7ab4e32afe12b4798d625c97ec22d14067b89991 100644 (file)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1285,6 +1285,7 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         int target_nid, last_cpupid = -1;
         bool page_locked;
         bool migrated = false;
+       int flags = 0;
  
         spin_lock(&mm->page_table_lock);
         if (unlikely(!pmd_same(pmd, *pmdp)))
@@ -1298,6 +1299,14 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         if (page_nid == this_nid)
                 count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
  
+       /*
+        * Avoid grouping on DSO/COW pages in particular and RO pages
+        * in general; RO pages shouldn't hurt as much anyway since
+        * they can be in shared cache state.
+        */
+       if (!pmd_write(pmd))
+               flags |= TNF_NO_GROUP;
+
         /*
          * Acquire the page lock to serialise THP migrations but avoid dropping
          * page_table_lock if at all possible
@@ -1343,8 +1352,10 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         spin_unlock(&mm->page_table_lock);
         migrated = migrate_misplaced_transhuge_page(mm, vma,
                                 pmdp, pmd, addr, page, target_nid);
-       if (migrated)
+       if (migrated) {
+               flags |= TNF_MIGRATED;
                 page_nid = target_nid;
+       }
  
         goto out;
  clear_pmdnuma:
@@ -1362,7 +1373,7 @@ out:
                 page_unlock_anon_vma_read(anon_vma);
  
         if (page_nid != -1)
-               task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, migrated);
+               task_numa_fault(last_cpupid, page_nid, HPAGE_PMD_NR, flags);
  
         return 0;
  }
diff --git a/mm/memory.c b/mm/memory.c

index c57efa25cdbb235fa05d3a93222d5d72c9b0c387..eba846bcf124fc61f390749aa0861e8d0574eba5 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3547,6 +3547,7 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         int last_cpupid;
         int target_nid;
         bool migrated = false;
+       int flags = 0;
  
         /*
         * The "pte" at this point cannot be used safely without
@@ -3575,6 +3576,14 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
         }
         BUG_ON(is_zero_pfn(page_to_pfn(page)));
  
+       /*
+        * Avoid grouping on DSO/COW pages in particular and RO pages
+        * in general; RO pages shouldn't hurt as much anyway since
+        * they can be in shared cache state.
+        */
+       if (!pte_write(pte))
+               flags |= TNF_NO_GROUP;
+
         last_cpupid = page_cpupid_last(page);
         page_nid = page_to_nid(page);
         target_nid = numa_migrate_prep(page, vma, addr, page_nid);
@@ -3586,12 +3595,14 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
  
         /* Migrate to the requested node */
         migrated = migrate_misplaced_page(page, vma, target_nid);
-       if (migrated)
+       if (migrated) {
                 page_nid = target_nid;
+               flags |= TNF_MIGRATED;
+       }
  
  out:
         if (page_nid != -1)
-               task_numa_fault(last_cpupid, page_nid, 1, migrated);
+               task_numa_fault(last_cpupid, page_nid, 1, flags);
         return 0;
  }
  
@@ -3632,6 +3643,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 int page_nid = -1;
                 int target_nid;
                 bool migrated = false;
+               int flags = 0;
  
                 if (!pte_present(pteval))
                         continue;
@@ -3651,20 +3663,30 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 if (unlikely(!page))
                         continue;
  
+               /*
+                * Avoid grouping on DSO/COW pages in particular and RO pages
+                * in general; RO pages shouldn't hurt as much anyway since
+                * they can be in shared cache state.
+                */
+               if (!pte_write(pteval))
+                       flags |= TNF_NO_GROUP;
+
                 last_cpupid = page_cpupid_last(page);
                 page_nid = page_to_nid(page);
                 target_nid = numa_migrate_prep(page, vma, addr, page_nid);
                 pte_unmap_unlock(pte, ptl);
                 if (target_nid != -1) {
                         migrated = migrate_misplaced_page(page, vma, target_nid);
-                       if (migrated)
+                       if (migrated) {
                                 page_nid = target_nid;
+                               flags |= TNF_MIGRATED;
+                       }
                 } else {
                         put_page(page);
                 }
  
                 if (page_nid != -1)
-                       task_numa_fault(last_cpupid, page_nid, 1, migrated);
+                       task_numa_fault(last_cpupid, page_nid, 1, flags);
  
                 pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
         }
author	Peter Zijlstra <peterz@infradead.org>
	Mon, 7 Oct 2013 10:29:24 +0000 (11:29 +0100)
committer	Ingo Molnar <mingo@kernel.org>
	Wed, 9 Oct 2013 12:47:53 +0000 (14:47 +0200)
include/linux/sched.h		patch \| blob \| history
kernel/sched/fair.c		patch \| blob \| history
mm/huge_memory.c		patch \| blob \| history
mm/memory.c		patch \| blob \| history