Pileus Git - ~andy/linux/blobdiff - mm/memcontrol.c
Merge branch 'timers-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
index e3cd40b2d5d926d45344bcdf2c513c2ee074c438..bf5e8945714944f896e2dc79b33b75a491da4794 100644 (file)
@@ -2694,7 +2694,10 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
                goto bypass;
 
        if (unlikely(task_in_memcg_oom(current)))
-               goto bypass;
+               goto nomem;
+
+       if (gfp_mask & __GFP_NOFAIL)
+               oom = false;
 
        /*
         * We always charge the cgroup the mm_struct belongs to.
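Taken together, the two changes above redirect a task that is already handling a memcg OOM to the error path instead of letting it bypass the charge, and exempt __GFP_NOFAIL charges from memcg OOM handling entirely. A condensed sketch of the reworked entry checks follows; only the two tests come from the hunk, while the helper wrapping them is illustrative scaffolding (the real checks live inline at the top of __mem_cgroup_try_charge()):

	static int charge_entry_checks(gfp_t gfp_mask, bool *oom)
	{
		/*
		 * A task already in memcg OOM must not recurse into
		 * charging: fail with -ENOMEM (formerly it bypassed
		 * the charge) so the page fault path can synchronize
		 * with the OOM handling instead.
		 */
		if (unlikely(task_in_memcg_oom(current)))
			return -ENOMEM;

		/*
		 * __GFP_NOFAIL charges are not allowed to fail, so
		 * they must never reach the memcg OOM killer either.
		 */
		if (gfp_mask & __GFP_NOFAIL)
			*oom = false;

		return 0;
	}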
@@ -6352,6 +6355,42 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 static void mem_cgroup_css_free(struct cgroup_subsys_state *css)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+       /*
+        * XXX: css_offline() would be where we should reparent all
+        * memory to prepare the cgroup for destruction.  However,
+        * memcg does not do css_tryget() and res_counter charging
+        * under the same RCU lock region, which means that charging
+        * could race with offlining.  Offlining only happens to
+        * cgroups with no tasks in them but charges can show up
+        * without any tasks from the swapin path when the target
+        * memcg is looked up from the swapout record and not from the
+        * current task as it usually is.  A race like this can leak
+        * charges and put pages with stale cgroup pointers into
+        * circulation:
+        *
+        * #0                        #1
+        *                           lookup_swap_cgroup_id()
+        *                           rcu_read_lock()
+        *                           mem_cgroup_lookup()
+        *                           css_tryget()
+        *                           rcu_read_unlock()
+        * disable css_tryget()
+        * call_rcu()
+        *   offline_css()
+        *     reparent_charges()
+        *                           res_counter_charge()
+        *                           css_put()
+        *                             css_free()
+        *                           pc->mem_cgroup = dead memcg
+        *                           add page to lru
+        *
+        * The bulk of the charges are still moved in offline_css() to
+        * avoid pinning a lot of pages in case a long-term reference
+        * like a swapout record is deferring the css_free() to long
+        * after offlining.  But this makes sure we catch any charges
+        * made after offlining:
+        */
+       mem_cgroup_reparent_charges(memcg);
 
        memcg_destroy_kmem(memcg);
        __mem_cgroup_free(memcg);
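For reference, the racing path (#1 in the diagram above) is the swapin charge, which resolves the memcg from the swap record rather than from current. A condensed sketch of that lookup, assuming the memcontrol.c internals of this era (mem_cgroup_lookup() maps a css id back to a memcg; ent is the page's swap entry, error handling elided):

	unsigned short id;
	struct mem_cgroup *memcg;

	id = lookup_swap_cgroup_id(ent);
	rcu_read_lock();
	memcg = mem_cgroup_lookup(id);
	if (memcg && !css_tryget(&memcg->css))
		memcg = NULL;	/* offlined/freed: fall back */
	rcu_read_unlock();

The res_counter charge against that memcg happens only after rcu_read_unlock(), which is exactly the window the comment describes: css_tryget() can succeed just before tryget is disabled, and the charge then lands after offline_css() has already reparented everything — hence the second reparenting pass in css_free() above.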
@@ -6605,10 +6644,10 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
        pte_t *pte;
        spinlock_t *ptl;
 
-       if (pmd_trans_huge_lock(pmd, vma) == 1) {
+       if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
                        mc.precharge += HPAGE_PMD_NR;
-               spin_unlock(&vma->vm_mm->page_table_lock);
+               spin_unlock(ptl);
                return 0;
        }
 
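The pmd_trans_huge_lock() changes in this hunk and the two below adapt to the split page table lock work: the function now takes the pmd lock itself and hands it back through the new ptl out-parameter, so callers release whatever lock they were given instead of assuming vma->vm_mm->page_table_lock. The resulting caller pattern, as used above:

	spinlock_t *ptl;

	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
		/*
		 * The pmd maps a transparent huge page and ptl points
		 * at the lock protecting it, already held; it may be
		 * the mm-wide page_table_lock or a per-table lock.
		 */
		/* ... operate on the huge pmd ... */
		spin_unlock(ptl);
		return 0;
	}
	/* Not (stably) huge: fall through to the pte-level walk. */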
@@ -6797,9 +6836,9 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
         *    to be unlocked in __split_huge_page_splitting(), where the main
         *    part of thp split is not executed yet.
         */
-       if (pmd_trans_huge_lock(pmd, vma) == 1) {
+       if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
                if (mc.precharge < HPAGE_PMD_NR) {
-                       spin_unlock(&vma->vm_mm->page_table_lock);
+                       spin_unlock(ptl);
                        return 0;
                }
                target_type = get_mctgt_type_thp(vma, addr, *pmd, &target);
@@ -6816,7 +6855,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
                        }
                        put_page(page);
                }
-               spin_unlock(&vma->vm_mm->page_table_lock);
+               spin_unlock(ptl);
                return 0;
        }
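Both mem_cgroup_move_charge_pte_range() hunks make the same substitution for the same reason: with split page table locks, the lock protecting a huge pmd is not necessarily vma->vm_mm->page_table_lock anymore, so unlocking the hardcoded mm-wide lock could release the wrong lock. The ptl returned by pmd_trans_huge_lock() is the only safe one to drop.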