]> Pileus Git - ~andy/linux/blobdiff - mm/rmap.c
mm: fix swapops.h:131 bug if remap_file_pages raced migration
[~andy/linux] / mm / rmap.c
index 068522d8502a58e9465a963e68c37ce4ccf635d7..8fc049f9a5a6c5d511ac7a5ac0dd41c698ed99a3 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -660,17 +660,22 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
        return 1;
 }
 
+struct page_referenced_arg {    /* walk state shared by the page_referenced() callbacks */
+       int mapcount;           /* mappings left to visit; the walk stops at 0 */
+       int referenced;         /* count of mappings found referenced */
+       unsigned long vm_flags; /* vm_flags of referencing vmas, plus VM_LOCKED */
+       struct mem_cgroup *memcg;       /* if set, vmas of other memcgs are skipped */
+};
 /*
- * Subfunctions of page_referenced: page_referenced_one called
- * repeatedly from either page_referenced_anon or page_referenced_file.
+ * @arg: pointer to the struct page_referenced_arg that collects the results
  */
 int page_referenced_one(struct page *page, struct vm_area_struct *vma,
-                       unsigned long address, unsigned int *mapcount,
-                       unsigned long *vm_flags)
+                       unsigned long address, void *arg)
 {
        struct mm_struct *mm = vma->vm_mm;
        spinlock_t *ptl;
        int referenced = 0;
+       struct page_referenced_arg *pra = arg;
 
        if (unlikely(PageTransHuge(page))) {
                pmd_t *pmd;
@@ -682,13 +687,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                pmd = page_check_address_pmd(page, mm, address,
                                             PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
                if (!pmd)
-                       goto out;
+                       return SWAP_AGAIN;
 
                if (vma->vm_flags & VM_LOCKED) {
                        spin_unlock(ptl);
-                       *mapcount = 0;  /* break early from loop */
-                       *vm_flags |= VM_LOCKED;
-                       goto out;
+                       pra->vm_flags |= VM_LOCKED;
+                       return SWAP_FAIL; /* To break the loop */
                }
 
                /* go ahead even if the pmd is pmd_trans_splitting() */
@@ -704,13 +708,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                 */
                pte = page_check_address(page, mm, address, &ptl, 0);
                if (!pte)
-                       goto out;
+                       return SWAP_AGAIN;
 
                if (vma->vm_flags & VM_LOCKED) {
                        pte_unmap_unlock(pte, ptl);
-                       *mapcount = 0;  /* break early from loop */
-                       *vm_flags |= VM_LOCKED;
-                       goto out;
+                       pra->vm_flags |= VM_LOCKED;
+                       return SWAP_FAIL; /* To break the loop */
                }
 
                if (ptep_clear_flush_young_notify(vma, address, pte)) {
@@ -727,113 +730,27 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                pte_unmap_unlock(pte, ptl);
        }
 
-       (*mapcount)--;
-
-       if (referenced)
-               *vm_flags |= vma->vm_flags;
-out:
-       return referenced;
-}
-
-static int page_referenced_anon(struct page *page,
-                               struct mem_cgroup *memcg,
-                               unsigned long *vm_flags)
-{
-       unsigned int mapcount;
-       struct anon_vma *anon_vma;
-       pgoff_t pgoff;
-       struct anon_vma_chain *avc;
-       int referenced = 0;
-
-       anon_vma = page_lock_anon_vma_read(page);
-       if (!anon_vma)
-               return referenced;
-
-       mapcount = page_mapcount(page);
-       pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
-               struct vm_area_struct *vma = avc->vma;
-               unsigned long address = vma_address(page, vma);
-               /*
-                * If we are reclaiming on behalf of a cgroup, skip
-                * counting on behalf of references from different
-                * cgroups
-                */
-               if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
-                       continue;
-               referenced += page_referenced_one(page, vma, address,
-                                                 &mapcount, vm_flags);
-               if (!mapcount)
-                       break;
+       if (referenced) {
+               pra->referenced++;
+               pra->vm_flags |= vma->vm_flags;
        }
 
-       page_unlock_anon_vma_read(anon_vma);
-       return referenced;
+       pra->mapcount--;
+       if (!pra->mapcount)
+               return SWAP_SUCCESS; /* To break the loop */
+
+       return SWAP_AGAIN;
 }
 
-/**
- * page_referenced_file - referenced check for object-based rmap
- * @page: the page we're checking references on.
- * @memcg: target memory control group
- * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
- *
- * For an object-based mapped page, find all the places it is mapped and
- * check/clear the referenced flag.  This is done by following the page->mapping
- * pointer, then walking the chain of vmas it holds.  It returns the number
- * of references it found.
- *
- * This function is only called from page_referenced for object-based pages.
- */
-static int page_referenced_file(struct page *page,
-                               struct mem_cgroup *memcg,
-                               unsigned long *vm_flags)
+static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
 {
-       unsigned int mapcount;
-       struct address_space *mapping = page->mapping;
-       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       struct vm_area_struct *vma;
-       int referenced = 0;
-
-       /*
-        * The caller's checks on page->mapping and !PageAnon have made
-        * sure that this is a file page: the check for page->mapping
-        * excludes the case just before it gets set on an anon page.
-        */
-       BUG_ON(PageAnon(page));
-
-       /*
-        * The page lock not only makes sure that page->mapping cannot
-        * suddenly be NULLified by truncation, it makes sure that the
-        * structure at mapping cannot be freed and reused yet,
-        * so we can safely take mapping->i_mmap_mutex.
-        */
-       BUG_ON(!PageLocked(page));
-
-       mutex_lock(&mapping->i_mmap_mutex);
+       struct page_referenced_arg *pra = arg;  /* walk state from page_referenced() */
+       struct mem_cgroup *memcg = pra->memcg;  /* hook is installed only if non-NULL */
 
-       /*
-        * i_mmap_mutex does not stabilize mapcount at all, but mapcount
-        * is more likely to be accurate if we note it after spinning.
-        */
-       mapcount = page_mapcount(page);
-
-       vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
-               unsigned long address = vma_address(page, vma);
-               /*
-                * If we are reclaiming on behalf of a cgroup, skip
-                * counting on behalf of references from different
-                * cgroups
-                */
-               if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
-                       continue;
-               referenced += page_referenced_one(page, vma, address,
-                                                 &mapcount, vm_flags);
-               if (!mapcount)
-                       break;
-       }
+       if (!mm_match_cgroup(vma->vm_mm, memcg))
+               return true;    /* vma's mm is outside the target memcg: skip it */
 
-       mutex_unlock(&mapping->i_mmap_mutex);
-       return referenced;
+       return false;   /* vma belongs to the target memcg: visit it */
 }
 
 /**
@@ -851,41 +768,57 @@ int page_referenced(struct page *page,
                    struct mem_cgroup *memcg,
                    unsigned long *vm_flags)
 {
-       int referenced = 0;
+       int ret;
        int we_locked = 0;
+       struct page_referenced_arg pra = {
+               .mapcount = page_mapcount(page),
+               .memcg = memcg,
+       };
+       struct rmap_walk_control rwc = {
+               .rmap_one = page_referenced_one,
+               .arg = (void *)&pra,
+               .anon_lock = page_lock_anon_vma_read,
+       };
 
        *vm_flags = 0;
-       if (page_mapped(page) && page_rmapping(page)) {
-               if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
-                       we_locked = trylock_page(page);
-                       if (!we_locked) {
-                               referenced++;
-                               goto out;
-                       }
-               }
-               if (unlikely(PageKsm(page)))
-                       referenced += page_referenced_ksm(page, memcg,
-                                                               vm_flags);
-               else if (PageAnon(page))
-                       referenced += page_referenced_anon(page, memcg,
-                                                               vm_flags);
-               else if (page->mapping)
-                       referenced += page_referenced_file(page, memcg,
-                                                               vm_flags);
-               if (we_locked)
-                       unlock_page(page);
+       if (!page_mapped(page))
+               return 0;
+
+       if (!page_rmapping(page))
+               return 0;
+
+       if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
+               we_locked = trylock_page(page);
+               if (!we_locked)
+                       return 1;
        }
-out:
-       return referenced;
+
+       /*
+        * If we are reclaiming on behalf of a cgroup, skip
+        * counting on behalf of references from different
+        * cgroups
+        */
+       if (memcg) {
+               rwc.invalid_vma = invalid_page_referenced_vma;
+       }
+
+       ret = rmap_walk(page, &rwc);
+       *vm_flags = pra.vm_flags;
+
+       if (we_locked)
+               unlock_page(page);
+
+       return pra.referenced;
 }
 
 static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
-                           unsigned long address)
+                           unsigned long address, void *arg)
 {
        struct mm_struct *mm = vma->vm_mm;
        pte_t *pte;
        spinlock_t *ptl;
        int ret = 0;
+       int *cleaned = arg;
 
        pte = page_check_address(page, mm, address, &ptl, 1);
        if (!pte)
@@ -904,44 +837,44 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 
        pte_unmap_unlock(pte, ptl);
 
-       if (ret)
+       if (ret) {
                mmu_notifier_invalidate_page(mm, address);
+               (*cleaned)++;
+       }
 out:
-       return ret;
+       return SWAP_AGAIN;
 }
 
-static int page_mkclean_file(struct address_space *mapping, struct page *page)
+static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
 {
-       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       struct vm_area_struct *vma;
-       int ret = 0;
-
-       BUG_ON(PageAnon(page));
+       if (vma->vm_flags & VM_SHARED)
+               return false;   /* shared mapping: let the walk visit it */
 
-       mutex_lock(&mapping->i_mmap_mutex);
-       vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
-               if (vma->vm_flags & VM_SHARED) {
-                       unsigned long address = vma_address(page, vma);
-                       ret += page_mkclean_one(page, vma, address);
-               }
-       }
-       mutex_unlock(&mapping->i_mmap_mutex);
-       return ret;
+       return true;    /* not VM_SHARED: skip this vma (arg is unused) */
 }
 
 int page_mkclean(struct page *page)
 {
-       int ret = 0;
+       int cleaned = 0;        /* bumped by page_mkclean_one() through rwc.arg */
+       struct address_space *mapping;
+       struct rmap_walk_control rwc = {
+               .arg = (void *)&cleaned,
+               .rmap_one = page_mkclean_one,
+               .invalid_vma = invalid_mkclean_vma,     /* skips non-VM_SHARED vmas */
+       };
 
        BUG_ON(!PageLocked(page));
 
-       if (page_mapped(page)) {
-               struct address_space *mapping = page_mapping(page);
-               if (mapping)
-                       ret = page_mkclean_file(mapping, page);
-       }
+       if (!page_mapped(page))
+               return 0;       /* no mappings, nothing to clean */
 
-       return ret;
+       mapping = page_mapping(page);
+       if (!mapping)
+               return 0;       /* page has no mapping to walk */
+
+       rmap_walk(page, &rwc);  /* walk status is ignored; only the count matters */
+
+       return cleaned;
 }
 EXPORT_SYMBOL_GPL(page_mkclean);
 
@@ -961,9 +894,9 @@ void page_move_anon_rmap(struct page *page,
 {
        struct anon_vma *anon_vma = vma->anon_vma;
 
-       VM_BUG_ON(!PageLocked(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON(!anon_vma);
-       VM_BUG_ON(page->index != linear_page_index(vma, address));
+       VM_BUG_ON_PAGE(page->index != linear_page_index(vma, address), page);
 
        anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
        page->mapping = (struct address_space *) anon_vma;
@@ -1062,7 +995,7 @@ void do_page_add_anon_rmap(struct page *page,
        if (unlikely(PageKsm(page)))
                return;
 
-       VM_BUG_ON(!PageLocked(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
        /* address might be in next vma when migration races vma_adjust */
        if (first)
                __page_set_anon_rmap(page, vma, address, exclusive);
@@ -1177,17 +1110,17 @@ out:
 }
 
 /*
- * Subfunctions of try_to_unmap: try_to_unmap_one called
- * repeatedly from try_to_unmap_ksm, try_to_unmap_anon or try_to_unmap_file.
+ * @arg: enum ttu_flags value, cast to a pointer by the caller
  */
 int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
-                    unsigned long address, enum ttu_flags flags)
+                    unsigned long address, void *arg)
 {
        struct mm_struct *mm = vma->vm_mm;
        pte_t *pte;
        pte_t pteval;
        spinlock_t *ptl;
        int ret = SWAP_AGAIN;
+       enum ttu_flags flags = (enum ttu_flags)arg;
 
        pte = page_check_address(page, mm, address, &ptl, 0);
        if (!pte)
@@ -1426,93 +1359,9 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
        return ret;
 }
 
-bool is_vma_temporary_stack(struct vm_area_struct *vma)
-{
-       int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
-
-       if (!maybe_stack)
-               return false;
-
-       if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
-                                               VM_STACK_INCOMPLETE_SETUP)
-               return true;
-
-       return false;
-}
-
-/**
- * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
- * rmap method
- * @page: the page to unmap/unlock
- * @flags: action and flags
- *
- * Find all the mappings of a page using the mapping pointer and the vma chains
- * contained in the anon_vma struct it points to.
- *
- * This function is only called from try_to_unmap/try_to_munlock for
- * anonymous pages.
- * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
- * where the page was found will be held for write.  So, we won't recheck
- * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
- * 'LOCKED.
- */
-static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
-{
-       struct anon_vma *anon_vma;
-       pgoff_t pgoff;
-       struct anon_vma_chain *avc;
-       int ret = SWAP_AGAIN;
-
-       anon_vma = page_lock_anon_vma_read(page);
-       if (!anon_vma)
-               return ret;
-
-       pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
-               struct vm_area_struct *vma = avc->vma;
-               unsigned long address;
-
-               /*
-                * During exec, a temporary VMA is setup and later moved.
-                * The VMA is moved under the anon_vma lock but not the
-                * page tables leading to a race where migration cannot
-                * find the migration ptes. Rather than increasing the
-                * locking requirements of exec(), migration skips
-                * temporary VMAs until after exec() completes.
-                */
-               if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
-                               is_vma_temporary_stack(vma))
-                       continue;
-
-               address = vma_address(page, vma);
-               ret = try_to_unmap_one(page, vma, address, flags);
-               if (ret != SWAP_AGAIN || !page_mapped(page))
-                       break;
-       }
-
-       page_unlock_anon_vma_read(anon_vma);
-       return ret;
-}
-
-/**
- * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
- * @page: the page to unmap/unlock
- * @flags: action and flags
- *
- * Find all the mappings of a page using the mapping pointer and the vma chains
- * contained in the address_space struct it points to.
- *
- * This function is only called from try_to_unmap/try_to_munlock for
- * object-based pages.
- * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
- * where the page was found will be held for write.  So, we won't recheck
- * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
- * 'LOCKED.
- */
-static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
+static int try_to_unmap_nonlinear(struct page *page,
+               struct address_space *mapping, void *arg)
 {
-       struct address_space *mapping = page->mapping;
-       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
        struct vm_area_struct *vma;
        int ret = SWAP_AGAIN;
        unsigned long cursor;
@@ -1520,30 +1369,9 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
        unsigned long max_nl_size = 0;
        unsigned int mapcount;
 
-       if (PageHuge(page))
-               pgoff = page->index << compound_order(page);
+       list_for_each_entry(vma,
+               &mapping->i_mmap_nonlinear, shared.nonlinear) {
 
-       mutex_lock(&mapping->i_mmap_mutex);
-       vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
-               unsigned long address = vma_address(page, vma);
-               ret = try_to_unmap_one(page, vma, address, flags);
-               if (ret != SWAP_AGAIN || !page_mapped(page))
-                       goto out;
-       }
-
-       if (list_empty(&mapping->i_mmap_nonlinear))
-               goto out;
-
-       /*
-        * We don't bother to try to find the munlocked page in nonlinears.
-        * It's costly. Instead, later, page reclaim logic may call
-        * try_to_unmap(TTU_MUNLOCK) and recover PG_mlocked lazily.
-        */
-       if (TTU_ACTION(flags) == TTU_MUNLOCK)
-               goto out;
-
-       list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
-                                                       shared.nonlinear) {
                cursor = (unsigned long) vma->vm_private_data;
                if (cursor > max_nl_cursor)
                        max_nl_cursor = cursor;
@@ -1553,8 +1381,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
        }
 
        if (max_nl_size == 0) { /* all nonlinears locked or reserved ? */
-               ret = SWAP_FAIL;
-               goto out;
+               return SWAP_FAIL;
        }
 
        /*
@@ -1566,7 +1393,8 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
         */
        mapcount = page_mapcount(page);
        if (!mapcount)
-               goto out;
+               return ret;
+
        cond_resched();
 
        max_nl_size = (max_nl_size + CLUSTER_SIZE - 1) & CLUSTER_MASK;
@@ -1574,10 +1402,11 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
                max_nl_cursor = CLUSTER_SIZE;
 
        do {
-               list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
-                                                       shared.nonlinear) {
+               list_for_each_entry(vma,
+                       &mapping->i_mmap_nonlinear, shared.nonlinear) {
+
                        cursor = (unsigned long) vma->vm_private_data;
-                       while ( cursor < max_nl_cursor &&
+                       while (cursor < max_nl_cursor &&
                                cursor < vma->vm_end - vma->vm_start) {
                                if (try_to_unmap_cluster(cursor, &mapcount,
                                                vma, page) == SWAP_MLOCK)
@@ -1585,7 +1414,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
                                cursor += CLUSTER_SIZE;
                                vma->vm_private_data = (void *) cursor;
                                if ((int)mapcount <= 0)
-                                       goto out;
+                                       return ret;
                        }
                        vma->vm_private_data = (void *) max_nl_cursor;
                }
@@ -1600,11 +1429,34 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
         */
        list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
                vma->vm_private_data = NULL;
-out:
-       mutex_unlock(&mapping->i_mmap_mutex);
+
        return ret;
 }
 
+bool is_vma_temporary_stack(struct vm_area_struct *vma)
+{
+       int maybe_stack = vma->vm_flags & (VM_GROWSDOWN | VM_GROWSUP);
+
+       if (!maybe_stack)       /* neither VM_GROWSDOWN nor VM_GROWSUP is set */
+               return false;
+
+       if ((vma->vm_flags & VM_STACK_INCOMPLETE_SETUP) ==
+                                               VM_STACK_INCOMPLETE_SETUP)
+               return true;    /* every VM_STACK_INCOMPLETE_SETUP bit is still set */
+
+       return false;
+}
+
+static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
+{
+       return is_vma_temporary_stack(vma);     /* skip temporary stacks; arg unused */
+}
+
+static int page_not_mapped(struct page *page)
+{
+       return !page_mapped(page);      /* rwc->done hook: nonzero once unmapped */
+}
+
 /**
  * try_to_unmap - try to remove all page table mappings to a page
  * @page: the page to get unmapped
@@ -1622,16 +1474,29 @@ out:
 int try_to_unmap(struct page *page, enum ttu_flags flags)
 {
        int ret;
+       struct rmap_walk_control rwc = {
+               .rmap_one = try_to_unmap_one,
+               .arg = (void *)flags,
+               .done = page_not_mapped,
+               .file_nonlinear = try_to_unmap_nonlinear,
+               .anon_lock = page_lock_anon_vma_read,
+       };
 
-       BUG_ON(!PageLocked(page));
-       VM_BUG_ON(!PageHuge(page) && PageTransHuge(page));
+       VM_BUG_ON_PAGE(!PageHuge(page) && PageTransHuge(page), page);
+
+       /*
+        * During exec, a temporary VMA is setup and later moved.
+        * The VMA is moved under the anon_vma lock but not the
+        * page tables leading to a race where migration cannot
+        * find the migration ptes. Rather than increasing the
+        * locking requirements of exec(), migration skips
+        * temporary VMAs until after exec() completes.
+        */
+       if (flags & TTU_MIGRATION && !PageKsm(page) && PageAnon(page))
+               rwc.invalid_vma = invalid_migration_vma;
+
+       ret = rmap_walk(page, &rwc);
 
-       if (unlikely(PageKsm(page)))
-               ret = try_to_unmap_ksm(page, flags);
-       else if (PageAnon(page))
-               ret = try_to_unmap_anon(page, flags);
-       else
-               ret = try_to_unmap_file(page, flags);
        if (ret != SWAP_MLOCK && !page_mapped(page))
                ret = SWAP_SUCCESS;
        return ret;
@@ -1654,14 +1519,25 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
  */
 int try_to_munlock(struct page *page)
 {
-       VM_BUG_ON(!PageLocked(page) || PageLRU(page));
+       int ret;
+       struct rmap_walk_control rwc = {
+               .rmap_one = try_to_unmap_one,
+               .arg = (void *)TTU_MUNLOCK,     /* read back as enum ttu_flags */
+               .done = page_not_mapped,        /* stop once the page is unmapped */
+               /*
+                * We don't bother to try to find the munlocked page in
+                * nonlinears. It's costly. Instead, later, page reclaim logic
+                * may call try_to_unmap() and recover PG_mlocked lazily.
+                */
+               .file_nonlinear = NULL,
+               .anon_lock = page_lock_anon_vma_read,
 
-       if (unlikely(PageKsm(page)))
-               return try_to_unmap_ksm(page, TTU_MUNLOCK);
-       else if (PageAnon(page))
-               return try_to_unmap_anon(page, TTU_MUNLOCK);
-       else
-               return try_to_unmap_file(page, TTU_MUNLOCK);
+       };
+
+       VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
+
+       ret = rmap_walk(page, &rwc);    /* rmap_walk() picks ksm, anon or file */
+       return ret;
 }
 
 void __put_anon_vma(struct anon_vma *anon_vma)
@@ -1674,18 +1550,13 @@ void __put_anon_vma(struct anon_vma *anon_vma)
        anon_vma_free(anon_vma);
 }
 
-#ifdef CONFIG_MIGRATION
-/*
- * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
- * Called by migrate.c to remove migration ptes, but might be used more later.
- */
-static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
-               struct vm_area_struct *, unsigned long, void *), void *arg)
+static struct anon_vma *rmap_walk_anon_lock(struct page *page,
+                                       struct rmap_walk_control *rwc)
 {
        struct anon_vma *anon_vma;
-       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       struct anon_vma_chain *avc;
-       int ret = SWAP_AGAIN;
+
+       if (rwc->anon_lock)
+               return rwc->anon_lock(page);
 
        /*
         * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read()
@@ -1695,58 +1566,120 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
         */
        anon_vma = page_anon_vma(page);
        if (!anon_vma)
-               return ret;
+               return NULL;
+
        anon_vma_lock_read(anon_vma);
+       return anon_vma;
+}
+
+/*
+ * rmap_walk_anon - do something to anonymous page using the object-based
+ * rmap method
+ * @page: the page to be handled
+ * @rwc: control variable according to each walk type
+ *
+ * Find all the mappings of a page using the mapping pointer and the vma chains
+ * contained in the anon_vma struct it points to.
+ *
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write.  So, we won't recheck
+ * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
+ * LOCKED.
+ */
+static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
+{
+       struct anon_vma *anon_vma;
+       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+       struct anon_vma_chain *avc;
+       int ret = SWAP_AGAIN;
+
+       anon_vma = rmap_walk_anon_lock(page, rwc);      /* uses rwc->anon_lock if set */
+       if (!anon_vma)
+               return ret;     /* nothing was locked; reports SWAP_AGAIN */
+
        anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
                struct vm_area_struct *vma = avc->vma;
                unsigned long address = vma_address(page, vma);
-               ret = rmap_one(page, vma, address, arg);
+
+               if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+                       continue;       /* optional filter rejected this vma */
+
+               ret = rwc->rmap_one(page, vma, address, rwc->arg);
                if (ret != SWAP_AGAIN)
                        break;
+               if (rwc->done && rwc->done(page))
+                       break;  /* optional early-exit hook says we are finished */
        }
        anon_vma_unlock_read(anon_vma);
        return ret;
 }
 
-static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
-               struct vm_area_struct *, unsigned long, void *), void *arg)
+/*
+ * rmap_walk_file - do something to file page using the object-based rmap method
+ * @page: the page to be handled
+ * @rwc: control variable according to each walk type
+ *
+ * Find all the mappings of a page using the mapping pointer and the vma chains
+ * contained in the address_space struct it points to.
+ *
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write.  So, we won't recheck
+ * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
+ * LOCKED.
+ */
+static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
 {
        struct address_space *mapping = page->mapping;
-       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+       pgoff_t pgoff = page->index << compound_order(page);
        struct vm_area_struct *vma;
        int ret = SWAP_AGAIN;
 
+       /*
+        * The page lock not only makes sure that page->mapping cannot
+        * suddenly be NULLified by truncation, it makes sure that the
+        * structure at mapping cannot be freed and reused yet,
+        * so we can safely take mapping->i_mmap_mutex.
+        */
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
+
        if (!mapping)
                return ret;
        mutex_lock(&mapping->i_mmap_mutex);
        vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
                unsigned long address = vma_address(page, vma);
-               ret = rmap_one(page, vma, address, arg);
+
+               if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
+                       continue;       /* optional filter rejected this vma */
+
+               ret = rwc->rmap_one(page, vma, address, rwc->arg);
                if (ret != SWAP_AGAIN)
-                       break;
+                       goto done;
+               if (rwc->done && rwc->done(page))
+                       goto done;      /* optional early-exit hook says we are finished */
        }
-       /*
-        * No nonlinear handling: being always shared, nonlinear vmas
-        * never contain migration ptes.  Decide what to do about this
-        * limitation to linear when we need rmap_walk() on nonlinear.
-        */
+
+       if (!rwc->file_nonlinear)
+               goto done;      /* this walker does not handle nonlinear vmas */
+
+       if (list_empty(&mapping->i_mmap_nonlinear))
+               goto done;
+
+       ret = rwc->file_nonlinear(page, mapping, rwc->arg);
+
+done:
        mutex_unlock(&mapping->i_mmap_mutex);
        return ret;
 }
 
-int rmap_walk(struct page *page, int (*rmap_one)(struct page *,
-               struct vm_area_struct *, unsigned long, void *), void *arg)
+int rmap_walk(struct page *page, struct rmap_walk_control *rwc)
 {
-       VM_BUG_ON(!PageLocked(page));
-
        if (unlikely(PageKsm(page)))
-               return rmap_walk_ksm(page, rmap_one, arg);
+               return rmap_walk_ksm(page, rwc);
        else if (PageAnon(page))
-               return rmap_walk_anon(page, rmap_one, arg);
+               return rmap_walk_anon(page, rwc);
        else
-               return rmap_walk_file(page, rmap_one, arg);
+               return rmap_walk_file(page, rwc);       /* asserts PageLocked itself */
 }
-#endif /* CONFIG_MIGRATION */
 
 #ifdef CONFIG_HUGETLB_PAGE
 /*