mm: dump page when hitting a VM_BUG_ON using VM_BUG_ON_PAGE
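This blobdiff spans more than one upstream change: besides the VM_BUG_ON_PAGE() conversion named in the subject line, it rewires page_referenced(), page_mkclean() and try_to_munlock() onto the shared rmap_walk()/rmap_walk_control interface and deletes the per-caller anon/file walkers. A minimal before/after, lifted from the hunks below, shows what the assertion change buys:

/* Before: the assertion fires with no detail about the page involved. */
VM_BUG_ON(!PageLocked(page));

/* After: the offending struct page (flags, mapping, counts) is dumped
 * before BUG() triggers, so the resulting oops is actionable. */
VM_BUG_ON_PAGE(!PageLocked(page), page);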
index b3263cb323616febd5a83d13d0ea2595fe2f62f8..2dcd3353c3f679d0da34c2fefe750c41c257b12f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -660,17 +660,22 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma)
        return 1;
 }
 
+struct page_referenced_arg {
+       int mapcount;
+       int referenced;
+       unsigned long vm_flags;
+       struct mem_cgroup *memcg;
+};
 /*
- * Subfunctions of page_referenced: page_referenced_one called
- * repeatedly from either page_referenced_anon or page_referenced_file.
+ * arg: page_referenced_arg will be passed
  */
 int page_referenced_one(struct page *page, struct vm_area_struct *vma,
-                       unsigned long address, unsigned int *mapcount,
-                       unsigned long *vm_flags)
+                       unsigned long address, void *arg)
 {
        struct mm_struct *mm = vma->vm_mm;
        spinlock_t *ptl;
        int referenced = 0;
+       struct page_referenced_arg *pra = arg;
 
        if (unlikely(PageTransHuge(page))) {
                pmd_t *pmd;
@@ -682,13 +687,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                pmd = page_check_address_pmd(page, mm, address,
                                             PAGE_CHECK_ADDRESS_PMD_FLAG, &ptl);
                if (!pmd)
-                       goto out;
+                       return SWAP_AGAIN;
 
                if (vma->vm_flags & VM_LOCKED) {
                        spin_unlock(ptl);
-                       *mapcount = 0;  /* break early from loop */
-                       *vm_flags |= VM_LOCKED;
-                       goto out;
+                       pra->vm_flags |= VM_LOCKED;
+                       return SWAP_FAIL; /* To break the loop */
                }
 
                /* go ahead even if the pmd is pmd_trans_splitting() */
@@ -704,13 +708,12 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                 */
                pte = page_check_address(page, mm, address, &ptl, 0);
                if (!pte)
-                       goto out;
+                       return SWAP_AGAIN;
 
                if (vma->vm_flags & VM_LOCKED) {
                        pte_unmap_unlock(pte, ptl);
-                       *mapcount = 0;  /* break early from loop */
-                       *vm_flags |= VM_LOCKED;
-                       goto out;
+                       pra->vm_flags |= VM_LOCKED;
+                       return SWAP_FAIL; /* To break the loop */
                }
 
                if (ptep_clear_flush_young_notify(vma, address, pte)) {
@@ -727,113 +730,27 @@ int page_referenced_one(struct page *page, struct vm_area_struct *vma,
                pte_unmap_unlock(pte, ptl);
        }
 
-       (*mapcount)--;
-
-       if (referenced)
-               *vm_flags |= vma->vm_flags;
-out:
-       return referenced;
-}
-
-static int page_referenced_anon(struct page *page,
-                               struct mem_cgroup *memcg,
-                               unsigned long *vm_flags)
-{
-       unsigned int mapcount;
-       struct anon_vma *anon_vma;
-       pgoff_t pgoff;
-       struct anon_vma_chain *avc;
-       int referenced = 0;
-
-       anon_vma = page_lock_anon_vma_read(page);
-       if (!anon_vma)
-               return referenced;
-
-       mapcount = page_mapcount(page);
-       pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
-               struct vm_area_struct *vma = avc->vma;
-               unsigned long address = vma_address(page, vma);
-               /*
-                * If we are reclaiming on behalf of a cgroup, skip
-                * counting on behalf of references from different
-                * cgroups
-                */
-               if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
-                       continue;
-               referenced += page_referenced_one(page, vma, address,
-                                                 &mapcount, vm_flags);
-               if (!mapcount)
-                       break;
+       if (referenced) {
+               pra->referenced++;
+               pra->vm_flags |= vma->vm_flags;
        }
 
-       page_unlock_anon_vma_read(anon_vma);
-       return referenced;
+       pra->mapcount--;
+       if (!pra->mapcount)
+               return SWAP_SUCCESS; /* To break the loop */
+
+       return SWAP_AGAIN;
 }
 
-/**
- * page_referenced_file - referenced check for object-based rmap
- * @page: the page we're checking references on.
- * @memcg: target memory control group
- * @vm_flags: collect encountered vma->vm_flags who actually referenced the page
- *
- * For an object-based mapped page, find all the places it is mapped and
- * check/clear the referenced flag.  This is done by following the page->mapping
- * pointer, then walking the chain of vmas it holds.  It returns the number
- * of references it found.
- *
- * This function is only called from page_referenced for object-based pages.
- */
-static int page_referenced_file(struct page *page,
-                               struct mem_cgroup *memcg,
-                               unsigned long *vm_flags)
+static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)
 {
-       unsigned int mapcount;
-       struct address_space *mapping = page->mapping;
-       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       struct vm_area_struct *vma;
-       int referenced = 0;
-
-       /*
-        * The caller's checks on page->mapping and !PageAnon have made
-        * sure that this is a file page: the check for page->mapping
-        * excludes the case just before it gets set on an anon page.
-        */
-       BUG_ON(PageAnon(page));
+       struct page_referenced_arg *pra = arg;
+       struct mem_cgroup *memcg = pra->memcg;
 
-       /*
-        * The page lock not only makes sure that page->mapping cannot
-        * suddenly be NULLified by truncation, it makes sure that the
-        * structure at mapping cannot be freed and reused yet,
-        * so we can safely take mapping->i_mmap_mutex.
-        */
-       BUG_ON(!PageLocked(page));
-
-       mutex_lock(&mapping->i_mmap_mutex);
-
-       /*
-        * i_mmap_mutex does not stabilize mapcount at all, but mapcount
-        * is more likely to be accurate if we note it after spinning.
-        */
-       mapcount = page_mapcount(page);
-
-       vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
-               unsigned long address = vma_address(page, vma);
-               /*
-                * If we are reclaiming on behalf of a cgroup, skip
-                * counting on behalf of references from different
-                * cgroups
-                */
-               if (memcg && !mm_match_cgroup(vma->vm_mm, memcg))
-                       continue;
-               referenced += page_referenced_one(page, vma, address,
-                                                 &mapcount, vm_flags);
-               if (!mapcount)
-                       break;
-       }
+       if (!mm_match_cgroup(vma->vm_mm, memcg))
+               return true;
 
-       mutex_unlock(&mapping->i_mmap_mutex);
-       return referenced;
+       return false;
 }
 
 /**
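The hunk above changes the contract of page_referenced_one(): results are accumulated in the caller-supplied struct page_referenced_arg, and the return value only tells the generic walker whether to keep going. For orientation, the outcome codes it now returns are the pre-existing SWAP_* constants from include/linux/rmap.h, reproduced here as a reference rather than as part of this diff:

#define SWAP_SUCCESS	0	/* done, the walk can stop early */
#define SWAP_AGAIN	1	/* keep walking the remaining VMAs */
#define SWAP_FAIL	2	/* give up, e.g. a VM_LOCKED vma was hit */
#define SWAP_MLOCK	3	/* page is mlocked (used by try_to_unmap) */

The walker itself only tests for SWAP_AGAIN; any other value ends the iteration, which is how the old "*mapcount = 0 to break early" trick is expressed after this change.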
@@ -851,41 +768,57 @@ int page_referenced(struct page *page,
                    struct mem_cgroup *memcg,
                    unsigned long *vm_flags)
 {
-       int referenced = 0;
+       int ret;
        int we_locked = 0;
+       struct page_referenced_arg pra = {
+               .mapcount = page_mapcount(page),
+               .memcg = memcg,
+       };
+       struct rmap_walk_control rwc = {
+               .rmap_one = page_referenced_one,
+               .arg = (void *)&pra,
+               .anon_lock = page_lock_anon_vma_read,
+       };
 
        *vm_flags = 0;
-       if (page_mapped(page) && page_rmapping(page)) {
-               if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
-                       we_locked = trylock_page(page);
-                       if (!we_locked) {
-                               referenced++;
-                               goto out;
-                       }
-               }
-               if (unlikely(PageKsm(page)))
-                       referenced += page_referenced_ksm(page, memcg,
-                                                               vm_flags);
-               else if (PageAnon(page))
-                       referenced += page_referenced_anon(page, memcg,
-                                                               vm_flags);
-               else if (page->mapping)
-                       referenced += page_referenced_file(page, memcg,
-                                                               vm_flags);
-               if (we_locked)
-                       unlock_page(page);
+       if (!page_mapped(page))
+               return 0;
+
+       if (!page_rmapping(page))
+               return 0;
+
+       if (!is_locked && (!PageAnon(page) || PageKsm(page))) {
+               we_locked = trylock_page(page);
+               if (!we_locked)
+                       return 1;
        }
-out:
-       return referenced;
+
+       /*
+        * If we are reclaiming on behalf of a cgroup, skip
+        * counting on behalf of references from different
+        * cgroups
+        */
+       if (memcg) {
+               rwc.invalid_vma = invalid_page_referenced_vma;
+       }
+
+       ret = rmap_walk(page, &rwc);
+       *vm_flags = pra.vm_flags;
+
+       if (we_locked)
+               unlock_page(page);
+
+       return pra.referenced;
 }
 
 static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
-                           unsigned long address)
+                           unsigned long address, void *arg)
 {
        struct mm_struct *mm = vma->vm_mm;
        pte_t *pte;
        spinlock_t *ptl;
        int ret = 0;
+       int *cleaned = arg;
 
        pte = page_check_address(page, mm, address, &ptl, 1);
        if (!pte)
@@ -904,44 +837,44 @@ static int page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 
        pte_unmap_unlock(pte, ptl);
 
-       if (ret)
+       if (ret) {
                mmu_notifier_invalidate_page(mm, address);
+               (*cleaned)++;
+       }
 out:
-       return ret;
+       return SWAP_AGAIN;
 }
 
-static int page_mkclean_file(struct address_space *mapping, struct page *page)
+static bool invalid_mkclean_vma(struct vm_area_struct *vma, void *arg)
 {
-       pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       struct vm_area_struct *vma;
-       int ret = 0;
-
-       BUG_ON(PageAnon(page));
+       if (vma->vm_flags & VM_SHARED)
+               return 0;
 
-       mutex_lock(&mapping->i_mmap_mutex);
-       vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
-               if (vma->vm_flags & VM_SHARED) {
-                       unsigned long address = vma_address(page, vma);
-                       ret += page_mkclean_one(page, vma, address);
-               }
-       }
-       mutex_unlock(&mapping->i_mmap_mutex);
-       return ret;
+       return 1;
 }
 
 int page_mkclean(struct page *page)
 {
-       int ret = 0;
+       int cleaned = 0;
+       struct address_space *mapping;
+       struct rmap_walk_control rwc = {
+               .arg = (void *)&cleaned,
+               .rmap_one = page_mkclean_one,
+               .invalid_vma = invalid_mkclean_vma,
+       };
 
        BUG_ON(!PageLocked(page));
 
-       if (page_mapped(page)) {
-               struct address_space *mapping = page_mapping(page);
-               if (mapping)
-                       ret = page_mkclean_file(mapping, page);
-       }
+       if (!page_mapped(page))
+               return 0;
 
-       return ret;
+       mapping = page_mapping(page);
+       if (!mapping)
+               return 0;
+
+       rmap_walk(page, &rwc);
+
+       return cleaned;
 }
 EXPORT_SYMBOL_GPL(page_mkclean);
 
@@ -961,9 +894,9 @@ void page_move_anon_rmap(struct page *page,
 {
        struct anon_vma *anon_vma = vma->anon_vma;
 
-       VM_BUG_ON(!PageLocked(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON(!anon_vma);
-       VM_BUG_ON(page->index != linear_page_index(vma, address));
+       VM_BUG_ON_PAGE(page->index != linear_page_index(vma, address), page);
 
        anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
        page->mapping = (struct address_space *) anon_vma;
@@ -1062,7 +995,7 @@ void do_page_add_anon_rmap(struct page *page,
        if (unlikely(PageKsm(page)))
                return;
 
-       VM_BUG_ON(!PageLocked(page));
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
        /* address might be in next vma when migration races vma_adjust */
        if (first)
                __page_set_anon_rmap(page, vma, address, exclusive);
@@ -1177,9 +1110,6 @@ out:
 }
 
 /*
- * Subfunctions of try_to_unmap: try_to_unmap_one called
- * repeatedly from try_to_unmap_ksm, try_to_unmap_anon or try_to_unmap_file.
- *
  * @arg: enum ttu_flags will be passed to this argument
  */
 int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
@@ -1521,107 +1451,6 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
        return is_vma_temporary_stack(vma);
 }
 
-/**
- * try_to_unmap_anon - unmap or unlock anonymous page using the object-based
- * rmap method
- * @page: the page to unmap/unlock
- * @flags: action and flags
- *
- * Find all the mappings of a page using the mapping pointer and the vma chains
- * contained in the anon_vma struct it points to.
- *
- * This function is only called from try_to_unmap/try_to_munlock for
- * anonymous pages.
- * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
- * where the page was found will be held for write.  So, we won't recheck
- * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
- * 'LOCKED.
- */
-static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
-{
-       struct anon_vma *anon_vma;
-       pgoff_t pgoff;
-       struct anon_vma_chain *avc;
-       int ret = SWAP_AGAIN;
-
-       anon_vma = page_lock_anon_vma_read(page);
-       if (!anon_vma)
-               return ret;
-
-       pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
-       anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
-               struct vm_area_struct *vma = avc->vma;
-               unsigned long address;
-
-               /*
-                * During exec, a temporary VMA is setup and later moved.
-                * The VMA is moved under the anon_vma lock but not the
-                * page tables leading to a race where migration cannot
-                * find the migration ptes. Rather than increasing the
-                * locking requirements of exec(), migration skips
-                * temporary VMAs until after exec() completes.
-                */
-               if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
-                               is_vma_temporary_stack(vma))
-                       continue;
-
-               address = vma_address(page, vma);
-               ret = try_to_unmap_one(page, vma, address, (void *)flags);
-               if (ret != SWAP_AGAIN || !page_mapped(page))
-                       break;
-       }
-
-       page_unlock_anon_vma_read(anon_vma);
-       return ret;
-}
-
-/**
- * try_to_unmap_file - unmap/unlock file page using the object-based rmap method
- * @page: the page to unmap/unlock
- * @flags: action and flags
- *
- * Find all the mappings of a page using the mapping pointer and the vma chains
- * contained in the address_space struct it points to.
- *
- * This function is only called from try_to_unmap/try_to_munlock for
- * object-based pages.
- * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
- * where the page was found will be held for write.  So, we won't recheck
- * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
- * 'LOCKED.
- */
-static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
-{
-       struct address_space *mapping = page->mapping;
-       pgoff_t pgoff = page->index << compound_order(page);
-       struct vm_area_struct *vma;
-       int ret = SWAP_AGAIN;
-
-       mutex_lock(&mapping->i_mmap_mutex);
-       vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
-               unsigned long address = vma_address(page, vma);
-               ret = try_to_unmap_one(page, vma, address, (void *)flags);
-               if (ret != SWAP_AGAIN || !page_mapped(page))
-                       goto out;
-       }
-
-       if (list_empty(&mapping->i_mmap_nonlinear))
-               goto out;
-
-       /*
-        * We don't bother to try to find the munlocked page in nonlinears.
-        * It's costly. Instead, later, page reclaim logic may call
-        * try_to_unmap(TTU_MUNLOCK) and recover PG_mlocked lazily.
-        */
-       if (TTU_ACTION(flags) == TTU_MUNLOCK)
-               goto out;
-
-       ret = try_to_unmap_nonlinear(page, mapping, vma);
-out:
-       mutex_unlock(&mapping->i_mmap_mutex);
-       return ret;
-}
-
 static int page_not_mapped(struct page *page)
 {
        return !page_mapped(page);
@@ -1652,7 +1481,7 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
                .anon_lock = page_lock_anon_vma_read,
        };
 
-       VM_BUG_ON(!PageHuge(page) && PageTransHuge(page));
+       VM_BUG_ON_PAGE(!PageHuge(page) && PageTransHuge(page), page);
 
        /*
         * During exec, a temporary VMA is setup and later moved.
@@ -1689,14 +1518,25 @@ int try_to_unmap(struct page *page, enum ttu_flags flags)
  */
 int try_to_munlock(struct page *page)
 {
-       VM_BUG_ON(!PageLocked(page) || PageLRU(page));
+       int ret;
+       struct rmap_walk_control rwc = {
+               .rmap_one = try_to_unmap_one,
+               .arg = (void *)TTU_MUNLOCK,
+               .done = page_not_mapped,
+               /*
+                * We don't bother to try to find the munlocked page in
+                * nonlinears. It's costly. Instead, later, page reclaim logic
+                * may call try_to_unmap() and recover PG_mlocked lazily.
+                */
+               .file_nonlinear = NULL,
+               .anon_lock = page_lock_anon_vma_read,
 
-       if (unlikely(PageKsm(page)))
-               return try_to_unmap_ksm(page, TTU_MUNLOCK);
-       else if (PageAnon(page))
-               return try_to_unmap_anon(page, TTU_MUNLOCK);
-       else
-               return try_to_unmap_file(page, TTU_MUNLOCK);
+       };
+
+       VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page);
+
+       ret = rmap_walk(page, &rwc);
+       return ret;
 }
 
 void __put_anon_vma(struct anon_vma *anon_vma)
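With this hunk, try_to_munlock() joins page_referenced() and page_mkclean() (and the already-converted try_to_unmap()) in driving everything through rmap_walk(). For reference, a sketch of the control structure they all fill in, with the fields as they are used in this diff; the authoritative definition, including exact field order and prototypes, lives in include/linux/rmap.h:

struct rmap_walk_control {
	void *arg;	/* private data handed through to the callbacks */
	/* called for each vma mapping the page; returns a SWAP_* code */
	int (*rmap_one)(struct page *page, struct vm_area_struct *vma,
			unsigned long addr, void *arg);
	int (*done)(struct page *page);		/* may end the walk early */
	int (*file_nonlinear)(struct page *, struct address_space *,
			      struct vm_area_struct *);
	struct anon_vma *(*anon_lock)(struct page *page);
	bool (*invalid_vma)(struct vm_area_struct *vma, void *arg);
};

page_referenced(), for instance, only needs .rmap_one, .arg and .anon_lock, and adds .invalid_vma when reclaiming on behalf of a memory cgroup; try_to_munlock() additionally sets .done and explicitly leaves .file_nonlinear NULL.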
@@ -1732,8 +1572,18 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page,
 }
 
 /*
- * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file():
- * Called by migrate.c to remove migration ptes, but might be used more later.
+ * rmap_walk_anon - do something to anonymous page using the object-based
+ * rmap method
+ * @page: the page to be handled
+ * @rwc: control variable according to each walk type
+ *
+ * Find all the mappings of a page using the mapping pointer and the vma chains
+ * contained in the anon_vma struct it points to.
+ *
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write.  So, we won't recheck
+ * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
+ * LOCKED.
  */
 static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
 {
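The body of rmap_walk_anon() sits between this hunk and the next and is untouched here, so only its head and tail appear as context. A simplified sketch of that body, reconstructed from the callbacks used elsewhere in this diff rather than quoted verbatim, shows how the rmap_walk_control hooks are consumed:

static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
{
	struct anon_vma *anon_vma;
	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	struct anon_vma_chain *avc;
	int ret = SWAP_AGAIN;

	/* takes the anon_vma lock, via rwc->anon_lock when provided */
	anon_vma = rmap_walk_anon_lock(page, rwc);
	if (!anon_vma)
		return ret;

	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) {
		struct vm_area_struct *vma = avc->vma;
		unsigned long address = vma_address(page, vma);

		/* e.g. invalid_page_referenced_vma() during memcg reclaim */
		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))
			continue;

		ret = rwc->rmap_one(page, vma, address, rwc->arg);
		if (ret != SWAP_AGAIN)
			break;
		/* e.g. page_not_mapped() lets try_to_munlock() stop early */
		if (rwc->done && rwc->done(page))
			break;
	}
	anon_vma_unlock_read(anon_vma);
	return ret;
}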
@@ -1763,6 +1613,19 @@ static int rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc)
        return ret;
 }
 
+/*
+ * rmap_walk_file - do something to file page using the object-based rmap method
+ * @page: the page to be handled
+ * @rwc: control variable according to each walk type
+ *
+ * Find all the mappings of a page using the mapping pointer and the vma chains
+ * contained in the address_space struct it points to.
+ *
+ * When called from try_to_munlock(), the mmap_sem of the mm containing the vma
+ * where the page was found will be held for write.  So, we won't recheck
+ * vm_flags for that VMA.  That should be OK, because that vma shouldn't be
+ * LOCKED.
+ */
 static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
 {
        struct address_space *mapping = page->mapping;
@@ -1770,6 +1633,14 @@ static int rmap_walk_file(struct page *page, struct rmap_walk_control *rwc)
        struct vm_area_struct *vma;
        int ret = SWAP_AGAIN;
 
+       /*
+        * The page lock not only makes sure that page->mapping cannot
+        * suddenly be NULLified by truncation, it makes sure that the
+        * structure at mapping cannot be freed and reused yet,
+        * so we can safely take mapping->i_mmap_mutex.
+        */
+       VM_BUG_ON(!PageLocked(page));
+
        if (!mapping)
                return ret;
        mutex_lock(&mapping->i_mmap_mutex);
@@ -1801,8 +1672,6 @@ done:
 
 int rmap_walk(struct page *page, struct rmap_walk_control *rwc)
 {
-       VM_BUG_ON(!PageLocked(page));
-
        if (unlikely(PageKsm(page)))
                return rmap_walk_ksm(page, rwc);
        else if (PageAnon(page))