Merge remote-tracking branch 'asoc/topic/wm8962' into asoc-next

[~andy/linux] / mm / hugetlb.c
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index d37b3b95c4392cb8d156ba915827dea820d3483a..0b7656e804d126cf0fcfa04a8b427396af86deb1 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -21,6 +21,7 @@
  #include <linux/rmap.h>
  #include <linux/swap.h>
  #include <linux/swapops.h>
+#include <linux/page-isolation.h>
  
  #include <asm/page.h>
  #include <asm/pgtable.h>
@@ -33,7 +34,6 @@
  #include "internal.h"
  
  const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
-static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
  unsigned long hugepages_treat_as_movable;
  
  int hugetlb_max_hstate __read_mostly;
@@ -522,9 +522,15 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
  {
         struct page *page;
  
-       if (list_empty(&h->hugepage_freelists[nid]))
+       list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
+               if (!is_migrate_isolate_page(page))
+                       break;
+       /*
+        * If no 'non-isolated free hugepage' is found on the list,
+        * the allocation fails.
+        */
+       if (&h->hugepage_freelists[nid] == &page->lru)
                 return NULL;
-       page = list_entry(h->hugepage_freelists[nid].next, struct page, lru);
         list_move(&page->lru, &h->hugepage_activelist);
         set_page_refcounted(page);
         h->free_huge_pages--;
@@ -532,6 +538,15 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
         return page;
  }
  
+/* Hugepage gfp mask: movable if forced on or if h supports migration. */
+static inline gfp_t htlb_alloc_mask(struct hstate *h)
+{
+       bool movable = hugepages_treat_as_movable ||
+                      hugepage_migration_support(h);
+
+       return movable ? GFP_HIGHUSER_MOVABLE : GFP_HIGHUSER;
+}
+
  static struct page *dequeue_huge_page_vma(struct hstate *h,
                                 struct vm_area_struct *vma,
                                 unsigned long address, int avoid_reserve,
@@ -561,11 +576,11 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
  retry_cpuset:
         cpuset_mems_cookie = get_mems_allowed();
         zonelist = huge_zonelist(vma, address,
-                                       htlb_alloc_mask, &mpol, &nodemask);
+                                       htlb_alloc_mask(h), &mpol, &nodemask);
  
         for_each_zone_zonelist_nodemask(zone, z, zonelist,
                                                 MAX_NR_ZONES - 1, nodemask) {
-               if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) {
+               if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask(h))) {
                         page = dequeue_huge_page_node(h, zone_to_nid(zone));
                         if (page) {
                                 if (avoid_reserve)
@@ -638,6 +653,7 @@ static void free_huge_page(struct page *page)
         BUG_ON(page_count(page));
         BUG_ON(page_mapcount(page));
         restore_reserve = PagePrivate(page);
+       ClearPagePrivate(page);
  
         spin_lock(&hugetlb_lock);
         hugetlb_cgroup_uncharge_page(hstate_index(h),
@@ -680,8 +696,22 @@ static void prep_compound_gigantic_page(struct page *page, unsigned long order)
         /* we rely on prep_new_huge_page to set the destructor */
         set_compound_order(page, order);
         __SetPageHead(page);
+       __ClearPageReserved(page);
         for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
                 __SetPageTail(p);
+               /*
+                * For gigantic hugepages allocated through bootmem at
+                * boot, it's safer to be consistent with the not-gigantic
+                * hugepages and clear the PG_reserved bit from all tail pages
+                * too.  Otherwise drivers using get_user_pages() to access tail
+                * pages may get the reference counting wrong if they see
+                * PG_reserved set on a tail page (despite the head page not
+                * having PG_reserved set).  Enforcing this consistency between
+                * head and tail pages allows drivers to optimize away a check
+                * on the head page when they need to know if put_page() is needed
+                * after get_user_pages().
+                */
+               __ClearPageReserved(p);
                 set_page_count(p, 0);
                 p->first_page = page;
         }
@@ -731,7 +761,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
                 return NULL;
  
         page = alloc_pages_exact_node(nid,
-               htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
+               htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
                                                 __GFP_REPEAT|__GFP_NOWARN,
                 huge_page_order(h));
         if (page) {
@@ -878,6 +908,44 @@ static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
         return ret;
  }
  
+/*
+ * Dissolve a given free hugepage into free buddy pages; in-use (including
+ * surplus) hugepages are left alone. Takes and drops hugetlb_lock itself.
+ */
+static void dissolve_free_huge_page(struct page *page)
+{
+       spin_lock(&hugetlb_lock);
+       if (PageHuge(page) && !page_count(page)) {      /* refcount 0 == free */
+               struct hstate *h = page_hstate(page);
+               int nid = page_to_nid(page);
+               list_del(&page->lru);   /* presumably unlinks from a free list */
+               h->free_huge_pages--;   /* global and per-node free counters */
+               h->free_huge_pages_node[nid]--;
+               update_and_free_page(h, page);
+       }
+       spin_unlock(&hugetlb_lock);
+}
+
+/*
+ * Dissolve free hugepages in a pfn range so memory hotplug can remove the
+ * memory blocks. start_pfn should be aligned with (minimum) hugepage size.
+ */
+void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
+{
+       unsigned int order = 8 * sizeof(void *);
+       unsigned long pfn;
+       struct hstate *h;
+
+       for_each_hstate(h)      /* scan step is the minimum hugepage size */
+               if (order > huge_page_order(h))
+                       order = huge_page_order(h);
+       if (order >= 8 * sizeof(void *))        /* no hstate: avoid UB shift */
+               return;
+       VM_BUG_ON(!IS_ALIGNED(start_pfn, 1UL << order));
+       for (pfn = start_pfn; pfn < end_pfn; pfn += 1UL << order)
+               dissolve_free_huge_page(pfn_to_page(pfn));
+}
+
  static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
  {
         struct page *page;
@@ -920,12 +988,12 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
         spin_unlock(&hugetlb_lock);
  
         if (nid == NUMA_NO_NODE)
-               page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
+               page = alloc_pages(htlb_alloc_mask(h)|__GFP_COMP|
                                    __GFP_REPEAT|__GFP_NOWARN,
                                    huge_page_order(h));
         else
                 page = alloc_pages_exact_node(nid,
-                       htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
+                       htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
                         __GFP_REPEAT|__GFP_NOWARN, huge_page_order(h));
  
         if (page && arch_prepare_hugepage(page)) {
@@ -1276,9 +1344,9 @@ static void __init gather_bootmem_prealloc(void)
  #else
                 page = virt_to_page(m);
  #endif
-               __ClearPageReserved(page);
                 WARN_ON(page_count(page) != 1);
                 prep_compound_huge_page(page, h->order);
+               WARN_ON(PageReserved(page));
                 prep_new_huge_page(h, page, page_to_nid(page));
                 /*
                  * If we had gigantic hugepages allocated at boot time, we need
@@ -2072,18 +2140,6 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
  }
  #endif /* CONFIG_NUMA */
  
-int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
-                       void __user *buffer,
-                       size_t *length, loff_t *ppos)
-{
-       proc_dointvec(table, write, buffer, length, ppos);
-       if (hugepages_treat_as_movable)
-               htlb_alloc_mask = GFP_HIGHUSER_MOVABLE;
-       else
-               htlb_alloc_mask = GFP_HIGHUSER;
-       return 0;
-}
-
  int hugetlb_overcommit_handler(struct ctl_table *table, int write,
                         void __user *buffer,
                         size_t *length, loff_t *ppos)
@@ -3457,3 +3513,25 @@ void putback_active_hugepage(struct page *page)
         spin_unlock(&hugetlb_lock);
         put_page(page);
  }
+
+/* True if page has a positive refcount and is neither tail nor hwpoisoned. */
+bool is_hugepage_active(struct page *page)
+{
+       VM_BUG_ON(!PageHuge(page));
+       /*
+        * This function can be called for a tail page because the caller,
+        * scan_movable_pages, scans through a given pfn-range which typically
+        * covers one memory block. In systems using gigantic hugepages (1GB
+        * for x86_64), a hugepage is larger than a memory block, and we don't
+        * support migrating such large hugepages for now, so return false
+        * when called for tail pages.
+        */
+       if (PageTail(page))
+               return false;
+       /*
+        * Hwpoisoned hugepages hold a refcount of 1 but are not active.
+        */
+       if (unlikely(PageHWPoison(page)))
+               return false;
+       return page_count(page) > 0;
+}