#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
+#include <linux/page-isolation.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include "internal.h"
const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
-static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
unsigned long hugepages_treat_as_movable;
int hugetlb_max_hstate __read_mostly;
{
struct page *page;
- if (list_empty(&h->hugepage_freelists[nid]))
+ list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
+ if (!is_migrate_isolate_page(page))
+ break;
+ /*
+ * if 'non-isolated free hugepage' not found on the list,
+ * the allocation fails.
+ */
+ if (&h->hugepage_freelists[nid] == &page->lru)
return NULL;
- page = list_entry(h->hugepage_freelists[nid].next, struct page, lru);
list_move(&page->lru, &h->hugepage_activelist);
set_page_refcounted(page);
h->free_huge_pages--;
return page;
}
+/* Movability of hugepages depends on migration support. */
+static inline gfp_t htlb_alloc_mask(struct hstate *h)
+{
+ if (hugepages_treat_as_movable || hugepage_migration_support(h))
+ return GFP_HIGHUSER_MOVABLE;
+ else
+ return GFP_HIGHUSER;
+}
+
static struct page *dequeue_huge_page_vma(struct hstate *h,
struct vm_area_struct *vma,
unsigned long address, int avoid_reserve,
retry_cpuset:
cpuset_mems_cookie = get_mems_allowed();
zonelist = huge_zonelist(vma, address,
- htlb_alloc_mask, &mpol, &nodemask);
+ htlb_alloc_mask(h), &mpol, &nodemask);
for_each_zone_zonelist_nodemask(zone, z, zonelist,
MAX_NR_ZONES - 1, nodemask) {
- if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) {
+ if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask(h))) {
page = dequeue_huge_page_node(h, zone_to_nid(zone));
if (page) {
if (avoid_reserve)
return NULL;
page = alloc_pages_exact_node(nid,
- htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
+ htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
__GFP_REPEAT|__GFP_NOWARN,
huge_page_order(h));
if (page) {
return ret;
}
+/*
+ * Dissolve a given free hugepage into free buddy pages. This function does
+ * nothing for in-use (including surplus) hugepages.
+ */
+static void dissolve_free_huge_page(struct page *page)
+{
+ spin_lock(&hugetlb_lock);
+ if (PageHuge(page) && !page_count(page)) {
+ struct hstate *h = page_hstate(page);
+ int nid = page_to_nid(page);
+ list_del(&page->lru);
+ h->free_huge_pages--;
+ h->free_huge_pages_node[nid]--;
+ update_and_free_page(h, page);
+ }
+ spin_unlock(&hugetlb_lock);
+}
+
+/*
+ * Dissolve free hugepages in a given pfn range. Used by memory hotplug to
+ * make specified memory blocks removable from the system.
+ * Note that start_pfn should aligned with (minimum) hugepage size.
+ */
+void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
+{
+ unsigned int order = 8 * sizeof(void *);
+ unsigned long pfn;
+ struct hstate *h;
+
+ /* Set scan step to minimum hugepage size */
+ for_each_hstate(h)
+ if (order > huge_page_order(h))
+ order = huge_page_order(h);
+ VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << order));
+ for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << order)
+ dissolve_free_huge_page(pfn_to_page(pfn));
+}
+
static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
{
struct page *page;
spin_unlock(&hugetlb_lock);
if (nid == NUMA_NO_NODE)
- page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
+ page = alloc_pages(htlb_alloc_mask(h)|__GFP_COMP|
__GFP_REPEAT|__GFP_NOWARN,
huge_page_order(h));
else
page = alloc_pages_exact_node(nid,
- htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
+ htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
__GFP_REPEAT|__GFP_NOWARN, huge_page_order(h));
if (page && arch_prepare_hugepage(page)) {
return page;
}
+/*
+ * alloc_huge_page()'s wrapper which simply returns the page if allocation
+ * succeeds, otherwise NULL. This function is called from new_vma_page(),
+ * where no ERR_VALUE is expected to be returned.
+ */
+struct page *alloc_huge_page_noerr(struct vm_area_struct *vma,
+ unsigned long addr, int avoid_reserve)
+{
+ struct page *page = alloc_huge_page(vma, addr, avoid_reserve);
+ if (IS_ERR(page))
+ page = NULL;
+ return page;
+}
+
int __weak alloc_bootmem_huge_page(struct hstate *h)
{
struct huge_bootmem_page *m;
}
#endif /* CONFIG_NUMA */
-int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *length, loff_t *ppos)
-{
- proc_dointvec(table, write, buffer, length, ppos);
- if (hugepages_treat_as_movable)
- htlb_alloc_mask = GFP_HIGHUSER_MOVABLE;
- else
- htlb_alloc_mask = GFP_HIGHUSER;
- return 0;
-}
-
int hugetlb_overcommit_handler(struct ctl_table *table, int write,
void __user *buffer,
size_t *length, loff_t *ppos)
spin_unlock(&hugetlb_lock);
put_page(page);
}
+
+bool is_hugepage_active(struct page *page)
+{
+ VM_BUG_ON(!PageHuge(page));
+ /*
+ * This function can be called for a tail page because the caller,
+ * scan_movable_pages, scans through a given pfn-range which typically
+ * covers one memory block. In systems using gigantic hugepage (1GB
+ * for x86_64,) a hugepage is larger than a memory block, and we don't
+ * support migrating such large hugepages for now, so return false
+ * when called for tail pages.
+ */
+ if (PageTail(page))
+ return false;
+ /*
+ * Refcount of a hwpoisoned hugepages is 1, but they are not active,
+ * so we should return false for them.
+ */
+ if (unlikely(PageHWPoison(page)))
+ return false;
+ return page_count(page) > 0;
+}