X-Git-Url: http://pileus.org/git/?a=blobdiff_plain;f=mm%2Fhugetlb.c;h=d237a02eb2289774ded6ea2c0767f779d5b76e9b;hb=5adf2b03d97111c8955495ba11e8b7db27df8695;hp=5e620e25cf0820bd80efc5c1b2608f9bccb0cc6e;hpb=5ced66c901f1cf0b684feb15c2cd8b126e263d07;p=~andy%2Flinux diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5e620e25cf0..d237a02eb22 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -9,15 +9,18 @@ #include #include #include +#include #include #include #include #include #include +#include #include - +#include #include #include +#include #include #include "internal.h" @@ -30,9 +33,12 @@ static int max_hstate; unsigned int default_hstate_idx; struct hstate hstates[HUGE_MAX_HSTATE]; +__initdata LIST_HEAD(huge_boot_pages); + /* for command line parsing */ static struct hstate * __initdata parsed_hstate; static unsigned long __initdata default_hstate_max_huge_pages; +static unsigned long __initdata default_hstate_size; #define for_each_hstate(h) \ for ((h) = hstates; (h) < &hstates[max_hstate]; (h)++) @@ -338,13 +344,13 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma) } /* Returns true if the VMA has associated reserve pages */ -static int vma_has_private_reserves(struct vm_area_struct *vma) +static int vma_has_reserves(struct vm_area_struct *vma) { if (vma->vm_flags & VM_SHARED) - return 0; - if (!is_vma_resv_set(vma, HPAGE_RESV_OWNER)) - return 0; - return 1; + return 1; + if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) + return 1; + return 0; } static void clear_huge_page(struct page *page, @@ -416,7 +422,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, * have no page reserves. This check ensures that reservations are * not "stolen". The child may still get SIGKILLed */ - if (!vma_has_private_reserves(vma) && + if (!vma_has_reserves(vma) && h->free_huge_pages - h->resv_huge_pages == 0) return NULL; @@ -489,7 +495,7 @@ static void free_huge_page(struct page *page) INIT_LIST_HEAD(&page->lru); spin_lock(&hugetlb_lock); - if (h->surplus_huge_pages_node[nid]) { + if (h->surplus_huge_pages_node[nid] && huge_page_order(h) < MAX_ORDER) { update_and_free_page(h, page); h->surplus_huge_pages--; h->surplus_huge_pages_node[nid]--; @@ -550,6 +556,9 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid) { struct page *page; + if (h->order >= MAX_ORDER) + return NULL; + page = alloc_pages_node(nid, htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE| __GFP_REPEAT|__GFP_NOWARN, @@ -616,6 +625,9 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, struct page *page; unsigned int nid; + if (h->order >= MAX_ORDER) + return NULL; + /* * Assume we will successfully allocate the surplus page to * prevent racing processes from causing the surplus to exceed @@ -792,6 +804,10 @@ static void return_unused_surplus_pages(struct hstate *h, /* Uncommit the reservation */ h->resv_huge_pages -= unused_resv_pages; + /* Cannot return gigantic pages currently */ + if (h->order >= MAX_ORDER) + return; + nr_pages = min(unused_resv_pages, h->surplus_huge_pages); while (remaining_iterations-- && nr_pages) { @@ -913,20 +929,68 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma, return page; } -static void __init hugetlb_init_one_hstate(struct hstate *h) +__attribute__((weak)) int alloc_bootmem_huge_page(struct hstate *h) { - unsigned long i; + struct huge_bootmem_page *m; + int nr_nodes = nodes_weight(node_online_map); - for (i = 0; i < MAX_NUMNODES; ++i) - INIT_LIST_HEAD(&h->hugepage_freelists[i]); + while (nr_nodes) { + void *addr; - h->hugetlb_next_nid = first_node(node_online_map); + addr = __alloc_bootmem_node_nopanic( + NODE_DATA(h->hugetlb_next_nid), + huge_page_size(h), huge_page_size(h), 0); + + if (addr) { + /* + * Use the beginning of the huge page to store the + * huge_bootmem_page struct (until gather_bootmem + * puts them into the mem_map). + */ + m = addr; + if (m) + goto found; + } + hstate_next_node(h); + nr_nodes--; + } + return 0; + +found: + BUG_ON((unsigned long)virt_to_phys(m) & (huge_page_size(h) - 1)); + /* Put them into a private list first because mem_map is not up yet */ + list_add(&m->list, &huge_boot_pages); + m->hstate = h; + return 1; +} + +/* Put bootmem huge pages into the standard lists after mem_map is up */ +static void __init gather_bootmem_prealloc(void) +{ + struct huge_bootmem_page *m; + + list_for_each_entry(m, &huge_boot_pages, list) { + struct page *page = virt_to_page(m); + struct hstate *h = m->hstate; + __ClearPageReserved(page); + WARN_ON(page_count(page) != 1); + prep_compound_page(page, h->order); + prep_new_huge_page(h, page, page_to_nid(page)); + } +} + +static void __init hugetlb_hstate_alloc_pages(struct hstate *h) +{ + unsigned long i; for (i = 0; i < h->max_huge_pages; ++i) { - if (!alloc_fresh_huge_page(h)) + if (h->order >= MAX_ORDER) { + if (!alloc_bootmem_huge_page(h)) + break; + } else if (!alloc_fresh_huge_page(h)) break; } - h->max_huge_pages = h->free_huge_pages = h->nr_huge_pages = i; + h->max_huge_pages = i; } static void __init hugetlb_init_hstates(void) @@ -934,28 +998,44 @@ static void __init hugetlb_init_hstates(void) struct hstate *h; for_each_hstate(h) { - hugetlb_init_one_hstate(h); + /* oversize hugepages were init'ed in early boot */ + if (h->order < MAX_ORDER) + hugetlb_hstate_alloc_pages(h); } } +static char * __init memfmt(char *buf, unsigned long n) +{ + if (n >= (1UL << 30)) + sprintf(buf, "%lu GB", n >> 30); + else if (n >= (1UL << 20)) + sprintf(buf, "%lu MB", n >> 20); + else + sprintf(buf, "%lu KB", n >> 10); + return buf; +} + static void __init report_hugepages(void) { struct hstate *h; for_each_hstate(h) { - printk(KERN_INFO "Total HugeTLB memory allocated, " - "%ld %dMB pages\n", - h->free_huge_pages, - 1 << (h->order + PAGE_SHIFT - 20)); + char buf[32]; + printk(KERN_INFO "HugeTLB registered %s page size, " + "pre-allocated %ld pages\n", + memfmt(buf, huge_page_size(h)), + h->free_huge_pages); } } -#ifdef CONFIG_SYSCTL #ifdef CONFIG_HIGHMEM static void try_to_free_low(struct hstate *h, unsigned long count) { int i; + if (h->order >= MAX_ORDER) + return; + for (i = 0; i < MAX_NUMNODES; ++i) { struct page *page, *next; struct list_head *freel = &h->hugepage_freelists[i]; @@ -982,6 +1062,9 @@ static unsigned long set_max_huge_pages(struct hstate *h, unsigned long count) { unsigned long min_count, ret; + if (h->order >= MAX_ORDER) + return h->max_huge_pages; + /* * Increase the pool size * First take pages out of surplus state. Then make up the @@ -1202,14 +1285,19 @@ static int __init hugetlb_init(void) { BUILD_BUG_ON(HPAGE_SHIFT == 0); - if (!size_to_hstate(HPAGE_SIZE)) { - hugetlb_add_hstate(HUGETLB_PAGE_ORDER); - parsed_hstate->max_huge_pages = default_hstate_max_huge_pages; + if (!size_to_hstate(default_hstate_size)) { + default_hstate_size = HPAGE_SIZE; + if (!size_to_hstate(default_hstate_size)) + hugetlb_add_hstate(HUGETLB_PAGE_ORDER); } - default_hstate_idx = size_to_hstate(HPAGE_SIZE) - hstates; + default_hstate_idx = size_to_hstate(default_hstate_size) - hstates; + if (default_hstate_max_huge_pages) + default_hstate.max_huge_pages = default_hstate_max_huge_pages; hugetlb_init_hstates(); + gather_bootmem_prealloc(); + report_hugepages(); hugetlb_sysfs_init(); @@ -1222,6 +1310,8 @@ module_init(hugetlb_init); void __init hugetlb_add_hstate(unsigned order) { struct hstate *h; + unsigned long i; + if (size_to_hstate(PAGE_SIZE << order)) { printk(KERN_WARNING "hugepagesz= specified twice, ignoring\n"); return; @@ -1231,15 +1321,21 @@ void __init hugetlb_add_hstate(unsigned order) h = &hstates[max_hstate++]; h->order = order; h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1); + h->nr_huge_pages = 0; + h->free_huge_pages = 0; + for (i = 0; i < MAX_NUMNODES; ++i) + INIT_LIST_HEAD(&h->hugepage_freelists[i]); + h->hugetlb_next_nid = first_node(node_online_map); snprintf(h->name, HSTATE_NAME_LEN, "hugepages-%lukB", huge_page_size(h)/1024); - hugetlb_init_one_hstate(h); + parsed_hstate = h; } -static int __init hugetlb_setup(char *s) +static int __init hugetlb_nrpages_setup(char *s) { unsigned long *mhp; + static unsigned long *last_mhp; /* * !max_hstate means we haven't parsed a hugepagesz= parameter yet, @@ -1250,12 +1346,35 @@ static int __init hugetlb_setup(char *s) else mhp = &parsed_hstate->max_huge_pages; + if (mhp == last_mhp) { + printk(KERN_WARNING "hugepages= specified twice without " + "interleaving hugepagesz=, ignoring\n"); + return 1; + } + if (sscanf(s, "%lu", mhp) <= 0) *mhp = 0; + /* + * Global state is always initialized later in hugetlb_init. + * But we need to allocate >= MAX_ORDER hstates here early to still + * use the bootmem allocator. + */ + if (max_hstate && parsed_hstate->order >= MAX_ORDER) + hugetlb_hstate_alloc_pages(parsed_hstate); + + last_mhp = mhp; + return 1; } -__setup("hugepages=", hugetlb_setup); +__setup("hugepages=", hugetlb_nrpages_setup); + +static int __init hugetlb_default_setup(char *s) +{ + default_hstate_size = memparse(s, &s); + return 1; +} +__setup("default_hugepagesz=", hugetlb_default_setup); static unsigned int cpuset_mems_nr(unsigned int *array) { @@ -1268,6 +1387,7 @@ static unsigned int cpuset_mems_nr(unsigned int *array) return nr; } +#ifdef CONFIG_SYSCTL int hugetlb_sysctl_handler(struct ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) @@ -1434,8 +1554,10 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma) kref_put(&reservations->refs, resv_map_release); - if (reserve) + if (reserve) { hugetlb_acct_memory(h, -reserve); + hugetlb_put_quota(vma->vm_file->f_mapping, reserve); + } } } @@ -1552,6 +1674,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, BUG_ON(start & ~huge_page_mask(h)); BUG_ON(end & ~huge_page_mask(h)); + mmu_notifier_invalidate_range_start(mm, start, end); spin_lock(&mm->page_table_lock); for (address = start; address < end; address += sz) { ptep = huge_pte_offset(mm, address); @@ -1593,6 +1716,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, } spin_unlock(&mm->page_table_lock); flush_tlb_range(vma, start, end); + mmu_notifier_invalidate_range_end(mm, start, end); list_for_each_entry_safe(page, tmp, &page_list, lru) { list_del(&page->lru); put_page(page); @@ -1884,6 +2008,15 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, return ret; } +/* Can be overriden by architectures */ +__attribute__((weak)) struct page * +follow_huge_pud(struct mm_struct *mm, unsigned long address, + pud_t *pud, int write) +{ + BUG(); + return NULL; +} + int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page **pages, struct vm_area_struct **vmas, unsigned long *position, int *length, int i,