diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0f751f2068c..d365724feb0 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -27,10 +27,30 @@
 #include <linux/pfn.h>
 #include <linux/kmemleak.h>
 #include <linux/atomic.h>
+#include <linux/llist.h>
 #include <asm/uaccess.h>
 #include <asm/tlbflush.h>
 #include <asm/shmparam.h>
 
+struct vfree_deferred {
+	struct llist_head list;
+	struct work_struct wq;
+};
+static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred);
+
+static void __vunmap(const void *, int);
+
+static void free_work(struct work_struct *w)
+{
+	struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq);
+	struct llist_node *llnode = llist_del_all(&p->list);
+	while (llnode) {
+		void *p = llnode;
+		llnode = llist_next(llnode);
+		__vunmap(p, 1);
+	}
+}
+
 /*** Page table manipulation functions ***/
 
 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
@@ -249,19 +269,9 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
 #define VM_LAZY_FREEING	0x02
 #define VM_VM_AREA	0x04
 
-struct vmap_area {
-	unsigned long va_start;
-	unsigned long va_end;
-	unsigned long flags;
-	struct rb_node rb_node;		/* address sorted rbtree */
-	struct list_head list;		/* address sorted list */
-	struct list_head purge_list;	/* "lazy purge" list */
-	struct vm_struct *vm;
-	struct rcu_head rcu_head;
-};
-
 static DEFINE_SPINLOCK(vmap_area_lock);
-static LIST_HEAD(vmap_area_list);
+/* Export for kexec only */
+LIST_HEAD(vmap_area_list);
 static struct rb_root vmap_area_root = RB_ROOT;
 
 /* The vmap cache globals are protected by vmap_area_lock */
@@ -313,7 +323,7 @@ static void __insert_vmap_area(struct vmap_area *va)
 	rb_link_node(&va->rb_node, parent, p);
 	rb_insert_color(&va->rb_node, &vmap_area_root);
 
-	/* address-sort this list so it is usable like the vmlist */
+	/* address-sort this list */
 	tmp = rb_prev(&va->rb_node);
 	if (tmp) {
 		struct vmap_area *prev;
@@ -1125,6 +1135,7 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
 }
 EXPORT_SYMBOL(vm_map_ram);
 
+static struct vm_struct *vmlist __initdata;
 /**
  * vm_area_add_early - add vmap area early during boot
  * @vm: vm_struct to add
@@ -1184,10 +1195,14 @@ void __init vmalloc_init(void)
 
 	for_each_possible_cpu(i) {
 		struct vmap_block_queue *vbq;
+		struct vfree_deferred *p;
 
 		vbq = &per_cpu(vmap_block_queue, i);
 		spin_lock_init(&vbq->lock);
 		INIT_LIST_HEAD(&vbq->free);
+		p = &per_cpu(vfree_deferred, i);
+		init_llist_head(&p->list);
+		INIT_WORK(&p->wq, free_work);
 	}
 
 	/* Import existing vmlist entries. */
@@ -1283,41 +1298,35 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
 }
 EXPORT_SYMBOL_GPL(map_vm_area);
 
-/*** Old vmalloc interfaces ***/
-DEFINE_RWLOCK(vmlist_lock);
-struct vm_struct *vmlist;
-
 static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
 			      unsigned long flags, const void *caller)
 {
+	spin_lock(&vmap_area_lock);
 	vm->flags = flags;
 	vm->addr = (void *)va->va_start;
 	vm->size = va->va_end - va->va_start;
 	vm->caller = caller;
 	va->vm = vm;
 	va->flags |= VM_VM_AREA;
+	spin_unlock(&vmap_area_lock);
 }
 
-static void insert_vmalloc_vmlist(struct vm_struct *vm)
+static void clear_vm_unlist(struct vm_struct *vm)
 {
-	struct vm_struct *tmp, **p;
-
+	/*
+	 * Before removing VM_UNLIST,
+	 * we should make sure that vm has proper values.
+	 * Pair with smp_rmb() in show_numa_info().
+	 */
+	smp_wmb();
 	vm->flags &= ~VM_UNLIST;
-	write_lock(&vmlist_lock);
-	for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
-		if (tmp->addr >= vm->addr)
-			break;
-	}
-	vm->next = *p;
-	*p = vm;
-	write_unlock(&vmlist_lock);
 }
 
 static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
 			      unsigned long flags, const void *caller)
 {
 	setup_vmalloc_vm(vm, va, flags, caller);
-	insert_vmalloc_vmlist(vm);
+	clear_vm_unlist(vm);
 }
 
 static struct vm_struct *__get_vm_area_node(unsigned long size,
@@ -1360,10 +1369,9 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
 
 	/*
 	 * When this function is called from __vmalloc_node_range,
-	 * we do not add vm_struct to vmlist here to avoid
-	 * accessing uninitialized members of vm_struct such as
-	 * pages and nr_pages fields. They will be set later.
-	 * To distinguish it from others, we use a VM_UNLIST flag.
+	 * we add VM_UNLIST flag to avoid accessing uninitialized
+	 * members of vm_struct such as pages and nr_pages fields.
+	 * They will be set later.
 	 */
 	if (flags & VM_UNLIST)
 		setup_vmalloc_vm(area, va, flags, caller);
@@ -1447,19 +1455,10 @@ struct vm_struct *remove_vm_area(const void *addr)
 	if (va && va->flags & VM_VM_AREA) {
 		struct vm_struct *vm = va->vm;
 
-		if (!(vm->flags & VM_UNLIST)) {
-			struct vm_struct *tmp, **p;
-			/*
-			 * remove from list and disallow access to
-			 * this vm_struct before unmap. (address range
-			 * confliction is maintained by vmap.)
-			 */
-			write_lock(&vmlist_lock);
-			for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
-				;
-			*p = tmp->next;
-			write_unlock(&vmlist_lock);
-		}
+		spin_lock(&vmap_area_lock);
+		va->vm = NULL;
+		va->flags &= ~VM_VM_AREA;
+		spin_unlock(&vmap_area_lock);
 
 		vmap_debug_free_range(va->va_start, va->va_end);
 		free_unmap_vmap_area(va);
@@ -1511,7 +1510,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
 	kfree(area);
 	return;
 }
- 
+
 /**
  *	vfree  -  release memory allocated by vmalloc()
  *	@addr:		memory base address
@@ -1520,15 +1519,27 @@ static void __vunmap(const void *addr, int deallocate_pages)
  *	obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
  *	NULL, no operation is performed.
  *
- *	Must not be called in interrupt context.
+ *	Must not be called in NMI context (strictly speaking, only if we don't
+ *	have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
+ *	conventions for vfree() arch-dependent would be a really bad idea)
+ *
+ *	NOTE: assumes that the object at *addr has a size >= sizeof(llist_node)
+ *
 */
 void vfree(const void *addr)
 {
-	BUG_ON(in_interrupt());
+	BUG_ON(in_nmi());
 
 	kmemleak_free(addr);
 
-	__vunmap(addr, 1);
+	if (!addr)
+		return;
+	if (unlikely(in_interrupt())) {
+		struct vfree_deferred *p = &__get_cpu_var(vfree_deferred);
+		llist_add((struct llist_node *)addr, &p->list);
+		schedule_work(&p->wq);
+	} else
+		__vunmap(addr, 1);
 }
 EXPORT_SYMBOL(vfree);
 
@@ -1545,7 +1556,8 @@ void vunmap(const void *addr)
 {
 	BUG_ON(in_interrupt());
 	might_sleep();
-	__vunmap(addr, 0);
+	if (addr)
+		__vunmap(addr, 0);
 }
 EXPORT_SYMBOL(vunmap);
 
@@ -1680,10 +1692,11 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 		return NULL;
 
 	/*
-	 * In this function, newly allocated vm_struct is not added
-	 * to vmlist at __get_vm_area_node(). so, it is added here.
+	 * In this function, newly allocated vm_struct has VM_UNLIST flag.
+	 * It means that vm_struct is not fully initialized.
+	 * Now, it is fully initialized, so remove this flag here.
	 */
-	insert_vmalloc_vmlist(area);
+	clear_vm_unlist(area);
 
 	/*
 	 * A ref_count = 3 is needed because the vm_struct and vmap_area
@@ -2005,7 +2018,8 @@ static int aligned_vwrite(char *buf, char *addr, unsigned long count)
 
 long vread(char *buf, char *addr, unsigned long count)
 {
-	struct vm_struct *tmp;
+	struct vmap_area *va;
+	struct vm_struct *vm;
 	char *vaddr, *buf_start = buf;
 	unsigned long buflen = count;
 	unsigned long n;
@@ -2014,10 +2028,17 @@ long vread(char *buf, char *addr, unsigned long count)
 	if ((unsigned long) addr + count < count)
 		count = -(unsigned long) addr;
 
-	read_lock(&vmlist_lock);
-	for (tmp = vmlist; count && tmp; tmp = tmp->next) {
-		vaddr = (char *) tmp->addr;
-		if (addr >= vaddr + tmp->size - PAGE_SIZE)
+	spin_lock(&vmap_area_lock);
+	list_for_each_entry(va, &vmap_area_list, list) {
+		if (!count)
+			break;
+
+		if (!(va->flags & VM_VM_AREA))
+			continue;
+
+		vm = va->vm;
+		vaddr = (char *) vm->addr;
+		if (addr >= vaddr + vm->size - PAGE_SIZE)
 			continue;
 		while (addr < vaddr) {
 			if (count == 0)
 				goto finished;
 			*buf = '\0';
 			buf++;
 			addr++;
 			count--;
 		}
-		n = vaddr + tmp->size - PAGE_SIZE - addr;
+		n = vaddr + vm->size - PAGE_SIZE - addr;
 		if (n > count)
 			n = count;
-		if (!(tmp->flags & VM_IOREMAP))
+		if (!(vm->flags & VM_IOREMAP))
 			aligned_vread(buf, addr, n);
 		else /* IOREMAP area is treated as memory hole */
 			memset(buf, 0, n);
 		buf += n;
 		addr += n;
 		count -= n;
 	}
 finished:
-	read_unlock(&vmlist_lock);
+	spin_unlock(&vmap_area_lock);
 
 	if (buf == buf_start)
 		return 0;
@@ -2078,7 +2099,8 @@ finished:
 
 long vwrite(char *buf, char *addr, unsigned long count)
 {
-	struct vm_struct *tmp;
+	struct vmap_area *va;
+	struct vm_struct *vm;
 	char *vaddr;
 	unsigned long n, buflen;
 	int copied = 0;
@@ -2088,10 +2110,17 @@ long vwrite(char *buf, char *addr, unsigned long count)
 		count = -(unsigned long) addr;
 	buflen = count;
 
-	read_lock(&vmlist_lock);
-	for (tmp = vmlist; count && tmp; tmp = tmp->next) {
-		vaddr = (char *) tmp->addr;
-		if (addr >= vaddr + tmp->size - PAGE_SIZE)
+	spin_lock(&vmap_area_lock);
+	list_for_each_entry(va, &vmap_area_list, list) {
+		if (!count)
+			break;
+
+		if (!(va->flags & VM_VM_AREA))
+			continue;
+
+		vm = va->vm;
+		vaddr = (char *) vm->addr;
+		if (addr >= vaddr + vm->size - PAGE_SIZE)
 			continue;
 		while (addr < vaddr) {
 			if (count == 0)
 				goto finished;
 			buf++;
 			addr++;
 			count--;
 		}
-		n = vaddr + tmp->size - PAGE_SIZE - addr;
+		n = vaddr + vm->size - PAGE_SIZE - addr;
 		if (n > count)
 			n = count;
-		if (!(tmp->flags & VM_IOREMAP)) {
+		if (!(vm->flags & VM_IOREMAP)) {
 			aligned_vwrite(buf, addr, n);
 			copied++;
 		}
 		buf += n;
 		addr += n;
 		count -= n;
 	}
 finished:
-	read_unlock(&vmlist_lock);
+	spin_unlock(&vmap_area_lock);
 	if (!copied)
 		return 0;
 	return buflen;
@@ -2519,19 +2548,19 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
 
 #ifdef CONFIG_PROC_FS
 static void *s_start(struct seq_file *m, loff_t *pos)
-	__acquires(&vmlist_lock)
+	__acquires(&vmap_area_lock)
 {
 	loff_t n = *pos;
-	struct vm_struct *v;
+	struct vmap_area *va;
 
-	read_lock(&vmlist_lock);
-	v = vmlist;
-	while (n > 0 && v) {
+	spin_lock(&vmap_area_lock);
+	va = list_entry((&vmap_area_list)->next, typeof(*va), list);
+	while (n > 0 && &va->list != &vmap_area_list) {
 		n--;
-		v = v->next;
+		va = list_entry(va->list.next, typeof(*va), list);
 	}
-	if (!n)
-		return v;
+	if (!n && &va->list != &vmap_area_list)
+		return va;
 
 	return NULL;
@@ -2539,16 +2568,20 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 
 static void *s_next(struct seq_file *m, void *p, loff_t *pos)
 {
-	struct vm_struct *v = p;
+	struct vmap_area *va = p, *next;
 
 	++*pos;
-	return v->next;
+	next = list_entry(va->list.next, typeof(*va), list);
+	if (&next->list != &vmap_area_list)
+		return next;
+
+	return NULL;
 }
 
 static void s_stop(struct seq_file *m, void *p)
-	__releases(&vmlist_lock)
+	__releases(&vmap_area_lock)
 {
-	read_unlock(&vmlist_lock);
+	spin_unlock(&vmap_area_lock);
 }
 
 static void show_numa_info(struct seq_file *m, struct vm_struct *v)
@@ -2559,6 +2592,11 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
 		if (!counters)
 			return;
 
+		/* Pair with smp_wmb() in clear_vm_unlist() */
+		smp_rmb();
+		if (v->flags & VM_UNLIST)
+			return;
+
 		memset(counters, 0, nr_node_ids * sizeof(unsigned int));
 
 		for (nr = 0; nr < v->nr_pages; nr++)
@@ -2572,7 +2610,20 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
 
 static int s_show(struct seq_file *m, void *p)
 {
-	struct vm_struct *v = p;
+	struct vmap_area *va = p;
+	struct vm_struct *v;
+
+	if (va->flags & (VM_LAZY_FREE | VM_LAZY_FREEING))
+		return 0;
+
+	if (!(va->flags & VM_VM_AREA)) {
+		seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
+			(void *)va->va_start, (void *)va->va_end,
+			va->va_end - va->va_start);
+		return 0;
+	}
+
+	v = va->vm;
 
 	seq_printf(m, "0x%pK-0x%pK %7ld",
 		v->addr, v->addr + v->size, v->size);
@@ -2645,5 +2696,53 @@ static int __init proc_vmalloc_init(void)
 	return 0;
 }
 module_init(proc_vmalloc_init);
+
+void get_vmalloc_info(struct vmalloc_info *vmi)
+{
+	struct vmap_area *va;
+	unsigned long free_area_size;
+	unsigned long prev_end;
+
+	vmi->used = 0;
+	vmi->largest_chunk = 0;
+
+	prev_end = VMALLOC_START;
+
+	spin_lock(&vmap_area_lock);
+
+	if (list_empty(&vmap_area_list)) {
+		vmi->largest_chunk = VMALLOC_TOTAL;
+		goto out;
+	}
+
+	list_for_each_entry(va, &vmap_area_list, list) {
+		unsigned long addr = va->va_start;
+
+		/*
+		 * Some archs keep another range for modules in vmalloc space
+		 */
+		if (addr < VMALLOC_START)
+			continue;
+		if (addr >= VMALLOC_END)
+			break;
+
+		if (va->flags & (VM_LAZY_FREE | VM_LAZY_FREEING))
+			continue;
+
+		vmi->used += (va->va_end - va->va_start);
+
+		free_area_size = addr - prev_end;
+		if (vmi->largest_chunk < free_area_size)
+			vmi->largest_chunk = free_area_size;
+
+		prev_end = va->va_end;
+	}
+
+	if (VMALLOC_END - prev_end > vmi->largest_chunk)
+		vmi->largest_chunk = VMALLOC_END - prev_end;
+
+out:
+	spin_unlock(&vmap_area_lock);
+}
 #endif
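
The heart of the patch is the deferred-free path in vfree(): an object freed from interrupt context is pushed onto a per-CPU lock-free list (llist_add) and a workqueue item later detaches the whole list in one shot (llist_del_all) and hands each entry to __vunmap(). What follows is a minimal userspace sketch of that push/drain pattern using C11 atomics; it is not part of the patch. The llist_add()/llist_del_all() below are simplified stand-ins for the kernel's <linux/llist.h> primitives, and free_deferred()/drain_worker() are illustrative names, not kernel APIs.

/*
 * Userspace sketch of the lock-free deferred-free pattern used by
 * vfree() above, modelled with C11 atomics.  Producers push dead
 * objects with a CAS loop; a worker detaches the whole list with one
 * atomic exchange and frees every node.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct llist_node {
	struct llist_node *next;
};

/* Analogue of the kernel's llist_add(): lock-free push onto the list. */
static void llist_add(struct llist_node *node,
		      _Atomic(struct llist_node *) *head)
{
	struct llist_node *first = atomic_load(head);

	do {
		node->next = first;	/* 'first' is refreshed on CAS failure */
	} while (!atomic_compare_exchange_weak(head, &first, node));
}

/* Analogue of llist_del_all(): detach the entire list in one exchange. */
static struct llist_node *llist_del_all(_Atomic(struct llist_node *) *head)
{
	return atomic_exchange(head, NULL);
}

static _Atomic(struct llist_node *) deferred_list;	/* per-CPU in the kernel */

/*
 * Deferred free: the first bytes of the dead object are reused as the
 * llist_node, hence vfree()'s "size >= sizeof(llist_node)" requirement.
 */
static void free_deferred(void *obj)
{
	llist_add(obj, &deferred_list);
}

/* The work function: drain the list and really free each object. */
static void drain_worker(void)
{
	struct llist_node *llnode = llist_del_all(&deferred_list);

	while (llnode) {
		void *p = llnode;

		llnode = llnode->next;	/* read next before freeing */
		free(p);		/* __vunmap(p, 1) in the kernel */
	}
}

int main(void)
{
	for (int i = 0; i < 4; i++)
		free_deferred(malloc(64));	/* "vfree() from IRQ context" */
	drain_worker();				/* "schedule_work() runs this later" */
	puts("deferred objects freed");
	return 0;
}

The push is a single compare-and-swap and the drain is a single exchange, so producers never take a lock and never block; that is what lets the real vfree() run with interrupts disabled and merely schedule the expensive __vunmap() work, instead of the old BUG_ON(in_interrupt()).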