Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
index 0f751f2068c3f02d7b69b5fd90f4354831172eb5..d365724feb05206fc0b37c852f5859499f1cb6b3 100644 (file)
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
 #include <linux/pfn.h>
 #include <linux/kmemleak.h>
 #include <linux/atomic.h>
+#include <linux/llist.h>
 #include <asm/uaccess.h>
 #include <asm/tlbflush.h>
 #include <asm/shmparam.h>
 
+struct vfree_deferred {
+       struct llist_head list;
+       struct work_struct wq;
+};
+static DEFINE_PER_CPU(struct vfree_deferred, vfree_deferred);
+
+static void __vunmap(const void *, int);
+
+static void free_work(struct work_struct *w)
+{
+       struct vfree_deferred *p = container_of(w, struct vfree_deferred, wq);
+       struct llist_node *llnode = llist_del_all(&p->list);
+       while (llnode) {
+               void *addr = llnode;
+               llnode = llist_next(llnode);
+               __vunmap(addr, 1);
+       }
+}
+
 /*** Page table manipulation functions ***/
 
 static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
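
The vfree_deferred machinery above reuses the freed object's own first bytes as a struct llist_node, so deferring a free needs no allocation: llist_add() pushes the pointer onto a per-CPU lock-free list, and free_work() later detaches the whole list with llist_del_all() and unmaps each entry. Below is a minimal userspace model of that pattern, using C11 atomics in place of the kernel's llist and free() where the kernel calls __vunmap(); all names are illustrative, not kernel API.

/*
 * Userspace sketch of the deferred-free pattern: producers push
 * freed objects onto a lock-free singly linked list (cf. llist_add)
 * and a drainer detaches the entire list in one atomic exchange
 * (cf. llist_del_all), then frees each entry.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct node { struct node *next; };

static _Atomic(struct node *) deferred_head;

static void deferred_free(void *obj)	/* cf. llist_add() from vfree() */
{
	struct node *n = obj;		/* reuse the object's own memory */

	n->next = atomic_load(&deferred_head);
	while (!atomic_compare_exchange_weak(&deferred_head, &n->next, n))
		;			/* n->next is refreshed on failure */
}

static void drain(void)			/* cf. free_work() */
{
	struct node *n = atomic_exchange(&deferred_head, NULL);

	while (n) {
		struct node *next = n->next;

		free(n);		/* stands in for __vunmap(addr, 1) */
		n = next;
	}
}

int main(void)
{
	/* objects must be at least sizeof(struct node), as with vfree() */
	for (int i = 0; i < 4; i++)
		deferred_free(malloc(64));
	drain();
	puts("drained");
	return 0;
}
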
@@ -249,19 +269,9 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
 #define VM_LAZY_FREEING        0x02
 #define VM_VM_AREA     0x04
 
-struct vmap_area {
-       unsigned long va_start;
-       unsigned long va_end;
-       unsigned long flags;
-       struct rb_node rb_node;         /* address sorted rbtree */
-       struct list_head list;          /* address sorted list */
-       struct list_head purge_list;    /* "lazy purge" list */
-       struct vm_struct *vm;
-       struct rcu_head rcu_head;
-};
-
 static DEFINE_SPINLOCK(vmap_area_lock);
-static LIST_HEAD(vmap_area_list);
+/* Export for kexec only */
+LIST_HEAD(vmap_area_list);
 static struct rb_root vmap_area_root = RB_ROOT;
 
 /* The vmap cache globals are protected by vmap_area_lock */
@@ -313,7 +323,7 @@ static void __insert_vmap_area(struct vmap_area *va)
        rb_link_node(&va->rb_node, parent, p);
        rb_insert_color(&va->rb_node, &vmap_area_root);
 
-       /* address-sort this list so it is usable like the vmlist */
+       /* address-sort this list */
        tmp = rb_prev(&va->rb_node);
        if (tmp) {
                struct vmap_area *prev;
@@ -1125,6 +1135,7 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
 }
 EXPORT_SYMBOL(vm_map_ram);
 
+static struct vm_struct *vmlist __initdata;
 /**
  * vm_area_add_early - add vmap area early during boot
  * @vm: vm_struct to add
@@ -1184,10 +1195,14 @@ void __init vmalloc_init(void)
 
        for_each_possible_cpu(i) {
                struct vmap_block_queue *vbq;
+               struct vfree_deferred *p;
 
                vbq = &per_cpu(vmap_block_queue, i);
                spin_lock_init(&vbq->lock);
                INIT_LIST_HEAD(&vbq->free);
+               p = &per_cpu(vfree_deferred, i);
+               init_llist_head(&p->list);
+               INIT_WORK(&p->wq, free_work);
        }
 
        /* Import existing vmlist entries. */
@@ -1283,41 +1298,35 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
 }
 EXPORT_SYMBOL_GPL(map_vm_area);
 
-/*** Old vmalloc interfaces ***/
-DEFINE_RWLOCK(vmlist_lock);
-struct vm_struct *vmlist;
-
 static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
                              unsigned long flags, const void *caller)
 {
+       spin_lock(&vmap_area_lock);
        vm->flags = flags;
        vm->addr = (void *)va->va_start;
        vm->size = va->va_end - va->va_start;
        vm->caller = caller;
        va->vm = vm;
        va->flags |= VM_VM_AREA;
+       spin_unlock(&vmap_area_lock);
 }
 
-static void insert_vmalloc_vmlist(struct vm_struct *vm)
+static void clear_vm_unlist(struct vm_struct *vm)
 {
-       struct vm_struct *tmp, **p;
-
+       /*
+        * Before removing VM_UNLIST, we should make sure that vm has
+        * proper values. Pairs with smp_rmb() in show_numa_info().
+        */
+       smp_wmb();
        vm->flags &= ~VM_UNLIST;
-       write_lock(&vmlist_lock);
-       for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
-               if (tmp->addr >= vm->addr)
-                       break;
-       }
-       vm->next = *p;
-       *p = vm;
-       write_unlock(&vmlist_lock);
 }
 
 static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
                              unsigned long flags, const void *caller)
 {
        setup_vmalloc_vm(vm, va, flags, caller);
-       insert_vmalloc_vmlist(vm);
+       clear_vm_unlist(vm);
 }
 
 static struct vm_struct *__get_vm_area_node(unsigned long size,
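
clear_vm_unlist() above publishes a fully initialized vm_struct: smp_wmb() orders the initializing stores before the store that clears VM_UNLIST, pairing with the smp_rmb() added to show_numa_info() further down. Below is a hedged userspace model of that publish/observe pattern, with C11 release/acquire standing in for the explicit barriers; struct obj, publish() and read_obj() are illustrative names.

/*
 * Sketch of the smp_wmb()/smp_rmb() pairing used for VM_UNLIST:
 * the writer fills in the object, then clears the "unlisted" flag
 * with release semantics; a reader that sees the flag clear
 * (acquire) is guaranteed to also see the initialized fields.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
	int nr_pages;			/* stands in for vm->nr_pages etc. */
	atomic_bool unlisted;		/* stands in for the VM_UNLIST bit */
};

static void publish(struct obj *o, int nr_pages)	/* cf. clear_vm_unlist() */
{
	o->nr_pages = nr_pages;
	/* kernel: smp_wmb() before clearing VM_UNLIST */
	atomic_store_explicit(&o->unlisted, false, memory_order_release);
}

static bool read_obj(struct obj *o, int *out)		/* cf. show_numa_info() */
{
	/* kernel: smp_rmb() paired with the smp_wmb() above */
	if (atomic_load_explicit(&o->unlisted, memory_order_acquire))
		return false;		/* still initializing; skip it */
	*out = o->nr_pages;
	return true;
}

int main(void)
{
	struct obj o = { .unlisted = true };
	int n;

	printf("before: %s\n", read_obj(&o, &n) ? "visible" : "skipped");
	publish(&o, 42);
	if (read_obj(&o, &n))
		printf("after: nr_pages=%d\n", n);
	return 0;
}
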
@@ -1360,10 +1369,9 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
 
        /*
         * When this function is called from __vmalloc_node_range,
-        * we do not add vm_struct to vmlist here to avoid
-        * accessing uninitialized members of vm_struct such as
-        * pages and nr_pages fields. They will be set later.
-        * To distinguish it from others, we use a VM_UNLIST flag.
+        * we set the VM_UNLIST flag to avoid accessing uninitialized
+        * members of vm_struct, such as the pages and nr_pages fields;
+        * they will be set later.
         */
        if (flags & VM_UNLIST)
                setup_vmalloc_vm(area, va, flags, caller);
@@ -1447,19 +1455,10 @@ struct vm_struct *remove_vm_area(const void *addr)
        if (va && va->flags & VM_VM_AREA) {
                struct vm_struct *vm = va->vm;
 
-               if (!(vm->flags & VM_UNLIST)) {
-                       struct vm_struct *tmp, **p;
-                       /*
-                        * remove from list and disallow access to
-                        * this vm_struct before unmap. (address range
-                        * confliction is maintained by vmap.)
-                        */
-                       write_lock(&vmlist_lock);
-                       for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
-                               ;
-                       *p = tmp->next;
-                       write_unlock(&vmlist_lock);
-               }
+               spin_lock(&vmap_area_lock);
+               va->vm = NULL;
+               va->flags &= ~VM_VM_AREA;
+               spin_unlock(&vmap_area_lock);
 
                vmap_debug_free_range(va->va_start, va->va_end);
                free_unmap_vmap_area(va);
@@ -1511,7 +1510,7 @@ static void __vunmap(const void *addr, int deallocate_pages)
        kfree(area);
        return;
 }
-
 /**
  *     vfree  -  release memory allocated by vmalloc()
  *     @addr:          memory base address
@@ -1520,15 +1519,27 @@ static void __vunmap(const void *addr, int deallocate_pages)
  *     obtained from vmalloc(), vmalloc_32() or __vmalloc(). If @addr is
  *     NULL, no operation is performed.
  *
- *     Must not be called in interrupt context.
+ *     Must not be called in NMI context (strictly speaking, only if we don't
+ *     have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
+ *     conventions for vfree() arch-dependent would be a really bad idea).
+ *
+ *     NOTE: assumes that the object at *addr has a size >= sizeof(llist_node)
  */
 void vfree(const void *addr)
 {
-       BUG_ON(in_interrupt());
+       BUG_ON(in_nmi());
 
        kmemleak_free(addr);
 
-       __vunmap(addr, 1);
+       if (!addr)
+               return;
+       if (unlikely(in_interrupt())) {
+               struct vfree_deferred *p = &__get_cpu_var(vfree_deferred);
+               llist_add((struct llist_node *)addr, &p->list);
+               schedule_work(&p->wq);
+       } else
+               __vunmap(addr, 1);
 }
 EXPORT_SYMBOL(vfree);
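
For callers, the visible change is that vfree() may now be used from interrupt context (only NMI context remains forbidden): when in_interrupt() is true, the buffer is queued on the per-CPU list and the real unmap runs later from the workqueue. A hypothetical usage fragment follows; my_timer_fn, struct my_dev and bounce_buf are made-up names, not part of this patch.

/*
 * Hypothetical driver fragment: freeing a vmalloc'ed buffer from
 * softirq context, e.g. a timer callback, is now legal; the free
 * is deferred and performed in process context.
 */
static void my_timer_fn(unsigned long data)
{
	struct my_dev *dev = (struct my_dev *)data;

	vfree(dev->bounce_buf);		/* in_interrupt(): queued, not unmapped */
	dev->bounce_buf = NULL;
}
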
 
@@ -1545,7 +1556,8 @@ void vunmap(const void *addr)
 {
        BUG_ON(in_interrupt());
        might_sleep();
-       __vunmap(addr, 0);
+       if (addr)
+               __vunmap(addr, 0);
 }
 EXPORT_SYMBOL(vunmap);
 
@@ -1680,10 +1692,11 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
                return NULL;
 
        /*
-        * In this function, newly allocated vm_struct is not added
-        * to vmlist at __get_vm_area_node(). so, it is added here.
+        * In this function, the newly allocated vm_struct has the
+        * VM_UNLIST flag set, meaning it is not yet fully initialized.
+        * Now that it is, remove the flag here.
         */
-       insert_vmalloc_vmlist(area);
+       clear_vm_unlist(area);
 
        /*
         * A ref_count = 3 is needed because the vm_struct and vmap_area
@@ -2005,7 +2018,8 @@ static int aligned_vwrite(char *buf, char *addr, unsigned long count)
 
 long vread(char *buf, char *addr, unsigned long count)
 {
-       struct vm_struct *tmp;
+       struct vmap_area *va;
+       struct vm_struct *vm;
        char *vaddr, *buf_start = buf;
        unsigned long buflen = count;
        unsigned long n;
@@ -2014,10 +2028,17 @@ long vread(char *buf, char *addr, unsigned long count)
        if ((unsigned long) addr + count < count)
                count = -(unsigned long) addr;
 
-       read_lock(&vmlist_lock);
-       for (tmp = vmlist; count && tmp; tmp = tmp->next) {
-               vaddr = (char *) tmp->addr;
-               if (addr >= vaddr + tmp->size - PAGE_SIZE)
+       spin_lock(&vmap_area_lock);
+       list_for_each_entry(va, &vmap_area_list, list) {
+               if (!count)
+                       break;
+
+               if (!(va->flags & VM_VM_AREA))
+                       continue;
+
+               vm = va->vm;
+               vaddr = (char *) vm->addr;
+               if (addr >= vaddr + vm->size - PAGE_SIZE)
                        continue;
                while (addr < vaddr) {
                        if (count == 0)
@@ -2027,10 +2048,10 @@ long vread(char *buf, char *addr, unsigned long count)
                        addr++;
                        count--;
                }
-               n = vaddr + tmp->size - PAGE_SIZE - addr;
+               n = vaddr + vm->size - PAGE_SIZE - addr;
                if (n > count)
                        n = count;
-               if (!(tmp->flags & VM_IOREMAP))
+               if (!(vm->flags & VM_IOREMAP))
                        aligned_vread(buf, addr, n);
                else /* IOREMAP area is treated as memory hole */
                        memset(buf, 0, n);
@@ -2039,7 +2060,7 @@ long vread(char *buf, char *addr, unsigned long count)
                count -= n;
        }
 finished:
-       read_unlock(&vmlist_lock);
+       spin_unlock(&vmap_area_lock);
 
        if (buf == buf_start)
                return 0;
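
vread() now walks the address-sorted vmap_area_list under vmap_area_lock, skipping entries without VM_VM_AREA, zero-filling the gaps between areas, and treating VM_IOREMAP ranges as holes. Below is a self-contained sketch of the same copy-with-holes walk over a sorted range list; struct range and model_vread() are simplified, illustrative stand-ins.

/*
 * Sketch of the vread() walk: ranges are sorted by address; bytes
 * inside a range are copied, bytes in the gaps between ranges are
 * zero-filled, as are any bytes past the last range.
 */
#include <stdio.h>
#include <string.h>

struct range {
	unsigned long start, end;	/* [start, end) */
	const char *data;		/* backing bytes for the range */
};

static long model_vread(char *buf, unsigned long addr, unsigned long count,
			const struct range *r, int nr)
{
	char *buf_start = buf;

	for (int i = 0; i < nr && count; i++) {
		if (addr >= r[i].end)
			continue;		/* area entirely below addr */
		while (addr < r[i].start && count) {
			*buf++ = '\0';		/* hole between areas */
			addr++;
			count--;
		}
		unsigned long n = r[i].end - addr;

		if (n > count)
			n = count;
		memcpy(buf, r[i].data + (addr - r[i].start), n);
		buf += n;
		addr += n;
		count -= n;
	}
	memset(buf, 0, count);			/* trailing hole */
	return (buf - buf_start) + count;
}

int main(void)
{
	const struct range r[] = {
		{ 100, 104, "abcd" },
		{ 108, 112, "wxyz" },
	};
	char out[16];

	/* reads "cd", a 4-byte hole, then "wxyz": 10 bytes total */
	long n = model_vread(out, 102, 10, r, 2);

	printf("read %ld bytes: %.2s..%.4s\n", n, out, out + 6);
	return 0;
}
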
@@ -2078,7 +2099,8 @@ finished:
 
 long vwrite(char *buf, char *addr, unsigned long count)
 {
-       struct vm_struct *tmp;
+       struct vmap_area *va;
+       struct vm_struct *vm;
        char *vaddr;
        unsigned long n, buflen;
        int copied = 0;
@@ -2088,10 +2110,17 @@ long vwrite(char *buf, char *addr, unsigned long count)
                count = -(unsigned long) addr;
        buflen = count;
 
-       read_lock(&vmlist_lock);
-       for (tmp = vmlist; count && tmp; tmp = tmp->next) {
-               vaddr = (char *) tmp->addr;
-               if (addr >= vaddr + tmp->size - PAGE_SIZE)
+       spin_lock(&vmap_area_lock);
+       list_for_each_entry(va, &vmap_area_list, list) {
+               if (!count)
+                       break;
+
+               if (!(va->flags & VM_VM_AREA))
+                       continue;
+
+               vm = va->vm;
+               vaddr = (char *) vm->addr;
+               if (addr >= vaddr + vm->size - PAGE_SIZE)
                        continue;
                while (addr < vaddr) {
                        if (count == 0)
@@ -2100,10 +2129,10 @@ long vwrite(char *buf, char *addr, unsigned long count)
                        addr++;
                        count--;
                }
-               n = vaddr + tmp->size - PAGE_SIZE - addr;
+               n = vaddr + vm->size - PAGE_SIZE - addr;
                if (n > count)
                        n = count;
-               if (!(tmp->flags & VM_IOREMAP)) {
+               if (!(vm->flags & VM_IOREMAP)) {
                        aligned_vwrite(buf, addr, n);
                        copied++;
                }
@@ -2112,7 +2141,7 @@ long vwrite(char *buf, char *addr, unsigned long count)
                count -= n;
        }
 finished:
-       read_unlock(&vmlist_lock);
+       spin_unlock(&vmap_area_lock);
        if (!copied)
                return 0;
        return buflen;
@@ -2519,19 +2548,19 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
 
 #ifdef CONFIG_PROC_FS
 static void *s_start(struct seq_file *m, loff_t *pos)
-       __acquires(&vmlist_lock)
+       __acquires(&vmap_area_lock)
 {
        loff_t n = *pos;
-       struct vm_struct *v;
+       struct vmap_area *va;
 
-       read_lock(&vmlist_lock);
-       v = vmlist;
-       while (n > 0 && v) {
+       spin_lock(&vmap_area_lock);
+       va = list_entry((&vmap_area_list)->next, typeof(*va), list);
+       while (n > 0 && &va->list != &vmap_area_list) {
                n--;
-               v = v->next;
+               va = list_entry(va->list.next, typeof(*va), list);
        }
-       if (!n)
-               return v;
+       if (!n && &va->list != &vmap_area_list)
+               return va;
 
        return NULL;
 
@@ -2539,16 +2568,20 @@ static void *s_start(struct seq_file *m, loff_t *pos)
 
 static void *s_next(struct seq_file *m, void *p, loff_t *pos)
 {
-       struct vm_struct *v = p;
+       struct vmap_area *va = p, *next;
 
        ++*pos;
-       return v->next;
+       next = list_entry(va->list.next, typeof(*va), list);
+       if (&next->list != &vmap_area_list)
+               return next;
+
+       return NULL;
 }
 
 static void s_stop(struct seq_file *m, void *p)
-       __releases(&vmlist_lock)
+       __releases(&vmap_area_lock)
 {
-       read_unlock(&vmlist_lock);
+       spin_unlock(&vmap_area_lock);
 }
 
 static void show_numa_info(struct seq_file *m, struct vm_struct *v)
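
The /proc/vmallocinfo iterator now uses a struct vmap_area as the seq_file cursor: s_start() skips *pos entries from the head of vmap_area_list, s_next() advances one node, and reaching the sentinel head again means end of list. Below is a compact userspace model of that sentinel-headed cursor; it_start(), it_next() and the list helpers are illustrative, not the kernel's list API.

/*
 * Sketch of the s_start()/s_next() cursor: walk a sentinel-headed
 * doubly linked list, where arriving back at the head marks the
 * end of the list.
 */
#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };
struct item { struct list_head list; int id; };

#define list_entry(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

static struct list_head head = { &head, &head };

static void add_tail(struct list_head *n)
{
	n->prev = head.prev;
	n->next = &head;
	head.prev->next = n;
	head.prev = n;
}

static struct item *it_start(long pos)		/* cf. s_start() */
{
	struct list_head *p = head.next;

	while (pos-- > 0 && p != &head)
		p = p->next;
	return p != &head ? list_entry(p, struct item, list) : NULL;
}

static struct item *it_next(struct item *it)	/* cf. s_next() */
{
	struct list_head *p = it->list.next;

	return p != &head ? list_entry(p, struct item, list) : NULL;
}

int main(void)
{
	struct item a = { .id = 1 }, b = { .id = 2 };

	add_tail(&a.list);
	add_tail(&b.list);
	for (struct item *it = it_start(0); it; it = it_next(it))
		printf("%d\n", it->id);
	return 0;
}
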
@@ -2559,6 +2592,11 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
                if (!counters)
                        return;
 
+               /* Pairs with smp_wmb() in clear_vm_unlist() */
+               smp_rmb();
+               if (v->flags & VM_UNLIST)
+                       return;
+
                memset(counters, 0, nr_node_ids * sizeof(unsigned int));
 
                for (nr = 0; nr < v->nr_pages; nr++)
@@ -2572,7 +2610,20 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
 
 static int s_show(struct seq_file *m, void *p)
 {
-       struct vm_struct *v = p;
+       struct vmap_area *va = p;
+       struct vm_struct *v;
+
+       if (va->flags & (VM_LAZY_FREE | VM_LAZY_FREEING))
+               return 0;
+
+       if (!(va->flags & VM_VM_AREA)) {
+               seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
+                       (void *)va->va_start, (void *)va->va_end,
+                                       va->va_end - va->va_start);
+               return 0;
+       }
+
+       v = va->vm;
 
        seq_printf(m, "0x%pK-0x%pK %7ld",
                v->addr, v->addr + v->size, v->size);
@@ -2645,5 +2696,53 @@ static int __init proc_vmalloc_init(void)
        return 0;
 }
 module_init(proc_vmalloc_init);
+
+void get_vmalloc_info(struct vmalloc_info *vmi)
+{
+       struct vmap_area *va;
+       unsigned long free_area_size;
+       unsigned long prev_end;
+
+       vmi->used = 0;
+       vmi->largest_chunk = 0;
+
+       prev_end = VMALLOC_START;
+
+       spin_lock(&vmap_area_lock);
+
+       if (list_empty(&vmap_area_list)) {
+               vmi->largest_chunk = VMALLOC_TOTAL;
+               goto out;
+       }
+
+       list_for_each_entry(va, &vmap_area_list, list) {
+               unsigned long addr = va->va_start;
+
+               /*
+                * Some archs keep another range for modules in vmalloc space
+                */
+               if (addr < VMALLOC_START)
+                       continue;
+               if (addr >= VMALLOC_END)
+                       break;
+
+               if (va->flags & (VM_LAZY_FREE | VM_LAZY_FREEING))
+                       continue;
+
+               vmi->used += (va->va_end - va->va_start);
+
+               free_area_size = addr - prev_end;
+               if (vmi->largest_chunk < free_area_size)
+                       vmi->largest_chunk = free_area_size;
+
+               prev_end = va->va_end;
+       }
+
+       if (VMALLOC_END - prev_end > vmi->largest_chunk)
+               vmi->largest_chunk = VMALLOC_END - prev_end;
+
+out:
+       spin_unlock(&vmap_area_lock);
+}
 #endif
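
get_vmalloc_info(), newly implemented here over vmap_area_list, makes one pass over the address-sorted areas: it sums the bytes of live areas and tracks the largest gap between consecutive areas, counting the gaps at both ends of the [VMALLOC_START, VMALLOC_END) window. The same arithmetic in a self-contained sketch; START, END and the ranges are made-up values.

/*
 * Sketch of the get_vmalloc_info() gap scan: given ranges sorted
 * by start address inside [START, END), compute the total used
 * bytes and the largest free chunk.
 */
#include <stdio.h>

#define START 1000UL
#define END   2000UL

struct range { unsigned long start, end; };	/* [start, end) */

int main(void)
{
	/* address-sorted, like vmap_area_list */
	const struct range r[] = { { 1100, 1200 }, { 1250, 1300 }, { 1600, 1700 } };
	unsigned long used = 0, largest = 0, prev_end = START;

	for (int i = 0; i < 3; i++) {
		used += r[i].end - r[i].start;
		if (r[i].start - prev_end > largest)
			largest = r[i].start - prev_end;	/* gap before area */
		prev_end = r[i].end;
	}
	if (END - prev_end > largest)
		largest = END - prev_end;			/* trailing gap */

	/* prints used=250 largest=300 (the 1300..1600 hole) */
	printf("used=%lu largest=%lu\n", used, largest);
	return 0;
}
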