X-Git-Url: http://pileus.org/git/?a=blobdiff_plain;f=drivers%2Fstaging%2Fzsmalloc%2Fzsmalloc-main.c;h=09a9d35d436ff6075f256ec02f9e029e9e26a455;hb=e9eca4de957ac33744fb994ccacd4a5102e445a8;hp=8b0bcb626a7f8e3082fa6dce0fbac81c92fde966;hpb=bd463a06064c4bc8497f6aa6dfb4437be8f07a3b;p=~andy%2Flinux diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c index 8b0bcb626a7..09a9d35d436 100644 --- a/drivers/staging/zsmalloc/zsmalloc-main.c +++ b/drivers/staging/zsmalloc/zsmalloc-main.c @@ -75,9 +75,140 @@ #include #include #include +#include +#include +#include #include "zsmalloc.h" -#include "zsmalloc_int.h" + +/* + * This must be power of 2 and greater than of equal to sizeof(link_free). + * These two conditions ensure that any 'struct link_free' itself doesn't + * span more than 1 page which avoids complex case of mapping 2 pages simply + * to restore link_free pointer values. + */ +#define ZS_ALIGN 8 + +/* + * A single 'zspage' is composed of up to 2^N discontiguous 0-order (single) + * pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N. + */ +#define ZS_MAX_ZSPAGE_ORDER 2 +#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER) + +/* + * Object location (, ) is encoded as + * as single (void *) handle value. + * + * Note that object index is relative to system + * page it is stored in, so for each sub-page belonging + * to a zspage, obj_idx starts with 0. + * + * This is made more complicated by various memory models and PAE. + */ + +#ifndef MAX_PHYSMEM_BITS +#ifdef CONFIG_HIGHMEM64G +#define MAX_PHYSMEM_BITS 36 +#else /* !CONFIG_HIGHMEM64G */ +/* + * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just + * be PAGE_SHIFT + */ +#define MAX_PHYSMEM_BITS BITS_PER_LONG +#endif +#endif +#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) +#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS) +#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1) + +#define MAX(a, b) ((a) >= (b) ? (a) : (b)) +/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */ +#define ZS_MIN_ALLOC_SIZE \ + MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS)) +#define ZS_MAX_ALLOC_SIZE PAGE_SIZE + +/* + * On systems with 4K page size, this gives 254 size classes! There is a + * trader-off here: + * - Large number of size classes is potentially wasteful as free page are + * spread across these classes + * - Small number of size classes causes large internal fragmentation + * - Probably its better to use specific size classes (empirically + * determined). NOTE: all those class sizes must be set as multiple of + * ZS_ALIGN to make sure link_free itself never has to span 2 pages. + * + * ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN + * (reason above) + */ +#define ZS_SIZE_CLASS_DELTA 16 +#define ZS_SIZE_CLASSES ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / \ + ZS_SIZE_CLASS_DELTA + 1) + +/* + * We do not maintain any list for completely empty or full pages + */ +enum fullness_group { + ZS_ALMOST_FULL, + ZS_ALMOST_EMPTY, + _ZS_NR_FULLNESS_GROUPS, + + ZS_EMPTY, + ZS_FULL +}; + +/* + * We assign a page to ZS_ALMOST_EMPTY fullness group when: + * n <= N / f, where + * n = number of allocated objects + * N = total number of objects zspage can store + * f = 1/fullness_threshold_frac + * + * Similarly, we assign zspage to: + * ZS_ALMOST_FULL when n > N / f + * ZS_EMPTY when n == 0 + * ZS_FULL when n == N + * + * (see: fix_fullness_group()) + */ +static const int fullness_threshold_frac = 4; + +struct size_class { + /* + * Size of objects stored in this class. Must be multiple + * of ZS_ALIGN. + */ + int size; + unsigned int index; + + /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */ + int pages_per_zspage; + + spinlock_t lock; + + /* stats */ + u64 pages_allocated; + + struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS]; +}; + +/* + * Placed within free objects to form a singly linked list. + * For every zspage, first_page->freelist gives head of this list. + * + * This must be power of 2 and less than or equal to ZS_ALIGN + */ +struct link_free { + /* Handle of next free chunk (encodes ) */ + void *next; +}; + +struct zs_pool { + struct size_class size_class[ZS_SIZE_CLASSES]; + + gfp_t flags; /* allocation flags used when growing pool */ + const char *name; +}; /* * A zspage's class index and fullness group @@ -88,6 +219,30 @@ #define CLASS_IDX_MASK ((1 << CLASS_IDX_BITS) - 1) #define FULLNESS_MASK ((1 << FULLNESS_BITS) - 1) +/* + * By default, zsmalloc uses a copy-based object mapping method to access + * allocations that span two pages. However, if a particular architecture + * 1) Implements local_flush_tlb_kernel_range() and 2) Performs VM mapping + * faster than copying, then it should be added here so that + * USE_PGTABLE_MAPPING is defined. This causes zsmalloc to use page table + * mapping rather than copying + * for object mapping. +*/ +#if defined(CONFIG_ARM) +#define USE_PGTABLE_MAPPING +#endif + +struct mapping_area { +#ifdef USE_PGTABLE_MAPPING + struct vm_struct *vm; /* vm area for mapping object that span pages */ +#else + char *vm_buf; /* copy buffer for objects that span pages */ +#endif + char *vm_addr; /* address of kmap_atomic()'ed pages */ + enum zs_mapmode vm_mm; /* mapping mode */ +}; + + /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */ static DEFINE_PER_CPU(struct mapping_area, zs_map_area); @@ -470,16 +625,83 @@ static struct page *find_get_zspage(struct size_class *class) return page; } -static void zs_copy_map_object(char *buf, struct page *firstpage, - int off, int size) +#ifdef USE_PGTABLE_MAPPING +static inline int __zs_cpu_up(struct mapping_area *area) +{ + /* + * Make sure we don't leak memory if a cpu UP notification + * and zs_init() race and both call zs_cpu_up() on the same cpu + */ + if (area->vm) + return 0; + area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL); + if (!area->vm) + return -ENOMEM; + return 0; +} + +static inline void __zs_cpu_down(struct mapping_area *area) +{ + if (area->vm) + free_vm_area(area->vm); + area->vm = NULL; +} + +static inline void *__zs_map_object(struct mapping_area *area, + struct page *pages[2], int off, int size) +{ + BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages)); + area->vm_addr = area->vm->addr; + return area->vm_addr + off; +} + +static inline void __zs_unmap_object(struct mapping_area *area, + struct page *pages[2], int off, int size) +{ + unsigned long addr = (unsigned long)area->vm_addr; + unsigned long end = addr + (PAGE_SIZE * 2); + + flush_cache_vunmap(addr, end); + unmap_kernel_range_noflush(addr, PAGE_SIZE * 2); + local_flush_tlb_kernel_range(addr, end); +} + +#else /* USE_PGTABLE_MAPPING */ + +static inline int __zs_cpu_up(struct mapping_area *area) +{ + /* + * Make sure we don't leak memory if a cpu UP notification + * and zs_init() race and both call zs_cpu_up() on the same cpu + */ + if (area->vm_buf) + return 0; + area->vm_buf = (char *)__get_free_page(GFP_KERNEL); + if (!area->vm_buf) + return -ENOMEM; + return 0; +} + +static inline void __zs_cpu_down(struct mapping_area *area) +{ + if (area->vm_buf) + free_page((unsigned long)area->vm_buf); + area->vm_buf = NULL; +} + +static void *__zs_map_object(struct mapping_area *area, + struct page *pages[2], int off, int size) { - struct page *pages[2]; int sizes[2]; void *addr; + char *buf = area->vm_buf; - pages[0] = firstpage; - pages[1] = get_next_page(firstpage); - BUG_ON(!pages[1]); + /* disable page faults to match kmap_atomic() return conditions */ + pagefault_disable(); + + /* no read fastpath */ + if (area->vm_mm == ZS_MM_WO) + goto out; sizes[0] = PAGE_SIZE - off; sizes[1] = size - sizes[0]; @@ -491,18 +713,20 @@ static void zs_copy_map_object(char *buf, struct page *firstpage, addr = kmap_atomic(pages[1]); memcpy(buf + sizes[0], addr, sizes[1]); kunmap_atomic(addr); +out: + return area->vm_buf; } -static void zs_copy_unmap_object(char *buf, struct page *firstpage, - int off, int size) +static void __zs_unmap_object(struct mapping_area *area, + struct page *pages[2], int off, int size) { - struct page *pages[2]; int sizes[2]; void *addr; + char *buf = area->vm_buf; - pages[0] = firstpage; - pages[1] = get_next_page(firstpage); - BUG_ON(!pages[1]); + /* no write fastpath */ + if (area->vm_mm == ZS_MM_RO) + goto out; sizes[0] = PAGE_SIZE - off; sizes[1] = size - sizes[0]; @@ -514,34 +738,31 @@ static void zs_copy_unmap_object(char *buf, struct page *firstpage, addr = kmap_atomic(pages[1]); memcpy(addr, buf + sizes[0], sizes[1]); kunmap_atomic(addr); + +out: + /* enable page faults to match kunmap_atomic() return conditions */ + pagefault_enable(); } +#endif /* USE_PGTABLE_MAPPING */ + static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action, void *pcpu) { - int cpu = (long)pcpu; + int ret, cpu = (long)pcpu; struct mapping_area *area; switch (action) { case CPU_UP_PREPARE: area = &per_cpu(zs_map_area, cpu); - /* - * Make sure we don't leak memory if a cpu UP notification - * and zs_init() race and both call zs_cpu_up() on the same cpu - */ - if (area->vm_buf) - return 0; - area->vm_buf = (char *)__get_free_page(GFP_KERNEL); - if (!area->vm_buf) - return -ENOMEM; - return 0; + ret = __zs_cpu_up(area); + if (ret) + return notifier_from_errno(ret); break; case CPU_DEAD: case CPU_UP_CANCELED: area = &per_cpu(zs_map_area, cpu); - if (area->vm_buf) - free_page((unsigned long)area->vm_buf); - area->vm_buf = NULL; + __zs_cpu_down(area); break; } @@ -758,28 +979,36 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle, enum fullness_group fg; struct size_class *class; struct mapping_area *area; + struct page *pages[2]; BUG_ON(!handle); + /* + * Because we use per-cpu mapping areas shared among the + * pools/users, we can't allow mapping in interrupt context + * because it can corrupt another users mappings. + */ + BUG_ON(in_interrupt()); + obj_handle_to_location(handle, &page, &obj_idx); get_zspage_mapping(get_first_page(page), &class_idx, &fg); class = &pool->size_class[class_idx]; off = obj_idx_to_offset(page, obj_idx, class->size); area = &get_cpu_var(zs_map_area); + area->vm_mm = mm; if (off + class->size <= PAGE_SIZE) { /* this object is contained entirely within a page */ area->vm_addr = kmap_atomic(page); return area->vm_addr + off; } - /* disable page faults to match kmap_atomic() return conditions */ - pagefault_disable(); + /* this object spans two pages */ + pages[0] = page; + pages[1] = get_next_page(page); + BUG_ON(!pages[1]); - if (mm != ZS_MM_WO) - zs_copy_map_object(area->vm_buf, page, off, class->size); - area->vm_addr = NULL; - return area->vm_buf; + return __zs_map_object(area, pages, off, class->size); } EXPORT_SYMBOL_GPL(zs_map_object); @@ -793,17 +1022,6 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) struct size_class *class; struct mapping_area *area; - area = &__get_cpu_var(zs_map_area); - /* single-page object fastpath */ - if (area->vm_addr) { - kunmap_atomic(area->vm_addr); - goto out; - } - - /* no write fastpath */ - if (area->vm_mm == ZS_MM_RO) - goto pfenable; - BUG_ON(!handle); obj_handle_to_location(handle, &page, &obj_idx); @@ -811,12 +1029,18 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle) class = &pool->size_class[class_idx]; off = obj_idx_to_offset(page, obj_idx, class->size); - zs_copy_unmap_object(area->vm_buf, page, off, class->size); + area = &__get_cpu_var(zs_map_area); + if (off + class->size <= PAGE_SIZE) + kunmap_atomic(area->vm_addr); + else { + struct page *pages[2]; -pfenable: - /* enable page faults to match kunmap_atomic() return conditions */ - pagefault_enable(); -out: + pages[0] = page; + pages[1] = get_next_page(page); + BUG_ON(!pages[1]); + + __zs_unmap_object(area, pages, off, class->size); + } put_cpu_var(zs_map_area); } EXPORT_SYMBOL_GPL(zs_unmap_object);