/*
 * zcache.c
 *
 * Copyright (c) 2010-2012, Dan Magenheimer, Oracle Corp.
 * Copyright (c) 2010,2011, Nitin Gupta
 *
 * Zcache provides an in-kernel "host implementation" for transcendent memory
 * ("tmem") and, thus indirectly, for cleancache and frontswap.  Zcache uses
 * lzo1x compression by default (any crypto-API compressor may be selected;
 * see zcache_comp_init() below) to improve density, and an embedded allocator
 * called "zbud" which "buddies" two compressed pages semi-optimally in each
 * physical pageframe.  Zbud is integrally tied into tmem to allow pageframes
 * to be "reclaimed" efficiently.
 */

#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/highmem.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/math64.h>
#include <linux/crypto.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>

#include <linux/cleancache.h>
#include <linux/frontswap.h>
#include "tmem.h"
#include "zcache.h"
#include "zbud.h"
#include "ramster.h"
#include "debug.h"

#ifdef CONFIG_RAMSTER
static bool ramster_enabled __read_mostly;
#else
#define ramster_enabled false
#endif

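/*
 * __PG_WAS_ACTIVE is an out-of-tree page flag; when the kernel does not
 * provide it, the fallbacks below treat every page as "was active", which
 * makes the ignore-nonactive policies further down effectively no-ops.
 */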
#ifndef __PG_WAS_ACTIVE
static inline bool PageWasActive(struct page *page)
{
	return true;
}

static inline void SetPageWasActive(struct page *page)
{
}
#endif

#ifdef FRONTSWAP_HAS_EXCLUSIVE_GETS
static bool frontswap_has_exclusive_gets __read_mostly = true;
#else
static bool frontswap_has_exclusive_gets __read_mostly;
static inline void frontswap_tmem_exclusive_gets(bool b)
{
}
#endif

/* enable (or fix code) when Seth's patches are accepted upstream */
#define zcache_writeback_enabled 0

static bool zcache_enabled __read_mostly;
static bool disable_cleancache __read_mostly;
static bool disable_frontswap __read_mostly;
static bool disable_frontswap_ignore_nonactive __read_mostly;
static bool disable_cleancache_ignore_nonactive __read_mostly;
static char *namestr __read_mostly = "zcache";

#define ZCACHE_GFP_MASK \
	(__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC)

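/*
 * Note that ZCACHE_GFP_MASK omits __GFP_WAIT and __GFP_IO, so allocations
 * made with it never sleep or perform I/O; they also don't retry hard
 * (__GFP_NORETRY), don't warn on failure (__GFP_NOWARN), and don't dip
 * into emergency reserves (__GFP_NOMEMALLOC).  A failed allocation simply
 * causes the put to fail, which the tmem contract permits.
 */
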
MODULE_LICENSE("GPL");

/* crypto API for zcache */
#define ZCACHE_COMP_NAME_SZ CRYPTO_MAX_ALG_NAME
static char zcache_comp_name[ZCACHE_COMP_NAME_SZ] __read_mostly;
static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms __read_mostly;

enum comp_op {
	ZCACHE_COMPOP_COMPRESS,
	ZCACHE_COMPOP_DECOMPRESS
};

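/*
 * Dispatch a compress or decompress request to this cpu's crypto transform.
 * get_cpu() disables preemption so the per-cpu tfm cannot be switched out
 * from under us; put_cpu() re-enables preemption when the operation is done.
 */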
static inline int zcache_comp_op(enum comp_op op,
				const u8 *src, unsigned int slen,
				u8 *dst, unsigned int *dlen)
{
	struct crypto_comp *tfm;
	int ret = -1;

	BUG_ON(!zcache_comp_pcpu_tfms);
	tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu());
	BUG_ON(!tfm);
	switch (op) {
	case ZCACHE_COMPOP_COMPRESS:
		ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
		break;
	case ZCACHE_COMPOP_DECOMPRESS:
		ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
		break;
	default:
		break;
	}
	put_cpu();
	return ret;
}

/*
 * byte count defining poor compression; pages with greater zsize will be
 * rejected
 */
static unsigned int zbud_max_zsize __read_mostly = (PAGE_SIZE / 8) * 7;

/*
 * byte count defining poor *mean* compression; pages with greater zsize
 * will be rejected until sufficient better-compressed pages are accepted
 * driving the mean below this threshold
 */
static unsigned int zbud_max_mean_zsize __read_mostly = (PAGE_SIZE / 8) * 5;

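/*
 * Worked example with 4KiB pages: a zpage is rejected outright when its
 * compressed size exceeds 7/8 of a page (3584 bytes), and is also rejected
 * above 5/8 of a page (2560 bytes) whenever the running mean compressed
 * size is itself above that 2560-byte threshold.
 */
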
/*
 * for now, use named slabs so usage can easily be tracked; later we can
 * either just use kmalloc, or perhaps add a slab-like allocator
 * to more carefully manage total memory utilization
 */
static struct kmem_cache *zcache_objnode_cache;
static struct kmem_cache *zcache_obj_cache;

static DEFINE_PER_CPU(struct zcache_preload, zcache_preloads) = { 0, };

/* Used by debug.c */
ssize_t zcache_pers_zpages;
u64 zcache_pers_zbytes;
ssize_t zcache_eph_pageframes;
ssize_t zcache_pers_pageframes;

/* Used by this code. */
static ssize_t zcache_flush_total;
static ssize_t zcache_flush_found;
static ssize_t zcache_flobj_total;
static ssize_t zcache_flobj_found;
static ssize_t zcache_failed_eph_puts;
static ssize_t zcache_failed_pers_puts;
static ssize_t zcache_failed_getfreepages;
static ssize_t zcache_failed_alloc;
static ssize_t zcache_put_to_flush;
static ssize_t zcache_compress_poor;
static ssize_t zcache_mean_compress_poor;
static ssize_t zcache_eph_ate_tail;
static ssize_t zcache_eph_ate_tail_failed;
static ssize_t zcache_pers_ate_eph;
static ssize_t zcache_pers_ate_eph_failed;
static ssize_t zcache_evicted_eph_zpages;
static ssize_t zcache_evicted_eph_pageframes;
static ssize_t zcache_last_active_file_pageframes;
static ssize_t zcache_last_inactive_file_pageframes;
static ssize_t zcache_last_active_anon_pageframes;
static ssize_t zcache_last_inactive_anon_pageframes;
static ssize_t zcache_eph_nonactive_puts_ignored;
static ssize_t zcache_pers_nonactive_puts_ignored;

#ifdef CONFIG_ZCACHE_WRITEBACK
static ssize_t zcache_writtenback_pages;
static ssize_t zcache_outstanding_writeback_pages;
#endif

/*
 * zcache core code starts here
 */

static struct zcache_client zcache_host;
static struct zcache_client zcache_clients[MAX_CLIENTS];

static inline bool is_local_client(struct zcache_client *cli)
{
	return cli == &zcache_host;
}

static struct zcache_client *zcache_get_client_by_id(uint16_t cli_id)
{
	struct zcache_client *cli = &zcache_host;

	if (cli_id != LOCAL_CLIENT) {
		if (cli_id >= MAX_CLIENTS)
			goto out;
		cli = &zcache_clients[cli_id];
	}
out:
	return cli;
}

/*
 * Tmem operations assume the poolid implies the invoking client.
 * Zcache only has one client (the kernel itself): LOCAL_CLIENT.
 * RAMster has each client numbered by cluster node, and a KVM version
 * of zcache would have one client per guest and each client might
 * have a poolid==N.
 */
struct tmem_pool *zcache_get_pool_by_id(uint16_t cli_id, uint16_t poolid)
{
	struct tmem_pool *pool = NULL;
	struct zcache_client *cli = NULL;

	cli = zcache_get_client_by_id(cli_id);
	if (cli == NULL)
		goto out;
	if (!is_local_client(cli))
		atomic_inc(&cli->refcount);
	if (poolid < MAX_POOLS_PER_CLIENT) {
		pool = cli->tmem_pools[poolid];
		if (pool != NULL)
			atomic_inc(&pool->refcount);
	}
out:
	return pool;
}

void zcache_put_pool(struct tmem_pool *pool)
{
	struct zcache_client *cli = NULL;

	if (pool == NULL)
		BUG();
	cli = pool->client;
	atomic_dec(&pool->refcount);
	if (!is_local_client(cli))
		atomic_dec(&cli->refcount);
}

int zcache_new_client(uint16_t cli_id)
{
	struct zcache_client *cli;
	int ret = -1;

	cli = zcache_get_client_by_id(cli_id);
	if (cli == NULL)
		goto out;
	if (cli->allocated)
		goto out;
	cli->allocated = 1;
	ret = 0;
out:
	return ret;
}

/*
 * zcache implementation for tmem host ops
 */

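/*
 * These hostops are called by tmem with interrupts disabled, so they must
 * neither sleep nor fail.  zcache_pampd_create() (below) therefore pre-fills
 * a per-cpu pool of objs and objnodes before tmem_put() runs, and the
 * allocators here simply hand out the preloaded items (and BUG if empty).
 */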
static struct tmem_objnode *zcache_objnode_alloc(struct tmem_pool *pool)
{
	struct tmem_objnode *objnode = NULL;
	struct zcache_preload *kp;
	int i;

	kp = &__get_cpu_var(zcache_preloads);
	for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) {
		objnode = kp->objnodes[i];
		if (objnode != NULL) {
			kp->objnodes[i] = NULL;
			break;
		}
	}
	BUG_ON(objnode == NULL);
	inc_zcache_objnode_count();
	return objnode;
}

static void zcache_objnode_free(struct tmem_objnode *objnode,
				struct tmem_pool *pool)
{
	dec_zcache_objnode_count();
	kmem_cache_free(zcache_objnode_cache, objnode);
}

static struct tmem_obj *zcache_obj_alloc(struct tmem_pool *pool)
{
	struct tmem_obj *obj = NULL;
	struct zcache_preload *kp;

	kp = &__get_cpu_var(zcache_preloads);
	obj = kp->obj;
	BUG_ON(obj == NULL);
	kp->obj = NULL;
	inc_zcache_obj_count();
	return obj;
}

static void zcache_obj_free(struct tmem_obj *obj, struct tmem_pool *pool)
{
	dec_zcache_obj_count();
	kmem_cache_free(zcache_obj_cache, obj);
}

static struct tmem_hostops zcache_hostops = {
	.obj_alloc = zcache_obj_alloc,
	.obj_free = zcache_obj_free,
	.objnode_alloc = zcache_objnode_alloc,
	.objnode_free = zcache_objnode_free,
};

static struct page *zcache_alloc_page(void)
{
	struct page *page = alloc_page(ZCACHE_GFP_MASK);

	if (page != NULL)
		inc_zcache_pageframes_alloced();
	return page;
}

static void zcache_free_page(struct page *page)
{
	long curr_pageframes;
	static long max_pageframes, min_pageframes;

	if (page == NULL)
		BUG();
	__free_page(page);
	inc_zcache_pageframes_freed();
	curr_pageframes = curr_pageframes_count();
	if (curr_pageframes > max_pageframes)
		max_pageframes = curr_pageframes;
	if (curr_pageframes < min_pageframes)
		min_pageframes = curr_pageframes;
#ifdef CONFIG_ZCACHE_DEBUG
	if (curr_pageframes > 2L || curr_pageframes < -2L) {
		/* pr_info here */
	}
#endif
}

/*
 * zcache implementations for PAM page descriptor ops
 */

/* forward reference */
static void zcache_compress(struct page *from,
				void **out_va, unsigned *out_len);

static struct page *zcache_evict_eph_pageframe(void);

static void *zcache_pampd_eph_create(char *data, size_t size, bool raw,
					struct tmem_handle *th)
{
	void *pampd = NULL, *cdata = data;
	unsigned clen = size;
	struct page *page = (struct page *)(data), *newpage;

	if (!raw) {
		zcache_compress(page, &cdata, &clen);
		if (clen > zbud_max_buddy_size()) {
			zcache_compress_poor++;
			goto out;
		}
	} else {
		BUG_ON(clen > zbud_max_buddy_size());
	}

	/* look for space via an existing match first */
	pampd = (void *)zbud_match_prep(th, true, cdata, clen);
	if (pampd != NULL)
		goto got_pampd;

	/* no match, now we need to find (or free up) a full page */
	newpage = zcache_alloc_page();
	if (newpage != NULL)
		goto create_in_new_page;

	zcache_failed_getfreepages++;
	/* can't allocate a page, evict an ephemeral page via LRU */
	newpage = zcache_evict_eph_pageframe();
	if (newpage == NULL) {
		zcache_eph_ate_tail_failed++;
		goto out;
	}
	zcache_eph_ate_tail++;

create_in_new_page:
	pampd = (void *)zbud_create_prep(th, true, cdata, clen, newpage);
	BUG_ON(pampd == NULL);
	inc_zcache_eph_pageframes();

got_pampd:
	inc_zcache_eph_zbytes(clen);
	inc_zcache_eph_zpages();
	if (ramster_enabled && raw)
		ramster_count_foreign_pages(true, 1);
out:
	return pampd;
}

static void *zcache_pampd_pers_create(char *data, size_t size, bool raw,
					struct tmem_handle *th)
{
	void *pampd = NULL, *cdata = data;
	unsigned clen = size;
	struct page *page = (struct page *)(data), *newpage;
	unsigned long zbud_mean_zsize;
	unsigned long curr_pers_zpages, total_zsize;

	if (data == NULL) {
		BUG_ON(!ramster_enabled);
		goto create_pampd;
	}
	curr_pers_zpages = zcache_pers_zpages;
	/* FIXME CONFIG_RAMSTER... subtract atomic remote_pers_pages here? */
	if (!raw)
		zcache_compress(page, &cdata, &clen);
	/* reject if compression is too poor */
	if (clen > zbud_max_zsize) {
		zcache_compress_poor++;
		goto out;
	}
	/* reject if mean compression is too poor */
	if ((clen > zbud_max_mean_zsize) && (curr_pers_zpages > 0)) {
		total_zsize = zcache_pers_zbytes;
		if ((long)total_zsize < 0)
			total_zsize = 0;
		zbud_mean_zsize = div_u64(total_zsize,
					curr_pers_zpages);
		if (zbud_mean_zsize > zbud_max_mean_zsize) {
			zcache_mean_compress_poor++;
			goto out;
		}
	}

create_pampd:
	/* look for space via an existing match first */
	pampd = (void *)zbud_match_prep(th, false, cdata, clen);
	if (pampd != NULL)
		goto got_pampd;

	/* no match, now we need to find (or free up) a full page */
	newpage = zcache_alloc_page();
	if (newpage != NULL)
		goto create_in_new_page;
	/*
	 * FIXME do the following only if eph is oversized?
	 * if (zcache_eph_pageframes >
	 * (global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE) +
	 * global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE)))
	 */
	zcache_failed_getfreepages++;
	/* can't allocate a page, evict an ephemeral page via LRU */
	newpage = zcache_evict_eph_pageframe();
	if (newpage == NULL) {
		zcache_pers_ate_eph_failed++;
		goto out;
	}
	zcache_pers_ate_eph++;

create_in_new_page:
	pampd = (void *)zbud_create_prep(th, false, cdata, clen, newpage);
	BUG_ON(pampd == NULL);
	inc_zcache_pers_pageframes();

got_pampd:
	inc_zcache_pers_zpages();
	inc_zcache_pers_zbytes(clen);
	if (ramster_enabled && raw)
		ramster_count_foreign_pages(false, 1);
out:
	return pampd;
}

/*
 * This is called directly from zcache_put_page to pre-allocate space
 * for a pampd
 */
void *zcache_pampd_create(char *data, unsigned int size, bool raw,
				int eph, struct tmem_handle *th)
{
	void *pampd = NULL;
	struct zcache_preload *kp;
	struct tmem_objnode *objnode;
	struct tmem_obj *obj;
	int i;

	BUG_ON(!irqs_disabled());
	/* pre-allocate per-cpu metadata */
	BUG_ON(zcache_objnode_cache == NULL);
	BUG_ON(zcache_obj_cache == NULL);
	kp = &__get_cpu_var(zcache_preloads);
	for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) {
		objnode = kp->objnodes[i];
		if (objnode == NULL) {
			objnode = kmem_cache_alloc(zcache_objnode_cache,
							ZCACHE_GFP_MASK);
			if (unlikely(objnode == NULL)) {
				zcache_failed_alloc++;
				goto out;
			}
			kp->objnodes[i] = objnode;
		}
	}
	if (kp->obj == NULL) {
		obj = kmem_cache_alloc(zcache_obj_cache, ZCACHE_GFP_MASK);
		kp->obj = obj;
	}
	if (unlikely(kp->obj == NULL)) {
		zcache_failed_alloc++;
		goto out;
	}
	/*
	 * ok, have all the metadata pre-allocated, now do the data
	 * but since how we allocate the data is dependent on ephemeral
	 * or persistent, we split the call here to different sub-functions
	 */
	if (eph)
		pampd = zcache_pampd_eph_create(data, size, raw, th);
	else
		pampd = zcache_pampd_pers_create(data, size, raw, th);
out:
	return pampd;
}

/*
 * This is a pamops called via tmem_put and is necessary to "finish"
 * a pampd creation.
 */
void zcache_pampd_create_finish(void *pampd, bool eph)
{
	zbud_create_finish((struct zbudref *)pampd, eph);
}

/*
 * This is passed as a function parameter to zbud_decompress so that
 * zbud need not be familiar with the details of crypto.  It assumes that
 * the bytes from_va and to_va through from_va+size-1 and to_va+size-1 are
 * kmapped.  It must be successful, else there is a logic bug somewhere.
 */
static void zcache_decompress(char *from_va, unsigned int size, char *to_va)
{
	int ret;
	unsigned int outlen = PAGE_SIZE;

	ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, from_va, size,
				to_va, &outlen);
	BUG_ON(ret);
	BUG_ON(outlen != PAGE_SIZE);
}

/*
 * Decompress from the kernel va to a pageframe
 */
void zcache_decompress_to_page(char *from_va, unsigned int size,
					struct page *to_page)
{
	char *to_va = kmap_atomic(to_page);
	zcache_decompress(from_va, size, to_va);
	kunmap_atomic(to_va);
}

/*
 * fill the pageframe corresponding to the struct page with the data
 * from the passed pampd
 */
static int zcache_pampd_get_data(char *data, size_t *sizep, bool raw,
					void *pampd, struct tmem_pool *pool,
					struct tmem_oid *oid, uint32_t index)
{
	int ret;
	bool eph = !is_persistent(pool);

	BUG_ON(preemptible());
	BUG_ON(eph);	/* fix later if shared pools get implemented */
	BUG_ON(pampd_is_remote(pampd));
	if (raw)
		ret = zbud_copy_from_zbud(data, (struct zbudref *)pampd,
						sizep, eph);
	else {
		ret = zbud_decompress((struct page *)(data),
					(struct zbudref *)pampd, false,
					zcache_decompress);
		*sizep = PAGE_SIZE;
	}
	return ret;
}

/*
 * fill the pageframe corresponding to the struct page with the data
 * from the passed pampd
 */
static int zcache_pampd_get_data_and_free(char *data, size_t *sizep, bool raw,
					void *pampd, struct tmem_pool *pool,
					struct tmem_oid *oid, uint32_t index)
{
	int ret;
	bool eph = !is_persistent(pool);
	struct page *page = NULL;
	unsigned int zsize, zpages;

	BUG_ON(preemptible());
	BUG_ON(pampd_is_remote(pampd));
	if (raw)
		ret = zbud_copy_from_zbud(data, (struct zbudref *)pampd,
						sizep, eph);
	else {
		ret = zbud_decompress((struct page *)(data),
					(struct zbudref *)pampd, eph,
					zcache_decompress);
		*sizep = PAGE_SIZE;
	}
	page = zbud_free_and_delist((struct zbudref *)pampd, eph,
					&zsize, &zpages);
	if (eph) {
		if (page)
			dec_zcache_eph_pageframes();
		dec_zcache_eph_zpages(zpages);
		dec_zcache_eph_zbytes(zsize);
	} else {
		if (page)
			dec_zcache_pers_pageframes();
		dec_zcache_pers_zpages(zpages);
		dec_zcache_pers_zbytes(zsize);
	}
	if (!is_local_client(pool->client))
		ramster_count_foreign_pages(eph, -1);
	if (page)
		zcache_free_page(page);
	return ret;
}

/*
 * free the pampd and remove it from any zcache lists
 * pampd must no longer be pointed to from any tmem data structures!
 */
static void zcache_pampd_free(void *pampd, struct tmem_pool *pool,
			      struct tmem_oid *oid, uint32_t index, bool acct)
{
	struct page *page = NULL;
	unsigned int zsize, zpages;

	BUG_ON(preemptible());
	if (pampd_is_remote(pampd)) {
		BUG_ON(!ramster_enabled);
		pampd = ramster_pampd_free(pampd, pool, oid, index, acct);
		if (pampd == NULL)
			return;
	}
	if (is_ephemeral(pool)) {
		page = zbud_free_and_delist((struct zbudref *)pampd,
						true, &zsize, &zpages);
		if (page)
			dec_zcache_eph_pageframes();
		dec_zcache_eph_zpages(zpages);
		dec_zcache_eph_zbytes(zsize);
		/* FIXME CONFIG_RAMSTER... check acct parameter? */
	} else {
		page = zbud_free_and_delist((struct zbudref *)pampd,
						false, &zsize, &zpages);
		if (page)
			dec_zcache_pers_pageframes();
		dec_zcache_pers_zpages(zpages);
		dec_zcache_pers_zbytes(zsize);
	}
	if (!is_local_client(pool->client))
		ramster_count_foreign_pages(is_ephemeral(pool), -1);
	if (page)
		zcache_free_page(page);
}

static struct tmem_pamops zcache_pamops = {
	.create_finish = zcache_pampd_create_finish,
	.get_data = zcache_pampd_get_data,
	.get_data_and_free = zcache_pampd_get_data_and_free,
	.free = zcache_pampd_free,
};

/*
 * zcache compression/decompression and related per-cpu stuff
 */

static DEFINE_PER_CPU(unsigned char *, zcache_dstmem);
#define ZCACHE_DSTMEM_ORDER 1

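/*
 * The per-cpu destination buffer is order-1 (two pages, i.e. 8KiB with
 * 4KiB pages) because a compressor may expand incompressible data, so the
 * output bound must be allowed to exceed PAGE_SIZE; see the
 * PAGE_SIZE << ZCACHE_DSTMEM_ORDER limit passed to the compressor below.
 */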
static void zcache_compress(struct page *from, void **out_va, unsigned *out_len)
{
	int ret;
	unsigned char *dmem = __get_cpu_var(zcache_dstmem);
	char *from_va;

	BUG_ON(!irqs_disabled());
	/* no buffer or no compressor so can't compress */
	BUG_ON(dmem == NULL);
	*out_len = PAGE_SIZE << ZCACHE_DSTMEM_ORDER;
	from_va = kmap_atomic(from);
	mb();
	ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, from_va, PAGE_SIZE, dmem,
				out_len);
	BUG_ON(ret);
	*out_va = dmem;
	kunmap_atomic(from_va);
}

static int zcache_comp_cpu_up(int cpu)
{
	struct crypto_comp *tfm;

	tfm = crypto_alloc_comp(zcache_comp_name, 0, 0);
	if (IS_ERR(tfm))
		return NOTIFY_BAD;
	*per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm;
	return NOTIFY_OK;
}

static void zcache_comp_cpu_down(int cpu)
{
	struct crypto_comp *tfm;

	tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu);
	crypto_free_comp(tfm);
	*per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
}

static int zcache_cpu_notifier(struct notifier_block *nb,
				unsigned long action, void *pcpu)
{
	int ret, i, cpu = (long)pcpu;
	struct zcache_preload *kp;

	switch (action) {
	case CPU_UP_PREPARE:
		ret = zcache_comp_cpu_up(cpu);
		if (ret != NOTIFY_OK) {
			pr_err("%s: can't allocate compressor xform\n",
				namestr);
			return ret;
		}
		per_cpu(zcache_dstmem, cpu) = (void *)__get_free_pages(
			GFP_KERNEL | __GFP_REPEAT, ZCACHE_DSTMEM_ORDER);
		if (ramster_enabled)
			ramster_cpu_up(cpu);
		break;
	case CPU_DEAD:
	case CPU_UP_CANCELED:
		zcache_comp_cpu_down(cpu);
		free_pages((unsigned long)per_cpu(zcache_dstmem, cpu),
			ZCACHE_DSTMEM_ORDER);
		per_cpu(zcache_dstmem, cpu) = NULL;
		kp = &per_cpu(zcache_preloads, cpu);
		for (i = 0; i < ARRAY_SIZE(kp->objnodes); i++) {
			if (kp->objnodes[i])
				kmem_cache_free(zcache_objnode_cache,
						kp->objnodes[i]);
			kp->objnodes[i] = NULL;
		}
		if (kp->obj) {
			kmem_cache_free(zcache_obj_cache, kp->obj);
			kp->obj = NULL;
		}
		if (ramster_enabled)
			ramster_cpu_down(cpu);
		break;
	default:
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block zcache_cpu_notifier_block = {
	.notifier_call = zcache_cpu_notifier
};

/*
 * The following code interacts with the zbud eviction and zbud
 * zombify code to access LRU pages
 */

static struct page *zcache_evict_eph_pageframe(void)
{
	struct page *page;
	unsigned int zsize = 0, zpages = 0;

	page = zbud_evict_pageframe_lru(&zsize, &zpages);
	if (page == NULL)
		goto out;
	dec_zcache_eph_zbytes(zsize);
	dec_zcache_eph_zpages(zpages);
	zcache_evicted_eph_zpages += zpages;
	dec_zcache_eph_pageframes();
	zcache_evicted_eph_pageframes++;
out:
	return page;
}

#ifdef CONFIG_ZCACHE_WRITEBACK

static atomic_t zcache_outstanding_writeback_pages_atomic = ATOMIC_INIT(0);

static inline void inc_zcache_outstanding_writeback_pages(void)
{
	zcache_outstanding_writeback_pages =
		atomic_inc_return(&zcache_outstanding_writeback_pages_atomic);
}

static inline void dec_zcache_outstanding_writeback_pages(void)
{
	zcache_outstanding_writeback_pages =
		atomic_dec_return(&zcache_outstanding_writeback_pages_atomic);
}

static void unswiz(struct tmem_oid oid, u32 index,
			unsigned *type, pgoff_t *offset);

/*
 * Choose an LRU persistent pageframe and attempt to write it back to
 * the backing swap disk by calling frontswap_writeback on both zpages.
 *
 * This is work-in-progress.
 */

static void zcache_end_swap_write(struct bio *bio, int err)
{
	end_swap_bio_write(bio, err);
	dec_zcache_outstanding_writeback_pages();
	zcache_writtenback_pages++;
}

/*
 * zcache_get_swap_cache_page
 *
 * This is an adaption of read_swap_cache_async()
 *
 * If success, page is returned in retpage
 * Returns 0 if page was already in the swap cache, page is not locked
 * Returns 1 if the new page needs to be populated, page is locked
 */
static int zcache_get_swap_cache_page(int type, pgoff_t offset,
				struct page *new_page)
{
	struct page *found_page;
	swp_entry_t entry = swp_entry(type, offset);
	int err;

	BUG_ON(new_page == NULL);
	do {
		/*
		 * First check the swap cache.  Since this is normally
		 * called after lookup_swap_cache() failed, re-calling
		 * that would confuse statistics.
		 */
		found_page = find_get_page(&swapper_space, entry.val);
		if (found_page)
			return 0;

		/*
		 * call radix_tree_preload() while we can wait.
		 */
		err = radix_tree_preload(GFP_KERNEL);
		if (err)
			break;

		/*
		 * Swap entry may have been freed since our caller observed it.
		 */
		err = swapcache_prepare(entry);
		if (err == -EEXIST) { /* seems racy */
			radix_tree_preload_end();
			continue;
		}
		if (err) { /* swp entry is obsolete ? */
			radix_tree_preload_end();
			break;
		}

		/* May fail (-ENOMEM) if radix-tree node allocation failed. */
		__set_page_locked(new_page);
		SetPageSwapBacked(new_page);
		err = __add_to_swap_cache(new_page, entry);
		if (likely(!err)) {
			radix_tree_preload_end();
			lru_cache_add_anon(new_page);
			return 1;
		}
		radix_tree_preload_end();
		ClearPageSwapBacked(new_page);
		__clear_page_locked(new_page);
		/*
		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
		 * clear SWAP_HAS_CACHE flag.
		 */
		swapcache_free(entry, NULL);
		/* FIXME: is it possible to get here without err==-ENOMEM?
		 * If not, we can dispense with the do loop, use goto retry */
	} while (err != -ENOMEM);

	return 0;
}

/*
 * Given a frontswap zpage in zcache (identified by type/offset) and
 * an empty page, put the page into the swap cache, use frontswap
 * to get the page from zcache into the empty page, then give it
 * to the swap subsystem to send to disk (carefully avoiding the
 * possibility that frontswap might snatch it back).
 * Returns < 0 if error, 0 if successful, and 1 if successful but
 * the newpage passed in was not needed and should be freed.
 */
static int zcache_frontswap_writeback_zpage(int type, pgoff_t offset,
					struct page *newpage)
{
	struct page *page = newpage;
	int ret;
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
	};

	ret = zcache_get_swap_cache_page(type, offset, page);
	if (ret < 0)
		return ret;
	else if (ret == 0) {
		/* more uptodate page is already in swapcache */
		__frontswap_invalidate_page(type, offset);
		return 1;
	}

	BUG_ON(!frontswap_has_exclusive_gets); /* load must also invalidate */
	/* FIXME: how is it possible to get here when page is unlocked? */
	__frontswap_load(page);
	SetPageUptodate(page);  /* above does SetPageDirty, is that enough? */

	/* start writeback */
	SetPageReclaim(page);
	/*
	 * Return value is ignored here because it doesn't change anything
	 * for us.  Page is returned unlocked.
	 */
	(void)__swap_writepage(page, &wbc, zcache_end_swap_write);
	page_cache_release(page);
	inc_zcache_outstanding_writeback_pages();

	return 0;
}

/*
 * The following is still a magic number... we want to allow forward progress
 * for writeback because it clears out needed RAM when under pressure, but
 * we don't want to allow writeback to absorb and queue too many GFP_KERNEL
 * pages if the swap device is very slow.
 */
#define ZCACHE_MAX_OUTSTANDING_WRITEBACK_PAGES 6400

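/* at 4KiB per page, 6400 outstanding pages caps queued writeback at 25MiB */
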
/*
 * Try to allocate two free pages, first using a non-aggressive alloc,
 * then by evicting zcache ephemeral (clean pagecache) pages, and last
 * by aggressive GFP_KERNEL alloc.  We allow zbud to choose a pageframe
 * consisting of 1-2 zbuds/zpages, then call the writeback_zpage helper
 * function above for each.
 */
static int zcache_frontswap_writeback(void)
{
	struct tmem_handle th[2];
	int ret = -ENOMEM;
	int nzbuds, writeback_ret;
	unsigned type;
	struct page *znewpage1 = NULL, *znewpage2 = NULL;
	struct page *evictpage1 = NULL, *evictpage2 = NULL;
	struct page *newpage1 = NULL, *newpage2 = NULL;
	struct page *page1 = NULL, *page2 = NULL;
	pgoff_t offset;

	znewpage1 = alloc_page(ZCACHE_GFP_MASK);
	znewpage2 = alloc_page(ZCACHE_GFP_MASK);
	if (znewpage1 == NULL)
		evictpage1 = zcache_evict_eph_pageframe();
	if (znewpage2 == NULL)
		evictpage2 = zcache_evict_eph_pageframe();

	if ((evictpage1 == NULL || evictpage2 == NULL) &&
	    atomic_read(&zcache_outstanding_writeback_pages_atomic) >
				ZCACHE_MAX_OUTSTANDING_WRITEBACK_PAGES) {
		goto free_and_out;
	}
	if (znewpage1 == NULL && evictpage1 == NULL)
		newpage1 = alloc_page(GFP_KERNEL);
	if (znewpage2 == NULL && evictpage2 == NULL)
		newpage2 = alloc_page(GFP_KERNEL);
	if (newpage1 == NULL || newpage2 == NULL)
		goto free_and_out;

	/* ok, we have two pageframes pre-allocated, get a pair of zbuds */
	nzbuds = zbud_make_zombie_lru(&th[0], NULL, NULL, false);
	if (nzbuds == 0) {
		ret = -ENOENT;
		goto free_and_out;
	}

	/* process the first zbud */
	unswiz(th[0].oid, th[0].index, &type, &offset);
	page1 = (znewpage1 != NULL) ? znewpage1 :
			((newpage1 != NULL) ? newpage1 : evictpage1);
	writeback_ret = zcache_frontswap_writeback_zpage(type, offset, page1);
	if (writeback_ret < 0) {
		ret = -ENOMEM;
		goto free_and_out;
	}
	if (evictpage1 != NULL)
		zcache_pageframes_freed =
			atomic_inc_return(&zcache_pageframes_freed_atomic);
	if (writeback_ret == 0) {
		/* zcache_get_swap_cache_page will free, don't double free */
		znewpage1 = NULL;
		newpage1 = NULL;
		evictpage1 = NULL;
	}
	if (nzbuds < 2)
		goto free_and_out;

	/* if there is a second zbud, process it */
	unswiz(th[1].oid, th[1].index, &type, &offset);
	page2 = (znewpage2 != NULL) ? znewpage2 :
			((newpage2 != NULL) ? newpage2 : evictpage2);
	writeback_ret = zcache_frontswap_writeback_zpage(type, offset, page2);
	if (writeback_ret < 0) {
		ret = -ENOMEM;
		goto free_and_out;
	}
	if (evictpage2 != NULL)
		zcache_pageframes_freed =
			atomic_inc_return(&zcache_pageframes_freed_atomic);
	if (writeback_ret == 0) {
		znewpage2 = NULL;
		newpage2 = NULL;
		evictpage2 = NULL;
	}
	ret = 0;

free_and_out:
	if (znewpage1 != NULL)
		page_cache_release(znewpage1);
	if (znewpage2 != NULL)
		page_cache_release(znewpage2);
	if (newpage1 != NULL)
		page_cache_release(newpage1);
	if (newpage2 != NULL)
		page_cache_release(newpage2);
	if (evictpage1 != NULL)
		zcache_free_page(evictpage1);
	if (evictpage2 != NULL)
		zcache_free_page(evictpage2);
	return ret;
}

#endif /* CONFIG_ZCACHE_WRITEBACK */

/*
 * When zcache is disabled ("frozen"), pools can be created and destroyed,
 * but all puts (and thus all other operations that require memory allocation)
 * must fail.  If zcache is unfrozen, accepts puts, then is frozen again,
 * data consistency requires all puts while frozen to be converted into
 * flushes.
 */
static bool zcache_freeze;

/*
 * This zcache shrinker interface reduces the number of ephemeral pageframes
 * used by zcache to approximately the same as the total number of LRU_FILE
 * pageframes in use, and now also reduces the number of persistent pageframes
 * used by zcache to approximately the same as the total number of LRU_ANON
 * pageframes in use.  FIXME POLICY: Probably the writeback should only occur
 * if the eviction doesn't free enough pages.
 */
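/*
 * Note on the (old, single-callback) shrinker API used here: when
 * sc->nr_to_scan is 0 the core is only asking for an estimate of
 * reclaimable objects, so the function skips eviction entirely and just
 * returns the resampled count.
 */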
static int shrink_zcache_memory(struct shrinker *shrink,
				struct shrink_control *sc)
{
	static bool in_progress;
	int ret = -1;
	int nr = sc->nr_to_scan;
	int nr_evict = 0;
	int nr_writeback = 0;
	struct page *page;
	int file_pageframes_inuse, anon_pageframes_inuse;

	if (nr <= 0)
		goto skip_evict;

	/* don't allow more than one eviction thread at a time */
	if (in_progress)
		goto skip_evict;

	in_progress = true;

	/* we are going to ignore nr, and target a different value */
	zcache_last_active_file_pageframes =
		global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE);
	zcache_last_inactive_file_pageframes =
		global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE);
	file_pageframes_inuse = zcache_last_active_file_pageframes +
				zcache_last_inactive_file_pageframes;
	if (zcache_eph_pageframes > file_pageframes_inuse)
		nr_evict = zcache_eph_pageframes - file_pageframes_inuse;
	while (nr_evict-- > 0) {
		page = zcache_evict_eph_pageframe();
		if (page == NULL)
			break;
		zcache_free_page(page);
	}

	zcache_last_active_anon_pageframes =
		global_page_state(NR_LRU_BASE + LRU_ACTIVE_ANON);
	zcache_last_inactive_anon_pageframes =
		global_page_state(NR_LRU_BASE + LRU_INACTIVE_ANON);
	anon_pageframes_inuse = zcache_last_active_anon_pageframes +
				zcache_last_inactive_anon_pageframes;
	if (zcache_pers_pageframes > anon_pageframes_inuse)
		nr_writeback = zcache_pers_pageframes - anon_pageframes_inuse;
	while (nr_writeback-- > 0) {
#ifdef CONFIG_ZCACHE_WRITEBACK
		int writeback_ret;

		writeback_ret = zcache_frontswap_writeback();
		if (writeback_ret == -ENOMEM)
#endif
			break;
	}
	in_progress = false;

skip_evict:
	/* resample: has changed, but maybe not all the way yet */
	zcache_last_active_file_pageframes =
		global_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE);
	zcache_last_inactive_file_pageframes =
		global_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE);
	ret = zcache_eph_pageframes - (zcache_last_active_file_pageframes +
		zcache_last_inactive_file_pageframes);
	if (ret < 0)
		ret = 0;
	return ret;
}

static struct shrinker zcache_shrinker = {
	.shrink = shrink_zcache_memory,
	.seeks = DEFAULT_SEEKS,
};

/*
 * zcache shims between cleancache/frontswap ops and tmem
 */

/* FIXME rename these core routines to zcache_tmemput etc? */
int zcache_put_page(int cli_id, int pool_id, struct tmem_oid *oidp,
			uint32_t index, void *page,
			unsigned int size, bool raw, int ephemeral)
{
	struct tmem_pool *pool;
	struct tmem_handle th;
	int ret = -1;
	void *pampd = NULL;

	BUG_ON(!irqs_disabled());
	pool = zcache_get_pool_by_id(cli_id, pool_id);
	if (unlikely(pool == NULL))
		goto out;
	if (!zcache_freeze) {
		ret = 0;
		th.client_id = cli_id;
		th.pool_id = pool_id;
		th.oid = *oidp;
		th.index = index;
		pampd = zcache_pampd_create((char *)page, size, raw,
						ephemeral, &th);
		if (pampd == NULL) {
			ret = -ENOMEM;
			if (ephemeral)
				zcache_failed_eph_puts++;
			else
				zcache_failed_pers_puts++;
			zcache_put_pool(pool);
			goto out;
		}
		if (ramster_enabled)
			ramster_do_preload_flnode(pool);
		ret = tmem_put(pool, oidp, index, 0, pampd);
		if (ret < 0)
			BUG();
		zcache_put_pool(pool);
	} else {
		zcache_put_to_flush++;
		if (ramster_enabled)
			ramster_do_preload_flnode(pool);
		if (atomic_read(&pool->obj_count) > 0)
			/* the put fails whether the flush succeeds or not */
			(void)tmem_flush_page(pool, oidp, index);
		zcache_put_pool(pool);
	}
out:
	return ret;
}

int zcache_get_page(int cli_id, int pool_id, struct tmem_oid *oidp,
			uint32_t index, void *page,
			size_t *sizep, bool raw, int get_and_free)
{
	struct tmem_pool *pool;
	int ret = -1;
	bool eph = false;

	if (!raw) {
		BUG_ON(irqs_disabled());
		BUG_ON(in_softirq());
	}
	pool = zcache_get_pool_by_id(cli_id, pool_id);
	if (likely(pool != NULL)) {
		eph = is_ephemeral(pool);
		if (atomic_read(&pool->obj_count) > 0)
			ret = tmem_get(pool, oidp, index, (char *)(page),
					sizep, raw, get_and_free);
		zcache_put_pool(pool);
		WARN_ONCE((!eph && (ret != 0)),
			"zcache_get fails on persistent pool, bad things are very likely to happen soon\n");
	}
#ifdef RAMSTER_TESTING
	if (ret != 0 && ret != -1 && !(ret == -EINVAL && eph))
		pr_err("TESTING zcache_get tmem_get returns ret=%d\n", ret);
#endif
	return ret;
}

int zcache_flush_page(int cli_id, int pool_id,
			struct tmem_oid *oidp, uint32_t index)
{
	struct tmem_pool *pool;
	int ret = -1;
	unsigned long flags;

	local_irq_save(flags);
	zcache_flush_total++;
	pool = zcache_get_pool_by_id(cli_id, pool_id);
	if (ramster_enabled)
		ramster_do_preload_flnode(pool);
	if (likely(pool != NULL)) {
		if (atomic_read(&pool->obj_count) > 0)
			ret = tmem_flush_page(pool, oidp, index);
		zcache_put_pool(pool);
	}
	if (ret >= 0)
		zcache_flush_found++;
	local_irq_restore(flags);
	return ret;
}

int zcache_flush_object(int cli_id, int pool_id,
			struct tmem_oid *oidp)
{
	struct tmem_pool *pool;
	int ret = -1;
	unsigned long flags;

	local_irq_save(flags);
	zcache_flobj_total++;
	pool = zcache_get_pool_by_id(cli_id, pool_id);
	if (ramster_enabled)
		ramster_do_preload_flnode(pool);
	if (likely(pool != NULL)) {
		if (atomic_read(&pool->obj_count) > 0)
			ret = tmem_flush_object(pool, oidp);
		zcache_put_pool(pool);
	}
	if (ret >= 0)
		zcache_flobj_found++;
	local_irq_restore(flags);
	return ret;
}

static int zcache_client_destroy_pool(int cli_id, int pool_id)
{
	struct tmem_pool *pool = NULL;
	struct zcache_client *cli = NULL;
	int ret = -1;

	if (pool_id < 0)
		goto out;
	if (cli_id == LOCAL_CLIENT)
		cli = &zcache_host;
	else if ((unsigned int)cli_id < MAX_CLIENTS)
		cli = &zcache_clients[cli_id];
	if (cli == NULL)
		goto out;
	atomic_inc(&cli->refcount);
	pool = cli->tmem_pools[pool_id];
	if (pool == NULL)
		goto out;
	cli->tmem_pools[pool_id] = NULL;
	/* wait for pool activity on other cpus to quiesce */
	while (atomic_read(&pool->refcount) != 0)
		;
	atomic_dec(&cli->refcount);
	local_bh_disable();
	ret = tmem_destroy_pool(pool);
	local_bh_enable();
	kfree(pool);
	if (cli_id == LOCAL_CLIENT)
		pr_info("%s: destroyed local pool id=%d\n", namestr, pool_id);
	else
		pr_info("%s: destroyed pool id=%d, client=%d\n",
			namestr, pool_id, cli_id);
out:
	return ret;
}

int zcache_new_pool(uint16_t cli_id, uint32_t flags)
{
	int poolid = -1;
	struct tmem_pool *pool;
	struct zcache_client *cli = NULL;

	if (cli_id == LOCAL_CLIENT)
		cli = &zcache_host;
	else if ((unsigned int)cli_id < MAX_CLIENTS)
		cli = &zcache_clients[cli_id];
	if (cli == NULL)
		goto out;
	atomic_inc(&cli->refcount);
	pool = kmalloc(sizeof(struct tmem_pool), GFP_ATOMIC);
	if (pool == NULL)
		goto out;

	for (poolid = 0; poolid < MAX_POOLS_PER_CLIENT; poolid++)
		if (cli->tmem_pools[poolid] == NULL)
			break;
	if (poolid >= MAX_POOLS_PER_CLIENT) {
		pr_info("%s: pool creation failed: max exceeded\n", namestr);
		kfree(pool);
		poolid = -1;
		goto out;
	}
	atomic_set(&pool->refcount, 0);
	pool->client = cli;
	pool->pool_id = poolid;
	tmem_new_pool(pool, flags);
	cli->tmem_pools[poolid] = pool;
	if (cli_id == LOCAL_CLIENT)
		pr_info("%s: created %s local tmem pool, id=%d\n", namestr,
			flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
			poolid);
	else
		pr_info("%s: created %s tmem pool, id=%d, client=%d\n",
			namestr,
			flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
			poolid, cli_id);
out:
	if (cli != NULL)
		atomic_dec(&cli->refcount);
	return poolid;
}

static int zcache_local_new_pool(uint32_t flags)
{
	return zcache_new_pool(LOCAL_CLIENT, flags);
}

int zcache_autocreate_pool(unsigned int cli_id, unsigned int pool_id, bool eph)
{
	struct tmem_pool *pool;
	struct zcache_client *cli;
	uint32_t flags = eph ? 0 : TMEM_POOL_PERSIST;
	int ret = -1;

	BUG_ON(!ramster_enabled);
	if (cli_id == LOCAL_CLIENT)
		goto out;
	if (pool_id >= MAX_POOLS_PER_CLIENT)
		goto out;
	if (cli_id >= MAX_CLIENTS)
		goto out;

	cli = &zcache_clients[cli_id];
	if ((eph && disable_cleancache) || (!eph && disable_frontswap)) {
		pr_err("zcache_autocreate_pool: pool type disabled\n");
		goto out;
	}
	if (!cli->allocated) {
		if (zcache_new_client(cli_id)) {
			pr_err("zcache_autocreate_pool: can't create client\n");
			goto out;
		}
		cli = &zcache_clients[cli_id];
	}
	/* take the ref only after the client surely exists */
	atomic_inc(&cli->refcount);
	pool = cli->tmem_pools[pool_id];
	if (pool != NULL) {
		if (pool->persistent && eph) {
			pr_err("zcache_autocreate_pool: type mismatch\n");
			goto out_dec;
		}
		ret = 0;
		goto out_dec;
	}
	pool = kmalloc(sizeof(struct tmem_pool), GFP_KERNEL);
	if (pool == NULL)
		goto out_dec;

	atomic_set(&pool->refcount, 0);
	pool->client = cli;
	pool->pool_id = pool_id;
	tmem_new_pool(pool, flags);
	cli->tmem_pools[pool_id] = pool;
	pr_info("%s: AUTOcreated %s tmem poolid=%d, for remote client=%d\n",
		namestr, flags & TMEM_POOL_PERSIST ? "persistent" : "ephemeral",
		pool_id, cli_id);
	ret = 0;
out_dec:
	atomic_dec(&cli->refcount);
out:
	return ret;
}

/*
 * Two kernel functionalities currently can be layered on top of tmem.
 * These are "cleancache" which is used as a second-chance cache for clean
 * page cache pages; and "frontswap" which is used for swap pages
 * to avoid writes to disk.  A generic "shim" is provided here for each
 * to translate in-kernel semantics to zcache semantics.
 */

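/*
 * In the shims below the pgoff_t index is narrowed to u32 and the operation
 * is performed only if the value round-trips (ind == index); since tmem is
 * always permitted to drop a put, silently skipping a non-representable
 * index is safe and simply results in a future cache miss.
 */
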
static void zcache_cleancache_put_page(int pool_id,
					struct cleancache_filekey key,
					pgoff_t index, struct page *page)
{
	u32 ind = (u32) index;
	struct tmem_oid oid = *(struct tmem_oid *)&key;

	if (!disable_cleancache_ignore_nonactive && !PageWasActive(page)) {
		zcache_eph_nonactive_puts_ignored++;
		return;
	}
	if (likely(ind == index))
		(void)zcache_put_page(LOCAL_CLIENT, pool_id, &oid, index,
					page, PAGE_SIZE, false, 1);
}

static int zcache_cleancache_get_page(int pool_id,
					struct cleancache_filekey key,
					pgoff_t index, struct page *page)
{
	u32 ind = (u32) index;
	struct tmem_oid oid = *(struct tmem_oid *)&key;
	size_t size;
	int ret = -1;

	if (likely(ind == index)) {
		ret = zcache_get_page(LOCAL_CLIENT, pool_id, &oid, index,
					page, &size, false, 0);
		BUG_ON(ret >= 0 && size != PAGE_SIZE);
		if (ret == 0)
			SetPageWasActive(page);
	}
	return ret;
}

static void zcache_cleancache_flush_page(int pool_id,
					struct cleancache_filekey key,
					pgoff_t index)
{
	u32 ind = (u32) index;
	struct tmem_oid oid = *(struct tmem_oid *)&key;

	if (likely(ind == index))
		(void)zcache_flush_page(LOCAL_CLIENT, pool_id, &oid, ind);
}

static void zcache_cleancache_flush_inode(int pool_id,
					struct cleancache_filekey key)
{
	struct tmem_oid oid = *(struct tmem_oid *)&key;

	(void)zcache_flush_object(LOCAL_CLIENT, pool_id, &oid);
}

static void zcache_cleancache_flush_fs(int pool_id)
{
	if (pool_id >= 0)
		(void)zcache_client_destroy_pool(LOCAL_CLIENT, pool_id);
}

static int zcache_cleancache_init_fs(size_t pagesize)
{
	BUG_ON(sizeof(struct cleancache_filekey) !=
				sizeof(struct tmem_oid));
	BUG_ON(pagesize != PAGE_SIZE);
	return zcache_local_new_pool(0);
}

static int zcache_cleancache_init_shared_fs(char *uuid, size_t pagesize)
{
	/* shared pools are unsupported and map to private */
	BUG_ON(sizeof(struct cleancache_filekey) !=
				sizeof(struct tmem_oid));
	BUG_ON(pagesize != PAGE_SIZE);
	return zcache_local_new_pool(0);
}

static struct cleancache_ops zcache_cleancache_ops = {
	.put_page = zcache_cleancache_put_page,
	.get_page = zcache_cleancache_get_page,
	.invalidate_page = zcache_cleancache_flush_page,
	.invalidate_inode = zcache_cleancache_flush_inode,
	.invalidate_fs = zcache_cleancache_flush_fs,
	.init_shared_fs = zcache_cleancache_init_shared_fs,
	.init_fs = zcache_cleancache_init_fs
};

struct cleancache_ops zcache_cleancache_register_ops(void)
{
	struct cleancache_ops old_ops =
		cleancache_register_ops(&zcache_cleancache_ops);

	return old_ops;
}

/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
static int zcache_frontswap_poolid __read_mostly = -1;

/*
 * Swizzling increases objects per swaptype, increasing tmem concurrency
 * for heavy swaploads.  Later, larger nr_cpus -> larger SWIZ_BITS.
 * Setting SWIZ_BITS to 27 basically reconstructs the swap entry from
 * frontswap_get_page(), but has side-effects.  Hence using 8.
 */
#define SWIZ_BITS		8
#define SWIZ_MASK		((1 << SWIZ_BITS) - 1)
#define _oswiz(_type, _ind)	((_type << SWIZ_BITS) | (_ind & SWIZ_MASK))
#define iswiz(_ind)		(_ind >> SWIZ_BITS)

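/*
 * Worked example (SWIZ_BITS == 8): for type 1, offset 0x12345, the low
 * 8 bits of the offset move into the oid, so _oswiz(1, 0x12345) ==
 * (1 << 8) | 0x45 == 0x145, and the tmem index becomes iswiz(0x12345) ==
 * 0x123.  unswiz() below inverts this: type = 0x145 >> 8 == 1 and
 * offset = (0x123 << 8) | (0x145 & 0xff) == 0x12345.
 */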
static inline struct tmem_oid oswiz(unsigned type, u32 ind)
{
	struct tmem_oid oid = { .oid = { 0 } };
	oid.oid[0] = _oswiz(type, ind);
	return oid;
}

#ifdef CONFIG_ZCACHE_WRITEBACK
static void unswiz(struct tmem_oid oid, u32 index,
			unsigned *type, pgoff_t *offset)
{
	*type = (unsigned)(oid.oid[0] >> SWIZ_BITS);
	*offset = (pgoff_t)((index << SWIZ_BITS) |
			(oid.oid[0] & SWIZ_MASK));
}
#endif

static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
					struct page *page)
{
	u64 ind64 = (u64)offset;
	u32 ind = (u32)offset;
	struct tmem_oid oid = oswiz(type, ind);
	int ret = -1;
	unsigned long flags;

	BUG_ON(!PageLocked(page));
	if (!disable_frontswap_ignore_nonactive && !PageWasActive(page)) {
		zcache_pers_nonactive_puts_ignored++;
		ret = -ERANGE;
		goto out;
	}
	if (likely(ind64 == ind)) {
		local_irq_save(flags);
		ret = zcache_put_page(LOCAL_CLIENT, zcache_frontswap_poolid,
					&oid, iswiz(ind),
					page, PAGE_SIZE, false, 0);
		local_irq_restore(flags);
	}
out:
	return ret;
}

/*
 * Returns 0 if the page was successfully gotten from frontswap, -1 if
 * it was not present (should never happen!).
 */
static int zcache_frontswap_get_page(unsigned type, pgoff_t offset,
					struct page *page)
{
	u64 ind64 = (u64)offset;
	u32 ind = (u32)offset;
	struct tmem_oid oid = oswiz(type, ind);
	size_t size;
	int ret = -1, get_and_free;

	if (frontswap_has_exclusive_gets)
		get_and_free = 1;
	else
		get_and_free = -1;
	BUG_ON(!PageLocked(page));
	if (likely(ind64 == ind)) {
		ret = zcache_get_page(LOCAL_CLIENT, zcache_frontswap_poolid,
					&oid, iswiz(ind),
					page, &size, false, get_and_free);
		BUG_ON(ret >= 0 && size != PAGE_SIZE);
	}
	return ret;
}

/* flush a single page from frontswap */
static void zcache_frontswap_flush_page(unsigned type, pgoff_t offset)
{
	u64 ind64 = (u64)offset;
	u32 ind = (u32)offset;
	struct tmem_oid oid = oswiz(type, ind);

	if (likely(ind64 == ind))
		(void)zcache_flush_page(LOCAL_CLIENT, zcache_frontswap_poolid,
					&oid, iswiz(ind));
}

/* flush all pages from the passed swaptype */
static void zcache_frontswap_flush_area(unsigned type)
{
	struct tmem_oid oid;
	int ind;

	for (ind = SWIZ_MASK; ind >= 0; ind--) {
		oid = oswiz(type, ind);
		(void)zcache_flush_object(LOCAL_CLIENT,
					zcache_frontswap_poolid, &oid);
	}
}

static void zcache_frontswap_init(unsigned ignored)
{
	/* a single tmem poolid is used for all frontswap "types" (swapfiles) */
	if (zcache_frontswap_poolid < 0)
		zcache_frontswap_poolid =
			zcache_local_new_pool(TMEM_POOL_PERSIST);
}

static struct frontswap_ops zcache_frontswap_ops = {
	.store = zcache_frontswap_put_page,
	.load = zcache_frontswap_get_page,
	.invalidate_page = zcache_frontswap_flush_page,
	.invalidate_area = zcache_frontswap_flush_area,
	.init = zcache_frontswap_init
};

struct frontswap_ops zcache_frontswap_register_ops(void)
{
	struct frontswap_ops old_ops =
		frontswap_register_ops(&zcache_frontswap_ops);

	return old_ops;
}

/*
 * zcache initialization
 * NOTE FOR NOW zcache or ramster MUST BE PROVIDED AS A KERNEL BOOT PARAMETER
 * OR NOTHING HAPPENS!
 */

static int __init enable_zcache(char *s)
{
	zcache_enabled = true;
	return 1;
}
__setup("zcache", enable_zcache);

static int __init enable_ramster(char *s)
{
	zcache_enabled = true;
#ifdef CONFIG_RAMSTER
	ramster_enabled = true;
#endif
	return 1;
}
__setup("ramster", enable_ramster);

/* allow independent dynamic disabling of cleancache and frontswap */

static int __init no_cleancache(char *s)
{
	disable_cleancache = true;
	return 1;
}
__setup("nocleancache", no_cleancache);

static int __init no_frontswap(char *s)
{
	disable_frontswap = true;
	return 1;
}
__setup("nofrontswap", no_frontswap);

static int __init no_frontswap_exclusive_gets(char *s)
{
	frontswap_has_exclusive_gets = false;
	return 1;
}
__setup("nofrontswapexclusivegets", no_frontswap_exclusive_gets);

static int __init no_frontswap_ignore_nonactive(char *s)
{
	disable_frontswap_ignore_nonactive = true;
	return 1;
}
__setup("nofrontswapignorenonactive", no_frontswap_ignore_nonactive);

static int __init no_cleancache_ignore_nonactive(char *s)
{
	disable_cleancache_ignore_nonactive = true;
	return 1;
}
__setup("nocleancacheignorenonactive", no_cleancache_ignore_nonactive);

static int __init enable_zcache_compressor(char *s)
{
	/* strlcpy guarantees NUL-termination, unlike the old strncpy */
	strlcpy(zcache_comp_name, s, sizeof(zcache_comp_name));
	zcache_enabled = true;
	return 1;
}
__setup("zcache=", enable_zcache_compressor);

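/*
 * Example boot parameters: "zcache" enables zcache with the default lzo
 * compressor; "zcache=deflate" selects another crypto-API compressor by
 * name (it must be built into the kernel for crypto_has_comp() to find
 * it); "ramster" enables the clustered variant; and "nofrontswap" /
 * "nocleancache" disable either frontend independently.
 */
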
static int __init zcache_comp_init(void)
{
	int ret = 0;

	/* check crypto algorithm */
	if (*zcache_comp_name != '\0') {
		ret = crypto_has_comp(zcache_comp_name, 0, 0);
		if (!ret)
			pr_info("zcache: %s not supported\n",
					zcache_comp_name);
	}
	if (!ret)
		strcpy(zcache_comp_name, "lzo");
	ret = crypto_has_comp(zcache_comp_name, 0, 0);
	if (!ret) {
		ret = 1;
		goto out;
	}
	pr_info("zcache: using %s compressor\n", zcache_comp_name);

	/* alloc percpu transforms */
	ret = 0;
	zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
	if (!zcache_comp_pcpu_tfms)
		ret = 1;
out:
	return ret;
}

static int __init zcache_init(void)
{
	int ret = 0;

	if (ramster_enabled) {
		namestr = "ramster";
		ramster_register_pamops(&zcache_pamops);
	}
#ifdef CONFIG_DEBUG_FS
	zcache_debugfs_init();
#endif
	if (zcache_enabled) {
		unsigned int cpu;

		tmem_register_hostops(&zcache_hostops);
		tmem_register_pamops(&zcache_pamops);
		ret = register_cpu_notifier(&zcache_cpu_notifier_block);
		if (ret) {
			pr_err("%s: can't register cpu notifier\n", namestr);
			goto out;
		}
		ret = zcache_comp_init();
		if (ret) {
			pr_err("%s: compressor initialization failed\n",
				namestr);
			goto out;
		}
		for_each_online_cpu(cpu) {
			void *pcpu = (void *)(long)cpu;

			zcache_cpu_notifier(&zcache_cpu_notifier_block,
				CPU_UP_PREPARE, pcpu);
		}
	}
	zcache_objnode_cache = kmem_cache_create("zcache_objnode",
				sizeof(struct tmem_objnode), 0, 0, NULL);
	zcache_obj_cache = kmem_cache_create("zcache_obj",
				sizeof(struct tmem_obj), 0, 0, NULL);
	ret = zcache_new_client(LOCAL_CLIENT);
	if (ret) {
		pr_err("%s: can't create client\n", namestr);
		goto out;
	}
	zbud_init();
	if (zcache_enabled && !disable_cleancache) {
		struct cleancache_ops old_ops;

		register_shrinker(&zcache_shrinker);
		old_ops = zcache_cleancache_register_ops();
		pr_info("%s: cleancache enabled using kernel transcendent "
			"memory and compression buddies\n", namestr);
#ifdef CONFIG_ZCACHE_DEBUG
		pr_info("%s: cleancache: ignorenonactive = %d\n",
			namestr, !disable_cleancache_ignore_nonactive);
#endif
		if (old_ops.init_fs != NULL)
			pr_warn("%s: cleancache_ops overridden\n", namestr);
	}
	if (zcache_enabled && !disable_frontswap) {
		struct frontswap_ops old_ops;

		old_ops = zcache_frontswap_register_ops();
		if (frontswap_has_exclusive_gets)
			frontswap_tmem_exclusive_gets(true);
		pr_info("%s: frontswap enabled using kernel transcendent "
			"memory and compression buddies\n", namestr);
#ifdef CONFIG_ZCACHE_DEBUG
		pr_info("%s: frontswap: excl gets = %d active only = %d\n",
			namestr, frontswap_has_exclusive_gets,
			!disable_frontswap_ignore_nonactive);
#endif
		if (old_ops.init != NULL)
			pr_warn("%s: frontswap_ops overridden\n", namestr);
	}
	if (ramster_enabled)
		ramster_init(!disable_cleancache, !disable_frontswap,
				frontswap_has_exclusive_gets);
out:
	return ret;
}

late_initcall(zcache_init);