/*
 * linux/mm/zcache.c
 *
 * A cleancache backend for file pages compression.
 * Concepts based on original zcache by Dan Magenheimer.
 * Copyright (C) 2013 Bob Liu
 *
 * With zcache, active file pages can be compressed in memory during page
 * reclaiming. When their data is needed again the I/O reading operation is
 * avoided. This results in a significant performance gain under memory
 * pressure for systems with many file pages.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/atomic.h>
#include <linux/cleancache.h>
#include <linux/cpu.h>
#include <linux/crypto.h>
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/module.h>
#include <linux/page-flags.h>
#include <linux/radix-tree.h>
#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/zbud.h>

/*
 * Enable/disable zcache (disabled by default).
 */
static bool zcache_enabled __read_mostly;
module_param_named(enabled, zcache_enabled, bool, 0);

/*
 * Compressor to be used by zcache.
 */
#define ZCACHE_COMPRESSOR_DEFAULT "lzo"
static char *zcache_compressor = ZCACHE_COMPRESSOR_DEFAULT;
module_param_named(compressor, zcache_compressor, charp, 0);

/*
 * The maximum percentage of memory that the compressed pool can occupy.
 */
static unsigned int zcache_max_pool_percent = 10;
module_param_named(max_pool_percent, zcache_max_pool_percent, uint, 0644);

static unsigned int zcache_clear_percent = 4;
module_param_named(clear_percent, zcache_clear_percent, uint, 0644);

/*
 * zcache statistics
 */
static u64 zcache_pool_limit_hit;
static u64 zcache_dup_entry;
static u64 zcache_zbud_alloc_fail;
static u64 zcache_evict_zpages;
static u64 zcache_evict_filepages;
static u64 zcache_inactive_pages_refused;
static u64 zcache_reclaim_fail;
static u64 zcache_pool_shrink;
static u64 zcache_pool_shrink_fail;
static u64 zcache_pool_shrink_pages;
static u64 zcache_store_failed;
static atomic_t zcache_stored_pages = ATOMIC_INIT(0);
static atomic_t zcache_stored_zero_pages = ATOMIC_INIT(0);

#define GFP_ZCACHE \
        (__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | \
         __GFP_NOMEMALLOC | __GFP_NO_KSWAPD | __GFP_ZERO)

/*
 * Make sure this is different from a radix tree
 * indirect pointer or an exceptional entry.
 */
#define ZERO_HANDLE     ((void *)~(~0UL >> 1))

/*
 * Zcache receives pages for compression through the cleancache API and is
 * able to evict pages from its own compressed pool on an LRU basis in the
 * case that the compressed pool is full.
 *
 * Zcache makes use of zbud for managing the compressed memory pool. Each
 * allocation in zbud is not directly accessible by address. Rather, a handle
 * (zaddr) is returned by the allocation routine, and that handle must be
 * mapped before being accessed. The compressed memory pool grows on demand
 * and shrinks as compressed pages are freed.
 *
 * When a file page is passed from cleancache to zcache, zcache maintains a
 * mapping of the (pool id, inode number, page index) tuple to the zbud
 * address that references that compressed file page. This mapping is achieved
 * with a red-black tree per filesystem type, plus a radix tree per red-black
 * node.
 *
 * A zcache pool with pool_id as the index is created when a filesystem is
 * mounted. Each zcache pool has a red-black tree, where the inode number
 * (rb_index) is the search key. Each red-black tree node has a radix tree
 * which uses page->index (ra_index) as the index. Each radix tree slot points
 * to the zbud address combined with some extra information
 * (zcache_ra_handle).
 */
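
/*
 * Illustrative sketch (not code) of how a cleancache key is resolved by the
 * structures defined below:
 *
 *   zcache.pools[pool_id]                   one struct zcache_pool per mount
 *     -> rbtree keyed by inode number       struct zcache_rbnode (rb_index)
 *        -> radix tree keyed by page->index (ra_index)
 *           -> zbud handle (zaddr) -> zcache_ra_handle + compressed data
 *              (or ZERO_HANDLE for an all-zero page)
 */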
#define MAX_ZCACHE_POOLS 32

/*
 * One zcache_pool per (cleancache aware) filesystem mount instance.
 */
struct zcache_pool {
        struct rb_root rbtree;
        rwlock_t rb_lock;               /* Protects rbtree */
        u64 size;
        struct zbud_pool *pool;         /* Zbud pool used */
};

/*
 * Manage all zcache pools.
 */
struct _zcache {
        struct zcache_pool *pools[MAX_ZCACHE_POOLS];
        u32 num_pools;                  /* Current no. of zcache pools */
        spinlock_t pool_lock;           /* Protects pools[] and num_pools */
};
struct _zcache zcache;

/*
 * Red-black tree node, each node has a page-index radix tree.
 * Indexed by inode number.
 */
struct zcache_rbnode {
        struct rb_node rb_node;
        int rb_index;
        struct radix_tree_root ratree;  /* Page radix tree per inode rbtree */
        spinlock_t ra_lock;             /* Protects radix tree */
        struct kref refcount;
};

/*
 * Radix-tree leaf, indexed by page->index.
 */
struct zcache_ra_handle {
        int rb_index;                   /* Red-black tree index */
        int ra_index;                   /* Radix tree index */
        int zlen;                       /* Compressed page size */
        struct zcache_pool *zpool;      /* For finding zcache_pool during evict */
};

/* Total number of pages currently stored in all zcache pools. */
u64 zcache_pages(void)
{
        int i;
        u64 count = 0;

        for (i = 0; (i < MAX_ZCACHE_POOLS) && zcache.pools[i]; i++)
                count += zcache.pools[i]->size;

        return count;
}

static struct kmem_cache *zcache_rbnode_cache;

static int zcache_rbnode_cache_create(void)
{
        zcache_rbnode_cache = KMEM_CACHE(zcache_rbnode, 0);
        return zcache_rbnode_cache == NULL;
}

static void zcache_rbnode_cache_destroy(void)
{
        kmem_cache_destroy(zcache_rbnode_cache);
}

/*
 * Shrinker callback: reclaim compressed pages from the zbud pools when the
 * system is short on file pages.
 */
static int zcache_shrink(struct shrinker *s, struct shrink_control *sc)
{
        unsigned long active_file;
        unsigned long file;
        long file_gap;
        unsigned long freed = 0;
        unsigned long pool;
        static bool running;
        int i = 0;
        int retries;

        if (running)
                goto end;

        running = true;
        active_file = global_page_state(NR_ACTIVE_FILE);
        file = global_page_state(NR_FILE_PAGES);
        pool = zcache_pages();

        file_gap = pool - file;

        if ((file_gap >= 0) &&
            (totalram_pages * zcache_clear_percent / 100 > file)) {
                file_gap = pool;
                zcache_pool_shrink++;
                goto reclaim;
        }

        /*
         * file_gap == 0 means that the number of pages
         * stored by zcache is around twice as many as the
         * number of active file pages.
         */
        file_gap = pool - active_file;
        if (file_gap < 0)
                file_gap = 0;
        else
                zcache_pool_shrink++;

reclaim:
        retries = file_gap;
        while ((file_gap > 0) && retries) {
                struct zcache_pool *zpool =
                        zcache.pools[i++ % MAX_ZCACHE_POOLS];
                if (!zpool || !zpool->size)
                        continue;
                if (zbud_reclaim_page(zpool->pool, 8)) {
                        zcache_pool_shrink_fail++;
                        retries--;
                        continue;
                }
                freed++;
                file_gap--;
        }

        zcache_pool_shrink_pages += freed;
        for (i = 0; (i < MAX_ZCACHE_POOLS) && zcache.pools[i]; i++)
                zcache.pools[i]->size =
                        zbud_get_pool_size(zcache.pools[i]->pool);

        running = false;
end:
        return freed;
}

static struct shrinker zcache_shrinker = {
        .shrink = zcache_shrink,
        .seeks = DEFAULT_SEEKS * 16
};

/*
 * Compression functions
 * (The functions below are copied from zswap!)
 */
static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms;

enum comp_op {
        ZCACHE_COMPOP_COMPRESS,
        ZCACHE_COMPOP_DECOMPRESS
};

static int zcache_comp_op(enum comp_op op, const u8 *src, unsigned int slen,
                          u8 *dst, unsigned int *dlen)
{
        struct crypto_comp *tfm;
        int ret;

        tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, get_cpu());
        switch (op) {
        case ZCACHE_COMPOP_COMPRESS:
                ret = crypto_comp_compress(tfm, src, slen, dst, dlen);
                break;
        case ZCACHE_COMPOP_DECOMPRESS:
                ret = crypto_comp_decompress(tfm, src, slen, dst, dlen);
                break;
        default:
                ret = -EINVAL;
        }
        put_cpu();

        return ret;
}

static int __init zcache_comp_init(void)
{
        if (!crypto_has_comp(zcache_compressor, 0, 0)) {
                pr_info("%s compressor not available\n", zcache_compressor);
                /* fall back to default compressor */
                zcache_compressor = ZCACHE_COMPRESSOR_DEFAULT;
                if (!crypto_has_comp(zcache_compressor, 0, 0))
                        /* can't even load the default compressor */
                        return -ENODEV;
        }
        pr_info("using %s compressor\n", zcache_compressor);

        /* alloc percpu transforms */
        zcache_comp_pcpu_tfms = alloc_percpu(struct crypto_comp *);
        if (!zcache_comp_pcpu_tfms)
                return -ENOMEM;
        return 0;
}

static void zcache_comp_exit(void)
{
        /* free percpu transforms */
        if (zcache_comp_pcpu_tfms)
                free_percpu(zcache_comp_pcpu_tfms);
}

/*
 * Per-cpu code
 * (The functions below are also copied from zswap!)
 */
static DEFINE_PER_CPU(u8 *, zcache_dstmem);

static int __zcache_cpu_notifier(unsigned long action, unsigned long cpu)
{
        struct crypto_comp *tfm;
        u8 *dst;

        switch (action) {
        case CPU_UP_PREPARE:
                tfm = crypto_alloc_comp(zcache_compressor, 0, 0);
                if (IS_ERR(tfm)) {
                        pr_err("can't allocate compressor transform\n");
                        return NOTIFY_BAD;
                }
                *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = tfm;
                dst = kmalloc(PAGE_SIZE * 2, GFP_KERNEL);
                if (!dst) {
                        pr_err("can't allocate compressor buffer\n");
                        crypto_free_comp(tfm);
                        *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
                        return NOTIFY_BAD;
                }
                per_cpu(zcache_dstmem, cpu) = dst;
                break;
        case CPU_DEAD:
        case CPU_UP_CANCELED:
                tfm = *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu);
                if (tfm) {
                        crypto_free_comp(tfm);
                        *per_cpu_ptr(zcache_comp_pcpu_tfms, cpu) = NULL;
                }
                dst = per_cpu(zcache_dstmem, cpu);
                kfree(dst);
                per_cpu(zcache_dstmem, cpu) = NULL;
                break;
        default:
                break;
        }
        return NOTIFY_OK;
}

static int zcache_cpu_notifier(struct notifier_block *nb, unsigned long action,
                               void *pcpu)
{
        unsigned long cpu = (unsigned long)pcpu;

        return __zcache_cpu_notifier(action, cpu);
}

static struct notifier_block zcache_cpu_notifier_block = {
        .notifier_call = zcache_cpu_notifier
};

static int zcache_cpu_init(void)
{
        unsigned long cpu;

        get_online_cpus();
        for_each_online_cpu(cpu)
                if (__zcache_cpu_notifier(CPU_UP_PREPARE, cpu) != NOTIFY_OK)
                        goto cleanup;
        register_cpu_notifier(&zcache_cpu_notifier_block);
        put_online_cpus();
        return 0;

cleanup:
        for_each_online_cpu(cpu)
                __zcache_cpu_notifier(CPU_UP_CANCELED, cpu);
        put_online_cpus();
        return -ENOMEM;
}

/*
 * Zcache helpers
 */
static bool zcache_is_full(void)
{
        long file = global_page_state(NR_FILE_PAGES);

        return ((totalram_pages * zcache_max_pool_percent / 100 <
                        zcache_pages()) ||
                (totalram_pages * zcache_clear_percent / 100 > file));
}

/*
 * The caller must hold at least zpool->rb_lock.
 */
static struct zcache_rbnode *zcache_find_rbnode(struct rb_root *rbtree,
                                                int index,
                                                struct rb_node **rb_parent,
                                                struct rb_node ***rb_link)
{
        struct zcache_rbnode *entry;
        struct rb_node **__rb_link, *__rb_parent, *rb_prev;

        __rb_link = &rbtree->rb_node;
        rb_prev = __rb_parent = NULL;

        while (*__rb_link) {
                __rb_parent = *__rb_link;
                entry = rb_entry(__rb_parent, struct zcache_rbnode, rb_node);
                if (entry->rb_index > index)
                        __rb_link = &__rb_parent->rb_left;
                else if (entry->rb_index < index) {
                        rb_prev = __rb_parent;
                        __rb_link = &__rb_parent->rb_right;
                } else
                        return entry;
        }

        if (rb_parent)
                *rb_parent = __rb_parent;
        if (rb_link)
                *rb_link = __rb_link;

        return NULL;
}

static struct zcache_rbnode *zcache_find_get_rbnode(struct zcache_pool *zpool,
                                                    int rb_index)
{
        unsigned long flags;
        struct zcache_rbnode *rbnode;

        read_lock_irqsave(&zpool->rb_lock, flags);
        rbnode = zcache_find_rbnode(&zpool->rbtree, rb_index, 0, 0);
        if (rbnode)
                kref_get(&rbnode->refcount);
        read_unlock_irqrestore(&zpool->rb_lock, flags);

        return rbnode;
}

/*
 * kref_put callback for zcache_rbnode.
 *
 * The rbnode must have been isolated from the rbtree already.
 */
static void zcache_rbnode_release(struct kref *kref)
{
        struct zcache_rbnode *rbnode;

        rbnode = container_of(kref, struct zcache_rbnode, refcount);
        BUG_ON(rbnode->ratree.rnode);

        kmem_cache_free(zcache_rbnode_cache, rbnode);
}

/*
 * Check whether the radix tree of this rbnode is empty.
 * If that's true, then we can delete this zcache_rbnode from
 * zcache_pool->rbtree.
 *
 * Caller must hold zcache_rbnode->ra_lock.
 */
static int zcache_rbnode_empty(struct zcache_rbnode *rbnode)
{
        return rbnode->ratree.rnode == NULL;
}

/*
 * Remove zcache_rbnode from zpool->rbtree.
 *
 * holded_rblock - whether the caller already holds zpool->rb_lock
 */
static void zcache_rbnode_isolate(struct zcache_pool *zpool,
                                  struct zcache_rbnode *rbnode,
                                  bool holded_rblock)
{
        unsigned long flags;

        if (!holded_rblock)
                write_lock_irqsave(&zpool->rb_lock, flags);
        /*
         * Someone can get a reference on this rbnode before we could
         * acquire the write lock above.
         * We want to remove it from zpool->rbtree only when the caller and
         * the corresponding ratree hold the only references to this rbnode.
         * The check below ensures that a racing zcache put will not end up
         * adding a page to an isolated node and thereby losing that memory.
         */
        if (atomic_read(&rbnode->refcount.refcount) == 2) {
                rb_erase(&rbnode->rb_node, &zpool->rbtree);
                RB_CLEAR_NODE(&rbnode->rb_node);
                kref_put(&rbnode->refcount, zcache_rbnode_release);
        }
        if (!holded_rblock)
                write_unlock_irqrestore(&zpool->rb_lock, flags);
}

/*
 * Store a zaddr allocated by zbud_alloc() into the rbtree-ratree hierarchy.
 */
static int zcache_store_zaddr(struct zcache_pool *zpool, int ra_index,
                              int rb_index, unsigned long zaddr)
{
        unsigned long flags;
        struct zcache_rbnode *rbnode, *tmp;
        struct rb_node **link = NULL, *parent = NULL;
        int ret;
        void *dup_zaddr;

        rbnode = zcache_find_get_rbnode(zpool, rb_index);
        if (!rbnode) {
                /* alloc and init a new rbnode */
                rbnode = kmem_cache_alloc(zcache_rbnode_cache, GFP_ZCACHE);
                if (!rbnode)
                        return -ENOMEM;

                INIT_RADIX_TREE(&rbnode->ratree, GFP_ATOMIC|__GFP_NOWARN);
                spin_lock_init(&rbnode->ra_lock);
                rbnode->rb_index = rb_index;
                kref_init(&rbnode->refcount);
                RB_CLEAR_NODE(&rbnode->rb_node);

                /* add that rbnode to the rbtree */
                write_lock_irqsave(&zpool->rb_lock, flags);
                tmp = zcache_find_rbnode(&zpool->rbtree, rb_index,
                                         &parent, &link);
                if (tmp) {
                        /* somebody else allocated a new rbnode */
                        kmem_cache_free(zcache_rbnode_cache, rbnode);
                        rbnode = tmp;
                } else {
                        rb_link_node(&rbnode->rb_node, parent, link);
                        rb_insert_color(&rbnode->rb_node, &zpool->rbtree);
                }

                /* Inc the reference of this zcache_rbnode */
                kref_get(&rbnode->refcount);
                write_unlock_irqrestore(&zpool->rb_lock, flags);
        }

        /* We have successfully got a zcache_rbnode when arriving here */
        spin_lock_irqsave(&rbnode->ra_lock, flags);
        dup_zaddr = radix_tree_delete(&rbnode->ratree, ra_index);
        if (unlikely(dup_zaddr)) {
                if (dup_zaddr == ZERO_HANDLE) {
                        atomic_dec(&zcache_stored_zero_pages);
                } else {
                        zbud_free(zpool->pool, (unsigned long)dup_zaddr);
                        atomic_dec(&zcache_stored_pages);
                        zpool->size = zbud_get_pool_size(zpool->pool);
                }
                zcache_dup_entry++;
        }

        /* Insert zcache_ra_handle into the ratree */
        ret = radix_tree_insert(&rbnode->ratree, ra_index, (void *)zaddr);
        spin_unlock_irqrestore(&rbnode->ra_lock, flags);
        if (unlikely(ret)) {
                write_lock_irqsave(&zpool->rb_lock, flags);
                spin_lock(&rbnode->ra_lock);

                if (zcache_rbnode_empty(rbnode))
                        zcache_rbnode_isolate(zpool, rbnode, 1);

                spin_unlock(&rbnode->ra_lock);
                write_unlock_irqrestore(&zpool->rb_lock, flags);
        }

        kref_put(&rbnode->refcount, zcache_rbnode_release);
        return ret;
}

/*
 * Load a zaddr and delete it from the radix tree.
 * If the radix tree of the corresponding rbnode becomes empty, delete the
 * rbnode from zpool->rbtree as well.
 */
static void *zcache_load_delete_zaddr(struct zcache_pool *zpool,
                                      int rb_index, int ra_index)
{
        struct zcache_rbnode *rbnode;
        void *zaddr = NULL;
        unsigned long flags;

        rbnode = zcache_find_get_rbnode(zpool, rb_index);
        if (!rbnode)
                goto out;

        BUG_ON(rbnode->rb_index != rb_index);

        spin_lock_irqsave(&rbnode->ra_lock, flags);
        zaddr = radix_tree_delete(&rbnode->ratree, ra_index);
        spin_unlock_irqrestore(&rbnode->ra_lock, flags);

        /* rb_lock and ra_lock must be taken again in the given sequence */
        write_lock_irqsave(&zpool->rb_lock, flags);
        spin_lock(&rbnode->ra_lock);
        if (zcache_rbnode_empty(rbnode))
                zcache_rbnode_isolate(zpool, rbnode, 1);
        spin_unlock(&rbnode->ra_lock);
        write_unlock_irqrestore(&zpool->rb_lock, flags);

        kref_put(&rbnode->refcount, zcache_rbnode_release);
out:
        return zaddr;
}

/* Return true if the page contains only zero bytes. */
static bool zero_page(struct page *page)
{
        unsigned long *ptr = kmap_atomic(page);
        int i;
        bool ret = false;

        for (i = 0; i < PAGE_SIZE / sizeof(*ptr); i++) {
                if (ptr[i])
                        goto out;
        }
        ret = true;
out:
        kunmap_atomic(ptr);
        return ret;
}

/* Cleancache put_page hook: compress and store a clean file page. */
static void zcache_store_page(int pool_id, struct cleancache_filekey key,
                              pgoff_t index, struct page *page)
{
        struct zcache_ra_handle *zhandle;
        u8 *zpage, *src, *dst;
        /* Address of zhandle + compressed data (zpage) */
        unsigned long zaddr = 0;
        unsigned int zlen = PAGE_SIZE;
        bool zero = false;
        int ret;

        struct zcache_pool *zpool = zcache.pools[pool_id];

        /*
         * Zcache will be ineffective if the compressed memory pool is full
         * of compressed inactive file pages and most of them will never be
         * used again.
         * So we refuse to compress pages that are not on the active file
         * list.
         */
        if (!PageWasActive(page)) {
                zcache_inactive_pages_refused++;
                return;
        }

        zero = zero_page(page);
        if (zero)
                goto zero;

        if (zcache_is_full()) {
                zcache_pool_limit_hit++;
                if (zbud_reclaim_page(zpool->pool, 8)) {
                        zcache_reclaim_fail++;
                        return;
                }
                /*
                 * Continue if a page frame was reclaimed successfully.
                 */
                zcache_evict_filepages++;
                zpool->size = zbud_get_pool_size(zpool->pool);
        }

        /* compress */
        dst = get_cpu_var(zcache_dstmem);
        src = kmap_atomic(page);
        ret = zcache_comp_op(ZCACHE_COMPOP_COMPRESS, src, PAGE_SIZE, dst,
                             &zlen);
        kunmap_atomic(src);
        if (ret) {
                pr_err("zcache compress error ret %d\n", ret);
                put_cpu_var(zcache_dstmem);
                return;
        }

        /* store zcache handle together with compressed page data */
        ret = zbud_alloc(zpool->pool, zlen + sizeof(struct zcache_ra_handle),
                         GFP_ZCACHE, &zaddr);
        if (ret) {
                zcache_zbud_alloc_fail++;
                put_cpu_var(zcache_dstmem);
                return;
        }

        zhandle = (struct zcache_ra_handle *)zbud_map(zpool->pool, zaddr);

        /* Compressed page data is stored at the end of zcache_ra_handle */
        zpage = (u8 *)(zhandle + 1);
        memcpy(zpage, dst, zlen);
        zbud_unmap(zpool->pool, zaddr);
        put_cpu_var(zcache_dstmem);

zero:
        if (zero)
                zaddr = (unsigned long)ZERO_HANDLE;

        /* store zcache handle */
        ret = zcache_store_zaddr(zpool, index, key.u.ino, zaddr);
        if (ret) {
                zcache_store_failed++;
                if (!zero)
                        zbud_free(zpool->pool, zaddr);
                return;
        }

        /* update stats */
        if (zero) {
                atomic_inc(&zcache_stored_zero_pages);
        } else {
                zhandle->ra_index = index;
                zhandle->rb_index = key.u.ino;
                zhandle->zlen = zlen;
                zhandle->zpool = zpool;
                atomic_inc(&zcache_stored_pages);
                zpool->size = zbud_get_pool_size(zpool->pool);
        }

        return;
}

/* Cleancache get_page hook: decompress a stored page back into @page. */
static int zcache_load_page(int pool_id, struct cleancache_filekey key,
                            pgoff_t index, struct page *page)
{
        int ret = 0;
        u8 *src, *dst;
        void *zaddr;
        unsigned int dlen = PAGE_SIZE;
        struct zcache_ra_handle *zhandle;
        struct zcache_pool *zpool = zcache.pools[pool_id];

        zaddr = zcache_load_delete_zaddr(zpool, key.u.ino, index);
        if (!zaddr)
                return -ENOENT;
        else if (zaddr == ZERO_HANDLE)
                goto map;

        zhandle = (struct zcache_ra_handle *)zbud_map(zpool->pool,
                                                      (unsigned long)zaddr);
        /* Compressed page data is stored at the end of zcache_ra_handle */
        src = (u8 *)(zhandle + 1);

        /* decompress */
map:
        dst = kmap_atomic(page);
        if (zaddr != ZERO_HANDLE) {
                ret = zcache_comp_op(ZCACHE_COMPOP_DECOMPRESS, src,
                                     zhandle->zlen, dst, &dlen);
        } else {
                memset(dst, 0, PAGE_SIZE);
                kunmap_atomic(dst);
                flush_dcache_page(page);
                atomic_dec(&zcache_stored_zero_pages);
                goto out;
        }
        kunmap_atomic(dst);
        zbud_unmap(zpool->pool, (unsigned long)zaddr);
        zbud_free(zpool->pool, (unsigned long)zaddr);

        BUG_ON(ret);
        BUG_ON(dlen != PAGE_SIZE);

        /* update stats */
        atomic_dec(&zcache_stored_pages);
        zpool->size = zbud_get_pool_size(zpool->pool);
out:
        SetPageWasActive(page);
        return ret;
}

/* Cleancache invalidate_page hook: drop a single stored page. */
static void zcache_flush_page(int pool_id, struct cleancache_filekey key,
                              pgoff_t index)
{
        struct zcache_pool *zpool = zcache.pools[pool_id];
        void *zaddr = NULL;

        zaddr = zcache_load_delete_zaddr(zpool, key.u.ino, index);
        if (zaddr && (zaddr != ZERO_HANDLE)) {
                zbud_free(zpool->pool, (unsigned long)zaddr);
                atomic_dec(&zcache_stored_pages);
                zpool->size = zbud_get_pool_size(zpool->pool);
        } else if (zaddr == ZERO_HANDLE) {
                atomic_dec(&zcache_stored_zero_pages);
        }
}
#define FREE_BATCH 16
/*
 * Callers must hold the rbnode->ra_lock.
 */
static void zcache_flush_ratree(struct zcache_pool *zpool,
                                struct zcache_rbnode *rbnode)
{
        unsigned long index = 0;
        int count, i;
        struct zcache_ra_handle *zhandle;
        void *zaddr = NULL;

        do {
                void *zaddrs[FREE_BATCH];
                unsigned long indices[FREE_BATCH];

                count = radix_tree_gang_lookup_index(&rbnode->ratree,
                                                     (void **)zaddrs, indices,
                                                     index, FREE_BATCH);

                for (i = 0; i < count; i++) {
                        if (zaddrs[i] == ZERO_HANDLE) {
                                zaddr = radix_tree_delete(&rbnode->ratree,
                                                          indices[i]);
                                if (zaddr)
                                        atomic_dec(&zcache_stored_zero_pages);
                                continue;
                        }
                        zhandle = (struct zcache_ra_handle *)zbud_map(
                                zpool->pool, (unsigned long)zaddrs[i]);
                        index = zhandle->ra_index;
                        zaddr = radix_tree_delete(&rbnode->ratree, index);
                        if (!zaddr)
                                continue;
                        zbud_unmap(zpool->pool, (unsigned long)zaddrs[i]);
                        zbud_free(zpool->pool, (unsigned long)zaddrs[i]);
                        atomic_dec(&zcache_stored_pages);
                        zpool->size = zbud_get_pool_size(zpool->pool);
                }

                index++;
        } while (count == FREE_BATCH);
}

/* Cleancache invalidate_inode hook: drop all stored pages of an inode. */
static void zcache_flush_inode(int pool_id, struct cleancache_filekey key)
{
        struct zcache_rbnode *rbnode;
        unsigned long flags1, flags2;
        struct zcache_pool *zpool = zcache.pools[pool_id];

        /*
         * Refuse new pages added in to the same rbnode, so take rb_lock
         * first.
         */
        write_lock_irqsave(&zpool->rb_lock, flags1);
        rbnode = zcache_find_rbnode(&zpool->rbtree, key.u.ino, 0, 0);
        if (!rbnode) {
                write_unlock_irqrestore(&zpool->rb_lock, flags1);
                return;
        }

        kref_get(&rbnode->refcount);

        spin_lock_irqsave(&rbnode->ra_lock, flags2);
        zcache_flush_ratree(zpool, rbnode);
        if (zcache_rbnode_empty(rbnode))
                /* When we arrive here, we already hold rb_lock */
                zcache_rbnode_isolate(zpool, rbnode, 1);
        spin_unlock_irqrestore(&rbnode->ra_lock, flags2);

        write_unlock_irqrestore(&zpool->rb_lock, flags1);
        kref_put(&rbnode->refcount, zcache_rbnode_release);
}

static void zcache_destroy_pool(struct zcache_pool *zpool);

/* Cleancache invalidate_fs hook: drop everything and destroy the pool. */
static void zcache_flush_fs(int pool_id)
{
        struct zcache_rbnode *z_rbnode = NULL;
        struct rb_node *rbnode;
        unsigned long flags1, flags2;
        struct zcache_pool *zpool;

        if (pool_id < 0)
                return;

        zpool = zcache.pools[pool_id];
        if (!zpool)
                return;

        /*
         * Refuse new pages added in, so take rb_lock first.
         */
        write_lock_irqsave(&zpool->rb_lock, flags1);

        rbnode = rb_first(&zpool->rbtree);
        while (rbnode) {
                z_rbnode = rb_entry(rbnode, struct zcache_rbnode, rb_node);
                rbnode = rb_next(rbnode);
                if (z_rbnode) {
                        kref_get(&z_rbnode->refcount);
                        spin_lock_irqsave(&z_rbnode->ra_lock, flags2);
                        zcache_flush_ratree(zpool, z_rbnode);
                        if (zcache_rbnode_empty(z_rbnode))
                                zcache_rbnode_isolate(zpool, z_rbnode, 1);
                        spin_unlock_irqrestore(&z_rbnode->ra_lock, flags2);
                        kref_put(&z_rbnode->refcount, zcache_rbnode_release);
                }
        }

        write_unlock_irqrestore(&zpool->rb_lock, flags1);
        zcache_destroy_pool(zpool);
}

/*
 * Evict compressed pages from the zcache pool on an LRU basis when the
 * compressed pool is full.
 */
static int zcache_evict_zpage(struct zbud_pool *pool, unsigned long zaddr)
{
        struct zcache_pool *zpool;
        struct zcache_ra_handle *zhandle;
        void *zaddr_intree;

        BUG_ON(zaddr == (unsigned long)ZERO_HANDLE);

        zhandle = (struct zcache_ra_handle *)zbud_map(pool, zaddr);

        zpool = zhandle->zpool;
        /* There can be a race with zcache store */
        if (!zpool)
                return -EINVAL;

        BUG_ON(pool != zpool->pool);

        zaddr_intree = zcache_load_delete_zaddr(zpool, zhandle->rb_index,
                                                zhandle->ra_index);
        if (zaddr_intree) {
                BUG_ON((unsigned long)zaddr_intree != zaddr);
                zbud_unmap(pool, zaddr);
                zbud_free(pool, zaddr);
                atomic_dec(&zcache_stored_pages);
                zpool->size = zbud_get_pool_size(pool);
                zcache_evict_zpages++;
        }
        return 0;
}

static struct zbud_ops zcache_zbud_ops = {
        .evict = zcache_evict_zpage
};

/* Return the pool id */
static int zcache_create_pool(void)
{
        int ret;
        struct zcache_pool *zpool;

        zpool = kzalloc(sizeof(*zpool), GFP_KERNEL);
        if (!zpool) {
                ret = -ENOMEM;
                goto out;
        }

        zpool->pool = zbud_create_pool(GFP_KERNEL, &zcache_zbud_ops);
        if (!zpool->pool) {
                kfree(zpool);
                ret = -ENOMEM;
                goto out;
        }

        spin_lock(&zcache.pool_lock);
        if (zcache.num_pools == MAX_ZCACHE_POOLS) {
                pr_err("Cannot create new pool (limit:%u)\n",
                       MAX_ZCACHE_POOLS);
                zbud_destroy_pool(zpool->pool);
                kfree(zpool);
                ret = -EPERM;
                goto out_unlock;
        }

        rwlock_init(&zpool->rb_lock);
        zpool->rbtree = RB_ROOT;
        /* Add to pool list */
        for (ret = 0; ret < MAX_ZCACHE_POOLS; ret++)
                if (!zcache.pools[ret])
                        break;
        zcache.pools[ret] = zpool;
        zcache.num_pools++;
        pr_info("New pool created id:%d\n", ret);

out_unlock:
        spin_unlock(&zcache.pool_lock);
out:
        return ret;
}

static void zcache_destroy_pool(struct zcache_pool *zpool)
{
        int i;

        if (!zpool)
                return;

        spin_lock(&zcache.pool_lock);
        zcache.num_pools--;
        for (i = 0; i < MAX_ZCACHE_POOLS; i++)
                if (zcache.pools[i] == zpool)
                        break;
        zcache.pools[i] = NULL;
        spin_unlock(&zcache.pool_lock);

        if (!RB_EMPTY_ROOT(&zpool->rbtree))
                WARN_ON("Memory leak detected. Freeing non-empty pool!\n");

        zbud_destroy_pool(zpool->pool);
        kfree(zpool);
}

static int zcache_init_fs(size_t pagesize)
{
        int ret;

        if (pagesize != PAGE_SIZE) {
                pr_info("Unsupported page size: %zu\n", pagesize);
                ret = -EINVAL;
                goto out;
        }

        ret = zcache_create_pool();
        if (ret < 0) {
                pr_info("Failed to create new pool\n");
                ret = -ENOMEM;
                goto out;
        }
out:
        return ret;
}

static int zcache_init_shared_fs(char *uuid, size_t pagesize)
{
        /* shared pools are unsupported and map to private */
        return zcache_init_fs(pagesize);
}

static struct cleancache_ops zcache_ops = {
        .put_page = zcache_store_page,
        .get_page = zcache_load_page,
        .invalidate_page = zcache_flush_page,
        .invalidate_inode = zcache_flush_inode,
        .invalidate_fs = zcache_flush_fs,
        .init_shared_fs = zcache_init_shared_fs,
        .init_fs = zcache_init_fs
};

/*
 * Debugfs functions
 */
#ifdef CONFIG_DEBUG_FS
#include <linux/debugfs.h>

static int pool_pages_get(void *_data, u64 *val)
{
        *val = zcache_pages();
        return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(pool_page_fops, pool_pages_get, NULL, "%llu\n");

static struct dentry *zcache_debugfs_root;

static int __init zcache_debugfs_init(void)
{
        if (!debugfs_initialized())
                return -ENODEV;

        zcache_debugfs_root = debugfs_create_dir("zcache", NULL);
        if (!zcache_debugfs_root)
                return -ENOMEM;

        debugfs_create_u64("pool_limit_hit", S_IRUGO, zcache_debugfs_root,
                           &zcache_pool_limit_hit);
        debugfs_create_u64("reject_alloc_fail", S_IRUGO, zcache_debugfs_root,
                           &zcache_zbud_alloc_fail);
        debugfs_create_u64("duplicate_entry", S_IRUGO, zcache_debugfs_root,
                           &zcache_dup_entry);
        debugfs_create_file("pool_pages", S_IRUGO, zcache_debugfs_root, NULL,
                            &pool_page_fops);
        debugfs_create_atomic_t("stored_pages", S_IRUGO, zcache_debugfs_root,
                                &zcache_stored_pages);
        debugfs_create_atomic_t("stored_zero_pages", S_IRUGO,
                                zcache_debugfs_root,
                                &zcache_stored_zero_pages);
        debugfs_create_u64("evicted_zpages", S_IRUGO, zcache_debugfs_root,
                           &zcache_evict_zpages);
        debugfs_create_u64("evicted_filepages", S_IRUGO, zcache_debugfs_root,
                           &zcache_evict_filepages);
        debugfs_create_u64("reclaim_fail", S_IRUGO, zcache_debugfs_root,
                           &zcache_reclaim_fail);
        debugfs_create_u64("inactive_pages_refused", S_IRUGO,
                           zcache_debugfs_root,
                           &zcache_inactive_pages_refused);
        debugfs_create_u64("pool_shrink_count", S_IRUGO, zcache_debugfs_root,
                           &zcache_pool_shrink);
        debugfs_create_u64("pool_shrink_fail", S_IRUGO, zcache_debugfs_root,
                           &zcache_pool_shrink_fail);
        debugfs_create_u64("pool_shrink_pages", S_IRUGO, zcache_debugfs_root,
                           &zcache_pool_shrink_pages);
        debugfs_create_u64("store_fail", S_IRUGO, zcache_debugfs_root,
                           &zcache_store_failed);

        return 0;
}

static void __exit zcache_debugfs_exit(void)
{
        debugfs_remove_recursive(zcache_debugfs_root);
}
#else
static int __init zcache_debugfs_init(void)
{
        return 0;
}

static void __exit zcache_debugfs_exit(void)
{
}
#endif

/*
 * zcache init and exit
 */
static int __init init_zcache(void)
{
        if (!zcache_enabled)
                return 0;

        pr_info("loading zcache..\n");
        if (zcache_rbnode_cache_create()) {
                pr_err("entry cache creation failed\n");
                goto error;
        }

        if (zcache_comp_init()) {
                pr_err("compressor initialization failed\n");
                goto compfail;
        }
        if (zcache_cpu_init()) {
                pr_err("per-cpu initialization failed\n");
                goto pcpufail;
        }

        spin_lock_init(&zcache.pool_lock);
        cleancache_register_ops(&zcache_ops);

        if (zcache_debugfs_init())
                pr_warn("debugfs initialization failed\n");
        register_shrinker(&zcache_shrinker);
        return 0;

pcpufail:
        zcache_comp_exit();
compfail:
        zcache_rbnode_cache_destroy();
error:
        return -ENOMEM;
}

/* must be late so crypto has time to come up */
late_initcall(init_zcache);
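
/*
 * Example usage (illustrative, assuming zcache is built into the kernel and
 * that the chosen compressor is provided by the kernel crypto API):
 *
 *   - enable at boot:        zcache.enabled=1
 *   - pick a compressor:     zcache.compressor=lz4   (falls back to "lzo")
 *   - tune at runtime:       /sys/module/zcache/parameters/max_pool_percent
 *                            /sys/module/zcache/parameters/clear_percent
 *   - statistics (with CONFIG_DEBUG_FS): <debugfs>/zcache/, e.g. pool_pages,
 *     stored_pages, evicted_zpages
 */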
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Bob Liu");
MODULE_DESCRIPTION("Compressed cache for clean file pages");