mm: Backport ZCache from QC kernel 3.18

Change-Id: I3edff3a56cf6525f13430ab93309272d1faecfe1 Signed-off-by: Kevin F. Haggerty <haggertk@lineageos.org>
2016-04-14 02:52:21 -07:00 · 2016-04-14 02:52:21 -07:00 · b23d0a2d3c
parent c780c66637
commit b23d0a2d3c
16 changed files with 1778 additions and 3084 deletions
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@ -82,8 +82,6 @@ source "drivers/staging/iio/Kconfig"

 source "drivers/staging/zram/Kconfig"

-source "drivers/staging/zcache/Kconfig"
-
 source "drivers/staging/zsmalloc/Kconfig"

 source "drivers/staging/wlags49_h2/Kconfig"
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@ -33,7 +33,6 @@ obj-$(CONFIG_VME_BUS)		+= vme/
 obj-$(CONFIG_DX_SEP)            += sep/
 obj-$(CONFIG_IIO)		+= iio/
 obj-$(CONFIG_ZRAM)		+= zram/
-obj-$(CONFIG_ZCACHE)		+= zcache/
 obj-$(CONFIG_ZSMALLOC)		+= zsmalloc/
 obj-$(CONFIG_WLAGS49_H2)	+= wlags49_h2/
 obj-$(CONFIG_WLAGS49_H25)	+= wlags49_h25/
--- a/drivers/staging/android/lowmemorykiller.c
+++ b/drivers/staging/android/lowmemorykiller.c
@ -41,6 +41,7 @@
 #include <linux/delay.h>
 #include <linux/swap.h>
 #include <linux/fs.h>
+#include <linux/zcache.h>

 #include <linux/ratelimit.h>

@ -199,7 +200,7 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
 	is_active_high = (global_page_state(NR_ACTIVE_FILE) >
 				global_page_state(NR_INACTIVE_FILE)) ? 1 : 0;
 #endif
-	other_file = global_page_state(NR_FILE_PAGES);
+	other_file = global_page_state(NR_FILE_PAGES) + zcache_pages();

 #if defined(CONFIG_CMA_PAGE_COUNTING) && defined(CONFIG_EXCLUDE_LRU_LIVING_IN_CMA)
 	if (get_nr_swap_pages() < SSWAP_LMK_THRESHOLD && cma_page_ratio >= CMA_PAGE_RATIO
@ -446,7 +447,7 @@ static int android_oom_handler(struct notifier_block *nb,

 	nr_cma_inactive_file = global_page_state(NR_CMA_INACTIVE_FILE);
 	nr_cma_active_file = global_page_state(NR_CMA_ACTIVE_FILE);
-	other_file = global_page_state(NR_FILE_PAGES) -
+	other_file = global_page_state(NR_FILE_PAGES) + zcache_pages() -
 					global_page_state(NR_SHMEM) -
 					total_swapcache_pages -
 					nr_cma_inactive_file -
--- a/drivers/staging/zcache/Kconfig
+++ b/drivers/staging/zcache/Kconfig
@ -1,14 +0,0 @@
-config ZCACHE
-	bool "Dynamic compression of swap pages and clean pagecache pages"
-	# X86 dependency is because zsmalloc uses non-portable pte/tlb
-	# functions
-	depends on (CLEANCACHE || FRONTSWAP) && CRYPTO=y && X86
-	select ZSMALLOC
-	select CRYPTO_LZO
-	default n
-	help
-	  Zcache doubles RAM efficiency while providing a significant
-	  performance boosts on many workloads.  Zcache uses
-	  compression and an in-kernel implementation of transcendent
-	  memory to store clean page cache pages and swap in RAM,
-	  providing a noticeable reduction in disk I/O.
--- a/drivers/staging/zcache/Makefile
+++ b/drivers/staging/zcache/Makefile
@ -1,3 +0,0 @@
-zcache-y	:=	zcache-main.o tmem.o
-
-obj-$(CONFIG_ZCACHE)	+=	zcache.o
--- a/drivers/staging/zcache/tmem.c
+++ b/drivers/staging/zcache/tmem.c
@ -1,770 +0,0 @@
-/*
- * In-kernel transcendent memory (generic implementation)
- *
- * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
- *
- * The primary purpose of Transcedent Memory ("tmem") is to map object-oriented
- * "handles" (triples containing a pool id, and object id, and an index), to
- * pages in a page-accessible memory (PAM).  Tmem references the PAM pages via
- * an abstract "pampd" (PAM page-descriptor), which can be operated on by a
- * set of functions (pamops).  Each pampd contains some representation of
- * PAGE_SIZE bytes worth of data. Tmem must support potentially millions of
- * pages and must be able to insert, find, and delete these pages at a
- * potential frequency of thousands per second concurrently across many CPUs,
- * (and, if used with KVM, across many vcpus across many guests).
- * Tmem is tracked with a hierarchy of data structures, organized by
- * the elements in a handle-tuple: pool_id, object_id, and page index.
- * One or more "clients" (e.g. guests) each provide one or more tmem_pools.
- * Each pool, contains a hash table of rb_trees of tmem_objs.  Each
- * tmem_obj contains a radix-tree-like tree of pointers, with intermediate
- * nodes called tmem_objnodes.  Each leaf pointer in this tree points to
- * a pampd, which is accessible only through a small set of callbacks
- * registered by the PAM implementation (see tmem_register_pamops). Tmem
- * does all memory allocation via a set of callbacks registered by the tmem
- * host implementation (e.g. see tmem_register_hostops).
- */
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/atomic.h>
-
-#include "tmem.h"
-
-/* data structure sentinels used for debugging... see tmem.h */
-#define POOL_SENTINEL 0x87658765
-#define OBJ_SENTINEL 0x12345678
-#define OBJNODE_SENTINEL 0xfedcba09
-
-/*
- * A tmem host implementation must use this function to register callbacks
- * for memory allocation.
- */
-static struct tmem_hostops tmem_hostops;
-
-static void tmem_objnode_tree_init(void);
-
-void tmem_register_hostops(struct tmem_hostops *m)
-{
-	tmem_objnode_tree_init();
-	tmem_hostops = *m;
-}
-
-/*
- * A tmem host implementation must use this function to register
- * callbacks for a page-accessible memory (PAM) implementation
- */
-static struct tmem_pamops tmem_pamops;
-
-void tmem_register_pamops(struct tmem_pamops *m)
-{
-	tmem_pamops = *m;
-}
-
-/*
- * Oid's are potentially very sparse and tmem_objs may have an indeterminately
- * short life, being added and deleted at a relatively high frequency.
- * So an rb_tree is an ideal data structure to manage tmem_objs.  But because
- * of the potentially huge number of tmem_objs, each pool manages a hashtable
- * of rb_trees to reduce search, insert, delete, and rebalancing time.
- * Each hashbucket also has a lock to manage concurrent access.
- *
- * The following routines manage tmem_objs.  When any tmem_obj is accessed,
- * the hashbucket lock must be held.
- */
-
-/* searches for object==oid in pool, returns locked object if found */
-static struct tmem_obj *tmem_obj_find(struct tmem_hashbucket *hb,
-					struct tmem_oid *oidp)
-{
-	struct rb_node *rbnode;
-	struct tmem_obj *obj;
-
-	rbnode = hb->obj_rb_root.rb_node;
-	while (rbnode) {
-		BUG_ON(RB_EMPTY_NODE(rbnode));
-		obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
-		switch (tmem_oid_compare(oidp, &obj->oid)) {
-		case 0: /* equal */
-			goto out;
-		case -1:
-			rbnode = rbnode->rb_left;
-			break;
-		case 1:
-			rbnode = rbnode->rb_right;
-			break;
-		}
-	}
-	obj = NULL;
-out:
-	return obj;
-}
-
-static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *);
-
-/* free an object that has no more pampds in it */
-static void tmem_obj_free(struct tmem_obj *obj, struct tmem_hashbucket *hb)
-{
-	struct tmem_pool *pool;
-
-	BUG_ON(obj == NULL);
-	ASSERT_SENTINEL(obj, OBJ);
-	BUG_ON(obj->pampd_count > 0);
-	pool = obj->pool;
-	BUG_ON(pool == NULL);
-	if (obj->objnode_tree_root != NULL) /* may be "stump" with no leaves */
-		tmem_pampd_destroy_all_in_obj(obj);
-	BUG_ON(obj->objnode_tree_root != NULL);
-	BUG_ON((long)obj->objnode_count != 0);
-	atomic_dec(&pool->obj_count);
-	BUG_ON(atomic_read(&pool->obj_count) < 0);
-	INVERT_SENTINEL(obj, OBJ);
-	obj->pool = NULL;
-	tmem_oid_set_invalid(&obj->oid);
-	rb_erase(&obj->rb_tree_node, &hb->obj_rb_root);
-}
-
-/*
- * initialize, and insert an tmem_object_root (called only if find failed)
- */
-static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb,
-					struct tmem_pool *pool,
-					struct tmem_oid *oidp)
-{
-	struct rb_root *root = &hb->obj_rb_root;
-	struct rb_node **new = &(root->rb_node), *parent = NULL;
-	struct tmem_obj *this;
-
-	BUG_ON(pool == NULL);
-	atomic_inc(&pool->obj_count);
-	obj->objnode_tree_height = 0;
-	obj->objnode_tree_root = NULL;
-	obj->pool = pool;
-	obj->oid = *oidp;
-	obj->objnode_count = 0;
-	obj->pampd_count = 0;
-	(*tmem_pamops.new_obj)(obj);
-	SET_SENTINEL(obj, OBJ);
-	while (*new) {
-		BUG_ON(RB_EMPTY_NODE(*new));
-		this = rb_entry(*new, struct tmem_obj, rb_tree_node);
-		parent = *new;
-		switch (tmem_oid_compare(oidp, &this->oid)) {
-		case 0:
-			BUG(); /* already present; should never happen! */
-			break;
-		case -1:
-			new = &(*new)->rb_left;
-			break;
-		case 1:
-			new = &(*new)->rb_right;
-			break;
-		}
-	}
-	rb_link_node(&obj->rb_tree_node, parent, new);
-	rb_insert_color(&obj->rb_tree_node, root);
-}
-
-/*
- * Tmem is managed as a set of tmem_pools with certain attributes, such as
- * "ephemeral" vs "persistent".  These attributes apply to all tmem_objs
- * and all pampds that belong to a tmem_pool.  A tmem_pool is created
- * or deleted relatively rarely (for example, when a filesystem is
- * mounted or unmounted.
- */
-
-/* flush all data from a pool and, optionally, free it */
-static void tmem_pool_flush(struct tmem_pool *pool, bool destroy)
-{
-	struct rb_node *rbnode;
-	struct tmem_obj *obj;
-	struct tmem_hashbucket *hb = &pool->hashbucket[0];
-	int i;
-
-	BUG_ON(pool == NULL);
-	for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
-		spin_lock(&hb->lock);
-		rbnode = rb_first(&hb->obj_rb_root);
-		while (rbnode != NULL) {
-			obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
-			rbnode = rb_next(rbnode);
-			tmem_pampd_destroy_all_in_obj(obj);
-			tmem_obj_free(obj, hb);
-			(*tmem_hostops.obj_free)(obj, pool);
-		}
-		spin_unlock(&hb->lock);
-	}
-	if (destroy)
-		list_del(&pool->pool_list);
-}
-
-/*
- * A tmem_obj contains a radix-tree-like tree in which the intermediate
- * nodes are called tmem_objnodes.  (The kernel lib/radix-tree.c implementation
- * is very specialized and tuned for specific uses and is not particularly
- * suited for use from this code, though some code from the core algorithms has
- * been reused, thus the copyright notices below).  Each tmem_objnode contains
- * a set of pointers which point to either a set of intermediate tmem_objnodes
- * or a set of of pampds.
- *
- * Portions Copyright (C) 2001 Momchil Velikov
- * Portions Copyright (C) 2001 Christoph Hellwig
- * Portions Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
- */
-
-struct tmem_objnode_tree_path {
-	struct tmem_objnode *objnode;
-	int offset;
-};
-
-/* objnode height_to_maxindex translation */
-static unsigned long tmem_objnode_tree_h2max[OBJNODE_TREE_MAX_PATH + 1];
-
-static void tmem_objnode_tree_init(void)
-{
-	unsigned int ht, tmp;
-
-	for (ht = 0; ht < ARRAY_SIZE(tmem_objnode_tree_h2max); ht++) {
-		tmp = ht * OBJNODE_TREE_MAP_SHIFT;
-		if (tmp >= OBJNODE_TREE_INDEX_BITS)
-			tmem_objnode_tree_h2max[ht] = ~0UL;
-		else
-			tmem_objnode_tree_h2max[ht] =
-			    (~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1;
-	}
-}
-
-static struct tmem_objnode *tmem_objnode_alloc(struct tmem_obj *obj)
-{
-	struct tmem_objnode *objnode;
-
-	ASSERT_SENTINEL(obj, OBJ);
-	BUG_ON(obj->pool == NULL);
-	ASSERT_SENTINEL(obj->pool, POOL);
-	objnode = (*tmem_hostops.objnode_alloc)(obj->pool);
-	if (unlikely(objnode == NULL))
-		goto out;
-	objnode->obj = obj;
-	SET_SENTINEL(objnode, OBJNODE);
-	memset(&objnode->slots, 0, sizeof(objnode->slots));
-	objnode->slots_in_use = 0;
-	obj->objnode_count++;
-out:
-	return objnode;
-}
-
-static void tmem_objnode_free(struct tmem_objnode *objnode)
-{
-	struct tmem_pool *pool;
-	int i;
-
-	BUG_ON(objnode == NULL);
-	for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++)
-		BUG_ON(objnode->slots[i] != NULL);
-	ASSERT_SENTINEL(objnode, OBJNODE);
-	INVERT_SENTINEL(objnode, OBJNODE);
-	BUG_ON(objnode->obj == NULL);
-	ASSERT_SENTINEL(objnode->obj, OBJ);
-	pool = objnode->obj->pool;
-	BUG_ON(pool == NULL);
-	ASSERT_SENTINEL(pool, POOL);
-	objnode->obj->objnode_count--;
-	objnode->obj = NULL;
-	(*tmem_hostops.objnode_free)(objnode, pool);
-}
-
-/*
- * lookup index in object and return associated pampd (or NULL if not found)
- */
-static void **__tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
-{
-	unsigned int height, shift;
-	struct tmem_objnode **slot = NULL;
-
-	BUG_ON(obj == NULL);
-	ASSERT_SENTINEL(obj, OBJ);
-	BUG_ON(obj->pool == NULL);
-	ASSERT_SENTINEL(obj->pool, POOL);
-
-	height = obj->objnode_tree_height;
-	if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height])
-		goto out;
-	if (height == 0 && obj->objnode_tree_root) {
-		slot = &obj->objnode_tree_root;
-		goto out;
-	}
-	shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
-	slot = &obj->objnode_tree_root;
-	while (height > 0) {
-		if (*slot == NULL)
-			goto out;
-		slot = (struct tmem_objnode **)
-			((*slot)->slots +
-			 ((index >> shift) & OBJNODE_TREE_MAP_MASK));
-		shift -= OBJNODE_TREE_MAP_SHIFT;
-		height--;
-	}
-out:
-	return slot != NULL ? (void **)slot : NULL;
-}
-
-static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
-{
-	struct tmem_objnode **slot;
-
-	slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
-	return slot != NULL ? *slot : NULL;
-}
-
-static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index,
-					void *new_pampd)
-{
-	struct tmem_objnode **slot;
-	void *ret = NULL;
-
-	slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
-	if ((slot != NULL) && (*slot != NULL)) {
-		void *old_pampd = *(void **)slot;
-		*(void **)slot = new_pampd;
-		(*tmem_pamops.free)(old_pampd, obj->pool, NULL, 0);
-		ret = new_pampd;
-	}
-	return ret;
-}
-
-static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index,
-					void *pampd)
-{
-	int ret = 0;
-	struct tmem_objnode *objnode = NULL, *newnode, *slot;
-	unsigned int height, shift;
-	int offset = 0;
-
-	/* if necessary, extend the tree to be higher  */
-	if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) {
-		height = obj->objnode_tree_height + 1;
-		if (index > tmem_objnode_tree_h2max[height])
-			while (index > tmem_objnode_tree_h2max[height])
-				height++;
-		if (obj->objnode_tree_root == NULL) {
-			obj->objnode_tree_height = height;
-			goto insert;
-		}
-		do {
-			newnode = tmem_objnode_alloc(obj);
-			if (!newnode) {
-				ret = -ENOMEM;
-				goto out;
-			}
-			newnode->slots[0] = obj->objnode_tree_root;
-			newnode->slots_in_use = 1;
-			obj->objnode_tree_root = newnode;
-			obj->objnode_tree_height++;
-		} while (height > obj->objnode_tree_height);
-	}
-insert:
-	slot = obj->objnode_tree_root;
-	height = obj->objnode_tree_height;
-	shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
-	while (height > 0) {
-		if (slot == NULL) {
-			/* add a child objnode.  */
-			slot = tmem_objnode_alloc(obj);
-			if (!slot) {
-				ret = -ENOMEM;
-				goto out;
-			}
-			if (objnode) {
-
-				objnode->slots[offset] = slot;
-				objnode->slots_in_use++;
-			} else
-				obj->objnode_tree_root = slot;
-		}
-		/* go down a level */
-		offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
-		objnode = slot;
-		slot = objnode->slots[offset];
-		shift -= OBJNODE_TREE_MAP_SHIFT;
-		height--;
-	}
-	BUG_ON(slot != NULL);
-	if (objnode) {
-		objnode->slots_in_use++;
-		objnode->slots[offset] = pampd;
-	} else
-		obj->objnode_tree_root = pampd;
-	obj->pampd_count++;
-out:
-	return ret;
-}
-
-static void *tmem_pampd_delete_from_obj(struct tmem_obj *obj, uint32_t index)
-{
-	struct tmem_objnode_tree_path path[OBJNODE_TREE_MAX_PATH + 1];
-	struct tmem_objnode_tree_path *pathp = path;
-	struct tmem_objnode *slot = NULL;
-	unsigned int height, shift;
-	int offset;
-
-	BUG_ON(obj == NULL);
-	ASSERT_SENTINEL(obj, OBJ);
-	BUG_ON(obj->pool == NULL);
-	ASSERT_SENTINEL(obj->pool, POOL);
-	height = obj->objnode_tree_height;
-	if (index > tmem_objnode_tree_h2max[height])
-		goto out;
-	slot = obj->objnode_tree_root;
-	if (height == 0 && obj->objnode_tree_root) {
-		obj->objnode_tree_root = NULL;
-		goto out;
-	}
-	shift = (height - 1) * OBJNODE_TREE_MAP_SHIFT;
-	pathp->objnode = NULL;
-	do {
-		if (slot == NULL)
-			goto out;
-		pathp++;
-		offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
-		pathp->offset = offset;
-		pathp->objnode = slot;
-		slot = slot->slots[offset];
-		shift -= OBJNODE_TREE_MAP_SHIFT;
-		height--;
-	} while (height > 0);
-	if (slot == NULL)
-		goto out;
-	while (pathp->objnode) {
-		pathp->objnode->slots[pathp->offset] = NULL;
-		pathp->objnode->slots_in_use--;
-		if (pathp->objnode->slots_in_use) {
-			if (pathp->objnode == obj->objnode_tree_root) {
-				while (obj->objnode_tree_height > 0 &&
-				  obj->objnode_tree_root->slots_in_use == 1 &&
-				  obj->objnode_tree_root->slots[0]) {
-					struct tmem_objnode *to_free =
-						obj->objnode_tree_root;
-
-					obj->objnode_tree_root =
-							to_free->slots[0];
-					obj->objnode_tree_height--;
-					to_free->slots[0] = NULL;
-					to_free->slots_in_use = 0;
-					tmem_objnode_free(to_free);
-				}
-			}
-			goto out;
-		}
-		tmem_objnode_free(pathp->objnode); /* 0 slots used, free it */
-		pathp--;
-	}
-	obj->objnode_tree_height = 0;
-	obj->objnode_tree_root = NULL;
-
-out:
-	if (slot != NULL)
-		obj->pampd_count--;
-	BUG_ON(obj->pampd_count < 0);
-	return slot;
-}
-
-/* recursively walk the objnode_tree destroying pampds and objnodes */
-static void tmem_objnode_node_destroy(struct tmem_obj *obj,
-					struct tmem_objnode *objnode,
-					unsigned int ht)
-{
-	int i;
-
-	if (ht == 0)
-		return;
-	for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) {
-		if (objnode->slots[i]) {
-			if (ht == 1) {
-				obj->pampd_count--;
-				(*tmem_pamops.free)(objnode->slots[i],
-						obj->pool, NULL, 0);
-				objnode->slots[i] = NULL;
-				continue;
-			}
-			tmem_objnode_node_destroy(obj, objnode->slots[i], ht-1);
-			tmem_objnode_free(objnode->slots[i]);
-			objnode->slots[i] = NULL;
-		}
-	}
-}
-
-static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
-{
-	if (obj->objnode_tree_root == NULL)
-		return;
-	if (obj->objnode_tree_height == 0) {
-		obj->pampd_count--;
-		(*tmem_pamops.free)(obj->objnode_tree_root, obj->pool, NULL, 0);
-	} else {
-		tmem_objnode_node_destroy(obj, obj->objnode_tree_root,
-					obj->objnode_tree_height);
-		tmem_objnode_free(obj->objnode_tree_root);
-		obj->objnode_tree_height = 0;
-	}
-	obj->objnode_tree_root = NULL;
-	(*tmem_pamops.free_obj)(obj->pool, obj);
-}
-
-/*
- * Tmem is operated on by a set of well-defined actions:
- * "put", "get", "flush", "flush_object", "new pool" and "destroy pool".
- * (The tmem ABI allows for subpages and exchanges but these operations
- * are not included in this implementation.)
- *
- * These "tmem core" operations are implemented in the following functions.
- */
-
-/*
- * "Put" a page, e.g. copy a page from the kernel into newly allocated
- * PAM space (if such space is available).  Tmem_put is complicated by
- * a corner case: What if a page with matching handle already exists in
- * tmem?  To guarantee coherency, one of two actions is necessary: Either
- * the data for the page must be overwritten, or the page must be
- * "flushed" so that the data is not accessible to a subsequent "get".
- * Since these "duplicate puts" are relatively rare, this implementation
- * always flushes for simplicity.
- */
-int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
-		char *data, size_t size, bool raw, bool ephemeral)
-{
-	struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL;
-	void *pampd = NULL, *pampd_del = NULL;
-	int ret = -ENOMEM;
-	struct tmem_hashbucket *hb;
-
-	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
-	spin_lock(&hb->lock);
-	obj = objfound = tmem_obj_find(hb, oidp);
-	if (obj != NULL) {
-		pampd = tmem_pampd_lookup_in_obj(objfound, index);
-		if (pampd != NULL) {
-			/* if found, is a dup put, flush the old one */
-			pampd_del = tmem_pampd_delete_from_obj(obj, index);
-			BUG_ON(pampd_del != pampd);
-			(*tmem_pamops.free)(pampd, pool, oidp, index);
-			if (obj->pampd_count == 0) {
-				objnew = obj;
-				objfound = NULL;
-			}
-			pampd = NULL;
-		}
-	} else {
-		obj = objnew = (*tmem_hostops.obj_alloc)(pool);
-		if (unlikely(obj == NULL)) {
-			ret = -ENOMEM;
-			goto out;
-		}
-		tmem_obj_init(obj, hb, pool, oidp);
-	}
-	BUG_ON(obj == NULL);
-	BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound));
-	pampd = (*tmem_pamops.create)(data, size, raw, ephemeral,
-					obj->pool, &obj->oid, index);
-	if (unlikely(pampd == NULL))
-		goto free;
-	ret = tmem_pampd_add_to_obj(obj, index, pampd);
-	if (unlikely(ret == -ENOMEM))
-		/* may have partially built objnode tree ("stump") */
-		goto delete_and_free;
-	goto out;
-
-delete_and_free:
-	(void)tmem_pampd_delete_from_obj(obj, index);
-free:
-	if (pampd)
-		(*tmem_pamops.free)(pampd, pool, NULL, 0);
-	if (objnew) {
-		tmem_obj_free(objnew, hb);
-		(*tmem_hostops.obj_free)(objnew, pool);
-	}
-out:
-	spin_unlock(&hb->lock);
-	return ret;
-}
-
-/*
- * "Get" a page, e.g. if one can be found, copy the tmem page with the
- * matching handle from PAM space to the kernel.  By tmem definition,
- * when a "get" is successful on an ephemeral page, the page is "flushed",
- * and when a "get" is successful on a persistent page, the page is retained
- * in tmem.  Note that to preserve
- * coherency, "get" can never be skipped if tmem contains the data.
- * That is, if a get is done with a certain handle and fails, any
- * subsequent "get" must also fail (unless of course there is a
- * "put" done with the same handle).
-
- */
-int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
-		char *data, size_t *size, bool raw, int get_and_free)
-{
-	struct tmem_obj *obj;
-	void *pampd;
-	bool ephemeral = is_ephemeral(pool);
-	int ret = -1;
-	struct tmem_hashbucket *hb;
-	bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral);
-	bool lock_held = false;
-
-	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
-	spin_lock(&hb->lock);
-	lock_held = true;
-	obj = tmem_obj_find(hb, oidp);
-	if (obj == NULL)
-		goto out;
-	if (free)
-		pampd = tmem_pampd_delete_from_obj(obj, index);
-	else
-		pampd = tmem_pampd_lookup_in_obj(obj, index);
-	if (pampd == NULL)
-		goto out;
-	if (free) {
-		if (obj->pampd_count == 0) {
-			tmem_obj_free(obj, hb);
-			(*tmem_hostops.obj_free)(obj, pool);
-			obj = NULL;
-		}
-	}
-	if (tmem_pamops.is_remote(pampd)) {
-		lock_held = false;
-		spin_unlock(&hb->lock);
-	}
-	if (free)
-		ret = (*tmem_pamops.get_data_and_free)(
-				data, size, raw, pampd, pool, oidp, index);
-	else
-		ret = (*tmem_pamops.get_data)(
-				data, size, raw, pampd, pool, oidp, index);
-	if (ret < 0)
-		goto out;
-	ret = 0;
-out:
-	if (lock_held)
-		spin_unlock(&hb->lock);
-	return ret;
-}
-
-/*
- * If a page in tmem matches the handle, "flush" this page from tmem such
- * that any subsequent "get" does not succeed (unless, of course, there
- * was another "put" with the same handle).
- */
-int tmem_flush_page(struct tmem_pool *pool,
-				struct tmem_oid *oidp, uint32_t index)
-{
-	struct tmem_obj *obj;
-	void *pampd;
-	int ret = -1;
-	struct tmem_hashbucket *hb;
-
-	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
-	spin_lock(&hb->lock);
-	obj = tmem_obj_find(hb, oidp);
-	if (obj == NULL)
-		goto out;
-	pampd = tmem_pampd_delete_from_obj(obj, index);
-	if (pampd == NULL)
-		goto out;
-	(*tmem_pamops.free)(pampd, pool, oidp, index);
-	if (obj->pampd_count == 0) {
-		tmem_obj_free(obj, hb);
-		(*tmem_hostops.obj_free)(obj, pool);
-	}
-	ret = 0;
-
-out:
-	spin_unlock(&hb->lock);
-	return ret;
-}
-
-/*
- * If a page in tmem matches the handle, replace the page so that any
- * subsequent "get" gets the new page.  Returns 0 if
- * there was a page to replace, else returns -1.
- */
-int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp,
-			uint32_t index, void *new_pampd)
-{
-	struct tmem_obj *obj;
-	int ret = -1;
-	struct tmem_hashbucket *hb;
-
-	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
-	spin_lock(&hb->lock);
-	obj = tmem_obj_find(hb, oidp);
-	if (obj == NULL)
-		goto out;
-	new_pampd = tmem_pampd_replace_in_obj(obj, index, new_pampd);
-	ret = (*tmem_pamops.replace_in_obj)(new_pampd, obj);
-out:
-	spin_unlock(&hb->lock);
-	return ret;
-}
-
-/*
- * "Flush" all pages in tmem matching this oid.
- */
-int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp)
-{
-	struct tmem_obj *obj;
-	struct tmem_hashbucket *hb;
-	int ret = -1;
-
-	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
-	spin_lock(&hb->lock);
-	obj = tmem_obj_find(hb, oidp);
-	if (obj == NULL)
-		goto out;
-	tmem_pampd_destroy_all_in_obj(obj);
-	tmem_obj_free(obj, hb);
-	(*tmem_hostops.obj_free)(obj, pool);
-	ret = 0;
-
-out:
-	spin_unlock(&hb->lock);
-	return ret;
-}
-
-/*
- * "Flush" all pages (and tmem_objs) from this tmem_pool and disable
- * all subsequent access to this tmem_pool.
- */
-int tmem_destroy_pool(struct tmem_pool *pool)
-{
-	int ret = -1;
-
-	if (pool == NULL)
-		goto out;
-	tmem_pool_flush(pool, 1);
-	ret = 0;
-out:
-	return ret;
-}
-
-static LIST_HEAD(tmem_global_pool_list);
-
-/*
- * Create a new tmem_pool with the provided flag and return
- * a pool id provided by the tmem host implementation.
- */
-void tmem_new_pool(struct tmem_pool *pool, uint32_t flags)
-{
-	int persistent = flags & TMEM_POOL_PERSIST;
-	int shared = flags & TMEM_POOL_SHARED;
-	struct tmem_hashbucket *hb = &pool->hashbucket[0];
-	int i;
-
-	for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
-		hb->obj_rb_root = RB_ROOT;
-		spin_lock_init(&hb->lock);
-	}
-	INIT_LIST_HEAD(&pool->pool_list);
-	atomic_set(&pool->obj_count, 0);
-	SET_SENTINEL(pool, POOL);
-	list_add_tail(&pool->pool_list, &tmem_global_pool_list);
-	pool->persistent = persistent;
-	pool->shared = shared;
-}
--- a/drivers/staging/zcache/tmem.h
+++ b/drivers/staging/zcache/tmem.h
@ -1,206 +0,0 @@
-/*
- * tmem.h
- *
- * Transcendent memory
- *
- * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
- */
-
-#ifndef _TMEM_H_
-#define _TMEM_H_
-
-#include <linux/types.h>
-#include <linux/highmem.h>
-#include <linux/hash.h>
-#include <linux/atomic.h>
-
-/*
- * These are pre-defined by the Xen<->Linux ABI
- */
-#define TMEM_PUT_PAGE			4
-#define TMEM_GET_PAGE			5
-#define TMEM_FLUSH_PAGE			6
-#define TMEM_FLUSH_OBJECT		7
-#define TMEM_POOL_PERSIST		1
-#define TMEM_POOL_SHARED		2
-#define TMEM_POOL_PRECOMPRESSED		4
-#define TMEM_POOL_PAGESIZE_SHIFT	4
-#define TMEM_POOL_PAGESIZE_MASK		0xf
-#define TMEM_POOL_RESERVED_BITS		0x00ffff00
-
-/*
- * sentinels have proven very useful for debugging but can be removed
- * or disabled before final merge.
- */
-#define SENTINELS
-#ifdef SENTINELS
-#define DECL_SENTINEL uint32_t sentinel;
-#define SET_SENTINEL(_x, _y) (_x->sentinel = _y##_SENTINEL)
-#define INVERT_SENTINEL(_x, _y) (_x->sentinel = ~_y##_SENTINEL)
-#define ASSERT_SENTINEL(_x, _y) WARN_ON(_x->sentinel != _y##_SENTINEL)
-#define ASSERT_INVERTED_SENTINEL(_x, _y) WARN_ON(_x->sentinel != ~_y##_SENTINEL)
-#else
-#define DECL_SENTINEL
-#define SET_SENTINEL(_x, _y) do { } while (0)
-#define INVERT_SENTINEL(_x, _y) do { } while (0)
-#define ASSERT_SENTINEL(_x, _y) do { } while (0)
-#define ASSERT_INVERTED_SENTINEL(_x, _y) do { } while (0)
-#endif
-
-#define ASSERT_SPINLOCK(_l)	lockdep_assert_held(_l)
-
-/*
- * A pool is the highest-level data structure managed by tmem and
- * usually corresponds to a large independent set of pages such as
- * a filesystem.  Each pool has an id, and certain attributes and counters.
- * It also contains a set of hash buckets, each of which contains an rbtree
- * of objects and a lock to manage concurrency within the pool.
- */
-
-#define TMEM_HASH_BUCKET_BITS	8
-#define TMEM_HASH_BUCKETS	(1<<TMEM_HASH_BUCKET_BITS)
-
-struct tmem_hashbucket {
-	struct rb_root obj_rb_root;
-	spinlock_t lock;
-};
-
-struct tmem_pool {
-	void *client; /* "up" for some clients, avoids table lookup */
-	struct list_head pool_list;
-	uint32_t pool_id;
-	bool persistent;
-	bool shared;
-	atomic_t obj_count;
-	atomic_t refcount;
-	struct tmem_hashbucket hashbucket[TMEM_HASH_BUCKETS];
-	DECL_SENTINEL
-};
-
-#define is_persistent(_p)  (_p->persistent)
-#define is_ephemeral(_p)   (!(_p->persistent))
-
-/*
- * An object id ("oid") is large: 192-bits (to ensure, for example, files
- * in a modern filesystem can be uniquely identified).
- */
-
-struct tmem_oid {
-	uint64_t oid[3];
-};
-
-static inline void tmem_oid_set_invalid(struct tmem_oid *oidp)
-{
-	oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL;
-}
-
-static inline bool tmem_oid_valid(struct tmem_oid *oidp)
-{
-	return oidp->oid[0] != -1UL || oidp->oid[1] != -1UL ||
-		oidp->oid[2] != -1UL;
-}
-
-static inline int tmem_oid_compare(struct tmem_oid *left,
-					struct tmem_oid *right)
-{
-	int ret;
-
-	if (left->oid[2] == right->oid[2]) {
-		if (left->oid[1] == right->oid[1]) {
-			if (left->oid[0] == right->oid[0])
-				ret = 0;
-			else if (left->oid[0] < right->oid[0])
-				ret = -1;
-			else
-				return 1;
-		} else if (left->oid[1] < right->oid[1])
-			ret = -1;
-		else
-			ret = 1;
-	} else if (left->oid[2] < right->oid[2])
-		ret = -1;
-	else
-		ret = 1;
-	return ret;
-}
-
-static inline unsigned tmem_oid_hash(struct tmem_oid *oidp)
-{
-	return hash_long(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2],
-				TMEM_HASH_BUCKET_BITS);
-}
-
-/*
- * A tmem_obj contains an identifier (oid), pointers to the parent
- * pool and the rb_tree to which it belongs, counters, and an ordered
- * set of pampds, structured in a radix-tree-like tree.  The intermediate
- * nodes of the tree are called tmem_objnodes.
- */
-
-struct tmem_objnode;
-
-struct tmem_obj {
-	struct tmem_oid oid;
-	struct tmem_pool *pool;
-	struct rb_node rb_tree_node;
-	struct tmem_objnode *objnode_tree_root;
-	unsigned int objnode_tree_height;
-	unsigned long objnode_count;
-	long pampd_count;
-	void *extra; /* for private use by pampd implementation */
-	DECL_SENTINEL
-};
-
-#define OBJNODE_TREE_MAP_SHIFT 6
-#define OBJNODE_TREE_MAP_SIZE (1UL << OBJNODE_TREE_MAP_SHIFT)
-#define OBJNODE_TREE_MAP_MASK (OBJNODE_TREE_MAP_SIZE-1)
-#define OBJNODE_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
-#define OBJNODE_TREE_MAX_PATH \
-		(OBJNODE_TREE_INDEX_BITS/OBJNODE_TREE_MAP_SHIFT + 2)
-
-struct tmem_objnode {
-	struct tmem_obj *obj;
-	DECL_SENTINEL
-	void *slots[OBJNODE_TREE_MAP_SIZE];
-	unsigned int slots_in_use;
-};
-
-/* pampd abstract datatype methods provided by the PAM implementation */
-struct tmem_pamops {
-	void *(*create)(char *, size_t, bool, int,
-			struct tmem_pool *, struct tmem_oid *, uint32_t);
-	int (*get_data)(char *, size_t *, bool, void *, struct tmem_pool *,
-				struct tmem_oid *, uint32_t);
-	int (*get_data_and_free)(char *, size_t *, bool, void *,
-				struct tmem_pool *, struct tmem_oid *,
-				uint32_t);
-	void (*free)(void *, struct tmem_pool *, struct tmem_oid *, uint32_t);
-	void (*free_obj)(struct tmem_pool *, struct tmem_obj *);
-	bool (*is_remote)(void *);
-	void (*new_obj)(struct tmem_obj *);
-	int (*replace_in_obj)(void *, struct tmem_obj *);
-};
-extern void tmem_register_pamops(struct tmem_pamops *m);
-
-/* memory allocation methods provided by the host implementation */
-struct tmem_hostops {
-	struct tmem_obj *(*obj_alloc)(struct tmem_pool *);
-	void (*obj_free)(struct tmem_obj *, struct tmem_pool *);
-	struct tmem_objnode *(*objnode_alloc)(struct tmem_pool *);
-	void (*objnode_free)(struct tmem_objnode *, struct tmem_pool *);
-};
-extern void tmem_register_hostops(struct tmem_hostops *m);
-
-/* core tmem accessor functions */
-extern int tmem_put(struct tmem_pool *, struct tmem_oid *, uint32_t index,
-			char *, size_t, bool, bool);
-extern int tmem_get(struct tmem_pool *, struct tmem_oid *, uint32_t index,
-			char *, size_t *, bool, int);
-extern int tmem_replace(struct tmem_pool *, struct tmem_oid *, uint32_t index,
-			void *);
-extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *,
-			uint32_t index);
-extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *);
-extern int tmem_destroy_pool(struct tmem_pool *);
-extern void tmem_new_pool(struct tmem_pool *, uint32_t);
-#endif /* _TMEM_H */
--- a/drivers/staging/zcache/zcache-main.c
+++ b/drivers/staging/zcache/zcache-main.c
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@ -112,6 +112,9 @@ enum pageflags {
 #ifdef CONFIG_SCFS_LOWER_PAGECACHE_INVALIDATION
 	PG_scfslower,
 	PG_nocache,
+#endif
+#ifdef CONFIG_ZCACHE
+	PG_was_active,
 #endif
 	__NR_PAGEFLAGS,
 #if defined(CONFIG_CMA_PAGE_COUNTING)
@ -217,6 +220,11 @@ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)

 __PAGEFLAG(SlobFree, slob_free)
+#ifdef CONFIG_ZCACHE
+PAGEFLAG(WasActive, was_active)
+#else
+PAGEFLAG_FALSE(WasActive)
+#endif

 /*
 * Private page markings that may be used by the filesystem that owns the page
--- a/include/linux/zbud.h
+++ b/include/linux/zbud.h
@ -0,0 +1,22 @@
+#ifndef _ZBUD_H_
+#define _ZBUD_H_
+
+#include <linux/types.h>
+
+struct zbud_pool;
+
+struct zbud_ops {
+	int (*evict)(struct zbud_pool *pool, unsigned long handle);
+};
+
+struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops);
+void zbud_destroy_pool(struct zbud_pool *pool);
+int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
+	unsigned long *handle);
+void zbud_free(struct zbud_pool *pool, unsigned long handle);
+int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);
+void *zbud_map(struct zbud_pool *pool, unsigned long handle);
+void zbud_unmap(struct zbud_pool *pool, unsigned long handle);
+u64 zbud_get_pool_size(struct zbud_pool *pool);
+
+#endif /* _ZBUD_H_ */
--- a/include/linux/zcache.h
+++ b/include/linux/zcache.h
@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#ifndef _LINUX_ZCACHE_H
+#define _LINUX_ZCACHE_H
+
+#ifdef CONFIG_ZCACHE
+extern u64 zcache_pages(void);
+#else
+u64 zcache_pages(void) { return 0; }
+#endif
+
+#endif /* _LINUX_ZCACHE_H */
--- a/mm/Kconfig
+++ b/mm/Kconfig
@ -507,11 +507,36 @@ config MIN_DIRTY_THRESH_PAGES
 	  disables this option.)
 	  Do not use it if you unsure.

-
 config MMAP_READAROUND_LIMIT
 	int "Limit mmap readaround upperbound"
 	default 0
 	help
 	  Inappropriate mmap readaround size can hurt device performance
 	  during the sluggish situation. Add the hard upper-limit for
-	  mmap readaround.
+	  mmap readaround.
+	  
+config ZBUD
+	tristate "Low density storage for compressed pages"
+	default n
+	help
+	  A special purpose allocator for storing compressed pages.
+	  It is designed to store up to two compressed pages per physical
+	  page.  While this design limits storage density, it has simple and
+	  deterministic reclaim properties that make it preferable to a higher
+	  density approach when reclaim will be used.	  
+
+config ZCACHE
+	   bool "Compressed cache for file pages (EXPERIMENTAL)"
+	   depends on CRYPTO && CLEANCACHE
+	   select CRYPTO_LZO
+	   select ZBUD
+	   default n
+	   help
+		 A compressed cache for file pages.
+		 It takes active file pages that are in the process of being reclaimed
+		 and attempts to compress them into a dynamically allocated RAM-based
+		 memory pool.
+
+		 If this process is successful, when those file pages needed again, the
+		 I/O reading operation was avoided. This results in a significant performance
+		 gains under memory pressure for systems full with file pages.
--- a/mm/Makefile
+++ b/mm/Makefile
@ -53,3 +53,5 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
 obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
 obj-$(CONFIG_CLEANCACHE) += cleancache.o
 obj-$(CONFIG_ZSMALLOC_NEW) += zsmalloc.o
+obj-$(CONFIG_ZCACHE)    += zcache.o
+obj-$(CONFIG_ZBUD)  += zbud.o
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@ -1303,6 +1303,7 @@ putback_inactive_pages(struct mem_cgroup_zone *mz,
 	while (!list_empty(page_list)) {
 		struct page *page = lru_to_page(page_list);
 		int lru;
+		int file;

 		VM_BUG_ON(PageLRU(page));
 		list_del(&page->lru);
@ -1315,8 +1316,12 @@ putback_inactive_pages(struct mem_cgroup_zone *mz,
 		SetPageLRU(page);
 		lru = page_lru(page);
 		add_page_to_lru_list(zone, page, lru);
+
+		file = is_file_lru(lru);
+		if (IS_ENABLED(CONFIG_ZCACHE))
+			if (file)
+				SetPageWasActive(page);
 		if (is_active_lru(lru)) {
-			int file = is_file_lru(lru);
 			int numpages = hpage_nr_pages(page);
 			reclaim_stat->recent_rotated[file] += numpages;
 		}
@ -1612,6 +1617,12 @@ static void shrink_active_list(unsigned long nr_to_scan,
 		}

 		ClearPageActive(page);	/* we are de-activating */
+		if (IS_ENABLED(CONFIG_ZCACHE))
+			/*
+			 * For zcache to know whether the page is from active
+			 * file list
+			 */
+			SetPageWasActive(page);
 		list_add(&page->lru, &l_inactive);
 	}

--- a/mm/zbud.c
+++ b/mm/zbud.c
@ -0,0 +1,527 @@
+/*
+ * zbud.c
+ *
+ * Copyright (C) 2013, Seth Jennings, IBM
+ *
+ * Concepts based on zcache internal zbud allocator by Dan Magenheimer.
+ *
+ * zbud is an special purpose allocator for storing compressed pages.  Contrary
+ * to what its name may suggest, zbud is not a buddy allocator, but rather an
+ * allocator that "buddies" two compressed pages together in a single memory
+ * page.
+ *
+ * While this design limits storage density, it has simple and deterministic
+ * reclaim properties that make it preferable to a higher density approach when
+ * reclaim will be used.
+ *
+ * zbud works by storing compressed pages, or "zpages", together in pairs in a
+ * single memory page called a "zbud page".  The first buddy is "left
+ * justifed" at the beginning of the zbud page, and the last buddy is "right
+ * justified" at the end of the zbud page.  The benefit is that if either
+ * buddy is freed, the freed buddy space, coalesced with whatever slack space
+ * that existed between the buddies, results in the largest possible free region
+ * within the zbud page.
+ *
+ * zbud also provides an attractive lower bound on density. The ratio of zpages
+ * to zbud pages can not be less than 1.  This ensures that zbud can never "do
+ * harm" by using more pages to store zpages than the uncompressed zpages would
+ * have used on their own.
+ *
+ * zbud pages are divided into "chunks".  The size of the chunks is fixed at
+ * compile time and determined by NCHUNKS_ORDER below.  Dividing zbud pages
+ * into chunks allows organizing unbuddied zbud pages into a manageable number
+ * of unbuddied lists according to the number of free chunks available in the
+ * zbud page.
+ *
+ * The zbud API differs from that of conventional allocators in that the
+ * allocation function, zbud_alloc(), returns an opaque handle to the user,
+ * not a dereferenceable pointer.  The user must map the handle using
+ * zbud_map() in order to get a usable pointer by which to access the
+ * allocation data and unmap the handle with zbud_unmap() when operations
+ * on the allocation data are complete.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/zbud.h>
+
+/*****************
+ * Structures
+*****************/
+/*
+ * NCHUNKS_ORDER determines the internal allocation granularity, effectively
+ * adjusting internal fragmentation.  It also determines the number of
+ * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
+ * allocation granularity will be in chunks of size PAGE_SIZE/64, and there
+ * will be 64 freelists per pool.
+ */
+#define NCHUNKS_ORDER	6
+
+#define CHUNK_SHIFT	(PAGE_SHIFT - NCHUNKS_ORDER)
+#define CHUNK_SIZE	(1 << CHUNK_SHIFT)
+#define NCHUNKS		(PAGE_SIZE >> CHUNK_SHIFT)
+#define ZHDR_SIZE_ALIGNED CHUNK_SIZE
+
+/**
+ * struct zbud_pool - stores metadata for each zbud pool
+ * @lock:	protects all pool fields and first|last_chunk fields of any
+ *		zbud page in the pool
+ * @unbuddied:	array of lists tracking zbud pages that only contain one buddy;
+ *		the lists each zbud page is added to depends on the size of
+ *		its free region.
+ * @buddied:	list tracking the zbud pages that contain two buddies;
+ *		these zbud pages are full
+ * @lru:	list tracking the zbud pages in LRU order by most recently
+ *		added buddy.
+ * @pages_nr:	number of zbud pages in the pool.
+ * @ops:	pointer to a structure of user defined operations specified at
+ *		pool creation time.
+ *
+ * This structure is allocated at pool creation time and maintains metadata
+ * pertaining to a particular zbud pool.
+ */
+struct zbud_pool {
+	spinlock_t lock;
+	struct list_head unbuddied[NCHUNKS];
+	struct list_head buddied;
+	struct list_head lru;
+	u64 pages_nr;
+	struct zbud_ops *ops;
+};
+
+/*
+ * struct zbud_header - zbud page metadata occupying the first chunk of each
+ *			zbud page.
+ * @buddy:	links the zbud page into the unbuddied/buddied lists in the pool
+ * @lru:	links the zbud page into the lru list in the pool
+ * @first_chunks:	the size of the first buddy in chunks, 0 if free
+ * @last_chunks:	the size of the last buddy in chunks, 0 if free
+ */
+struct zbud_header {
+	struct list_head buddy;
+	struct list_head lru;
+	unsigned int first_chunks;
+	unsigned int last_chunks;
+	bool under_reclaim;
+};
+
+/*****************
+ * Helpers
+*****************/
+/* Just to make the code easier to read */
+enum buddy {
+	FIRST,
+	LAST
+};
+
+/* Converts an allocation size in bytes to size in zbud chunks */
+static int size_to_chunks(int size)
+{
+	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
+}
+
+#define for_each_unbuddied_list(_iter, _begin) \
+	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)
+
+/* Initializes the zbud header of a newly allocated zbud page */
+static struct zbud_header *init_zbud_page(struct page *page)
+{
+	struct zbud_header *zhdr = page_address(page);
+	zhdr->first_chunks = 0;
+	zhdr->last_chunks = 0;
+	INIT_LIST_HEAD(&zhdr->buddy);
+	INIT_LIST_HEAD(&zhdr->lru);
+	zhdr->under_reclaim = 0;
+	return zhdr;
+}
+
+/* Resets the struct page fields and frees the page */
+static void free_zbud_page(struct zbud_header *zhdr)
+{
+	__free_page(virt_to_page(zhdr));
+}
+
+/*
+ * Encodes the handle of a particular buddy within a zbud page
+ * Pool lock should be held as this function accesses first|last_chunks
+ */
+static unsigned long encode_handle(struct zbud_header *zhdr, enum buddy bud)
+{
+	unsigned long handle;
+
+	/*
+	 * For now, the encoded handle is actually just the pointer to the data
+	 * but this might not always be the case.  A little information hiding.
+	 * Add CHUNK_SIZE to the handle if it is the first allocation to jump
+	 * over the zbud header in the first chunk.
+	 */
+	handle = (unsigned long)zhdr;
+	if (bud == FIRST)
+		/* skip over zbud header */
+		handle += ZHDR_SIZE_ALIGNED;
+	else /* bud == LAST */
+		handle += PAGE_SIZE - (zhdr->last_chunks  << CHUNK_SHIFT);
+	return handle;
+}
+
+/* Returns the zbud page where a given handle is stored */
+static struct zbud_header *handle_to_zbud_header(unsigned long handle)
+{
+	return (struct zbud_header *)(handle & PAGE_MASK);
+}
+
+/* Returns the number of free chunks in a zbud page */
+static int num_free_chunks(struct zbud_header *zhdr)
+{
+	/*
+	 * Rather than branch for different situations, just use the fact that
+	 * free buddies have a length of zero to simplify everything. -1 at the
+	 * end for the zbud header.
+	 */
+	return NCHUNKS - zhdr->first_chunks - zhdr->last_chunks - 1;
+}
+
+/*****************
+ * API Functions
+*****************/
+/**
+ * zbud_create_pool() - create a new zbud pool
+ * @gfp:	gfp flags when allocating the zbud pool structure
+ * @ops:	user-defined operations for the zbud pool
+ *
+ * Return: pointer to the new zbud pool or NULL if the metadata allocation
+ * failed.
+ */
+struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops)
+{
+	struct zbud_pool *pool;
+	int i;
+
+	pool = kmalloc(sizeof(struct zbud_pool), gfp);
+	if (!pool)
+		return NULL;
+	spin_lock_init(&pool->lock);
+	for_each_unbuddied_list(i, 0)
+		INIT_LIST_HEAD(&pool->unbuddied[i]);
+	INIT_LIST_HEAD(&pool->buddied);
+	INIT_LIST_HEAD(&pool->lru);
+	pool->pages_nr = 0;
+	pool->ops = ops;
+	return pool;
+}
+
+/**
+ * zbud_destroy_pool() - destroys an existing zbud pool
+ * @pool:	the zbud pool to be destroyed
+ *
+ * The pool should be emptied before this function is called.
+ */
+void zbud_destroy_pool(struct zbud_pool *pool)
+{
+	kfree(pool);
+}
+
+/**
+ * zbud_alloc() - allocates a region of a given size
+ * @pool:	zbud pool from which to allocate
+ * @size:	size in bytes of the desired allocation
+ * @gfp:	gfp flags used if the pool needs to grow
+ * @handle:	handle of the new allocation
+ *
+ * This function will attempt to find a free region in the pool large enough to
+ * satisfy the allocation request.  A search of the unbuddied lists is
+ * performed first. If no suitable free region is found, then a new page is
+ * allocated and added to the pool to satisfy the request.
+ *
+ * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
+ * as zbud pool pages.
+ *
+ * Return: 0 if success and handle is set, otherwise -EINVAL is the size or
+ * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
+ * a new page.
+ */
+int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
+			unsigned long *handle)
+{
+	int chunks, i, freechunks;
+	struct zbud_header *zhdr = NULL;
+	enum buddy bud;
+	struct page *page;
+
+	if (size <= 0 || gfp & __GFP_HIGHMEM)
+		return -EINVAL;
+	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED)
+		return -ENOSPC;
+	chunks = size_to_chunks(size);
+	spin_lock(&pool->lock);
+
+	/* First, try to find an unbuddied zbud page. */
+	zhdr = NULL;
+	for_each_unbuddied_list(i, chunks) {
+		if (!list_empty(&pool->unbuddied[i])) {
+			zhdr = list_first_entry(&pool->unbuddied[i],
+					struct zbud_header, buddy);
+			list_del(&zhdr->buddy);
+			if (zhdr->first_chunks == 0)
+				bud = FIRST;
+			else
+				bud = LAST;
+			goto found;
+		}
+	}
+
+	/* Couldn't find unbuddied zbud page, create new one */
+	spin_unlock(&pool->lock);
+	page = alloc_page(gfp);
+	if (!page)
+		return -ENOMEM;
+	spin_lock(&pool->lock);
+	pool->pages_nr++;
+	zhdr = init_zbud_page(page);
+	bud = FIRST;
+
+found:
+	if (bud == FIRST)
+		zhdr->first_chunks = chunks;
+	else
+		zhdr->last_chunks = chunks;
+
+	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0) {
+		/* Add to unbuddied list */
+		freechunks = num_free_chunks(zhdr);
+		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+	} else {
+		/* Add to buddied list */
+		list_add(&zhdr->buddy, &pool->buddied);
+	}
+
+	/* Add/move zbud page to beginning of LRU */
+	if (!list_empty(&zhdr->lru))
+		list_del(&zhdr->lru);
+	list_add(&zhdr->lru, &pool->lru);
+
+	*handle = encode_handle(zhdr, bud);
+	spin_unlock(&pool->lock);
+
+	return 0;
+}
+
+/**
+ * zbud_free() - frees the allocation associated with the given handle
+ * @pool:	pool in which the allocation resided
+ * @handle:	handle associated with the allocation returned by zbud_alloc()
+ *
+ * In the case that the zbud page in which the allocation resides is under
+ * reclaim, as indicated by the PG_reclaim flag being set, this function
+ * only sets the first|last_chunks to 0.  The page is actually freed
+ * once both buddies are evicted (see zbud_reclaim_page() below).
+ */
+void zbud_free(struct zbud_pool *pool, unsigned long handle)
+{
+	struct zbud_header *zhdr;
+	int freechunks;
+
+	spin_lock(&pool->lock);
+	zhdr = handle_to_zbud_header(handle);
+
+	/* If first buddy, handle will be page aligned */
+	if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK)
+		zhdr->last_chunks = 0;
+	else
+		zhdr->first_chunks = 0;
+
+	if (zhdr->under_reclaim) {
+		/* zbud page is under reclaim, reclaim will free */
+		spin_unlock(&pool->lock);
+		return;
+	}
+
+	/* Remove from existing buddy list */
+	list_del(&zhdr->buddy);
+
+	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
+		/* zbud page is empty, free */
+		list_del(&zhdr->lru);
+		free_zbud_page(zhdr);
+		pool->pages_nr--;
+	} else {
+		/* Add to unbuddied list */
+		freechunks = num_free_chunks(zhdr);
+		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+	}
+
+	spin_unlock(&pool->lock);
+}
+
+#define list_tail_entry(ptr, type, member) \
+	list_entry((ptr)->prev, type, member)
+
+/**
+ * zbud_reclaim_page() - evicts allocations from a pool page and frees it
+ * @pool:	pool from which a page will attempt to be evicted
+ * @retires:	number of pages on the LRU list for which eviction will
+ *		be attempted before failing
+ *
+ * zbud reclaim is different from normal system reclaim in that the reclaim is
+ * done from the bottom, up.  This is because only the bottom layer, zbud, has
+ * information on how the allocations are organized within each zbud page. This
+ * has the potential to create interesting locking situations between zbud and
+ * the user, however.
+ *
+ * To avoid these, this is how zbud_reclaim_page() should be called:
+
+ * The user detects a page should be reclaimed and calls zbud_reclaim_page().
+ * zbud_reclaim_page() will remove a zbud page from the pool LRU list and call
+ * the user-defined eviction handler with the pool and handle as arguments.
+ *
+ * If the handle can not be evicted, the eviction handler should return
+ * non-zero. zbud_reclaim_page() will add the zbud page back to the
+ * appropriate list and try the next zbud page on the LRU up to
+ * a user defined number of retries.
+ *
+ * If the handle is successfully evicted, the eviction handler should
+ * return 0 _and_ should have called zbud_free() on the handle. zbud_free()
+ * contains logic to delay freeing the page if the page is under reclaim,
+ * as indicated by the setting of the PG_reclaim flag on the underlying page.
+ *
+ * If all buddies in the zbud page are successfully evicted, then the
+ * zbud page can be freed.
+ *
+ * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are
+ * no pages to evict or an eviction handler is not registered, -EAGAIN if
+ * the retry limit was hit.
+ */
+int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
+{
+	int i, ret, freechunks;
+	struct zbud_header *zhdr;
+	unsigned long first_handle = 0, last_handle = 0;
+
+	spin_lock(&pool->lock);
+	if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
+			retries == 0) {
+		spin_unlock(&pool->lock);
+		return -EINVAL;
+	}
+	for (i = 0; i < retries; i++) {
+		zhdr = list_tail_entry(&pool->lru, struct zbud_header, lru);
+		list_del(&zhdr->lru);
+		list_del(&zhdr->buddy);
+		/* Protect zbud page against free */
+		zhdr->under_reclaim = true;
+		/*
+		 * We need encode the handles before unlocking, since we can
+		 * race with free that will set (first|last)_chunks to 0
+		 */
+		first_handle = 0;
+		last_handle = 0;
+		if (zhdr->first_chunks)
+			first_handle = encode_handle(zhdr, FIRST);
+		if (zhdr->last_chunks)
+			last_handle = encode_handle(zhdr, LAST);
+		spin_unlock(&pool->lock);
+
+		/* Issue the eviction callback(s) */
+		if (first_handle) {
+			ret = pool->ops->evict(pool, first_handle);
+			if (ret)
+				goto next;
+		}
+		if (last_handle) {
+			ret = pool->ops->evict(pool, last_handle);
+			if (ret)
+				goto next;
+		}
+next:
+		spin_lock(&pool->lock);
+		zhdr->under_reclaim = false;
+		if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
+			/*
+			 * Both buddies are now free, free the zbud page and
+			 * return success.
+			 */
+			free_zbud_page(zhdr);
+			pool->pages_nr--;
+			spin_unlock(&pool->lock);
+			return 0;
+		} else if (zhdr->first_chunks == 0 ||
+				zhdr->last_chunks == 0) {
+			/* add to unbuddied list */
+			freechunks = num_free_chunks(zhdr);
+			list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+		} else {
+			/* add to buddied list */
+			list_add(&zhdr->buddy, &pool->buddied);
+		}
+
+		/* add to beginning of LRU */
+		list_add(&zhdr->lru, &pool->lru);
+	}
+	spin_unlock(&pool->lock);
+	return -EAGAIN;
+}
+
+/**
+ * zbud_map() - maps the allocation associated with the given handle
+ * @pool:	pool in which the allocation resides
+ * @handle:	handle associated with the allocation to be mapped
+ *
+ * While trivial for zbud, the mapping functions for others allocators
+ * implementing this allocation API could have more complex information encoded
+ * in the handle and could create temporary mappings to make the data
+ * accessible to the user.
+ *
+ * Returns: a pointer to the mapped allocation
+ */
+void *zbud_map(struct zbud_pool *pool, unsigned long handle)
+{
+	return (void *)(handle);
+}
+
+/**
+ * zbud_unmap() - maps the allocation associated with the given handle
+ * @pool:	pool in which the allocation resides
+ * @handle:	handle associated with the allocation to be unmapped
+ */
+void zbud_unmap(struct zbud_pool *pool, unsigned long handle)
+{
+}
+
+/**
+ * zbud_get_pool_size() - gets the zbud pool size in pages
+ * @pool:	pool whose size is being queried
+ *
+ * Returns: size in pages of the given pool.  The pool lock need not be
+ * taken to access pages_nr.
+ */
+u64 zbud_get_pool_size(struct zbud_pool *pool)
+{
+	return pool->pages_nr;
+}
+
+static int __init init_zbud(void)
+{
+	/* Make sure the zbud header will fit in one chunk */
+	BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED);
+	pr_info("loaded\n");
+	return 0;
+}
+
+static void __exit exit_zbud(void)
+{
+	pr_info("unloaded\n");
+}
+
+module_init(init_zbud);
+module_exit(exit_zbud);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Seth Jennings <sjenning@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("Buddy Allocator for Compressed Pages");
--- a/mm/zcache.c
+++ b/mm/zcache.c