mm: Backport ZCache from QC kernel 3.18
Change-Id: I3edff3a56cf6525f13430ab93309272d1faecfe1
Signed-off-by: Kevin F. Haggerty <haggertk@lineageos.org>
parent c780c66637
commit b23d0a2d3c
--- a/drivers/staging/Kconfig
+++ b/drivers/staging/Kconfig
@@ -82,8 +82,6 @@ source "drivers/staging/iio/Kconfig"
 
 source "drivers/staging/zram/Kconfig"
 
-source "drivers/staging/zcache/Kconfig"
-
 source "drivers/staging/zsmalloc/Kconfig"
 
 source "drivers/staging/wlags49_h2/Kconfig"
--- a/drivers/staging/Makefile
+++ b/drivers/staging/Makefile
@@ -33,7 +33,6 @@ obj-$(CONFIG_VME_BUS) += vme/
 obj-$(CONFIG_DX_SEP) += sep/
 obj-$(CONFIG_IIO) += iio/
 obj-$(CONFIG_ZRAM) += zram/
-obj-$(CONFIG_ZCACHE) += zcache/
 obj-$(CONFIG_ZSMALLOC) += zsmalloc/
 obj-$(CONFIG_WLAGS49_H2) += wlags49_h2/
 obj-$(CONFIG_WLAGS49_H25) += wlags49_h25/
--- a/drivers/staging/android/lowmemorykiller.c
+++ b/drivers/staging/android/lowmemorykiller.c
@@ -41,6 +41,7 @@
 #include <linux/delay.h>
 #include <linux/swap.h>
 #include <linux/fs.h>
+#include <linux/zcache.h>
 
 #include <linux/ratelimit.h>
 
@@ -199,7 +200,7 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
 	is_active_high = (global_page_state(NR_ACTIVE_FILE) >
 			  global_page_state(NR_INACTIVE_FILE)) ? 1 : 0;
 #endif
-	other_file = global_page_state(NR_FILE_PAGES);
+	other_file = global_page_state(NR_FILE_PAGES) + zcache_pages();
 
 #if defined(CONFIG_CMA_PAGE_COUNTING) && defined(CONFIG_EXCLUDE_LRU_LIVING_IN_CMA)
 	if (get_nr_swap_pages() < SSWAP_LMK_THRESHOLD && cma_page_ratio >= CMA_PAGE_RATIO
@@ -446,7 +447,7 @@ static int android_oom_handler(struct notifier_block *nb,
 
 	nr_cma_inactive_file = global_page_state(NR_CMA_INACTIVE_FILE);
 	nr_cma_active_file = global_page_state(NR_CMA_ACTIVE_FILE);
-	other_file = global_page_state(NR_FILE_PAGES) -
+	other_file = global_page_state(NR_FILE_PAGES) + zcache_pages() -
 		global_page_state(NR_SHMEM) -
 		total_swapcache_pages -
 		nr_cma_inactive_file -
--- a/drivers/staging/zcache/Kconfig
+++ /dev/null
@@ -1,14 +0,0 @@
-config ZCACHE
-	bool "Dynamic compression of swap pages and clean pagecache pages"
-	# X86 dependency is because zsmalloc uses non-portable pte/tlb
-	# functions
-	depends on (CLEANCACHE || FRONTSWAP) && CRYPTO=y && X86
-	select ZSMALLOC
-	select CRYPTO_LZO
-	default n
-	help
-	  Zcache doubles RAM efficiency while providing a significant
-	  performance boost on many workloads.  Zcache uses
-	  compression and an in-kernel implementation of transcendent
-	  memory to store clean page cache pages and swap in RAM,
-	  providing a noticeable reduction in disk I/O.
--- a/drivers/staging/zcache/Makefile
+++ /dev/null
@@ -1,3 +0,0 @@
-zcache-y	:= zcache-main.o tmem.o
-
-obj-$(CONFIG_ZCACHE)	+= zcache.o
--- a/drivers/staging/zcache/tmem.c
+++ /dev/null
@@ -1,770 +0,0 @@
-/*
- * In-kernel transcendent memory (generic implementation)
- *
- * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
- *
- * The primary purpose of Transcendent Memory ("tmem") is to map
- * object-oriented "handles" (triples containing a pool id, an object id,
- * and an index) to pages in a page-accessible memory (PAM).  Tmem
- * references the PAM pages via an abstract "pampd" (PAM page-descriptor),
- * which can be operated on by a set of functions (pamops).  Each pampd
- * contains some representation of PAGE_SIZE bytes worth of data.  Tmem
- * must support potentially millions of pages and must be able to insert,
- * find, and delete these pages at a potential frequency of thousands per
- * second concurrently across many CPUs (and, if used with KVM, across
- * many vcpus across many guests).
- * Tmem is tracked with a hierarchy of data structures, organized by
- * the elements in a handle-tuple: pool_id, object_id, and page index.
- * One or more "clients" (e.g. guests) each provide one or more tmem_pools.
- * Each pool contains a hash table of rb_trees of tmem_objs.  Each
- * tmem_obj contains a radix-tree-like tree of pointers, with intermediate
- * nodes called tmem_objnodes.  Each leaf pointer in this tree points to
- * a pampd, which is accessible only through a small set of callbacks
- * registered by the PAM implementation (see tmem_register_pamops).  Tmem
- * does all memory allocation via a set of callbacks registered by the tmem
- * host implementation (e.g. see tmem_register_hostops).
- */
-
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/atomic.h>
-
-#include "tmem.h"
-
-/* data structure sentinels used for debugging... see tmem.h */
-#define POOL_SENTINEL 0x87658765
-#define OBJ_SENTINEL 0x12345678
-#define OBJNODE_SENTINEL 0xfedcba09
-
-/*
- * A tmem host implementation must use this function to register callbacks
- * for memory allocation.
- */
-static struct tmem_hostops tmem_hostops;
-
-static void tmem_objnode_tree_init(void);
-
-void tmem_register_hostops(struct tmem_hostops *m)
-{
-	tmem_objnode_tree_init();
-	tmem_hostops = *m;
-}
-
-/*
- * A tmem host implementation must use this function to register
- * callbacks for a page-accessible memory (PAM) implementation
- */
-static struct tmem_pamops tmem_pamops;
-
-void tmem_register_pamops(struct tmem_pamops *m)
-{
-	tmem_pamops = *m;
-}
-
-/*
- * Oid's are potentially very sparse and tmem_objs may have an indeterminately
- * short life, being added and deleted at a relatively high frequency.
- * So an rb_tree is an ideal data structure to manage tmem_objs.  But because
- * of the potentially huge number of tmem_objs, each pool manages a hashtable
- * of rb_trees to reduce search, insert, delete, and rebalancing time.
- * Each hashbucket also has a lock to manage concurrent access.
- *
- * The following routines manage tmem_objs.  When any tmem_obj is accessed,
- * the hashbucket lock must be held.
- */
-
-/* searches for object==oid in pool, returns locked object if found */
-static struct tmem_obj *tmem_obj_find(struct tmem_hashbucket *hb,
-					struct tmem_oid *oidp)
-{
-	struct rb_node *rbnode;
-	struct tmem_obj *obj;
-
-	rbnode = hb->obj_rb_root.rb_node;
-	while (rbnode) {
-		BUG_ON(RB_EMPTY_NODE(rbnode));
-		obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
-		switch (tmem_oid_compare(oidp, &obj->oid)) {
-		case 0: /* equal */
-			goto out;
-		case -1:
-			rbnode = rbnode->rb_left;
-			break;
-		case 1:
-			rbnode = rbnode->rb_right;
-			break;
-		}
-	}
-	obj = NULL;
-out:
-	return obj;
-}
-
-static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *);
-
-/* free an object that has no more pampds in it */
-static void tmem_obj_free(struct tmem_obj *obj, struct tmem_hashbucket *hb)
-{
-	struct tmem_pool *pool;
-
-	BUG_ON(obj == NULL);
-	ASSERT_SENTINEL(obj, OBJ);
-	BUG_ON(obj->pampd_count > 0);
-	pool = obj->pool;
-	BUG_ON(pool == NULL);
-	if (obj->objnode_tree_root != NULL) /* may be "stump" with no leaves */
-		tmem_pampd_destroy_all_in_obj(obj);
-	BUG_ON(obj->objnode_tree_root != NULL);
-	BUG_ON((long)obj->objnode_count != 0);
-	atomic_dec(&pool->obj_count);
-	BUG_ON(atomic_read(&pool->obj_count) < 0);
-	INVERT_SENTINEL(obj, OBJ);
-	obj->pool = NULL;
-	tmem_oid_set_invalid(&obj->oid);
-	rb_erase(&obj->rb_tree_node, &hb->obj_rb_root);
-}
-
-/*
- * initialize, and insert a tmem_object_root (called only if find failed)
- */
-static void tmem_obj_init(struct tmem_obj *obj, struct tmem_hashbucket *hb,
-					struct tmem_pool *pool,
-					struct tmem_oid *oidp)
-{
-	struct rb_root *root = &hb->obj_rb_root;
-	struct rb_node **new = &(root->rb_node), *parent = NULL;
-	struct tmem_obj *this;
-
-	BUG_ON(pool == NULL);
-	atomic_inc(&pool->obj_count);
-	obj->objnode_tree_height = 0;
-	obj->objnode_tree_root = NULL;
-	obj->pool = pool;
-	obj->oid = *oidp;
-	obj->objnode_count = 0;
-	obj->pampd_count = 0;
-	(*tmem_pamops.new_obj)(obj);
-	SET_SENTINEL(obj, OBJ);
-	while (*new) {
-		BUG_ON(RB_EMPTY_NODE(*new));
-		this = rb_entry(*new, struct tmem_obj, rb_tree_node);
-		parent = *new;
-		switch (tmem_oid_compare(oidp, &this->oid)) {
-		case 0:
-			BUG(); /* already present; should never happen! */
-			break;
-		case -1:
-			new = &(*new)->rb_left;
-			break;
-		case 1:
-			new = &(*new)->rb_right;
-			break;
-		}
-	}
-	rb_link_node(&obj->rb_tree_node, parent, new);
-	rb_insert_color(&obj->rb_tree_node, root);
-}
-
-/*
- * Tmem is managed as a set of tmem_pools with certain attributes, such as
- * "ephemeral" vs "persistent".  These attributes apply to all tmem_objs
- * and all pampds that belong to a tmem_pool.  A tmem_pool is created
- * or deleted relatively rarely (for example, when a filesystem is
- * mounted or unmounted).
- */
-
-/* flush all data from a pool and, optionally, free it */
-static void tmem_pool_flush(struct tmem_pool *pool, bool destroy)
-{
-	struct rb_node *rbnode;
-	struct tmem_obj *obj;
-	struct tmem_hashbucket *hb = &pool->hashbucket[0];
-	int i;
-
-	BUG_ON(pool == NULL);
-	for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
-		spin_lock(&hb->lock);
-		rbnode = rb_first(&hb->obj_rb_root);
-		while (rbnode != NULL) {
-			obj = rb_entry(rbnode, struct tmem_obj, rb_tree_node);
-			rbnode = rb_next(rbnode);
-			tmem_pampd_destroy_all_in_obj(obj);
-			tmem_obj_free(obj, hb);
-			(*tmem_hostops.obj_free)(obj, pool);
-		}
-		spin_unlock(&hb->lock);
-	}
-	if (destroy)
-		list_del(&pool->pool_list);
-}
-
-/*
- * A tmem_obj contains a radix-tree-like tree in which the intermediate
- * nodes are called tmem_objnodes.  (The kernel lib/radix-tree.c implementation
- * is very specialized and tuned for specific uses and is not particularly
- * suited for use from this code, though some code from the core algorithms has
- * been reused, thus the copyright notices below).  Each tmem_objnode contains
- * a set of pointers which point to either a set of intermediate tmem_objnodes
- * or a set of pampds.
- *
- * Portions Copyright (C) 2001 Momchil Velikov
- * Portions Copyright (C) 2001 Christoph Hellwig
- * Portions Copyright (C) 2005 SGI, Christoph Lameter <clameter@sgi.com>
- */
-
-struct tmem_objnode_tree_path {
-	struct tmem_objnode *objnode;
-	int offset;
-};
-
-/* objnode height_to_maxindex translation */
-static unsigned long tmem_objnode_tree_h2max[OBJNODE_TREE_MAX_PATH + 1];
-
-static void tmem_objnode_tree_init(void)
-{
-	unsigned int ht, tmp;
-
-	for (ht = 0; ht < ARRAY_SIZE(tmem_objnode_tree_h2max); ht++) {
-		tmp = ht * OBJNODE_TREE_MAP_SHIFT;
-		if (tmp >= OBJNODE_TREE_INDEX_BITS)
-			tmem_objnode_tree_h2max[ht] = ~0UL;
-		else
-			tmem_objnode_tree_h2max[ht] =
-			    (~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1;
-	}
-}
-
-static struct tmem_objnode *tmem_objnode_alloc(struct tmem_obj *obj)
-{
-	struct tmem_objnode *objnode;
-
-	ASSERT_SENTINEL(obj, OBJ);
-	BUG_ON(obj->pool == NULL);
-	ASSERT_SENTINEL(obj->pool, POOL);
-	objnode = (*tmem_hostops.objnode_alloc)(obj->pool);
-	if (unlikely(objnode == NULL))
-		goto out;
-	objnode->obj = obj;
-	SET_SENTINEL(objnode, OBJNODE);
-	memset(&objnode->slots, 0, sizeof(objnode->slots));
-	objnode->slots_in_use = 0;
-	obj->objnode_count++;
-out:
-	return objnode;
-}
-
-static void tmem_objnode_free(struct tmem_objnode *objnode)
-{
-	struct tmem_pool *pool;
-	int i;
-
-	BUG_ON(objnode == NULL);
-	for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++)
-		BUG_ON(objnode->slots[i] != NULL);
-	ASSERT_SENTINEL(objnode, OBJNODE);
-	INVERT_SENTINEL(objnode, OBJNODE);
-	BUG_ON(objnode->obj == NULL);
-	ASSERT_SENTINEL(objnode->obj, OBJ);
-	pool = objnode->obj->pool;
-	BUG_ON(pool == NULL);
-	ASSERT_SENTINEL(pool, POOL);
-	objnode->obj->objnode_count--;
-	objnode->obj = NULL;
-	(*tmem_hostops.objnode_free)(objnode, pool);
-}
-
-/*
- * lookup index in object and return associated pampd (or NULL if not found)
- */
-static void **__tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
-{
-	unsigned int height, shift;
-	struct tmem_objnode **slot = NULL;
-
-	BUG_ON(obj == NULL);
-	ASSERT_SENTINEL(obj, OBJ);
-	BUG_ON(obj->pool == NULL);
-	ASSERT_SENTINEL(obj->pool, POOL);
-
-	height = obj->objnode_tree_height;
-	if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height])
-		goto out;
-	if (height == 0 && obj->objnode_tree_root) {
-		slot = &obj->objnode_tree_root;
-		goto out;
-	}
-	shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
-	slot = &obj->objnode_tree_root;
-	while (height > 0) {
-		if (*slot == NULL)
-			goto out;
-		slot = (struct tmem_objnode **)
-			((*slot)->slots +
-			 ((index >> shift) & OBJNODE_TREE_MAP_MASK));
-		shift -= OBJNODE_TREE_MAP_SHIFT;
-		height--;
-	}
-out:
-	return slot != NULL ? (void **)slot : NULL;
-}
-
-static void *tmem_pampd_lookup_in_obj(struct tmem_obj *obj, uint32_t index)
-{
-	struct tmem_objnode **slot;
-
-	slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
-	return slot != NULL ? *slot : NULL;
-}
-
-static void *tmem_pampd_replace_in_obj(struct tmem_obj *obj, uint32_t index,
-					void *new_pampd)
-{
-	struct tmem_objnode **slot;
-	void *ret = NULL;
-
-	slot = (struct tmem_objnode **)__tmem_pampd_lookup_in_obj(obj, index);
-	if ((slot != NULL) && (*slot != NULL)) {
-		void *old_pampd = *(void **)slot;
-		*(void **)slot = new_pampd;
-		(*tmem_pamops.free)(old_pampd, obj->pool, NULL, 0);
-		ret = new_pampd;
-	}
-	return ret;
-}
-
-static int tmem_pampd_add_to_obj(struct tmem_obj *obj, uint32_t index,
-					void *pampd)
-{
-	int ret = 0;
-	struct tmem_objnode *objnode = NULL, *newnode, *slot;
-	unsigned int height, shift;
-	int offset = 0;
-
-	/* if necessary, extend the tree to be higher */
-	if (index > tmem_objnode_tree_h2max[obj->objnode_tree_height]) {
-		height = obj->objnode_tree_height + 1;
-		if (index > tmem_objnode_tree_h2max[height])
-			while (index > tmem_objnode_tree_h2max[height])
-				height++;
-		if (obj->objnode_tree_root == NULL) {
-			obj->objnode_tree_height = height;
-			goto insert;
-		}
-		do {
-			newnode = tmem_objnode_alloc(obj);
-			if (!newnode) {
-				ret = -ENOMEM;
-				goto out;
-			}
-			newnode->slots[0] = obj->objnode_tree_root;
-			newnode->slots_in_use = 1;
-			obj->objnode_tree_root = newnode;
-			obj->objnode_tree_height++;
-		} while (height > obj->objnode_tree_height);
-	}
-insert:
-	slot = obj->objnode_tree_root;
-	height = obj->objnode_tree_height;
-	shift = (height-1) * OBJNODE_TREE_MAP_SHIFT;
-	while (height > 0) {
-		if (slot == NULL) {
-			/* add a child objnode. */
-			slot = tmem_objnode_alloc(obj);
-			if (!slot) {
-				ret = -ENOMEM;
-				goto out;
-			}
-			if (objnode) {
-				objnode->slots[offset] = slot;
-				objnode->slots_in_use++;
-			} else
-				obj->objnode_tree_root = slot;
-		}
-		/* go down a level */
-		offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
-		objnode = slot;
-		slot = objnode->slots[offset];
-		shift -= OBJNODE_TREE_MAP_SHIFT;
-		height--;
-	}
-	BUG_ON(slot != NULL);
-	if (objnode) {
-		objnode->slots_in_use++;
-		objnode->slots[offset] = pampd;
-	} else
-		obj->objnode_tree_root = pampd;
-	obj->pampd_count++;
-out:
-	return ret;
-}
-
-static void *tmem_pampd_delete_from_obj(struct tmem_obj *obj, uint32_t index)
-{
-	struct tmem_objnode_tree_path path[OBJNODE_TREE_MAX_PATH + 1];
-	struct tmem_objnode_tree_path *pathp = path;
-	struct tmem_objnode *slot = NULL;
-	unsigned int height, shift;
-	int offset;
-
-	BUG_ON(obj == NULL);
-	ASSERT_SENTINEL(obj, OBJ);
-	BUG_ON(obj->pool == NULL);
-	ASSERT_SENTINEL(obj->pool, POOL);
-	height = obj->objnode_tree_height;
-	if (index > tmem_objnode_tree_h2max[height])
-		goto out;
-	slot = obj->objnode_tree_root;
-	if (height == 0 && obj->objnode_tree_root) {
-		obj->objnode_tree_root = NULL;
-		goto out;
-	}
-	shift = (height - 1) * OBJNODE_TREE_MAP_SHIFT;
-	pathp->objnode = NULL;
-	do {
-		if (slot == NULL)
-			goto out;
-		pathp++;
-		offset = (index >> shift) & OBJNODE_TREE_MAP_MASK;
-		pathp->offset = offset;
-		pathp->objnode = slot;
-		slot = slot->slots[offset];
-		shift -= OBJNODE_TREE_MAP_SHIFT;
-		height--;
-	} while (height > 0);
-	if (slot == NULL)
-		goto out;
-	while (pathp->objnode) {
-		pathp->objnode->slots[pathp->offset] = NULL;
-		pathp->objnode->slots_in_use--;
-		if (pathp->objnode->slots_in_use) {
-			if (pathp->objnode == obj->objnode_tree_root) {
-				while (obj->objnode_tree_height > 0 &&
-				    obj->objnode_tree_root->slots_in_use == 1 &&
-				    obj->objnode_tree_root->slots[0]) {
-					struct tmem_objnode *to_free =
-						obj->objnode_tree_root;
-
-					obj->objnode_tree_root =
-							to_free->slots[0];
-					obj->objnode_tree_height--;
-					to_free->slots[0] = NULL;
-					to_free->slots_in_use = 0;
-					tmem_objnode_free(to_free);
-				}
-			}
-			goto out;
-		}
-		tmem_objnode_free(pathp->objnode); /* 0 slots used, free it */
-		pathp--;
-	}
-	obj->objnode_tree_height = 0;
-	obj->objnode_tree_root = NULL;
-
-out:
-	if (slot != NULL)
-		obj->pampd_count--;
-	BUG_ON(obj->pampd_count < 0);
-	return slot;
-}
-
-/* recursively walk the objnode_tree destroying pampds and objnodes */
-static void tmem_objnode_node_destroy(struct tmem_obj *obj,
-					struct tmem_objnode *objnode,
-					unsigned int ht)
-{
-	int i;
-
-	if (ht == 0)
-		return;
-	for (i = 0; i < OBJNODE_TREE_MAP_SIZE; i++) {
-		if (objnode->slots[i]) {
-			if (ht == 1) {
-				obj->pampd_count--;
-				(*tmem_pamops.free)(objnode->slots[i],
-						obj->pool, NULL, 0);
-				objnode->slots[i] = NULL;
-				continue;
-			}
-			tmem_objnode_node_destroy(obj, objnode->slots[i], ht-1);
-			tmem_objnode_free(objnode->slots[i]);
-			objnode->slots[i] = NULL;
-		}
-	}
-}
-
-static void tmem_pampd_destroy_all_in_obj(struct tmem_obj *obj)
-{
-	if (obj->objnode_tree_root == NULL)
-		return;
-	if (obj->objnode_tree_height == 0) {
-		obj->pampd_count--;
-		(*tmem_pamops.free)(obj->objnode_tree_root, obj->pool, NULL, 0);
-	} else {
-		tmem_objnode_node_destroy(obj, obj->objnode_tree_root,
-					obj->objnode_tree_height);
-		tmem_objnode_free(obj->objnode_tree_root);
-		obj->objnode_tree_height = 0;
-	}
-	obj->objnode_tree_root = NULL;
-	(*tmem_pamops.free_obj)(obj->pool, obj);
-}
-
-/*
- * Tmem is operated on by a set of well-defined actions:
- * "put", "get", "flush", "flush_object", "new pool" and "destroy pool".
- * (The tmem ABI allows for subpages and exchanges but these operations
- * are not included in this implementation.)
- *
- * These "tmem core" operations are implemented in the following functions.
- */
-
-/*
- * "Put" a page, e.g. copy a page from the kernel into newly allocated
- * PAM space (if such space is available).  Tmem_put is complicated by
- * a corner case: What if a page with matching handle already exists in
- * tmem?  To guarantee coherency, one of two actions is necessary: Either
- * the data for the page must be overwritten, or the page must be
- * "flushed" so that the data is not accessible to a subsequent "get".
- * Since these "duplicate puts" are relatively rare, this implementation
- * always flushes for simplicity.
- */
-int tmem_put(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
-		char *data, size_t size, bool raw, bool ephemeral)
-{
-	struct tmem_obj *obj = NULL, *objfound = NULL, *objnew = NULL;
-	void *pampd = NULL, *pampd_del = NULL;
-	int ret = -ENOMEM;
-	struct tmem_hashbucket *hb;
-
-	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
-	spin_lock(&hb->lock);
-	obj = objfound = tmem_obj_find(hb, oidp);
-	if (obj != NULL) {
-		pampd = tmem_pampd_lookup_in_obj(objfound, index);
-		if (pampd != NULL) {
-			/* if found, is a dup put, flush the old one */
-			pampd_del = tmem_pampd_delete_from_obj(obj, index);
-			BUG_ON(pampd_del != pampd);
-			(*tmem_pamops.free)(pampd, pool, oidp, index);
-			if (obj->pampd_count == 0) {
-				objnew = obj;
-				objfound = NULL;
-			}
-			pampd = NULL;
-		}
-	} else {
-		obj = objnew = (*tmem_hostops.obj_alloc)(pool);
-		if (unlikely(obj == NULL)) {
-			ret = -ENOMEM;
-			goto out;
-		}
-		tmem_obj_init(obj, hb, pool, oidp);
-	}
-	BUG_ON(obj == NULL);
-	BUG_ON(((objnew != obj) && (objfound != obj)) || (objnew == objfound));
-	pampd = (*tmem_pamops.create)(data, size, raw, ephemeral,
-					obj->pool, &obj->oid, index);
-	if (unlikely(pampd == NULL))
-		goto free;
-	ret = tmem_pampd_add_to_obj(obj, index, pampd);
-	if (unlikely(ret == -ENOMEM))
-		/* may have partially built objnode tree ("stump") */
-		goto delete_and_free;
-	goto out;
-
-delete_and_free:
-	(void)tmem_pampd_delete_from_obj(obj, index);
-free:
-	if (pampd)
-		(*tmem_pamops.free)(pampd, pool, NULL, 0);
-	if (objnew) {
-		tmem_obj_free(objnew, hb);
-		(*tmem_hostops.obj_free)(objnew, pool);
-	}
-out:
-	spin_unlock(&hb->lock);
-	return ret;
-}
-
-/*
- * "Get" a page, e.g. if one can be found, copy the tmem page with the
- * matching handle from PAM space to the kernel.  By tmem definition,
- * when a "get" is successful on an ephemeral page, the page is "flushed",
- * and when a "get" is successful on a persistent page, the page is retained
- * in tmem.  Note that to preserve
- * coherency, "get" can never be skipped if tmem contains the data.
- * That is, if a get is done with a certain handle and fails, any
- * subsequent "get" must also fail (unless of course there is a
- * "put" done with the same handle).
- */
-int tmem_get(struct tmem_pool *pool, struct tmem_oid *oidp, uint32_t index,
-		char *data, size_t *size, bool raw, int get_and_free)
-{
-	struct tmem_obj *obj;
-	void *pampd;
-	bool ephemeral = is_ephemeral(pool);
-	int ret = -1;
-	struct tmem_hashbucket *hb;
-	bool free = (get_and_free == 1) || ((get_and_free == 0) && ephemeral);
-	bool lock_held = false;
-
-	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
-	spin_lock(&hb->lock);
-	lock_held = true;
-	obj = tmem_obj_find(hb, oidp);
-	if (obj == NULL)
-		goto out;
-	if (free)
-		pampd = tmem_pampd_delete_from_obj(obj, index);
-	else
-		pampd = tmem_pampd_lookup_in_obj(obj, index);
-	if (pampd == NULL)
-		goto out;
-	if (free) {
-		if (obj->pampd_count == 0) {
-			tmem_obj_free(obj, hb);
-			(*tmem_hostops.obj_free)(obj, pool);
-			obj = NULL;
-		}
-	}
-	if (tmem_pamops.is_remote(pampd)) {
-		lock_held = false;
-		spin_unlock(&hb->lock);
-	}
-	if (free)
-		ret = (*tmem_pamops.get_data_and_free)(
-				data, size, raw, pampd, pool, oidp, index);
-	else
-		ret = (*tmem_pamops.get_data)(
-				data, size, raw, pampd, pool, oidp, index);
-	if (ret < 0)
-		goto out;
-	ret = 0;
-out:
-	if (lock_held)
-		spin_unlock(&hb->lock);
-	return ret;
-}
-
-/*
- * If a page in tmem matches the handle, "flush" this page from tmem such
- * that any subsequent "get" does not succeed (unless, of course, there
- * was another "put" with the same handle).
- */
-int tmem_flush_page(struct tmem_pool *pool,
-				struct tmem_oid *oidp, uint32_t index)
-{
-	struct tmem_obj *obj;
-	void *pampd;
-	int ret = -1;
-	struct tmem_hashbucket *hb;
-
-	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
-	spin_lock(&hb->lock);
-	obj = tmem_obj_find(hb, oidp);
-	if (obj == NULL)
-		goto out;
-	pampd = tmem_pampd_delete_from_obj(obj, index);
-	if (pampd == NULL)
-		goto out;
-	(*tmem_pamops.free)(pampd, pool, oidp, index);
-	if (obj->pampd_count == 0) {
-		tmem_obj_free(obj, hb);
-		(*tmem_hostops.obj_free)(obj, pool);
-	}
-	ret = 0;
-
-out:
-	spin_unlock(&hb->lock);
-	return ret;
-}
-
-/*
- * If a page in tmem matches the handle, replace the page so that any
- * subsequent "get" gets the new page.  Returns 0 if
- * there was a page to replace, else returns -1.
- */
-int tmem_replace(struct tmem_pool *pool, struct tmem_oid *oidp,
-			uint32_t index, void *new_pampd)
-{
-	struct tmem_obj *obj;
-	int ret = -1;
-	struct tmem_hashbucket *hb;
-
-	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
-	spin_lock(&hb->lock);
-	obj = tmem_obj_find(hb, oidp);
-	if (obj == NULL)
-		goto out;
-	new_pampd = tmem_pampd_replace_in_obj(obj, index, new_pampd);
-	ret = (*tmem_pamops.replace_in_obj)(new_pampd, obj);
-out:
-	spin_unlock(&hb->lock);
-	return ret;
-}
-
-/*
- * "Flush" all pages in tmem matching this oid.
- */
-int tmem_flush_object(struct tmem_pool *pool, struct tmem_oid *oidp)
-{
-	struct tmem_obj *obj;
-	struct tmem_hashbucket *hb;
-	int ret = -1;
-
-	hb = &pool->hashbucket[tmem_oid_hash(oidp)];
-	spin_lock(&hb->lock);
-	obj = tmem_obj_find(hb, oidp);
-	if (obj == NULL)
-		goto out;
-	tmem_pampd_destroy_all_in_obj(obj);
-	tmem_obj_free(obj, hb);
-	(*tmem_hostops.obj_free)(obj, pool);
-	ret = 0;
-
-out:
-	spin_unlock(&hb->lock);
-	return ret;
-}
-
-/*
- * "Flush" all pages (and tmem_objs) from this tmem_pool and disable
- * all subsequent access to this tmem_pool.
- */
-int tmem_destroy_pool(struct tmem_pool *pool)
-{
-	int ret = -1;
-
-	if (pool == NULL)
-		goto out;
-	tmem_pool_flush(pool, 1);
-	ret = 0;
-out:
-	return ret;
-}
-
-static LIST_HEAD(tmem_global_pool_list);
-
-/*
- * Create a new tmem_pool with the provided flag and return
- * a pool id provided by the tmem host implementation.
- */
-void tmem_new_pool(struct tmem_pool *pool, uint32_t flags)
-{
-	int persistent = flags & TMEM_POOL_PERSIST;
-	int shared = flags & TMEM_POOL_SHARED;
-	struct tmem_hashbucket *hb = &pool->hashbucket[0];
-	int i;
-
-	for (i = 0; i < TMEM_HASH_BUCKETS; i++, hb++) {
-		hb->obj_rb_root = RB_ROOT;
-		spin_lock_init(&hb->lock);
-	}
-	INIT_LIST_HEAD(&pool->pool_list);
-	atomic_set(&pool->obj_count, 0);
-	SET_SENTINEL(pool, POOL);
-	list_add_tail(&pool->pool_list, &tmem_global_pool_list);
-	pool->persistent = persistent;
-	pool->shared = shared;
-}
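Aside: the height-to-max-index table in tmem_objnode_tree_init() above drives both lookup and insertion, since each tree level consumes OBJNODE_TREE_MAP_SHIFT (6) bits of the 32-bit page index. The following user-space sketch (not part of this commit; it merely reproduces the same arithmetic) prints the table, showing that a height-1 tree covers indices up to 0x3f, height 2 up to 0xfff, and so on:

#include <stdio.h>

#define OBJNODE_TREE_MAP_SHIFT 6
#define OBJNODE_TREE_INDEX_BITS (8 * sizeof(unsigned long))
#define OBJNODE_TREE_MAX_PATH \
	(OBJNODE_TREE_INDEX_BITS / OBJNODE_TREE_MAP_SHIFT + 2)

int main(void)
{
	unsigned long h2max;
	unsigned int ht, tmp;

	/* same computation as tmem_objnode_tree_init() */
	for (ht = 0; ht <= OBJNODE_TREE_MAX_PATH; ht++) {
		tmp = ht * OBJNODE_TREE_MAP_SHIFT;
		if (tmp >= OBJNODE_TREE_INDEX_BITS)
			h2max = ~0UL;
		else
			h2max = (~0UL >> (OBJNODE_TREE_INDEX_BITS - tmp - 1)) >> 1;
		printf("height %2u -> max index %#lx\n", ht, h2max);
	}
	return 0;
}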
--- a/drivers/staging/zcache/tmem.h
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * tmem.h
- *
- * Transcendent memory
- *
- * Copyright (c) 2009-2011, Dan Magenheimer, Oracle Corp.
- */
-
-#ifndef _TMEM_H_
-#define _TMEM_H_
-
-#include <linux/types.h>
-#include <linux/highmem.h>
-#include <linux/hash.h>
-#include <linux/atomic.h>
-
-/*
- * These are pre-defined by the Xen<->Linux ABI
- */
-#define TMEM_PUT_PAGE			4
-#define TMEM_GET_PAGE			5
-#define TMEM_FLUSH_PAGE			6
-#define TMEM_FLUSH_OBJECT		7
-#define TMEM_POOL_PERSIST		1
-#define TMEM_POOL_SHARED		2
-#define TMEM_POOL_PRECOMPRESSED		4
-#define TMEM_POOL_PAGESIZE_SHIFT	4
-#define TMEM_POOL_PAGESIZE_MASK		0xf
-#define TMEM_POOL_RESERVED_BITS		0x00ffff00
-
-/*
- * sentinels have proven very useful for debugging but can be removed
- * or disabled before final merge.
- */
-#define SENTINELS
-#ifdef SENTINELS
-#define DECL_SENTINEL uint32_t sentinel;
-#define SET_SENTINEL(_x, _y) (_x->sentinel = _y##_SENTINEL)
-#define INVERT_SENTINEL(_x, _y) (_x->sentinel = ~_y##_SENTINEL)
-#define ASSERT_SENTINEL(_x, _y) WARN_ON(_x->sentinel != _y##_SENTINEL)
-#define ASSERT_INVERTED_SENTINEL(_x, _y) WARN_ON(_x->sentinel != ~_y##_SENTINEL)
-#else
-#define DECL_SENTINEL
-#define SET_SENTINEL(_x, _y) do { } while (0)
-#define INVERT_SENTINEL(_x, _y) do { } while (0)
-#define ASSERT_SENTINEL(_x, _y) do { } while (0)
-#define ASSERT_INVERTED_SENTINEL(_x, _y) do { } while (0)
-#endif
-
-#define ASSERT_SPINLOCK(_l) lockdep_assert_held(_l)
-
-/*
- * A pool is the highest-level data structure managed by tmem and
- * usually corresponds to a large independent set of pages such as
- * a filesystem.  Each pool has an id, and certain attributes and counters.
- * It also contains a set of hash buckets, each of which contains an rbtree
- * of objects and a lock to manage concurrency within the pool.
- */
-
-#define TMEM_HASH_BUCKET_BITS	8
-#define TMEM_HASH_BUCKETS	(1<<TMEM_HASH_BUCKET_BITS)
-
-struct tmem_hashbucket {
-	struct rb_root obj_rb_root;
-	spinlock_t lock;
-};
-
-struct tmem_pool {
-	void *client; /* "up" for some clients, avoids table lookup */
-	struct list_head pool_list;
-	uint32_t pool_id;
-	bool persistent;
-	bool shared;
-	atomic_t obj_count;
-	atomic_t refcount;
-	struct tmem_hashbucket hashbucket[TMEM_HASH_BUCKETS];
-	DECL_SENTINEL
-};
-
-#define is_persistent(_p)  (_p->persistent)
-#define is_ephemeral(_p)   (!(_p->persistent))
-
-/*
- * An object id ("oid") is large: 192-bits (to ensure, for example, files
- * in a modern filesystem can be uniquely identified).
- */
-
-struct tmem_oid {
-	uint64_t oid[3];
-};
-
-static inline void tmem_oid_set_invalid(struct tmem_oid *oidp)
-{
-	oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL;
-}
-
-static inline bool tmem_oid_valid(struct tmem_oid *oidp)
-{
-	return oidp->oid[0] != -1UL || oidp->oid[1] != -1UL ||
-		oidp->oid[2] != -1UL;
-}
-
-static inline int tmem_oid_compare(struct tmem_oid *left,
-					struct tmem_oid *right)
-{
-	int ret;
-
-	if (left->oid[2] == right->oid[2]) {
-		if (left->oid[1] == right->oid[1]) {
-			if (left->oid[0] == right->oid[0])
-				ret = 0;
-			else if (left->oid[0] < right->oid[0])
-				ret = -1;
-			else
-				ret = 1;
-		} else if (left->oid[1] < right->oid[1])
-			ret = -1;
-		else
-			ret = 1;
-	} else if (left->oid[2] < right->oid[2])
-		ret = -1;
-	else
-		ret = 1;
-	return ret;
-}
-
-static inline unsigned tmem_oid_hash(struct tmem_oid *oidp)
-{
-	return hash_long(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2],
-				TMEM_HASH_BUCKET_BITS);
-}
-
-/*
- * A tmem_obj contains an identifier (oid), pointers to the parent
- * pool and the rb_tree to which it belongs, counters, and an ordered
- * set of pampds, structured in a radix-tree-like tree.  The intermediate
- * nodes of the tree are called tmem_objnodes.
- */
-
-struct tmem_objnode;
-
-struct tmem_obj {
-	struct tmem_oid oid;
-	struct tmem_pool *pool;
-	struct rb_node rb_tree_node;
-	struct tmem_objnode *objnode_tree_root;
-	unsigned int objnode_tree_height;
-	unsigned long objnode_count;
-	long pampd_count;
-	void *extra; /* for private use by pampd implementation */
-	DECL_SENTINEL
-};
-
-#define OBJNODE_TREE_MAP_SHIFT 6
-#define OBJNODE_TREE_MAP_SIZE (1UL << OBJNODE_TREE_MAP_SHIFT)
-#define OBJNODE_TREE_MAP_MASK (OBJNODE_TREE_MAP_SIZE-1)
-#define OBJNODE_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
-#define OBJNODE_TREE_MAX_PATH \
-		(OBJNODE_TREE_INDEX_BITS/OBJNODE_TREE_MAP_SHIFT + 2)
-
-struct tmem_objnode {
-	struct tmem_obj *obj;
-	DECL_SENTINEL
-	void *slots[OBJNODE_TREE_MAP_SIZE];
-	unsigned int slots_in_use;
-};
-
-/* pampd abstract datatype methods provided by the PAM implementation */
-struct tmem_pamops {
-	void *(*create)(char *, size_t, bool, int,
-			struct tmem_pool *, struct tmem_oid *, uint32_t);
-	int (*get_data)(char *, size_t *, bool, void *, struct tmem_pool *,
-				struct tmem_oid *, uint32_t);
-	int (*get_data_and_free)(char *, size_t *, bool, void *,
-				struct tmem_pool *, struct tmem_oid *,
-				uint32_t);
-	void (*free)(void *, struct tmem_pool *, struct tmem_oid *, uint32_t);
-	void (*free_obj)(struct tmem_pool *, struct tmem_obj *);
-	bool (*is_remote)(void *);
-	void (*new_obj)(struct tmem_obj *);
-	int (*replace_in_obj)(void *, struct tmem_obj *);
-};
-extern void tmem_register_pamops(struct tmem_pamops *m);
-
-/* memory allocation methods provided by the host implementation */
-struct tmem_hostops {
-	struct tmem_obj *(*obj_alloc)(struct tmem_pool *);
-	void (*obj_free)(struct tmem_obj *, struct tmem_pool *);
-	struct tmem_objnode *(*objnode_alloc)(struct tmem_pool *);
-	void (*objnode_free)(struct tmem_objnode *, struct tmem_pool *);
-};
-extern void tmem_register_hostops(struct tmem_hostops *m);
-
-/* core tmem accessor functions */
-extern int tmem_put(struct tmem_pool *, struct tmem_oid *, uint32_t index,
-			char *, size_t, bool, bool);
-extern int tmem_get(struct tmem_pool *, struct tmem_oid *, uint32_t index,
-			char *, size_t *, bool, int);
-extern int tmem_replace(struct tmem_pool *, struct tmem_oid *, uint32_t index,
-			void *);
-extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *,
-			uint32_t index);
-extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *);
-extern int tmem_destroy_pool(struct tmem_pool *);
-extern void tmem_new_pool(struct tmem_pool *, uint32_t);
-#endif /* _TMEM_H_ */
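Aside: tmem_oid_compare() above treats the three 64-bit words as one 192-bit integer with oid[2] most significant; that ordering is what keys the per-hashbucket rb-trees. An equivalent loop form, runnable in user space (illustrative only, not part of this commit):

#include <stdio.h>
#include <stdint.h>

struct tmem_oid { uint64_t oid[3]; };

/* Mirrors tmem_oid_compare(): compare most-significant word first. */
static int oid_compare(const struct tmem_oid *l, const struct tmem_oid *r)
{
	int i;

	for (i = 2; i >= 0; i--) {
		if (l->oid[i] < r->oid[i])
			return -1;
		if (l->oid[i] > r->oid[i])
			return 1;
	}
	return 0;
}

int main(void)
{
	struct tmem_oid a = { { 5, 0, 1 } };	/* high word 1 */
	struct tmem_oid b = { { 1, 9, 0 } };	/* high word 0 */

	/* a sorts after b: word 2 decides before words 1 and 0 */
	printf("compare(a, b) = %d\n", oid_compare(&a, &b));	/* prints 1 */
	return 0;
}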
[File diff suppressed because it is too large]
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -112,6 +112,9 @@ enum pageflags {
 #ifdef CONFIG_SCFS_LOWER_PAGECACHE_INVALIDATION
 	PG_scfslower,
 	PG_nocache,
 #endif
+#ifdef CONFIG_ZCACHE
+	PG_was_active,
+#endif
 	__NR_PAGEFLAGS,
 #if defined(CONFIG_CMA_PAGE_COUNTING)
@@ -217,6 +220,11 @@ PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)
 
 __PAGEFLAG(SlobFree, slob_free)
+#ifdef CONFIG_ZCACHE
+PAGEFLAG(WasActive, was_active)
+#else
+PAGEFLAG_FALSE(WasActive)
+#endif
 
 /*
  * Private page markings that may be used by the filesystem that owns the page
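Aside: PAGEFLAG(WasActive, was_active) generates the accessors used later in mm/vmscan.c, while PAGEFLAG_FALSE(WasActive) makes PageWasActive() a constant 0 and the set/clear variants no-ops when zcache is disabled. A simplified sketch of the expansion (an assumption about the 3.x-era macros, which wrap atomic bitops on page->flags):

/* Roughly what PAGEFLAG(WasActive, was_active) expands to: */
static inline int PageWasActive(const struct page *page)
{
	return test_bit(PG_was_active, &page->flags);
}
static inline void SetPageWasActive(struct page *page)
{
	set_bit(PG_was_active, &page->flags);
}
static inline void ClearPageWasActive(struct page *page)
{
	clear_bit(PG_was_active, &page->flags);
}
/* PAGEFLAG_FALSE(WasActive): PageWasActive() returns 0; set/clear are no-ops */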
--- /dev/null
+++ b/include/linux/zbud.h
@@ -0,0 +1,22 @@
+#ifndef _ZBUD_H_
+#define _ZBUD_H_
+
+#include <linux/types.h>
+
+struct zbud_pool;
+
+struct zbud_ops {
+	int (*evict)(struct zbud_pool *pool, unsigned long handle);
+};
+
+struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops);
+void zbud_destroy_pool(struct zbud_pool *pool);
+int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
+	unsigned long *handle);
+void zbud_free(struct zbud_pool *pool, unsigned long handle);
+int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries);
+void *zbud_map(struct zbud_pool *pool, unsigned long handle);
+void zbud_unmap(struct zbud_pool *pool, unsigned long handle);
+u64 zbud_get_pool_size(struct zbud_pool *pool);
+
+#endif /* _ZBUD_H_ */
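Aside: the header above is the whole client contract. A minimal, hypothetical module sketch of the intended call sequence (pool creation with an evict callback, allocation, map/unmap around data access); everything prefixed demo_ is invented for illustration and is not part of this commit:

#include <linux/gfp.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/string.h>
#include <linux/zbud.h>

/* Invented eviction handler: a real user would write the data back first. */
static int demo_evict(struct zbud_pool *pool, unsigned long handle)
{
	zbud_free(pool, handle);	/* release the evicted buddy */
	return 0;			/* 0 tells zbud the handle was freed */
}

static struct zbud_ops demo_ops = {
	.evict = demo_evict,
};

static int __init demo_init(void)
{
	struct zbud_pool *pool;
	unsigned long handle;
	void *buf;

	pool = zbud_create_pool(GFP_KERNEL, &demo_ops);
	if (!pool)
		return -ENOMEM;
	if (zbud_alloc(pool, 100, GFP_KERNEL, &handle) == 0) {
		buf = zbud_map(pool, handle);	/* opaque handle -> pointer */
		memset(buf, 0xaa, 100);
		zbud_unmap(pool, handle);
		zbud_free(pool, handle);
	}
	zbud_destroy_pool(pool);
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

With 4 KiB pages and NCHUNKS_ORDER 6 (see mm/zbud.c below), CHUNK_SIZE is 64 bytes, so the 100-byte allocation above occupies two chunks and its zbud page lands on the unbuddied[61] freelist (64 chunks minus 2 used minus 1 header chunk).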
--- /dev/null
+++ b/include/linux/zcache.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#ifndef _LINUX_ZCACHE_H
+#define _LINUX_ZCACHE_H
+
+#ifdef CONFIG_ZCACHE
+extern u64 zcache_pages(void);
+#else
+static inline u64 zcache_pages(void) { return 0; }
+#endif
+
+#endif /* _LINUX_ZCACHE_H */
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -507,11 +507,36 @@ config MIN_DIRTY_THRESH_PAGES
 	  disables this option.)
 	  Do not use it if you are unsure.
 
 
 config MMAP_READAROUND_LIMIT
 	int "Limit mmap readaround upperbound"
 	default 0
 	help
 	  Inappropriate mmap readaround size can hurt device performance
 	  during the sluggish situation. Add the hard upper-limit for
 	  mmap readaround.
+
+config ZBUD
+	tristate "Low density storage for compressed pages"
+	default n
+	help
+	  A special purpose allocator for storing compressed pages.
+	  It is designed to store up to two compressed pages per physical
+	  page.  While this design limits storage density, it has simple and
+	  deterministic reclaim properties that make it preferable to a higher
+	  density approach when reclaim will be used.
+
+config ZCACHE
+	bool "Compressed cache for file pages (EXPERIMENTAL)"
+	depends on CRYPTO && CLEANCACHE
+	select CRYPTO_LZO
+	select ZBUD
+	default n
+	help
+	  A compressed cache for file pages.
+	  It takes active file pages that are in the process of being reclaimed
+	  and attempts to compress them into a dynamically allocated RAM-based
+	  memory pool.
+
+	  If this process succeeds, the I/O read is avoided when those file
+	  pages are needed again, which yields significant performance gains
+	  under memory pressure on systems with many file pages.
|
@ -53,3 +53,5 @@ obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o
|
|||
obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o
|
||||
obj-$(CONFIG_CLEANCACHE) += cleancache.o
|
||||
obj-$(CONFIG_ZSMALLOC_NEW) += zsmalloc.o
|
||||
obj-$(CONFIG_ZCACHE) += zcache.o
|
||||
obj-$(CONFIG_ZBUD) += zbud.o
|
||||
|
|
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1303,6 +1303,7 @@ putback_inactive_pages(struct mem_cgroup_zone *mz,
 	while (!list_empty(page_list)) {
 		struct page *page = lru_to_page(page_list);
 		int lru;
+		int file;
 
 		VM_BUG_ON(PageLRU(page));
 		list_del(&page->lru);
@@ -1315,8 +1316,12 @@ putback_inactive_pages(struct mem_cgroup_zone *mz,
 		SetPageLRU(page);
 		lru = page_lru(page);
 		add_page_to_lru_list(zone, page, lru);
+		file = is_file_lru(lru);
+		if (IS_ENABLED(CONFIG_ZCACHE))
+			if (file)
+				SetPageWasActive(page);
 		if (is_active_lru(lru)) {
 			int file = is_file_lru(lru);
 			int numpages = hpage_nr_pages(page);
 			reclaim_stat->recent_rotated[file] += numpages;
 		}
@@ -1612,6 +1617,12 @@ static void shrink_active_list(unsigned long nr_to_scan,
 		}
 
 		ClearPageActive(page);	/* we are de-activating */
+		if (IS_ENABLED(CONFIG_ZCACHE))
+			/*
+			 * For zcache to know whether the page is from active
+			 * file list
+			 */
+			SetPageWasActive(page);
 		list_add(&page->lru, &l_inactive);
 	}
--- /dev/null
+++ b/mm/zbud.c
@@ -0,0 +1,527 @@
+/*
+ * zbud.c
+ *
+ * Copyright (C) 2013, Seth Jennings, IBM
+ *
+ * Concepts based on zcache internal zbud allocator by Dan Magenheimer.
+ *
+ * zbud is a special purpose allocator for storing compressed pages.  Contrary
+ * to what its name may suggest, zbud is not a buddy allocator, but rather an
+ * allocator that "buddies" two compressed pages together in a single memory
+ * page.
+ *
+ * While this design limits storage density, it has simple and deterministic
+ * reclaim properties that make it preferable to a higher density approach when
+ * reclaim will be used.
+ *
+ * zbud works by storing compressed pages, or "zpages", together in pairs in a
+ * single memory page called a "zbud page".  The first buddy is "left
+ * justified" at the beginning of the zbud page, and the last buddy is "right
+ * justified" at the end of the zbud page.  The benefit is that if either
+ * buddy is freed, the freed buddy space, coalesced with whatever slack space
+ * that existed between the buddies, results in the largest possible free
+ * region within the zbud page.
+ *
+ * zbud also provides an attractive lower bound on density.  The ratio of
+ * zpages to zbud pages cannot be less than 1.  This ensures that zbud can
+ * never "do harm" by using more pages to store zpages than the uncompressed
+ * zpages would have used on their own.
+ *
+ * zbud pages are divided into "chunks".  The size of the chunks is fixed at
+ * compile time and determined by NCHUNKS_ORDER below.  Dividing zbud pages
+ * into chunks allows organizing unbuddied zbud pages into a manageable number
+ * of unbuddied lists according to the number of free chunks available in the
+ * zbud page.
+ *
+ * The zbud API differs from that of conventional allocators in that the
+ * allocation function, zbud_alloc(), returns an opaque handle to the user,
+ * not a dereferenceable pointer.  The user must map the handle using
+ * zbud_map() in order to get a usable pointer by which to access the
+ * allocation data and unmap the handle with zbud_unmap() when operations
+ * on the allocation data are complete.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/atomic.h>
+#include <linux/list.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/zbud.h>
+
+/*****************
+ * Structures
+*****************/
+/*
+ * NCHUNKS_ORDER determines the internal allocation granularity, effectively
+ * adjusting internal fragmentation.  It also determines the number of
+ * freelists maintained in each pool.  NCHUNKS_ORDER of 6 means that the
+ * allocation granularity will be in chunks of size PAGE_SIZE/64, and there
+ * will be 64 freelists per pool.
+ */
+#define NCHUNKS_ORDER	6
+
+#define CHUNK_SHIFT	(PAGE_SHIFT - NCHUNKS_ORDER)
+#define CHUNK_SIZE	(1 << CHUNK_SHIFT)
+#define NCHUNKS		(PAGE_SIZE >> CHUNK_SHIFT)
+#define ZHDR_SIZE_ALIGNED CHUNK_SIZE
+
+/**
+ * struct zbud_pool - stores metadata for each zbud pool
+ * @lock:	protects all pool fields and first|last_chunk fields of any
+ *		zbud page in the pool
+ * @unbuddied:	array of lists tracking zbud pages that only contain one buddy;
+ *		the lists each zbud page is added to depends on the size of
+ *		its free region.
+ * @buddied:	list tracking the zbud pages that contain two buddies;
+ *		these zbud pages are full
+ * @lru:	list tracking the zbud pages in LRU order by most recently
+ *		added buddy.
+ * @pages_nr:	number of zbud pages in the pool.
+ * @ops:	pointer to a structure of user defined operations specified at
+ *		pool creation time.
+ *
+ * This structure is allocated at pool creation time and maintains metadata
+ * pertaining to a particular zbud pool.
+ */
+struct zbud_pool {
+	spinlock_t lock;
+	struct list_head unbuddied[NCHUNKS];
+	struct list_head buddied;
+	struct list_head lru;
+	u64 pages_nr;
+	struct zbud_ops *ops;
+};
+
+/*
+ * struct zbud_header - zbud page metadata occupying the first chunk of each
+ *			zbud page.
+ * @buddy:	links the zbud page into the unbuddied/buddied lists in the pool
+ * @lru:	links the zbud page into the lru list in the pool
+ * @first_chunks:	the size of the first buddy in chunks, 0 if free
+ * @last_chunks:	the size of the last buddy in chunks, 0 if free
+ */
+struct zbud_header {
+	struct list_head buddy;
+	struct list_head lru;
+	unsigned int first_chunks;
+	unsigned int last_chunks;
+	bool under_reclaim;
+};
+
+/*****************
+ * Helpers
+*****************/
+/* Just to make the code easier to read */
+enum buddy {
+	FIRST,
+	LAST
+};
+
+/* Converts an allocation size in bytes to size in zbud chunks */
+static int size_to_chunks(int size)
+{
+	return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
+}
+
+#define for_each_unbuddied_list(_iter, _begin) \
+	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)
+
+/* Initializes the zbud header of a newly allocated zbud page */
+static struct zbud_header *init_zbud_page(struct page *page)
+{
+	struct zbud_header *zhdr = page_address(page);
+	zhdr->first_chunks = 0;
+	zhdr->last_chunks = 0;
+	INIT_LIST_HEAD(&zhdr->buddy);
+	INIT_LIST_HEAD(&zhdr->lru);
+	zhdr->under_reclaim = 0;
+	return zhdr;
+}
+
+/* Resets the struct page fields and frees the page */
+static void free_zbud_page(struct zbud_header *zhdr)
+{
+	__free_page(virt_to_page(zhdr));
+}
+
+/*
+ * Encodes the handle of a particular buddy within a zbud page
+ * Pool lock should be held as this function accesses first|last_chunks
+ */
+static unsigned long encode_handle(struct zbud_header *zhdr, enum buddy bud)
+{
+	unsigned long handle;
+
+	/*
+	 * For now, the encoded handle is actually just the pointer to the data
+	 * but this might not always be the case.  A little information hiding.
+	 * Add CHUNK_SIZE to the handle if it is the first allocation to jump
+	 * over the zbud header in the first chunk.
+	 */
+	handle = (unsigned long)zhdr;
+	if (bud == FIRST)
+		/* skip over zbud header */
+		handle += ZHDR_SIZE_ALIGNED;
+	else /* bud == LAST */
+		handle += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT);
+	return handle;
+}
+
+/* Returns the zbud page where a given handle is stored */
+static struct zbud_header *handle_to_zbud_header(unsigned long handle)
+{
+	return (struct zbud_header *)(handle & PAGE_MASK);
+}
+
+/* Returns the number of free chunks in a zbud page */
+static int num_free_chunks(struct zbud_header *zhdr)
+{
+	/*
+	 * Rather than branch for different situations, just use the fact that
+	 * free buddies have a length of zero to simplify everything. -1 at the
+	 * end for the zbud header.
+	 */
+	return NCHUNKS - zhdr->first_chunks - zhdr->last_chunks - 1;
+}
+
+/*****************
+ * API Functions
+*****************/
+/**
+ * zbud_create_pool() - create a new zbud pool
+ * @gfp:	gfp flags when allocating the zbud pool structure
+ * @ops:	user-defined operations for the zbud pool
+ *
+ * Return: pointer to the new zbud pool or NULL if the metadata allocation
+ * failed.
+ */
+struct zbud_pool *zbud_create_pool(gfp_t gfp, struct zbud_ops *ops)
+{
+	struct zbud_pool *pool;
+	int i;
+
+	pool = kmalloc(sizeof(struct zbud_pool), gfp);
+	if (!pool)
+		return NULL;
+	spin_lock_init(&pool->lock);
+	for_each_unbuddied_list(i, 0)
+		INIT_LIST_HEAD(&pool->unbuddied[i]);
+	INIT_LIST_HEAD(&pool->buddied);
+	INIT_LIST_HEAD(&pool->lru);
+	pool->pages_nr = 0;
+	pool->ops = ops;
+	return pool;
+}
+
+/**
+ * zbud_destroy_pool() - destroys an existing zbud pool
+ * @pool:	the zbud pool to be destroyed
+ *
+ * The pool should be emptied before this function is called.
+ */
+void zbud_destroy_pool(struct zbud_pool *pool)
+{
+	kfree(pool);
+}
+
+/**
+ * zbud_alloc() - allocates a region of a given size
+ * @pool:	zbud pool from which to allocate
+ * @size:	size in bytes of the desired allocation
+ * @gfp:	gfp flags used if the pool needs to grow
+ * @handle:	handle of the new allocation
+ *
+ * This function will attempt to find a free region in the pool large enough to
+ * satisfy the allocation request.  A search of the unbuddied lists is
+ * performed first.  If no suitable free region is found, then a new page is
+ * allocated and added to the pool to satisfy the request.
+ *
+ * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
+ * as zbud pool pages.
+ *
+ * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
+ * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
+ * a new page.
+ */
+int zbud_alloc(struct zbud_pool *pool, int size, gfp_t gfp,
+			unsigned long *handle)
+{
+	int chunks, i, freechunks;
+	struct zbud_header *zhdr = NULL;
+	enum buddy bud;
+	struct page *page;
+
+	if (size <= 0 || gfp & __GFP_HIGHMEM)
+		return -EINVAL;
+	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED)
+		return -ENOSPC;
+	chunks = size_to_chunks(size);
+	spin_lock(&pool->lock);
+
+	/* First, try to find an unbuddied zbud page. */
+	zhdr = NULL;
+	for_each_unbuddied_list(i, chunks) {
+		if (!list_empty(&pool->unbuddied[i])) {
+			zhdr = list_first_entry(&pool->unbuddied[i],
+					struct zbud_header, buddy);
+			list_del(&zhdr->buddy);
+			if (zhdr->first_chunks == 0)
+				bud = FIRST;
+			else
+				bud = LAST;
+			goto found;
+		}
+	}
+
+	/* Couldn't find unbuddied zbud page, create new one */
+	spin_unlock(&pool->lock);
+	page = alloc_page(gfp);
+	if (!page)
+		return -ENOMEM;
+	spin_lock(&pool->lock);
+	pool->pages_nr++;
+	zhdr = init_zbud_page(page);
+	bud = FIRST;
+
+found:
+	if (bud == FIRST)
+		zhdr->first_chunks = chunks;
+	else
+		zhdr->last_chunks = chunks;
+
+	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0) {
+		/* Add to unbuddied list */
+		freechunks = num_free_chunks(zhdr);
+		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+	} else {
+		/* Add to buddied list */
+		list_add(&zhdr->buddy, &pool->buddied);
+	}
+
+	/* Add/move zbud page to beginning of LRU */
+	if (!list_empty(&zhdr->lru))
+		list_del(&zhdr->lru);
+	list_add(&zhdr->lru, &pool->lru);
+
+	*handle = encode_handle(zhdr, bud);
+	spin_unlock(&pool->lock);
+
+	return 0;
+}
+
+/**
+ * zbud_free() - frees the allocation associated with the given handle
+ * @pool:	pool in which the allocation resided
+ * @handle:	handle associated with the allocation returned by zbud_alloc()
+ *
+ * In the case that the zbud page in which the allocation resides is under
+ * reclaim, as indicated by the PG_reclaim flag being set, this function
+ * only sets the first|last_chunks to 0.  The page is actually freed
+ * once both buddies are evicted (see zbud_reclaim_page() below).
+ */
+void zbud_free(struct zbud_pool *pool, unsigned long handle)
+{
+	struct zbud_header *zhdr;
+	int freechunks;
+
+	spin_lock(&pool->lock);
+	zhdr = handle_to_zbud_header(handle);
+
+	/* If first buddy, handle will be page aligned */
+	if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK)
+		zhdr->last_chunks = 0;
+	else
+		zhdr->first_chunks = 0;
+
+	if (zhdr->under_reclaim) {
+		/* zbud page is under reclaim, reclaim will free */
+		spin_unlock(&pool->lock);
+		return;
+	}
+
+	/* Remove from existing buddy list */
+	list_del(&zhdr->buddy);
+
+	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
+		/* zbud page is empty, free */
+		list_del(&zhdr->lru);
+		free_zbud_page(zhdr);
+		pool->pages_nr--;
+	} else {
+		/* Add to unbuddied list */
+		freechunks = num_free_chunks(zhdr);
+		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+	}
+
+	spin_unlock(&pool->lock);
+}
+
+#define list_tail_entry(ptr, type, member) \
+	list_entry((ptr)->prev, type, member)
+
+/**
+ * zbud_reclaim_page() - evicts allocations from a pool page and frees it
+ * @pool:	pool from which a page will attempt to be evicted
+ * @retries:	number of pages on the LRU list for which eviction will
+ *		be attempted before failing
+ *
+ * zbud reclaim is different from normal system reclaim in that the reclaim is
+ * done from the bottom, up.  This is because only the bottom layer, zbud, has
+ * information on how the allocations are organized within each zbud page.
+ * This has the potential to create interesting locking situations between
+ * zbud and the user, however.
+ *
+ * To avoid these, this is how zbud_reclaim_page() should be called:
+ *
+ * The user detects a page should be reclaimed and calls zbud_reclaim_page().
+ * zbud_reclaim_page() will remove a zbud page from the pool LRU list and call
+ * the user-defined eviction handler with the pool and handle as arguments.
+ *
+ * If the handle can not be evicted, the eviction handler should return
+ * non-zero.  zbud_reclaim_page() will add the zbud page back to the
+ * appropriate list and try the next zbud page on the LRU up to
+ * a user defined number of retries.
+ *
+ * If the handle is successfully evicted, the eviction handler should
+ * return 0 _and_ should have called zbud_free() on the handle.  zbud_free()
+ * contains logic to delay freeing the page if the page is under reclaim,
+ * as indicated by the setting of the PG_reclaim flag on the underlying page.
+ *
+ * If all buddies in the zbud page are successfully evicted, then the
+ * zbud page can be freed.
+ *
+ * Returns: 0 if page is successfully freed, otherwise -EINVAL if there are
+ * no pages to evict or an eviction handler is not registered, -EAGAIN if
+ * the retry limit was hit.
+ */
+int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
+{
+	int i, ret, freechunks;
+	struct zbud_header *zhdr;
+	unsigned long first_handle = 0, last_handle = 0;
+
+	spin_lock(&pool->lock);
+	if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
+			retries == 0) {
+		spin_unlock(&pool->lock);
+		return -EINVAL;
+	}
+	for (i = 0; i < retries; i++) {
+		zhdr = list_tail_entry(&pool->lru, struct zbud_header, lru);
+		list_del(&zhdr->lru);
+		list_del(&zhdr->buddy);
+		/* Protect zbud page against free */
+		zhdr->under_reclaim = true;
+		/*
+		 * We need to encode the handles before unlocking, since we can
+		 * race with free that will set (first|last)_chunks to 0
+		 */
+		first_handle = 0;
+		last_handle = 0;
+		if (zhdr->first_chunks)
+			first_handle = encode_handle(zhdr, FIRST);
+		if (zhdr->last_chunks)
+			last_handle = encode_handle(zhdr, LAST);
+		spin_unlock(&pool->lock);
+
+		/* Issue the eviction callback(s) */
+		if (first_handle) {
+			ret = pool->ops->evict(pool, first_handle);
+			if (ret)
+				goto next;
+		}
+		if (last_handle) {
+			ret = pool->ops->evict(pool, last_handle);
+			if (ret)
+				goto next;
+		}
+next:
+		spin_lock(&pool->lock);
+		zhdr->under_reclaim = false;
+		if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
+			/*
+			 * Both buddies are now free, free the zbud page and
+			 * return success.
+			 */
+			free_zbud_page(zhdr);
+			pool->pages_nr--;
+			spin_unlock(&pool->lock);
+			return 0;
+		} else if (zhdr->first_chunks == 0 ||
+				zhdr->last_chunks == 0) {
+			/* add to unbuddied list */
+			freechunks = num_free_chunks(zhdr);
+			list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+		} else {
+			/* add to buddied list */
+			list_add(&zhdr->buddy, &pool->buddied);
+		}
+
+		/* add to beginning of LRU */
+		list_add(&zhdr->lru, &pool->lru);
+	}
+	spin_unlock(&pool->lock);
+	return -EAGAIN;
+}
+
+/**
+ * zbud_map() - maps the allocation associated with the given handle
+ * @pool:	pool in which the allocation resides
+ * @handle:	handle associated with the allocation to be mapped
+ *
+ * While trivial for zbud, the mapping functions for others allocators
+ * implementing this allocation API could have more complex information encoded
+ * in the handle and could create temporary mappings to make the data
+ * accessible to the user.
+ *
+ * Returns: a pointer to the mapped allocation
+ */
+void *zbud_map(struct zbud_pool *pool, unsigned long handle)
+{
+	return (void *)(handle);
+}
+
+/**
+ * zbud_unmap() - unmaps the allocation associated with the given handle
+ * @pool:	pool in which the allocation resides
+ * @handle:	handle associated with the allocation to be unmapped
+ */
+void zbud_unmap(struct zbud_pool *pool, unsigned long handle)
+{
+}
+
+/**
+ * zbud_get_pool_size() - gets the zbud pool size in pages
+ * @pool:	pool whose size is being queried
+ *
+ * Returns: size in pages of the given pool.  The pool lock need not be
+ * taken to access pages_nr.
+ */
+u64 zbud_get_pool_size(struct zbud_pool *pool)
+{
+	return pool->pages_nr;
+}
+
+static int __init init_zbud(void)
+{
+	/* Make sure the zbud header will fit in one chunk */
+	BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED);
+	pr_info("loaded\n");
+	return 0;
+}
+
+static void __exit exit_zbud(void)
+{
+	pr_info("unloaded\n");
+}
+
+module_init(init_zbud);
+module_exit(exit_zbud);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Seth Jennings <sjenning@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("Buddy Allocator for Compressed Pages");
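Aside: a worked example of encode_handle()/handle_to_zbud_header() and the first-buddy test in zbud_free(), expressed as user-space arithmetic (assumes 4 KiB pages; the address is made up for illustration and is not part of this commit):

#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define NCHUNKS_ORDER	6
#define CHUNK_SHIFT	(PAGE_SHIFT - NCHUNKS_ORDER)
#define CHUNK_SIZE	(1UL << CHUNK_SHIFT)

int main(void)
{
	unsigned long zhdr = 0x12345000UL;	/* page-aligned zbud page */
	unsigned long last_chunks = 16;		/* e.g. a ~1000-byte zpage */

	/* encode_handle(): FIRST skips the header chunk, LAST is
	 * right-justified at the end of the page */
	unsigned long first = zhdr + CHUNK_SIZE;
	unsigned long last = zhdr + PAGE_SIZE - (last_chunks << CHUNK_SHIFT);

	/* handle_to_zbud_header(): masking recovers the page start */
	printf("first=%#lx -> zhdr=%#lx\n", first, first & PAGE_MASK);
	printf("last =%#lx -> zhdr=%#lx\n", last, last & PAGE_MASK);

	/* zbud_free()'s test: the first buddy is page-aligned once the
	 * header offset is removed */
	printf("first buddy? %d\n", ((first - CHUNK_SIZE) & ~PAGE_MASK) == 0);
	printf("last  buddy? %d\n", ((last - CHUNK_SIZE) & ~PAGE_MASK) == 0);
	return 0;
}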
[File diff suppressed because it is too large]