mm: vmpressure: allow in-kernel clients to subscribe for events

Currently, vmpressure is tied to memcg and its events are
available only to userspace clients. This patch removes
the dependency on CONFIG_MEMCG and adds a mechanism for
in-kernel clients to subscribe to vmpressure events. Raw
vmpressure values are delivered instead of vmpressure
levels, which gives clients the flexibility to act on
custom pressure levels that the vmpressure module does
not currently define.

Change-Id: I38010f166546e8d7f12f5f355b5dbfd6ba04d587
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
This commit is contained in:
Vinayak Menon 2015-03-04 16:38:28 +05:30
parent d3769327a0
commit 314a207926
3 changed files with 115 additions and 24 deletions

View file

@ -24,11 +24,13 @@ struct vmpressure {
struct mem_cgroup;
#ifdef CONFIG_MEMCG
extern int vmpressure_notifier_register(struct notifier_block *nb);
extern int vmpressure_notifier_unregister(struct notifier_block *nb);
extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
unsigned long scanned, unsigned long reclaimed);
extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio);
#ifdef CONFIG_MEMCG
extern void vmpressure_init(struct vmpressure *vmpr);
extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg);
extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr);
@ -39,9 +41,9 @@ extern int vmpressure_register_event(struct cgroup *cg, struct cftype *cft,
extern void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft,
struct eventfd_ctx *eventfd);
#else
/*
 * No-op variant of vmpressure() provided in the #else branch of the
 * CONFIG_MEMCG conditional: accepts the same arguments and does nothing.
 */
static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
unsigned long scanned, unsigned long reclaimed) {}
/*
 * No-op variant of vmpressure_prio() provided in the #else branch of the
 * CONFIG_MEMCG conditional: accepts the same arguments and does nothing.
 */
static inline void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg,
int prio) {}
/*
 * Stub used in the #else branch of the CONFIG_MEMCG conditional: it
 * ignores @memcg and always returns NULL, so callers must be prepared
 * for a NULL result in that configuration.
 */
static inline struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg)
{
return NULL;
}
#endif /* CONFIG_MEMCG */
#endif /* __LINUX_VMPRESSURE_H */

View file

@ -18,7 +18,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \
mm_init.o mmu_context.o percpu.o slab_common.o \
compaction.o balloon_compaction.o \
interval_tree.o $(mmu-y) \
showmem.o
showmem.o vmpressure.o
obj-y += init-mm.o
@ -52,7 +52,7 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o
obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o
obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o vmpressure.o
obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o
obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o
obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o

View file

@ -21,6 +21,8 @@
#include <linux/eventfd.h>
#include <linux/swap.h>
#include <linux/printk.h>
#include <linux/notifier.h>
#include <linux/init.h>
#include <linux/vmpressure.h>
/*
@ -48,6 +50,24 @@ static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16;
static const unsigned int vmpressure_level_med = 60;
static const unsigned int vmpressure_level_critical = 95;
static struct vmpressure global_vmpressure;
/*
 * Chain of in-kernel subscribers. It is only reached through
 * vmpressure_notifier_register(), vmpressure_notifier_unregister() and
 * vmpressure_notify() in this file, so it does not need external linkage.
 */
static BLOCKING_NOTIFIER_HEAD(vmpressure_notifier);
/**
 * vmpressure_notifier_register() - subscribe an in-kernel client
 * @nb: notifier block to add to the vmpressure notifier chain
 *
 * Adds @nb to the blocking notifier chain that vmpressure_notify() walks.
 *
 * Return: whatever blocking_notifier_chain_register() returns.
 */
int vmpressure_notifier_register(struct notifier_block *nb)
{
	int ret;

	ret = blocking_notifier_chain_register(&vmpressure_notifier, nb);

	return ret;
}
/**
 * vmpressure_notifier_unregister() - remove an in-kernel subscriber
 * @nb: notifier block to remove from the vmpressure notifier chain
 *
 * Return: whatever blocking_notifier_chain_unregister() returns.
 */
int vmpressure_notifier_unregister(struct notifier_block *nb)
{
	int ret;

	ret = blocking_notifier_chain_unregister(&vmpressure_notifier, nb);

	return ret;
}
/*
 * vmpressure_notify() - deliver a raw pressure value to in-kernel clients
 * @pressure: value computed by vmpressure_calc_pressure()
 *
 * Walks the vmpressure notifier chain, handing @pressure to each callback
 * as the notifier value and NULL as the data pointer. The chain's return
 * value is intentionally ignored.
 *
 * Only called from within this file, hence static.
 */
static void vmpressure_notify(unsigned long pressure)
{
	blocking_notifier_call_chain(&vmpressure_notifier, pressure, NULL);
}
/*
* When there are too few pages left to scan, vmpressure() may miss the
* critical pressure as number of pages will be less than "window size".
@ -74,6 +94,7 @@ static struct vmpressure *work_to_vmpressure(struct work_struct *work)
return container_of(work, struct vmpressure, work);
}
#ifdef CONFIG_MEMCG
static struct vmpressure *cg_to_vmpressure(struct cgroup *cg)
{
return css_to_vmpressure(cgroup_subsys_state(cg, mem_cgroup_subsys_id));
@ -89,6 +110,17 @@ static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr)
return NULL;
return memcg_to_vmpressure(memcg);
}
#else
/*
 * !CONFIG_MEMCG stub: ignores @cg and always returns NULL.
 * vmpressure_unregister_event() calls BUG() when it receives NULL here.
 */
static struct vmpressure *cg_to_vmpressure(struct cgroup *cg)
{
return NULL;
}
/*
 * !CONFIG_MEMCG stub: reports that @vmpr has no parent by always
 * returning NULL, which ends the parent walk in vmpressure_work_fn()
 * after its first iteration.
 */
static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr)
{
return NULL;
}
#endif
enum vmpressure_levels {
VMPRESSURE_LOW = 0,
@ -112,7 +144,7 @@ static enum vmpressure_levels vmpressure_level(unsigned long pressure)
return VMPRESSURE_LOW;
}
static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
static unsigned long vmpressure_calc_pressure(unsigned long scanned,
unsigned long reclaimed)
{
unsigned long scale = scanned + reclaimed;
@ -131,7 +163,7 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned,
pr_debug("%s: %3lu (s: %lu r: %lu)\n", __func__, pressure,
scanned, reclaimed);
return vmpressure_level(pressure);
return pressure;
}
struct vmpressure_event {
@ -145,9 +177,11 @@ static bool vmpressure_event(struct vmpressure *vmpr,
{
struct vmpressure_event *ev;
enum vmpressure_levels level;
unsigned long pressure;
bool signalled = false;
level = vmpressure_calc_level(scanned, reclaimed);
pressure = vmpressure_calc_pressure(scanned, reclaimed);
level = vmpressure_level(pressure);
mutex_lock(&vmpr->events_lock);
@ -197,24 +231,13 @@ static void vmpressure_work_fn(struct work_struct *work)
} while ((vmpr = vmpressure_parent(vmpr)));
}
/**
* vmpressure() - Account memory pressure through scanned/reclaimed ratio
* @gfp: reclaimer's gfp mask
* @memcg: cgroup memory controller handle
* @scanned: number of pages scanned
* @reclaimed: number of pages reclaimed
*
* This function should be called from the vmscan reclaim path to account
* "instantaneous" memory pressure (scanned/reclaimed ratio). The raw
* pressure index is then further refined and averaged over time.
*
* This function does not return any value.
*/
void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
void vmpressure_memcg(gfp_t gfp, struct mem_cgroup *memcg,
unsigned long scanned, unsigned long reclaimed)
{
struct vmpressure *vmpr = memcg_to_vmpressure(memcg);
BUG_ON(!vmpr);
/*
* Here we only want to account pressure that userland is able to
* help us with. For example, suppose that DMA zone is under
@ -251,6 +274,60 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
schedule_work(&vmpr->work);
}
/*
 * vmpressure_global() - account reclaim activity in the global window
 * @gfp:       reclaimer's gfp mask
 * @scanned:   number of pages scanned
 * @reclaimed: number of pages reclaimed
 *
 * Accumulates @scanned/@reclaimed into global_vmpressure. Once at least
 * vmpressure_win pages have been scanned, the window is closed, the raw
 * pressure is computed from the accumulated totals and handed to the
 * in-kernel subscribers through vmpressure_notify().
 *
 * Samples are ignored when @gfp carries none of __GFP_HIGHMEM,
 * __GFP_MOVABLE, __GFP_IO or __GFP_FS, or when nothing was scanned.
 */
static void vmpressure_global(gfp_t gfp, unsigned long scanned,
			      unsigned long reclaimed)
{
	struct vmpressure *vmpr = &global_vmpressure;
	unsigned long pressure;

	if (!(gfp & (__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_IO | __GFP_FS)))
		return;

	if (!scanned)
		return;

	/*
	 * Read the running totals and, if the window is full, reset them in
	 * the same critical section. Dropping the lock between the read and
	 * the reset would let a concurrent reclaimer add samples that are
	 * then zeroed without ever being reported, or let two reclaimers
	 * both observe a full window and notify twice.
	 */
	mutex_lock(&vmpr->sr_lock);
	vmpr->scanned += scanned;
	vmpr->reclaimed += reclaimed;
	scanned = vmpr->scanned;
	reclaimed = vmpr->reclaimed;
	if (scanned >= vmpressure_win) {
		vmpr->scanned = 0;
		vmpr->reclaimed = 0;
	}
	mutex_unlock(&vmpr->sr_lock);

	if (scanned < vmpressure_win)
		return;

	/* Subscribers are called with sr_lock released. */
	pressure = vmpressure_calc_pressure(scanned, reclaimed);
	vmpressure_notify(pressure);
}
/**
 * vmpressure() - Account memory pressure through scanned/reclaimed ratio
 * @gfp:	reclaimer's gfp mask
 * @memcg:	cgroup memory controller handle, or NULL
 * @scanned:	number of pages scanned
 * @reclaimed:	number of pages reclaimed
 *
 * Entry point for the vmscan reclaim path to report "instantaneous"
 * memory pressure (scanned/reclaimed ratio); the raw pressure index is
 * then further refined and averaged over time.
 *
 * A NULL @memcg additionally feeds the sample to vmpressure_global().
 * When CONFIG_MEMCG is enabled the sample is always passed on to
 * vmpressure_memcg(), whatever the value of @memcg.
 *
 * This function does not return any value.
 */
void vmpressure(gfp_t gfp, struct mem_cgroup *memcg,
		unsigned long scanned, unsigned long reclaimed)
{
	if (!memcg)
		vmpressure_global(gfp, scanned, reclaimed);

	/* Without memcg support there is no per-cgroup accounting to do. */
	if (!IS_ENABLED(CONFIG_MEMCG))
		return;

	vmpressure_memcg(gfp, memcg, scanned, reclaimed);
}
/**
* vmpressure_prio() - Account memory pressure through reclaimer priority level
* @gfp: reclaimer's gfp mask
@ -305,6 +382,8 @@ int vmpressure_register_event(struct cgroup *cg, struct cftype *cft,
struct vmpressure_event *ev;
int level;
BUG_ON(!vmpr);
for (level = 0; level < VMPRESSURE_NUM_LEVELS; level++) {
if (!strcmp(vmpressure_str_levels[level], args))
break;
@ -347,6 +426,9 @@ void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft,
struct vmpressure *vmpr = cg_to_vmpressure(cg);
struct vmpressure_event *ev;
if (!vmpr)
BUG();
mutex_lock(&vmpr->events_lock);
list_for_each_entry(ev, &vmpr->events, node) {
if (ev->efd != eventfd)
@ -372,3 +454,10 @@ void vmpressure_init(struct vmpressure *vmpr)
INIT_LIST_HEAD(&vmpr->events);
INIT_WORK(&vmpr->work, vmpressure_work_fn);
}
/*
 * vmpressure_global_init() - set up the global vmpressure instance
 *
 * Runs vmpressure_init() on global_vmpressure. Registered as a
 * late_initcall and referenced nowhere else, hence static.
 *
 * NOTE(review): vmpressure_global() takes global_vmpressure.sr_lock;
 * confirm that no reclaim can reach it before late_initcall time.
 *
 * Return: always 0.
 */
static int vmpressure_global_init(void)
{
	vmpressure_init(&global_vmpressure);

	return 0;
}
late_initcall(vmpressure_global_init);