diff --git a/include/linux/vmpressure.h b/include/linux/vmpressure.h index 76be077340ea..f62cf53b9d92 100644 --- a/include/linux/vmpressure.h +++ b/include/linux/vmpressure.h @@ -24,11 +24,13 @@ struct vmpressure { struct mem_cgroup; -#ifdef CONFIG_MEMCG +extern int vmpressure_notifier_register(struct notifier_block *nb); +extern int vmpressure_notifier_unregister(struct notifier_block *nb); extern void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, unsigned long scanned, unsigned long reclaimed); extern void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, int prio); +#ifdef CONFIG_MEMCG extern void vmpressure_init(struct vmpressure *vmpr); extern struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg); extern struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr); @@ -39,9 +41,9 @@ extern int vmpressure_register_event(struct cgroup *cg, struct cftype *cft, extern void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft, struct eventfd_ctx *eventfd); #else -static inline void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, - unsigned long scanned, unsigned long reclaimed) {} -static inline void vmpressure_prio(gfp_t gfp, struct mem_cgroup *memcg, - int prio) {} +static inline struct vmpressure *memcg_to_vmpressure(struct mem_cgroup *memcg) +{ + return NULL; +} #endif /* CONFIG_MEMCG */ #endif /* __LINUX_VMPRESSURE_H */ diff --git a/mm/Makefile b/mm/Makefile index 2d9707c470b5..8f7b2ebdac05 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -18,7 +18,7 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ mm_init.o mmu_context.o percpu.o slab_common.o \ compaction.o balloon_compaction.o \ interval_tree.o $(mmu-y) \ - showmem.o + showmem.o vmpressure.o obj-y += init-mm.o @@ -52,7 +52,7 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o -obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o vmpressure.o +obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o diff --git a/mm/vmpressure.c b/mm/vmpressure.c index 736a6011c2c8..bcc32fab7807 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -21,6 +21,8 @@ #include #include #include +#include +#include #include /* @@ -48,6 +50,24 @@ static const unsigned long vmpressure_win = SWAP_CLUSTER_MAX * 16; static const unsigned int vmpressure_level_med = 60; static const unsigned int vmpressure_level_critical = 95; +static struct vmpressure global_vmpressure; +BLOCKING_NOTIFIER_HEAD(vmpressure_notifier); + +int vmpressure_notifier_register(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&vmpressure_notifier, nb); +} + +int vmpressure_notifier_unregister(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&vmpressure_notifier, nb); +} + +void vmpressure_notify(unsigned long pressure) +{ + blocking_notifier_call_chain(&vmpressure_notifier, pressure, NULL); +} + /* * When there are too little pages left to scan, vmpressure() may miss the * critical pressure as number of pages will be less than "window size". @@ -74,6 +94,7 @@ static struct vmpressure *work_to_vmpressure(struct work_struct *work) return container_of(work, struct vmpressure, work); } +#ifdef CONFIG_MEMCG static struct vmpressure *cg_to_vmpressure(struct cgroup *cg) { return css_to_vmpressure(cgroup_subsys_state(cg, mem_cgroup_subsys_id)); @@ -89,6 +110,17 @@ static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr) return NULL; return memcg_to_vmpressure(memcg); } +#else +static struct vmpressure *cg_to_vmpressure(struct cgroup *cg) +{ + return NULL; +} + +static struct vmpressure *vmpressure_parent(struct vmpressure *vmpr) +{ + return NULL; +} +#endif enum vmpressure_levels { VMPRESSURE_LOW = 0, @@ -112,7 +144,7 @@ static enum vmpressure_levels vmpressure_level(unsigned long pressure) return VMPRESSURE_LOW; } -static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned, +static unsigned long vmpressure_calc_pressure(unsigned long scanned, unsigned long reclaimed) { unsigned long scale = scanned + reclaimed; @@ -131,7 +163,7 @@ static enum vmpressure_levels vmpressure_calc_level(unsigned long scanned, pr_debug("%s: %3lu (s: %lu r: %lu)\n", __func__, pressure, scanned, reclaimed); - return vmpressure_level(pressure); + return pressure; } struct vmpressure_event { @@ -145,9 +177,11 @@ static bool vmpressure_event(struct vmpressure *vmpr, { struct vmpressure_event *ev; enum vmpressure_levels level; + unsigned long pressure; bool signalled = false; - level = vmpressure_calc_level(scanned, reclaimed); + pressure = vmpressure_calc_pressure(scanned, reclaimed); + level = vmpressure_level(pressure); mutex_lock(&vmpr->events_lock); @@ -197,24 +231,13 @@ static void vmpressure_work_fn(struct work_struct *work) } while ((vmpr = vmpressure_parent(vmpr))); } -/** - * vmpressure() - Account memory pressure through scanned/reclaimed ratio - * @gfp: reclaimer's gfp mask - * @memcg: cgroup memory controller handle - * @scanned: number of pages scanned - * @reclaimed: number of pages reclaimed - * - * This function should be called from the vmscan reclaim path to account - * "instantaneous" memory pressure (scanned/reclaimed ratio). The raw - * pressure index is then further refined and averaged over time. - * - * This function does not return any value. - */ -void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, +void vmpressure_memcg(gfp_t gfp, struct mem_cgroup *memcg, unsigned long scanned, unsigned long reclaimed) { struct vmpressure *vmpr = memcg_to_vmpressure(memcg); + BUG_ON(!vmpr); + /* * Here we only want to account pressure that userland is able to * help us with. For example, suppose that DMA zone is under @@ -251,6 +274,60 @@ void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, schedule_work(&vmpr->work); } +void vmpressure_global(gfp_t gfp, unsigned long scanned, + unsigned long reclaimed) +{ + struct vmpressure *vmpr = &global_vmpressure; + unsigned long pressure; + + if (!(gfp & (__GFP_HIGHMEM | __GFP_MOVABLE | __GFP_IO | __GFP_FS))) + return; + + if (!scanned) + return; + + mutex_lock(&vmpr->sr_lock); + vmpr->scanned += scanned; + vmpr->reclaimed += reclaimed; + scanned = vmpr->scanned; + reclaimed = vmpr->reclaimed; + mutex_unlock(&vmpr->sr_lock); + + if (scanned < vmpressure_win) + return; + + mutex_lock(&vmpr->sr_lock); + vmpr->scanned = 0; + vmpr->reclaimed = 0; + mutex_unlock(&vmpr->sr_lock); + + pressure = vmpressure_calc_pressure(scanned, reclaimed); + vmpressure_notify(pressure); +} + +/** + * vmpressure() - Account memory pressure through scanned/reclaimed ratio + * @gfp: reclaimer's gfp mask + * @memcg: cgroup memory controller handle + * @scanned: number of pages scanned + * @reclaimed: number of pages reclaimed + * + * This function should be called from the vmscan reclaim path to account + * "instantaneous" memory pressure (scanned/reclaimed ratio). The raw + * pressure index is then further refined and averaged over time. + * + * This function does not return any value. + */ +void vmpressure(gfp_t gfp, struct mem_cgroup *memcg, + unsigned long scanned, unsigned long reclaimed) +{ + if (!memcg) + vmpressure_global(gfp, scanned, reclaimed); + + if (IS_ENABLED(CONFIG_MEMCG)) + vmpressure_memcg(gfp, memcg, scanned, reclaimed); +} + /** * vmpressure_prio() - Account memory pressure through reclaimer priority level * @gfp: reclaimer's gfp mask @@ -305,6 +382,8 @@ int vmpressure_register_event(struct cgroup *cg, struct cftype *cft, struct vmpressure_event *ev; int level; + BUG_ON(!vmpr); + for (level = 0; level < VMPRESSURE_NUM_LEVELS; level++) { if (!strcmp(vmpressure_str_levels[level], args)) break; @@ -347,6 +426,9 @@ void vmpressure_unregister_event(struct cgroup *cg, struct cftype *cft, struct vmpressure *vmpr = cg_to_vmpressure(cg); struct vmpressure_event *ev; + if (!vmpr) + BUG(); + mutex_lock(&vmpr->events_lock); list_for_each_entry(ev, &vmpr->events, node) { if (ev->efd != eventfd) @@ -372,3 +454,10 @@ void vmpressure_init(struct vmpressure *vmpr) INIT_LIST_HEAD(&vmpr->events); INIT_WORK(&vmpr->work, vmpressure_work_fn); } + +int vmpressure_global_init(void) +{ + vmpressure_init(&global_vmpressure); + return 0; +} +late_initcall(vmpressure_global_init);