diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 84ead2aa6f18..b4980b8f048e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -35,6 +35,7 @@ extern int mem_cgroup_charge(struct page *page, struct mm_struct *mm, extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); extern void mem_cgroup_uncharge_page(struct page *page); +extern void mem_cgroup_uncharge_cache_page(struct page *page); extern void mem_cgroup_move_lists(struct page *page, bool active); extern unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, @@ -53,7 +54,6 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); extern int mem_cgroup_prepare_migration(struct page *page, struct page *newpage); extern void mem_cgroup_end_migration(struct page *page); -extern int mem_cgroup_getref(struct page *page); /* * For memory reclaim. @@ -98,6 +98,10 @@ static inline void mem_cgroup_uncharge_page(struct page *page) { } +static inline void mem_cgroup_uncharge_cache_page(struct page *page) +{ +} + static inline void mem_cgroup_move_lists(struct page *page, bool active) { } @@ -123,10 +127,6 @@ static inline void mem_cgroup_end_migration(struct page *page) { } -static inline void mem_cgroup_getref(struct page *page) -{ -} - static inline int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem) { return 0; diff --git a/mm/filemap.c b/mm/filemap.c index 5d4c880d7cd9..2d3ec1ffc66e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -115,7 +115,7 @@ void __remove_from_page_cache(struct page *page) { struct address_space *mapping = page->mapping; - mem_cgroup_uncharge_page(page); + mem_cgroup_uncharge_cache_page(page); radix_tree_delete(&mapping->page_tree, page->index); page->mapping = NULL; mapping->nrpages--; @@ -474,12 +474,12 @@ int add_to_page_cache(struct page *page, struct address_space *mapping, mapping->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); } else - 
mem_cgroup_uncharge_page(page); + mem_cgroup_uncharge_cache_page(page); write_unlock_irq(&mapping->tree_lock); radix_tree_preload_end(); } else - mem_cgroup_uncharge_page(page); + mem_cgroup_uncharge_cache_page(page); out: return error; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index da5912b84551..a61706193c31 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -166,7 +166,6 @@ struct page_cgroup { struct list_head lru; /* per cgroup LRU list */ struct page *page; struct mem_cgroup *mem_cgroup; - int ref_cnt; /* cached, mapped, migrating */ int flags; }; #define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */ @@ -185,6 +184,7 @@ static enum zone_type page_cgroup_zid(struct page_cgroup *pc) enum charge_type { MEM_CGROUP_CHARGE_TYPE_CACHE = 0, MEM_CGROUP_CHARGE_TYPE_MAPPED, + MEM_CGROUP_CHARGE_TYPE_FORCE, /* used by force_empty */ }; /* @@ -552,9 +552,7 @@ retry: */ if (pc) { VM_BUG_ON(pc->page != page); - VM_BUG_ON(pc->ref_cnt <= 0); - - pc->ref_cnt++; + VM_BUG_ON(!pc->mem_cgroup); unlock_page_cgroup(page); goto done; } @@ -570,10 +568,7 @@ retry: * thread group leader migrates. It's possible that mm is not * set, if so charge the init_mm (happens for pagecache usage). */ - if (!memcg) { - if (!mm) - mm = &init_mm; - + if (likely(!memcg)) { rcu_read_lock(); mem = mem_cgroup_from_task(rcu_dereference(mm->owner)); /* @@ -609,7 +604,6 @@ retry: } } - pc->ref_cnt = 1; pc->mem_cgroup = mem; pc->page = page; /* @@ -653,6 +647,17 @@ err: int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { + /* + * If already mapped, we don't have to account. + * If page cache, page->mapping has address_space. + * But page->mapping may have out-of-use anon_vma pointer, + * detect it by PageAnon() check. newly-mapped-anon's page->mapping + * is NULL. 
+ */ + if (page_mapped(page) || (page->mapping && !PageAnon(page))) + return 0; + if (unlikely(!mm)) + mm = &init_mm; return mem_cgroup_charge_common(page, mm, gfp_mask, MEM_CGROUP_CHARGE_TYPE_MAPPED, NULL); } @@ -660,32 +665,17 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { - if (!mm) + if (unlikely(!mm)) mm = &init_mm; return mem_cgroup_charge_common(page, mm, gfp_mask, MEM_CGROUP_CHARGE_TYPE_CACHE, NULL); } -int mem_cgroup_getref(struct page *page) -{ - struct page_cgroup *pc; - - if (mem_cgroup_subsys.disabled) - return 0; - - lock_page_cgroup(page); - pc = page_get_page_cgroup(page); - VM_BUG_ON(!pc); - pc->ref_cnt++; - unlock_page_cgroup(page); - return 0; -} - /* - * Uncharging is always a welcome operation, we never complain, simply - * uncharge. + * uncharge if !page_mapped(page) */ -void mem_cgroup_uncharge_page(struct page *page) +static void +__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) { struct page_cgroup *pc; struct mem_cgroup *mem; @@ -704,29 +694,41 @@ void mem_cgroup_uncharge_page(struct page *page) goto unlock; VM_BUG_ON(pc->page != page); - VM_BUG_ON(pc->ref_cnt <= 0); - if (--(pc->ref_cnt) == 0) { - mz = page_cgroup_zoneinfo(pc); - spin_lock_irqsave(&mz->lru_lock, flags); - __mem_cgroup_remove_list(mz, pc); - spin_unlock_irqrestore(&mz->lru_lock, flags); + if ((ctype == MEM_CGROUP_CHARGE_TYPE_MAPPED) + && ((pc->flags & PAGE_CGROUP_FLAG_CACHE) + || page_mapped(page))) + goto unlock; - page_assign_page_cgroup(page, NULL); - unlock_page_cgroup(page); + mz = page_cgroup_zoneinfo(pc); + spin_lock_irqsave(&mz->lru_lock, flags); + __mem_cgroup_remove_list(mz, pc); + spin_unlock_irqrestore(&mz->lru_lock, flags); - mem = pc->mem_cgroup; - res_counter_uncharge(&mem->res, PAGE_SIZE); - css_put(&mem->css); + page_assign_page_cgroup(page, NULL); + unlock_page_cgroup(page); - kmem_cache_free(page_cgroup_cache, 
pc); - return; - } + mem = pc->mem_cgroup; + res_counter_uncharge(&mem->res, PAGE_SIZE); + css_put(&mem->css); + kmem_cache_free(page_cgroup_cache, pc); + return; unlock: unlock_page_cgroup(page); } +void mem_cgroup_uncharge_page(struct page *page) +{ + __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_MAPPED); +} + +void mem_cgroup_uncharge_cache_page(struct page *page) +{ + VM_BUG_ON(page_mapped(page)); + __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); +} + /* * Before starting migration, account against new page. */ @@ -757,15 +759,29 @@ int mem_cgroup_prepare_migration(struct page *page, struct page *newpage) return ret; } -/* remove redundant charge */ +/* remove redundant charge if migration failed */ void mem_cgroup_end_migration(struct page *newpage) { - mem_cgroup_uncharge_page(newpage); + /* + * At success, page->mapping is not NULL. + * special rollback care is necessary when + * 1. at migration failure. (newpage->mapping is cleared in this case) + * 2. the newpage was moved but not remapped again because the task + * exits and the newpage is obsolete. In this case, the new page + * may be a swapcache. So, we just call mem_cgroup_uncharge_page() + * always for avoiding mess. The page_cgroup will be removed if + * unnecessary. File cache pages are still on the radix-tree, so + * don't care about them. + */ + if (!newpage->mapping) + __mem_cgroup_uncharge_common(newpage, + MEM_CGROUP_CHARGE_TYPE_FORCE); + else if (PageAnon(newpage)) + mem_cgroup_uncharge_page(newpage); } /* * This routine traverse page_cgroup in given list and drop them all. - * This routine ignores page_cgroup->ref_cnt. * *And* this routine doesn't reclaim page itself, just removes page_cgroup. */ #define FORCE_UNCHARGE_BATCH (128) @@ -795,7 +811,8 @@ static void mem_cgroup_force_empty_list(struct mem_cgroup *mem, * if it's under page migration. 
*/ if (PageLRU(page)) { - mem_cgroup_uncharge_page(page); + __mem_cgroup_uncharge_common(page, + MEM_CGROUP_CHARGE_TYPE_FORCE); put_page(page); if (--count <= 0) { count = FORCE_UNCHARGE_BATCH; diff --git a/mm/migrate.c b/mm/migrate.c index f6d7f8efd1a8..d8c65a65c61d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -359,8 +359,7 @@ static int migrate_page_move_mapping(struct address_space *mapping, write_unlock_irq(&mapping->tree_lock); if (!PageSwapCache(newpage)) { - mem_cgroup_uncharge_page(page); - mem_cgroup_getref(newpage); + mem_cgroup_uncharge_cache_page(page); } return 0; diff --git a/mm/rmap.c b/mm/rmap.c index bf0a5b7cfb8e..abbd29f7c43f 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -576,14 +576,8 @@ void page_add_anon_rmap(struct page *page, VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); if (atomic_inc_and_test(&page->_mapcount)) __page_set_anon_rmap(page, vma, address); - else { + else __page_check_anon_rmap(page, vma, address); - /* - * We unconditionally charged during prepare, we uncharge here - * This takes care of balancing the reference counts - */ - mem_cgroup_uncharge_page(page); - } } /** @@ -614,12 +608,6 @@ void page_add_file_rmap(struct page *page) { if (atomic_inc_and_test(&page->_mapcount)) __inc_zone_page_state(page, NR_FILE_MAPPED); - else - /* - * We unconditionally charged during prepare, we uncharge here - * This takes care of balancing the reference counts - */ - mem_cgroup_uncharge_page(page); } #ifdef CONFIG_DEBUG_VM diff --git a/mm/shmem.c b/mm/shmem.c index 9ffbea9b79e1..d58305e8a484 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -922,20 +922,26 @@ found: error = 1; if (!inode) goto out; - /* Precharge page while we can wait, compensate afterwards */ + /* Precharge page using GFP_KERNEL while we can wait */ error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL); if (error) goto out; error = radix_tree_preload(GFP_KERNEL); - if (error) - goto uncharge; + if (error) { + mem_cgroup_uncharge_cache_page(page); + goto 
out; + } error = 1; spin_lock(&info->lock); ptr = shmem_swp_entry(info, idx, NULL); - if (ptr && ptr->val == entry.val) + if (ptr && ptr->val == entry.val) { error = add_to_page_cache(page, inode->i_mapping, idx, GFP_NOWAIT); + /* does mem_cgroup_uncharge_cache_page on error */ + } else /* we must compensate for our precharge above */ + mem_cgroup_uncharge_cache_page(page); + if (error == -EEXIST) { struct page *filepage = find_get_page(inode->i_mapping, idx); error = 1; @@ -961,8 +967,6 @@ found: shmem_swp_unmap(ptr); spin_unlock(&info->lock); radix_tree_preload_end(); -uncharge: - mem_cgroup_uncharge_page(page); out: unlock_page(page); page_cache_release(page); @@ -1319,7 +1323,7 @@ repeat: page_cache_release(swappage); goto failed; } - mem_cgroup_uncharge_page(swappage); + mem_cgroup_uncharge_cache_page(swappage); } page_cache_release(swappage); goto repeat; @@ -1358,6 +1362,8 @@ repeat: } if (!filepage) { + int ret; + spin_unlock(&info->lock); filepage = shmem_alloc_page(gfp, info, idx); if (!filepage) { @@ -1386,10 +1392,18 @@ repeat: swap = *entry; shmem_swp_unmap(entry); } - if (error || swap.val || 0 != add_to_page_cache_lru( - filepage, mapping, idx, GFP_NOWAIT)) { + ret = error || swap.val; + if (ret) + mem_cgroup_uncharge_cache_page(filepage); + else + ret = add_to_page_cache_lru(filepage, mapping, + idx, GFP_NOWAIT); + /* + * At add_to_page_cache_lru() failure, uncharge will + * be done automatically. + */ + if (ret) { spin_unlock(&info->lock); - mem_cgroup_uncharge_page(filepage); page_cache_release(filepage); shmem_unacct_blocks(info->flags, 1); shmem_free_blocks(inode, 1); @@ -1398,7 +1412,6 @@ repeat: goto failed; goto repeat; } - mem_cgroup_uncharge_page(filepage); info->flags |= SHMEM_PAGEIN; }