From 9fe207732b767427f90f01c13cfa2d83b4722c62 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 22 Feb 2013 16:34:37 -0800 Subject: [PATCH] swap: make each swap partition have one address_space When I use several fast SSDs for swap, swapper_space.tree_lock is heavily contended. This makes each swap partition have one address_space to reduce the lock contention. There is an array of address_space for swap. The swap entry type is the index to the array. In my test with 3 SSDs, this increases the swapout throughput by 20%. [akpm@linux-foundation.org: revert unneeded change to __add_to_swap_cache] Signed-off-by: Shaohua Li Cc: Hugh Dickins Acked-by: Rik van Riel Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Change-Id: I8ad2d301866b2873562a46df2952cf562b2b5aab Signed-off-by: Kevin F. Haggerty --- drivers/misc/sec_misc.c | 4 +- drivers/staging/android/lowmemorykiller.c | 4 +- fs/proc/meminfo.c | 4 +- include/linux/swap.h | 9 ++-- mm/memcontrol.c | 4 +- mm/mincore.c | 5 ++- mm/swap.c | 9 +++- mm/swap_state.c | 55 ++++++++++++++++------- mm/swapfile.c | 5 ++- mm/util.c | 9 ++-- 10 files changed, 70 insertions(+), 38 deletions(-) diff --git a/drivers/misc/sec_misc.c b/drivers/misc/sec_misc.c index 6b7e647d78b..18569b6c2f0 100644 --- a/drivers/misc/sec_misc.c +++ b/drivers/misc/sec_misc.c @@ -281,7 +281,7 @@ static ssize_t drop_caches_store si_meminfo(&i); printk("[Before]\nMemFree : %8lu kB\n", K(i.freeram)); printk("Cached : %8lu kB\n\n", K(global_page_state(NR_FILE_PAGES) - \ - total_swapcache_pages - i.bufferram)); + total_swapcache_pages() - i.bufferram)); iterate_supers(drop_pagecache_sb, NULL); drop_slab(); @@ -289,7 +289,7 @@ static ssize_t drop_caches_store si_meminfo(&i); printk("[After]\nMemFree : %8lu kB\n", K(i.freeram)); printk("Cached : %8lu kB\n\n", K(global_page_state(NR_FILE_PAGES) - \ - total_swapcache_pages - i.bufferram)); + total_swapcache_pages() - i.bufferram)); printk("Cached Drop done!\n"); } out: diff --git 
a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index 93b3e30ed5d..8d8cef068fc 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -209,8 +209,8 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc) flag = 1; } #endif - if (global_page_state(NR_SHMEM) + total_swapcache_pages < other_file) - other_file -= global_page_state(NR_SHMEM) + total_swapcache_pages; + if (global_page_state(NR_SHMEM) + total_swapcache_pages() < other_file) + other_file -= global_page_state(NR_SHMEM) + total_swapcache_pages(); else other_file = 0; diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 80e4645f799..3742ef26bc2 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -40,7 +40,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) * sysctl_overcommit_ratio / 100) + total_swap_pages; cached = global_page_state(NR_FILE_PAGES) - - total_swapcache_pages - i.bufferram; + total_swapcache_pages() - i.bufferram; if (cached < 0) cached = 0; @@ -109,7 +109,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(i.freeram), K(i.bufferram), K(cached), - K(total_swapcache_pages), + K(total_swapcache_pages()), K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]), K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]), K(pages[LRU_ACTIVE_ANON]), diff --git a/include/linux/swap.h b/include/linux/swap.h index 61590498024..865e8ac1adf 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -8,7 +8,7 @@ #include <linux/memcontrol.h> #include <linux/sched.h> #include <linux/node.h> - +#include <linux/fs.h> #include <linux/atomic.h> #include <asm/page.h> @@ -336,8 +336,9 @@ extern int __swap_writepage(struct page *page, struct writeback_control *wbc, extern void end_swap_bio_read(struct bio *bio, int err); /* linux/mm/swap_state.c */ -extern struct address_space swapper_space; -#define total_swapcache_pages swapper_space.nrpages +extern struct address_space swapper_spaces[]; +#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)]) +extern 
unsigned long total_swapcache_pages(void); extern void show_swap_cache_info(void); extern int add_to_swap(struct page *); extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); @@ -410,7 +411,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) #define get_nr_swap_pages() 0L #define total_swap_pages 0L -#define total_swapcache_pages 0UL +#define total_swapcache_pages() 0UL #define vm_swap_full(si) 0 #define si_swapinfo(val) \ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a8758f93278..599f5f33344 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5191,7 +5191,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma, * Because lookup_swap_cache() updates some statistics counter, * we call find_get_page() with swapper_space directly. */ - page = find_get_page(&swapper_space, ent.val); + page = find_get_page(swap_address_space(ent), ent.val); if (do_swap_account) entry->val = ent.val; @@ -5234,7 +5234,7 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma, swp_entry_t swap = radix_to_swp_entry(page); if (do_swap_account) *entry = swap; - page = find_get_page(&swapper_space, swap.val); + page = find_get_page(swap_address_space(swap), swap.val); } #endif return page; diff --git a/mm/mincore.c b/mm/mincore.c index 936b4cee8cb..da2be56a7b8 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -75,7 +75,7 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff) /* shmem/tmpfs may return swap: account for swapcache page too. 
*/ if (radix_tree_exceptional_entry(page)) { swp_entry_t swap = radix_to_swp_entry(page); - page = find_get_page(&swapper_space, swap.val); + page = find_get_page(swap_address_space(swap), swap.val); } #endif if (page) { @@ -135,7 +135,8 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd, } else { #ifdef CONFIG_SWAP pgoff = entry.val; - *vec = mincore_page(&swapper_space, pgoff); + *vec = mincore_page(swap_address_space(entry), + pgoff); #else WARN_ON(1); *vec = 1; diff --git a/mm/swap.c b/mm/swap.c index a8feea68a60..3ad6e87e49e 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -843,9 +843,14 @@ EXPORT_SYMBOL(pagevec_lookup_tag); void __init swap_setup(void) { unsigned long megs = totalram_pages >> (20 - PAGE_SHIFT); - #ifdef CONFIG_SWAP - bdi_init(swapper_space.backing_dev_info); + int i; + + bdi_init(swapper_spaces[0].backing_dev_info); + for (i = 0; i < MAX_SWAPFILES; i++) { + spin_lock_init(&swapper_spaces[i].tree_lock); + INIT_LIST_HEAD(&swapper_spaces[i].i_mmap_nonlinear); + } #endif /* Use a smaller cluster for small-memory machines */ diff --git a/mm/swap_state.c b/mm/swap_state.c index 49a75a713b6..41bd0e8462a 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -35,12 +35,12 @@ static struct backing_dev_info swap_backing_dev_info = { .capabilities = BDI_CAP_NO_ACCT_AND_WRITEBACK | BDI_CAP_SWAP_BACKED, }; -struct address_space swapper_space = { - .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), - .tree_lock = __SPIN_LOCK_UNLOCKED(swapper_space.tree_lock), - .a_ops = &swap_aops, - .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear), - .backing_dev_info = &swap_backing_dev_info, +struct address_space swapper_spaces[MAX_SWAPFILES] = { + [0 ... 
MAX_SWAPFILES - 1] = { + .page_tree = RADIX_TREE_INIT(GFP_ATOMIC|__GFP_NOWARN), + .a_ops = &swap_aops, + .backing_dev_info = &swap_backing_dev_info, + } }; #define INC_CACHE_INFO(x) do { swap_cache_info.x++; } while (0) @@ -52,9 +52,19 @@ static struct { unsigned long find_total; } swap_cache_info; +unsigned long total_swapcache_pages(void) +{ + int i; + unsigned long ret = 0; + + for (i = 0; i < MAX_SWAPFILES; i++) + ret += swapper_spaces[i].nrpages; + return ret; +} + void show_swap_cache_info(void) { - printk("%lu pages in swap cache\n", total_swapcache_pages); + printk("%lu pages in swap cache\n", total_swapcache_pages()); printk("Swap cache stats: add %lu, delete %lu, find %lu/%lu\n", swap_cache_info.add_total, swap_cache_info.del_total, swap_cache_info.find_success, swap_cache_info.find_total); @@ -70,6 +80,7 @@ void show_swap_cache_info(void) int __add_to_swap_cache(struct page *page, swp_entry_t entry) { int error; + struct address_space *address_space; VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(PageSwapCache(page)); @@ -79,14 +90,16 @@ int __add_to_swap_cache(struct page *page, swp_entry_t entry) SetPageSwapCache(page); set_page_private(page, entry.val); - spin_lock_irq(&swapper_space.tree_lock); - error = radix_tree_insert(&swapper_space.page_tree, entry.val, page); + address_space = swap_address_space(entry); + spin_lock_irq(&address_space->tree_lock); + error = radix_tree_insert(&address_space->page_tree, + entry.val, page); if (likely(!error)) { - total_swapcache_pages++; + address_space->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); INC_CACHE_INFO(add_total); } - spin_unlock_irq(&swapper_space.tree_lock); + spin_unlock_irq(&address_space->tree_lock); if (unlikely(error)) { /* @@ -122,14 +135,19 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) */ void __delete_from_swap_cache(struct page *page) { + swp_entry_t entry; + struct address_space *address_space; + VM_BUG_ON(!PageLocked(page)); 
VM_BUG_ON(!PageSwapCache(page)); VM_BUG_ON(PageWriteback(page)); - radix_tree_delete(&swapper_space.page_tree, page_private(page)); + entry.val = page_private(page); + address_space = swap_address_space(entry); + radix_tree_delete(&address_space->page_tree, page_private(page)); set_page_private(page, 0); ClearPageSwapCache(page); - total_swapcache_pages--; + address_space->nrpages--; __dec_zone_page_state(page, NR_FILE_PAGES); INC_CACHE_INFO(del_total); } @@ -195,12 +213,14 @@ int add_to_swap(struct page *page) void delete_from_swap_cache(struct page *page) { swp_entry_t entry; + struct address_space *address_space; entry.val = page_private(page); - spin_lock_irq(&swapper_space.tree_lock); + address_space = swap_address_space(entry); + spin_lock_irq(&address_space->tree_lock); __delete_from_swap_cache(page); - spin_unlock_irq(&swapper_space.tree_lock); + spin_unlock_irq(&address_space->tree_lock); swapcache_free(entry, page); page_cache_release(page); @@ -263,7 +283,7 @@ struct page * lookup_swap_cache(swp_entry_t entry) { struct page *page; - page = find_get_page(&swapper_space, entry.val); + page = find_get_page(swap_address_space(entry), entry.val); if (page) INC_CACHE_INFO(find_success); @@ -290,7 +310,8 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask, * called after lookup_swap_cache() failed, re-calling * that would confuse statistics. 
*/ - found_page = find_get_page(&swapper_space, entry.val); + found_page = find_get_page(swap_address_space(entry), + entry.val); if (found_page) break; diff --git a/mm/swapfile.c b/mm/swapfile.c index 4d488783c94..a083bc0b285 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -101,7 +101,7 @@ __try_to_reclaim_swap(struct swap_info_struct *si, unsigned long offset) struct page *page; int ret = 0; - page = find_get_page(&swapper_space, entry.val); + page = find_get_page(swap_address_space(entry), entry.val); if (!page) return 0; /* @@ -811,7 +811,8 @@ int free_swap_and_cache(swp_entry_t entry) p = swap_info_get(entry); if (p) { if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) { - page = find_get_page(&swapper_space, entry.val); + page = find_get_page(swap_address_space(entry), + entry.val); if (page && !trylock_page(page)) { page_cache_release(page); page = NULL; diff --git a/mm/util.c b/mm/util.c index d1488d5e162..cdef8868ca7 100644 --- a/mm/util.c +++ b/mm/util.c @@ -410,9 +410,12 @@ struct address_space *page_mapping(struct page *page) VM_BUG_ON(PageSlab(page)); #ifdef CONFIG_SWAP - if (unlikely(PageSwapCache(page))) - mapping = &swapper_space; - else + if (unlikely(PageSwapCache(page))) { + swp_entry_t entry; + + entry.val = page_private(page); + mapping = swap_address_space(entry); + } else #endif if ((unsigned long)mapping & PAGE_MAPPING_ANON) mapping = NULL;