mirror of https://github.com/team-infusion-developers/android_kernel_samsung_msm8976.git (synced 2024-10-31 18:09:19 +00:00)

commit 0c4bceb3dc
mm: page_alloc: use word-based accesses for get/set pageblock bitmaps

The test_bit operations in get/set pageblock flags are expensive. This patch reads the bitmap on a word basis and uses shifts and masks to isolate the bits of interest. Similarly, masks are used to set a local copy of the bitmap, and cmpxchg is then used to update the bitmap if no other changes have been made in parallel.

In a test running dd onto tmpfs, the overhead of the pageblock-related functions went from 1.27% in profiles to 0.5%.

In addition to the performance benefits, this patch closes races that are possible between:

a) get_ and set_pageblock_migratetype(), where get_pageblock_migratetype() reads part of the bits before and the other part of the bits after set_pageblock_migratetype() has updated them.

b) set_pageblock_migratetype() and set_pageblock_skip(), where the non-atomic read-modify-update set-bit operation in set_pageblock_skip() will cause lost updates to some bits changed in set_pageblock_migratetype().

Joonsoo Kim first reported case a) via code inspection. Vlastimil Babka's testing with a debug patch showed that either a) or b) occurs roughly once per mmtests' stress-highalloc benchmark (although not necessarily in the same pageblock). Furthermore, during development of unrelated compaction patches it was observed that with frequent calls to {start,undo}_isolate_page_range() the race occurs several thousand times and has resulted in NULL pointer dereferences in move_freepages() and free_one_page(), in places where free_list[migratetype] is manipulated by e.g. list_move(). Further debugging confirmed that migratetype had an invalid value of 6, causing out-of-bounds access to the free_list array.

That confirmed that the race exists, although it may be extremely rare, and is currently only fatal where page isolation is performed due to memory hot-remove. Races on pageblocks being updated by set_pageblock_migratetype(), where both the old and new migratetype are lower than MIGRATE_RESERVE, currently cannot result in an invalid value being observed, although theoretically they may still lead to unexpected creation or destruction of MIGRATE_RESERVE pageblocks. Furthermore, things could get suddenly worse when memory isolation is used more, or when new migratetypes are added.

After this patch, the race has no longer been observed in testing.

Signed-off-by: Mel Gorman <mgorman@suse.de>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reported-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Reported-and-tested-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jan Kara <jack@suse.cz>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Hugh Dickins <hughd@google.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Change-Id: Ibbcf2ba494831b5f29039ef82be629cb5eacb906
Git-commit: e58469bafd0524e848c3733bc3918d854595e20f
Git-repo: git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
[vinmenon@codeaurora.org: resolve trivial merge conflicts]
Signed-off-by: Vinayak Menon <vinmenon@codeaurora.org>
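Editor's note: the following is a minimal user-space sketch of the word-based read and cmpxchg-style update the commit message describes. It is not the kernel implementation (mm/page_alloc.c operates on the zone's pageblock bitmap with the kernel's ACCESS_ONCE() and cmpxchg(), and computes the shift from the other end of the word); the names demo_bitmap, demo_get_flags and demo_set_flags are invented for the example, and C11 atomics stand in for the kernel primitives.

#include <stdatomic.h>
#include <stdio.h>

#define BITS_PER_WORD (8 * sizeof(unsigned long))

/* Toy bitmap: a few flag bits per pageblock, packed into words. */
static _Atomic unsigned long demo_bitmap[4];

/* Read all flag bits covered by 'mask' at bit index 'bitidx'. */
static unsigned long demo_get_flags(unsigned long bitidx, unsigned long mask)
{
	unsigned long word = atomic_load(&demo_bitmap[bitidx / BITS_PER_WORD]);

	/* One word-sized load, then shift and mask: no per-bit test_bit(). */
	return (word >> (bitidx % BITS_PER_WORD)) & mask;
}

/* Replace the flag bits covered by 'mask' with 'flags', atomically. */
static void demo_set_flags(unsigned long flags, unsigned long bitidx,
			   unsigned long mask)
{
	_Atomic unsigned long *wordp = &demo_bitmap[bitidx / BITS_PER_WORD];
	unsigned long shift = bitidx % BITS_PER_WORD;
	unsigned long old = atomic_load(wordp);

	/*
	 * Retry loop: build the new word from a local copy and publish it
	 * only if nobody changed the word in the meantime, mirroring the
	 * cmpxchg() loop this patch adds to set_pageblock_flags_mask().
	 */
	while (!atomic_compare_exchange_weak(wordp, &old,
			(old & ~(mask << shift)) | (flags << shift)))
		;	/* 'old' was reloaded by the failed exchange; retry */
}

int main(void)
{
	demo_set_flags(0x5, 8, 0xf);	/* set 4 flag bits at bit index 8 */
	printf("flags = %lx\n", demo_get_flags(8, 0xf));	/* prints 5 */
	return 0;
}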
114 lines
3.4 KiB
C
/*
 * Macros for manipulating and testing flags related to a
 * pageblock_nr_pages number of pages.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation version 2 of the License
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2006
 *
 * Original author, Mel Gorman
 * Major cleanups and reduction of bit operations, Andy Whitcroft
 */
#ifndef PAGEBLOCK_FLAGS_H
#define PAGEBLOCK_FLAGS_H

#include <linux/types.h>

/* Bit indices that affect a whole block of pages */
enum pageblock_bits {
	PB_migrate,
	PB_migrate_end = PB_migrate + 3 - 1,
			/* 3 bits required for migrate types */
	PB_migrate_skip,/* If set the block is skipped by compaction */

	/*
	 * Assume the bits will always align on a word. If this assumption
	 * changes then get/set pageblock needs updating.
	 */
	NR_PAGEBLOCK_BITS
};
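
/*
 * Editor's note: with 3 migratetype bits plus the skip bit, each pageblock
 * consumes NR_PAGEBLOCK_BITS = 4 bits of the bitmap, so e.g. a 64-bit word
 * packs the flags for 16 pageblocks.
 */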

#ifdef CONFIG_HUGETLB_PAGE

#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE

/* Huge page sizes are variable */
extern int pageblock_order;

#else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */

/* Huge pages are a constant size */
#define pageblock_order		HUGETLB_PAGE_ORDER

#endif /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */

#else /* CONFIG_HUGETLB_PAGE */

/* If huge pages are not used, group by MAX_ORDER_NR_PAGES */
#define pageblock_order		(MAX_ORDER-1)

#endif /* CONFIG_HUGETLB_PAGE */

#define pageblock_nr_pages	(1UL << pageblock_order)
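
/*
 * Editor's note: for example, with 4 KiB base pages and the default
 * MAX_ORDER of 11, pageblock_order is 10 and pageblock_nr_pages is 1024,
 * i.e. the flags below each describe a 4 MiB block of physical memory.
 */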

/* Forward declaration */
struct page;

unsigned long get_pageblock_flags_mask(struct page *page,
				unsigned long end_bitidx,
				unsigned long mask);
void set_pageblock_flags_mask(struct page *page,
				unsigned long flags,
				unsigned long end_bitidx,
				unsigned long mask);

/* Declarations for getting and setting flags. See mm/page_alloc.c */
static inline unsigned long get_pageblock_flags_group(struct page *page,
					int start_bitidx, int end_bitidx)
{
	unsigned long nr_flag_bits = end_bitidx - start_bitidx + 1;
	unsigned long mask = (1 << nr_flag_bits) - 1;

	return get_pageblock_flags_mask(page, end_bitidx, mask);
}

static inline void set_pageblock_flags_group(struct page *page,
					unsigned long flags,
					int start_bitidx, int end_bitidx)
{
	unsigned long nr_flag_bits = end_bitidx - start_bitidx + 1;
	unsigned long mask = (1 << nr_flag_bits) - 1;

	set_pageblock_flags_mask(page, flags, end_bitidx, mask);
}
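
/*
 * Editor's note: typical usage (see linux/mmzone.h and mm/page_alloc.c at
 * the time of this commit): the migratetype accessors are built on the
 * group helpers above, along the lines of
 *
 *	get_pageblock_flags_group(page, PB_migrate, PB_migrate_end)
 *
 * to read a pageblock's migratetype, and
 *
 *	set_pageblock_flags_group(page, migratetype,
 *				  PB_migrate, PB_migrate_end)
 *
 * to change it; both resolve to a single word access with a 3-bit mask.
 */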

#ifdef CONFIG_COMPACTION
#define get_pageblock_skip(page) \
			get_pageblock_flags_group(page, PB_migrate_skip, \
							PB_migrate_skip)
#define clear_pageblock_skip(page) \
			set_pageblock_flags_group(page, 0, PB_migrate_skip, \
							PB_migrate_skip)
#define set_pageblock_skip(page) \
			set_pageblock_flags_group(page, 1, PB_migrate_skip, \
							PB_migrate_skip)
#endif /* CONFIG_COMPACTION */

#define get_pageblock_flags(page) \
			get_pageblock_flags_group(page, 0, PB_migrate_end)
#define set_pageblock_flags(page, flags) \
			set_pageblock_flags_group(page, flags, \
							0, PB_migrate_end)

#endif	/* PAGEBLOCK_FLAGS_H */