[IA64] Fix NUMA configuration issue

There is a NUMA memory configuration issue in 2.6.24:

A 2-node machine of ours has got the following memory layout:

Node 0:	0 - 2 Gbytes
Node 0:	4 - 8 Gbytes
Node 1:	8 - 16 Gbytes
Node 0:	16 - 18 Gbytes

"efi_memmap_init()" merges the three last ranges into one.

"register_active_ranges()" is called as follows:

efi_memmap_walk(register_active_ranges, NULL);

i.e. once for the 4 - 18 Gbytes range. It picks up the node
number from the start address, and registers all the memory for
the node #0.

"register_active_ranges()" should be called as follows to
make sure there is no merged address range at its entry:

efi_memmap_walk(filter_memory, register_active_ranges);

"filter_memory()" is similar to "filter_rsvd_memory()",
but the reserved memory ranges are not filtered out.

Signed-off-by: Zoltan Menyhart <Zoltan.Menyhart@bull.net>
Signed-off-by: Tony Luck <tony.luck@intel.com>
This commit is contained in:
Zoltan Menyhart 2008-04-11 15:21:35 -07:00 committed by Tony Luck
parent c19b2930df
commit 98075d245a
5 changed files with 29 additions and 7 deletions

View file

@ -176,6 +176,29 @@ filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
return 0;
}
/*
* Similar to "filter_rsvd_memory()", but the reserved memory ranges
* are not filtered out.
*/
int __init
filter_memory(unsigned long start, unsigned long end, void *arg)
{
void (*func)(unsigned long, unsigned long, int);
#if IGNORE_PFN0
if (start == PAGE_OFFSET) {
printk(KERN_WARNING "warning: skipping physical page 0\n");
start += PAGE_SIZE;
if (start >= end)
return 0;
}
#endif
func = arg;
if (start < end)
call_pernode_memory(__pa(start), end - start, func);
return 0;
}
static void __init
sort_regions (struct rsvd_region *rsvd_region, int max)
{

View file

@ -253,7 +253,7 @@ paging_init (void)
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_VIRTUAL_MEM_MAP
efi_memmap_walk(register_active_ranges, NULL);
efi_memmap_walk(filter_memory, register_active_ranges);
efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
if (max_gap < LARGE_GAP) {
vmem_map = (struct page *) 0;

View file

@ -444,7 +444,7 @@ void __init find_memory(void)
mem_data[node].min_pfn = ~0UL;
}
efi_memmap_walk(register_active_ranges, NULL);
efi_memmap_walk(filter_memory, register_active_ranges);
/*
* Initialize the boot memory maps in reverse order since that's

View file

@ -547,12 +547,10 @@ find_largest_hole (u64 start, u64 end, void *arg)
#endif /* CONFIG_VIRTUAL_MEM_MAP */
int __init
register_active_ranges(u64 start, u64 end, void *arg)
register_active_ranges(u64 start, u64 len, int nid)
{
int nid = paddr_to_nid(__pa(start));
u64 end = start + len;
if (nid < 0)
nid = 0;
#ifdef CONFIG_KEXEC
if (start > crashk_res.start && start < crashk_res.end)
start = crashk_res.end;

View file

@ -35,6 +35,7 @@ extern void find_memory (void);
extern void reserve_memory (void);
extern void find_initrd (void);
extern int filter_rsvd_memory (unsigned long start, unsigned long end, void *arg);
extern int filter_memory (unsigned long start, unsigned long end, void *arg);
extern unsigned long efi_memmap_init(unsigned long *s, unsigned long *e);
extern int find_max_min_low_pfn (unsigned long , unsigned long, void *);
@ -56,7 +57,7 @@ extern int reserve_elfcorehdr(unsigned long *start, unsigned long *end);
#define IGNORE_PFN0 1 /* XXX fix me: ignore pfn 0 until TLB miss handler is updated... */
extern int register_active_ranges(u64 start, u64 end, void *arg);
extern int register_active_ranges(u64 start, u64 len, int nid);
#ifdef CONFIG_VIRTUAL_MEM_MAP
# define LARGE_GAP 0x40000000 /* Use virtual mem map if hole is > than this */