mm: fix boundary checking in free_bootmem_core

With NUMA enabled, some callers could have a range of memory on one node
but try to free it on another node.  This can cause some pages to be
freed wrongly.

For example, this happens when we allocate 128g of boot RAM early for
gart/swiotlb and free that range later so that gart/swiotlb can get some
range afterwards.

With this patch, we don't need to care which node holds the range; we can
just loop and call free_bootmem_node for all online nodes.

This patch makes free_bootmem_core() more robust by trimming the sidx
and eidx according to the RAM range that the node has.

It also makes free_bootmem_core() handle the out-of-range case.  We can
use bdata_list to make sure the range is actually freed, so next time we
don't need to loop over online nodes and can use free_bootmem()
directly.

Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: Andi Kleen <ak@suse.de>
Cc: Yasunori Goto <y-goto@jp.fujitsu.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Tested-by: Ingo Molnar <mingo@elte.hu>
Cc: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
Yinghai Lu 2008-03-24 12:29:45 -07:00 committed by Linus Torvalds
parent 2875fb65f8
commit 5a982cbc7b

View file

@ -125,6 +125,7 @@ static int __init reserve_bootmem_core(bootmem_data_t *bdata,
BUG_ON(!size); BUG_ON(!size);
BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn); BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn);
BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn); BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn);
BUG_ON(addr < bdata->node_boot_start);
sidx = PFN_DOWN(addr - bdata->node_boot_start); sidx = PFN_DOWN(addr - bdata->node_boot_start);
eidx = PFN_UP(addr + size - bdata->node_boot_start); eidx = PFN_UP(addr + size - bdata->node_boot_start);
@ -156,21 +157,31 @@ static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
unsigned long sidx, eidx; unsigned long sidx, eidx;
unsigned long i; unsigned long i;
BUG_ON(!size);
/* out range */
if (addr + size < bdata->node_boot_start ||
PFN_DOWN(addr) > bdata->node_low_pfn)
return;
/* /*
* round down end of usable mem, partially free pages are * round down end of usable mem, partially free pages are
* considered reserved. * considered reserved.
*/ */
BUG_ON(!size);
BUG_ON(PFN_DOWN(addr + size) > bdata->node_low_pfn);
if (addr < bdata->last_success) if (addr >= bdata->node_boot_start && addr < bdata->last_success)
bdata->last_success = addr; bdata->last_success = addr;
/* /*
* Round up the beginning of the address. * Round up to index to the range.
*/ */
sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start); if (PFN_UP(addr) > PFN_DOWN(bdata->node_boot_start))
sidx = PFN_UP(addr) - PFN_DOWN(bdata->node_boot_start);
else
sidx = 0;
eidx = PFN_DOWN(addr + size - bdata->node_boot_start); eidx = PFN_DOWN(addr + size - bdata->node_boot_start);
if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
for (i = sidx; i < eidx; i++) { for (i = sidx; i < eidx; i++) {
if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map))) if (unlikely(!test_and_clear_bit(i, bdata->node_bootmem_map)))
@ -421,7 +432,9 @@ int __init reserve_bootmem(unsigned long addr, unsigned long size,
void __init free_bootmem(unsigned long addr, unsigned long size) void __init free_bootmem(unsigned long addr, unsigned long size)
{ {
free_bootmem_core(NODE_DATA(0)->bdata, addr, size); bootmem_data_t *bdata;
list_for_each_entry(bdata, &bdata_list, list)
free_bootmem_core(bdata, addr, size);
} }
unsigned long __init free_all_bootmem(void) unsigned long __init free_all_bootmem(void)