linux_dsm_epyc7002/arch/ia64/mm/numa.c

98 lines
2.6 KiB
C
Raw Normal View History

/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* This file contains NUMA specific variables and functions which can
* be split away from DISCONTIGMEM and are used on NUMA machines with
* contiguous memory.
*
* 2002/08/07 Erich Focht <efocht@ess.nec.de>
*/
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/node.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/module.h>
#include <asm/mmzone.h>
#include <asm/numa.h>
/*
* The following structures are usually initialized by ACPI or
* similar mechanisms and describe the NUMA characteristics of the machine.
*/
int num_node_memblks;
struct node_memblk_s node_memblk[NR_NODE_MEMBLKS];
struct node_cpuid_s node_cpuid[NR_CPUS] =
{ [0 ... NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } };
/*
* This is a matrix with "distances" between nodes, they should be
* proportional to the memory access latency ratios.
*/
u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES];
/* Identify which cnode a physical address resides on */
int
paddr_to_nid(unsigned long paddr)
{
int i;
for (i = 0; i < num_node_memblks; i++)
if (paddr >= node_memblk[i].start_paddr &&
paddr < node_memblk[i].start_paddr + node_memblk[i].size)
break;
return (i < num_node_memblks) ? node_memblk[i].nid : (num_node_memblks ? -1 : 0);
}
#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_NUMA)
/*
* Because of holes evaluate on section limits.
* If the section of memory exists, then return the node where the section
* resides. Otherwise return node 0 as the default. This is used by
* SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where
* the section resides.
*/
mm: clean up for early_pfn_to_nid() What's happening is that the assertion in mm/page_alloc.c:move_freepages() is triggering: BUG_ON(page_zone(start_page) != page_zone(end_page)); Once I knew this is what was happening, I added some annotations: if (unlikely(page_zone(start_page) != page_zone(end_page))) { printk(KERN_ERR "move_freepages: Bogus zones: " "start_page[%p] end_page[%p] zone[%p]\n", start_page, end_page, zone); printk(KERN_ERR "move_freepages: " "start_zone[%p] end_zone[%p]\n", page_zone(start_page), page_zone(end_page)); printk(KERN_ERR "move_freepages: " "start_pfn[0x%lx] end_pfn[0x%lx]\n", page_to_pfn(start_page), page_to_pfn(end_page)); printk(KERN_ERR "move_freepages: " "start_nid[%d] end_nid[%d]\n", page_to_nid(start_page), page_to_nid(end_page)); ... And here's what I got: move_freepages: Bogus zones: start_page[2207d0000] end_page[2207dffc0] zone[fffff8103effcb00] move_freepages: start_zone[fffff8103effcb00] end_zone[fffff8003fffeb00] move_freepages: start_pfn[0x81f600] end_pfn[0x81f7ff] move_freepages: start_nid[1] end_nid[0] My memory layout on this box is: [ 0.000000] Zone PFN ranges: [ 0.000000] Normal 0x00000000 -> 0x0081ff5d [ 0.000000] Movable zone start PFN for each node [ 0.000000] early_node_map[8] active PFN ranges [ 0.000000] 0: 0x00000000 -> 0x00020000 [ 0.000000] 1: 0x00800000 -> 0x0081f7ff [ 0.000000] 1: 0x0081f800 -> 0x0081fe50 [ 0.000000] 1: 0x0081fed1 -> 0x0081fed8 [ 0.000000] 1: 0x0081feda -> 0x0081fedb [ 0.000000] 1: 0x0081fedd -> 0x0081fee5 [ 0.000000] 1: 0x0081fee7 -> 0x0081ff51 [ 0.000000] 1: 0x0081ff59 -> 0x0081ff5d So it's a block move in that 0x81f600-->0x81f7ff region which triggers the problem. This patch: Declaration of early_pfn_to_nid() is scattered over per-arch include files, and it seems it's complicated to know when the declaration is used. I think it makes fix-for-memmap-init not easy. This patch moves all declaration to include/linux/mm.h After this, if !CONFIG_NODES_POPULATES_NODE_MAP && !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -> Use static definition in include/linux/mm.h else if !CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID -> Use generic definition in mm/page_alloc.c else -> per-arch back end function will be called. Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Tested-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Reported-by: David Miller <davem@davemlloft.net> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: <stable@kernel.org> [2.6.25.x, 2.6.26.x, 2.6.27.x, 2.6.28.x] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2009-02-19 05:48:32 +07:00
int __meminit __early_pfn_to_nid(unsigned long pfn)
{
int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec;
for (i = 0; i < num_node_memblks; i++) {
ssec = node_memblk[i].start_paddr >> PA_SECTION_SHIFT;
esec = (node_memblk[i].start_paddr + node_memblk[i].size +
((1L << PA_SECTION_SHIFT) - 1)) >> PA_SECTION_SHIFT;
if (section >= ssec && section < esec)
return node_memblk[i].nid;
}
return -1;
}
void __cpuinit numa_clear_node(int cpu)
{
unmap_cpu_from_node(cpu, NUMA_NO_NODE);
}
#ifdef CONFIG_MEMORY_HOTPLUG
/*
* SRAT information is stored in node_memblk[], then we can use SRAT
* information at memory-hot-add if necessary.
*/
int memory_add_physaddr_to_nid(u64 addr)
{
int nid = paddr_to_nid(addr);
if (nid < 0)
return 0;
return nid;
}
EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
#endif
#endif