linux_dsm_epyc7002/include/linux/bootmem.h

180 lines
5.5 KiB
C
Raw Normal View History

/*
* Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999
*/
#ifndef _LINUX_BOOTMEM_H
#define _LINUX_BOOTMEM_H
#include <linux/mmzone.h>
#include <asm/dma.h>
/*
* simple boot-time physical memory area allocator.
*/
extern unsigned long max_low_pfn;
extern unsigned long min_low_pfn;
/*
* highest page
*/
extern unsigned long max_pfn;
#ifndef CONFIG_NO_BOOTMEM
/*
* node_bootmem_map is a map pointer - the bits represent all physical
* memory pages (including holes) on the node.
*/
typedef struct bootmem_data {
unsigned long node_min_pfn;
unsigned long node_low_pfn;
void *node_bootmem_map;
unsigned long last_end_off;
unsigned long hint_idx;
struct list_head list;
} bootmem_data_t;
extern bootmem_data_t bootmem_node_data[];
#endif
extern unsigned long bootmem_bootmap_pages(unsigned long);
extern unsigned long init_bootmem_node(pg_data_t *pgdat,
unsigned long freepfn,
unsigned long startpfn,
unsigned long endpfn);
extern unsigned long init_bootmem(unsigned long addr, unsigned long memend);
extern unsigned long free_all_bootmem(void);
mm: accurately calculate zone->managed_pages for highmem zones Commit "mm: introduce new field 'managed_pages' to struct zone" assumes that all highmem pages will be freed into the buddy system by function mem_init(). But that's not always true, some architectures may reserve some highmem pages during boot. For example PPC may allocate highmem pages for giagant HugeTLB pages, and several architectures have code to check PageReserved flag to exclude highmem pages allocated during boot when freeing highmem pages into the buddy system. So treat highmem pages in the same way as normal pages, that is to: 1) reset zone->managed_pages to zero in mem_init(). 2) recalculate managed_pages when freeing pages into the buddy system. Signed-off-by: Jiang Liu <jiang.liu@huawei.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Tejun Heo <tj@kernel.org> Cc: Joonsoo Kim <js1304@gmail.com> Cc: Yinghai Lu <yinghai@kernel.org> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Minchan Kim <minchan@kernel.org> Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Marek Szyprowski <m.szyprowski@samsung.com> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: <sworddragon2@aol.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: David Howells <dhowells@redhat.com> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jeremy Fitzhardinge <jeremy@goop.org> Cc: Jianguo Wu <wujianguo@huawei.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Michel Lespinasse <walken@google.com> Cc: Rik van Riel <riel@redhat.com> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Tang Chen <tangchen@cn.fujitsu.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Wen Congyang <wency@cn.fujitsu.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> Cc: Russell King <rmk@arm.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-07-04 05:03:11 +07:00
extern void reset_all_zones_managed_pages(void);
extern void free_bootmem_node(pg_data_t *pgdat,
unsigned long addr,
unsigned long size);
extern void free_bootmem(unsigned long physaddr, unsigned long size);
extern void free_bootmem_late(unsigned long physaddr, unsigned long size);
memory-hotplug: common APIs to support page tables hot-remove When memory is removed, the corresponding pagetables should alse be removed. This patch introduces some common APIs to support vmemmap pagetable and x86_64 architecture direct mapping pagetable removing. All pages of virtual mapping in removed memory cannot be freed if some pages used as PGD/PUD include not only removed memory but also other memory. So this patch uses the following way to check whether a page can be freed or not. 1) When removing memory, the page structs of the removed memory are filled with 0FD. 2) All page structs are filled with 0xFD on PT/PMD, PT/PMD can be cleared. In this case, the page used as PT/PMD can be freed. For direct mapping pages, update direct_pages_count[level] when we freed their pagetables. And do not free the pages again because they were freed when offlining. For vmemmap pages, free the pages and their pagetables. For larger pages, do not split them into smaller ones because there is no way to know if the larger page has been split. As a result, there is no way to decide when to split. We deal the larger pages in the following way: 1) For direct mapped pages, all the pages were freed when they were offlined. And since menmory offline is done section by section, all the memory ranges being removed are aligned to PAGE_SIZE. So only need to deal with unaligned pages when freeing vmemmap pages. 2) For vmemmap pages being used to store page_struct, if part of the larger page is still in use, just fill the unused part with 0xFD. And when the whole page is fulfilled with 0xFD, then free the larger page. [akpm@linux-foundation.org: fix typo in comment] [tangchen@cn.fujitsu.com: do not calculate direct mapping pages when freeing vmemmap pagetables] [tangchen@cn.fujitsu.com: do not free direct mapping pages twice] [tangchen@cn.fujitsu.com: do not free page split from hugepage one by one] [tangchen@cn.fujitsu.com: do not split pages when freeing pagetable pages] [akpm@linux-foundation.org: use pmd_page_vaddr()] [akpm@linux-foundation.org: fix used-uninitialised bug] Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> Signed-off-by: Jianguo Wu <wujianguo@huawei.com> Signed-off-by: Wen Congyang <wency@cn.fujitsu.com> Signed-off-by: Tang Chen <tangchen@cn.fujitsu.com> Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> Cc: Jiang Liu <jiang.liu@huawei.com> Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Cc: Wu Jianguo <wujianguo@huawei.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-23 07:33:04 +07:00
extern void __free_pages_bootmem(struct page *page, unsigned int order);
/*
* Flags for reserve_bootmem (also if CONFIG_HAVE_ARCH_BOOTMEM_NODE,
* the architecture-specific code should honor this).
*
* If flags is 0, then the return value is always 0 (success). If
* flags contains BOOTMEM_EXCLUSIVE, then -EBUSY is returned if the
* memory already was reserved.
*/
#define BOOTMEM_DEFAULT 0
#define BOOTMEM_EXCLUSIVE (1<<0)
extern int reserve_bootmem(unsigned long addr,
unsigned long size,
int flags);
extern int reserve_bootmem_node(pg_data_t *pgdat,
unsigned long physaddr,
unsigned long size,
int flags);
extern void *__alloc_bootmem(unsigned long size,
unsigned long align,
unsigned long goal);
extern void *__alloc_bootmem_nopanic(unsigned long size,
unsigned long align,
unsigned long goal);
extern void *__alloc_bootmem_node(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
unsigned long goal);
void *__alloc_bootmem_node_high(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
unsigned long goal);
extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
unsigned long goal);
void *___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
unsigned long goal,
unsigned long limit);
extern void *__alloc_bootmem_low(unsigned long size,
unsigned long align,
unsigned long goal);
void *__alloc_bootmem_low_nopanic(unsigned long size,
unsigned long align,
unsigned long goal);
extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
unsigned long size,
unsigned long align,
unsigned long goal);
#ifdef CONFIG_NO_BOOTMEM
/* We are using top down, so it is safe to use 0 here */
#define BOOTMEM_LOW_LIMIT 0
#else
#define BOOTMEM_LOW_LIMIT __pa(MAX_DMA_ADDRESS)
#endif
#define alloc_bootmem(x) \
__alloc_bootmem(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_align(x, align) \
__alloc_bootmem(x, align, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_nopanic(x) \
__alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_pages(x) \
__alloc_bootmem(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_pages_nopanic(x) \
__alloc_bootmem_nopanic(x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_node(pgdat, x) \
__alloc_bootmem_node(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
mm: use alloc_bootmem_node_nopanic() on really needed path Stefan found nobootmem does not work on his system that has only 8M of RAM. This causes an early panic: BIOS-provided physical RAM map: BIOS-88: 0000000000000000 - 000000000009f000 (usable) BIOS-88: 0000000000100000 - 0000000000840000 (usable) bootconsole [earlyser0] enabled Notice: NX (Execute Disable) protection missing in CPU or disabled in BIOS! DMI not present or invalid. last_pfn = 0x840 max_arch_pfn = 0x100000 init_memory_mapping: 0000000000000000-0000000000840000 8MB LOWMEM available. mapped low ram: 0 - 00840000 low ram: 0 - 00840000 Zone PFN ranges: DMA 0x00000001 -> 0x00001000 Normal empty Movable zone start PFN for each node early_node_map[2] active PFN ranges 0: 0x00000001 -> 0x0000009f 0: 0x00000100 -> 0x00000840 BUG: Int 6: CR2 (null) EDI c034663c ESI (null) EBP c0329f38 ESP c0329ef4 EBX c0346380 EDX 00000006 ECX ffffffff EAX fffffff4 err (null) EIP c0353191 CS c0320060 flg 00010082 Stack: (null) c030c533 000007cd (null) c030c533 00000001 (null) (null) 00000003 0000083f 00000018 00000002 00000002 c0329f6c c03534d6 (null) (null) 00000100 00000840 (null) c0329f64 00000001 00001000 (null) Pid: 0, comm: swapper Not tainted 2.6.36 #5 Call Trace: [<c02e3707>] ? 0xc02e3707 [<c035e6e5>] 0xc035e6e5 [<c0353191>] ? 0xc0353191 [<c03534d6>] 0xc03534d6 [<c034f1cd>] 0xc034f1cd [<c034a824>] 0xc034a824 [<c03513cb>] ? 0xc03513cb [<c0349432>] 0xc0349432 [<c0349066>] 0xc0349066 It turns out that we should ignore the low limit of 16M. Use alloc_bootmem_node_nopanic() in this case. [akpm@linux-foundation.org: less mess] Signed-off-by: Yinghai LU <yinghai@kernel.org> Reported-by: Stefan Hellermann <stefan@the2masters.de> Tested-by: Stefan Hellermann <stefan@the2masters.de> Cc: Ingo Molnar <mingo@elte.hu> Cc: "H. Peter Anvin" <hpa@linux.intel.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: <stable@kernel.org> [2.6.34+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2011-05-12 05:13:32 +07:00
#define alloc_bootmem_node_nopanic(pgdat, x) \
__alloc_bootmem_node_nopanic(pgdat, x, SMP_CACHE_BYTES, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_pages_node(pgdat, x) \
__alloc_bootmem_node(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_pages_node_nopanic(pgdat, x) \
__alloc_bootmem_node_nopanic(pgdat, x, PAGE_SIZE, BOOTMEM_LOW_LIMIT)
#define alloc_bootmem_low(x) \
__alloc_bootmem_low(x, SMP_CACHE_BYTES, 0)
#define alloc_bootmem_low_pages_nopanic(x) \
__alloc_bootmem_low_nopanic(x, PAGE_SIZE, 0)
#define alloc_bootmem_low_pages(x) \
__alloc_bootmem_low(x, PAGE_SIZE, 0)
#define alloc_bootmem_low_pages_node(pgdat, x) \
__alloc_bootmem_low_node(pgdat, x, PAGE_SIZE, 0)
#ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
extern void *alloc_remap(int nid, unsigned long size);
#else
static inline void *alloc_remap(int nid, unsigned long size)
{
return NULL;
}
#endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */
extern void *alloc_large_system_hash(const char *tablename,
unsigned long bucketsize,
unsigned long numentries,
int scale,
int flags,
unsigned int *_hash_shift,
unsigned int *_hash_mask,
unsigned long low_limit,
unsigned long high_limit);
#define HASH_EARLY 0x00000001 /* Allocating during early boot? */
#define HASH_SMALL 0x00000002 /* sub-page allocation allowed, min
* shift passed via *_hash_shift */
/* Only NUMA needs hash distribution. 64bit NUMA architectures have
* sufficient vmalloc space.
*/
#if defined(CONFIG_NUMA) && defined(CONFIG_64BIT)
#define HASHDIST_DEFAULT 1
#else
#define HASHDIST_DEFAULT 0
#endif
extern int hashdist; /* Distribute hashes across NUMA nodes? */
#endif /* _LINUX_BOOTMEM_H */