linux_dsm_epyc7002/arch/sparc/mm/init_32.c

360 lines
8.9 KiB
C
Raw Normal View History

/*
* linux/arch/sparc/mm/init.c
*
* Copyright (C) 1995 David S. Miller (davem@caip.rutgers.edu)
* Copyright (C) 1995 Eddie C. Dost (ecd@skynet.be)
* Copyright (C) 1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
* Copyright (C) 2000 Anton Blanchard (anton@samba.org)
*/
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/initrd.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/bootmem.h>
#include <linux/pagemap.h>
#include <linux/poison.h>
include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo <tj@kernel.org> Guess-its-ok-by: Christoph Lameter <cl@linux-foundation.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Lee Schermerhorn <Lee.Schermerhorn@hp.com>
2010-03-24 15:04:11 +07:00
#include <linux/gfp.h>
#include <asm/sections.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/vaddrs.h>
#include <asm/pgalloc.h> /* bug in asm-generic/tlb.h: check_pgt_cache */
#include <asm/setup.h>
#include <asm/tlb.h>
#include <asm/prom.h>
#include <asm/leon.h>
#include "mm_32.h"
unsigned long *sparc_valid_addr_bitmap;
EXPORT_SYMBOL(sparc_valid_addr_bitmap);
unsigned long phys_base;
EXPORT_SYMBOL(phys_base);
unsigned long pfn_base;
EXPORT_SYMBOL(pfn_base);
struct sparc_phys_banks sp_banks[SPARC_PHYS_BANKS+1];
/* Initial ramdisk setup */
extern unsigned int sparc_ramdisk_image;
extern unsigned int sparc_ramdisk_size;
unsigned long highstart_pfn, highend_pfn;
void show_mem(unsigned int filter)
{
printk("Mem-info:\n");
show_free_areas(filter);
printk("Free swap: %6ldkB\n",
swap: add per-partition lock for swapfile swap_lock is heavily contended when I test swap to 3 fast SSD (even slightly slower than swap to 2 such SSD). The main contention comes from swap_info_get(). This patch tries to fix the gap with adding a new per-partition lock. Global data like nr_swapfiles, total_swap_pages, least_priority and swap_list are still protected by swap_lock. nr_swap_pages is an atomic now, it can be changed without swap_lock. In theory, it's possible get_swap_page() finds no swap pages but actually there are free swap pages. But sounds not a big problem. Accessing partition specific data (like scan_swap_map and so on) is only protected by swap_info_struct.lock. Changing swap_info_struct.flags need hold swap_lock and swap_info_struct.lock, because scan_scan_map() will check it. read the flags is ok with either the locks hold. If both swap_lock and swap_info_struct.lock must be hold, we always hold the former first to avoid deadlock. swap_entry_free() can change swap_list. To delete that code, we add a new highest_priority_index. Whenever get_swap_page() is called, we check it. If it's valid, we use it. It's a pity get_swap_page() still holds swap_lock(). But in practice, swap_lock() isn't heavily contended in my test with this patch (or I can say there are other much more heavier bottlenecks like TLB flush). And BTW, looks get_swap_page() doesn't really need the lock. We never free swap_info[] and we check SWAP_WRITEOK flag. The only risk without the lock is we could swapout to some low priority swap, but we can quickly recover after several rounds of swap, so sounds not a big deal to me. But I'd prefer to fix this if it's a real problem. "swap: make each swap partition have one address_space" improved the swapout speed from 1.7G/s to 2G/s. This patch further improves the speed to 2.3G/s, so around 15% improvement. It's a multi-process test, so TLB flush isn't the biggest bottleneck before the patches. [arnd@arndb.de: fix it for nommu] [hughd@google.com: add missing unlock] [minchan@kernel.org: get rid of lockdep whinge on sys_swapon] Signed-off-by: Shaohua Li <shli@fusionio.com> Cc: Hugh Dickins <hughd@google.com> Cc: Rik van Riel <riel@redhat.com> Cc: Minchan Kim <minchan.kim@gmail.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Seth Jennings <sjenning@linux.vnet.ibm.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com> Cc: Dan Magenheimer <dan.magenheimer@oracle.com> Cc: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Hugh Dickins <hughd@google.com> Signed-off-by: Minchan Kim <minchan@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-02-23 07:34:38 +07:00
get_nr_swap_pages() << (PAGE_SHIFT-10));
printk("%ld pages of RAM\n", totalram_pages);
printk("%ld free pages\n", nr_free_pages());
}
unsigned long last_valid_pfn;
unsigned long calc_highpages(void)
{
int i;
int nr = 0;
for (i = 0; sp_banks[i].num_bytes != 0; i++) {
unsigned long start_pfn = sp_banks[i].base_addr >> PAGE_SHIFT;
unsigned long end_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT;
if (end_pfn <= max_low_pfn)
continue;
if (start_pfn < max_low_pfn)
start_pfn = max_low_pfn;
nr += end_pfn - start_pfn;
}
return nr;
}
static unsigned long calc_max_low_pfn(void)
{
int i;
unsigned long tmp = pfn_base + (SRMMU_MAXMEM >> PAGE_SHIFT);
unsigned long curr_pfn, last_pfn;
last_pfn = (sp_banks[0].base_addr + sp_banks[0].num_bytes) >> PAGE_SHIFT;
for (i = 1; sp_banks[i].num_bytes != 0; i++) {
curr_pfn = sp_banks[i].base_addr >> PAGE_SHIFT;
if (curr_pfn >= tmp) {
if (last_pfn < tmp)
tmp = last_pfn;
break;
}
last_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT;
}
return tmp;
}
unsigned long __init bootmem_init(unsigned long *pages_avail)
{
unsigned long bootmap_size, start_pfn;
unsigned long end_of_phys_memory = 0UL;
unsigned long bootmap_pfn, bytes_avail, size;
int i;
bytes_avail = 0UL;
for (i = 0; sp_banks[i].num_bytes != 0; i++) {
end_of_phys_memory = sp_banks[i].base_addr +
sp_banks[i].num_bytes;
bytes_avail += sp_banks[i].num_bytes;
if (cmdline_memory_size) {
if (bytes_avail > cmdline_memory_size) {
unsigned long slack = bytes_avail - cmdline_memory_size;
bytes_avail -= slack;
end_of_phys_memory -= slack;
sp_banks[i].num_bytes -= slack;
if (sp_banks[i].num_bytes == 0) {
sp_banks[i].base_addr = 0xdeadbeef;
} else {
sp_banks[i+1].num_bytes = 0;
sp_banks[i+1].base_addr = 0xdeadbeef;
}
break;
}
}
}
/* Start with page aligned address of last symbol in kernel
* image.
*/
start_pfn = (unsigned long)__pa(PAGE_ALIGN((unsigned long) &_end));
/* Now shift down to get the real physical page frame number. */
start_pfn >>= PAGE_SHIFT;
bootmap_pfn = start_pfn;
max_pfn = end_of_phys_memory >> PAGE_SHIFT;
max_low_pfn = max_pfn;
highstart_pfn = highend_pfn = max_pfn;
if (max_low_pfn > pfn_base + (SRMMU_MAXMEM >> PAGE_SHIFT)) {
highstart_pfn = pfn_base + (SRMMU_MAXMEM >> PAGE_SHIFT);
max_low_pfn = calc_max_low_pfn();
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
calc_highpages() >> (20 - PAGE_SHIFT));
}
#ifdef CONFIG_BLK_DEV_INITRD
/* Now have to check initial ramdisk, so that bootmap does not overwrite it */
if (sparc_ramdisk_image) {
if (sparc_ramdisk_image >= (unsigned long)&_end - 2 * PAGE_SIZE)
sparc_ramdisk_image -= KERNBASE;
initrd_start = sparc_ramdisk_image + phys_base;
initrd_end = initrd_start + sparc_ramdisk_size;
if (initrd_end > end_of_phys_memory) {
printk(KERN_CRIT "initrd extends beyond end of memory "
"(0x%016lx > 0x%016lx)\ndisabling initrd\n",
initrd_end, end_of_phys_memory);
initrd_start = 0;
}
if (initrd_start) {
if (initrd_start >= (start_pfn << PAGE_SHIFT) &&
initrd_start < (start_pfn << PAGE_SHIFT) + 2 * PAGE_SIZE)
bootmap_pfn = PAGE_ALIGN (initrd_end) >> PAGE_SHIFT;
}
}
#endif
/* Initialize the boot-time allocator. */
bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn, pfn_base,
max_low_pfn);
/* Now register the available physical memory with the
* allocator.
*/
*pages_avail = 0;
for (i = 0; sp_banks[i].num_bytes != 0; i++) {
unsigned long curr_pfn, last_pfn;
curr_pfn = sp_banks[i].base_addr >> PAGE_SHIFT;
if (curr_pfn >= max_low_pfn)
break;
last_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT;
if (last_pfn > max_low_pfn)
last_pfn = max_low_pfn;
/*
* .. finally, did all the rounding and playing
* around just make the area go away?
*/
if (last_pfn <= curr_pfn)
continue;
size = (last_pfn - curr_pfn) << PAGE_SHIFT;
*pages_avail += last_pfn - curr_pfn;
free_bootmem(sp_banks[i].base_addr, size);
}
#ifdef CONFIG_BLK_DEV_INITRD
if (initrd_start) {
/* Reserve the initrd image area. */
size = initrd_end - initrd_start;
reserve_bootmem(initrd_start, size, BOOTMEM_DEFAULT);
*pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
initrd_start = (initrd_start - phys_base) + PAGE_OFFSET;
initrd_end = (initrd_end - phys_base) + PAGE_OFFSET;
}
#endif
/* Reserve the kernel text/data/bss. */
size = (start_pfn << PAGE_SHIFT) - phys_base;
reserve_bootmem(phys_base, size, BOOTMEM_DEFAULT);
*pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
/* Reserve the bootmem map. We do not account for it
* in pages_avail because we will release that memory
* in free_all_bootmem.
*/
size = bootmap_size;
reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size, BOOTMEM_DEFAULT);
*pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
return max_pfn;
}
/*
* paging_init() sets up the page tables: We call the MMU specific
* init routine based upon the Sun model type on the Sparc.
*
*/
void __init paging_init(void)
{
srmmu_paging_init();
prom_build_devicetree();
of_fill_in_cpu_data();
device_scan();
}
static void __init taint_real_pages(void)
{
int i;
for (i = 0; sp_banks[i].num_bytes; i++) {
unsigned long start, end;
start = sp_banks[i].base_addr;
end = start + sp_banks[i].num_bytes;
while (start < end) {
set_bit(start >> 20, sparc_valid_addr_bitmap);
start += PAGE_SIZE;
}
}
}
static void map_high_region(unsigned long start_pfn, unsigned long end_pfn)
{
unsigned long tmp;
#ifdef CONFIG_DEBUG_HIGHMEM
printk("mapping high region %08lx - %08lx\n", start_pfn, end_pfn);
#endif
for (tmp = start_pfn; tmp < end_pfn; tmp++)
free_highmem_page(pfn_to_page(tmp));
}
void __init mem_init(void)
{
int i;
if (PKMAP_BASE+LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
prom_printf("BUG: fixmap and pkmap areas overlap\n");
prom_printf("pkbase: 0x%lx pkend: 0x%lx fixstart 0x%lx\n",
PKMAP_BASE,
(unsigned long)PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
FIXADDR_START);
prom_printf("Please mail sparclinux@vger.kernel.org.\n");
prom_halt();
}
/* Saves us work later. */
memset((void *)&empty_zero_page, 0, PAGE_SIZE);
i = last_valid_pfn >> ((20 - PAGE_SHIFT) + 5);
i += 1;
sparc_valid_addr_bitmap = (unsigned long *)
__alloc_bootmem(i << 2, SMP_CACHE_BYTES, 0UL);
if (sparc_valid_addr_bitmap == NULL) {
prom_printf("mem_init: Cannot alloc valid_addr_bitmap.\n");
prom_halt();
}
memset(sparc_valid_addr_bitmap, 0, i << 2);
taint_real_pages();
max_mapnr = last_valid_pfn - pfn_base;
high_memory = __va(max_low_pfn << PAGE_SHIFT);
mm: concentrate modification of totalram_pages into the mm core Concentrate code to modify totalram_pages into the mm core, so the arch memory initialized code doesn't need to take care of it. With these changes applied, only following functions from mm core modify global variable totalram_pages: free_bootmem_late(), free_all_bootmem(), free_all_bootmem_node(), adjust_managed_page_count(). With this patch applied, it will be much more easier for us to keep totalram_pages and zone->managed_pages in consistence. Signed-off-by: Jiang Liu <jiang.liu@huawei.com> Acked-by: David Howells <dhowells@redhat.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: "Michael S. Tsirkin" <mst@redhat.com> Cc: <sworddragon2@aol.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Geert Uytterhoeven <geert@linux-m68k.org> Cc: Ingo Molnar <mingo@redhat.com> Cc: Jeremy Fitzhardinge <jeremy@goop.org> Cc: Jianguo Wu <wujianguo@huawei.com> Cc: Joonsoo Kim <js1304@gmail.com> Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Marek Szyprowski <m.szyprowski@samsung.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Michel Lespinasse <walken@google.com> Cc: Minchan Kim <minchan@kernel.org> Cc: Rik van Riel <riel@redhat.com> Cc: Rusty Russell <rusty@rustcorp.com.au> Cc: Tang Chen <tangchen@cn.fujitsu.com> Cc: Tejun Heo <tj@kernel.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Wen Congyang <wency@cn.fujitsu.com> Cc: Will Deacon <will.deacon@arm.com> Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> Cc: Yinghai Lu <yinghai@kernel.org> Cc: Russell King <rmk@arm.linux.org.uk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2013-07-04 05:03:24 +07:00
free_all_bootmem();
for (i = 0; sp_banks[i].num_bytes != 0; i++) {
unsigned long start_pfn = sp_banks[i].base_addr >> PAGE_SHIFT;
unsigned long end_pfn = (sp_banks[i].base_addr + sp_banks[i].num_bytes) >> PAGE_SHIFT;
if (end_pfn <= highstart_pfn)
continue;
if (start_pfn < highstart_pfn)
start_pfn = highstart_pfn;
map_high_region(start_pfn, end_pfn);
}
mem_init_print_info(NULL);
}
void free_initmem (void)
{
free_initmem_default(POISON_FREE_INITMEM);
}
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM,
"initrd");
}
#endif
void sparc_flush_page_to_ram(struct page *page)
{
unsigned long vaddr = (unsigned long)page_address(page);
if (vaddr)
__flush_page_to_ram(vaddr);
}
EXPORT_SYMBOL(sparc_flush_page_to_ram);