Merge branch 'x86-mm' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into x86/mm

Ingo Molnar 2011-04-04 16:39:20 +02:00
commit 9402efaa96
10 changed files with 70 additions and 203 deletions

View File

@@ -1223,6 +1223,10 @@ config HAVE_ARCH_BOOTMEM
def_bool y
depends on X86_32 && NUMA
config HAVE_ARCH_ALLOC_REMAP
def_bool y
depends on X86_32 && NUMA
config ARCH_HAVE_MEMORY_PRESENT
def_bool y
depends on X86_32 && DISCONTIGMEM
@@ -1231,13 +1235,9 @@ config NEED_NODE_MEMMAP_SIZE
def_bool y
depends on X86_32 && (DISCONTIGMEM || SPARSEMEM)
config HAVE_ARCH_ALLOC_REMAP
def_bool y
depends on X86_32 && NUMA
config ARCH_FLATMEM_ENABLE
def_bool y
depends on X86_32 && ARCH_SELECT_MEMORY_MODEL && !NUMA
depends on X86_32 && !NUMA
config ARCH_DISCONTIGMEM_ENABLE
def_bool y
@@ -1247,20 +1247,16 @@ config ARCH_DISCONTIGMEM_DEFAULT
def_bool y
depends on NUMA && X86_32
config ARCH_PROC_KCORE_TEXT
def_bool y
depends on X86_64 && PROC_KCORE
config ARCH_SPARSEMEM_DEFAULT
def_bool y
depends on X86_64
config ARCH_SPARSEMEM_ENABLE
def_bool y
depends on X86_64 || NUMA || (EXPERIMENTAL && X86_32) || X86_32_NON_STANDARD
select SPARSEMEM_STATIC if X86_32
select SPARSEMEM_VMEMMAP_ENABLE if X86_64
config ARCH_SPARSEMEM_DEFAULT
def_bool y
depends on X86_64
config ARCH_SELECT_MEMORY_MODEL
def_bool y
depends on ARCH_SPARSEMEM_ENABLE
@@ -1269,6 +1265,10 @@ config ARCH_MEMORY_PROBE
def_bool X86_64
depends on MEMORY_HOTPLUG
config ARCH_PROC_KCORE_TEXT
def_bool y
depends on X86_64 && PROC_KCORE
config ILLEGAL_POINTER_VALUE
hex
default 0 if X86_32
@@ -1703,10 +1703,6 @@ config ARCH_ENABLE_MEMORY_HOTREMOVE
def_bool y
depends on MEMORY_HOTPLUG
config HAVE_ARCH_EARLY_PFN_TO_NID
def_bool X86_64
depends on NUMA
config USE_PERCPU_NUMA_NODE_ID
def_bool y
depends on NUMA

View File

@@ -207,8 +207,7 @@ extern const char * const x86_power_flags[32];
#define test_cpu_cap(c, bit) \
test_bit(bit, (unsigned long *)((c)->x86_capability))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && \
#define REQUIRED_MASK_BIT_SET(bit) \
( (((bit)>>5)==0 && (1UL<<((bit)&31) & REQUIRED_MASK0)) || \
(((bit)>>5)==1 && (1UL<<((bit)&31) & REQUIRED_MASK1)) || \
(((bit)>>5)==2 && (1UL<<((bit)&31) & REQUIRED_MASK2)) || \
@@ -218,10 +217,16 @@ extern const char * const x86_power_flags[32];
(((bit)>>5)==6 && (1UL<<((bit)&31) & REQUIRED_MASK6)) || \
(((bit)>>5)==7 && (1UL<<((bit)&31) & REQUIRED_MASK7)) || \
(((bit)>>5)==8 && (1UL<<((bit)&31) & REQUIRED_MASK8)) || \
(((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) ) \
? 1 : \
(((bit)>>5)==9 && (1UL<<((bit)&31) & REQUIRED_MASK9)) )
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
test_cpu_cap(c, bit))
#define this_cpu_has(bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability))
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
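
Note: the new this_cpu_has(bit) helper mirrors cpu_has() but tests the running CPU's capability words in place, via x86_this_cpu_test_bit() on the per-cpu cpu_info, keeping the same compile-time short-circuit through REQUIRED_MASK_BIT_SET(). A minimal sketch of the conversion pattern the call-site hunks later in this merge apply; the wrapper function and pr_info() messages are illustrative, only the two feature-check forms come from the patch:

#include <linux/kernel.h>      /* pr_info() */
#include <linux/percpu.h>      /* __this_cpu_ptr() */
#include <asm/processor.h>     /* struct cpuinfo_x86, per-cpu cpu_info */
#include <asm/cpufeature.h>    /* cpu_has(), this_cpu_has(), X86_FEATURE_ARAT */

/* Hypothetical caller, shown only to contrast the two call styles. */
static void example_arat_check(void)
{
        /* Before: fetch a pointer to the current CPU's cpuinfo_x86, then test. */
        if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT))
                pr_info("ARAT present (old-style check)\n");

        /* After: test the bit directly in the per-cpu capability words. */
        if (this_cpu_has(X86_FEATURE_ARAT))
                pr_info("ARAT present (new-style check)\n");
}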

View File

@@ -4,36 +4,13 @@
#ifndef _ASM_X86_MMZONE_64_H
#define _ASM_X86_MMZONE_64_H
#ifdef CONFIG_NUMA
#include <linux/mmdebug.h>
#include <asm/smp.h>
/* Simple perfect hash to map physical addresses to node numbers */
struct memnode {
int shift;
unsigned int mapsize;
s16 *map;
s16 embedded_map[64 - 8];
} ____cacheline_aligned; /* total size = 128 bytes */
extern struct memnode memnode;
#define memnode_shift memnode.shift
#define memnodemap memnode.map
#define memnodemapsize memnode.mapsize
extern struct pglist_data *node_data[];
static inline __attribute__((pure)) int phys_to_nid(unsigned long addr)
{
unsigned nid;
VIRTUAL_BUG_ON(!memnodemap);
nid = memnodemap[addr >> memnode_shift];
VIRTUAL_BUG_ON(nid >= MAX_NUMNODES || !node_data[nid]);
return nid;
}
#define NODE_DATA(nid) (node_data[nid])
#define node_start_pfn(nid) (NODE_DATA(nid)->node_start_pfn)
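
Note: with the memnodemap[] perfect hash gone, this header no longer resolves a physical address to a node at all; the one early-boot user touched by this merge, setup_node_bootmem() in the NUMA hunks below, switches to the pfn-based lookup instead. A minimal sketch of that replacement, assuming an early-boot context where early_pfn_to_nid() is usable; the wrapper name is illustrative:

#include <linux/init.h>        /* __init */
#include <linux/mm.h>          /* early_pfn_to_nid() */
#include <asm/page.h>          /* PAGE_SHIFT */

/* Illustrative only: mapping a physical address to a node id without
 * phys_to_nid()/memnodemap[]. */
static int __init example_phys_to_node(unsigned long phys_addr)
{
        return early_pfn_to_nid(phys_addr >> PAGE_SHIFT);
}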

View File

@@ -542,6 +542,33 @@ do { \
old__; \
})
static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr,
const unsigned long __percpu *addr)
{
unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG;
return ((1UL << (nr % BITS_PER_LONG)) & percpu_read(*a)) != 0;
}
static inline int x86_this_cpu_variable_test_bit(int nr,
const unsigned long __percpu *addr)
{
int oldbit;
asm volatile("bt "__percpu_arg(2)",%1\n\t"
"sbb %0,%0"
: "=r" (oldbit)
: "m" (*(unsigned long *)addr), "Ir" (nr));
return oldbit;
}
#define x86_this_cpu_test_bit(nr, addr) \
(__builtin_constant_p((nr)) \
? x86_this_cpu_constant_test_bit((nr), (addr)) \
: x86_this_cpu_variable_test_bit((nr), (addr)))
#include <asm-generic/percpu.h>
/* We can use this directly for local CPU (faster). */
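
Note: x86_this_cpu_test_bit() picks its implementation at compile time: a constant bit number reduces to a single percpu_read() of the containing long plus an immediate mask, while a variable bit number falls back to the bt/sbb sequence on the per-cpu address. A hedged usage sketch (the function and variable names are illustrative; the capability-word operand is the one this_cpu_has() passes):

#include <asm/percpu.h>        /* x86_this_cpu_test_bit() */
#include <asm/processor.h>     /* per-cpu cpu_info */

/* Illustrative only: both calls test a bit in the current CPU's
 * x86_capability words. */
static int example_test_capability_bits(int nr)
{
        /* Constant nr: expands to x86_this_cpu_constant_test_bit(). */
        int fixed = x86_this_cpu_test_bit(3,
                        (unsigned long *)&cpu_info.x86_capability);

        /* Variable nr: expands to x86_this_cpu_variable_test_bit() (bt/sbb). */
        int runtime = x86_this_cpu_test_bit(nr,
                        (unsigned long *)&cpu_info.x86_capability);

        return fixed && runtime;
}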

View File

@@ -505,7 +505,7 @@ static void __cpuinit setup_APIC_timer(void)
{
struct clock_event_device *levt = &__get_cpu_var(lapic_events);
if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_ARAT)) {
if (this_cpu_has(X86_FEATURE_ARAT)) {
lapic_clockevent.features &= ~CLOCK_EVT_FEAT_C3STOP;
/* Make LAPIC timer preferable over percpu HPET */
lapic_clockevent.rating = 150;

View File

@@ -355,7 +355,6 @@ static void notify_thresholds(__u64 msr_val)
static void intel_thermal_interrupt(void)
{
__u64 msr_val;
struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
@@ -367,19 +366,19 @@ static void intel_thermal_interrupt(void)
CORE_LEVEL) != 0)
mce_log_therm_throt_event(CORE_THROTTLED | msr_val);
if (cpu_has(c, X86_FEATURE_PLN))
if (this_cpu_has(X86_FEATURE_PLN))
if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
POWER_LIMIT_EVENT,
CORE_LEVEL) != 0)
mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);
if (cpu_has(c, X86_FEATURE_PTS)) {
if (this_cpu_has(X86_FEATURE_PTS)) {
rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
THERMAL_THROTTLING_EVENT,
PACKAGE_LEVEL) != 0)
mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
if (cpu_has(c, X86_FEATURE_PLN))
if (this_cpu_has(X86_FEATURE_PLN))
if (therm_throt_process(msr_val &
PACKAGE_THERM_STATUS_POWER_LIMIT,
POWER_LIMIT_EVENT,

View File

@@ -449,7 +449,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
if (!need_resched()) {
if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
__monitor((void *)&current_thread_info()->flags, 0, 0);
@@ -465,7 +465,7 @@ static void mwait_idle(void)
if (!need_resched()) {
trace_power_start(POWER_CSTATE, 1, smp_processor_id());
trace_cpu_idle(1, smp_processor_id());
if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR))
if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
__monitor((void *)&current_thread_info()->flags, 0, 0);

View File

@@ -1332,9 +1332,9 @@ static inline void mwait_play_dead(void)
void *mwait_ptr;
struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info);
if (!(cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)))
if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)))
return;
if (!cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLSH))
if (!this_cpu_has(X86_FEATURE_CLFLSH))
return;
if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF)
return;

View File

@@ -28,125 +28,10 @@ EXPORT_SYMBOL(node_data);
nodemask_t numa_nodes_parsed __initdata;
struct memnode memnode;
static unsigned long __initdata nodemap_addr;
static unsigned long __initdata nodemap_size;
static struct numa_meminfo numa_meminfo __initdata;
static int numa_distance_cnt;
static u8 *numa_distance;
/*
* Given a shift value, try to populate memnodemap[]
* Returns :
* 1 if OK
* 0 if memnodemap[] too small (or shift too small)
* -1 if node overlap or lost ram (shift too big)
*/
static int __init populate_memnodemap(const struct numa_meminfo *mi, int shift)
{
unsigned long addr, end;
int i, res = -1;
memset(memnodemap, 0xff, sizeof(s16)*memnodemapsize);
for (i = 0; i < mi->nr_blks; i++) {
addr = mi->blk[i].start;
end = mi->blk[i].end;
if (addr >= end)
continue;
if ((end >> shift) >= memnodemapsize)
return 0;
do {
if (memnodemap[addr >> shift] != NUMA_NO_NODE)
return -1;
memnodemap[addr >> shift] = mi->blk[i].nid;
addr += (1UL << shift);
} while (addr < end);
res = 1;
}
return res;
}
static int __init allocate_cachealigned_memnodemap(void)
{
unsigned long addr;
memnodemap = memnode.embedded_map;
if (memnodemapsize <= ARRAY_SIZE(memnode.embedded_map))
return 0;
addr = 0x8000;
nodemap_size = roundup(sizeof(s16) * memnodemapsize, L1_CACHE_BYTES);
nodemap_addr = memblock_find_in_range(addr, get_max_mapped(),
nodemap_size, L1_CACHE_BYTES);
if (nodemap_addr == MEMBLOCK_ERROR) {
printk(KERN_ERR
"NUMA: Unable to allocate Memory to Node hash map\n");
nodemap_addr = nodemap_size = 0;
return -1;
}
memnodemap = phys_to_virt(nodemap_addr);
memblock_x86_reserve_range(nodemap_addr, nodemap_addr + nodemap_size, "MEMNODEMAP");
printk(KERN_DEBUG "NUMA: Allocated memnodemap from %lx - %lx\n",
nodemap_addr, nodemap_addr + nodemap_size);
return 0;
}
/*
* The LSB of all start and end addresses in the node map is the value of the
* maximum possible shift.
*/
static int __init extract_lsb_from_nodes(const struct numa_meminfo *mi)
{
int i, nodes_used = 0;
unsigned long start, end;
unsigned long bitfield = 0, memtop = 0;
for (i = 0; i < mi->nr_blks; i++) {
start = mi->blk[i].start;
end = mi->blk[i].end;
if (start >= end)
continue;
bitfield |= start;
nodes_used++;
if (end > memtop)
memtop = end;
}
if (nodes_used <= 1)
i = 63;
else
i = find_first_bit(&bitfield, sizeof(unsigned long)*8);
memnodemapsize = (memtop >> i)+1;
return i;
}
static int __init compute_hash_shift(const struct numa_meminfo *mi)
{
int shift;
shift = extract_lsb_from_nodes(mi);
if (allocate_cachealigned_memnodemap())
return -1;
printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n",
shift);
if (populate_memnodemap(mi, shift) != 1) {
printk(KERN_INFO "Your memory is not aligned you need to "
"rebuild your kernel with a bigger NODEMAPSIZE "
"shift=%d\n", shift);
return -1;
}
return shift;
}
int __meminit __early_pfn_to_nid(unsigned long pfn)
{
return phys_to_nid(pfn << PAGE_SHIFT);
}
static void * __init early_node_mem(int nodeid, unsigned long start,
unsigned long end, unsigned long size,
unsigned long align)
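
Note: for reference, the lookup being deleted above is a shift-and-index table: extract_lsb_from_nodes() picks the largest shift such that every block start is a multiple of 1 << shift, and phys_to_nid() then indexes an s16 array with addr >> shift. A toy illustration with a hypothetical shift of 24 (16 MiB granularity), kept only to make the arithmetic concrete:

#include <linux/types.h>       /* s16 */

/* Illustrative only: with memnode_shift == 24, physical address 0x41234567
 * falls in 16 MiB chunk number 0x41234567 >> 24 == 0x41, so the node id is
 * memnodemap[0x41]. */
static inline int old_style_phys_to_nid(unsigned long addr, int shift,
                                        const s16 *map)
{
        return map[addr >> shift];
}
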
@@ -270,7 +155,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end)
memblock_x86_reserve_range(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA");
printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys,
nodedata_phys + pgdat_size - 1);
nid = phys_to_nid(nodedata_phys);
nid = early_pfn_to_nid(nodedata_phys >> PAGE_SHIFT);
if (nid != nodeid)
printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
@@ -527,12 +412,6 @@ static int __init numa_register_memblks(struct numa_meminfo *mi)
if (WARN_ON(nodes_empty(node_possible_map)))
return -EINVAL;
memnode_shift = compute_hash_shift(mi);
if (memnode_shift < 0) {
printk(KERN_ERR "NUMA: No NUMA node hash function found. Contact maintainer\n");
return -EINVAL;
}
for (i = 0; i < mi->nr_blks; i++)
memblock_x86_register_active_regions(mi->blk[i].nid,
mi->blk[i].start >> PAGE_SHIFT,
@@ -626,17 +505,13 @@ static int __init numa_init(int (*init_func)(void))
void __init initmem_init(void)
{
int ret;
if (!numa_off) {
#ifdef CONFIG_ACPI_NUMA
ret = numa_init(x86_acpi_numa_init);
if (!ret)
if (!numa_init(x86_acpi_numa_init))
return;
#endif
#ifdef CONFIG_AMD_NUMA
ret = numa_init(amd_numa_init);
if (!ret)
if (!numa_init(amd_numa_init))
return;
#endif
}

View File

@@ -710,20 +710,14 @@ static int acpi_processor_get_throttling_fadt(struct acpi_processor *pr)
}
#ifdef CONFIG_X86
static int acpi_throttling_rdmsr(struct acpi_processor *pr,
u64 *value)
static int acpi_throttling_rdmsr(u64 *value)
{
struct cpuinfo_x86 *c;
u64 msr_high, msr_low;
unsigned int cpu;
u64 msr = 0;
int ret = -1;
cpu = pr->id;
c = &cpu_data(cpu);
if ((c->x86_vendor != X86_VENDOR_INTEL) ||
!cpu_has(c, X86_FEATURE_ACPI)) {
if ((this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_INTEL) ||
!this_cpu_has(X86_FEATURE_ACPI)) {
printk(KERN_ERR PREFIX
"HARDWARE addr space,NOT supported yet\n");
} else {
@@ -738,18 +732,13 @@ static int acpi_throttling_rdmsr(struct acpi_processor *pr,
return ret;
}
static int acpi_throttling_wrmsr(struct acpi_processor *pr, u64 value)
static int acpi_throttling_wrmsr(u64 value)
{
struct cpuinfo_x86 *c;
unsigned int cpu;
int ret = -1;
u64 msr;
cpu = pr->id;
c = &cpu_data(cpu);
if ((c->x86_vendor != X86_VENDOR_INTEL) ||
!cpu_has(c, X86_FEATURE_ACPI)) {
if ((this_cpu_read(cpu_info.x86_vendor) != X86_VENDOR_INTEL) ||
!this_cpu_has(X86_FEATURE_ACPI)) {
printk(KERN_ERR PREFIX
"HARDWARE addr space,NOT supported yet\n");
} else {
@@ -761,15 +750,14 @@ static int acpi_throttling_wrmsr(struct acpi_processor *pr, u64 value)
return ret;
}
#else
static int acpi_throttling_rdmsr(struct acpi_processor *pr,
u64 *value)
static int acpi_throttling_rdmsr(u64 *value)
{
printk(KERN_ERR PREFIX
"HARDWARE addr space,NOT supported yet\n");
return -1;
}
static int acpi_throttling_wrmsr(struct acpi_processor *pr, u64 value)
static int acpi_throttling_wrmsr(u64 value)
{
printk(KERN_ERR PREFIX
"HARDWARE addr space,NOT supported yet\n");
@@ -801,7 +789,7 @@ static int acpi_read_throttling_status(struct acpi_processor *pr,
ret = 0;
break;
case ACPI_ADR_SPACE_FIXED_HARDWARE:
ret = acpi_throttling_rdmsr(pr, value);
ret = acpi_throttling_rdmsr(value);
break;
default:
printk(KERN_ERR PREFIX "Unknown addr space %d\n",
@@ -834,7 +822,7 @@ static int acpi_write_throttling_state(struct acpi_processor *pr,
ret = 0;
break;
case ACPI_ADR_SPACE_FIXED_HARDWARE:
ret = acpi_throttling_wrmsr(pr, value);
ret = acpi_throttling_wrmsr(value);
break;
default:
printk(KERN_ERR PREFIX "Unknown addr space %d\n",
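
Note: the struct acpi_processor argument disappears from the MSR helpers because the vendor and feature checks now read the current CPU's state directly; callers are expected to already be running on (or bound to) the processor whose throttling MSR they touch. A hedged sketch of that guard in isolation (the wrapper name is illustrative; the two checks are the ones the patch uses):

#include <linux/percpu.h>      /* this_cpu_read() */
#include <asm/processor.h>     /* per-cpu cpu_info, X86_VENDOR_INTEL */
#include <asm/cpufeature.h>    /* this_cpu_has(), X86_FEATURE_ACPI */

/* Illustrative only: the support check the throttling rdmsr/wrmsr paths use. */
static int example_throttling_msr_supported(void)
{
        return this_cpu_read(cpu_info.x86_vendor) == X86_VENDOR_INTEL &&
               this_cpu_has(X86_FEATURE_ACPI);
}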