mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-25 03:50:53 +07:00
[SPARC64]: Eliminate NR_CPUS limitations.
Cheetah systems can have cpuids as large as 1023, although physical systems don't have that many cpus. Only three limitations existed in the kernel preventing arbitrary NR_CPUS values: 1) dcache dirty cpu state stored in page->flags on D-cache aliasing platforms. With some build time calculations and some build-time BUG checks on page->flags layout, this one was easily solved. 2) The cheetah XCALL delivery code could only handle a cpumask with up to 32 cpus set. Some simple looping logic clears that up too. 3) thread_info->cpu was a u8, easily changed to a u16. There are a few spots in the kernel that still put NR_CPUS sized arrays on the kernel stack, but that's not a sparc64 specific problem. Signed-off-by: David S. Miller <davem@davemloft.net>
This commit is contained in:
parent
5cbc307373
commit
22adb358e8
@ -147,10 +147,10 @@ config SMP
|
||||
If you don't know what to do here, say N.
|
||||
|
||||
config NR_CPUS
|
||||
int "Maximum number of CPUs (2-64)"
|
||||
range 2 64
|
||||
int "Maximum number of CPUs (2-1024)"
|
||||
range 2 1024
|
||||
depends on SMP
|
||||
default "32"
|
||||
default "64"
|
||||
|
||||
source "drivers/cpufreq/Kconfig"
|
||||
|
||||
|
@ -523,7 +523,7 @@ tlb_fixup_done:
|
||||
#else
|
||||
mov 0, %o0
|
||||
#endif
|
||||
stb %o0, [%g6 + TI_CPU]
|
||||
sth %o0, [%g6 + TI_CPU]
|
||||
|
||||
/* Off we go.... */
|
||||
call start_kernel
|
||||
|
@ -400,7 +400,7 @@ static __inline__ void spitfire_xcall_deliver(u64 data0, u64 data1, u64 data2, c
|
||||
static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mask)
|
||||
{
|
||||
u64 pstate, ver;
|
||||
int nack_busy_id, is_jbus;
|
||||
int nack_busy_id, is_jbus, need_more;
|
||||
|
||||
if (cpus_empty(mask))
|
||||
return;
|
||||
@ -416,6 +416,7 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
|
||||
__asm__ __volatile__("rdpr %%pstate, %0" : "=r" (pstate));
|
||||
|
||||
retry:
|
||||
need_more = 0;
|
||||
__asm__ __volatile__("wrpr %0, %1, %%pstate\n\t"
|
||||
: : "r" (pstate), "i" (PSTATE_IE));
|
||||
|
||||
@ -444,6 +445,10 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
|
||||
: /* no outputs */
|
||||
: "r" (target), "i" (ASI_INTR_W));
|
||||
nack_busy_id++;
|
||||
if (nack_busy_id == 32) {
|
||||
need_more = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -460,6 +465,16 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
|
||||
if (dispatch_stat == 0UL) {
|
||||
__asm__ __volatile__("wrpr %0, 0x0, %%pstate"
|
||||
: : "r" (pstate));
|
||||
if (unlikely(need_more)) {
|
||||
int i, cnt = 0;
|
||||
for_each_cpu_mask(i, mask) {
|
||||
cpu_clear(i, mask);
|
||||
cnt++;
|
||||
if (cnt == 32)
|
||||
break;
|
||||
}
|
||||
goto retry;
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (!--stuck)
|
||||
@ -497,6 +512,8 @@ static void cheetah_xcall_deliver(u64 data0, u64 data1, u64 data2, cpumask_t mas
|
||||
if ((dispatch_stat & check_mask) == 0)
|
||||
cpu_clear(i, mask);
|
||||
this_busy_nack += 2;
|
||||
if (this_busy_nack == 64)
|
||||
break;
|
||||
}
|
||||
|
||||
goto retry;
|
||||
|
@ -191,12 +191,9 @@ inline void flush_dcache_page_impl(struct page *page)
|
||||
}
|
||||
|
||||
#define PG_dcache_dirty PG_arch_1
|
||||
#define PG_dcache_cpu_shift 24UL
|
||||
#define PG_dcache_cpu_mask (256UL - 1UL)
|
||||
|
||||
#if NR_CPUS > 256
|
||||
#error D-cache dirty tracking and thread_info->cpu need fixing for > 256 cpus
|
||||
#endif
|
||||
#define PG_dcache_cpu_shift 32UL
|
||||
#define PG_dcache_cpu_mask \
|
||||
((1UL<<ilog2(roundup_pow_of_two(NR_CPUS)))-1UL)
|
||||
|
||||
#define dcache_dirty_cpu(page) \
|
||||
(((page)->flags >> PG_dcache_cpu_shift) & PG_dcache_cpu_mask)
|
||||
@ -1349,6 +1346,19 @@ void __init paging_init(void)
|
||||
unsigned long end_pfn, pages_avail, shift, phys_base;
|
||||
unsigned long real_end, i;
|
||||
|
||||
/* These build time checks make sure that the dcache_dirty_cpu()
|
||||
* page->flags usage will work.
|
||||
*
|
||||
* When a page gets marked as dcache-dirty, we store the
|
||||
* cpu number starting at bit 32 in the page->flags. Also,
|
||||
* functions like clear_dcache_dirty_cpu use the cpu mask
|
||||
* in 13-bit signed-immediate instruction fields.
|
||||
*/
|
||||
BUILD_BUG_ON(FLAGS_RESERVED != 32);
|
||||
BUILD_BUG_ON(SECTIONS_WIDTH + NODES_WIDTH + ZONES_WIDTH +
|
||||
ilog2(roundup_pow_of_two(NR_CPUS)) > FLAGS_RESERVED);
|
||||
BUILD_BUG_ON(NR_CPUS > 4096);
|
||||
|
||||
kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
|
||||
kern_size = (unsigned long)&_end - (unsigned long)KERNBASE;
|
||||
|
||||
|
@ -202,7 +202,7 @@ extern struct sun4v_2insn_patch_entry __sun4v_2insn_patch,
|
||||
* the calculations done by the macro mid-stream.
|
||||
*/
|
||||
#define LOAD_PER_CPU_BASE(DEST, THR, REG1, REG2, REG3) \
|
||||
ldub [THR + TI_CPU], REG1; \
|
||||
lduh [THR + TI_CPU], REG1; \
|
||||
sethi %hi(__per_cpu_shift), REG3; \
|
||||
sethi %hi(__per_cpu_base), REG2; \
|
||||
ldx [REG3 + %lo(__per_cpu_shift)], REG3; \
|
||||
|
@ -38,8 +38,8 @@ struct thread_info {
|
||||
/* D$ line 1 */
|
||||
struct task_struct *task;
|
||||
unsigned long flags;
|
||||
__u8 cpu;
|
||||
__u8 fpsaved[7];
|
||||
__u8 pad;
|
||||
unsigned long ksp;
|
||||
|
||||
/* D$ line 2 */
|
||||
@ -49,7 +49,7 @@ struct thread_info {
|
||||
int preempt_count; /* 0 => preemptable, <0 => BUG */
|
||||
__u8 new_child;
|
||||
__u8 syscall_noerror;
|
||||
__u16 __pad;
|
||||
__u16 cpu;
|
||||
|
||||
unsigned long *utraps;
|
||||
|
||||
@ -83,8 +83,7 @@ struct thread_info {
|
||||
#define TI_CURRENT_DS (TI_FLAGS + TI_FLAG_BYTE_CURRENT_DS)
|
||||
#define TI_FPDEPTH (TI_FLAGS + TI_FLAG_BYTE_FPDEPTH)
|
||||
#define TI_WSAVED (TI_FLAGS + TI_FLAG_BYTE_WSAVED)
|
||||
#define TI_CPU 0x00000010
|
||||
#define TI_FPSAVED 0x00000011
|
||||
#define TI_FPSAVED 0x00000010
|
||||
#define TI_KSP 0x00000018
|
||||
#define TI_FAULT_ADDR 0x00000020
|
||||
#define TI_KREGS 0x00000028
|
||||
@ -92,6 +91,7 @@ struct thread_info {
|
||||
#define TI_PRE_COUNT 0x00000038
|
||||
#define TI_NEW_CHILD 0x0000003c
|
||||
#define TI_SYS_NOERROR 0x0000003d
|
||||
#define TI_CPU 0x0000003e
|
||||
#define TI_UTRAPS 0x00000040
|
||||
#define TI_REG_WINDOW 0x00000048
|
||||
#define TI_RWIN_SPTRS 0x000003c8
|
||||
|
Loading…
Reference in New Issue
Block a user