linux_dsm_epyc7002/arch/s390/kernel/topology.c

608 lines
13 KiB
C
Raw Normal View History

/*
* Copyright IBM Corp. 2007, 2011
* Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
*/
#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/workqueue.h>
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 15:50:21 +07:00
#include <linux/bootmem.h>
#include <linux/uaccess.h>
#include <linux/sysctl.h>
#include <linux/cpuset.h>
#include <linux/device.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/topology.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <linux/mm.h>
#include <linux/nodemask.h>
#include <linux/node.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 12:46:13 +07:00
#define PTF_HORIZONTAL (0UL)
#define PTF_VERTICAL (1UL)
#define PTF_CHECK (2UL)
enum {
TOPOLOGY_MODE_HW,
TOPOLOGY_MODE_SINGLE,
TOPOLOGY_MODE_PACKAGE,
TOPOLOGY_MODE_UNINITIALIZED
};
struct mask_info {
struct mask_info *next;
unsigned char id;
cpumask_t mask;
};
static int topology_mode = TOPOLOGY_MODE_UNINITIALIZED;
static void set_topology_timer(void);
static void topology_work_fn(struct work_struct *work);
static struct sysinfo_15_1_x *tl_info;
static DECLARE_WORK(topology_work, topology_work_fn);
/*
* Socket/Book linked lists and cpu_topology updates are
* protected by "sched_domains_mutex".
*/
static struct mask_info socket_info;
static struct mask_info book_info;
static struct mask_info drawer_info;
struct cpu_topology_s390 cpu_topology[NR_CPUS];
EXPORT_SYMBOL_GPL(cpu_topology);
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 15:50:21 +07:00
cpumask_t cpus_with_topology;
static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
{
cpumask_t mask;
cpumask_copy(&mask, cpumask_of(cpu));
switch (topology_mode) {
case TOPOLOGY_MODE_HW:
while (info) {
if (cpumask_test_cpu(cpu, &info->mask)) {
mask = info->mask;
break;
}
info = info->next;
}
if (cpumask_empty(&mask))
cpumask_copy(&mask, cpumask_of(cpu));
break;
case TOPOLOGY_MODE_PACKAGE:
cpumask_copy(&mask, cpu_present_mask);
break;
default:
/* fallthrough */
case TOPOLOGY_MODE_SINGLE:
cpumask_copy(&mask, cpumask_of(cpu));
break;
}
return mask;
}
static cpumask_t cpu_thread_map(unsigned int cpu)
{
cpumask_t mask;
int i;
cpumask_copy(&mask, cpumask_of(cpu));
if (topology_mode != TOPOLOGY_MODE_HW)
return mask;
cpu -= cpu % (smp_cpu_mtid + 1);
for (i = 0; i <= smp_cpu_mtid; i++)
if (cpu_present(cpu + i))
cpumask_set_cpu(cpu + i, &mask);
return mask;
}
#define TOPOLOGY_CORE_BITS 64
static void add_cpus_to_mask(struct topology_core *tl_core,
struct mask_info *drawer,
struct mask_info *book,
struct mask_info *socket)
{
struct cpu_topology_s390 *topo;
unsigned int core;
for_each_set_bit(core, &tl_core->mask, TOPOLOGY_CORE_BITS) {
unsigned int rcore;
int lcpu, i;
rcore = TOPOLOGY_CORE_BITS - 1 - core + tl_core->origin;
lcpu = smp_find_processor_id(rcore << smp_cpu_mt_shift);
if (lcpu < 0)
continue;
for (i = 0; i <= smp_cpu_mtid; i++) {
topo = &cpu_topology[lcpu + i];
topo->drawer_id = drawer->id;
topo->book_id = book->id;
topo->socket_id = socket->id;
topo->core_id = rcore;
topo->thread_id = lcpu + i;
cpumask_set_cpu(lcpu + i, &drawer->mask);
cpumask_set_cpu(lcpu + i, &book->mask);
cpumask_set_cpu(lcpu + i, &socket->mask);
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 15:50:21 +07:00
cpumask_set_cpu(lcpu + i, &cpus_with_topology);
smp_cpu_set_polarization(lcpu + i, tl_core->pp);
}
}
}
static void clear_masks(void)
{
struct mask_info *info;
info = &socket_info;
while (info) {
cpumask_clear(&info->mask);
info = info->next;
}
info = &book_info;
while (info) {
cpumask_clear(&info->mask);
info = info->next;
}
info = &drawer_info;
while (info) {
cpumask_clear(&info->mask);
info = info->next;
}
}
static union topology_entry *next_tle(union topology_entry *tle)
{
if (!tle->nl)
return (union topology_entry *)((struct topology_core *)tle + 1);
return (union topology_entry *)((struct topology_container *)tle + 1);
}
static void tl_to_masks(struct sysinfo_15_1_x *info)
{
struct mask_info *socket = &socket_info;
struct mask_info *book = &book_info;
struct mask_info *drawer = &drawer_info;
union topology_entry *tle, *end;
clear_masks();
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 12:46:13 +07:00
tle = info->tle;
end = (union topology_entry *)((unsigned long)info + info->length);
while (tle < end) {
switch (tle->nl) {
case 3:
drawer = drawer->next;
drawer->id = tle->container.id;
break;
case 2:
book = book->next;
book->id = tle->container.id;
break;
case 1:
socket = socket->next;
socket->id = tle->container.id;
break;
case 0:
add_cpus_to_mask(&tle->cpu, drawer, book, socket);
break;
default:
clear_masks();
return;
}
tle = next_tle(tle);
}
}
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 12:46:13 +07:00
static void topology_update_polarization_simple(void)
{
int cpu;
for_each_possible_cpu(cpu)
smp_cpu_set_polarization(cpu, POLARIZATION_HRZ);
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 12:46:13 +07:00
}
static int ptf(unsigned long fc)
{
int rc;
asm volatile(
" .insn rre,0xb9a20000,%1,%1\n"
" ipm %0\n"
" srl %0,28\n"
: "=d" (rc)
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 12:46:13 +07:00
: "d" (fc) : "cc");
return rc;
}
int topology_set_cpu_management(int fc)
{
int cpu, rc;
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 12:46:13 +07:00
if (!MACHINE_HAS_TOPOLOGY)
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 12:46:13 +07:00
return -EOPNOTSUPP;
if (fc)
rc = ptf(PTF_VERTICAL);
else
rc = ptf(PTF_HORIZONTAL);
if (rc)
return -EBUSY;
for_each_possible_cpu(cpu)
smp_cpu_set_polarization(cpu, POLARIZATION_UNKNOWN);
return rc;
}
static void update_cpu_masks(void)
{
struct cpu_topology_s390 *topo;
int cpu, id;
for_each_possible_cpu(cpu) {
topo = &cpu_topology[cpu];
topo->thread_mask = cpu_thread_map(cpu);
topo->core_mask = cpu_group_map(&socket_info, cpu);
topo->book_mask = cpu_group_map(&book_info, cpu);
topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
if (topology_mode != TOPOLOGY_MODE_HW) {
id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
topo->thread_id = cpu;
topo->core_id = cpu;
topo->socket_id = id;
topo->book_id = id;
topo->drawer_id = id;
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 15:50:21 +07:00
if (cpu_present(cpu))
cpumask_set_cpu(cpu, &cpus_with_topology);
}
}
numa_update_cpu_topology();
}
void store_topology(struct sysinfo_15_1_x *info)
{
stsi(info, 15, 1, topology_mnest_limit());
}
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 15:50:21 +07:00
static int __arch_update_cpu_topology(void)
{
struct sysinfo_15_1_x *info = tl_info;
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 15:50:21 +07:00
int rc = 0;
mutex_lock(&smp_cpu_state_mutex);
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 15:50:21 +07:00
cpumask_clear(&cpus_with_topology);
if (MACHINE_HAS_TOPOLOGY) {
rc = 1;
store_topology(info);
tl_to_masks(info);
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 12:46:13 +07:00
}
update_cpu_masks();
if (!MACHINE_HAS_TOPOLOGY)
topology_update_polarization_simple();
mutex_unlock(&smp_cpu_state_mutex);
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 15:50:21 +07:00
return rc;
}
int arch_update_cpu_topology(void)
{
struct device *dev;
int cpu, rc;
rc = __arch_update_cpu_topology();
for_each_online_cpu(cpu) {
cpu: convert 'cpu' and 'machinecheck' sysdev_class to a regular subsystem This moves the 'cpu sysdev_class' over to a regular 'cpu' subsystem and converts the devices to regular devices. The sysdev drivers are implemented as subsystem interfaces now. After all sysdev classes are ported to regular driver core entities, the sysdev implementation will be entirely removed from the kernel. Userspace relies on events and generic sysfs subsystem infrastructure from sysdev devices, which are made available with this conversion. Cc: Haavard Skinnemoen <hskinnemoen@gmail.com> Cc: Hans-Christian Egtvedt <egtvedt@samfundet.no> Cc: Tony Luck <tony.luck@intel.com> Cc: Fenghua Yu <fenghua.yu@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Paul Mundt <lethal@linux-sh.org> Cc: "David S. Miller" <davem@davemloft.net> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Ingo Molnar <mingo@redhat.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Borislav Petkov <bp@amd64.org> Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk> Cc: Len Brown <lenb@kernel.org> Cc: Zhang Rui <rui.zhang@intel.com> Cc: Dave Jones <davej@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Russell King <rmk+kernel@arm.linux.org.uk> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Arjan van de Ven <arjan@linux.intel.com> Cc: "Rafael J. Wysocki" <rjw@sisk.pl> Cc: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com> Signed-off-by: Kay Sievers <kay.sievers@vrfy.org> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
2011-12-22 05:29:42 +07:00
dev = get_cpu_device(cpu);
kobject_uevent(&dev->kobj, KOBJ_CHANGE);
}
return rc;
}
[S390] cpu topology: Fix possible deadlock. When we get a notification that cpu topology changed, we schedule a work struct which just calls arch_reinit_sched_domains. This function in turn calls get_online_cpus() which results int the lockdep warning below. After all it turnded out that it's not legal to call get_online_cpus() from the context of a multi-threaded work queue. It could deadlock this way: process 0 (events/cpu-x): -> run_workqueue -> removes my work_struct from the work queue -> calls work_struct->fn -> get_online_cpus() -> locks on cpu_hotplug.lock since process 1 below is doing cpu hotplug process 1: -> cpu_down (for cpu-x) -> cpu_hotplug_begin (holds cpu_hotplug.lock now) -> cpu-x dead -> notifier_call_chain with CPU_DEAD -> cleanup_workqueue_thread -> flush_cpu_workqueue (succeeds) -> kthread_stop for events/cpu-x -> now kthread_stop waits for my work_struct to complete from within process 0. -> dead. A single threaded workqueue wouldn't have such problems, however there is no such common queue available and it's not worth to create one for the very rare calls to arch_reinit_sched_domains. So we just create a kernel thread from our work struct which calls arch_reinit_sched_domains and are done with it. Thanks to Oleg Nesterov and Peter Zijlstra for helping me figuring out that this isn't a false positive lockdep warning: ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.25-03562-g3dc5063-dirty #12 ------------------------------------------------------- events/3/14 is trying to acquire lock: (&cpu_hotplug.lock){--..}, at: [<0000000000076094>] get_online_cpus+0x50/0x78 but task is already holding lock: (topology_work){--..}, at: [<0000000000059cde>] run_workqueue+0x106/0x278 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #2 (topology_work){--..}: [<000000000006fc74>] __lock_acquire+0x1010/0x111c [<000000000006fe40>] lock_acquire+0xc0/0xf8 [<0000000000059d48>] run_workqueue+0x170/0x278 [<0000000000059edc>] worker_thread+0x8c/0xf0 [<000000000005f5bc>] kthread+0x68/0xa0 [<000000000001a33e>] kernel_thread_starter+0x6/0xc [<000000000001a338>] kernel_thread_starter+0x0/0xc -> #1 (events){--..}: [<000000000006fc74>] __lock_acquire+0x1010/0x111c [<000000000006fe40>] lock_acquire+0xc0/0xf8 [<000000000005a23c>] cleanup_workqueue_thread+0x60/0xa8 [<00000000003b2ab8>] workqueue_cpu_callback+0xbc/0x170 [<00000000003bba80>] notifier_call_chain+0x5c/0xa4 [<00000000000655a2>] __raw_notifier_call_chain+0x26/0x38 [<00000000000655e2>] raw_notifier_call_chain+0x2e/0x40 [<0000000000075e00>] cpu_down+0x228/0x31c [<00000000003b1dd8>] store_online+0x64/0xb8 [<00000000001e7128>] sysdev_store+0x48/0x58 [<0000000000121cd2>] sysfs_write_file+0x126/0x1c0 [<00000000000c1944>] vfs_write+0xb0/0x15c [<00000000000c20e6>] sys_write+0x56/0x88 [<0000000000027a68>] sys32_write+0x34/0x4c [<0000000000023f70>] sysc_noemu+0x10/0x16 [<0000000077f3f186>] 0x77f3f186 -> #0 (&cpu_hotplug.lock){--..}: [<000000000006fa84>] __lock_acquire+0xe20/0x111c [<000000000006fe40>] lock_acquire+0xc0/0xf8 [<00000000003b701c>] mutex_lock_nested+0xd0/0x364 [<0000000000076094>] get_online_cpus+0x50/0x78 [<000000000003a03e>] arch_reinit_sched_domains+0x26/0x58 [<000000000002700e>] topology_work_fn+0x26/0x34 [<0000000000059d4e>] run_workqueue+0x176/0x278 [<0000000000059edc>] worker_thread+0x8c/0xf0 [<000000000005f5bc>] kthread+0x68/0xa0 [<000000000001a33e>] kernel_thread_starter+0x6/0xc [<000000000001a338>] kernel_thread_starter+0x0/0xc other info that might help us debug this: 2 locks held by events/3/14: #0: (events){--..}, at: [<0000000000059cde>] run_workqueue+0x106/0x278 #1: (topology_work){--..}, at: [<0000000000059cde>] run_workqueue+0x106/0x278 stack backtrace: CPU: 3 Not tainted 2.6.25-03562-g3dc5063-dirty #12 Process events/3 (pid: 14, task: 000000002fb04038, ksp: 000000002fb0bd70) 0400000000000000 000000002fb0ba40 0000000000000002 0000000000000000 000000002fb0bae0 000000002fb0ba58 000000002fb0ba58 0000000000016488 0000000000000000 000000002fb0bd70 0000000000000000 0000000000000000 000000002fb0ba40 000000000000000c 000000002fb0ba40 000000002fb0bab0 00000000003c99e0 0000000000016488 000000002fb0ba40 000000002fb0ba90 Call Trace: ([<00000000000163fc>] show_trace+0x138/0x158) [<00000000000164e2>] show_stack+0xc6/0xf8 [<0000000000016624>] dump_stack+0xb0/0xc0 [<000000000006cd36>] print_circular_bug_tail+0xa2/0xb4 [<000000000006fa84>] __lock_acquire+0xe20/0x111c [<000000000006fe40>] lock_acquire+0xc0/0xf8 [<00000000003b701c>] mutex_lock_nested+0xd0/0x364 [<0000000000076094>] get_online_cpus+0x50/0x78 [<000000000003a03e>] arch_reinit_sched_domains+0x26/0x58 [<000000000002700e>] topology_work_fn+0x26/0x34 [<0000000000059d4e>] run_workqueue+0x176/0x278 [<0000000000059edc>] worker_thread+0x8c/0xf0 [<000000000005f5bc>] kthread+0x68/0xa0 [<000000000001a33e>] kernel_thread_starter+0x6/0xc [<000000000001a338>] kernel_thread_starter+0x0/0xc INFO: lockdep is turned off. Cc: Oleg Nesterov <oleg@tv-sign.ru> Cc: Peter Zijlstra <peterz@infradead.org> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2008-04-30 18:38:41 +07:00
static void topology_work_fn(struct work_struct *work)
{
rebuild_sched_domains();
}
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 12:46:13 +07:00
void topology_schedule_update(void)
{
schedule_work(&topology_work);
}
static void topology_flush_work(void)
{
flush_work(&topology_work);
}
static void topology_timer_fn(unsigned long ignored)
{
[S390] Vertical cpu management. If vertical cpu polarization is active then the hypervisor will dispatch certain cpus for a longer time than other cpus for maximum performance. For example if a guest would have three virtual cpus, each of them with a share of 33 percent, then in case of vertical cpu polarization all of the processing time would be combined to a single cpu which would run all the time, while the other two cpus would get nearly no cpu time. There are three different types of vertical cpus: high, medium and low. Low cpus hardly get any real cpu time, while high cpus get a full real cpu. Medium cpus get something in between. In order to switch between the two possible modes (default is horizontal) a 0 for horizontal polarization or a 1 for vertical polarization must be written to the dispatching sysfs attribute: /sys/devices/system/cpu/dispatching The polarization of each single cpu can be figured out by the polarization sysfs attribute of each cpu: /sys/devices/system/cpu/cpuX/polarization horizontal, vertical:high, vertical:medium, vertical:low or unknown. When switching polarization the polarization attribute may contain the value unknown until the configuration change is done and the kernel has figured out the new polarization of each cpu. Note that running a system with different types of vertical cpus may result in significant performance regressions. If possible only one type of vertical cpus should be used. All other cpus should be offlined. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
2008-04-17 12:46:13 +07:00
if (ptf(PTF_CHECK))
topology_schedule_update();
set_topology_timer();
}
static struct timer_list topology_timer =
TIMER_DEFERRED_INITIALIZER(topology_timer_fn, 0, 0);
static atomic_t topology_poll = ATOMIC_INIT(0);
static void set_topology_timer(void)
{
if (atomic_add_unless(&topology_poll, -1, 0))
mod_timer(&topology_timer, jiffies + HZ / 10);
else
mod_timer(&topology_timer, jiffies + HZ * 60);
}
void topology_expect_change(void)
{
if (!MACHINE_HAS_TOPOLOGY)
return;
/* This is racy, but it doesn't matter since it is just a heuristic.
* Worst case is that we poll in a higher frequency for a bit longer.
*/
if (atomic_read(&topology_poll) > 60)
return;
atomic_add(60, &topology_poll);
set_topology_timer();
}
static int cpu_management;
static ssize_t dispatching_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
count = sprintf(buf, "%d\n", cpu_management);
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
static ssize_t dispatching_store(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
int val, rc;
char delim;
if (sscanf(buf, "%d %c", &val, &delim) != 1)
return -EINVAL;
if (val != 0 && val != 1)
return -EINVAL;
rc = 0;
get_online_cpus();
mutex_lock(&smp_cpu_state_mutex);
if (cpu_management == val)
goto out;
rc = topology_set_cpu_management(val);
if (rc)
goto out;
cpu_management = val;
topology_expect_change();
out:
mutex_unlock(&smp_cpu_state_mutex);
put_online_cpus();
return rc ? rc : count;
}
static DEVICE_ATTR(dispatching, 0644, dispatching_show,
dispatching_store);
static ssize_t cpu_polarization_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
int cpu = dev->id;
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
switch (smp_cpu_get_polarization(cpu)) {
case POLARIZATION_HRZ:
count = sprintf(buf, "horizontal\n");
break;
case POLARIZATION_VL:
count = sprintf(buf, "vertical:low\n");
break;
case POLARIZATION_VM:
count = sprintf(buf, "vertical:medium\n");
break;
case POLARIZATION_VH:
count = sprintf(buf, "vertical:high\n");
break;
default:
count = sprintf(buf, "unknown\n");
break;
}
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
static DEVICE_ATTR(polarization, 0444, cpu_polarization_show, NULL);
static struct attribute *topology_cpu_attrs[] = {
&dev_attr_polarization.attr,
NULL,
};
static struct attribute_group topology_cpu_attr_group = {
.attrs = topology_cpu_attrs,
};
int topology_cpu_init(struct cpu *cpu)
{
return sysfs_create_group(&cpu->dev.kobj, &topology_cpu_attr_group);
}
static const struct cpumask *cpu_thread_mask(int cpu)
{
return &cpu_topology[cpu].thread_mask;
}
const struct cpumask *cpu_coregroup_mask(int cpu)
{
return &cpu_topology[cpu].core_mask;
}
static const struct cpumask *cpu_book_mask(int cpu)
{
return &cpu_topology[cpu].book_mask;
}
static const struct cpumask *cpu_drawer_mask(int cpu)
{
return &cpu_topology[cpu].drawer_mask;
}
static struct sched_domain_topology_level s390_topology[] = {
{ cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
{ cpu_book_mask, SD_INIT_NAME(BOOK) },
{ cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
{ NULL, },
};
static void __init alloc_masks(struct sysinfo_15_1_x *info,
struct mask_info *mask, int offset)
{
int i, nr_masks;
nr_masks = info->mag[TOPOLOGY_NR_MAG - offset];
for (i = 0; i < info->mnest - offset; i++)
nr_masks *= info->mag[TOPOLOGY_NR_MAG - offset - 1 - i];
nr_masks = max(nr_masks, 1);
for (i = 0; i < nr_masks; i++) {
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 15:50:21 +07:00
mask->next = memblock_virt_alloc(sizeof(*mask->next), 8);
mask = mask->next;
}
}
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 15:50:21 +07:00
void __init topology_init_early(void)
{
struct sysinfo_15_1_x *info;
set_sched_topology(s390_topology);
if (topology_mode == TOPOLOGY_MODE_UNINITIALIZED) {
if (MACHINE_HAS_TOPOLOGY)
topology_mode = TOPOLOGY_MODE_HW;
else
topology_mode = TOPOLOGY_MODE_SINGLE;
}
if (!MACHINE_HAS_TOPOLOGY)
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 15:50:21 +07:00
goto out;
tl_info = memblock_virt_alloc(PAGE_SIZE, PAGE_SIZE);
info = tl_info;
store_topology(info);
pr_info("The CPU configuration topology of the machine is: %d %d %d %d %d %d / %d\n",
info->mag[0], info->mag[1], info->mag[2], info->mag[3],
info->mag[4], info->mag[5], info->mnest);
alloc_masks(info, &socket_info, 1);
alloc_masks(info, &book_info, 2);
alloc_masks(info, &drawer_info, 3);
s390/numa: establish cpu to node mapping early Initialize the cpu topology and therefore also the cpu to node mapping much earlier. Fixes this warning and subsequent crashes when using the fake numa emulation mode on s390: WARNING: CPU: 0 PID: 1 at include/linux/cpumask.h:121 select_task_rq+0xe6/0x1a8 CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.6.0-rc6-00001-ge9d867a67fd0-dirty #28 task: 00000001dd270008 ti: 00000001eccb4000 task.ti: 00000001eccb4000 Krnl PSW : 0404c00180000000 0000000000176c56 (select_task_rq+0xe6/0x1a8) R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Call Trace: ([<0000000000176c30>] select_task_rq+0xc0/0x1a8) ([<0000000000177d64>] try_to_wake_up+0x2e4/0x478) ([<000000000015d46c>] create_worker+0x174/0x1c0) ([<0000000000161a98>] alloc_unbound_pwq+0x360/0x438) ([<0000000000162550>] apply_wqattrs_prepare+0x200/0x2a0) ([<000000000016266a>] apply_workqueue_attrs_locked+0x7a/0xb0) ([<0000000000162af0>] apply_workqueue_attrs+0x50/0x78) ([<000000000016441c>] __alloc_workqueue_key+0x304/0x520) ([<0000000000ee3706>] default_bdi_init+0x3e/0x70) ([<0000000000100270>] do_one_initcall+0x140/0x1d8) ([<0000000000ec9da8>] kernel_init_freeable+0x220/0x2d8) ([<0000000000984a7a>] kernel_init+0x2a/0x150) ([<00000000009913fa>] kernel_thread_starter+0x6/0xc) ([<00000000009913f4>] kernel_thread_starter+0x0/0xc) Reviewed-by: Michael Holzheu <holzheu@linux.vnet.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-12-03 15:50:21 +07:00
out:
__arch_update_cpu_topology();
}
static inline int topology_get_mode(int enabled)
{
if (!enabled)
return TOPOLOGY_MODE_SINGLE;
return MACHINE_HAS_TOPOLOGY ? TOPOLOGY_MODE_HW : TOPOLOGY_MODE_PACKAGE;
}
static inline int topology_is_enabled(void)
{
return topology_mode != TOPOLOGY_MODE_SINGLE;
}
static int __init topology_setup(char *str)
{
bool enabled;
int rc;
rc = kstrtobool(str, &enabled);
if (rc)
return rc;
topology_mode = topology_get_mode(enabled);
return 0;
}
early_param("topology", topology_setup);
static int topology_ctl_handler(struct ctl_table *ctl, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
unsigned int len;
int new_mode;
char buf[2];
if (!*lenp || *ppos) {
*lenp = 0;
return 0;
}
if (!write) {
strncpy(buf, topology_is_enabled() ? "1\n" : "0\n",
ARRAY_SIZE(buf));
len = strnlen(buf, ARRAY_SIZE(buf));
if (len > *lenp)
len = *lenp;
if (copy_to_user(buffer, buf, len))
return -EFAULT;
goto out;
}
len = *lenp;
if (copy_from_user(buf, buffer, len > sizeof(buf) ? sizeof(buf) : len))
return -EFAULT;
if (buf[0] != '0' && buf[0] != '1')
return -EINVAL;
mutex_lock(&smp_cpu_state_mutex);
new_mode = topology_get_mode(buf[0] == '1');
if (topology_mode != new_mode) {
topology_mode = new_mode;
topology_schedule_update();
}
mutex_unlock(&smp_cpu_state_mutex);
topology_flush_work();
out:
*lenp = len;
*ppos += len;
return 0;
}
static struct ctl_table topology_ctl_table[] = {
{
.procname = "topology",
.mode = 0644,
.proc_handler = topology_ctl_handler,
},
{ },
};
static struct ctl_table topology_dir_table[] = {
{
.procname = "s390",
.maxlen = 0,
.mode = 0555,
.child = topology_ctl_table,
},
{ },
};
static int __init topology_init(void)
{
if (MACHINE_HAS_TOPOLOGY)
set_topology_timer();
else
topology_update_polarization_simple();
register_sysctl_table(topology_dir_table);
return device_create_file(cpu_subsys.dev_root, &dev_attr_dispatching);
}
device_initcall(topology_init);