mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-25 15:27:36 +07:00
696ac2e3bf
Similar to commit 0266d81e9b
("acpi/processor: Prevent cpu hotplug
deadlock") except this is for acpi_processor_ffh_cstate_probe():
"The problem is that the work is scheduled on the current CPU from the
hotplug thread associated with that CPU.
It's not required to invoke these functions via the workqueue because
the hotplug thread runs on the target CPU already.
Check whether current is a per cpu thread pinned on the target CPU and
invoke the function directly to avoid the workqueue."
WARNING: possible circular locking dependency detected
------------------------------------------------------
cpuhp/1/15 is trying to acquire lock:
ffffc90003447a28 ((work_completion)(&wfc.work)){+.+.}-{0:0}, at: __flush_work+0x4c6/0x630
but task is already holding lock:
ffffffffafa1c0e8 (cpuidle_lock){+.+.}-{3:3}, at: cpuidle_pause_and_lock+0x17/0x20
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #1 (cpu_hotplug_lock){++++}-{0:0}:
cpus_read_lock+0x3e/0xc0
irq_calc_affinity_vectors+0x5f/0x91
__pci_enable_msix_range+0x10f/0x9a0
pci_alloc_irq_vectors_affinity+0x13e/0x1f0
pci_alloc_irq_vectors_affinity at drivers/pci/msi.c:1208
pqi_ctrl_init+0x72f/0x1618 [smartpqi]
pqi_pci_probe.cold.63+0x882/0x892 [smartpqi]
local_pci_probe+0x7a/0xc0
work_for_cpu_fn+0x2e/0x50
process_one_work+0x57e/0xb90
worker_thread+0x363/0x5b0
kthread+0x1f4/0x220
ret_from_fork+0x27/0x50
-> #0 ((work_completion)(&wfc.work)){+.+.}-{0:0}:
__lock_acquire+0x2244/0x32a0
lock_acquire+0x1a2/0x680
__flush_work+0x4e6/0x630
work_on_cpu+0x114/0x160
acpi_processor_ffh_cstate_probe+0x129/0x250
acpi_processor_evaluate_cst+0x4c8/0x580
acpi_processor_get_power_info+0x86/0x740
acpi_processor_hotplug+0xc3/0x140
acpi_soft_cpu_online+0x102/0x1d0
cpuhp_invoke_callback+0x197/0x1120
cpuhp_thread_fun+0x252/0x2f0
smpboot_thread_fn+0x255/0x440
kthread+0x1f4/0x220
ret_from_fork+0x27/0x50
other info that might help us debug this:
Chain exists of:
(work_completion)(&wfc.work) --> cpuhp_state-up --> cpuidle_lock
Possible unsafe locking scenario:
CPU0 CPU1
---- ----
lock(cpuidle_lock);
lock(cpuhp_state-up);
lock(cpuidle_lock);
lock((work_completion)(&wfc.work));
*** DEADLOCK ***
3 locks held by cpuhp/1/15:
#0: ffffffffaf51ab10 (cpu_hotplug_lock){++++}-{0:0}, at: cpuhp_thread_fun+0x69/0x2f0
#1: ffffffffaf51ad40 (cpuhp_state-up){+.+.}-{0:0}, at: cpuhp_thread_fun+0x69/0x2f0
#2: ffffffffafa1c0e8 (cpuidle_lock){+.+.}-{3:3}, at: cpuidle_pause_and_lock+0x17/0x20
Call Trace:
dump_stack+0xa0/0xea
print_circular_bug.cold.52+0x147/0x14c
check_noncircular+0x295/0x2d0
__lock_acquire+0x2244/0x32a0
lock_acquire+0x1a2/0x680
__flush_work+0x4e6/0x630
work_on_cpu+0x114/0x160
acpi_processor_ffh_cstate_probe+0x129/0x250
acpi_processor_evaluate_cst+0x4c8/0x580
acpi_processor_get_power_info+0x86/0x740
acpi_processor_hotplug+0xc3/0x140
acpi_soft_cpu_online+0x102/0x1d0
cpuhp_invoke_callback+0x197/0x1120
cpuhp_thread_fun+0x252/0x2f0
smpboot_thread_fn+0x255/0x440
kthread+0x1f4/0x220
ret_from_fork+0x27/0x50
Signed-off-by: Qian Cai <cai@lca.pw>
Tested-by: Borislav Petkov <bp@suse.de>
[ rjw: Subject ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
215 lines
6.0 KiB
C
215 lines
6.0 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Copyright (C) 2005 Intel Corporation
|
|
* Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
|
|
* - Added _PDC for SMP C-states on Intel CPUs
|
|
*/
|
|
|
|
#include <linux/kernel.h>
|
|
#include <linux/export.h>
|
|
#include <linux/init.h>
|
|
#include <linux/acpi.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/sched.h>
|
|
|
|
#include <acpi/processor.h>
|
|
#include <asm/mwait.h>
|
|
#include <asm/special_insns.h>
|
|
|
|
/*
|
|
* Initialize bm_flags based on the CPU cache properties
|
|
* On SMP it depends on cache configuration
|
|
* - When cache is not shared among all CPUs, we flush cache
|
|
* before entering C3.
|
|
* - When cache is shared among all CPUs, we use bm_check
|
|
* mechanism as in UP case
|
|
*
|
|
* This routine is called only after all the CPUs are online
|
|
*/
|
|
void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags,
|
|
unsigned int cpu)
|
|
{
|
|
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
|
|
|
flags->bm_check = 0;
|
|
if (num_online_cpus() == 1)
|
|
flags->bm_check = 1;
|
|
else if (c->x86_vendor == X86_VENDOR_INTEL) {
|
|
/*
|
|
* Today all MP CPUs that support C3 share cache.
|
|
* And caches should not be flushed by software while
|
|
* entering C3 type state.
|
|
*/
|
|
flags->bm_check = 1;
|
|
}
|
|
|
|
/*
|
|
* On all recent Intel platforms, ARB_DISABLE is a nop.
|
|
* So, set bm_control to zero to indicate that ARB_DISABLE
|
|
* is not required while entering C3 type state on
|
|
* P4, Core and beyond CPUs
|
|
*/
|
|
if (c->x86_vendor == X86_VENDOR_INTEL &&
|
|
(c->x86 > 0xf || (c->x86 == 6 && c->x86_model >= 0x0f)))
|
|
flags->bm_control = 0;
|
|
/*
|
|
* For all recent Centaur CPUs, the ucode will make sure that each
|
|
* core can keep cache coherence with each other while entering C3
|
|
* type state. So, set bm_check to 1 to indicate that the kernel
|
|
* doesn't need to execute a cache flush operation (WBINVD) when
|
|
* entering C3 type state.
|
|
*/
|
|
if (c->x86_vendor == X86_VENDOR_CENTAUR) {
|
|
if (c->x86 > 6 || (c->x86 == 6 && c->x86_model == 0x0f &&
|
|
c->x86_stepping >= 0x0e))
|
|
flags->bm_check = 1;
|
|
}
|
|
|
|
if (c->x86_vendor == X86_VENDOR_ZHAOXIN) {
|
|
/*
|
|
* All Zhaoxin CPUs that support C3 share cache.
|
|
* And caches should not be flushed by software while
|
|
* entering C3 type state.
|
|
*/
|
|
flags->bm_check = 1;
|
|
/*
|
|
* On all recent Zhaoxin platforms, ARB_DISABLE is a nop.
|
|
* So, set bm_control to zero to indicate that ARB_DISABLE
|
|
* is not required while entering C3 type state.
|
|
*/
|
|
flags->bm_control = 0;
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(acpi_processor_power_init_bm_check);
|
|
|
|
/* The code below handles cstate entry with monitor-mwait pair on Intel */
|
|
|
|
struct cstate_entry {
|
|
struct {
|
|
unsigned int eax;
|
|
unsigned int ecx;
|
|
} states[ACPI_PROCESSOR_MAX_POWER];
|
|
};
|
|
static struct cstate_entry __percpu *cpu_cstate_entry; /* per CPU ptr */
|
|
|
|
static short mwait_supported[ACPI_PROCESSOR_MAX_POWER];
|
|
|
|
#define NATIVE_CSTATE_BEYOND_HALT (2)
|
|
|
|
static long acpi_processor_ffh_cstate_probe_cpu(void *_cx)
|
|
{
|
|
struct acpi_processor_cx *cx = _cx;
|
|
long retval;
|
|
unsigned int eax, ebx, ecx, edx;
|
|
unsigned int edx_part;
|
|
unsigned int cstate_type; /* C-state type and not ACPI C-state type */
|
|
unsigned int num_cstate_subtype;
|
|
|
|
cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
|
|
|
|
/* Check whether this particular cx_type (in CST) is supported or not */
|
|
cstate_type = ((cx->address >> MWAIT_SUBSTATE_SIZE) &
|
|
MWAIT_CSTATE_MASK) + 1;
|
|
edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE);
|
|
num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;
|
|
|
|
retval = 0;
|
|
/* If the HW does not support any sub-states in this C-state */
|
|
if (num_cstate_subtype == 0) {
|
|
pr_warn(FW_BUG "ACPI MWAIT C-state 0x%x not supported by HW (0x%x)\n",
|
|
cx->address, edx_part);
|
|
retval = -1;
|
|
goto out;
|
|
}
|
|
|
|
/* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */
|
|
if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) ||
|
|
!(ecx & CPUID5_ECX_INTERRUPT_BREAK)) {
|
|
retval = -1;
|
|
goto out;
|
|
}
|
|
|
|
if (!mwait_supported[cstate_type]) {
|
|
mwait_supported[cstate_type] = 1;
|
|
printk(KERN_DEBUG
|
|
"Monitor-Mwait will be used to enter C-%d state\n",
|
|
cx->type);
|
|
}
|
|
snprintf(cx->desc,
|
|
ACPI_CX_DESC_LEN, "ACPI FFH MWAIT 0x%x",
|
|
cx->address);
|
|
out:
|
|
return retval;
|
|
}
|
|
|
|
int acpi_processor_ffh_cstate_probe(unsigned int cpu,
|
|
struct acpi_processor_cx *cx, struct acpi_power_register *reg)
|
|
{
|
|
struct cstate_entry *percpu_entry;
|
|
struct cpuinfo_x86 *c = &cpu_data(cpu);
|
|
long retval;
|
|
|
|
if (!cpu_cstate_entry || c->cpuid_level < CPUID_MWAIT_LEAF)
|
|
return -1;
|
|
|
|
if (reg->bit_offset != NATIVE_CSTATE_BEYOND_HALT)
|
|
return -1;
|
|
|
|
percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu);
|
|
percpu_entry->states[cx->index].eax = 0;
|
|
percpu_entry->states[cx->index].ecx = 0;
|
|
|
|
/* Make sure we are running on right CPU */
|
|
|
|
retval = call_on_cpu(cpu, acpi_processor_ffh_cstate_probe_cpu, cx,
|
|
false);
|
|
if (retval == 0) {
|
|
/* Use the hint in CST */
|
|
percpu_entry->states[cx->index].eax = cx->address;
|
|
percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK;
|
|
}
|
|
|
|
/*
|
|
* For _CST FFH on Intel, if GAS.access_size bit 1 is cleared,
|
|
* then we should skip checking BM_STS for this C-state.
|
|
* ref: "Intel Processor Vendor-Specific ACPI Interface Specification"
|
|
*/
|
|
if ((c->x86_vendor == X86_VENDOR_INTEL) && !(reg->access_size & 0x2))
|
|
cx->bm_sts_skip = 1;
|
|
|
|
return retval;
|
|
}
|
|
EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe);
|
|
|
|
/*
 * acpi_processor_ffh_cstate_enter - enter an FFH C-state on this CPU
 * @cx: C-state to enter; cx->index selects the stored MWAIT arguments
 *
 * Looks up the eax/ecx pair saved for the current CPU and passes it to
 * mwait_idle_with_hints().
 */
void __cpuidle acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx)
{
	struct cstate_entry *entry;
	unsigned int eax_hint, ecx_ext;

	entry = per_cpu_ptr(cpu_cstate_entry, smp_processor_id());
	eax_hint = entry->states[cx->index].eax;
	ecx_ext = entry->states[cx->index].ecx;

	mwait_idle_with_hints(eax_hint, ecx_ext);
}
EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_enter);
|
|
|
|
/*
 * ffh_cstate_init - allocate the per-CPU MWAIT argument table
 *
 * Only done when the boot CPU vendor is Intel or AMD.
 *
 * Return: 0 on success, -1 for any other vendor, -ENOMEM when the
 * per-CPU allocation fails (cpu_cstate_entry then stays NULL, which
 * acpi_processor_ffh_cstate_probe() checks for).
 */
static int __init ffh_cstate_init(void)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;

	if (c->x86_vendor != X86_VENDOR_INTEL &&
	    c->x86_vendor != X86_VENDOR_AMD)
		return -1;

	cpu_cstate_entry = alloc_percpu(struct cstate_entry);
	if (!cpu_cstate_entry)
		return -ENOMEM;	/* report the failure instead of success */

	return 0;
}
|
|
|
|
/*
 * ffh_cstate_exit - release the per-CPU MWAIT argument table
 *
 * Frees the table and resets cpu_cstate_entry to NULL.
 */
static void __exit ffh_cstate_exit(void)
{
	struct cstate_entry __percpu *table = cpu_cstate_entry;

	free_percpu(table);
	cpu_cstate_entry = NULL;
}
|
|
|
|
/* Register ffh_cstate_init() as an arch-level initcall. */
arch_initcall(ffh_cstate_init);
|
|
/* Register ffh_cstate_exit() as the matching exit call. */
__exitcall(ffh_cstate_exit);
|