mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-18 13:36:07 +07:00
Power management and ACPI material for v4.3-rc4
- intel_idle driver fixup for the recently added Skylake chips support (Len Brown). - Operating Performance Points (OPP) library fix related to the recently added support for new DT bindings and a fix for a typo in a comment (Viresh Kumar, Stephen Boyd). - ACPI EC driver fix for a recently introduced memory leak in an error code path (Lv Zheng). - ACPI PCI IRQ management fix for the issue where an ISA IRQ is shared with a PCI device which requires it to be configured in a different way and may cause an interrupt storm to happen as a result with an extra ACPI SCI IRQ handling simplification on top of it (Jiang Liu). - Update of the PCI power management documentation that became outdated and started to actively confuse the readers to make it actually reflect the code (Rafael J Wysocki). - turbostat fixes including an IVB Xeon regression fix (related to the --debug command line option), Skylake adjustment for the TSC running at a frequency that doesn't match the base one exactly, and a Knights Landing quirk to account for the fact that it only updates APERF and MPERF every 1024 clock cycles plus bumping up the turbostat version number (Len Brown, Hubert Chrzaniuk). 
/ -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.22 (GNU/Linux) iQIcBAABCAAGBQJWDag4AAoJEILEb/54YlRxy/UQAJa39EC2IQd+PrMlgMx3cp2N ssotwuQiQ0jL2V/qc36wfzgu3A5k0ldHHQGbgX0f/z9LjD+zLsZiPtHj27LrNtG5 J9DgViLh9vut4XEsLlzj8W2z1OcTyAmZyTIiVeFlj/zM517oeXKVYMX2RuhHQk0r lwDI/hc1rtpUkdN7gkT9DqyO32r1LgNkDt6+ubRr/qrYVhYPXSrp4k9wxnr9j1Bx 0G9bvCz8ETTclRPcfToGU9P86snk5FS3veSm231ioABdry7BxhTZHjQKSZyjuvx4 l8YedxBc0ks7yyeN9lvWPbNSpHLjhYen+d9q1koQsHJYb+gWJ/KbSGu3kfg0bPDj Rzh1u76ak7MOYpkn+95MRhzIiFxG3IhUoqYhIGGyCNFGAJgPfFos2IJTISAxSmTE ebCyFEX07AdhjHac4RyRCnMVavZthgLyXHwXiNqG9gdW9aOEzN65svH2LLMBiKcH IGRCsjom1uCUT0y1gy3R7q1nTCi112IcXwvAziX7QKCNOxLIH8HJNiraVcyl2vY5 BbDyTOQ7VboviWWSQ09+bQFq4CAhe4b9+nR4XhvHO9F0ffxBujBoCwjjFQY+yJIH 9nYaYyUynpi1m0Y1AwlrI8wgVLDfNEE6UU63clHQ2PoOFfDDE+/5I/l3yuWubo0I cUtW1RVEgDaa61ehyFuS =ELup -----END PGP SIGNATURE----- Merge tag 'pm+acpi-4.3-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm Pull power management and ACPI fixes from Rafael Wysocki: "These are fixes mostly, for a few changes made in this cycle (the intel_idle driver, the OPP library, the ACPI EC driver, turbostat) and for some issues that have just been discovered (ACPI PCI IRQ management, PCI power management documentation, turbostat), with a couple of cleanups on top of them. Specifics: - intel_idle driver fixup for the recently added Skylake chips support (Len Brown). - Operating Performance Points (OPP) library fix related to the recently added support for new DT bindings and a fix for a typo in a comment (Viresh Kumar, Stephen Boyd). - ACPI EC driver fix for a recently introduced memory leak in an error code path (Lv Zheng). - ACPI PCI IRQ management fix for the issue where an ISA IRQ is shared with a PCI device which requires it to be configured in a different way and may cause an interrupt storm to happen as a result with an extra ACPI SCI IRQ handling simplification on top of it (Jiang Liu). 
- Update of the PCI power management documentation that became outdated and started to actively confuse the readers to make it actually reflect the code (Rafael J Wysocki). - turbostat fixes including an IVB Xeon regression fix (related to the --debug command line option), Skylake adjustment for the TSC running at a frequency that doesn't match the base one exactly, and a Knights Landing quirk to account for the fact that it only updates APERF and MPERF every 1024 clock cycles plus bumping up the turbostat version number (Len Brown, Hubert Chrzaniuk)" * tag 'pm+acpi-4.3-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: tools/power turbosat: update version number tools/power turbostat: SKL: Adjust for TSC difference from base frequency tools/power turbostat: KNL workaround for %Busy and Avg_MHz tools/power turbostat: IVB Xeon: fix --debug regression ACPI / PCI: Remove duplicated penalty on SCI IRQ ACPI, PCI, irq: Do not share PCI IRQ with ISA IRQ ACPI / EC: Fix a memory leak issue in acpi_ec_query() PM / OPP: Fix typo modifcation -> modification PCI / PM: Update runtime PM documentation for PCI devices PM / OPP: of_property_count_u32_elems() can return errors intel_idle: Skylake Client Support - updated
This commit is contained in:
commit
1bca1000fa
@ -979,20 +979,45 @@ every time right after the runtime_resume() callback has returned
|
||||
(alternatively, the runtime_suspend() callback will have to check if the
|
||||
device should really be suspended and return -EAGAIN if that is not the case).
|
||||
|
||||
The runtime PM of PCI devices is disabled by default. It is also blocked by
|
||||
pci_pm_init() that runs the pm_runtime_forbid() helper function. If a PCI
|
||||
driver implements the runtime PM callbacks and intends to use the runtime PM
|
||||
framework provided by the PM core and the PCI subsystem, it should enable this
|
||||
feature by executing the pm_runtime_enable() helper function. However, the
|
||||
driver should not call the pm_runtime_allow() helper function unblocking
|
||||
the runtime PM of the device. Instead, it should allow user space or some
|
||||
platform-specific code to do that (user space can do it via sysfs), although
|
||||
once it has called pm_runtime_enable(), it must be prepared to handle the
|
||||
The runtime PM of PCI devices is enabled by default by the PCI core. PCI
|
||||
device drivers do not need to enable it and should not attempt to do so.
|
||||
However, it is blocked by pci_pm_init() that runs the pm_runtime_forbid()
|
||||
helper function. In addition to that, the runtime PM usage counter of
|
||||
each PCI device is incremented by local_pci_probe() before executing the
|
||||
probe callback provided by the device's driver.
|
||||
|
||||
If a PCI driver implements the runtime PM callbacks and intends to use the
|
||||
runtime PM framework provided by the PM core and the PCI subsystem, it needs
|
||||
to decrement the device's runtime PM usage counter in its probe callback
|
||||
function. If it doesn't do that, the counter will always be different from
|
||||
zero for the device and it will never be runtime-suspended. The simplest
|
||||
way to do that is by calling pm_runtime_put_noidle(), but if the driver
|
||||
wants to schedule an autosuspend right away, for example, it may call
|
||||
pm_runtime_put_autosuspend() instead for this purpose. Generally, it
|
||||
just needs to call a function that decrements the device's usage counter
|
||||
from its probe routine to make runtime PM work for the device.
|
||||
|
||||
It is important to remember that the driver's runtime_suspend() callback
|
||||
may be executed right after the usage counter has been decremented, because
|
||||
user space may already have caused the pm_runtime_allow() helper function
|
||||
unblocking the runtime PM of the device to run via sysfs, so the driver must
|
||||
be prepared to cope with that.
|
||||
|
||||
The driver itself should not call pm_runtime_allow(), though. Instead, it
|
||||
should let user space or some platform-specific code do that (user space can
|
||||
do it via sysfs as stated above), but it must be prepared to handle the
|
||||
runtime PM of the device correctly as soon as pm_runtime_allow() is called
|
||||
(which may happen at any time). [It also is possible that user space causes
|
||||
pm_runtime_allow() to be called via sysfs before the driver is loaded, so in
|
||||
fact the driver has to be prepared to handle the runtime PM of the device as
|
||||
soon as it calls pm_runtime_enable().]
|
||||
(which may happen at any time, even before the driver is loaded).
|
||||
|
||||
When the driver's remove callback runs, it has to balance the decrementation
|
||||
of the device's runtime PM usage counter at the probe time. For this reason,
|
||||
if it has decremented the counter in its probe callback, it must run
|
||||
pm_runtime_get_noresume() in its remove callback. [Since the core carries
|
||||
out a runtime resume of the device and bumps up the device's usage counter
|
||||
before running the driver's remove callback, the runtime PM of the device
|
||||
is effectively disabled for the duration of the remove execution and all
|
||||
runtime PM helper functions incrementing the device's usage counter are
|
||||
then effectively equivalent to pm_runtime_get_noresume().]
|
||||
|
||||
The runtime PM framework works by processing requests to suspend or resume
|
||||
devices, or to check if they are idle (in which cases it is reasonable to
|
||||
|
@ -1044,8 +1044,10 @@ static int acpi_ec_query(struct acpi_ec *ec, u8 *data)
|
||||
goto err_exit;
|
||||
|
||||
mutex_lock(&ec->mutex);
|
||||
result = -ENODATA;
|
||||
list_for_each_entry(handler, &ec->list, node) {
|
||||
if (value == handler->query_bit) {
|
||||
result = 0;
|
||||
q->handler = acpi_ec_get_query_handler(handler);
|
||||
ec_dbg_evt("Query(0x%02x) scheduled",
|
||||
q->handler->query_bit);
|
||||
|
@ -372,6 +372,7 @@ static int acpi_isa_register_gsi(struct pci_dev *dev)
|
||||
|
||||
/* Interrupt Line values above 0xF are forbidden */
|
||||
if (dev->irq > 0 && (dev->irq <= 0xF) &&
|
||||
acpi_isa_irq_available(dev->irq) &&
|
||||
(acpi_isa_irq_to_gsi(dev->irq, &dev_gsi) == 0)) {
|
||||
dev_warn(&dev->dev, "PCI INT %c: no GSI - using ISA IRQ %d\n",
|
||||
pin_name(dev->pin), dev->irq);
|
||||
|
@ -498,8 +498,7 @@ int __init acpi_irq_penalty_init(void)
|
||||
PIRQ_PENALTY_PCI_POSSIBLE;
|
||||
}
|
||||
}
|
||||
/* Add a penalty for the SCI */
|
||||
acpi_irq_penalty[acpi_gbl_FADT.sci_interrupt] += PIRQ_PENALTY_PCI_USING;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -553,6 +552,13 @@ static int acpi_pci_link_allocate(struct acpi_pci_link *link)
|
||||
irq = link->irq.possible[i];
|
||||
}
|
||||
}
|
||||
if (acpi_irq_penalty[irq] >= PIRQ_PENALTY_ISA_ALWAYS) {
|
||||
printk(KERN_ERR PREFIX "No IRQ available for %s [%s]. "
|
||||
"Try pci=noacpi or acpi=off\n",
|
||||
acpi_device_name(link->device),
|
||||
acpi_device_bid(link->device));
|
||||
return -ENODEV;
|
||||
}
|
||||
|
||||
/* Attempt to enable the link device at this IRQ. */
|
||||
if (acpi_pci_link_set(link, irq)) {
|
||||
@ -821,6 +827,12 @@ void acpi_penalize_isa_irq(int irq, int active)
|
||||
}
|
||||
}
|
||||
|
||||
bool acpi_isa_irq_available(int irq)
|
||||
{
|
||||
return irq >= 0 && (irq >= ARRAY_SIZE(acpi_irq_penalty) ||
|
||||
acpi_irq_penalty[irq] < PIRQ_PENALTY_ISA_ALWAYS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Penalize IRQ used by ACPI SCI. If ACPI SCI pin attributes conflict with
|
||||
* PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be used for
|
||||
|
@ -892,10 +892,17 @@ static int opp_get_microvolt(struct dev_pm_opp *opp, struct device *dev)
|
||||
u32 microvolt[3] = {0};
|
||||
int count, ret;
|
||||
|
||||
count = of_property_count_u32_elems(opp->np, "opp-microvolt");
|
||||
if (!count)
|
||||
/* Missing property isn't a problem, but an invalid entry is */
|
||||
if (!of_find_property(opp->np, "opp-microvolt", NULL))
|
||||
return 0;
|
||||
|
||||
count = of_property_count_u32_elems(opp->np, "opp-microvolt");
|
||||
if (count < 0) {
|
||||
dev_err(dev, "%s: Invalid opp-microvolt property (%d)\n",
|
||||
__func__, count);
|
||||
return count;
|
||||
}
|
||||
|
||||
/* There can be one or three elements here */
|
||||
if (count != 1 && count != 3) {
|
||||
dev_err(dev, "%s: Invalid number of elements in opp-microvolt property (%d)\n",
|
||||
@ -1063,7 +1070,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_add);
|
||||
* share a common logic which is isolated here.
|
||||
*
|
||||
* Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the
|
||||
* copy operation, returns 0 if no modifcation was done OR modification was
|
||||
* copy operation, returns 0 if no modification was done OR modification was
|
||||
* successful.
|
||||
*
|
||||
* Locking: The internal device_opp and opp structures are RCU protected.
|
||||
@ -1151,7 +1158,7 @@ static int _opp_set_availability(struct device *dev, unsigned long freq,
|
||||
* mutex locking or synchronize_rcu() blocking calls cannot be used.
|
||||
*
|
||||
* Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the
|
||||
* copy operation, returns 0 if no modifcation was done OR modification was
|
||||
* copy operation, returns 0 if no modification was done OR modification was
|
||||
* successful.
|
||||
*/
|
||||
int dev_pm_opp_enable(struct device *dev, unsigned long freq)
|
||||
@ -1177,7 +1184,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_enable);
|
||||
* mutex locking or synchronize_rcu() blocking calls cannot be used.
|
||||
*
|
||||
* Return: -EINVAL for bad pointers, -ENOMEM if no memory available for the
|
||||
* copy operation, returns 0 if no modifcation was done OR modification was
|
||||
* copy operation, returns 0 if no modification was done OR modification was
|
||||
* successful.
|
||||
*/
|
||||
int dev_pm_opp_disable(struct device *dev, unsigned long freq)
|
||||
|
@ -620,7 +620,7 @@ static struct cpuidle_state skl_cstates[] = {
|
||||
.name = "C6-SKL",
|
||||
.desc = "MWAIT 0x20",
|
||||
.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 75,
|
||||
.exit_latency = 85,
|
||||
.target_residency = 200,
|
||||
.enter = &intel_idle,
|
||||
.enter_freeze = intel_idle_freeze, },
|
||||
@ -636,10 +636,18 @@ static struct cpuidle_state skl_cstates[] = {
|
||||
.name = "C8-SKL",
|
||||
.desc = "MWAIT 0x40",
|
||||
.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 174,
|
||||
.exit_latency = 200,
|
||||
.target_residency = 800,
|
||||
.enter = &intel_idle,
|
||||
.enter_freeze = intel_idle_freeze, },
|
||||
{
|
||||
.name = "C9-SKL",
|
||||
.desc = "MWAIT 0x50",
|
||||
.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
|
||||
.exit_latency = 480,
|
||||
.target_residency = 5000,
|
||||
.enter = &intel_idle,
|
||||
.enter_freeze = intel_idle_freeze, },
|
||||
{
|
||||
.name = "C10-SKL",
|
||||
.desc = "MWAIT 0x60",
|
||||
|
@ -299,9 +299,10 @@ static long local_pci_probe(void *_ddi)
|
||||
* Unbound PCI devices are always put in D0, regardless of
|
||||
* runtime PM status. During probe, the device is set to
|
||||
* active and the usage count is incremented. If the driver
|
||||
* supports runtime PM, it should call pm_runtime_put_noidle()
|
||||
* in its probe routine and pm_runtime_get_noresume() in its
|
||||
* remove routine.
|
||||
* supports runtime PM, it should call pm_runtime_put_noidle(),
|
||||
* or any other runtime PM helper function decrementing the usage
|
||||
* count, in its probe routine and pm_runtime_get_noresume() in
|
||||
* its remove routine.
|
||||
*/
|
||||
pm_runtime_get_sync(dev);
|
||||
pci_dev->driver = pci_drv;
|
||||
|
@ -217,6 +217,7 @@ struct pci_dev;
|
||||
|
||||
int acpi_pci_irq_enable (struct pci_dev *dev);
|
||||
void acpi_penalize_isa_irq(int irq, int active);
|
||||
bool acpi_isa_irq_available(int irq);
|
||||
void acpi_penalize_sci_irq(int irq, int trigger, int polarity);
|
||||
void acpi_pci_irq_disable (struct pci_dev *dev);
|
||||
|
||||
|
@ -71,8 +71,11 @@ unsigned int extra_msr_offset32;
|
||||
unsigned int extra_msr_offset64;
|
||||
unsigned int extra_delta_offset32;
|
||||
unsigned int extra_delta_offset64;
|
||||
unsigned int aperf_mperf_multiplier = 1;
|
||||
int do_smi;
|
||||
double bclk;
|
||||
double base_hz;
|
||||
double tsc_tweak = 1.0;
|
||||
unsigned int show_pkg;
|
||||
unsigned int show_core;
|
||||
unsigned int show_cpu;
|
||||
@ -502,7 +505,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
|
||||
/* %Busy */
|
||||
if (has_aperf) {
|
||||
if (!skip_c0)
|
||||
outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc);
|
||||
outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak);
|
||||
else
|
||||
outp += sprintf(outp, "********");
|
||||
}
|
||||
@ -510,7 +513,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
|
||||
/* Bzy_MHz */
|
||||
if (has_aperf)
|
||||
outp += sprintf(outp, "%8.0f",
|
||||
1.0 * t->tsc / units * t->aperf / t->mperf / interval_float);
|
||||
1.0 * t->tsc * tsc_tweak / units * t->aperf / t->mperf / interval_float);
|
||||
|
||||
/* TSC_MHz */
|
||||
outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float);
|
||||
@ -984,6 +987,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
|
||||
return -3;
|
||||
if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
|
||||
return -4;
|
||||
t->aperf = t->aperf * aperf_mperf_multiplier;
|
||||
t->mperf = t->mperf * aperf_mperf_multiplier;
|
||||
}
|
||||
|
||||
if (do_smi) {
|
||||
@ -1149,6 +1154,19 @@ int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV,
|
||||
int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
|
||||
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
|
||||
|
||||
|
||||
static void
|
||||
calculate_tsc_tweak()
|
||||
{
|
||||
unsigned long long msr;
|
||||
unsigned int base_ratio;
|
||||
|
||||
get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr);
|
||||
base_ratio = (msr >> 8) & 0xFF;
|
||||
base_hz = base_ratio * bclk * 1000000;
|
||||
tsc_tweak = base_hz / tsc_hz;
|
||||
}
|
||||
|
||||
static void
|
||||
dump_nhm_platform_info(void)
|
||||
{
|
||||
@ -1926,8 +1944,6 @@ int has_config_tdp(unsigned int family, unsigned int model)
|
||||
|
||||
switch (model) {
|
||||
case 0x3A: /* IVB */
|
||||
case 0x3E: /* IVB Xeon */
|
||||
|
||||
case 0x3C: /* HSW */
|
||||
case 0x3F: /* HSX */
|
||||
case 0x45: /* HSW */
|
||||
@ -2543,6 +2559,13 @@ int is_knl(unsigned int family, unsigned int model)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * get_aperf_mperf_multiplier()
 *
 * Select the multiplier for the given CPU family/model: 1024 when
 * is_knl() reports a match, 1 for everything else.
 */
unsigned int get_aperf_mperf_multiplier(unsigned int family, unsigned int model)
{
	return is_knl(family, model) ? 1024 : 1;
}
|
||||
|
||||
#define SLM_BCLK_FREQS 5
|
||||
double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
|
||||
|
||||
@ -2744,6 +2767,9 @@ void process_cpuid()
|
||||
}
|
||||
}
|
||||
|
||||
if (has_aperf)
|
||||
aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
|
||||
|
||||
do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model);
|
||||
do_snb_cstates = has_snb_msrs(family, model);
|
||||
do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2);
|
||||
@ -2762,6 +2788,9 @@ void process_cpuid()
|
||||
if (debug)
|
||||
dump_cstate_pstate_config_info();
|
||||
|
||||
if (has_skl_msrs(family, model))
|
||||
calculate_tsc_tweak();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@ -3090,7 +3119,7 @@ int get_and_dump_counters(void)
|
||||
}
|
||||
|
||||
void print_version() {
|
||||
fprintf(stderr, "turbostat version 4.7 17-June, 2015"
|
||||
fprintf(stderr, "turbostat version 4.8 26-Sep, 2015"
|
||||
" - Len Brown <lenb@kernel.org>\n");
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user