linux_dsm_epyc7002/arch/x86/include/asm/acpi.h
Andy Lutomirski 675357362a Revert "x86/mm: Stop calling leave_mm() in idle code"
This reverts commit 43858b4f25.

I removed the leave_mm() calls in question because the heuristic
wasn't needed after that patch.  With the original version of my
PCID series, we never flushed a "lazy cpu" (i.e. a CPU running a
kernel thread) due to a flush on the loaded mm.

Unfortunately, that caused architectural issues, so now I've
reinstated these flushes on non-PCID systems in:

    commit b956575bed ("x86/mm: Flush more aggressively in lazy TLB mode").

That, in turn, gives us a power-management and occasionally a
performance regression compared to old kernels: if a CPU goes into
a deep idle state with a process's mm loaded and that mm is then
flushed due to activity on a different CPU, the flush wakes the
idle CPU.

Reinstate the old ugly heuristic: a CPU that goes into ACPI C3 or
an intel_idle state that is likely to cause a TLB flush gets its mm
switched to init_mm before going idle.

FWIW, this heuristic is lousy.  Whether the chosen idle state will
flush the TLB isn't a good hint for whether we should change CR3
before idle, except insofar as the performance hit is a bit lower
if the TLB is getting flushed by the idle code anyway.  What we
really want to know is whether we anticipate being idle long enough
that the mm is likely to be flushed before we wake up.  That is a
matter of the expected idle duration rather than of which idle
state gets chosen.  The heuristic also completely fails on systems
where we don't know whether the TLB will be flushed (e.g. AMD
systems?).  OTOH it may be a bit obsolete anyway -- PCID systems
don't presently benefit from this heuristic at all.

We also shouldn't do this callback from the innermost bit of the
idle code, due to the RCU nastiness it causes.  All the information
needed is available before rcu_idle_enter() needs to happen.
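
A minimal sketch of the call path this restores, simplified from
drivers/acpi/processor_idle.c (the function name below is
illustrative; the real acpi_idle_enter_bm() also handles bus-master
arbitration and takes the processor as a parameter):

    static void acpi_idle_enter_c3_sketch(struct acpi_processor_cx *cx)
    {
            /* Drop the lazy mm so a later flush on it need not wake us: */
            acpi_unlazy_tlb(smp_processor_id());    /* -> leave_mm() */

            acpi_idle_do_entry(cx);                 /* actual HLT/MWAIT */
    }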

Signed-off-by: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bpetkov@suse.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 43858b4f25 ("x86/mm: Stop calling leave_mm() in idle code")
Link: http://lkml.kernel.org/r/c513bbd4e653747213e05bc7062de000bf0202a5.1509793738.git.luto@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
2017-11-04 15:01:50 +01:00

#ifndef _ASM_X86_ACPI_H
#define _ASM_X86_ACPI_H
/*
 * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
 * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org>
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */

#include <acpi/pdc_intel.h>
#include <asm/numa.h>
#include <asm/fixmap.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mpspec.h>
#include <asm/realmode.h>
#ifdef CONFIG_ACPI_APEI
# include <asm/pgtable_types.h>
#endif
#ifdef CONFIG_ACPI
extern int acpi_lapic;
extern int acpi_ioapic;
extern int acpi_noirq;
extern int acpi_strict;
extern int acpi_disabled;
extern int acpi_pci_disabled;
extern int acpi_skip_timer_override;
extern int acpi_use_timer_override;
extern int acpi_fix_pin2_polarity;
extern int acpi_disable_cmcff;
extern u8 acpi_sci_flags;
extern int acpi_sci_override_gsi;
void acpi_pic_sci_set_trigger(unsigned int, u16);
struct device;
extern int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
				  int trigger, int polarity);
extern void (*__acpi_unregister_gsi)(u32 gsi);

static inline void disable_acpi(void)
{
	acpi_disabled = 1;
	acpi_pci_disabled = 1;
	acpi_noirq = 1;
}

extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq);
static inline void acpi_noirq_set(void) { acpi_noirq = 1; }

static inline void acpi_disable_pci(void)
{
	acpi_pci_disabled = 1;
	acpi_noirq_set();
}

/* Low-level suspend routine. */
extern int (*acpi_suspend_lowlevel)(void);
/* Physical address to resume after wakeup */
#define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start))

/*
 * Check if the CPU can handle C2 and deeper
 */
static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate)
{
	/*
	 * Early models (<=5) of AMD Opterons are not supposed to go into
	 * C2 state.
	 *
	 * Steppings 0x0A and later are good
	 */
	if (boot_cpu_data.x86 == 0x0F &&
	    boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
	    boot_cpu_data.x86_model <= 0x05 &&
	    boot_cpu_data.x86_mask < 0x0A)
		return 1;
	else if (boot_cpu_has(X86_BUG_AMD_APIC_C1E))
		return 1;
	else
		return max_cstate;
}
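
/*
 * Usage sketch (an assumption, not verified against this tree): the
 * ACPI idle driver is expected to clamp its C-state limit roughly as
 *
 *	max_cstate = acpi_processor_cstate_check(max_cstate);
 *
 * so the affected AMD Opterons and APIC-C1E systems stay at C1.
 */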

static inline bool arch_has_acpi_pdc(void)
{
	struct cpuinfo_x86 *c = &cpu_data(0);

	return (c->x86_vendor == X86_VENDOR_INTEL ||
		c->x86_vendor == X86_VENDOR_CENTAUR);
}

static inline void arch_acpi_set_pdc_bits(u32 *buf)
{
	struct cpuinfo_x86 *c = &cpu_data(0);

	buf[2] |= ACPI_PDC_C_CAPABILITY_SMP;

	if (cpu_has(c, X86_FEATURE_EST))
		buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP;

	if (cpu_has(c, X86_FEATURE_ACPI))
		buf[2] |= ACPI_PDC_T_FFH;

	/*
	 * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
	 */
	if (!cpu_has(c, X86_FEATURE_MWAIT))
		buf[2] &= ~(ACPI_PDC_C_C2C3_FFH);
}
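
/*
 * Usage sketch: drivers/acpi/processor_pdc.c builds a three-dword
 * _PDC capability buffer (revision, count, capabilities) and lets
 * the architecture OR in its bits, roughly:
 *
 *	u32 buf[3] = { ACPI_PDC_REVISION_ID, 1, ACPI_PDC_SMP_T_SWCOORD };
 *	arch_acpi_set_pdc_bits(buf);
 *
 * The initializer values here are a sketch from memory, not a quote.
 */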

static inline bool acpi_has_cpu_in_madt(void)
{
	return !!acpi_lapic;
}

#else /* !CONFIG_ACPI */
#define acpi_lapic 0
#define acpi_ioapic 0
#define acpi_disable_cmcff 0
static inline void acpi_noirq_set(void) { }
static inline void acpi_disable_pci(void) { }
static inline void disable_acpi(void) { }
#endif /* !CONFIG_ACPI */
#define ARCH_HAS_POWER_INIT 1
#ifdef CONFIG_ACPI_NUMA
extern int x86_acpi_numa_init(void);
#endif /* CONFIG_ACPI_NUMA */
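
/*
 * Switch an idle CPU to init_mm so that a later flush on its old mm
 * does not need to wake it; see the revert described in the commit
 * message above.  Called before entering deep C-states.
 */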
#define acpi_unlazy_tlb(x) leave_mm(x)
#ifdef CONFIG_ACPI_APEI
static inline pgprot_t arch_apei_get_mem_attribute(phys_addr_t addr)
{
	/*
	 * We currently have no way to look up the EFI memory map
	 * attributes for a region in a consistent way, because the
	 * memmap is discarded after efi_free_boot_services().  So if
	 * you call efi_mem_attributes() during boot and at runtime,
	 * you could theoretically see different attributes.
	 *
	 * We are yet to see any x86 platforms that require anything
	 * other than PAGE_KERNEL (some ARM64 platforms require the
	 * equivalent of PAGE_KERNEL_NOCACHE).  Additionally, if SME
	 * is active, the ACPI information will not be encrypted,
	 * so return PAGE_KERNEL_NOENC until we know differently.
	 */
	return PAGE_KERNEL_NOENC;
}
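
/*
 * Usage note: the GHES code maps error records with this protection,
 * roughly (from drivers/acpi/apei/ghes.c of this era; treat the exact
 * call site as illustrative):
 *
 *	prot = arch_apei_get_mem_attribute(paddr);
 *	ioremap_page_range(vaddr, vaddr + PAGE_SIZE, paddr, prot);
 */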
#endif
#define ACPI_TABLE_UPGRADE_MAX_PHYS (max_low_pfn_mapped << PAGE_SHIFT)
#endif /* _ASM_X86_ACPI_H */