mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-13 17:26:41 +07:00
Misc changes:
- Prepare for Intel's new SERIALIZE instruction - Enable split-lock debugging on more CPUs - Add more Intel CPU models - Optimize stack canary initialization a bit - Simplify the Spectre logic a bit Signed-off-by: Ingo Molnar <mingo@kernel.org> -----BEGIN PGP SIGNATURE----- iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAl8oTsQRHG1pbmdvQGtl cm5lbC5vcmcACgkQEnMQ0APhK1gueQ//Vh9sTi8+q5ZCxXnJQOi59SZsFy1quC2Q 6bFoSQ46npMBoYyC2eDQ4exBncWLqorT8Vq/evlW3XPldUzHKOk7b4Omonwyrrj5 dg5fqcRjpjU8ni6egmy4ElMjab53gDuv0yNazjONeBGeWuBGu4vI2bP2eY3Addfm 2eo2d5ZIMRCdShrUNwToJWWt6q4DzL/lcrVZAlX0LwlWVLqUCdIARALRM7V1XDsC udxS8KnvhTaJ7l63BSJREe3AGksLQd9P4UkJS4IE4t0zINBIrME043BYBMTh2Vvk y3jykKegIbmhPquGXG8grJbPDUF2/3FxmGKTIhpoo++agb2fxt921y5kqMJwniNS H/Gk032iGzjjwWnOoWE56UeuDTOlweSIrm4EG22HyEDK7kOMJusjYAV5fB4Sv7vj TBy5q0PCIutjXDTL1hIWf0WDiQt6eGNQS/yt3FlapLBGVRQwMU/pKYVVIOIaFtNs szx1ZeiT358Ww8a2fQlb8pqv50Upmr2wqFkAsMbm+NN3N92cqK6gJlo1p7fnxIuG +YVASobjsqbn0S62v/9SB/KRJz07adlZ6Tl/O/ILRvWyqik7COCCHDVJ62Zzaz5z LqR2daVM5H+Lp6jGZuIoq/JiUkxUe2K990eWHb3PUpOC4Rh73PvtMc7WFhbAjbye XV3eOEDi65c= =sL2Q -----END PGP SIGNATURE----- Merge tag 'x86-cpu-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Pull x86 cpu updates from Ingo Molar: - prepare for Intel's new SERIALIZE instruction - enable split-lock debugging on more CPUs - add more Intel CPU models - optimize stack canary initialization a bit - simplify the Spectre logic a bit * tag 'x86-cpu-2020-08-03' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/cpu: Refactor sync_core() for readability x86/cpu: Relocate sync_core() to sync_core.h x86/cpufeatures: Add enumeration for SERIALIZE instruction x86/split_lock: Enable the split lock feature on Sapphire Rapids and Alder Lake CPUs x86/cpu: Add Lakefield, Alder Lake and Rocket Lake models to the to Intel CPU family x86/stackprotector: Pre-initialize canary for secondary CPUs x86/speculation: Merge one test in spectre_v2_user_select_mitigation()
This commit is contained in:
commit
335ad94c21
@ -367,6 +367,7 @@
|
||||
#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */
|
||||
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
|
||||
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
|
||||
#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
|
||||
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
|
||||
#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
|
||||
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
|
||||
|
@ -89,8 +89,15 @@
|
||||
#define INTEL_FAM6_COMETLAKE 0xA5
|
||||
#define INTEL_FAM6_COMETLAKE_L 0xA6
|
||||
|
||||
#define INTEL_FAM6_ROCKETLAKE 0xA7
|
||||
|
||||
#define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F
|
||||
|
||||
/* Hybrid Core/Atom Processors */
|
||||
|
||||
#define INTEL_FAM6_LAKEFIELD 0x8A
|
||||
#define INTEL_FAM6_ALDERLAKE 0x97
|
||||
|
||||
/* "Small Core" Processors (Atom) */
|
||||
|
||||
#define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
|
||||
|
@ -678,70 +678,6 @@ static inline unsigned int cpuid_edx(unsigned int op)
|
||||
return edx;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function forces the icache and prefetched instruction stream to
|
||||
* catch up with reality in two very specific cases:
|
||||
*
|
||||
* a) Text was modified using one virtual address and is about to be executed
|
||||
* from the same physical page at a different virtual address.
|
||||
*
|
||||
* b) Text was modified on a different CPU, may subsequently be
|
||||
* executed on this CPU, and you want to make sure the new version
|
||||
* gets executed. This generally means you're calling this in a IPI.
|
||||
*
|
||||
* If you're calling this for a different reason, you're probably doing
|
||||
* it wrong.
|
||||
*/
|
||||
static inline void sync_core(void)
|
||||
{
|
||||
/*
|
||||
* There are quite a few ways to do this. IRET-to-self is nice
|
||||
* because it works on every CPU, at any CPL (so it's compatible
|
||||
* with paravirtualization), and it never exits to a hypervisor.
|
||||
* The only down sides are that it's a bit slow (it seems to be
|
||||
* a bit more than 2x slower than the fastest options) and that
|
||||
* it unmasks NMIs. The "push %cs" is needed because, in
|
||||
* paravirtual environments, __KERNEL_CS may not be a valid CS
|
||||
* value when we do IRET directly.
|
||||
*
|
||||
* In case NMI unmasking or performance ever becomes a problem,
|
||||
* the next best option appears to be MOV-to-CR2 and an
|
||||
* unconditional jump. That sequence also works on all CPUs,
|
||||
* but it will fault at CPL3 (i.e. Xen PV).
|
||||
*
|
||||
* CPUID is the conventional way, but it's nasty: it doesn't
|
||||
* exist on some 486-like CPUs, and it usually exits to a
|
||||
* hypervisor.
|
||||
*
|
||||
* Like all of Linux's memory ordering operations, this is a
|
||||
* compiler barrier as well.
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
asm volatile (
|
||||
"pushfl\n\t"
|
||||
"pushl %%cs\n\t"
|
||||
"pushl $1f\n\t"
|
||||
"iret\n\t"
|
||||
"1:"
|
||||
: ASM_CALL_CONSTRAINT : : "memory");
|
||||
#else
|
||||
unsigned int tmp;
|
||||
|
||||
asm volatile (
|
||||
"mov %%ss, %0\n\t"
|
||||
"pushq %q0\n\t"
|
||||
"pushq %%rsp\n\t"
|
||||
"addq $8, (%%rsp)\n\t"
|
||||
"pushfq\n\t"
|
||||
"mov %%cs, %0\n\t"
|
||||
"pushq %q0\n\t"
|
||||
"pushq $1f\n\t"
|
||||
"iretq\n\t"
|
||||
"1:"
|
||||
: "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
|
||||
#endif
|
||||
}
|
||||
|
||||
extern void select_idle_routine(const struct cpuinfo_x86 *c);
|
||||
extern void amd_e400_c1e_apic_setup(void);
|
||||
|
||||
|
@ -234,7 +234,6 @@ static inline void clwb(volatile void *__p)
|
||||
|
||||
#define nop() asm volatile ("nop")
|
||||
|
||||
|
||||
#endif /* __KERNEL__ */
|
||||
|
||||
#endif /* _ASM_X86_SPECIAL_INSNS_H */
|
||||
|
@ -90,6 +90,15 @@ static __always_inline void boot_init_stack_canary(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary;
|
||||
#else
|
||||
per_cpu(stack_canary.canary, cpu) = idle->stack_canary;
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void setup_stack_canary_segment(int cpu)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
@ -119,6 +128,9 @@ static inline void load_stack_canary_segment(void)
|
||||
static inline void setup_stack_canary_segment(int cpu)
|
||||
{ }
|
||||
|
||||
static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
|
||||
{ }
|
||||
|
||||
static inline void load_stack_canary_segment(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_32
|
||||
|
@ -6,6 +6,78 @@
|
||||
#include <asm/processor.h>
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
static inline void iret_to_self(void)
|
||||
{
|
||||
asm volatile (
|
||||
"pushfl\n\t"
|
||||
"pushl %%cs\n\t"
|
||||
"pushl $1f\n\t"
|
||||
"iret\n\t"
|
||||
"1:"
|
||||
: ASM_CALL_CONSTRAINT : : "memory");
|
||||
}
|
||||
#else
|
||||
static inline void iret_to_self(void)
|
||||
{
|
||||
unsigned int tmp;
|
||||
|
||||
asm volatile (
|
||||
"mov %%ss, %0\n\t"
|
||||
"pushq %q0\n\t"
|
||||
"pushq %%rsp\n\t"
|
||||
"addq $8, (%%rsp)\n\t"
|
||||
"pushfq\n\t"
|
||||
"mov %%cs, %0\n\t"
|
||||
"pushq %q0\n\t"
|
||||
"pushq $1f\n\t"
|
||||
"iretq\n\t"
|
||||
"1:"
|
||||
: "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory");
|
||||
}
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
||||
/*
|
||||
* This function forces the icache and prefetched instruction stream to
|
||||
* catch up with reality in two very specific cases:
|
||||
*
|
||||
* a) Text was modified using one virtual address and is about to be executed
|
||||
* from the same physical page at a different virtual address.
|
||||
*
|
||||
* b) Text was modified on a different CPU, may subsequently be
|
||||
* executed on this CPU, and you want to make sure the new version
|
||||
* gets executed. This generally means you're calling this in a IPI.
|
||||
*
|
||||
* If you're calling this for a different reason, you're probably doing
|
||||
* it wrong.
|
||||
*/
|
||||
static inline void sync_core(void)
|
||||
{
|
||||
/*
|
||||
* There are quite a few ways to do this. IRET-to-self is nice
|
||||
* because it works on every CPU, at any CPL (so it's compatible
|
||||
* with paravirtualization), and it never exits to a hypervisor.
|
||||
* The only down sides are that it's a bit slow (it seems to be
|
||||
* a bit more than 2x slower than the fastest options) and that
|
||||
* it unmasks NMIs. The "push %cs" is needed because, in
|
||||
* paravirtual environments, __KERNEL_CS may not be a valid CS
|
||||
* value when we do IRET directly.
|
||||
*
|
||||
* In case NMI unmasking or performance ever becomes a problem,
|
||||
* the next best option appears to be MOV-to-CR2 and an
|
||||
* unconditional jump. That sequence also works on all CPUs,
|
||||
* but it will fault at CPL3 (i.e. Xen PV).
|
||||
*
|
||||
* CPUID is the conventional way, but it's nasty: it doesn't
|
||||
* exist on some 486-like CPUs, and it usually exits to a
|
||||
* hypervisor.
|
||||
*
|
||||
* Like all of Linux's memory ordering operations, this is a
|
||||
* compiler barrier as well.
|
||||
*/
|
||||
iret_to_self();
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure that a core serializing instruction is issued before returning
|
||||
* to user-mode. x86 implements return to user-space through sysexit,
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <linux/kprobes.h>
|
||||
#include <linux/mmu_context.h>
|
||||
#include <linux/bsearch.h>
|
||||
#include <linux/sync_core.h>
|
||||
#include <asm/text-patching.h>
|
||||
#include <asm/alternative.h>
|
||||
#include <asm/sections.h>
|
||||
|
@ -763,10 +763,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
|
||||
}
|
||||
|
||||
/*
|
||||
* If enhanced IBRS is enabled or SMT impossible, STIBP is not
|
||||
* If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not
|
||||
* required.
|
||||
*/
|
||||
if (!smt_possible || spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
|
||||
if (!boot_cpu_has(X86_FEATURE_STIBP) ||
|
||||
!smt_possible ||
|
||||
spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
|
||||
return;
|
||||
|
||||
/*
|
||||
@ -778,12 +780,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
|
||||
boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
|
||||
mode = SPECTRE_V2_USER_STRICT_PREFERRED;
|
||||
|
||||
/*
|
||||
* If STIBP is not available, clear the STIBP mode.
|
||||
*/
|
||||
if (!boot_cpu_has(X86_FEATURE_STIBP))
|
||||
mode = SPECTRE_V2_USER_NONE;
|
||||
|
||||
spectre_v2_user_stibp = mode;
|
||||
|
||||
set_mode:
|
||||
@ -1270,7 +1266,6 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
|
||||
* Indirect branch speculation is always disabled in strict
|
||||
* mode. It can neither be enabled if it was force-disabled
|
||||
* by a previous prctl call.
|
||||
|
||||
*/
|
||||
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
|
||||
spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
|
||||
|
@ -1156,6 +1156,8 @@ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, 1),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, 1),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, 1),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, 1),
|
||||
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, 1),
|
||||
{}
|
||||
};
|
||||
|
||||
|
@ -42,6 +42,7 @@
|
||||
#include <linux/export.h>
|
||||
#include <linux/jump_label.h>
|
||||
#include <linux/set_memory.h>
|
||||
#include <linux/sync_core.h>
|
||||
#include <linux/task_work.h>
|
||||
#include <linux/hardirq.h>
|
||||
|
||||
|
@ -51,7 +51,6 @@
|
||||
#include <linux/err.h>
|
||||
#include <linux/nmi.h>
|
||||
#include <linux/tboot.h>
|
||||
#include <linux/stackprotector.h>
|
||||
#include <linux/gfp.h>
|
||||
#include <linux/cpuidle.h>
|
||||
#include <linux/numa.h>
|
||||
@ -81,6 +80,7 @@
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/spec-ctrl.h>
|
||||
#include <asm/hw_irq.h>
|
||||
#include <asm/stackprotector.h>
|
||||
|
||||
/* representing HT siblings of each logical CPU */
|
||||
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
|
||||
@ -260,21 +260,10 @@ static void notrace start_secondary(void *unused)
|
||||
/* enable local interrupts */
|
||||
local_irq_enable();
|
||||
|
||||
/* to prevent fake stack check failure in clock setup */
|
||||
boot_init_stack_canary();
|
||||
|
||||
x86_cpuinit.setup_percpu_clockev();
|
||||
|
||||
wmb();
|
||||
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
|
||||
|
||||
/*
|
||||
* Prevent tail call to cpu_startup_entry() because the stack protector
|
||||
* guard has been changed a couple of function calls up, in
|
||||
* boot_init_stack_canary() and must not be checked before tail calling
|
||||
* another function.
|
||||
*/
|
||||
prevent_tail_call_optimization();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1012,6 +1001,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
|
||||
alternatives_enable_smp();
|
||||
|
||||
per_cpu(current_task, cpu) = idle;
|
||||
cpu_init_stack_canary(cpu, idle);
|
||||
|
||||
/* Initialize the interrupt stack(s) */
|
||||
ret = irq_init_percpu_irqstack(cpu);
|
||||
|
@ -92,9 +92,7 @@ static void cpu_bringup(void)
|
||||
asmlinkage __visible void cpu_bringup_and_idle(void)
|
||||
{
|
||||
cpu_bringup();
|
||||
boot_init_stack_canary();
|
||||
cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
|
||||
prevent_tail_call_optimization();
|
||||
}
|
||||
|
||||
void xen_smp_intr_free_pv(unsigned int cpu)
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include <linux/io.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/sync_core.h>
|
||||
#include <linux/prefetch.h>
|
||||
#include "gru.h"
|
||||
#include "grutables.h"
|
||||
|
@ -16,6 +16,7 @@
|
||||
#define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10)
|
||||
#define CLKS2NSEC(c) ((c) *1000000000 / local_cpu_data->itc_freq)
|
||||
#else
|
||||
#include <linux/sync_core.h>
|
||||
#include <asm/tsc.h>
|
||||
#define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000)
|
||||
#define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz)
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <linux/sync_core.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/delay.h>
|
||||
#include <linux/export.h>
|
||||
|
Loading…
Reference in New Issue
Block a user