mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-15 07:26:39 +07:00
1dff3064c7
On P9 DD2.2 due to a CPU defect some TM instructions need to be emulated by KVM. This is handled at first by the hardware raising a softpatch interrupt when certain TM instructions that need KVM assistance are executed in the guest. Although some TM instructions per Power ISA are invalid forms, they can raise a softpatch interrupt too. For instance, 'tresume.' instruction as defined in the ISA must have bit 31 set (1), but an instruction that matches 'tresume.' PO and XO opcode fields but has bit 31 not set (0), like 0x7cfe9ddc, also raises a softpatch interrupt. Similarly for 'treclaim.' and 'trechkpt.' instructions with bit 31 = 0, i.e. 0x7c00075c and 0x7c0007dc, respectively. Hence, if a code like the following is executed in the guest it will raise a softpatch interrupt just like a 'tresume.' when the TM facility is enabled ('tabort. 0' in the example is used only to enable the TM facility): int main() { asm("tabort. 0; .long 0x7cfe9ddc;"); } Currently in such a case KVM throws a complete trace like: [345523.705984] WARNING: CPU: 24 PID: 64413 at arch/powerpc/kvm/book3s_hv_tm.c:211 kvmhv_p9_tm_emulation+0x68/0x620 [kvm_hv] [345523.705985] Modules linked in: kvm_hv(E) xt_conntrack ipt_REJECT nf_reject_ipv4 xt_tcpudp ip6table_mangle ip6table_nat iptable_mangle iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter bridge stp llc sch_fq_codel ipmi_powernv at24 vmx_crypto ipmi_devintf ipmi_msghandler ibmpowernv uio_pdrv_genirq kvm opal_prd uio leds_powernv ib_iser rdma_cm iw_cm ib_cm ib_core iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ip_tables x_tables autofs4 btrfs blake2b_generic zstd_compress raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx libcrc32c xor raid6_pq raid1 raid0 multipath linear tg3 crct10dif_vpmsum crc32c_vpmsum ipr [last unloaded: kvm_hv] [345523.706030] CPU: 24 PID: 64413 Comm: CPU 0/KVM Tainted: G W E 5.5.0+ #1 [345523.706031] NIP: 
c0080000072cb9c0 LR: c0080000072b5e80 CTR: c0080000085c7850 [345523.706034] REGS: c000000399467680 TRAP: 0700 Tainted: G W E (5.5.0+) [345523.706034] MSR: 900000010282b033 <SF,HV,VEC,VSX,EE,FP,ME,IR,DR,RI,LE,TM[E]> CR: 24022428 XER: 00000000 [345523.706042] CFAR: c0080000072b5e7c IRQMASK: 0 GPR00: c0080000072b5e80 c000000399467910 c0080000072db500 c000000375ccc720 GPR04: c000000375ccc720 00000003fbec0000 0000a10395dda5a6 0000000000000000 GPR08: 000000007cfe9ddc 7cfe9ddc000005dc 7cfe9ddc7c0005dc c0080000072cd530 GPR12: c0080000085c7850 c0000003fffeb800 0000000000000001 00007dfb737f0000 GPR16: c0002001edcca558 0000000000000000 0000000000000000 0000000000000001 GPR20: c000000001b21258 c0002001edcca558 0000000000000018 0000000000000000 GPR24: 0000000001000000 ffffffffffffffff 0000000000000001 0000000000001500 GPR28: c0002001edcc4278 c00000037dd80000 800000050280f033 c000000375ccc720 [345523.706062] NIP [c0080000072cb9c0] kvmhv_p9_tm_emulation+0x68/0x620 [kvm_hv] [345523.706065] LR [c0080000072b5e80] kvmppc_handle_exit_hv.isra.53+0x3e8/0x798 [kvm_hv] [345523.706066] Call Trace: [345523.706069] [c000000399467910] [c000000399467940] 0xc000000399467940 (unreliable) [345523.706071] [c000000399467950] [c000000399467980] 0xc000000399467980 [345523.706075] [c0000003994679f0] [c0080000072bd1c4] kvmhv_run_single_vcpu+0xa1c/0xb80 [kvm_hv] [345523.706079] [c000000399467ac0] [c0080000072bd8e0] kvmppc_vcpu_run_hv+0x5b8/0xb00 [kvm_hv] [345523.706087] [c000000399467b90] [c0080000085c93cc] kvmppc_vcpu_run+0x34/0x48 [kvm] [345523.706095] [c000000399467bb0] [c0080000085c582c] kvm_arch_vcpu_ioctl_run+0x244/0x420 [kvm] [345523.706101] [c000000399467c40] [c0080000085b7498] kvm_vcpu_ioctl+0x3d0/0x7b0 [kvm] [345523.706105] [c000000399467db0] [c0000000004adf9c] ksys_ioctl+0x13c/0x170 [345523.706107] [c000000399467e00] [c0000000004adff8] sys_ioctl+0x28/0x80 [345523.706111] [c000000399467e20] [c00000000000b278] system_call+0x5c/0x68 [345523.706112] Instruction dump: [345523.706114] 419e0390 
7f8a4840 409d0048 6d497c00 2f89075d 419e021c 6d497c00 2f8907dd [345523.706119] 419e01c0 6d497c00 2f8905dd 419e00a4 <0fe00000> 38210040 38600000 ebc1fff0 and then treats the executed instruction as a 'nop'. However the POWER9 User's Manual, in section "4.6.10 Book II Invalid Forms", informs that for TM instructions bit 31 is in fact ignored, thus for the TM-related invalid forms ignoring bit 31 and handling them like the valid forms is an acceptable way to handle them. POWER8 behaves the same way too. This commit changes the handling of the cases here described by treating the TM-related invalid forms that can generate a softpatch interrupt just like their valid forms (w/ bit 31 = 1) instead of as a 'nop' and by gently reporting any other unrecognized case to the host and treating it as illegal instruction instead of throwing a trace and treating it as a 'nop'. Signed-off-by: Gustavo Romero <gromero@linux.ibm.com> Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org> Acked-By: Michael Neuling <mikey@neuling.org> Reviewed-by: Leonardo Bras <leonardo@linux.ibm.com> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
232 lines
7.8 KiB
C
232 lines
7.8 KiB
C
// SPDX-License-Identifier: GPL-2.0-only
|
|
/*
|
|
* Copyright 2017 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
|
|
*/
|
|
|
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
|
|
#include <linux/kvm_host.h>
|
|
|
|
#include <asm/kvm_ppc.h>
|
|
#include <asm/kvm_book3s.h>
|
|
#include <asm/kvm_book3s_64.h>
|
|
#include <asm/reg.h>
|
|
#include <asm/ppc-opcode.h>
|
|
|
|
/*
 * Record a transaction failure in the vcpu's TEXASR and TFIAR images.
 *
 * @vcpu:          vcpu whose TM failure registers are updated
 * @failure_cause: failure code, placed in bits 56-63 of TEXASR
 *
 * TFIAR is set to the current NIP with the two low bits cleared; bit 0 is
 * then set if the guest MSR has PR set.  TEXASR receives the failure code
 * together with the ABORT, FS and EXACT flags, plus SUSP when the guest MSR
 * indicates TM suspended state and PR when MSR[PR] is set.
 */
static void emulate_tx_failure(struct kvm_vcpu *vcpu, u64 failure_cause)
{
	const u64 guest_msr = vcpu->arch.shregs.msr;
	u64 fail_addr = vcpu->arch.regs.nip & ~0x3ull;
	u64 status = (failure_cause << 56) | TEXASR_ABORT | TEXASR_FS | TEXASR_EXACT;

	if (MSR_TM_SUSPENDED(guest_msr))
		status |= TEXASR_SUSP;

	if (guest_msr & MSR_PR) {
		status |= TEXASR_PR;
		fail_addr |= 1;
	}

	/* Preserve ROT and TL fields of existing TEXASR */
	vcpu->arch.texasr = (vcpu->arch.texasr & 0x3ffffff) | status;
	vcpu->arch.tfiar = fail_addr;
}
|
|
|
|
/*
|
|
* This gets called on a softpatch interrupt on POWER9 DD2.2 processors.
|
|
* We expect to find a TM-related instruction to be emulated. The
|
|
* instruction image is in vcpu->arch.emul_inst. If the guest was in
|
|
* TM suspended or transactional state, the checkpointed state has been
|
|
* reclaimed and is in the vcpu struct. The CPU is in virtual mode in
|
|
* host context.
|
|
*/
|
|
int kvmhv_p9_tm_emulation(struct kvm_vcpu *vcpu)
|
|
{
|
|
u32 instr = vcpu->arch.emul_inst;
|
|
u64 msr = vcpu->arch.shregs.msr;
|
|
u64 newmsr, bescr;
|
|
int ra, rs;
|
|
|
|
/*
|
|
* rfid, rfebb, and mtmsrd encode bit 31 = 0 since it's a reserved bit
|
|
* in these instructions, so masking bit 31 out doesn't change these
|
|
* instructions. For treclaim., tsr., and trechkpt. instructions if bit
|
|
* 31 = 0 then they are per ISA invalid forms, however P9 UM, in section
|
|
* 4.6.10 Book II Invalid Forms, informs specifically that ignoring bit
|
|
* 31 is an acceptable way to handle these invalid forms that have
|
|
* bit 31 = 0. Moreover, for emulation purposes both forms (w/ and wo/
|
|
* bit 31 set) can generate a softpatch interrupt. Hence both forms
|
|
* are handled below for these instructions so they behave the same way.
|
|
*/
|
|
switch (instr & PO_XOP_OPCODE_MASK) {
|
|
case PPC_INST_RFID:
|
|
/* XXX do we need to check for PR=0 here? */
|
|
newmsr = vcpu->arch.shregs.srr1;
|
|
/* should only get here for Sx -> T1 transition */
|
|
WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
|
|
MSR_TM_TRANSACTIONAL(newmsr) &&
|
|
(newmsr & MSR_TM)));
|
|
newmsr = sanitize_msr(newmsr);
|
|
vcpu->arch.shregs.msr = newmsr;
|
|
vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
|
|
vcpu->arch.regs.nip = vcpu->arch.shregs.srr0;
|
|
return RESUME_GUEST;
|
|
|
|
case PPC_INST_RFEBB:
|
|
if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) {
|
|
/* generate an illegal instruction interrupt */
|
|
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
|
|
return RESUME_GUEST;
|
|
}
|
|
/* check EBB facility is available */
|
|
if (!(vcpu->arch.hfscr & HFSCR_EBB)) {
|
|
/* generate an illegal instruction interrupt */
|
|
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
|
|
return RESUME_GUEST;
|
|
}
|
|
if ((msr & MSR_PR) && !(vcpu->arch.fscr & FSCR_EBB)) {
|
|
/* generate a facility unavailable interrupt */
|
|
vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
|
|
((u64)FSCR_EBB_LG << 56);
|
|
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_FAC_UNAVAIL);
|
|
return RESUME_GUEST;
|
|
}
|
|
bescr = vcpu->arch.bescr;
|
|
/* expect to see a S->T transition requested */
|
|
WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
|
|
((bescr >> 30) & 3) == 2));
|
|
bescr &= ~BESCR_GE;
|
|
if (instr & (1 << 11))
|
|
bescr |= BESCR_GE;
|
|
vcpu->arch.bescr = bescr;
|
|
msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
|
|
vcpu->arch.shregs.msr = msr;
|
|
vcpu->arch.cfar = vcpu->arch.regs.nip - 4;
|
|
vcpu->arch.regs.nip = vcpu->arch.ebbrr;
|
|
return RESUME_GUEST;
|
|
|
|
case PPC_INST_MTMSRD:
|
|
/* XXX do we need to check for PR=0 here? */
|
|
rs = (instr >> 21) & 0x1f;
|
|
newmsr = kvmppc_get_gpr(vcpu, rs);
|
|
/* check this is a Sx -> T1 transition */
|
|
WARN_ON_ONCE(!(MSR_TM_SUSPENDED(msr) &&
|
|
MSR_TM_TRANSACTIONAL(newmsr) &&
|
|
(newmsr & MSR_TM)));
|
|
/* mtmsrd doesn't change LE */
|
|
newmsr = (newmsr & ~MSR_LE) | (msr & MSR_LE);
|
|
newmsr = sanitize_msr(newmsr);
|
|
vcpu->arch.shregs.msr = newmsr;
|
|
return RESUME_GUEST;
|
|
|
|
/* ignore bit 31, see comment above */
|
|
case (PPC_INST_TSR & PO_XOP_OPCODE_MASK):
|
|
/* check for PR=1 and arch 2.06 bit set in PCR */
|
|
if ((msr & MSR_PR) && (vcpu->arch.vcore->pcr & PCR_ARCH_206)) {
|
|
/* generate an illegal instruction interrupt */
|
|
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
|
|
return RESUME_GUEST;
|
|
}
|
|
/* check for TM disabled in the HFSCR or MSR */
|
|
if (!(vcpu->arch.hfscr & HFSCR_TM)) {
|
|
/* generate an illegal instruction interrupt */
|
|
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
|
|
return RESUME_GUEST;
|
|
}
|
|
if (!(msr & MSR_TM)) {
|
|
/* generate a facility unavailable interrupt */
|
|
vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
|
|
((u64)FSCR_TM_LG << 56);
|
|
kvmppc_book3s_queue_irqprio(vcpu,
|
|
BOOK3S_INTERRUPT_FAC_UNAVAIL);
|
|
return RESUME_GUEST;
|
|
}
|
|
/* Set CR0 to indicate previous transactional state */
|
|
vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
|
|
(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
|
|
/* L=1 => tresume, L=0 => tsuspend */
|
|
if (instr & (1 << 21)) {
|
|
if (MSR_TM_SUSPENDED(msr))
|
|
msr = (msr & ~MSR_TS_MASK) | MSR_TS_T;
|
|
} else {
|
|
if (MSR_TM_TRANSACTIONAL(msr))
|
|
msr = (msr & ~MSR_TS_MASK) | MSR_TS_S;
|
|
}
|
|
vcpu->arch.shregs.msr = msr;
|
|
return RESUME_GUEST;
|
|
|
|
/* ignore bit 31, see comment above */
|
|
case (PPC_INST_TRECLAIM & PO_XOP_OPCODE_MASK):
|
|
/* check for TM disabled in the HFSCR or MSR */
|
|
if (!(vcpu->arch.hfscr & HFSCR_TM)) {
|
|
/* generate an illegal instruction interrupt */
|
|
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
|
|
return RESUME_GUEST;
|
|
}
|
|
if (!(msr & MSR_TM)) {
|
|
/* generate a facility unavailable interrupt */
|
|
vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
|
|
((u64)FSCR_TM_LG << 56);
|
|
kvmppc_book3s_queue_irqprio(vcpu,
|
|
BOOK3S_INTERRUPT_FAC_UNAVAIL);
|
|
return RESUME_GUEST;
|
|
}
|
|
/* If no transaction active, generate TM bad thing */
|
|
if (!MSR_TM_ACTIVE(msr)) {
|
|
kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
|
|
return RESUME_GUEST;
|
|
}
|
|
/* If failure was not previously recorded, recompute TEXASR */
|
|
if (!(vcpu->arch.orig_texasr & TEXASR_FS)) {
|
|
ra = (instr >> 16) & 0x1f;
|
|
if (ra)
|
|
ra = kvmppc_get_gpr(vcpu, ra) & 0xff;
|
|
emulate_tx_failure(vcpu, ra);
|
|
}
|
|
|
|
copy_from_checkpoint(vcpu);
|
|
|
|
/* Set CR0 to indicate previous transactional state */
|
|
vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
|
|
(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
|
|
vcpu->arch.shregs.msr &= ~MSR_TS_MASK;
|
|
return RESUME_GUEST;
|
|
|
|
/* ignore bit 31, see comment above */
|
|
case (PPC_INST_TRECHKPT & PO_XOP_OPCODE_MASK):
|
|
/* XXX do we need to check for PR=0 here? */
|
|
/* check for TM disabled in the HFSCR or MSR */
|
|
if (!(vcpu->arch.hfscr & HFSCR_TM)) {
|
|
/* generate an illegal instruction interrupt */
|
|
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
|
|
return RESUME_GUEST;
|
|
}
|
|
if (!(msr & MSR_TM)) {
|
|
/* generate a facility unavailable interrupt */
|
|
vcpu->arch.fscr = (vcpu->arch.fscr & ~(0xffull << 56)) |
|
|
((u64)FSCR_TM_LG << 56);
|
|
kvmppc_book3s_queue_irqprio(vcpu,
|
|
BOOK3S_INTERRUPT_FAC_UNAVAIL);
|
|
return RESUME_GUEST;
|
|
}
|
|
/* If transaction active or TEXASR[FS] = 0, bad thing */
|
|
if (MSR_TM_ACTIVE(msr) || !(vcpu->arch.texasr & TEXASR_FS)) {
|
|
kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
|
|
return RESUME_GUEST;
|
|
}
|
|
|
|
copy_to_checkpoint(vcpu);
|
|
|
|
/* Set CR0 to indicate previous transactional state */
|
|
vcpu->arch.regs.ccr = (vcpu->arch.regs.ccr & 0x0fffffff) |
|
|
(((msr & MSR_TS_MASK) >> MSR_TS_S_LG) << 29);
|
|
vcpu->arch.shregs.msr = msr | MSR_TS_S;
|
|
return RESUME_GUEST;
|
|
}
|
|
|
|
/* What should we do here? We didn't recognize the instruction */
|
|
kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
|
|
pr_warn_ratelimited("Unrecognized TM-related instruction %#x for emulation", instr);
|
|
|
|
return RESUME_GUEST;
|
|
}
|