mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-18 19:26:21 +07:00
Merge branch 'topic/ppc-kvm' into next
Merge our ppc-kvm topic branch. This contains several fixes for the XIVE interrupt controller that we are sharing with the KVM tree.
This commit is contained in:
commit
1a47908e0f
@ -46,7 +46,15 @@ struct xive_irq_data {
|
||||
|
||||
/* Setup/used by frontend */
|
||||
int target;
|
||||
/*
|
||||
* saved_p means that there is a queue entry for this interrupt
|
||||
* in some CPU's queue (not including guest vcpu queues), even
|
||||
* if P is not set in the source ESB.
|
||||
* stale_p means that there is no queue entry for this interrupt
|
||||
* in any CPU's queue, even if P is set in the source ESB.
|
||||
*/
|
||||
bool saved_p;
|
||||
bool stale_p;
|
||||
};
|
||||
#define XIVE_IRQ_FLAG_STORE_EOI 0x01
|
||||
#define XIVE_IRQ_FLAG_LSI 0x02
|
||||
|
@ -942,6 +942,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
|
||||
ld r11, VCPU_XIVE_SAVED_STATE(r4)
|
||||
li r9, TM_QW1_OS
|
||||
lwz r8, VCPU_XIVE_CAM_WORD(r4)
|
||||
cmpwi r8, 0
|
||||
beq no_xive
|
||||
li r7, TM_QW1_OS + TM_WORD2
|
||||
mfmsr r0
|
||||
andi. r0, r0, MSR_DR /* in real mode? */
|
||||
@ -2831,29 +2833,39 @@ kvm_cede_prodded:
|
||||
kvm_cede_exit:
|
||||
ld r9, HSTATE_KVM_VCPU(r13)
|
||||
#ifdef CONFIG_KVM_XICS
|
||||
/* Abort if we still have a pending escalation */
|
||||
lbz r5, VCPU_XIVE_ESC_ON(r9)
|
||||
cmpwi r5, 0
|
||||
beq 1f
|
||||
li r0, 0
|
||||
stb r0, VCPU_CEDED(r9)
|
||||
1: /* Enable XIVE escalation */
|
||||
li r5, XIVE_ESB_SET_PQ_00
|
||||
mfmsr r0
|
||||
andi. r0, r0, MSR_DR /* in real mode? */
|
||||
beq 1f
|
||||
/* are we using XIVE with single escalation? */
|
||||
ld r10, VCPU_XIVE_ESC_VADDR(r9)
|
||||
cmpdi r10, 0
|
||||
beq 3f
|
||||
ldx r0, r10, r5
|
||||
li r6, XIVE_ESB_SET_PQ_00
|
||||
/*
|
||||
* If we still have a pending escalation, abort the cede,
|
||||
* and we must set PQ to 10 rather than 00 so that we don't
|
||||
* potentially end up with two entries for the escalation
|
||||
* interrupt in the XIVE interrupt queue. In that case
|
||||
* we also don't want to set xive_esc_on to 1 here in
|
||||
* case we race with xive_esc_irq().
|
||||
*/
|
||||
lbz r5, VCPU_XIVE_ESC_ON(r9)
|
||||
cmpwi r5, 0
|
||||
beq 4f
|
||||
li r0, 0
|
||||
stb r0, VCPU_CEDED(r9)
|
||||
li r6, XIVE_ESB_SET_PQ_10
|
||||
b 5f
|
||||
4: li r0, 1
|
||||
stb r0, VCPU_XIVE_ESC_ON(r9)
|
||||
/* make sure store to xive_esc_on is seen before xive_esc_irq runs */
|
||||
sync
|
||||
5: /* Enable XIVE escalation */
|
||||
mfmsr r0
|
||||
andi. r0, r0, MSR_DR /* in real mode? */
|
||||
beq 1f
|
||||
ldx r0, r10, r6
|
||||
b 2f
|
||||
1: ld r10, VCPU_XIVE_ESC_RADDR(r9)
|
||||
cmpdi r10, 0
|
||||
beq 3f
|
||||
ldcix r0, r10, r5
|
||||
ldcix r0, r10, r6
|
||||
2: sync
|
||||
li r0, 1
|
||||
stb r0, VCPU_XIVE_ESC_ON(r9)
|
||||
#endif /* CONFIG_KVM_XICS */
|
||||
3: b guest_exit_cont
|
||||
|
||||
|
@ -67,8 +67,14 @@ void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
|
||||
void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
|
||||
u64 pq;
|
||||
|
||||
if (!tima)
|
||||
/*
|
||||
* Nothing to do if the platform doesn't have a XIVE
|
||||
* or this vCPU doesn't have its own XIVE context
|
||||
* (e.g. because it's not using an in-kernel interrupt controller).
|
||||
*/
|
||||
if (!tima || !vcpu->arch.xive_cam_word)
|
||||
return;
|
||||
|
||||
eieio();
|
||||
__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
|
||||
__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
|
||||
@ -160,6 +166,9 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
|
||||
*/
|
||||
vcpu->arch.xive_esc_on = false;
|
||||
|
||||
/* This orders xive_esc_on = false vs. subsequent stale_p = true */
|
||||
smp_wmb(); /* goes with smp_mb() in cleanup_single_escalation */
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
@ -1113,6 +1122,31 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
|
||||
vcpu->arch.xive_esc_raddr = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* In single escalation mode, the escalation interrupt is marked so
|
||||
* that EOI doesn't re-enable it, but just sets the stale_p flag to
|
||||
* indicate that the P bit has already been dealt with. However, the
|
||||
* assembly code that enters the guest sets PQ to 00 without clearing
|
||||
* stale_p (because it has no easy way to address it). Hence we have
|
||||
* to adjust stale_p before shutting down the interrupt.
*
* @vcpu: vCPU whose arch.xive_esc_on flag is sampled.
* @xc:   not referenced by the body of this function.
* @irq:  interrupt number handed to irq_get_irq_data(); the handler
*        data of that interrupt is the xive_irq_data whose stale_p
*        field is rewritten here.
*
* On return, stale_p is true if xive_esc_on was observed false after
* the barrier below, and false otherwise.
|
||||
*/
|
||||
void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
|
||||
struct kvmppc_xive_vcpu *xc, int irq)
|
||||
{
|
||||
struct irq_data *d = irq_get_irq_data(irq);
|
||||
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
|
||||

||||
/*
|
||||
* This slightly odd sequence gives the right result
|
||||
* (i.e. stale_p set if xive_esc_on is false) even if
|
||||
* we race with xive_esc_irq() and xive_irq_eoi().
|
||||
*/
|
||||
xd->stale_p = false;
|
||||
smp_mb(); /* paired with smp_wmb in xive_esc_irq */
|
||||
/* xive_esc_on is sampled only after the barrier above */
if (!vcpu->arch.xive_esc_on)
|
||||
xd->stale_p = true;
|
||||
}
|
||||
|
||||
void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
|
||||
{
|
||||
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
|
||||
@ -1134,20 +1168,28 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
|
||||
/* Mask the VP IPI */
|
||||
xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
|
||||
|
||||
/* Disable the VP */
|
||||
xive_native_disable_vp(xc->vp_id);
|
||||
|
||||
/* Free the queues & associated interrupts */
|
||||
/* Free escalations */
|
||||
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
|
||||
struct xive_q *q = &xc->queues[i];
|
||||
|
||||
/* Free the escalation irq */
|
||||
if (xc->esc_virq[i]) {
|
||||
if (xc->xive->single_escalation)
|
||||
xive_cleanup_single_escalation(vcpu, xc,
|
||||
xc->esc_virq[i]);
|
||||
free_irq(xc->esc_virq[i], vcpu);
|
||||
irq_dispose_mapping(xc->esc_virq[i]);
|
||||
kfree(xc->esc_virq_names[i]);
|
||||
}
|
||||
/* Free the queue */
|
||||
}
|
||||
|
||||
/* Disable the VP */
|
||||
xive_native_disable_vp(xc->vp_id);
|
||||
|
||||
/* Clear the cam word so guest entry won't try to push context */
|
||||
vcpu->arch.xive_cam_word = 0;
|
||||
|
||||
/* Free the queues */
|
||||
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
|
||||
struct xive_q *q = &xc->queues[i];
|
||||
|
||||
xive_native_disable_queue(xc->vp_id, q, i);
|
||||
if (q->qpage) {
|
||||
free_pages((unsigned long)q->qpage,
|
||||
|
@ -282,6 +282,8 @@ int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio);
|
||||
int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
|
||||
bool single_escalation);
|
||||
struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
|
||||
void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
|
||||
struct kvmppc_xive_vcpu *xc, int irq);
|
||||
|
||||
#endif /* CONFIG_KVM_XICS */
|
||||
#endif /* _KVM_PPC_BOOK3S_XICS_H */
|
||||
|
@ -67,20 +67,28 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
|
||||
xc->valid = false;
|
||||
kvmppc_xive_disable_vcpu_interrupts(vcpu);
|
||||
|
||||
/* Disable the VP */
|
||||
xive_native_disable_vp(xc->vp_id);
|
||||
|
||||
/* Free the queues & associated interrupts */
|
||||
/* Free escalations */
|
||||
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
|
||||
/* Free the escalation irq */
|
||||
if (xc->esc_virq[i]) {
|
||||
if (xc->xive->single_escalation)
|
||||
xive_cleanup_single_escalation(vcpu, xc,
|
||||
xc->esc_virq[i]);
|
||||
free_irq(xc->esc_virq[i], vcpu);
|
||||
irq_dispose_mapping(xc->esc_virq[i]);
|
||||
kfree(xc->esc_virq_names[i]);
|
||||
xc->esc_virq[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Free the queue */
|
||||
/* Disable the VP */
|
||||
xive_native_disable_vp(xc->vp_id);
|
||||
|
||||
/* Clear the cam word so guest entry won't try to push context */
|
||||
vcpu->arch.xive_cam_word = 0;
|
||||
|
||||
/* Free the queues */
|
||||
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
|
||||
kvmppc_xive_native_cleanup_queue(vcpu, i);
|
||||
}
|
||||
|
||||
|
@ -135,7 +135,7 @@ static u32 xive_read_eq(struct xive_q *q, bool just_peek)
|
||||
static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
|
||||
{
|
||||
u32 irq = 0;
|
||||
u8 prio;
|
||||
u8 prio = 0;
|
||||
|
||||
/* Find highest pending priority */
|
||||
while (xc->pending_prio != 0) {
|
||||
@ -148,8 +148,19 @@ static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
|
||||
irq = xive_read_eq(&xc->queue[prio], just_peek);
|
||||
|
||||
/* Found something ? That's it */
|
||||
if (irq)
|
||||
break;
|
||||
if (irq) {
|
||||
if (just_peek || irq_to_desc(irq))
|
||||
break;
|
||||
/*
|
||||
* We should never get here; if we do then we must
|
||||
* have failed to synchronize the interrupt properly
|
||||
* when shutting it down.
|
||||
*/
|
||||
pr_crit("xive: got interrupt %d without descriptor, dropping\n",
|
||||
irq);
|
||||
WARN_ON(1);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Clear pending bits */
|
||||
xc->pending_prio &= ~(1 << prio);
|
||||
@ -307,6 +318,7 @@ static void xive_do_queue_eoi(struct xive_cpu *xc)
|
||||
*/
|
||||
static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
|
||||
{
|
||||
xd->stale_p = false;
|
||||
/* If the XIVE supports the new "store EOI" facility, use it */
|
||||
if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
|
||||
xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
|
||||
@ -350,7 +362,7 @@ static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
|
||||
}
|
||||
}
|
||||
|
||||
/* irq_chip eoi callback */
|
||||
/* irq_chip eoi callback, called with irq descriptor lock held */
|
||||
static void xive_irq_eoi(struct irq_data *d)
|
||||
{
|
||||
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
|
||||
@ -366,6 +378,8 @@ static void xive_irq_eoi(struct irq_data *d)
|
||||
if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
|
||||
!(xd->flags & XIVE_IRQ_NO_EOI))
|
||||
xive_do_source_eoi(irqd_to_hwirq(d), xd);
|
||||
else
|
||||
xd->stale_p = true;
|
||||
|
||||
/*
|
||||
* Clear saved_p to indicate that it's no longer occupying
|
||||
@ -397,11 +411,16 @@ static void xive_do_source_set_mask(struct xive_irq_data *xd,
|
||||
*/
|
||||
if (mask) {
|
||||
val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
|
||||
xd->saved_p = !!(val & XIVE_ESB_VAL_P);
|
||||
} else if (xd->saved_p)
|
||||
if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P))
|
||||
xd->saved_p = true;
|
||||
xd->stale_p = false;
|
||||
} else if (xd->saved_p) {
|
||||
xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
|
||||
else
|
||||
xd->saved_p = false;
|
||||
} else {
|
||||
xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
|
||||
xd->stale_p = false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -541,6 +560,8 @@ static unsigned int xive_irq_startup(struct irq_data *d)
|
||||
unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
|
||||
int target, rc;
|
||||
|
||||
xd->saved_p = false;
|
||||
xd->stale_p = false;
|
||||
pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n",
|
||||
d->irq, hw_irq, d);
|
||||
|
||||
@ -587,6 +608,7 @@ static unsigned int xive_irq_startup(struct irq_data *d)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* called with irq descriptor lock held */
|
||||
static void xive_irq_shutdown(struct irq_data *d)
|
||||
{
|
||||
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
|
||||
@ -601,16 +623,6 @@ static void xive_irq_shutdown(struct irq_data *d)
|
||||
/* Mask the interrupt at the source */
|
||||
xive_do_source_set_mask(xd, true);
|
||||
|
||||
/*
|
||||
* The above may have set saved_p. We clear it otherwise it
|
||||
* will prevent re-enabling later on. It is ok to forget the
|
||||
* fact that the interrupt might be in a queue because we are
|
||||
* accounting that already in xive_dec_target_count() and will
|
||||
* be re-routing it to a new queue with proper accounting when
|
||||
* it's started up again
|
||||
*/
|
||||
xd->saved_p = false;
|
||||
|
||||
/*
|
||||
* Mask the interrupt in HW in the IVT/EAS and set the number
|
||||
* to be the "bad" IRQ number
|
||||
@ -797,6 +809,10 @@ static int xive_irq_retrigger(struct irq_data *d)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Caller holds the irq descriptor lock, so this won't be called
|
||||
* concurrently with xive_get_irqchip_state on the same interrupt.
|
||||
*/
|
||||
static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
|
||||
{
|
||||
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
|
||||
@ -820,6 +836,10 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
|
||||
|
||||
/* Set it to PQ=10 state to prevent further sends */
|
||||
pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
|
||||
if (!xd->stale_p) {
|
||||
xd->saved_p = !!(pq & XIVE_ESB_VAL_P);
|
||||
xd->stale_p = !xd->saved_p;
|
||||
}
|
||||
|
||||
/* No target ? nothing to do */
|
||||
if (xd->target == XIVE_INVALID_TARGET) {
|
||||
@ -827,7 +847,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
|
||||
* An untargeted interrupt should have been
|
||||
* also masked at the source
|
||||
*/
|
||||
WARN_ON(pq & 2);
|
||||
WARN_ON(xd->saved_p);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -847,9 +867,8 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
|
||||
* This saved_p is cleared by the host EOI, when we know
|
||||
* for sure the queue slot is no longer in use.
|
||||
*/
|
||||
if (pq & 2) {
|
||||
pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
|
||||
xd->saved_p = true;
|
||||
if (xd->saved_p) {
|
||||
xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
|
||||
|
||||
/*
|
||||
* Sync the XIVE source HW to ensure the interrupt
|
||||
@ -862,8 +881,7 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
|
||||
*/
|
||||
if (xive_ops->sync_source)
|
||||
xive_ops->sync_source(hw_irq);
|
||||
} else
|
||||
xd->saved_p = false;
|
||||
}
|
||||
} else {
|
||||
irqd_clr_forwarded_to_vcpu(d);
|
||||
|
||||
@ -914,6 +932,23 @@ static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * irq_chip irq_get_irqchip_state callback.
 * Called with irq descriptor lock held.
 *
 * Only IRQCHIP_STATE_ACTIVE is handled. For that query *state is set
 * to true when stale_p is clear and either saved_p is set or the P bit
 * read back from the source ESB is set, and 0 is returned. Any other
 * value of @which returns -EINVAL and leaves *state untouched.
 */
|
||||
static int xive_get_irqchip_state(struct irq_data *data,
|
||||
enum irqchip_irq_state which, bool *state)
|
||||
{
|
||||
struct xive_irq_data *xd = irq_data_get_irq_handler_data(data);
|
||||

||||
switch (which) {
|
||||
case IRQCHIP_STATE_ACTIVE:
|
||||
/*
 * Short-circuit evaluation: the ESB is only read when
 * stale_p is false and saved_p is false.
 */
*state = !xd->stale_p &&
|
||||
(xd->saved_p ||
|
||||
!!(xive_esb_read(xd, XIVE_ESB_GET) & XIVE_ESB_VAL_P));
|
||||
return 0;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
static struct irq_chip xive_irq_chip = {
|
||||
.name = "XIVE-IRQ",
|
||||
.irq_startup = xive_irq_startup,
|
||||
@ -925,6 +960,7 @@ static struct irq_chip xive_irq_chip = {
|
||||
.irq_set_type = xive_irq_set_type,
|
||||
.irq_retrigger = xive_irq_retrigger,
|
||||
.irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
|
||||
.irq_get_irqchip_state = xive_get_irqchip_state,
|
||||
};
|
||||
|
||||
bool is_xive_irq(struct irq_chip *chip)
|
||||
@ -1337,6 +1373,11 @@ static void xive_flush_cpu_queue(unsigned int cpu, struct xive_cpu *xc)
|
||||
raw_spin_lock(&desc->lock);
|
||||
xd = irq_desc_get_handler_data(desc);
|
||||
|
||||
/*
|
||||
* Clear saved_p to indicate that it's no longer pending
|
||||
*/
|
||||
xd->saved_p = false;
|
||||
|
||||
/*
|
||||
* For LSIs, we EOI, this will cause a resend if it's
|
||||
* still asserted. Otherwise do an MSI retrigger.
|
||||
|
Loading…
Reference in New Issue
Block a user