From 437f9963bc4fd75889c1fe9289a92dea9124a439 Mon Sep 17 00:00:00 2001
From: Pavel Fedin
Date: Fri, 25 Sep 2015 17:00:29 +0300
Subject: [PATCH 1/6] KVM: arm/arm64: Do not inject spurious interrupts
When lowering a level-triggered line from userspace, we forgot to lower
the pending bit on the emulated CPU interface and we also did not
re-compute the pending_on_cpu bitmap for the CPU affected by the change.
Update vgic_update_irq_pending() to fix the two issues above and also
raise a warning in vgic_quue_irq_to_lr if we encounter an interrupt
pending on a CPU which is neither marked active nor pending.
[ Commit text reworked completely - Christoffer ]
Signed-off-by: Pavel Fedin
Signed-off-by: Christoffer Dall
---
virt/kvm/arm/vgic.c | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 6bd1c9bf7ae7..596455a394af 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1132,7 +1132,8 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state);
vgic_irq_clear_active(vcpu, irq);
vgic_update_state(vcpu->kvm);
- } else if (vgic_dist_irq_is_pending(vcpu, irq)) {
+ } else {
+ WARN_ON(!vgic_dist_irq_is_pending(vcpu, irq));
vlr.state |= LR_STATE_PENDING;
kvm_debug("Set pending: 0x%x\n", vlr.state);
}
@@ -1607,8 +1608,12 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
} else {
if (level_triggered) {
vgic_dist_irq_clear_level(vcpu, irq_num);
- if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
+ if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) {
vgic_dist_irq_clear_pending(vcpu, irq_num);
+ vgic_cpu_irq_clear(vcpu, irq_num);
+ if (!compute_pending_for_cpu(vcpu))
+ clear_bit(cpuid, dist->irq_pending_on_cpu);
+ }
}
ret = false;
From 399ea0f6bcd318af94ec8e4ffe96703ed674f22e Mon Sep 17 00:00:00 2001
From: Pavel Fedin
Date: Tue, 6 Oct 2015 11:14:35 +0300
Subject: [PATCH 2/6] KVM: arm/arm64: Fix memory leak if timer initialization
fails
Jump to correct label and free kvm_host_cpu_state
Reviewed-by: Wei Huang
Signed-off-by: Pavel Fedin
Signed-off-by: Christoffer Dall
---
arch/arm/kvm/arm.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index dc017adfddc8..78b286994577 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -1080,7 +1080,7 @@ static int init_hyp_mode(void)
*/
err = kvm_timer_hyp_init();
if (err)
- goto out_free_mappings;
+ goto out_free_context;
#ifndef CONFIG_HOTPLUG_CPU
free_boot_hyp_pgd();
From 4a5d69b73948d0e03cd38d77dc11edb2e707165f Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Mon, 12 Oct 2015 15:22:31 +0200
Subject: [PATCH 3/6] KVM: arm: use GIC support unconditionally
The vgic code on ARM is built for all configurations that enable KVM,
but the parent_data field that it references is only present when
CONFIG_IRQ_DOMAIN_HIERARCHY is set:
virt/kvm/arm/vgic.c: In function 'kvm_vgic_map_phys_irq':
virt/kvm/arm/vgic.c:1781:13: error: 'struct irq_data' has no member named 'parent_data'
This flag is implied by the GIC driver, and indeed the VGIC code only
makes sense if a GIC is present. This changes the CONFIG_KVM symbol
to always select GIC, which avoids the issue.
Fixes: 662d9715840 ("arm/arm64: KVM: Kill CONFIG_KVM_ARM_{VGIC,TIMER}")
Signed-off-by: Arnd Bergmann
Acked-by: Marc Zyngier
Signed-off-by: Christoffer Dall
---
arch/arm/kvm/Kconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 210eccadb69a..356970f3b25e 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
depends on MMU && OF
select PREEMPT_NOTIFIERS
select ANON_INODES
+ select ARM_GIC
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
From cff9211eb1a1f58ce7f5a2d596b617928fd4be0e Mon Sep 17 00:00:00 2001
From: Christoffer Dall
Date: Fri, 16 Oct 2015 12:41:21 +0200
Subject: [PATCH 4/6] arm/arm64: KVM: Fix arch timer behavior for disabled
interrupts
We have an interesting issue when the guest disables the timer interrupt
on the VGIC, which happens when turning VCPUs off using PSCI, for
example.
The problem is that because the guest disables the virtual interrupt at
the VGIC level, we never inject interrupts to the guest and therefore
never mark the interrupt as active on the physical distributor. The
host also never takes the timer interrupt (we only use the timer device
to trigger a guest exit and everything else is done in software), so the
interrupt does not become active through normal means.
The result is that we keep entering the guest with a programmed timer
that will always fire as soon as we context switch the hardware timer
state and run the guest, preventing forward progress for the VCPU.
Since the active state on the physical distributor is really part of the
timer logic, it is the job of our virtual arch timer driver to manage
this state.
The timer->map->active boolean field indicates whether we have signalled
this interrupt to the vgic and if that interrupt is still pending or
active. As long as that is the case, the hardware doesn't have to
generate physical interrupts and therefore we mark the interrupt as
active on the physical distributor.
We also have to restore the pending state of an interrupt that was
queued to an LR but was retired from the LR for some reason, while
remaining pending in the LR.
Cc: Marc Zyngier
Reported-by: Lorenzo Pieralisi
Signed-off-by: Christoffer Dall
---
virt/kvm/arm/arch_timer.c | 19 +++++++++++++++++
virt/kvm/arm/vgic.c | 43 ++++++++++-----------------------------
2 files changed, 30 insertions(+), 32 deletions(-)
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 48c6e1ac6827..b9d3a32cbc04 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -137,6 +137,8 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ bool phys_active;
+ int ret;
/*
* We're about to run this vcpu again, so there is no need to
@@ -151,6 +153,23 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu)
*/
if (kvm_timer_should_fire(vcpu))
kvm_timer_inject_irq(vcpu);
+
+ /*
+ * We keep track of whether the edge-triggered interrupt has been
+ * signalled to the vgic/guest, and if so, we mask the interrupt and
+ * the physical distributor to prevent the timer from raising a
+ * physical interrupt whenever we run a guest, preventing forward
+ * VCPU progress.
+ */
+ if (kvm_vgic_get_phys_irq_active(timer->map))
+ phys_active = true;
+ else
+ phys_active = false;
+
+ ret = irq_set_irqchip_state(timer->map->irq,
+ IRQCHIP_STATE_ACTIVE,
+ phys_active);
+ WARN_ON(ret);
}
/**
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 596455a394af..ea21bc273542 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1092,6 +1092,15 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu)
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr);
+ /*
+ * We must transfer the pending state back to the distributor before
+ * retiring the LR, otherwise we may loose edge-triggered interrupts.
+ */
+ if (vlr.state & LR_STATE_PENDING) {
+ vgic_dist_irq_set_pending(vcpu, irq);
+ vlr.hwirq = 0;
+ }
+
vlr.state = 0;
vgic_set_lr(vcpu, lr_nr, vlr);
clear_bit(lr_nr, vgic_cpu->lr_used);
@@ -1241,7 +1250,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
unsigned long *pa_percpu, *pa_shared;
- int i, vcpu_id, lr, ret;
+ int i, vcpu_id;
int overflow = 0;
int nr_shared = vgic_nr_shared_irqs(dist);
@@ -1296,31 +1305,6 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
*/
clear_bit(vcpu_id, dist->irq_pending_on_cpu);
}
-
- for (lr = 0; lr < vgic->nr_lr; lr++) {
- struct vgic_lr vlr;
-
- if (!test_bit(lr, vgic_cpu->lr_used))
- continue;
-
- vlr = vgic_get_lr(vcpu, lr);
-
- /*
- * If we have a mapping, and the virtual interrupt is
- * presented to the guest (as pending or active), then we must
- * set the state to active in the physical world. See
- * Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt.
- */
- if (vlr.state & LR_HW) {
- struct irq_phys_map *map;
- map = vgic_irq_map_search(vcpu, vlr.irq);
-
- ret = irq_set_irqchip_state(map->irq,
- IRQCHIP_STATE_ACTIVE,
- true);
- WARN_ON(ret);
- }
- }
}
static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
@@ -1430,13 +1414,8 @@ static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr)
WARN_ON(ret);
- if (map->active) {
- ret = irq_set_irqchip_state(map->irq,
- IRQCHIP_STATE_ACTIVE,
- false);
- WARN_ON(ret);
+ if (map->active)
return 0;
- }
return 1;
}
From 544c572e03174438b6656ed24a4516b9a9d5f14a Mon Sep 17 00:00:00 2001
From: Christoffer Dall
Date: Sat, 17 Oct 2015 17:55:12 +0200
Subject: [PATCH 5/6] arm/arm64: KVM: Clear map->active on pend/active clear
When a guest reboots or offlines/onlines CPUs, it is not uncommon for it
to clear the pending and active states of an interrupt through the
emulated VGIC distributor. However, since the architected timers are
defined by the architecture to be level triggered and the guest
rightfully expects them to be that, but we emulate them as
edge-triggered, we have to mimic level-triggered behavior for an
edge-triggered virtual implementation.
We currently do not signal the VGIC when the map->active field is true,
because it indicates that the guest has already been signalled of the
interrupt as required. Normally this field is set to false when the
guest deactivates the virtual interrupt through the sync path.
We also need to catch the case where the guest deactivates the interrupt
through the emulated distributor, again allowing guests to boot even if
the original virtual timer signal hit before the guest's GIC
initialization sequence is run.
Reviewed-by: Eric Auger
Signed-off-by: Christoffer Dall
---
virt/kvm/arm/vgic.c | 32 +++++++++++++++++++++++++++++++-
1 file changed, 31 insertions(+), 1 deletion(-)
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index ea21bc273542..58b125676785 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -531,6 +531,34 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm,
return false;
}
+/*
+ * If a mapped interrupt's state has been modified by the guest such that it
+ * is no longer active or pending, without it have gone through the sync path,
+ * then the map->active field must be cleared so the interrupt can be taken
+ * again.
+ */
+static void vgic_handle_clear_mapped_irq(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ struct list_head *root;
+ struct irq_phys_map_entry *entry;
+ struct irq_phys_map *map;
+
+ rcu_read_lock();
+
+ /* Check for PPIs */
+ root = &vgic_cpu->irq_phys_map_list;
+ list_for_each_entry_rcu(entry, root, entry) {
+ map = &entry->map;
+
+ if (!vgic_dist_irq_is_pending(vcpu, map->virt_irq) &&
+ !vgic_irq_is_active(vcpu, map->virt_irq))
+ map->active = false;
+ }
+
+ rcu_read_unlock();
+}
+
bool vgic_handle_clear_pending_reg(struct kvm *kvm,
struct kvm_exit_mmio *mmio,
phys_addr_t offset, int vcpu_id)
@@ -561,6 +589,7 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm,
vcpu_id, offset);
vgic_reg_access(mmio, reg, offset, mode);
+ vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id));
vgic_update_state(kvm);
return true;
}
@@ -598,6 +627,7 @@ bool vgic_handle_clear_active_reg(struct kvm *kvm,
ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT);
if (mmio->is_write) {
+ vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id));
vgic_update_state(kvm);
return true;
}
@@ -1406,7 +1436,7 @@ static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr)
return 0;
map = vgic_irq_map_search(vcpu, vlr.irq);
- BUG_ON(!map || !map->active);
+ BUG_ON(!map);
ret = irq_get_irqchip_state(map->irq,
IRQCHIP_STATE_ACTIVE,
From 0d997491f814c87310a6ad7be30a9049c7150489 Mon Sep 17 00:00:00 2001
From: Christoffer Dall
Date: Sat, 17 Oct 2015 19:05:27 +0200
Subject: [PATCH 6/6] arm/arm64: KVM: Fix disabled distributor operation
We currently do a single update of the vgic state when the distributor
enable/disable control register is accessed and then bypass updating the
state for as long as the distributor remains disabled.
This is incorrect, because updating the state does not consider the
distributor enable bit, and this you can end up in a situation where an
interrupt is marked as pending on the CPU interface, but not pending on
the distributor, which is an impossible state to be in, and triggers a
warning. Consider for example the following sequence of events:
1. An interrupt is marked as pending on the distributor
- the interrupt is also forwarded to the CPU interface
2. The guest turns off the distributor (it's about to do a reboot)
- we stop updating the CPU interface state from now on
3. The guest disables the pending interrupt
- we remove the pending state from the distributor, but don't touch
the CPU interface, see point 2.
Since the distributor disable bit really means that no interrupts should
be forwarded to the CPU interface, we modify the code to keep updating
the internal VGIC state, but always set the CPU interface pending bits
to zero when the distributor is disabled.
Signed-off-by: Christoffer Dall
---
virt/kvm/arm/vgic.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 58b125676785..66c66165e712 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1012,6 +1012,12 @@ static int compute_pending_for_cpu(struct kvm_vcpu *vcpu)
pend_percpu = vcpu->arch.vgic_cpu.pending_percpu;
pend_shared = vcpu->arch.vgic_cpu.pending_shared;
+ if (!dist->enabled) {
+ bitmap_zero(pend_percpu, VGIC_NR_PRIVATE_IRQS);
+ bitmap_zero(pend_shared, nr_shared);
+ return 0;
+ }
+
pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id);
enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id);
bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS);
@@ -1039,11 +1045,6 @@ void vgic_update_state(struct kvm *kvm)
struct kvm_vcpu *vcpu;
int c;
- if (!dist->enabled) {
- set_bit(0, dist->irq_pending_on_cpu);
- return;
- }
-
kvm_for_each_vcpu(c, vcpu, kvm) {
if (compute_pending_for_cpu(vcpu))
set_bit(c, dist->irq_pending_on_cpu);