This series enables the "KVM_(S|G)ET_MP_STATE" ioctls on s390 to make

the cpu state settable by user space.
 
 This is necessary to avoid races in s390 SIGP/reset handling which
 happen because some SIGPs are handled in QEMU, while others are
 handled in the kernel. Together with the busy conditions as return
 value of SIGP races happen especially in areas like starting and
 stopping of CPUs. (For example, there is a program 'cpuplugd', that
 runs on several s390 distros which does automatic onlining and
 offlining on cpus.)
 
 As soon as the MPSTATE interface is used, user space takes complete
 control of the cpu states. Otherwise the kernel will use the old way.
 
 Therefore, the new kernel continues to work fine with old QEMUs.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.11 (GNU/Linux)
 
 iQIcBAABAgAGBQJTxSp+AAoJEBF7vIC1phx8RYgP/0mBaV3isXrCR+iisLJYNJjq
 s6Ssl9TUMR3+lRZ9epytRd02UfkBGqVaW+HtRh5JP5KKAGSn2i3eB9WDAY7bD8i7
 DVLVE2aO7okw1Z2G6CEO27dRfS0SCAfj/X77BRISyqVxK4eY86lAhQdyU5nB67TR
 c0Fk4YHwjeBoQxZTAQr2xL4052gkB+Jp/PpltszILonsYNASOsxbcHqH4t+0SFmo
 FGXydBn6eN+e3fWQSxetkrxvj14sj5K6ljiZoSMyw5nDfyrRn8RcCX87GjNLG+GR
 X0eFB9Nl83NQoC5ksQtojunsx57+cEMgoWbdK7mxoqp+6+wJrvYB2eSKY77RYH4J
 2xIy3klF/ypSZt7gxwL0pugi9QodGW39mA+stuezKUwyPalpMxHmRRwvHitGJjkP
 KwvWc4m2QebKJ6RHhgkvZ0gMaVUJcqitrlXUxWgAAcH6MNBIC1g2ufsxnv51V/O6
 SnspBWTPVDUqO6bJP4brJiAt8K7Jx3Bg5frpyN0jparh8Nmu3Kwfz0RtDYrUYyOe
 p2o2lzY5L6gvY3iOrhvoc9zbpbyuycon8nUP4WOh/eGvIM2WV6cxmkck1Fo/wNso
 evunS1FNvbN7Wxk5h4/XSVsfdcM/mUa3E7cVxgpg8+Aqse9qfpM35BlNWR+zf0G+
 AdF90u/I+3mcRKWoSrKu
 =86qw
 -----END PGP SIGNATURE-----

Merge tag 'kvm-s390-20140715' of git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into kvm-next

This series enables the "KVM_(S|G)ET_MP_STATE" ioctls on s390 to make
the cpu state settable by user space.

This is necessary to avoid races in s390 SIGP/reset handling which
happen because some SIGPs are handled in QEMU, while others are
handled in the kernel. Together with the busy conditions as return
value of SIGP races happen especially in areas like starting and
stopping of CPUs. (For example, there is a program 'cpuplugd', that
runs on several s390 distros which does automatic onlining and
offlining on cpus.)

As soon as the MPSTATE interface is used, user space takes complete
control of the cpu states. Otherwise the kernel will use the old way.

Therefore, the new kernel continues to work fine with old QEMUs.
This commit is contained in:
Paolo Bonzini 2014-07-21 13:35:43 +02:00
commit ec10b72701
8 changed files with 98 additions and 45 deletions

View File

@ -988,7 +988,7 @@ for vm-wide capabilities.
4.38 KVM_GET_MP_STATE
Capability: KVM_CAP_MP_STATE
Architectures: x86, ia64
Architectures: x86, ia64, s390
Type: vcpu ioctl
Parameters: struct kvm_mp_state (out)
Returns: 0 on success; -1 on error
@ -1002,24 +1002,32 @@ uniprocessor guests).
Possible values are:
- KVM_MP_STATE_RUNNABLE: the vcpu is currently running
- KVM_MP_STATE_RUNNABLE: the vcpu is currently running [x86, ia64]
- KVM_MP_STATE_UNINITIALIZED: the vcpu is an application processor (AP)
which has not yet received an INIT signal
which has not yet received an INIT signal [x86,
ia64]
- KVM_MP_STATE_INIT_RECEIVED: the vcpu has received an INIT signal, and is
now ready for a SIPI
now ready for a SIPI [x86, ia64]
- KVM_MP_STATE_HALTED: the vcpu has executed a HLT instruction and
is waiting for an interrupt
is waiting for an interrupt [x86, ia64]
- KVM_MP_STATE_SIPI_RECEIVED: the vcpu has just received a SIPI (vector
accessible via KVM_GET_VCPU_EVENTS)
accessible via KVM_GET_VCPU_EVENTS) [x86, ia64]
- KVM_MP_STATE_STOPPED: the vcpu is stopped [s390]
- KVM_MP_STATE_CHECK_STOP: the vcpu is in a special error state [s390]
- KVM_MP_STATE_OPERATING: the vcpu is operating (running or halted)
[s390]
- KVM_MP_STATE_LOAD: the vcpu is in a special load/startup state
[s390]
This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel
irqchip, the multiprocessing state must be maintained by userspace.
On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
in-kernel irqchip, the multiprocessing state must be maintained by userspace on
these architectures.
4.39 KVM_SET_MP_STATE
Capability: KVM_CAP_MP_STATE
Architectures: x86, ia64
Architectures: x86, ia64, s390
Type: vcpu ioctl
Parameters: struct kvm_mp_state (in)
Returns: 0 on success; -1 on error
@ -1027,8 +1035,9 @@ Returns: 0 on success; -1 on error
Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for
arguments.
This ioctl is only useful after KVM_CREATE_IRQCHIP. Without an in-kernel
irqchip, the multiprocessing state must be maintained by userspace.
On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
in-kernel irqchip, the multiprocessing state must be maintained by userspace on
these architectures.
4.40 KVM_SET_IDENTITY_MAP_ADDR

View File

@ -418,6 +418,7 @@ struct kvm_arch{
int css_support;
int use_irqchip;
int use_cmma;
int user_cpu_state_ctrl;
struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
wait_queue_head_t ipte_wq;
spinlock_t start_stop_lock;

View File

@ -176,7 +176,8 @@ static int __diag_ipl_functions(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
kvm_s390_vcpu_stop(vcpu);
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
kvm_s390_vcpu_stop(vcpu);
vcpu->run->s390_reset_flags |= KVM_S390_RESET_SUBSYSTEM;
vcpu->run->s390_reset_flags |= KVM_S390_RESET_IPL;
vcpu->run->s390_reset_flags |= KVM_S390_RESET_CPU_INIT;

View File

@ -56,32 +56,26 @@ static int handle_noop(struct kvm_vcpu *vcpu)
static int handle_stop(struct kvm_vcpu *vcpu)
{
int rc = 0;
unsigned int action_bits;
vcpu->stat.exit_stop_request++;
spin_lock_bh(&vcpu->arch.local_int.lock);
trace_kvm_s390_stop_request(vcpu->arch.local_int.action_bits);
if (vcpu->arch.local_int.action_bits & ACTION_STOP_ON_STOP) {
kvm_s390_vcpu_stop(vcpu);
vcpu->arch.local_int.action_bits &= ~ACTION_STOP_ON_STOP;
VCPU_EVENT(vcpu, 3, "%s", "cpu stopped");
rc = -EOPNOTSUPP;
}
action_bits = vcpu->arch.local_int.action_bits;
if (vcpu->arch.local_int.action_bits & ACTION_STORE_ON_STOP) {
vcpu->arch.local_int.action_bits &= ~ACTION_STORE_ON_STOP;
/* store status must be called unlocked. Since local_int.lock
* only protects local_int.* and not guest memory we can give
* up the lock here */
spin_unlock_bh(&vcpu->arch.local_int.lock);
if (!(action_bits & ACTION_STOP_ON_STOP))
return 0;
if (action_bits & ACTION_STORE_ON_STOP) {
rc = kvm_s390_vcpu_store_status(vcpu,
KVM_S390_STORE_STATUS_NOADDR);
if (rc >= 0)
rc = -EOPNOTSUPP;
} else
spin_unlock_bh(&vcpu->arch.local_int.lock);
return rc;
if (rc)
return rc;
}
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
kvm_s390_vcpu_stop(vcpu);
return -EOPNOTSUPP;
}
static int handle_validity(struct kvm_vcpu *vcpu)

View File

@ -167,6 +167,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_ENABLE_CAP_VM:
case KVM_CAP_VM_ATTRIBUTES:
case KVM_CAP_MP_STATE:
r = 1;
break;
case KVM_CAP_NR_VCPUS:
@ -595,7 +596,8 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->pp = 0;
vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
kvm_clear_async_pf_completion_queue(vcpu);
kvm_s390_vcpu_stop(vcpu);
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
kvm_s390_vcpu_stop(vcpu);
kvm_s390_clear_local_irqs(vcpu);
}
@ -926,7 +928,7 @@ static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
int rc = 0;
if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED))
if (!is_vcpu_stopped(vcpu))
rc = -EBUSY;
else {
vcpu->run->psw_mask = psw.mask;
@ -980,13 +982,34 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
return -EINVAL; /* not implemented yet */
/* CHECK_STOP and LOAD are not supported yet */
return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
KVM_MP_STATE_OPERATING;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
return -EINVAL; /* not implemented yet */
int rc = 0;
/* user space knows about this interface - let it control the state */
vcpu->kvm->arch.user_cpu_state_ctrl = 1;
switch (mp_state->mp_state) {
case KVM_MP_STATE_STOPPED:
kvm_s390_vcpu_stop(vcpu);
break;
case KVM_MP_STATE_OPERATING:
kvm_s390_vcpu_start(vcpu);
break;
case KVM_MP_STATE_LOAD:
case KVM_MP_STATE_CHECK_STOP:
/* fall through - CHECK_STOP and LOAD are not supported yet */
default:
rc = -ENXIO;
}
return rc;
}
bool kvm_s390_cmma_enabled(struct kvm *kvm)
@ -1284,7 +1307,13 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (vcpu->sigset_active)
sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
kvm_s390_vcpu_start(vcpu);
if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
kvm_s390_vcpu_start(vcpu);
} else if (is_vcpu_stopped(vcpu)) {
pr_err_ratelimited("kvm-s390: can't run stopped vcpu %d\n",
vcpu->vcpu_id);
return -EINVAL;
}
switch (kvm_run->exit_reason) {
case KVM_EXIT_S390_SIEIC:
@ -1413,11 +1442,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
return kvm_s390_store_status_unloaded(vcpu, addr);
}
static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
{
return atomic_read(&(vcpu)->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
}
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
@ -1494,7 +1518,15 @@ void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
spin_lock_bh(&vcpu->kvm->arch.start_stop_lock);
online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
/* Need to lock access to action_bits to avoid a SIGP race condition */
spin_lock_bh(&vcpu->arch.local_int.lock);
atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
/* SIGP STOP and SIGP STOP AND STORE STATUS has been fully processed */
vcpu->arch.local_int.action_bits &=
~(ACTION_STOP_ON_STOP | ACTION_STORE_ON_STOP);
spin_unlock_bh(&vcpu->arch.local_int.lock);
__disable_ibs_on_vcpu(vcpu);
for (i = 0; i < online_vcpus; i++) {

View File

@ -45,9 +45,9 @@ do { \
d_args); \
} while (0)
static inline int __cpu_is_stopped(struct kvm_vcpu *vcpu)
static inline int is_vcpu_stopped(struct kvm_vcpu *vcpu)
{
return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOP_INT;
return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED;
}
static inline int kvm_is_ucontrol(struct kvm *kvm)
@ -129,6 +129,12 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
vcpu->arch.sie_block->gpsw.mask |= cc << 44;
}
/* are cpu states controlled by user space */
static inline int kvm_s390_user_cpu_state_ctrl(struct kvm *kvm)
{
return kvm->arch.user_cpu_state_ctrl != 0;
}
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu);
enum hrtimer_restart kvm_s390_idle_wakeup(struct hrtimer *timer);
void kvm_s390_tasklet(unsigned long parm);

View File

@ -136,6 +136,11 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
inti->type = KVM_S390_SIGP_STOP;
spin_lock_bh(&li->lock);
if (li->action_bits & ACTION_STOP_ON_STOP) {
/* another SIGP STOP is pending */
rc = SIGP_CC_BUSY;
goto out;
}
if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
kfree(inti);
if ((action & ACTION_STORE_ON_STOP) != 0)
@ -144,8 +149,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
}
list_add_tail(&inti->list, &li->list);
atomic_set(&li->active, 1);
atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
li->action_bits |= action;
atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
if (waitqueue_active(li->wq))
wake_up_interruptible(li->wq);
out:

View File

@ -399,13 +399,18 @@ struct kvm_vapic_addr {
__u64 vapic_addr;
};
/* for KVM_SET_MPSTATE */
/* for KVM_SET_MP_STATE */
/* not all states are valid on all architectures */
#define KVM_MP_STATE_RUNNABLE 0
#define KVM_MP_STATE_UNINITIALIZED 1
#define KVM_MP_STATE_INIT_RECEIVED 2
#define KVM_MP_STATE_HALTED 3
#define KVM_MP_STATE_SIPI_RECEIVED 4
#define KVM_MP_STATE_STOPPED 5
#define KVM_MP_STATE_CHECK_STOP 6
#define KVM_MP_STATE_OPERATING 7
#define KVM_MP_STATE_LOAD 8
struct kvm_mp_state {
__u32 mp_state;