mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-27 07:35:12 +07:00
1123a60416
Linu Cherian reported a WARN in cleanup_srcu_struct() when shutting down a guest running iperf on a VFIO assigned device. This happens because irqfd_wakeup() calls srcu_read_lock(&kvm->irq_srcu) in interrupt context, while a worker thread does the same inside kvm_set_irq(). If the interrupt happens while the worker thread is executing __srcu_read_lock(), updates to the Classic SRCU ->lock_count[] field or the Tree SRCU ->srcu_lock_count[] field can be lost. The docs say you are not supposed to call srcu_read_lock() and srcu_read_unlock() from irq context, but KVM interrupt injection happens from (host) interrupt context and it would be nice if SRCU supported the use case. KVM is using SRCU here not really for the "sleepable" part, but rather due to its IPI-free fast detection of grace periods. It is therefore not desirable to switch back to RCU, which would effectively revert commit 719d93cd5f
("kvm/irqchip: Speed up KVM_SET_GSI_ROUTING", 2014-01-16). However, the docs are overly conservative. You can have an SRCU instance that only has users in irq context, and you can mix process and irq context as long as process context users disable interrupts. In addition, __srcu_read_unlock() actually uses this_cpu_dec() on both Tree SRCU and Classic SRCU. For those two implementations, only srcu_read_lock() is unsafe. When Classic SRCU's __srcu_read_unlock() was changed to use this_cpu_dec(), in commit 5a41344a3d
("srcu: Simplify __srcu_read_unlock() via this_cpu_dec()", 2012-11-29), __srcu_read_lock() did two increments. Therefore it kept __this_cpu_inc(), with preempt_disable/enable in the caller. Tree SRCU, however, only does one increment, so on most architectures it is more efficient for __srcu_read_lock() to use this_cpu_inc(), and any performance differences appear to be down in the noise. Cc: stable@vger.kernel.org Fixes: 719d93cd5f
("kvm/irqchip: Speed up KVM_SET_GSI_ROUTING") Reported-by: Linu Cherian <linuc.decode@gmail.com> Suggested-by: Linu Cherian <linuc.decode@gmail.com> Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
209 lines
7.1 KiB
C
209 lines
7.1 KiB
C
/*
|
|
* Sleepable Read-Copy Update mechanism for mutual exclusion
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, you can access it online at
|
|
* http://www.gnu.org/licenses/gpl-2.0.html.
|
|
*
|
|
* Copyright (C) IBM Corporation, 2006
|
|
* Copyright (C) Fujitsu, 2012
|
|
*
|
|
* Author: Paul McKenney <paulmck@us.ibm.com>
|
|
* Lai Jiangshan <laijs@cn.fujitsu.com>
|
|
*
|
|
* For detailed explanation of Read-Copy Update mechanism see -
|
|
* Documentation/RCU/ *.txt
|
|
*
|
|
*/
|
|
|
|
#ifndef _LINUX_SRCU_H
|
|
#define _LINUX_SRCU_H
|
|
|
|
#include <linux/mutex.h>
|
|
#include <linux/rcupdate.h>
|
|
#include <linux/workqueue.h>
|
|
#include <linux/rcu_segcblist.h>
|
|
|
|
struct srcu_struct;
|
|
|
|
#ifdef CONFIG_DEBUG_LOCK_ALLOC

/*
 * Lockdep build: init_srcu_struct() is a macro rather than a function so
 * that each expansion declares its own static lock_class_key.  The
 * stringified macro argument is passed to __init_srcu_struct() as @name.
 */
int __init_srcu_struct(struct srcu_struct *sp, const char *name,
		       struct lock_class_key *key);

#define init_srcu_struct(sp) \
({ \
	static struct lock_class_key __srcu_key; \
	\
	__init_srcu_struct((sp), #sp, &__srcu_key); \
})

/* Initializer fragment that sets dep_map.name to the stringified argument. */
#define __SRCU_DEP_MAP_INIT(srcu_name)	.dep_map = { .name = #srcu_name },
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */

/* Non-lockdep build: a plain function, and the dep_map fragment is empty. */
int init_srcu_struct(struct srcu_struct *sp);

#define __SRCU_DEP_MAP_INIT(srcu_name)
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
|
|
|
|
#ifdef CONFIG_TINY_SRCU
|
|
#include <linux/srcutiny.h>
|
|
#elif defined(CONFIG_TREE_SRCU)
|
|
#include <linux/srcutree.h>
|
|
#elif defined(CONFIG_CLASSIC_SRCU)
|
|
#include <linux/srcuclassic.h>
|
|
#else
|
|
#error "Unknown SRCU implementation specified to kernel configuration"
|
|
#endif
|
|
|
|
/**
 * call_srcu() - Queue a callback for invocation after an SRCU grace period
 * @sp: srcu_struct in which to queue the callback
 * @head: structure to be used for queueing the SRCU callback.
 * @func: function to be invoked after the SRCU grace period
 *
 * @func is invoked some time after a full SRCU grace period has elapsed,
 * that is, once every SRCU read-side critical section that already existed
 * when call_srcu() was invoked has completed.  It may nevertheless run
 * concurrently with SRCU read-side critical sections that began after
 * call_srcu() was invoked.  SRCU read-side critical sections are delimited
 * by srcu_read_lock() and srcu_read_unlock(), and may be nested.
 *
 * The callback will be invoked from process context, but must nevertheless
 * be fast and must not block.
 */
void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
	       void (*func)(struct rcu_head *head));
|
|
|
|
void cleanup_srcu_struct(struct srcu_struct *sp);
|
|
int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
|
|
void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
|
|
void synchronize_srcu(struct srcu_struct *sp);
|
|
|
|
#ifdef CONFIG_DEBUG_LOCK_ALLOC
|
|
|
|
/**
|
|
* srcu_read_lock_held - might we be in SRCU read-side critical section?
|
|
*
|
|
* If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an SRCU
|
|
* read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC,
|
|
* this assumes we are in an SRCU read-side critical section unless it can
|
|
* prove otherwise.
|
|
*
|
|
* Checks debug_lockdep_rcu_enabled() to prevent false positives during boot
|
|
* and while lockdep is disabled.
|
|
*
|
|
* Note that SRCU is based on its own statemachine and it doesn't
|
|
* relies on normal RCU, it can be called from the CPU which
|
|
* is in the idle loop from an RCU point of view or offline.
|
|
*/
|
|
static inline int srcu_read_lock_held(struct srcu_struct *sp)
|
|
{
|
|
if (!debug_lockdep_rcu_enabled())
|
|
return 1;
|
|
return lock_is_held(&sp->dep_map);
|
|
}
|
|
|
|
#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
|
|
|
|
/*
 * Variant built without CONFIG_DEBUG_LOCK_ALLOC: nothing is tracked, so
 * always report that we might be in an SRCU read-side critical section.
 * @sp is not examined.
 */
static inline int srcu_read_lock_held(struct srcu_struct *sp)
{
	return 1;
}
|
|
|
|
#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
|
|
|
|
/**
 * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing
 * @p: the pointer to fetch and protect for later dereferencing
 * @sp: pointer to the srcu_struct, which is used to check that we
 *	really are in an SRCU read-side critical section.
 * @c: condition to check for update-side use
 *
 * The check handed to __rcu_dereference_check() is satisfied when either
 * @c evaluates to nonzero or srcu_read_lock_held(@sp) does.  If PROVE_RCU
 * is enabled, invoking this outside of an SRCU read-side critical section
 * will therefore result in an RCU-lockdep splat unless @c evaluates to 1.
 * The @c argument will normally be a logical expression containing
 * lockdep_is_held() calls.
 */
#define srcu_dereference_check(p, sp, c) \
	__rcu_dereference_check((p), (c) || srcu_read_lock_held(sp), __rcu)
|
|
|
|
/**
 * srcu_dereference - fetch SRCU-protected pointer for later dereferencing
 * @p: the pointer to fetch and protect for later dereferencing
 * @sp: pointer to the srcu_struct, which is used to check that we
 *	really are in an SRCU read-side critical section.
 *
 * Shorthand for srcu_dereference_check() with a constant-false update-side
 * condition, so only the read-side check remains.  If PROVE_RCU is
 * enabled, invoking this outside of an SRCU read-side critical section
 * will result in an RCU-lockdep splat.
 */
#define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0)
|
|
|
|
/**
|
|
* srcu_read_lock - register a new reader for an SRCU-protected structure.
|
|
* @sp: srcu_struct in which to register the new reader.
|
|
*
|
|
* Enter an SRCU read-side critical section. Note that SRCU read-side
|
|
* critical sections may be nested. However, it is illegal to
|
|
* call anything that waits on an SRCU grace period for the same
|
|
* srcu_struct, whether directly or indirectly. Please note that
|
|
* one way to indirectly wait on an SRCU grace period is to acquire
|
|
* a mutex that is held elsewhere while calling synchronize_srcu() or
|
|
* synchronize_srcu_expedited().
|
|
*
|
|
* Note that srcu_read_lock() and the matching srcu_read_unlock() must
|
|
* occur in the same context, for example, it is illegal to invoke
|
|
* srcu_read_unlock() in an irq handler if the matching srcu_read_lock()
|
|
* was invoked in process context.
|
|
*/
|
|
static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp)
|
|
{
|
|
int retval;
|
|
|
|
retval = __srcu_read_lock(sp);
|
|
rcu_lock_acquire(&(sp)->dep_map);
|
|
return retval;
|
|
}
|
|
|
|
/**
|
|
* srcu_read_unlock - unregister a old reader from an SRCU-protected structure.
|
|
* @sp: srcu_struct in which to unregister the old reader.
|
|
* @idx: return value from corresponding srcu_read_lock().
|
|
*
|
|
* Exit an SRCU read-side critical section.
|
|
*/
|
|
static inline void srcu_read_unlock(struct srcu_struct *sp, int idx)
|
|
__releases(sp)
|
|
{
|
|
rcu_lock_release(&(sp)->dep_map);
|
|
__srcu_read_unlock(sp, idx);
|
|
}
|
|
|
|
/**
 * smp_mb__after_srcu_read_unlock - ensure full ordering after srcu_read_unlock
 *
 * Converts the preceding srcu_read_unlock into a two-way memory barrier.
 *
 * Invoke this immediately after srcu_read_unlock() to guarantee that all
 * memory operations that occur after smp_mb__after_srcu_read_unlock will
 * appear to happen after the preceding srcu_read_unlock.
 */
static inline void smp_mb__after_srcu_read_unlock(void)
{
	/* __srcu_read_unlock has smp_mb() internally so nothing to do here. */
}
|
|
|
|
#endif
|