linux_dsm_epyc7002/include/linux/lglock.h
Srivatsa S. Bhat e30e2fdfe5 VFS: Fix race between CPU hotplug and lglocks
Currently, the *_global_[un]lock_online() routines are not at all synchronized
with CPU hotplug. Soft-lockups detected as a consequence of this race were
reported earlier at https://lkml.org/lkml/2011/8/24/185. (Thanks to Cong Meng
for finding out that the root-cause of this issue is the race condition
between br_write_[un]lock() and CPU hotplug, which results in the lock states
getting messed up).

Fixing this race by just adding {get,put}_online_cpus() at appropriate places
in *_global_[un]lock_online() is not a good option, because, then suddenly
br_write_[un]lock() would become blocking, whereas they have been kept as
non-blocking all this time, and we would want to keep them that way.

So, overall, we want to ensure 3 things:
1. br_write_lock() and br_write_unlock() must remain as non-blocking.
2. The corresponding lock and unlock of the per-cpu spinlocks must not happen
   for different sets of CPUs.
3. Either prevent any new CPU online operation in between this lock-unlock, or
   ensure that the newly onlined CPU does not proceed with its corresponding
   per-cpu spinlock unlocked.

To achieve all this:
(a) We introduce a new spinlock that is taken by the *_global_lock_online()
    routine and released by the *_global_unlock_online() routine.
(b) We register a callback for CPU hotplug notifications, and this callback
    takes the same spinlock as above.
(c) We maintain a bitmap which is close to the cpu_online_mask, and once it is
    initialized in the lock_init() code, all future updates to it are done in
    the callback, under the above spinlock.
(d) The above bitmap is used (instead of cpu_online_mask) while locking and
    unlocking the per-cpu locks.

The callback takes the spinlock upon the CPU_UP_PREPARE event. So, if the
br_write_lock-unlock sequence is in progress, the callback keeps spinning,
thus preventing the CPU online operation till the lock-unlock sequence is
complete. This takes care of requirement (3).

The bitmap that we maintain remains unmodified throughout the lock-unlock
sequence, since all updates to it are managed by the callback, which takes
the same spinlock as the one taken by the lock code and released only by the
unlock routine. Combining this with (d) above, satisfies requirement (2).

Overall, since we use a spinlock (mentioned in (a)) to prevent CPU hotplug
operations from racing with br_write_lock-unlock, requirement (1) is also
taken care of.

By the way, it is to be noted that a CPU offline operation can actually run
in parallel with our lock-unlock sequence, because our callback doesn't react
to notifications earlier than CPU_DEAD (in order to maintain our bitmap
properly). And this means, since we use our own bitmap (which is stale, on
purpose) during the lock-unlock sequence, we could end up unlocking the
per-cpu lock of an offline CPU (because we had locked it earlier, when the
CPU was online), in order to satisfy requirement (2). But this is harmless,
though it looks a bit awkward.

Debugged-by: Cong Meng <mc@linux.vnet.ibm.com>
Signed-off-by: Srivatsa S. Bhat <srivatsa.bhat@linux.vnet.ibm.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Cc: stable@vger.kernel.org
2011-12-22 02:02:20 -05:00

201 lines
6.6 KiB
C

/*
* Specialised local-global spinlock. Can only be declared as global variables
* to avoid overhead and keep things simple (and we don't want to start using
* these inside dynamically allocated structures).
*
* "local/global locks" (lglocks) can be used to:
*
* - Provide fast exclusive access to per-CPU data, with exclusive access to
* another CPU's data allowed but possibly subject to contention, and to
* provide very slow exclusive access to all per-CPU data.
* - Or to provide very fast and scalable read serialisation, and to provide
* very slow exclusive serialisation of data (not necessarily per-CPU data).
*
* Brlocks are also implemented as a short-hand notation for the latter use
* case.
*
* Copyright 2009, 2010, Nick Piggin, Novell Inc.
*/
#ifndef __LINUX_LGLOCK_H
#define __LINUX_LGLOCK_H
#include <linux/spinlock.h>
#include <linux/lockdep.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
/*
 * Front-end macros.  Each one pastes a fixed suffix onto the lock's name to
 * form the per-lock function name and calls it (the *_cpu variants forward
 * the cpu argument).
 */

/* lglock operations */
#define lg_lock_init(name) name##_lock_init()
#define lg_local_lock(name) name##_local_lock()
#define lg_local_unlock(name) name##_local_unlock()
#define lg_local_lock_cpu(name, cpu) name##_local_lock_cpu(cpu)
#define lg_local_unlock_cpu(name, cpu) name##_local_unlock_cpu(cpu)
#define lg_global_lock(name) name##_global_lock()
#define lg_global_unlock(name) name##_global_unlock()
#define lg_global_lock_online(name) name##_global_lock_online()
#define lg_global_unlock_online(name) name##_global_unlock_online()

/*
 * brlock operations: a brlock is an lglock whose read side is the local
 * lock and whose write side is the global lock over the online CPUs.
 */
#define DECLARE_BRLOCK(name) DECLARE_LGLOCK(name)
#define DEFINE_BRLOCK(name) DEFINE_LGLOCK(name)
#define br_lock_init(name) name##_lock_init()
#define br_read_lock(name) name##_local_lock()
#define br_read_unlock(name) name##_local_unlock()
#define br_write_lock(name) name##_global_lock_online()
#define br_write_unlock(name) name##_global_unlock_online()
/*
 * Lockdep glue.  Without CONFIG_DEBUG_LOCK_ALLOC both macros expand to
 * nothing; with it, LOCKDEP_INIT_MAP names lockdep_init_map and
 * DEFINE_LGLOCK_LOCKDEP(name) defines and exports the lock's class key
 * and lockdep map.
 */
#ifndef CONFIG_DEBUG_LOCK_ALLOC
#define LOCKDEP_INIT_MAP(a, b, c, d)
#define DEFINE_LGLOCK_LOCKDEP(name)
#else
#define LOCKDEP_INIT_MAP lockdep_init_map
#define DEFINE_LGLOCK_LOCKDEP(name) \
struct lock_class_key name##_lock_key; \
struct lockdep_map name##_lock_dep_map; \
EXPORT_SYMBOL(name##_lock_dep_map)
#endif
/*
 * DECLARE_LGLOCK(name) - declare the entry points of the lglock "name".
 *
 * Expands to extern prototypes for the init routine and for the local,
 * per-cpu, global and global-online lock/unlock routines.
 *
 * The final line deliberately carries no line-continuation backslash: the
 * macro must end there so that the directive on the following line is not
 * spliced into this replacement list.
 */
#define DECLARE_LGLOCK(name) \
extern void name##_lock_init(void); \
extern void name##_local_lock(void); \
extern void name##_local_unlock(void); \
extern void name##_local_lock_cpu(int cpu); \
extern void name##_local_unlock_cpu(int cpu); \
extern void name##_global_lock(void); \
extern void name##_global_unlock(void); \
extern void name##_global_lock_online(void); \
extern void name##_global_unlock_online(void);
#define DEFINE_LGLOCK(name) \
\
DEFINE_SPINLOCK(name##_cpu_lock); \
cpumask_t name##_cpus __read_mostly; \
DEFINE_PER_CPU(arch_spinlock_t, name##_lock); \
DEFINE_LGLOCK_LOCKDEP(name); \
\
static int \
name##_lg_cpu_callback(struct notifier_block *nb, \
unsigned long action, void *hcpu) \
{ \
switch (action & ~CPU_TASKS_FROZEN) { \
case CPU_UP_PREPARE: \
spin_lock(&name##_cpu_lock); \
cpu_set((unsigned long)hcpu, name##_cpus); \
spin_unlock(&name##_cpu_lock); \
break; \
case CPU_UP_CANCELED: case CPU_DEAD: \
spin_lock(&name##_cpu_lock); \
cpu_clear((unsigned long)hcpu, name##_cpus); \
spin_unlock(&name##_cpu_lock); \
} \
return NOTIFY_OK; \
} \
static struct notifier_block name##_lg_cpu_notifier = { \
.notifier_call = name##_lg_cpu_callback, \
}; \
void name##_lock_init(void) { \
int i; \
LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \
for_each_possible_cpu(i) { \
arch_spinlock_t *lock; \
lock = &per_cpu(name##_lock, i); \
*lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED; \
} \
register_hotcpu_notifier(&name##_lg_cpu_notifier); \
get_online_cpus(); \
for_each_online_cpu(i) \
cpu_set(i, name##_cpus); \
put_online_cpus(); \
} \
EXPORT_SYMBOL(name##_lock_init); \
\
void name##_local_lock(void) { \
arch_spinlock_t *lock; \
preempt_disable(); \
rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \
lock = &__get_cpu_var(name##_lock); \
arch_spin_lock(lock); \
} \
EXPORT_SYMBOL(name##_local_lock); \
\
void name##_local_unlock(void) { \
arch_spinlock_t *lock; \
rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \
lock = &__get_cpu_var(name##_lock); \
arch_spin_unlock(lock); \
preempt_enable(); \
} \
EXPORT_SYMBOL(name##_local_unlock); \
\
void name##_local_lock_cpu(int cpu) { \
arch_spinlock_t *lock; \
preempt_disable(); \
rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_); \
lock = &per_cpu(name##_lock, cpu); \
arch_spin_lock(lock); \
} \
EXPORT_SYMBOL(name##_local_lock_cpu); \
\
void name##_local_unlock_cpu(int cpu) { \
arch_spinlock_t *lock; \
rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_); \
lock = &per_cpu(name##_lock, cpu); \
arch_spin_unlock(lock); \
preempt_enable(); \
} \
EXPORT_SYMBOL(name##_local_unlock_cpu); \
\
void name##_global_lock_online(void) { \
int i; \
spin_lock(&name##_cpu_lock); \
rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \
for_each_cpu(i, &name##_cpus) { \
arch_spinlock_t *lock; \
lock = &per_cpu(name##_lock, i); \
arch_spin_lock(lock); \
} \
} \
EXPORT_SYMBOL(name##_global_lock_online); \
\
void name##_global_unlock_online(void) { \
int i; \
rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \
for_each_cpu(i, &name##_cpus) { \
arch_spinlock_t *lock; \
lock = &per_cpu(name##_lock, i); \
arch_spin_unlock(lock); \
} \
spin_unlock(&name##_cpu_lock); \
} \
EXPORT_SYMBOL(name##_global_unlock_online); \
\
void name##_global_lock(void) { \
int i; \
preempt_disable(); \
rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \
for_each_possible_cpu(i) { \
arch_spinlock_t *lock; \
lock = &per_cpu(name##_lock, i); \
arch_spin_lock(lock); \
} \
} \
EXPORT_SYMBOL(name##_global_lock); \
\
void name##_global_unlock(void) { \
int i; \
rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \
for_each_possible_cpu(i) { \
arch_spinlock_t *lock; \
lock = &per_cpu(name##_lock, i); \
arch_spin_unlock(lock); \
} \
preempt_enable(); \
} \
EXPORT_SYMBOL(name##_global_unlock);
#endif