mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2025-01-21 18:40:30 +07:00
f862eefec0
It turns out the kernel relies on barrier() to force a reload of the
percpu offset value. Since we can't easily modify the definition of
barrier() to include "tp" as an output register, we instead provide a
definition of __my_cpu_offset as extended assembly that includes a fake
stack read to hazard against barrier(), forcing gcc to know that it
must reread "tp" and recompute anything based on "tp" after a barrier.
This fixes observed hangs in the slub allocator when we are looping
on a percpu cmpxchg_double.
A similar fix for ARMv7 was made in June in change 509eb76ebf.
Cc: stable@vger.kernel.org
Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
53 lines
1.8 KiB
C
53 lines
1.8 KiB
C
/*
|
|
* Copyright 2010 Tilera Corporation. All Rights Reserved.
|
|
*
|
|
* This program is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation, version 2.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
|
|
* NON INFRINGEMENT. See the GNU General Public License for
|
|
* more details.
|
|
*/
|
|
|
|
#ifndef _ASM_TILE_PERCPU_H
|
|
#define _ASM_TILE_PERCPU_H
|
|
|
|
/*
 * Global register variable bound to the "tp" register.  The
 * __my_cpu_offset and set_my_cpu_offset() definitions in this header
 * read and write the per-cpu offset through it.
 */
register unsigned long my_cpu_offset_reg asm("tp");
|
|
|
|
#ifdef CONFIG_PREEMPT
|
|
/*
 * For full preemption, we can't just use the register variable
 * directly, since we need barrier() to hazard against it, causing the
 * compiler to reload anything computed from a previous "tp" value.
 * But we also don't want to use volatile asm, since we'd like the
 * compiler to be able to cache the value across multiple percpu reads.
 * So we use a fake stack read as a hazard against barrier().
 * The 'U' constraint is like 'm' but disallows postincrement.
 *
 * Returns the value copied out of the "tp" register.
 */
|
|
static inline unsigned long __my_cpu_offset(void)
|
|
{
|
|
unsigned long tp;
|
|
/* Bound to the "sp" register; only used to form the dummy memory operand below. */
register unsigned long *sp asm("sp");
|
|
/*
 * Deliberately not "asm volatile" (see above).  The "U" (*sp) input is
 * never referenced by the instruction template; it exists solely as the
 * fake stack read that makes this statement hazard against barrier().
 */
asm("move %0, tp" : "=r" (tp) : "U" (*sp));
|
|
return tp;
|
|
}
|
|
/*
 * Let the bare name __my_cpu_offset (no parentheses) expand to a call of
 * the inline function above, so it is spelled the same way as in the
 * non-PREEMPT case, where it names the register variable directly.
 */
#define __my_cpu_offset __my_cpu_offset()
|
|
#else
|
|
/*
 * We don't need to hazard against barrier() since "tp" doesn't ever
 * change with PREEMPT_NONE, and with PREEMPT_VOLUNTARY it only
 * changes at function call points, at which we are already re-reading
 * the value of "tp" due to "my_cpu_offset_reg" being a global variable.
 * So here __my_cpu_offset is simply the register variable itself.
 */
|
|
#define __my_cpu_offset my_cpu_offset_reg
|
|
#endif
|
|
|
|
/*
 * Store a new per-cpu offset by assigning to the register variable bound
 * to "tp".  The macro argument is evaluated once; the expansion is an
 * assignment expression, so it yields the value that was stored.
 */
#define set_my_cpu_offset(tp) (my_cpu_offset_reg = (tp))
|
|
|
|
#include <asm-generic/percpu.h>
|
|
|
|
#endif /* _ASM_TILE_PERCPU_H */
|