a7e926abc3
This patch replaces atomic64_32.c with two assembly implementations,
one for 386/486 machines using pushf/cli/popf and one for 586+ machines
using cmpxchg8b.
The cmpxchg8b implementation provides the following advantages over the
current one:
1. Implements atomic64_add_unless, atomic64_dec_if_positive and
atomic64_inc_not_zero
2. Uses the ZF flag set by cmpxchg8b instead of doing a separate comparison (see the retry-loop sketch after this list)
3. Uses custom register calling conventions that reduce or eliminate
register moves to suit cmpxchg8b
4. Reads the initial value instead of using cmpxchg8b to do that.
Currently we use lock xaddl and movl, which seems to be the fastest.
5. Does not use the lock prefix for atomic64_set:
64-bit writes are already atomic, so it is not needed there.
It is still needed for atomic64_read, to avoid restoring a value
that was changed in the meantime.
6. Allocates registers as well or better than gcc
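To make the shape of the cmpxchg8b-based operations concrete, here is a minimal C sketch of the retry loop that an operation such as atomic64_add_return reduces to. It is illustrative only: the real code is hand-written assembly with custom register conventions, and the __sync_val_compare_and_swap builtin below merely stands in for a lock cmpxchg8b instruction.

#include <stdint.h>

/* Illustrative sketch only -- not the kernel's code. */
typedef struct { volatile int64_t counter; } atomic64_sketch_t;

static int64_t atomic64_add_return_sketch(int64_t a, atomic64_sketch_t *v)
{
	int64_t old = v->counter;	/* plain read of the initial value (point 4) */

	for (;;) {
		int64_t new = old + a;
		/* cmpxchg8b reports success via ZF (point 2); the builtin
		 * instead returns the value that was found in memory. */
		int64_t prev = __sync_val_compare_and_swap(&v->counter, old, new);
		if (prev == old)
			return new;	/* the swap happened */
		old = prev;		/* lost the race: retry with the fresh value */
	}
}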
The 386 implementation provides support for 386 and 486 machines.
386/486 SMP is not supported (we dropped it), but such support can be
added easily if desired.
A pure assembly implementation is required because of the custom calling
conventions, the desire to use %ebp in atomic64_add_return (seven
registers are needed), and the need to use pushf/popf in the 386 code
without an intermediate pop/push.
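For comparison, the 386/486 fallback amounts to an interrupts-off critical section on a uniprocessor kernel. Below is a rough C equivalent of one operation, again only a sketch: the real code is pure assembly that brackets the read-modify-write with pushf/cli/popf.

/* Rough C equivalent of the 386/486 (UP-only) path -- illustrative only;
 * local_irq_save()/local_irq_restore() come from <linux/irqflags.h>. */
static long long atomic64_add_return_386_sketch(long long a, atomic64_t *v)
{
	unsigned long flags;
	long long ret;

	local_irq_save(flags);		/* pushf + cli: nothing can interleave on UP */
	ret = v->counter + a;
	v->counter = ret;
	local_irq_restore(flags);	/* popf: restore the saved interrupt state */

	return ret;
}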
The parameter names are changed to match the convention in atomic_64.h
Changes in v3 (due to rebasing onto tip/x86/asm):
- Patches atomic64_32.h instead of atomic_32.h
- Uses the CALL alternative mechanism from commit
1b1d925818 (an illustrative call-site sketch follows the change lists)
Changes in v2:
- Merged 386 and cx8 support into the same patch
- 386 support is now done in assembly; the C code is no longer used at all
- cmpxchg64 is used for atomic64_cmpxchg
- Stopped using macros; one-line inline functions are used instead
- Miscellaneous changes and improvements
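For context on the CALL alternative mechanism mentioned above, here is a hedged sketch of what such a call site can look like. The exact wrapper macros used in atomic64_32.h may differ; the point is that ALTERNATIVE() emits a call to the 386 variant and lets the alternatives machinery patch in a call to the cmpxchg8b variant on CPUs with the CX8 feature, while the operands sit in fixed registers to match the custom calling convention.

/* Illustrative call-site sketch; needs <asm/alternative.h> and
 * <asm/cpufeature.h>. The register constraints shown here are an
 * assumption about the custom calling convention, not a quote of the
 * real header. */
static inline long long atomic64_add_return_callsite_sketch(long long a, atomic64_t *v)
{
	asm volatile(ALTERNATIVE("call atomic64_add_return_386",
				 "call atomic64_add_return_cx8",
				 X86_FEATURE_CX8)
		     : "+A" (a), "+c" (v)	/* a in edx:eax, v in ecx */
		     : : "memory");
	return a;
}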
Signed-off-by: Luca Barbieri <luca@luca-barbieri.com>
LKML-Reference: <1267005265-27958-5-git-send-email-luca@luca-barbieri.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
60 lines · 2.5 KiB · C
/*
 * Export declarations for the 32-bit atomic64 helpers. The functions
 * themselves are implemented in assembly with custom register calling
 * conventions; the prototypes here exist so the symbols can be exported
 * to modules.
 */
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/types.h>

#include <asm/processor.h>
#include <asm/cmpxchg.h>
#include <asm/atomic.h>

/* cmpxchg8b-based implementations (586+). */
long long atomic64_read_cx8(long long, const atomic64_t *v);
EXPORT_SYMBOL(atomic64_read_cx8);
long long atomic64_set_cx8(long long, const atomic64_t *v);
EXPORT_SYMBOL(atomic64_set_cx8);
long long atomic64_xchg_cx8(long long, unsigned high);
EXPORT_SYMBOL(atomic64_xchg_cx8);
long long atomic64_add_return_cx8(long long a, atomic64_t *v);
EXPORT_SYMBOL(atomic64_add_return_cx8);
long long atomic64_sub_return_cx8(long long a, atomic64_t *v);
EXPORT_SYMBOL(atomic64_sub_return_cx8);
long long atomic64_inc_return_cx8(long long a, atomic64_t *v);
EXPORT_SYMBOL(atomic64_inc_return_cx8);
long long atomic64_dec_return_cx8(long long a, atomic64_t *v);
EXPORT_SYMBOL(atomic64_dec_return_cx8);
long long atomic64_dec_if_positive_cx8(atomic64_t *v);
EXPORT_SYMBOL(atomic64_dec_if_positive_cx8);
int atomic64_inc_not_zero_cx8(atomic64_t *v);
EXPORT_SYMBOL(atomic64_inc_not_zero_cx8);
int atomic64_add_unless_cx8(atomic64_t *v, long long a, long long u);
EXPORT_SYMBOL(atomic64_add_unless_cx8);

/*
 * pushf/cli/popf-based fallbacks, built only when the kernel may run
 * on 386/486 CPUs without cmpxchg8b.
 */
#ifndef CONFIG_X86_CMPXCHG64
long long atomic64_read_386(long long, const atomic64_t *v);
EXPORT_SYMBOL(atomic64_read_386);
long long atomic64_set_386(long long, const atomic64_t *v);
EXPORT_SYMBOL(atomic64_set_386);
long long atomic64_xchg_386(long long, unsigned high);
EXPORT_SYMBOL(atomic64_xchg_386);
long long atomic64_add_return_386(long long a, atomic64_t *v);
EXPORT_SYMBOL(atomic64_add_return_386);
long long atomic64_sub_return_386(long long a, atomic64_t *v);
EXPORT_SYMBOL(atomic64_sub_return_386);
long long atomic64_inc_return_386(long long a, atomic64_t *v);
EXPORT_SYMBOL(atomic64_inc_return_386);
long long atomic64_dec_return_386(long long a, atomic64_t *v);
EXPORT_SYMBOL(atomic64_dec_return_386);
long long atomic64_add_386(long long a, atomic64_t *v);
EXPORT_SYMBOL(atomic64_add_386);
long long atomic64_sub_386(long long a, atomic64_t *v);
EXPORT_SYMBOL(atomic64_sub_386);
long long atomic64_inc_386(long long a, atomic64_t *v);
EXPORT_SYMBOL(atomic64_inc_386);
long long atomic64_dec_386(long long a, atomic64_t *v);
EXPORT_SYMBOL(atomic64_dec_386);
long long atomic64_dec_if_positive_386(atomic64_t *v);
EXPORT_SYMBOL(atomic64_dec_if_positive_386);
int atomic64_inc_not_zero_386(atomic64_t *v);
EXPORT_SYMBOL(atomic64_inc_not_zero_386);
int atomic64_add_unless_386(atomic64_t *v, long long a, long long u);
EXPORT_SYMBOL(atomic64_add_unless_386);
#endif