mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
500c2e1fdb
The current locking mechanism uses a ll/sc sequence to release a spinlock. This is slower than a wmb() followed by a store to unlock. The branching forward to .subsection 2 on sc failure slows down the contended case. So we get rid of that part too. Since we are now working on naturally aligned u16 values, we can get rid of a masking operation as the LHU already does the right thing. The ANDI are reversed for better scheduling on multi-issue CPUs On a 12 CPU 750MHz Octeon cn5750 this patch improves ipv4 UDP packet forwarding rates from 3.58*10^6 PPS to 3.99*10^6 PPS, or about 11%. Signed-off-by: David Daney <ddaney@caviumnetworks.com> To: linux-mips@linux-mips.org Patchwork: http://patchwork.linux-mips.org/patch/937/ Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
182 lines
4.9 KiB
C
182 lines
4.9 KiB
C
/*
|
|
* This file is subject to the terms and conditions of the GNU General Public
|
|
* License. See the file "COPYING" in the main directory of this archive
|
|
* for more details.
|
|
*
|
|
* Copyright (C) 2006 by Ralf Baechle (ralf@linux-mips.org)
|
|
*/
|
|
#ifndef __ASM_BARRIER_H
|
|
#define __ASM_BARRIER_H
|
|
|
|
/*
|
|
* read_barrier_depends - Flush all pending reads that subsequents reads
|
|
* depend on.
|
|
*
|
|
* No data-dependent reads from memory-like regions are ever reordered
|
|
* over this barrier. All reads preceding this primitive are guaranteed
|
|
* to access memory (but not necessarily other CPUs' caches) before any
|
|
* reads following this primitive that depend on the data return by
|
|
* any of the preceding reads. This primitive is much lighter weight than
|
|
* rmb() on most CPUs, and is never heavier weight than is
|
|
* rmb().
|
|
*
|
|
* These ordering constraints are respected by both the local CPU
|
|
* and the compiler.
|
|
*
|
|
* Ordering is not guaranteed by anything other than these primitives,
|
|
* not even by data dependencies. See the documentation for
|
|
* memory_barrier() for examples and URLs to more information.
|
|
*
|
|
* For example, the following code would force ordering (the initial
|
|
* value of "a" is zero, "b" is one, and "p" is "&a"):
|
|
*
|
|
* <programlisting>
|
|
* CPU 0 CPU 1
|
|
*
|
|
* b = 2;
|
|
* memory_barrier();
|
|
* p = &b; q = p;
|
|
* read_barrier_depends();
|
|
* d = *q;
|
|
* </programlisting>
|
|
*
|
|
* because the read of "*q" depends on the read of "p" and these
|
|
* two reads are separated by a read_barrier_depends(). However,
|
|
* the following code, with the same initial values for "a" and "b":
|
|
*
|
|
* <programlisting>
|
|
* CPU 0 CPU 1
|
|
*
|
|
* a = 2;
|
|
* memory_barrier();
|
|
* b = 3; y = b;
|
|
* read_barrier_depends();
|
|
* x = a;
|
|
* </programlisting>
|
|
*
|
|
* does not enforce ordering, since there is no data dependency between
|
|
* the read of "a" and the read of "b". Therefore, on some CPUs, such
|
|
* as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
|
|
* in cases like this where there are no data dependencies.
|
|
*/
|
|
|
|
#define read_barrier_depends() do { } while(0)
|
|
#define smp_read_barrier_depends() do { } while(0)
|
|
|
|
#ifdef CONFIG_CPU_HAS_SYNC
|
|
#define __sync() \
|
|
__asm__ __volatile__( \
|
|
".set push\n\t" \
|
|
".set noreorder\n\t" \
|
|
".set mips2\n\t" \
|
|
"sync\n\t" \
|
|
".set pop" \
|
|
: /* no output */ \
|
|
: /* no input */ \
|
|
: "memory")
|
|
#else
|
|
#define __sync() do { } while(0)
|
|
#endif
|
|
|
|
#define __fast_iob() \
|
|
__asm__ __volatile__( \
|
|
".set push\n\t" \
|
|
".set noreorder\n\t" \
|
|
"lw $0,%0\n\t" \
|
|
"nop\n\t" \
|
|
".set pop" \
|
|
: /* no output */ \
|
|
: "m" (*(int *)CKSEG1) \
|
|
: "memory")
|
|
#ifdef CONFIG_CPU_CAVIUM_OCTEON
|
|
# define OCTEON_SYNCW_STR ".set push\n.set arch=octeon\nsyncw\nsyncw\n.set pop\n"
|
|
# define __syncw() __asm__ __volatile__(OCTEON_SYNCW_STR : : : "memory")
|
|
|
|
# define fast_wmb() __syncw()
|
|
# define fast_rmb() barrier()
|
|
# define fast_mb() __sync()
|
|
# define fast_iob() do { } while (0)
|
|
#else /* ! CONFIG_CPU_CAVIUM_OCTEON */
|
|
# define fast_wmb() __sync()
|
|
# define fast_rmb() __sync()
|
|
# define fast_mb() __sync()
|
|
# ifdef CONFIG_SGI_IP28
|
|
# define fast_iob() \
|
|
__asm__ __volatile__( \
|
|
".set push\n\t" \
|
|
".set noreorder\n\t" \
|
|
"lw $0,%0\n\t" \
|
|
"sync\n\t" \
|
|
"lw $0,%0\n\t" \
|
|
".set pop" \
|
|
: /* no output */ \
|
|
: "m" (*(int *)CKSEG1ADDR(0x1fa00004)) \
|
|
: "memory")
|
|
# else
|
|
# define fast_iob() \
|
|
do { \
|
|
__sync(); \
|
|
__fast_iob(); \
|
|
} while (0)
|
|
# endif
|
|
#endif /* CONFIG_CPU_CAVIUM_OCTEON */
|
|
|
|
#ifdef CONFIG_CPU_HAS_WB
|
|
|
|
#include <asm/wbflush.h>
|
|
|
|
#define wmb() fast_wmb()
|
|
#define rmb() fast_rmb()
|
|
#define mb() wbflush()
|
|
#define iob() wbflush()
|
|
|
|
#else /* !CONFIG_CPU_HAS_WB */
|
|
|
|
#define wmb() fast_wmb()
|
|
#define rmb() fast_rmb()
|
|
#define mb() fast_mb()
|
|
#define iob() fast_iob()
|
|
|
|
#endif /* !CONFIG_CPU_HAS_WB */
|
|
|
|
#if defined(CONFIG_WEAK_ORDERING) && defined(CONFIG_SMP)
|
|
# ifdef CONFIG_CPU_CAVIUM_OCTEON
|
|
# define smp_mb() __sync()
|
|
# define smp_rmb() barrier()
|
|
# define smp_wmb() __syncw()
|
|
# else
|
|
# define smp_mb() __asm__ __volatile__("sync" : : :"memory")
|
|
# define smp_rmb() __asm__ __volatile__("sync" : : :"memory")
|
|
# define smp_wmb() __asm__ __volatile__("sync" : : :"memory")
|
|
# endif
|
|
#else
|
|
#define smp_mb() barrier()
|
|
#define smp_rmb() barrier()
|
|
#define smp_wmb() barrier()
|
|
#endif
|
|
|
|
#if defined(CONFIG_WEAK_REORDERING_BEYOND_LLSC) && defined(CONFIG_SMP)
|
|
#define __WEAK_LLSC_MB " sync \n"
|
|
#else
|
|
#define __WEAK_LLSC_MB " \n"
|
|
#endif
|
|
|
|
#define set_mb(var, value) \
|
|
do { var = value; smp_mb(); } while (0)
|
|
|
|
#define smp_llsc_mb() __asm__ __volatile__(__WEAK_LLSC_MB : : :"memory")
|
|
|
|
#ifdef CONFIG_CPU_CAVIUM_OCTEON
|
|
#define smp_mb__before_llsc() smp_wmb()
|
|
/* Cause previous writes to become visible on all CPUs as soon as possible */
|
|
#define nudge_writes() __asm__ __volatile__(".set push\n\t" \
|
|
".set arch=octeon\n\t" \
|
|
"syncw\n\t" \
|
|
".set pop" : : : "memory")
|
|
#else
|
|
#define smp_mb__before_llsc() smp_llsc_mb()
|
|
#define nudge_writes() mb()
|
|
#endif
|
|
|
|
#endif /* __ASM_BARRIER_H */
|