linux_dsm_epyc7002/arch/mips/include/asm/system.h
Ralf Baechle 7837314d14 MIPS: Get rid of branches to .subsections.
It was a nice optimization - on paper at least.  In practice it results in
branches that may exceed the maximum legal range for a branch.  We can
fight that problem with -ffunction-sections but -ffunction-sections again
is incompatible with -pg used by the function tracer.

By rewriting the loop around all simple LL/SC blocks in C we reduce the
amount of inline assembler and at the same time allow GCC to often fill
the branch delay slots with something sensible, or to apply whatever other
clever optimization it may have up its sleeve.

With this optimization gone we also no longer need -ffunction-sections,
so drop it.

This optimization was originally introduced in 2.6.21, as commit
5999eca25c1fd4b9b9aca7833b04d10fe4bc877d (linux-mips.org) and
f65e4fa8e0 (kernel.org), respectively.

Original fix for the issues which caused me to pull this optimization by
Paul Gortmaker <paul.gortmaker@windriver.com>.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
2010-10-29 19:08:24 +01:00

236 lines
5.8 KiB
C

/*
* This file is subject to the terms and conditions of the GNU General Public
* License. See the file "COPYING" in the main directory of this archive
* for more details.
*
* Copyright (C) 1994, 95, 96, 97, 98, 99, 2003, 06 by Ralf Baechle
* Copyright (C) 1996 by Paul M. Antoine
* Copyright (C) 1999 Silicon Graphics
* Kevin D. Kissell, kevink@mips.org and Carsten Langgaard, carstenl@mips.com
* Copyright (C) 2000 MIPS Technologies, Inc.
*/
#ifndef _ASM_SYSTEM_H
#define _ASM_SYSTEM_H
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/irqflags.h>
#include <asm/addrspace.h>
#include <asm/barrier.h>
#include <asm/cmpxchg.h>
#include <asm/cpu-features.h>
#include <asm/dsp.h>
#include <asm/watch.h>
#include <asm/war.h>
/*
* Low-level task switch primitive used by the switch_to() macro in this
* header, which calls it as resume(prev, next, task_thread_info(next)) and
* stores the returned pointer in its "last" argument. Only the declaration
* lives here; the definition is not part of this header.
*/
extern asmlinkage void *resume(void *last, void *next, void *next_ti);
/* Forward declaration so the pointer declaration below needs no full definition. */
struct task_struct;
/*
* Software LL/SC state. ll_bit is reset to 0 by __clear_software_ll_bit()
* on every switch_to(), unless cpu_has_llsc is a non-zero compile-time
* constant.
* NOTE(review): ll_task is presumably the task that owns the emulated LL
* reservation - confirm against the LL/SC emulation code.
*/
extern unsigned int ll_bit;
extern struct task_struct *ll_task;
#ifdef CONFIG_MIPS_MT_FPAFF
/*
 * Handle the scheduler resume end of FPU affinity management.  We do this
 * inline to try to keep the overhead down.  If we have been forced to run on
 * a "CPU" with an FPU because of a previous high level of FP computation,
 * but did not actually use the FPU during the most recent time-slice (CU1
 * isn't set), we undo the restriction on cpus_allowed.
 *
 * We're not calling set_cpus_allowed() here, because we have no need to
 * force prompt migration - we're already switching the current CPU to a
 * different thread.
 *
 * @prev: the task being switched away from.  It is expanded several times,
 *        so it must be free of side effects; every member access goes
 *        through a parenthesized (prev), so arguments such as *pp expand
 *        correctly.
 *
 * NOTE: the macro also writes next->thread.emulated_fp.  "next" is NOT a
 * macro parameter: it binds to whatever identifier named "next" is in scope
 * at the expansion site, so callers must have such a variable.
 */
#define __mips_mt_fpaff_switch_to(prev)					\
do {									\
	struct thread_info *__prev_ti = task_thread_info(prev);		\
									\
	if (cpu_has_fpu &&						\
	    test_ti_thread_flag(__prev_ti, TIF_FPUBOUND) &&		\
	    (!(KSTK_STATUS(prev) & ST0_CU1))) {				\
		clear_ti_thread_flag(__prev_ti, TIF_FPUBOUND);		\
		(prev)->cpus_allowed = (prev)->thread.user_cpus_allowed;	\
	}								\
	next->thread.emulated_fp = 0;					\
} while (0)
#else
/* Without FPU affinity support this only evaluates and discards prev. */
#define __mips_mt_fpaff_switch_to(prev) do { (void) (prev); } while (0)
#endif
/*
 * Reset the software LL/SC link bit (ll_bit) to 0.
 *
 * The store is skipped only when cpu_has_llsc is a compile-time constant
 * with a non-zero value; if it is zero, or not a compile-time constant at
 * all, ll_bit is cleared.
 */
#define __clear_software_ll_bit()					\
do {									\
	if (!(__builtin_constant_p(cpu_has_llsc) && cpu_has_llsc))	\
		ll_bit = 0;						\
} while (0)
/*
 * switch_to(prev, next, last) - hand the CPU from task prev to task next.
 *
 * Steps, in order: run the FPU-affinity hook for prev, save prev's DSP
 * state when cpu_has_dsp is set, clear the software LL bit, then call
 * resume() and store its return value in last.
 *
 * prev and next are each expanded more than once, so they must not have
 * side effects.
 */
#define switch_to(prev, next, last)					\
do {									\
	__mips_mt_fpaff_switch_to(prev);				\
	if (cpu_has_dsp) {						\
		__save_dsp(prev);					\
	}								\
	__clear_software_ll_bit();					\
	(last) = resume(prev, next, task_thread_info(next));		\
} while (0)
/*
 * finish_arch_switch(prev) - architecture hook run after a task switch.
 *
 * Restores state for the task that is now "current": its DSP state when
 * cpu_has_dsp is set, the UserLocal register (from the current thread_info's
 * tp_value) when cpu_has_userlocal is set, and finally the watch registers.
 *
 * The prev argument is accepted but never expanded in the body.
 */
#define finish_arch_switch(prev)					\
do {									\
	if (cpu_has_dsp) {						\
		__restore_dsp(current);					\
	}								\
	if (cpu_has_userlocal) {					\
		write_c0_userlocal(current_thread_info()->tp_value);	\
	}								\
	__restore_watch();						\
} while (0)
/*
* Exchange the 32-bit word at *m with val and return the word's previous
* contents.
*
* One of three implementations is selected by kernel_uses_llsc and
* R10000_LLSC_WAR:
*  - ll/sc with the retry branch (beqzl back to label 1) inside the asm;
*  - ll/sc with a single attempt in the asm and the retry loop in C;
*  - a plain load followed by a store, with local interrupts disabled.
*
* Every path is bracketed by smp_mb__before_llsc() and smp_llsc_mb().
* NOTE(review): these are presumably the SMP memory barriers supplied by
* <asm/barrier.h> - confirm there.
*
* Asm operands: %0 = retval (old value), %1 and %3 = *m, %2 = dummy,
* %4 = val.
*/
static inline unsigned long __xchg_u32(volatile int * m, unsigned int val)
{
__u32 retval;
smp_mb__before_llsc();
if (kernel_uses_llsc && R10000_LLSC_WAR) {
/* dummy receives a copy of val, is stored by sc, and then holds sc's result. */
unsigned long dummy;
/* Retry is done in the asm: beqzl jumps back to the ll while %2 is zero. */
__asm__ __volatile__(
" .set mips3 \n"
"1: ll %0, %3 # xchg_u32 \n"
" .set mips0 \n"
" move %2, %z4 \n"
" .set mips3 \n"
" sc %2, %1 \n"
" beqzl %2, 1b \n"
" .set mips0 \n"
: "=&r" (retval), "=m" (*m), "=&r" (dummy)
: "R" (*m), "Jr" (val)
: "memory");
} else if (kernel_uses_llsc) {
unsigned long dummy;
/* One ll/sc attempt per iteration; repeat while sc left zero in dummy. */
do {
__asm__ __volatile__(
" .set mips3 \n"
" ll %0, %3 # xchg_u32 \n"
" .set mips0 \n"
" move %2, %z4 \n"
" .set mips3 \n"
" sc %2, %1 \n"
" .set mips0 \n"
: "=&r" (retval), "=m" (*m), "=&r" (dummy)
: "R" (*m), "Jr" (val)
: "memory");
} while (unlikely(!dummy));
} else {
/* No ll/sc in use: do the read and write with local interrupts disabled. */
unsigned long flags;
raw_local_irq_save(flags);
retval = *m;
*m = val;
raw_local_irq_restore(flags); /* implies memory barrier */
}
smp_llsc_mb();
return retval;
}
#ifdef CONFIG_64BIT
/*
* Exchange the 64-bit doubleword at *m with val and return the previous
* contents. Only built when CONFIG_64BIT is defined.
*
* Mirrors __xchg_u32, using lld/scd instead of ll/sc. One of three
* implementations is selected by kernel_uses_llsc and R10000_LLSC_WAR:
*  - lld/scd with the retry branch (beqzl back to label 1) inside the asm;
*  - lld/scd with a single attempt in the asm and the retry loop in C;
*  - a plain load followed by a store, with local interrupts disabled.
*
* Every path is bracketed by smp_mb__before_llsc() and smp_llsc_mb().
*
* Asm operands: %0 = retval (old value), %1 and %3 = *m, %2 = dummy,
* %4 = val.
*/
static inline __u64 __xchg_u64(volatile __u64 * m, __u64 val)
{
__u64 retval;
smp_mb__before_llsc();
if (kernel_uses_llsc && R10000_LLSC_WAR) {
/* dummy receives a copy of val, is stored by scd, and then holds scd's result. */
unsigned long dummy;
/* Retry is done in the asm: beqzl jumps back to the lld while %2 is zero. */
__asm__ __volatile__(
" .set mips3 \n"
"1: lld %0, %3 # xchg_u64 \n"
" move %2, %z4 \n"
" scd %2, %1 \n"
" beqzl %2, 1b \n"
" .set mips0 \n"
: "=&r" (retval), "=m" (*m), "=&r" (dummy)
: "R" (*m), "Jr" (val)
: "memory");
} else if (kernel_uses_llsc) {
unsigned long dummy;
/* One lld/scd attempt per iteration; repeat while scd left zero in dummy. */
do {
__asm__ __volatile__(
" .set mips3 \n"
" lld %0, %3 # xchg_u64 \n"
" move %2, %z4 \n"
" scd %2, %1 \n"
" .set mips0 \n"
: "=&r" (retval), "=m" (*m), "=&r" (dummy)
: "R" (*m), "Jr" (val)
: "memory");
} while (unlikely(!dummy));
} else {
/* No ll/sc in use: do the read and write with local interrupts disabled. */
unsigned long flags;
raw_local_irq_save(flags);
retval = *m;
*m = val;
raw_local_irq_restore(flags); /* implies memory barrier */
}
smp_llsc_mb();
return retval;
}
#else
/*
* 32-bit kernels: __xchg_u64 is mapped onto a function that is only declared
* here.
* NOTE(review): presumably it is never defined, so that any 8-byte xchg
* surviving dead-code elimination fails at link time - confirm.
*/
extern __u64 __xchg_u64_unsupported_on_32bit_kernels(volatile __u64 * m, __u64 val);
#define __xchg_u64 __xchg_u64_unsupported_on_32bit_kernels
#endif
/*
 * Width dispatcher behind the xchg() macro.
 *
 * @x:    new value, carried as an unsigned long
 * @ptr:  address of the object to exchange
 * @size: object size in bytes
 *
 * Returns the previous contents of *ptr for 4- and 8-byte objects.  For any
 * other size nothing is written and x itself is returned.
 */
static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
{
	if (size == 4)
		return __xchg_u32(ptr, x);

	if (size == 8)
		return __xchg_u64(ptr, x);

	return x;
}
/*
 * xchg(ptr, x) - store x into *ptr and yield the previous contents of *ptr,
 * converted back to the pointed-to type.
 *
 * The build fails if sizeof(*ptr) has any bit set other than the 4 and 8
 * bits.  ptr is evaluated once at run time (its other uses sit inside
 * sizeof/__typeof__); x is evaluated once.
 */
#define xchg(ptr, x)							\
({									\
	BUILD_BUG_ON(sizeof(*(ptr)) & ~(4 | 8));			\
									\
	(__typeof__(*(ptr)))						\
		__xchg((unsigned long)(x), (ptr), sizeof(*(ptr)));	\
})
/*
* Handler installation entry points; declared here, defined elsewhere.
* NOTE(review): judging by the names and signatures, these install handler
* code of length len at an offset, or a handler for vector number n, and
* the void * returns are presumably the previous handler - confirm against
* the definitions before relying on any of this.
*/
extern void set_handler(unsigned long offset, void *addr, unsigned long len);
extern void set_uncached_handler(unsigned long offset, void *addr, unsigned long len);
/* Handler type accepted by set_vi_handler(): takes no arguments, returns nothing. */
typedef void (*vi_handler_t)(void);
extern void *set_vi_handler(int n, vi_handler_t addr);
extern void *set_except_vector(int n, void *addr);
/* NOTE(review): presumably the exception vector base address - confirm. */
extern unsigned long ebase;
extern void per_cpu_trap_init(void);
/*
* See include/asm-ia64/system.h; prevents deadlock on SMP
* systems.
*/
#define __ARCH_WANT_UNLOCKED_CTXSW
/* Takes a stack pointer value and returns one; defined elsewhere. */
extern unsigned long arch_align_stack(unsigned long sp);
#endif /* _ASM_SYSTEM_H */