linux_dsm_epyc7002/arch/sh/include/asm/processor_32.h
Stuart Menefy d3ea9fa0a5 sh: Minor optimisations to FPU handling
A number of small optimisations to FPU handling, in particular:

 - move the task USEDFPU flag from the thread_info flags field (which
   is accessed asynchronously to the thread) to a new status field,
   which is only accessed by the thread itself. This allows locking to
   be removed in most cases, or can be reduced to a preempt_lock().
   This mimics the i386 behaviour.

 - move the modification of regs->sr and thread_info->status flags out
   of save_fpu() to __unlazy_fpu(). This gives the compiler a better
   chance to optimise things, as well as making save_fpu() symmetrical
   with restore_fpu() and init_fpu().

 - implement prepare_to_copy(), so that when creating a thread, we can
   unlazy the FPU prior to copying the thread data structures.

Also make sure that the FPU is disabled while in the kernel, in
particular while booting, and for newly created kernel threads,

In a very artificial benchmark, the execution time for 2500000
context switches was reduced from 50 to 45 seconds.

Signed-off-by: Stuart Menefy <stuart.menefy@st.com>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
2009-11-24 17:45:38 +09:00

222 lines
4.8 KiB
C

/*
* include/asm-sh/processor.h
*
* Copyright (C) 1999, 2000 Niibe Yutaka
* Copyright (C) 2002, 2003 Paul Mundt
*/
#ifndef __ASM_SH_PROCESSOR_32_H
#define __ASM_SH_PROCESSOR_32_H
#ifdef __KERNEL__
#include <linux/compiler.h>
#include <linux/linkage.h>
#include <asm/page.h>
#include <asm/types.h>
#include <asm/ptrace.h>
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
*/
#define current_text_addr() ({ void *pc; __asm__("mova 1f, %0\n.align 2\n1:":"=z" (pc)); pc; })
/* Core Processor Version Register */
#define CCN_PVR 0xff000030
#define CCN_CVR 0xff000040
#define CCN_PRR 0xff000044
asmlinkage void __init sh_cpu_init(void);
/*
* User space process size: 2GB.
*
* Since SH7709 and SH7750 have "area 7", we can't use 0x7c000000--0x7fffffff
*/
#define TASK_SIZE 0x7c000000UL
#define STACK_TOP TASK_SIZE
#define STACK_TOP_MAX STACK_TOP
/* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
*/
#define TASK_UNMAPPED_BASE (TASK_SIZE / 3)
/*
* Bit of SR register
*
* FD-bit:
* When it's set, it means the processor doesn't have right to use FPU,
* and it results exception when the floating operation is executed.
*
* IMASK-bit:
* Interrupt level mask
*/
#define SR_DSP 0x00001000
#define SR_IMASK 0x000000f0
#define SR_FD 0x00008000
#define SR_MD 0x40000000
/*
* DSP structure and data
*/
struct sh_dsp_struct {
unsigned long dsp_regs[14];
long status;
};
/*
* FPU structure and data
*/
struct sh_fpu_hard_struct {
unsigned long fp_regs[16];
unsigned long xfp_regs[16];
unsigned long fpscr;
unsigned long fpul;
long status; /* software status information */
};
/* Dummy fpu emulator */
struct sh_fpu_soft_struct {
unsigned long fp_regs[16];
unsigned long xfp_regs[16];
unsigned long fpscr;
unsigned long fpul;
unsigned char lookahead;
unsigned long entry_pc;
};
union sh_fpu_union {
struct sh_fpu_hard_struct hard;
struct sh_fpu_soft_struct soft;
};
struct thread_struct {
/* Saved registers when thread is descheduled */
unsigned long sp;
unsigned long pc;
/* Hardware debugging registers */
unsigned long ubc_pc;
/* floating point info */
union sh_fpu_union fpu;
#ifdef CONFIG_SH_DSP
/* Dsp status information */
struct sh_dsp_struct dsp_status;
#endif
};
/* Count of active tasks with UBC settings */
extern int ubc_usercnt;
#define INIT_THREAD { \
.sp = sizeof(init_stack) + (long) &init_stack, \
}
/*
* Do necessary setup to start up a newly executed thread.
*/
#define start_thread(_regs, new_pc, new_sp) \
set_fs(USER_DS); \
_regs->pr = 0; \
_regs->sr = SR_FD; /* User mode. */ \
_regs->pc = new_pc; \
_regs->regs[15] = new_sp
/* Forward declaration, a strange C thing */
struct task_struct;
struct mm_struct;
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
/* Prepare to copy thread state - unlazy all lazy status */
void prepare_to_copy(struct task_struct *tsk);
/*
* create a kernel thread without removing it from tasklists
*/
extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
/* Copy and release all segment info associated with a VM */
#define copy_segments(p, mm) do { } while(0)
#define release_segments(mm) do { } while(0)
/*
* FPU lazy state save handling.
*/
static __inline__ void disable_fpu(void)
{
unsigned long __dummy;
/* Set FD flag in SR */
__asm__ __volatile__("stc sr, %0\n\t"
"or %1, %0\n\t"
"ldc %0, sr"
: "=&r" (__dummy)
: "r" (SR_FD));
}
static __inline__ void enable_fpu(void)
{
unsigned long __dummy;
/* Clear out FD flag in SR */
__asm__ __volatile__("stc sr, %0\n\t"
"and %1, %0\n\t"
"ldc %0, sr"
: "=&r" (__dummy)
: "r" (~SR_FD));
}
/* Double presision, NANS as NANS, rounding to nearest, no exceptions */
#define FPSCR_INIT 0x00080000
#define FPSCR_CAUSE_MASK 0x0001f000 /* Cause bits */
#define FPSCR_FLAG_MASK 0x0000007c /* Flag bits */
/*
* Return saved PC of a blocked thread.
*/
#define thread_saved_pc(tsk) (tsk->thread.pc)
void show_trace(struct task_struct *tsk, unsigned long *sp,
struct pt_regs *regs);
#ifdef CONFIG_DUMP_CODE
void show_code(struct pt_regs *regs);
#else
static inline void show_code(struct pt_regs *regs)
{
}
#endif
extern unsigned long get_wchan(struct task_struct *p);
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->pc)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->regs[15])
#define user_stack_pointer(_regs) ((_regs)->regs[15])
#if defined(CONFIG_CPU_SH2A) || defined(CONFIG_CPU_SH4)
#define PREFETCH_STRIDE L1_CACHE_BYTES
#define ARCH_HAS_PREFETCH
#define ARCH_HAS_PREFETCHW
static inline void prefetch(void *x)
{
__asm__ __volatile__ ("pref @%0\n\t" : : "r" (x) : "memory");
}
#define prefetchw(x) prefetch(x)
#endif
#endif /* __KERNEL__ */
#endif /* __ASM_SH_PROCESSOR_32_H */