linux_dsm_epyc7002/arch/s390/include/asm/processor.h

374 lines
9.7 KiB
C
Raw Normal View History

/*
* S390 version
* Copyright IBM Corp. 1999
* Author(s): Hartmut Penner (hp@de.ibm.com),
* Martin Schwidefsky (schwidefsky@de.ibm.com)
*
* Derived from "include/asm-i386/processor.h"
* Copyright (C) 1994, Linus Torvalds
*/
#ifndef __ASM_S390_PROCESSOR_H
#define __ASM_S390_PROCESSOR_H
#include <linux/const.h>
#define CIF_MCCK_PENDING 0 /* machine check handling is pending */
#define CIF_ASCE 1 /* user asce needs fixup / uaccess */
#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
#define CIF_FPU 3 /* restore FPU registers */
#define CIF_IGNORE_IRQ 4 /* ignore interrupt (for udelay) */
#define CIF_ENABLED_WAIT 5 /* in enabled wait state */
#define _CIF_MCCK_PENDING _BITUL(CIF_MCCK_PENDING)
#define _CIF_ASCE _BITUL(CIF_ASCE)
#define _CIF_NOHZ_DELAY _BITUL(CIF_NOHZ_DELAY)
#define _CIF_FPU _BITUL(CIF_FPU)
#define _CIF_IGNORE_IRQ _BITUL(CIF_IGNORE_IRQ)
#define _CIF_ENABLED_WAIT _BITUL(CIF_ENABLED_WAIT)
#ifndef __ASSEMBLY__
#include <linux/linkage.h>
#include <linux/irqflags.h>
#include <asm/cpu.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/setup.h>
#include <asm/runtime_instr.h>
#include <asm/fpu/types.h>
#include <asm/fpu/internal.h>
static inline void set_cpu_flag(int flag)
{
S390_lowcore.cpu_flags |= (1UL << flag);
}
static inline void clear_cpu_flag(int flag)
{
S390_lowcore.cpu_flags &= ~(1UL << flag);
}
static inline int test_cpu_flag(int flag)
{
return !!(S390_lowcore.cpu_flags & (1UL << flag));
}
/*
* Test CIF flag of another CPU. The caller needs to ensure that
* CPU hotplug can not happen, e.g. by disabling preemption.
*/
static inline int test_cpu_flag_of(int flag, int cpu)
{
struct lowcore *lc = lowcore_ptr[cpu];
return !!(lc->cpu_flags & (1UL << flag));
}
#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
*/
#define current_text_addr() ({ void *pc; asm("basr %0,0" : "=a" (pc)); pc; })
static inline void get_cpu_id(struct cpuid *ptr)
{
asm volatile("stidp %0" : "=Q" (*ptr));
}
void s390_adjust_jiffies(void);
void s390_update_cpu_mhz(void);
void cpu_detect_mhz_feature(void);
extern const struct seq_operations cpuinfo_op;
extern int sysctl_ieee_emulation_warnings;
extern void execve_tail(void);
/*
* User space process size: 2GB for 31 bit, 4TB or 8PT for 64 bit.
*/
#define TASK_SIZE_OF(tsk) ((tsk)->mm->context.asce_limit)
#define TASK_UNMAPPED_BASE (test_thread_flag(TIF_31BIT) ? \
(1UL << 30) : (1UL << 41))
#define TASK_SIZE TASK_SIZE_OF(current)
#define TASK_MAX_SIZE (1UL << 53)
#define STACK_TOP (1UL << (test_thread_flag(TIF_31BIT) ? 31:42))
#define STACK_TOP_MAX (1UL << 42)
#define HAVE_ARCH_PICK_MMAP_LAYOUT
typedef struct {
__u32 ar4;
} mm_segment_t;
/*
* Thread structure
*/
struct thread_struct {
unsigned int acrs[NUM_ACRS];
unsigned long ksp; /* kernel stack pointer */
unsigned long user_timer; /* task cputime in user space */
2017-01-06 00:11:49 +07:00
unsigned long guest_timer; /* task cputime in kvm guest */
unsigned long system_timer; /* task cputime in kernel space */
2017-01-06 00:11:49 +07:00
unsigned long hardirq_timer; /* task cputime in hardirq context */
unsigned long softirq_timer; /* task cputime in softirq context */
unsigned long sys_call_table; /* system call table address */
mm_segment_t mm_segment;
unsigned long gmap_addr; /* address of last gmap fault. */
s390/mm: add shadow gmap support For a nested KVM guest the outer KVM host needs to create shadow page tables for the nested guest. This patch adds the basic support to the guest address space (gmap) code. For each guest address space the inner KVM host creates, the first outer KVM host needs to create shadow page tables. The address space is identified by the ASCE loaded into the control register 1 at the time the inner SIE instruction for the second nested KVM guest is executed. The outer KVM host creates the shadow tables starting with the table identified by the ASCE on a on-demand basis. The outer KVM host will get repeated faults for all the shadow tables needed to run the second KVM guest. While a shadow page table for the second KVM guest is active the access to the origin region, segment and page tables needs to be restricted for the first KVM guest. For region and segment and page tables the first KVM guest may read the memory, but write attempt has to lead to an unshadow. This is done using the page invalid and read-only bits in the page table of the first KVM guest. If the first guest re-accesses one of the origin pages of a shadow, it gets a fault and the affected parts of the shadow page table hierarchy needs to be removed again. PGSTE tables don't have to be shadowed, as all interpretation assist can't deal with the invalid bits in the shadow pte being set differently than the original ones provided by the first KVM guest. Many bug fixes and improvements by David Hildenbrand. Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2016-03-08 18:12:18 +07:00
unsigned int gmap_write_flag; /* gmap fault write indication */
unsigned int gmap_int_code; /* int code of last gmap fault */
unsigned int gmap_pfault; /* signal of a pending guest pfault */
/* Per-thread information related to debugging */
struct per_regs per_user; /* User specified PER registers */
struct per_event per_event; /* Cause of the last PER trap */
unsigned long per_flags; /* Flags to control debug behavior */
unsigned int system_call; /* system call number in signal */
unsigned long last_break; /* last breaking-event-address. */
/* pfault_wait is used to block the process on a pfault event */
unsigned long pfault_wait;
struct list_head list;
/* cpu runtime instrumentation */
struct runtime_instr_cb *ri_cb;
unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
/*
* Warning: 'fpu' is dynamically-sized. It *MUST* be at
* the end.
*/
struct fpu fpu; /* FP and VX register save area */
};
/* Flag to disable transactions. */
#define PER_FLAG_NO_TE 1UL
/* Flag to enable random transaction aborts. */
#define PER_FLAG_TE_ABORT_RAND 2UL
/* Flag to specify random transaction abort mode:
* - abort each transaction at a random instruction before TEND if set.
* - abort random transactions at a random instruction if cleared.
*/
#define PER_FLAG_TE_ABORT_RAND_TEND 4UL
typedef struct thread_struct thread_struct;
/*
* Stack layout of a C stack frame.
*/
#ifndef __PACK_STACK
struct stack_frame {
unsigned long back_chain;
unsigned long empty1[5];
unsigned long gprs[10];
unsigned int empty2[8];
};
#else
struct stack_frame {
unsigned long empty1[5];
unsigned int empty2[8];
unsigned long gprs[10];
unsigned long back_chain;
};
#endif
#define ARCH_MIN_TASKALIGN 8
#define INIT_THREAD { \
.ksp = sizeof(init_stack) + (unsigned long) &init_stack, \
.fpu.regs = (void *) init_task.thread.fpu.fprs, \
}
/*
* Do necessary setup to start up a new thread.
*/
#define start_thread(regs, new_psw, new_stackp) do { \
regs->psw.mask = PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA; \
regs->psw.addr = new_psw; \
regs->gprs[15] = new_stackp; \
execve_tail(); \
} while (0)
#define start_thread31(regs, new_psw, new_stackp) do { \
regs->psw.mask = PSW_USER_BITS | PSW_MASK_BA; \
regs->psw.addr = new_psw; \
regs->gprs[15] = new_stackp; \
s390/mm: fix asce_bits handling with dynamic pagetable levels There is a race with multi-threaded applications between context switch and pagetable upgrade. In switch_mm() a new user_asce is built from mm->pgd and mm->context.asce_bits, w/o holding any locks. A concurrent mmap with a pagetable upgrade on another thread in crst_table_upgrade() could already have set new asce_bits, but not yet the new mm->pgd. This would result in a corrupt user_asce in switch_mm(), and eventually in a kernel panic from a translation exception. Fix this by storing the complete asce instead of just the asce_bits, which can then be read atomically from switch_mm(), so that it either sees the old value or the new value, but no mixture. Both cases are OK. Having the old value would result in a page fault on access to the higher level memory, but the fault handler would see the new mm->pgd, if it was a valid access after the mmap on the other thread has completed. So as worst-case scenario we would have a page fault loop for the racing thread until the next time slice. Also remove dead code and simplify the upgrade/downgrade path, there are no upgrades from 2 levels, and only downgrades from 3 levels for compat tasks. There are also no concurrent upgrades, because the mmap_sem is held with down_write() in do_mmap, so the flush and table checks during upgrade can be removed. Reported-by: Michael Munday <munday@ca.ibm.com> Reviewed-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2016-04-15 21:38:40 +07:00
crst_table_downgrade(current->mm); \
execve_tail(); \
} while (0)
/* Forward declaration, a strange C thing */
struct task_struct;
struct mm_struct;
struct seq_file;
typedef int (*dump_trace_func_t)(void *data, unsigned long address, int reliable);
void dump_trace(dump_trace_func_t func, void *data,
struct task_struct *task, unsigned long sp);
void show_cacheinfo(struct seq_file *m);
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
/*
* Return saved PC of a blocked thread.
*/
extern unsigned long thread_saved_pc(struct task_struct *t);
unsigned long get_wchan(struct task_struct *p);
#define task_pt_regs(tsk) ((struct pt_regs *) \
(task_stack_page(tsk) + THREAD_SIZE) - 1)
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->psw.addr)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->gprs[15])
/* Has task runtime instrumentation enabled ? */
#define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
static inline unsigned long current_stack_pointer(void)
{
unsigned long sp;
asm volatile("la %0,0(15)" : "=a" (sp));
return sp;
}
static inline unsigned short stap(void)
{
unsigned short cpu_address;
asm volatile("stap %0" : "=m" (cpu_address));
return cpu_address;
}
/*
* Give up the time slice of the virtual PU.
*/
#define cpu_relax_yield cpu_relax_yield
locking/core: Introduce cpu_relax_yield() For spinning loops people do often use barrier() or cpu_relax(). For most architectures cpu_relax and barrier are the same, but on some architectures cpu_relax can add some latency. For example on power,sparc64 and arc, cpu_relax can shift the CPU towards other hardware threads in an SMT environment. On s390 cpu_relax does even more, it uses an hypercall to the hypervisor to give up the timeslice. In contrast to the SMT yielding this can result in larger latencies. In some places this latency is unwanted, so another variant "cpu_relax_lowlatency" was introduced. Before this is used in more and more places, lets revert the logic and provide a cpu_relax_yield that can be called in places where yielding is more important than latency. By default this is the same as cpu_relax on all architectures. Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Nicholas Piggin <npiggin@gmail.com> Cc: Noam Camus <noamc@ezchip.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Will Deacon <will.deacon@arm.com> Cc: linuxppc-dev@lists.ozlabs.org Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1477386195-32736-2-git-send-email-borntraeger@de.ibm.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-10-25 16:03:11 +07:00
void cpu_relax_yield(void);
#define cpu_relax() barrier()
#define ECAG_CACHE_ATTRIBUTE 0
#define ECAG_CPU_ATTRIBUTE 1
static inline unsigned long __ecag(unsigned int asi, unsigned char parm)
{
unsigned long val;
asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */
: "=d" (val) : "a" (asi << 8 | parm));
return val;
}
static inline void psw_set_key(unsigned int key)
{
asm volatile("spka 0(%0)" : : "d" (key));
}
/*
* Set PSW to specified value.
*/
static inline void __load_psw(psw_t psw)
{
asm volatile("lpswe %0" : : "Q" (psw) : "cc");
}
/*
* Set PSW mask to specified value, while leaving the
* PSW addr pointing to the next instruction.
*/
static inline void __load_psw_mask(unsigned long mask)
{
unsigned long addr;
psw_t psw;
psw.mask = mask;
asm volatile(
" larl %0,1f\n"
" stg %0,%O1+8(%R1)\n"
" lpswe %1\n"
"1:"
: "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc");
}
/*
* Extract current PSW mask
*/
static inline unsigned long __extract_psw(void)
{
unsigned int reg1, reg2;
asm volatile("epsw %0,%1" : "=d" (reg1), "=a" (reg2));
return (((unsigned long) reg1) << 32) | ((unsigned long) reg2);
}
static inline void local_mcck_enable(void)
{
__load_psw_mask(__extract_psw() | PSW_MASK_MCHECK);
}
static inline void local_mcck_disable(void)
{
__load_psw_mask(__extract_psw() & ~PSW_MASK_MCHECK);
}
/*
* Rewind PSW instruction address by specified number of bytes.
*/
static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
{
unsigned long mask;
mask = (psw.mask & PSW_MASK_EA) ? -1UL :
(psw.mask & PSW_MASK_BA) ? (1UL << 31) - 1 :
(1UL << 24) - 1;
return (psw.addr - ilc) & mask;
}
/*
* Function to stop a processor until the next interrupt occurs
*/
void enabled_wait(void);
/*
* Function to drop a processor into disabled wait state
*/
static inline void __noreturn disabled_wait(unsigned long code)
{
psw_t psw;
psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT | PSW_MASK_BA | PSW_MASK_EA;
psw.addr = code;
__load_psw(psw);
while (1);
}
/*
* Basic Machine Check/Program Check Handler.
*/
extern void s390_base_mcck_handler(void);
extern void s390_base_pgm_handler(void);
extern void s390_base_ext_handler(void);
extern void (*s390_base_mcck_handler_fn)(void);
extern void (*s390_base_pgm_handler_fn)(void);
extern void (*s390_base_ext_handler_fn)(void);
#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL
extern int memcpy_real(void *, void *, size_t);
extern void memcpy_absolute(void *, void *, size_t);
#define mem_assign_absolute(dest, val) do { \
__typeof__(dest) __tmp = (val); \
\
BUILD_BUG_ON(sizeof(__tmp) != sizeof(val)); \
memcpy_absolute(&(dest), &__tmp, sizeof(__tmp)); \
} while (0)
#endif /* __ASSEMBLY__ */
#endif /* __ASM_S390_PROCESSOR_H */