linux_dsm_epyc7002/arch/s390/include/asm/processor.h

365 lines
9.2 KiB
C
Raw Normal View History

/*
* S390 version
* Copyright IBM Corp. 1999
* Author(s): Hartmut Penner (hp@de.ibm.com),
* Martin Schwidefsky (schwidefsky@de.ibm.com)
*
* Derived from "include/asm-i386/processor.h"
* Copyright (C) 1994, Linus Torvalds
*/
#ifndef __ASM_S390_PROCESSOR_H
#define __ASM_S390_PROCESSOR_H
#include <linux/const.h>
/*
 * CPU flags (CIF_*). Every flag is defined as a bit number, directly
 * followed by the _CIF_* bit mask derived from it with _BITUL().
 */
#define CIF_MCCK_PENDING	0	/* machine check handling is pending */
#define _CIF_MCCK_PENDING	_BITUL(CIF_MCCK_PENDING)
#define CIF_ASCE		1	/* user asce needs fixup / uaccess */
#define _CIF_ASCE		_BITUL(CIF_ASCE)
#define CIF_NOHZ_DELAY		2	/* delay HZ disable for a tick */
#define _CIF_NOHZ_DELAY		_BITUL(CIF_NOHZ_DELAY)
#define CIF_FPU			3	/* restore FPU registers */
#define _CIF_FPU		_BITUL(CIF_FPU)
#define CIF_IGNORE_IRQ		4	/* ignore interrupt (for udelay) */
#define _CIF_IGNORE_IRQ		_BITUL(CIF_IGNORE_IRQ)
#define CIF_ENABLED_WAIT	5	/* in enabled wait state */
#define _CIF_ENABLED_WAIT	_BITUL(CIF_ENABLED_WAIT)
#ifndef __ASSEMBLY__
#include <linux/linkage.h>
#include <linux/irqflags.h>
#include <asm/cpu.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/setup.h>
#include <asm/runtime_instr.h>
#include <asm/fpu/types.h>
#include <asm/fpu/internal.h>
/*
 * Set the bit numbered @flag (a CIF_* bit number) in
 * S390_lowcore.cpu_flags.
 */
static inline void set_cpu_flag(int flag)
{
	unsigned long bit = 1UL << flag;

	S390_lowcore.cpu_flags |= bit;
}
/*
 * Clear the bit numbered @flag (a CIF_* bit number) in
 * S390_lowcore.cpu_flags.
 */
static inline void clear_cpu_flag(int flag)
{
	unsigned long bit = 1UL << flag;

	S390_lowcore.cpu_flags &= ~bit;
}
/*
 * Return 1 if the bit numbered @flag (a CIF_* bit number) is set in
 * S390_lowcore.cpu_flags, 0 otherwise.
 */
static inline int test_cpu_flag(int flag)
{
	unsigned long bit = 1UL << flag;

	return (S390_lowcore.cpu_flags & bit) != 0;
}
/*
* Test CIF flag of another CPU. The caller needs to ensure that
* CPU hotplug can not happen, e.g. by disabling preemption.
*/
static inline int test_cpu_flag_of(int flag, int cpu)
{
struct lowcore *lc = lowcore_ptr[cpu];
return !!(lc->cpu_flags & (1UL << flag));
}
#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
/*
 * Default implementation of macro that returns current
 * instruction pointer ("program counter").
 *
 * Implemented as a statement expression: "basr %0,0" writes its result
 * into an address register bound to the local 'pc', which is then the
 * value of the whole expression.
 */
#define current_text_addr() ({ void *pc; asm("basr %0,0" : "=a" (pc)); pc; })
/*
 * Fill *ptr with the result of the "stidp" instruction; *ptr is the
 * instruction's memory output operand.
 */
static inline void get_cpu_id(struct cpuid *ptr)
{
asm volatile("stidp %0" : "=Q" (*ptr));
}
/*
 * Declarations only; the definitions of these functions and variables
 * are not part of this header.
 */
void s390_adjust_jiffies(void);
void s390_update_cpu_mhz(void);
void cpu_detect_mhz_feature(void);
extern const struct seq_operations cpuinfo_op;
extern int sysctl_ieee_emulation_warnings;
/* Called as the last step of start_thread() and start_thread31(). */
extern void execve_tail(void);
/*
 * User space process size: 2GB for 31 bit, 4TB or 8PB for 64 bit.
 *
 * TASK_SIZE is read from the mm's asce_limit; TASK_UNMAPPED_BASE and
 * STACK_TOP depend on whether the current thread has TIF_31BIT set.
 */
#define TASK_SIZE_OF(tsk)	((tsk)->mm->context.asce_limit)
#define TASK_SIZE		TASK_SIZE_OF(current)
#define TASK_MAX_SIZE		(1UL << 53)
#define TASK_UNMAPPED_BASE	\
	(test_thread_flag(TIF_31BIT) ? (1UL << 30) : (1UL << 41))
#define STACK_TOP		\
	(test_thread_flag(TIF_31BIT) ? (1UL << 31) : (1UL << 42))
#define STACK_TOP_MAX		(1UL << 42)
#define HAVE_ARCH_PICK_MMAP_LAYOUT
/*
 * Type of thread_struct.mm_segment; wraps a single 32-bit value.
 * NOTE(review): 'ar4' presumably refers to access register 4 - confirm.
 */
typedef struct {
__u32 ar4;
} mm_segment_t;
/*
* Thread structure
*/
struct thread_struct {
unsigned int acrs[NUM_ACRS];
unsigned long ksp; /* kernel stack pointer */
mm_segment_t mm_segment;
unsigned long gmap_addr; /* address of last gmap fault. */
s390/mm: add shadow gmap support For a nested KVM guest the outer KVM host needs to create shadow page tables for the nested guest. This patch adds the basic support to the guest address space (gmap) code. For each guest address space the inner KVM host creates, the first outer KVM host needs to create shadow page tables. The address space is identified by the ASCE loaded into the control register 1 at the time the inner SIE instruction for the second nested KVM guest is executed. The outer KVM host creates the shadow tables starting with the table identified by the ASCE on a on-demand basis. The outer KVM host will get repeated faults for all the shadow tables needed to run the second KVM guest. While a shadow page table for the second KVM guest is active the access to the origin region, segment and page tables needs to be restricted for the first KVM guest. For region and segment and page tables the first KVM guest may read the memory, but write attempt has to lead to an unshadow. This is done using the page invalid and read-only bits in the page table of the first KVM guest. If the first guest re-accesses one of the origin pages of a shadow, it gets a fault and the affected parts of the shadow page table hierarchy needs to be removed again. PGSTE tables don't have to be shadowed, as all interpretation assist can't deal with the invalid bits in the shadow pte being set differently than the original ones provided by the first KVM guest. Many bug fixes and improvements by David Hildenbrand. Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2016-03-08 18:12:18 +07:00
unsigned int gmap_write_flag; /* gmap fault write indication */
unsigned int gmap_int_code; /* int code of last gmap fault */
unsigned int gmap_pfault; /* signal of a pending guest pfault */
struct per_regs per_user; /* User specified PER registers */
struct per_event per_event; /* Cause of the last PER trap */
unsigned long per_flags; /* Flags to control debug behavior */
/* pfault_wait is used to block the process on a pfault event */
unsigned long pfault_wait;
struct list_head list;
/* cpu runtime instrumentation */
struct runtime_instr_cb *ri_cb;
unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
/*
* Warning: 'fpu' is dynamically-sized. It *MUST* be at
* the end.
*/
struct fpu fpu; /* FP and VX register save area */
};
/*
 * PER_FLAG_* bit values.
 * NOTE(review): presumably stored in thread_struct.per_flags - confirm.
 */
/* Disable transactions. */
#define PER_FLAG_NO_TE			(1UL << 0)
/* Enable random transaction aborts. */
#define PER_FLAG_TE_ABORT_RAND		(1UL << 1)
/*
 * Select the random transaction abort mode:
 * - set:     abort each transaction at a random instruction before TEND.
 * - cleared: abort random transactions at a random instruction.
 */
#define PER_FLAG_TE_ABORT_RAND_TEND	(1UL << 2)
/* Allow 'thread_struct' to be used without the 'struct' keyword. */
typedef struct thread_struct thread_struct;
/*
 * Stack layout of a C stack frame.
 *
 * Two layouts exist, selected by __PACK_STACK: without it back_chain is
 * the first member, with it back_chain is the last member and the order
 * of gprs[] and empty2[] is swapped. Both variants contain the same
 * members with the same sizes.
 */
#ifndef __PACK_STACK
struct stack_frame {
unsigned long back_chain;
unsigned long empty1[5];
unsigned long gprs[10];
unsigned int empty2[8];
};
#else
struct stack_frame {
unsigned long empty1[5];
unsigned int empty2[8];
unsigned long gprs[10];
unsigned long back_chain;
};
#endif
/* NOTE(review): presumably the minimum task_struct alignment in bytes - confirm. */
#define ARCH_MIN_TASKALIGN 8
/*
 * Static initializer for a thread_struct: ksp is set to the address
 * just past the end of init_stack, and fpu.regs points at the fprs
 * array embedded in init_task's own thread.fpu.
 */
#define INIT_THREAD { \
.ksp = sizeof(init_stack) + (unsigned long) &init_stack, \
.fpu.regs = (void *) init_task.thread.fpu.fprs, \
}
/*
 * Do necessary setup to start up a new thread.
 *
 * @regs:       pointer to the register set to initialize
 * @new_psw:    value stored into psw.addr (instruction address)
 * @new_stackp: value stored into gprs[15]
 *
 * The PSW mask is set to PSW_USER_BITS with both PSW_MASK_EA and
 * PSW_MASK_BA, then execve_tail() is called. The macro arguments are
 * parenthesized so that arbitrary expressions can be passed; note that
 * @regs is evaluated more than once.
 */
#define start_thread(regs, new_psw, new_stackp) do {			\
	(regs)->psw.mask = PSW_USER_BITS | PSW_MASK_EA | PSW_MASK_BA;	\
	(regs)->psw.addr = (new_psw);					\
	(regs)->gprs[15] = (new_stackp);				\
	execve_tail();							\
} while (0)
/*
 * Variant of start_thread() that does not set PSW_MASK_EA in the PSW
 * mask and additionally calls crst_table_downgrade() on current->mm
 * before execve_tail().
 *
 * @regs:       pointer to the register set to initialize
 * @new_psw:    value stored into psw.addr (instruction address)
 * @new_stackp: value stored into gprs[15]
 *
 * The macro arguments are parenthesized so that arbitrary expressions
 * can be passed; note that @regs is evaluated more than once.
 */
#define start_thread31(regs, new_psw, new_stackp) do {			\
	(regs)->psw.mask = PSW_USER_BITS | PSW_MASK_BA;			\
	(regs)->psw.addr = (new_psw);					\
	(regs)->gprs[15] = (new_stackp);				\
	crst_table_downgrade(current->mm);				\
	execve_tail();							\
} while (0)
/*
 * Forward declarations: only pointers to these structures are used in
 * the prototypes below, so the full definitions are not needed here.
 */
struct task_struct;
struct mm_struct;
struct seq_file;
/*
 * Callback type taken by dump_trace(): receives the opaque @data
 * pointer, an address and a 'reliable' indication, and returns an int.
 */
typedef int (*dump_trace_func_t)(void *data, unsigned long address, int reliable);
void dump_trace(dump_trace_func_t func, void *data,
struct task_struct *task, unsigned long sp);
void show_cacheinfo(struct seq_file *m);
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
/*
 * Return saved PC of a blocked thread.
 */
extern unsigned long thread_saved_pc(struct task_struct *t);
/* Declaration only; the definition is not part of this header. */
unsigned long get_wchan(struct task_struct *p);
/*
 * Pointer to the last struct pt_regs sized slot below
 * task_stack_page(tsk) + THREAD_SIZE.
 */
#define task_pt_regs(tsk) ((struct pt_regs *) \
(task_stack_page(tsk) + THREAD_SIZE) - 1)
/* Instruction address (psw.addr) saved in the task's pt_regs. */
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->psw.addr)
/* Register 15 saved in the task's pt_regs (start_thread() stores the stack pointer there). */
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->gprs[15])
/* Has task runtime instrumentation enabled ? */
#define is_ri_task(tsk) (!!(tsk)->thread.ri_cb)
/*
 * Return the current value of register 15, obtained with
 * "la %0,0(15)" (load address: displacement 0, base register 15).
 */
static inline unsigned long current_stack_pointer(void)
{
unsigned long sp;
asm volatile("la %0,0(15)" : "=a" (sp));
return sp;
}
/*
 * Return the 16-bit value the "stap" instruction stores into memory.
 * NOTE(review): presumably the address of the executing CPU, as the
 * local variable name suggests - confirm.
 */
static inline unsigned short stap(void)
{
unsigned short cpu_address;
asm volatile("stap %0" : "=m" (cpu_address));
return cpu_address;
}
/*
 * Give up the time slice of the virtual PU.
 *
 * On s390 this uses a hypercall to the hypervisor to give up the
 * timeslice, which can result in larger latencies than a plain
 * barrier(). Call it where yielding is more important than latency.
 */
void cpu_relax_yield(void);

/* On s390 cpu_relax() yields, see cpu_relax_yield(). */
#define cpu_relax() cpu_relax_yield()

/*
 * Relax, but only if it can be done with very low latency: no yield,
 * just a compiler barrier.
 */
#define cpu_relax_lowlatency() barrier()
/* NOTE(review): presumably values for the 'asi' argument of __ecag() - confirm. */
#define ECAG_CACHE_ATTRIBUTE 0
#define ECAG_CPU_ATTRIBUTE 1
/*
 * Execute the "ecag" instruction (emitted via .insn) and return its
 * result register.
 *
 * @asi:  placed in the operand above the low 8 bits (asi << 8)
 * @parm: placed in the low 8 bits of the operand
 */
static inline unsigned long __ecag(unsigned int asi, unsigned char parm)
{
unsigned long val;
asm volatile(".insn rsy,0xeb000000004c,%0,0,0(%1)" /* ecag */
: "=d" (val) : "a" (asi << 8 | parm));
return val;
}
/*
 * Issue "spka 0(%0)" with @key in a register.
 * NOTE(review): presumably sets the PSW key, as the name says - confirm
 * the expected encoding of @key with the callers.
 */
static inline void psw_set_key(unsigned int key)
{
asm volatile("spka 0(%0)" : : "d" (key));
}
/*
 * Set PSW to specified value.
 *
 * @psw is passed by value and handed to "lpswe" as a memory operand;
 * the condition code is declared as clobbered.
 */
static inline void __load_psw(psw_t psw)
{
asm volatile("lpswe %0" : : "Q" (psw) : "cc");
}
/*
 * Set PSW mask to specified value, while leaving the
 * PSW addr pointing to the next instruction.
 *
 * A local psw_t gets @mask, then the asm sequence:
 *   larl  - loads the address of local label 1 (placed directly after
 *           the lpswe) into 'addr'
 *   stg   - stores that address at offset 8 of the local psw
 *           (NOTE(review): presumably the psw.addr member - confirm)
 *   lpswe - loads the completed PSW
 */
static inline void __load_psw_mask(unsigned long mask)
{
unsigned long addr;
psw_t psw;
psw.mask = mask;
asm volatile(
" larl %0,1f\n"
" stg %0,%O1+8(%R1)\n"
" lpswe %1\n"
"1:"
: "=&d" (addr), "=Q" (psw) : "Q" (psw) : "memory", "cc");
}
/*
 * Extract current PSW mask.
 *
 * "epsw" delivers two 32-bit halves; the first becomes the upper and
 * the second the lower 32 bits of the returned value.
 */
static inline unsigned long __extract_psw(void)
{
	unsigned int upper, lower;
	unsigned long mask;

	asm volatile("epsw %0,%1" : "=d" (upper), "=a" (lower));
	mask = (unsigned long) upper;
	mask <<= 32;
	mask |= (unsigned long) lower;
	return mask;
}
/* Set PSW_MASK_MCHECK in the current PSW mask. */
static inline void local_mcck_enable(void)
{
	unsigned long mask = __extract_psw();

	mask |= PSW_MASK_MCHECK;
	__load_psw_mask(mask);
}
/* Clear PSW_MASK_MCHECK in the current PSW mask. */
static inline void local_mcck_disable(void)
{
	unsigned long mask = __extract_psw();

	mask &= ~PSW_MASK_MCHECK;
	__load_psw_mask(mask);
}
/*
 * Rewind PSW instruction address by specified number of bytes.
 *
 * The result wraps within the address width selected by the PSW mask:
 * all bits if PSW_MASK_EA is set, otherwise 31 bits if PSW_MASK_BA is
 * set, otherwise 24 bits.
 */
static inline unsigned long __rewind_psw(psw_t psw, unsigned long ilc)
{
	unsigned long addr_mask;

	if (psw.mask & PSW_MASK_EA)
		addr_mask = -1UL;
	else if (psw.mask & PSW_MASK_BA)
		addr_mask = (1UL << 31) - 1;
	else
		addr_mask = (1UL << 24) - 1;

	return (psw.addr - ilc) & addr_mask;
}
/*
 * Function to stop a processor until the next interrupt occurs
 * (declaration only; the definition is not part of this header).
 */
void enabled_wait(void);
/*
 * Function to drop a processor into disabled wait state.
 *
 * Loads a PSW whose mask has PSW_MASK_WAIT set and whose address field
 * carries @code. Never returns; the endless loop backs up the
 * __noreturn annotation.
 */
static inline void __noreturn disabled_wait(unsigned long code)
{
	psw_t wait_psw;

	wait_psw.addr = code;
	wait_psw.mask = PSW_MASK_BASE | PSW_MASK_WAIT |
			PSW_MASK_BA | PSW_MASK_EA;
	__load_psw(wait_psw);
	for (;;)
		;
}
/*
 * Basic Machine Check/Program Check Handler.
 *
 * For each of the machine check (mcck), program check (pgm) and
 * external (ext) cases there is a handler function and a function
 * pointer variable with the _fn suffix.
 * NOTE(review): presumably the handlers call through the matching _fn
 * pointer - confirm against the definitions.
 */
extern void s390_base_mcck_handler(void);
extern void s390_base_pgm_handler(void);
extern void s390_base_ext_handler(void);
extern void (*s390_base_mcck_handler_fn)(void);
extern void (*s390_base_pgm_handler_fn)(void);
extern void (*s390_base_ext_handler_fn)(void);
/* Highest address below 2GB (2^31 - 1). */
#define ARCH_LOW_ADDRESS_LIMIT 0x7fffffffUL
/*
 * Copy helpers defined elsewhere. memcpy_real() returns an int,
 * memcpy_absolute() returns nothing.
 * NOTE(review): argument order is presumably (dest, src, count) as for
 * memcpy(); mem_assign_absolute() passes the destination first.
 */
extern int memcpy_real(void *, void *, size_t);
extern void memcpy_absolute(void *, void *, size_t);
/*
 * Assign @val to @dest by copying it with memcpy_absolute().
 *
 * @val is first converted to the type of @dest in a temporary; the
 * build fails if that conversion changes the size of the value. The
 * body is wrapped in do { } while (0) so that the macro behaves like a
 * single statement, e.g. in an unbraced if/else.
 */
#define mem_assign_absolute(dest, val) do {			\
	__typeof__(dest) __tmp = (val);				\
								\
	BUILD_BUG_ON(sizeof(__tmp) != sizeof(val));		\
	memcpy_absolute(&(dest), &__tmp, sizeof(__tmp));	\
} while (0)
#endif /* __ASSEMBLY__ */
#endif /* __ASM_S390_PROCESSOR_H */