2008-01-30 19:30:33 +07:00
|
|
|
#ifndef _X86_IRQFLAGS_H_
|
|
|
|
#define _X86_IRQFLAGS_H_
|
|
|
|
|
|
|
|
#include <asm/processor-flags.h>
|
|
|
|
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
/*
 * Interrupt control:
 */
|
|
|
|
|
|
|
|
static inline unsigned long native_save_fl(void)
|
|
|
|
{
|
|
|
|
unsigned long flags;
|
|
|
|
|
2009-08-04 06:33:40 +07:00
|
|
|
/*
|
2009-08-26 06:47:16 +07:00
|
|
|
* "=rm" is safe here, because "pop" adjusts the stack before
|
|
|
|
* it evaluates its effective address -- this is part of the
|
|
|
|
* documented behavior of the "pop" instruction.
|
2009-08-04 06:33:40 +07:00
|
|
|
*/
|
2008-03-23 15:02:30 +07:00
|
|
|
asm volatile("# __raw_save_flags\n\t"
|
|
|
|
"pushf ; pop %0"
|
2009-08-26 06:47:16 +07:00
|
|
|
: "=rm" (flags)
|
2008-03-23 15:02:30 +07:00
|
|
|
: /* no input */
|
|
|
|
: "memory");
|
2008-01-30 19:30:33 +07:00
|
|
|
|
|
|
|
return flags;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void native_restore_fl(unsigned long flags)
|
|
|
|
{
|
2008-03-23 15:02:30 +07:00
|
|
|
asm volatile("push %0 ; popf"
|
|
|
|
: /* no output */
|
|
|
|
:"g" (flags)
|
|
|
|
:"memory", "cc");
|
2008-01-30 19:30:33 +07:00
|
|
|
}
|
|
|
|
|
|
|
|
static inline void native_irq_disable(void)
|
|
|
|
{
|
|
|
|
asm volatile("cli": : :"memory");
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void native_irq_enable(void)
|
|
|
|
{
|
|
|
|
asm volatile("sti": : :"memory");
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void native_safe_halt(void)
|
|
|
|
{
|
|
|
|
asm volatile("sti; hlt": : :"memory");
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline void native_halt(void)
|
|
|
|
{
|
|
|
|
asm volatile("hlt": : :"memory");
|
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef CONFIG_PARAVIRT
|
|
|
|
#include <asm/paravirt.h>
|
|
|
|
#else
|
|
|
|
#ifndef __ASSEMBLY__
|
2011-07-02 10:04:36 +07:00
|
|
|
#include <linux/types.h>
|
2008-01-30 19:30:33 +07:00
|
|
|
|
2011-07-02 10:04:36 +07:00
|
|
|
/*
 * arch_local_save_flags - non-paravirt implementation; forwards to
 * native_save_fl() and returns its result.
 */
static inline notrace unsigned long arch_local_save_flags(void)
{
	return native_save_fl();
}
|
|
|
|
|
2011-07-02 10:04:36 +07:00
|
|
|
/*
 * arch_local_irq_restore - non-paravirt implementation; forwards
 * @flags to native_restore_fl().
 * @flags: flags value to restore.
 */
static inline notrace void arch_local_irq_restore(unsigned long flags)
{
	native_restore_fl(flags);
}
|
|
|
|
|
2011-07-02 10:04:36 +07:00
|
|
|
/*
 * arch_local_irq_disable - non-paravirt implementation; forwards to
 * native_irq_disable().
 */
static inline notrace void arch_local_irq_disable(void)
{
	native_irq_disable();
}
|
|
|
|
|
2011-07-02 10:04:36 +07:00
|
|
|
/*
 * arch_local_irq_enable - non-paravirt implementation; forwards to
 * native_irq_enable().
 */
static inline notrace void arch_local_irq_enable(void)
{
	native_irq_enable();
}
|
|
|
|
|
|
|
|
/*
 * Used in the idle loop; sti takes one instruction cycle
 * to complete:
 *
 * Non-paravirt implementation; forwards to native_safe_halt().
 */
static inline void arch_safe_halt(void)
{
	native_safe_halt();
}
|
|
|
|
|
|
|
|
/*
 * Used when interrupts are already enabled or to
 * shutdown the processor:
 *
 * Non-paravirt implementation; forwards to native_halt().
 */
static inline void halt(void)
{
	native_halt();
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For spinlocks, etc:
|
|
|
|
*/
|
2011-07-02 10:04:36 +07:00
|
|
|
static inline notrace unsigned long arch_local_irq_save(void)
|
2008-01-30 19:30:33 +07:00
|
|
|
{
|
2010-10-07 20:08:55 +07:00
|
|
|
unsigned long flags = arch_local_save_flags();
|
|
|
|
arch_local_irq_disable();
|
2008-01-30 19:30:33 +07:00
|
|
|
return flags;
|
|
|
|
}
|
|
|
|
#else
|
|
|
|
|
|
|
|
/*
 * Assembly-side interrupt control for the non-paravirt build: each
 * macro expands to the bare instruction; the argument is ignored.
 */
#define ENABLE_INTERRUPTS(x)	sti
#define DISABLE_INTERRUPTS(x)	cli
|
|
|
|
|
|
|
|
#ifdef CONFIG_X86_64
/* 64-bit, non-paravirt: the macros below expand to the raw instructions. */
#define SWAPGS	swapgs
/*
 * Currently paravirt can't handle swapgs nicely when we
 * don't have a stack we can rely on (such as a user space
 * stack).  So we either find a way around these or just fault
 * and emulate if a guest tries to call swapgs directly.
 *
 * Either way, this is a good way to document that we don't
 * have a reliable stack. x86_64 only.
 */
#define SWAPGS_UNSAFE_STACK	swapgs

/* Expands to nothing in the non-paravirt build. */
#define PARAVIRT_ADJUST_EXCEPTION_FRAME	/*  */

#define INTERRUPT_RETURN	jmp native_iret
#define USERGS_SYSRET64				\
	swapgs;					\
	sysretq;
#define USERGS_SYSRET32				\
	swapgs;					\
	sysretl
#define ENABLE_INTERRUPTS_SYSEXIT32		\
	swapgs;					\
	sti;					\
	sysexit

#else
/* 32-bit, non-paravirt. */
#define INTERRUPT_RETURN		iret
#define ENABLE_INTERRUPTS_SYSEXIT	sti; sysexit
#define GET_CR0_INTO_EAX		movl %cr0, %eax
#endif
|
|
|
|
|
|
|
|
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
#endif /* CONFIG_PARAVIRT */
|
|
|
|
|
|
|
|
#ifndef __ASSEMBLY__
|
2010-10-07 20:08:55 +07:00
|
|
|
static inline int arch_irqs_disabled_flags(unsigned long flags)
|
2008-01-30 19:30:33 +07:00
|
|
|
{
|
|
|
|
return !(flags & X86_EFLAGS_IF);
|
|
|
|
}
|
|
|
|
|
2010-10-07 20:08:55 +07:00
|
|
|
/*
 * arch_irqs_disabled - test the current flags.
 *
 * Reads the flags with arch_local_save_flags() and returns the result
 * of arch_irqs_disabled_flags() on that value.
 */
static inline int arch_irqs_disabled(void)
{
	unsigned long flags = arch_local_save_flags();

	return arch_irqs_disabled_flags(flags);
}
|
2015-03-26 03:14:26 +07:00
|
|
|
#endif /* !__ASSEMBLY__ */
|
2008-01-30 19:30:33 +07:00
|
|
|
|
2015-03-26 03:14:26 +07:00
|
|
|
#ifdef __ASSEMBLY__
|
|
|
|
/*
 * Assembly hooks for irq-flag tracing: with CONFIG_TRACE_IRQFLAGS they
 * call the trace_hardirqs_{on,off}_thunk symbols, otherwise they
 * expand to nothing.
 */
#ifdef CONFIG_TRACE_IRQFLAGS
#  define TRACE_IRQS_ON		call trace_hardirqs_on_thunk;
#  define TRACE_IRQS_OFF	call trace_hardirqs_off_thunk;
#else
#  define TRACE_IRQS_ON
#  define TRACE_IRQS_OFF
#endif
|
|
|
|
/*
 * Assembly hooks for the lockdep system-call-exit check.  Without
 * CONFIG_DEBUG_LOCK_ALLOC both LOCKDEP_SYS_EXIT and
 * LOCKDEP_SYS_EXIT_IRQ expand to nothing.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
#  ifdef CONFIG_X86_64
#    define ARCH_LOCKDEP_SYS_EXIT	call lockdep_sys_exit_thunk
/*
 * Enables interrupts around the LOCKDEP_SYS_EXIT call and disables
 * them again afterwards, with the matching TRACE_IRQS_* hooks.
 * SAVE_EXTRA_REGS and RESTORE_EXTRA_REGS bracket the call; they are
 * defined outside this header.
 */
#    define ARCH_LOCKDEP_SYS_EXIT_IRQ \
	TRACE_IRQS_ON; \
	sti; \
	SAVE_EXTRA_REGS; \
	LOCKDEP_SYS_EXIT; \
	RESTORE_EXTRA_REGS; \
	cli; \
	TRACE_IRQS_OFF;
#  else
/*
 * Saves %eax, %ecx and %edx on the stack around the call to
 * lockdep_sys_exit and restores them in reverse order.
 */
#    define ARCH_LOCKDEP_SYS_EXIT \
	pushl %eax;				\
	pushl %ecx;				\
	pushl %edx;				\
	call lockdep_sys_exit;			\
	popl %edx;				\
	popl %ecx;				\
	popl %eax;
#    define ARCH_LOCKDEP_SYS_EXIT_IRQ
#  endif
#  define LOCKDEP_SYS_EXIT	ARCH_LOCKDEP_SYS_EXIT
#  define LOCKDEP_SYS_EXIT_IRQ	ARCH_LOCKDEP_SYS_EXIT_IRQ
#else
#  define LOCKDEP_SYS_EXIT
#  define LOCKDEP_SYS_EXIT_IRQ
#endif
|
|
|
|
#endif /* __ASSEMBLY__ */
|
2015-03-26 03:14:26 +07:00
|
|
|
|
2007-10-11 16:20:03 +07:00
|
|
|
#endif
|