Mirror of https://github.com/AuxXxilium/linux_dsm_epyc7002.git (synced 2024-11-24 07:40:53 +07:00)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
Lots of overlapping changes. Also, on the net-next side the XDP state
management is handled more in the generic layers, so undo the 'net' nfp
fix which isn't applicable in net-next. Include a necessary change by
Jakub Kicinski, with log message:

====================
cls_bpf no longer takes care of offload tracking. Make sure netdevsim
performs necessary checks. This fixes a warning caused by TC trying to
remove a filter it has not added.

Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
commit fba961ab29
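The cls_bpf note above amounts to a driver-side rule: once TC stops tracking which filters were offloaded, the driver itself must refuse to tear down an offload it never accepted. The sketch below illustrates that kind of check in plain C; the nsim_* names and the offloaded_progs list are assumptions for illustration only, not the actual netdevsim code carried by this merge.

#include <linux/list.h>
#include <linux/errno.h>

struct bpf_prog;

/* Illustrative only: hypothetical offload bookkeeping in a TC offload
 * callback. The nsim_* names and the offloaded_progs list are made up.
 */
struct nsim_prog {
	struct list_head list;
	struct bpf_prog *prog;		/* program accepted for offload */
};

struct nsim_dev {
	struct list_head offloaded_progs;
};

static bool nsim_prog_is_offloaded(struct nsim_dev *ns, struct bpf_prog *prog)
{
	struct nsim_prog *p;

	list_for_each_entry(p, &ns->offloaded_progs, list)
		if (p->prog == prog)
			return true;
	return false;
}

static int nsim_prog_destroy(struct nsim_dev *ns, struct bpf_prog *prog)
{
	/* TC may ask to remove a filter this device never offloaded;
	 * reject the request instead of tripping a warning later.
	 */
	if (!nsim_prog_is_offloaded(ns, prog))
		return -ENOENT;

	/* ...unhook the program and drop it from offloaded_progs... */
	return 0;
}

The point of the shape above is that the rejection happens in the driver callback itself, which is where the responsibility lands once cls_bpf no longer keeps the offload list.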
@@ -13,7 +13,6 @@ Required properties:
  at25df321a
  at25df641
  at26df081a
  en25s64
  mr25h128
  mr25h256
  mr25h10
@@ -33,7 +32,6 @@ Required properties:
  s25fl008k
  s25fl064k
  sst25vf040b
  sst25wf040b
  m25p40
  m25p80
  m25p16

@@ -12,24 +12,30 @@ Required properties:
- "fsl,imx53-ecspi" for SPI compatible with the one integrated on i.MX53 and later Soc
- reg : Offset and length of the register set for the device
- interrupts : Should contain CSPI/eCSPI interrupt
- cs-gpios : Specifies the gpio pins to be used for chipselects.
- clocks : Clock specifiers for both ipg and per clocks.
- clock-names : Clock names should include both "ipg" and "per"
  See the clock consumer binding,
  Documentation/devicetree/bindings/clock/clock-bindings.txt
- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
  Documentation/devicetree/bindings/dma/dma.txt
- dma-names: DMA request names should include "tx" and "rx" if present.

Obsolete properties:
- fsl,spi-num-chipselects : Contains the number of the chipselect

Recommended properties:
- cs-gpios : GPIOs to use as chip selects, see spi-bus.txt. While the native chip
  select lines can be used, they appear to always generate a pulse between each
  word of a transfer. Most use cases will require GPIO based chip selects to
  generate a valid transaction.

Optional properties:
- num-cs : Number of total chip selects, see spi-bus.txt.
- dmas: DMA specifiers for tx and rx dma. See the DMA client binding,
  Documentation/devicetree/bindings/dma/dma.txt.
- dma-names: DMA request names, if present, should include "tx" and "rx".
- fsl,spi-rdy-drctl: Integer, representing the value of DRCTL, the register
  controlling the SPI_READY handling. Note that to enable the DRCTL consideration,
  the SPI_READY mode-flag needs to be set too.
  Valid values are: 0 (disabled), 1 (edge-triggered burst) and 2 (level-triggered burst).

Obsolete properties:
- fsl,spi-num-chipselects : Contains the number of the chipselect

Example:

ecspi@70010000 {
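For reference, the new fsl,spi-rdy-drctl property described above is a plain u32 with three valid values, so a consumer would typically read and bound-check it at probe time. The sketch below is a hedged illustration of that, not the spi-imx driver change itself; the function name is made up.

#include <linux/of.h>
#include <linux/errno.h>

/* Illustrative sketch (not the spi-imx driver): read the optional
 * fsl,spi-rdy-drctl property and bound-check it against 0..2.
 */
static int example_read_spi_rdy_drctl(struct device_node *np, u32 *drctl)
{
	int ret = of_property_read_u32(np, "fsl,spi-rdy-drctl", drctl);

	if (ret == -EINVAL) {		/* property absent */
		*drctl = 0;		/* 0 == SPI_READY handling disabled */
		return 0;
	}
	if (ret)
		return ret;

	/* Valid values: 0 (disabled), 1 (edge-triggered), 2 (level-triggered) */
	return *drctl > 2 ? -EINVAL : 0;
}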
Makefile
@@ -2,7 +2,7 @@
VERSION = 4
PATCHLEVEL = 15
SUBLEVEL = 0
EXTRAVERSION = -rc3
EXTRAVERSION = -rc4
NAME = Fearless Coyote

# *DOCUMENTATION*

@@ -85,7 +85,11 @@
.pushsection .text.fixup,"ax"
.align 4
9001: mov r4, #-EFAULT
#ifdef CONFIG_CPU_SW_DOMAIN_PAN
ldr r5, [sp, #9*4] @ *err_ptr
#else
ldr r5, [sp, #8*4] @ *err_ptr
#endif
str r4, [r5]
ldmia sp, {r1, r2} @ retrieve dst, len
add r2, r2, r1

@@ -74,6 +74,9 @@ static void __hyp_text __debug_save_spe_nvhe(u64 *pmscr_el1)
{
u64 reg;

/* Clear pmscr in case of early return */
*pmscr_el1 = 0;

/* SPE present on this CPU? */
if (!cpuid_feature_extract_unsigned_field(read_sysreg(id_aa64dfr0_el1),
ID_AA64DFR0_PMSVER_SHIFT))

@@ -35,7 +35,12 @@ struct thread_info {

/* thread information allocation */

#ifdef CONFIG_IRQSTACKS
#define THREAD_SIZE_ORDER 2 /* PA-RISC requires at least 16k stack */
#else
#define THREAD_SIZE_ORDER 3 /* PA-RISC requires at least 32k stack */
#endif

/* Be sure to hunt all references to this down when you change the size of
* the kernel stack */
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)

@@ -878,9 +878,6 @@ ENTRY_CFI(syscall_exit_rfi)
STREG %r19,PT_SR7(%r16)

intr_return:
/* NOTE: Need to enable interrupts incase we schedule. */
ssm PSW_SM_I, %r0

/* check for reschedule */
mfctl %cr30,%r1
LDREG TI_FLAGS(%r1),%r19 /* sched.h: TIF_NEED_RESCHED */
@@ -907,6 +904,11 @@ intr_check_sig:
LDREG PT_IASQ1(%r16), %r20
cmpib,COND(=),n 0,%r20,intr_restore /* backward */

/* NOTE: We need to enable interrupts if we have to deliver
* signals. We used to do this earlier but it caused kernel
* stack overflows. */
ssm PSW_SM_I, %r0

copy %r0, %r25 /* long in_syscall = 0 */
#ifdef CONFIG_64BIT
ldo -16(%r30),%r29 /* Reference param save area */
@@ -958,6 +960,10 @@ intr_do_resched:
cmpib,COND(=) 0, %r20, intr_do_preempt
nop

/* NOTE: We need to enable interrupts if we schedule. We used
* to do this earlier but it caused kernel stack overflows. */
ssm PSW_SM_I, %r0

#ifdef CONFIG_64BIT
ldo -16(%r30),%r29 /* Reference param save area */
#endif

@@ -305,6 +305,7 @@ ENDPROC_CFI(os_hpmc)

__INITRODATA
.align 4
.export os_hpmc_size
os_hpmc_size:
.word .os_hpmc_end-.os_hpmc

@@ -15,7 +15,6 @@
#include <linux/slab.h>
#include <linux/kallsyms.h>
#include <linux/sort.h>
#include <linux/sched.h>

#include <linux/uaccess.h>
#include <asm/assembly.h>

@@ -16,9 +16,7 @@
#include <linux/preempt.h>
#include <linux/init.h>

#include <asm/processor.h>
#include <asm/delay.h>

#include <asm/special_insns.h> /* for mfctl() */
#include <asm/processor.h> /* for boot_cpu_data */

@@ -763,7 +763,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
func = (u8 *) __bpf_call_base + imm;

/* Save skb pointer if we need to re-cache skb data */
if (bpf_helper_changes_pkt_data(func))
if ((ctx->seen & SEEN_SKB) &&
bpf_helper_changes_pkt_data(func))
PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));

bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -772,7 +773,8 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
PPC_MR(b2p[BPF_REG_0], 3);

/* refresh skb cache */
if (bpf_helper_changes_pkt_data(func)) {
if ((ctx->seen & SEEN_SKB) &&
bpf_helper_changes_pkt_data(func)) {
/* reload skb pointer to r3 */
PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
bpf_jit_emit_skb_loads(image, ctx);

@@ -55,8 +55,7 @@ struct bpf_jit {
#define SEEN_LITERAL 8 /* code uses literals */
#define SEEN_FUNC 16 /* calls C functions */
#define SEEN_TAIL_CALL 32 /* code uses tail calls */
#define SEEN_SKB_CHANGE 64 /* code changes skb data */
#define SEEN_REG_AX 128 /* code uses constant blinding */
#define SEEN_REG_AX 64 /* code uses constant blinding */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)

/*
@@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
REG_15, 152);
}
if (jit->seen & SEEN_SKB)
if (jit->seen & SEEN_SKB) {
emit_load_skb_data_hlen(jit);
if (jit->seen & SEEN_SKB_CHANGE)
/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
STK_OFF_SKBP);
}
}

/*
@@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
EMIT2(0x0d00, REG_14, REG_W1);
/* lgr %b0,%r2: load return value into %b0 */
EMIT4(0xb9040000, BPF_REG_0, REG_2);
if (bpf_helper_changes_pkt_data((void *)func)) {
jit->seen |= SEEN_SKB_CHANGE;
if ((jit->seen & SEEN_SKB) &&
bpf_helper_changes_pkt_data((void *)func)) {
/* lg %b1,ST_OFF_SKBP(%r15) */
EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
REG_15, STK_OFF_SKBP);

@@ -113,7 +113,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
if (!printk_ratelimit())
return;

printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), address,
(void *)regs->pc, (void *)regs->u_regs[UREG_I7],

@@ -154,7 +154,7 @@ show_signal_msg(struct pt_regs *regs, int sig, int code,
if (!printk_ratelimit())
return;

printk("%s%s[%d]: segfault at %lx ip %p (rpc %p) sp %p error %x",
printk("%s%s[%d]: segfault at %lx ip %px (rpc %px) sp %px error %x",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), address,
(void *)regs->tpc, (void *)regs->u_regs[UREG_I7],

@@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
u8 *func = ((u8 *)__bpf_call_base) + imm;

ctx->saw_call = true;
if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);

emit_call((u32 *)func, ctx);
emit_nop(ctx);

emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);

if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind)
load_skb_regs(ctx, bpf2sparc[BPF_REG_6]);
if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
load_skb_regs(ctx, L7);
break;
}

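The powerpc, s390 and sparc hunks above all converge on the same rule: only spill and reload the skb pointer (and re-derive the cached skb data) around a helper call when the program actually touched skb data and the helper may move packet data. Expressed as plain C, the guard looks roughly like the sketch below; the flag name follows the s390 JIT, but the function itself is illustrative and is not code from any one JIT.

#include <stdbool.h>

#define SEEN_SKB	1	/* set while JITing when the program uses skb data */

/* Illustrative restatement of the guard added above:
 * helper_changes_pkt_data() stands in for bpf_helper_changes_pkt_data().
 */
static bool jit_must_reload_skb_cache(unsigned int seen,
				      bool (*helper_changes_pkt_data)(void *),
				      void *helper)
{
	return (seen & SEEN_SKB) && helper_changes_pkt_data(helper);
}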
@@ -150,7 +150,7 @@ static void show_segv_info(struct uml_pt_regs *regs)
if (!printk_ratelimit())
return;

printk("%s%s[%d]: segfault at %lx ip %p sp %p error %x",
printk("%s%s[%d]: segfault at %lx ip %px sp %px error %x",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), FAULT_ADDRESS(*fi),
(void *)UPT_IP(regs), (void *)UPT_SP(regs),

@@ -941,7 +941,8 @@ ENTRY(debug)
movl %esp, %eax # pt_regs pointer

/* Are we currently on the SYSENTER stack? */
PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
movl PER_CPU_VAR(cpu_entry_area), %ecx
addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
cmpl $SIZEOF_SYSENTER_stack, %ecx
jb .Ldebug_from_sysenter_stack
@@ -984,7 +985,8 @@ ENTRY(nmi)
movl %esp, %eax # pt_regs pointer

/* Are we currently on the SYSENTER stack? */
PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
movl PER_CPU_VAR(cpu_entry_area), %ecx
addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
cmpl $SIZEOF_SYSENTER_stack, %ecx
jb .Lnmi_from_sysenter_stack

@@ -140,6 +140,64 @@ END(native_usergs_sysret64)
* with them due to bugs in both AMD and Intel CPUs.
*/

.pushsection .entry_trampoline, "ax"

/*
* The code in here gets remapped into cpu_entry_area's trampoline. This means
* that the assembler and linker have the wrong idea as to where this code
* lives (and, in fact, it's mapped more than once, so it's not even at a
* fixed address). So we can't reference any symbols outside the entry
* trampoline and expect it to work.
*
* Instead, we carefully abuse %rip-relative addressing.
* _entry_trampoline(%rip) refers to the start of the remapped) entry
* trampoline. We can thus find cpu_entry_area with this macro:
*/

#define CPU_ENTRY_AREA \
_entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)

/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
#define RSP_SCRATCH CPU_ENTRY_AREA_SYSENTER_stack + \
SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA

ENTRY(entry_SYSCALL_64_trampoline)
UNWIND_HINT_EMPTY
swapgs

/* Stash the user RSP. */
movq %rsp, RSP_SCRATCH

/* Load the top of the task stack into RSP */
movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp

/* Start building the simulated IRET frame. */
pushq $__USER_DS /* pt_regs->ss */
pushq RSP_SCRATCH /* pt_regs->sp */
pushq %r11 /* pt_regs->flags */
pushq $__USER_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */

/*
* x86 lacks a near absolute jump, and we can't jump to the real
* entry text with a relative jump. We could push the target
* address and then use retq, but this destroys the pipeline on
* many CPUs (wasting over 20 cycles on Sandy Bridge). Instead,
* spill RDI and restore it in a second-stage trampoline.
*/
pushq %rdi
movq $entry_SYSCALL_64_stage2, %rdi
jmp *%rdi
END(entry_SYSCALL_64_trampoline)

.popsection

ENTRY(entry_SYSCALL_64_stage2)
UNWIND_HINT_EMPTY
popq %rdi
jmp entry_SYSCALL_64_after_hwframe
END(entry_SYSCALL_64_stage2)

ENTRY(entry_SYSCALL_64)
UNWIND_HINT_EMPTY
/*
@@ -330,8 +388,24 @@ syscall_return_via_sysret:
popq %rsi /* skip rcx */
popq %rdx
popq %rsi

/*
* Now all regs are restored except RSP and RDI.
* Save old stack pointer and switch to trampoline stack.
*/
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp

pushq RSP-RDI(%rdi) /* RSP */
pushq (%rdi) /* RDI */

/*
* We are on the trampoline stack. All regs except RDI are live.
* We can do future final exit work right here.
*/

popq %rdi
movq RSP-ORIG_RAX(%rsp), %rsp
popq %rsp
USERGS_SYSRET64
END(entry_SYSCALL_64)

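The CPU_ENTRY_AREA macro above leans on the fact that the remapped trampoline text sits at a fixed offset inside struct cpu_entry_area, so subtracting that offset from the trampoline's runtime (%rip-relative) address recovers the per-CPU area base. A C analogue of the same arithmetic is sketched below for clarity; it is illustrative only, not code from this commit, and it assumes the struct cpu_entry_area declared in the asm/fixmap.h hunk later in this diff.

#include <stddef.h>

/* Illustrative C analogue of the CPU_ENTRY_AREA trick: given the address
 * at which the trampoline is currently executing, back up by its offset
 * within struct cpu_entry_area to find the base of this CPU's area.
 */
static inline struct cpu_entry_area *area_from_trampoline(void *trampoline_va)
{
	return (struct cpu_entry_area *)((char *)trampoline_va -
			offsetof(struct cpu_entry_area, entry_trampoline));
}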
@ -466,12 +540,13 @@ END(irq_entries_start)
|
||||
|
||||
.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
|
||||
#ifdef CONFIG_DEBUG_ENTRY
|
||||
pushfq
|
||||
testl $X86_EFLAGS_IF, (%rsp)
|
||||
pushq %rax
|
||||
SAVE_FLAGS(CLBR_RAX)
|
||||
testl $X86_EFLAGS_IF, %eax
|
||||
jz .Lokay_\@
|
||||
ud2
|
||||
.Lokay_\@:
|
||||
addq $8, %rsp
|
||||
popq %rax
|
||||
#endif
|
||||
.endm
|
||||
|
||||
@ -563,6 +638,13 @@ END(irq_entries_start)
|
||||
/* 0(%rsp): ~(interrupt number) */
|
||||
.macro interrupt func
|
||||
cld
|
||||
|
||||
testb $3, CS-ORIG_RAX(%rsp)
|
||||
jz 1f
|
||||
SWAPGS
|
||||
call switch_to_thread_stack
|
||||
1:
|
||||
|
||||
ALLOC_PT_GPREGS_ON_STACK
|
||||
SAVE_C_REGS
|
||||
SAVE_EXTRA_REGS
|
||||
@ -572,12 +654,8 @@ END(irq_entries_start)
|
||||
jz 1f
|
||||
|
||||
/*
|
||||
* IRQ from user mode. Switch to kernel gsbase and inform context
|
||||
* tracking that we're in kernel mode.
|
||||
*/
|
||||
SWAPGS
|
||||
|
||||
/*
|
||||
* IRQ from user mode.
|
||||
*
|
||||
* We need to tell lockdep that IRQs are off. We can't do this until
|
||||
* we fix gsbase, and we should do it before enter_from_user_mode
|
||||
* (which can take locks). Since TRACE_IRQS_OFF idempotent,
|
||||
@ -630,10 +708,41 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
|
||||
ud2
|
||||
1:
|
||||
#endif
|
||||
SWAPGS
|
||||
POP_EXTRA_REGS
|
||||
POP_C_REGS
|
||||
addq $8, %rsp /* skip regs->orig_ax */
|
||||
popq %r11
|
||||
popq %r10
|
||||
popq %r9
|
||||
popq %r8
|
||||
popq %rax
|
||||
popq %rcx
|
||||
popq %rdx
|
||||
popq %rsi
|
||||
|
||||
/*
|
||||
* The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
|
||||
* Save old stack pointer and switch to trampoline stack.
|
||||
*/
|
||||
movq %rsp, %rdi
|
||||
movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
|
||||
|
||||
/* Copy the IRET frame to the trampoline stack. */
|
||||
pushq 6*8(%rdi) /* SS */
|
||||
pushq 5*8(%rdi) /* RSP */
|
||||
pushq 4*8(%rdi) /* EFLAGS */
|
||||
pushq 3*8(%rdi) /* CS */
|
||||
pushq 2*8(%rdi) /* RIP */
|
||||
|
||||
/* Push user RDI on the trampoline stack. */
|
||||
pushq (%rdi)
|
||||
|
||||
/*
|
||||
* We are on the trampoline stack. All regs except RDI are live.
|
||||
* We can do future final exit work right here.
|
||||
*/
|
||||
|
||||
/* Restore RDI. */
|
||||
popq %rdi
|
||||
SWAPGS
|
||||
INTERRUPT_RETURN
|
||||
|
||||
|
||||
@ -829,7 +938,33 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
|
||||
/*
|
||||
* Exception entry points.
|
||||
*/
|
||||
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
|
||||
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
|
||||
|
||||
/*
|
||||
* Switch to the thread stack. This is called with the IRET frame and
|
||||
* orig_ax on the stack. (That is, RDI..R12 are not on the stack and
|
||||
* space has not been allocated for them.)
|
||||
*/
|
||||
ENTRY(switch_to_thread_stack)
|
||||
UNWIND_HINT_FUNC
|
||||
|
||||
pushq %rdi
|
||||
movq %rsp, %rdi
|
||||
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||
UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
|
||||
|
||||
pushq 7*8(%rdi) /* regs->ss */
|
||||
pushq 6*8(%rdi) /* regs->rsp */
|
||||
pushq 5*8(%rdi) /* regs->eflags */
|
||||
pushq 4*8(%rdi) /* regs->cs */
|
||||
pushq 3*8(%rdi) /* regs->ip */
|
||||
pushq 2*8(%rdi) /* regs->orig_ax */
|
||||
pushq 8(%rdi) /* return address */
|
||||
UNWIND_HINT_FUNC
|
||||
|
||||
movq (%rdi), %rdi
|
||||
ret
|
||||
END(switch_to_thread_stack)
|
||||
|
||||
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
|
||||
ENTRY(\sym)
|
||||
@ -848,11 +983,12 @@ ENTRY(\sym)
|
||||
|
||||
ALLOC_PT_GPREGS_ON_STACK
|
||||
|
||||
.if \paranoid
|
||||
.if \paranoid == 1
|
||||
.if \paranoid < 2
|
||||
testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
|
||||
jnz 1f
|
||||
jnz .Lfrom_usermode_switch_stack_\@
|
||||
.endif
|
||||
|
||||
.if \paranoid
|
||||
call paranoid_entry
|
||||
.else
|
||||
call error_entry
|
||||
@ -894,20 +1030,15 @@ ENTRY(\sym)
|
||||
jmp error_exit
|
||||
.endif
|
||||
|
||||
.if \paranoid == 1
|
||||
.if \paranoid < 2
|
||||
/*
|
||||
* Paranoid entry from userspace. Switch stacks and treat it
|
||||
* Entry from userspace. Switch stacks and treat it
|
||||
* as a normal entry. This means that paranoid handlers
|
||||
* run in real process context if user_mode(regs).
|
||||
*/
|
||||
1:
|
||||
.Lfrom_usermode_switch_stack_\@:
|
||||
call error_entry
|
||||
|
||||
|
||||
movq %rsp, %rdi /* pt_regs pointer */
|
||||
call sync_regs
|
||||
movq %rax, %rsp /* switch stack */
|
||||
|
||||
movq %rsp, %rdi /* pt_regs pointer */
|
||||
|
||||
.if \has_error_code
|
||||
@ -1170,6 +1301,14 @@ ENTRY(error_entry)
|
||||
SWAPGS
|
||||
|
||||
.Lerror_entry_from_usermode_after_swapgs:
|
||||
/* Put us onto the real thread stack. */
|
||||
popq %r12 /* save return addr in %12 */
|
||||
movq %rsp, %rdi /* arg0 = pt_regs pointer */
|
||||
call sync_regs
|
||||
movq %rax, %rsp /* switch stack */
|
||||
ENCODE_FRAME_POINTER
|
||||
pushq %r12
|
||||
|
||||
/*
|
||||
* We need to tell lockdep that IRQs are off. We can't do this until
|
||||
* we fix gsbase, and we should do it before enter_from_user_mode
|
||||
|
@ -48,7 +48,7 @@
|
||||
*/
|
||||
ENTRY(entry_SYSENTER_compat)
|
||||
/* Interrupts are off on entry. */
|
||||
SWAPGS_UNSAFE_STACK
|
||||
SWAPGS
|
||||
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||
|
||||
/*
|
||||
@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat)
|
||||
*/
|
||||
movl %eax, %eax
|
||||
|
||||
/* Construct struct pt_regs on stack (iret frame is already on stack) */
|
||||
pushq %rax /* pt_regs->orig_ax */
|
||||
|
||||
/* switch to thread stack expects orig_ax to be pushed */
|
||||
call switch_to_thread_stack
|
||||
|
||||
pushq %rdi /* pt_regs->di */
|
||||
pushq %rsi /* pt_regs->si */
|
||||
pushq %rdx /* pt_regs->dx */
|
||||
|
@ -135,6 +135,8 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
|
||||
set_bit(bit, (unsigned long *)cpu_caps_set); \
|
||||
} while (0)
|
||||
|
||||
#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
|
||||
|
||||
#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
|
||||
/*
|
||||
* Static testing of CPU features. Used the same as boot_cpu_has().
|
||||
|
@ -60,17 +60,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
|
||||
return this_cpu_ptr(&gdt_page)->gdt;
|
||||
}
|
||||
|
||||
/* Get the fixmap index for a specific processor */
|
||||
static inline unsigned int get_cpu_gdt_ro_index(int cpu)
|
||||
{
|
||||
return FIX_GDT_REMAP_BEGIN + cpu;
|
||||
}
|
||||
|
||||
/* Provide the fixmap address of the remapped GDT */
|
||||
static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
|
||||
{
|
||||
unsigned int idx = get_cpu_gdt_ro_index(cpu);
|
||||
return (struct desc_struct *)__fix_to_virt(idx);
|
||||
return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
|
||||
}
|
||||
|
||||
/* Provide the current read-only GDT */
|
||||
@ -185,7 +178,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
|
||||
static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
|
||||
{
|
||||
struct desc_struct *d = get_cpu_gdt_rw(cpu);
|
||||
tss_desc tss;
|
||||
|
@ -44,6 +44,45 @@ extern unsigned long __FIXADDR_TOP;
|
||||
PAGE_SIZE)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* cpu_entry_area is a percpu region in the fixmap that contains things
|
||||
* needed by the CPU and early entry/exit code. Real types aren't used
|
||||
* for all fields here to avoid circular header dependencies.
|
||||
*
|
||||
* Every field is a virtual alias of some other allocated backing store.
|
||||
* There is no direct allocation of a struct cpu_entry_area.
|
||||
*/
|
||||
struct cpu_entry_area {
|
||||
char gdt[PAGE_SIZE];
|
||||
|
||||
/*
|
||||
* The GDT is just below SYSENTER_stack and thus serves (on x86_64) as
|
||||
* a a read-only guard page.
|
||||
*/
|
||||
struct SYSENTER_stack_page SYSENTER_stack_page;
|
||||
|
||||
/*
|
||||
* On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
|
||||
* we need task switches to work, and task switches write to the TSS.
|
||||
*/
|
||||
struct tss_struct tss;
|
||||
|
||||
char entry_trampoline[PAGE_SIZE];
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Exception stacks used for IST entries.
|
||||
*
|
||||
* In the future, this should have a separate slot for each stack
|
||||
* with guard pages between them.
|
||||
*/
|
||||
char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
|
||||
#endif
|
||||
};
|
||||
|
||||
#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
|
||||
|
||||
extern void setup_cpu_entry_areas(void);
|
||||
|
||||
/*
|
||||
* Here we define all the compile-time 'special' virtual
|
||||
@ -101,8 +140,8 @@ enum fixed_addresses {
|
||||
FIX_LNW_VRTC,
|
||||
#endif
|
||||
/* Fixmap entries to remap the GDTs, one per processor. */
|
||||
FIX_GDT_REMAP_BEGIN,
|
||||
FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
|
||||
FIX_CPU_ENTRY_AREA_TOP,
|
||||
FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
|
||||
|
||||
#ifdef CONFIG_ACPI_APEI_GHES
|
||||
/* Used for GHES mapping from assorted contexts */
|
||||
@ -191,5 +230,30 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
|
||||
void __early_set_fixmap(enum fixed_addresses idx,
|
||||
phys_addr_t phys, pgprot_t flags);
|
||||
|
||||
static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
|
||||
{
|
||||
BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
|
||||
|
||||
return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
|
||||
}
|
||||
|
||||
#define __get_cpu_entry_area_offset_index(cpu, offset) ({ \
|
||||
BUILD_BUG_ON(offset % PAGE_SIZE != 0); \
|
||||
__get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE); \
|
||||
})
|
||||
|
||||
#define get_cpu_entry_area_index(cpu, field) \
|
||||
__get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))
|
||||
|
||||
static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
|
||||
{
|
||||
return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
|
||||
}
|
||||
|
||||
static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
|
||||
{
|
||||
return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack;
|
||||
}
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
#endif /* _ASM_X86_FIXMAP_H */
|
||||
|
@ -20,16 +20,7 @@
|
||||
#ifndef _ASM_X86_HYPERVISOR_H
|
||||
#define _ASM_X86_HYPERVISOR_H
|
||||
|
||||
#ifdef CONFIG_HYPERVISOR_GUEST
|
||||
|
||||
#include <asm/kvm_para.h>
|
||||
#include <asm/x86_init.h>
|
||||
#include <asm/xen/hypervisor.h>
|
||||
|
||||
/*
|
||||
* x86 hypervisor information
|
||||
*/
|
||||
|
||||
/* x86 hypervisor types */
|
||||
enum x86_hypervisor_type {
|
||||
X86_HYPER_NATIVE = 0,
|
||||
X86_HYPER_VMWARE,
|
||||
@ -39,6 +30,12 @@ enum x86_hypervisor_type {
|
||||
X86_HYPER_KVM,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_HYPERVISOR_GUEST
|
||||
|
||||
#include <asm/kvm_para.h>
|
||||
#include <asm/x86_init.h>
|
||||
#include <asm/xen/hypervisor.h>
|
||||
|
||||
struct hypervisor_x86 {
|
||||
/* Hypervisor name */
|
||||
const char *name;
|
||||
@ -58,7 +55,15 @@ struct hypervisor_x86 {
|
||||
|
||||
extern enum x86_hypervisor_type x86_hyper_type;
|
||||
extern void init_hypervisor_platform(void);
|
||||
static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
|
||||
{
|
||||
return x86_hyper_type == type;
|
||||
}
|
||||
#else
|
||||
static inline void init_hypervisor_platform(void) { }
|
||||
static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
|
||||
{
|
||||
return type == X86_HYPER_NATIVE;
|
||||
}
|
||||
#endif /* CONFIG_HYPERVISOR_GUEST */
|
||||
#endif /* _ASM_X86_HYPERVISOR_H */
|
||||
|
@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
|
||||
swapgs; \
|
||||
sysretl
|
||||
|
||||
#ifdef CONFIG_DEBUG_ENTRY
|
||||
#define SAVE_FLAGS(x) pushfq; popq %rax
|
||||
#endif
|
||||
#else
|
||||
#define INTERRUPT_RETURN iret
|
||||
#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
|
||||
|
@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
|
||||
extern int __must_check __die(const char *, struct pt_regs *, long);
|
||||
extern void show_stack_regs(struct pt_regs *regs);
|
||||
extern void __show_regs(struct pt_regs *regs, int all);
|
||||
extern void show_iret_regs(struct pt_regs *regs);
|
||||
extern unsigned long oops_begin(void);
|
||||
extern void oops_end(unsigned long, struct pt_regs *, int signr);
|
||||
|
||||
|
@ -927,6 +927,15 @@ extern void default_banner(void);
|
||||
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
|
||||
CLBR_NONE, \
|
||||
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
|
||||
|
||||
#ifdef CONFIG_DEBUG_ENTRY
|
||||
#define SAVE_FLAGS(clobbers) \
|
||||
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
|
||||
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
|
||||
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
|
||||
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
@ -163,9 +163,9 @@ enum cpuid_regs_idx {
|
||||
extern struct cpuinfo_x86 boot_cpu_data;
|
||||
extern struct cpuinfo_x86 new_cpu_data;
|
||||
|
||||
extern struct tss_struct doublefault_tss;
|
||||
extern __u32 cpu_caps_cleared[NCAPINTS];
|
||||
extern __u32 cpu_caps_set[NCAPINTS];
|
||||
extern struct x86_hw_tss doublefault_tss;
|
||||
extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
|
||||
extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
|
||||
@ -253,6 +253,11 @@ static inline void load_cr3(pgd_t *pgdir)
|
||||
write_cr3(__sme_pa(pgdir));
|
||||
}
|
||||
|
||||
/*
|
||||
* Note that while the legacy 'TSS' name comes from 'Task State Segment',
|
||||
* on modern x86 CPUs the TSS also holds information important to 64-bit mode,
|
||||
* unrelated to the task-switch mechanism:
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
/* This is the TSS defined by the hardware. */
|
||||
struct x86_hw_tss {
|
||||
@ -305,7 +310,13 @@ struct x86_hw_tss {
|
||||
struct x86_hw_tss {
|
||||
u32 reserved1;
|
||||
u64 sp0;
|
||||
|
||||
/*
|
||||
* We store cpu_current_top_of_stack in sp1 so it's always accessible.
|
||||
* Linux does not use ring 1, so sp1 is not otherwise needed.
|
||||
*/
|
||||
u64 sp1;
|
||||
|
||||
u64 sp2;
|
||||
u64 reserved2;
|
||||
u64 ist[7];
|
||||
@ -323,12 +334,22 @@ struct x86_hw_tss {
|
||||
#define IO_BITMAP_BITS 65536
|
||||
#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
|
||||
#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
|
||||
#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
|
||||
#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
|
||||
#define INVALID_IO_BITMAP_OFFSET 0x8000
|
||||
|
||||
struct SYSENTER_stack {
|
||||
unsigned long words[64];
|
||||
};
|
||||
|
||||
struct SYSENTER_stack_page {
|
||||
struct SYSENTER_stack stack;
|
||||
} __aligned(PAGE_SIZE);
|
||||
|
||||
struct tss_struct {
|
||||
/*
|
||||
* The hardware state:
|
||||
* The fixed hardware portion. This must not cross a page boundary
|
||||
* at risk of violating the SDM's advice and potentially triggering
|
||||
* errata.
|
||||
*/
|
||||
struct x86_hw_tss x86_tss;
|
||||
|
||||
@ -339,18 +360,9 @@ struct tss_struct {
|
||||
* be within the limit.
|
||||
*/
|
||||
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
|
||||
} __aligned(PAGE_SIZE);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* Space for the temporary SYSENTER stack.
|
||||
*/
|
||||
unsigned long SYSENTER_stack_canary;
|
||||
unsigned long SYSENTER_stack[64];
|
||||
#endif
|
||||
|
||||
} ____cacheline_aligned;
|
||||
|
||||
DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
|
||||
DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
|
||||
|
||||
/*
|
||||
* sizeof(unsigned long) coming from an extra "long" at the end
|
||||
@ -364,6 +376,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
|
||||
#else
|
||||
/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
|
||||
#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -523,7 +538,7 @@ static inline void native_set_iopl_mask(unsigned mask)
|
||||
static inline void
|
||||
native_load_sp0(unsigned long sp0)
|
||||
{
|
||||
this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
|
||||
this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
|
||||
}
|
||||
|
||||
static inline void native_swapgs(void)
|
||||
@ -535,12 +550,12 @@ static inline void native_swapgs(void)
|
||||
|
||||
static inline unsigned long current_top_of_stack(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
|
||||
#else
|
||||
/* sp0 on x86_32 is special in and around vm86 mode. */
|
||||
/*
|
||||
* We can't read directly from tss.sp0: sp0 on x86_32 is special in
|
||||
* and around vm86 mode and sp0 on x86_64 is special because of the
|
||||
* entry trampoline.
|
||||
*/
|
||||
return this_cpu_read_stable(cpu_current_top_of_stack);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool on_thread_stack(void)
|
||||
|
@ -16,6 +16,7 @@ enum stack_type {
|
||||
STACK_TYPE_TASK,
|
||||
STACK_TYPE_IRQ,
|
||||
STACK_TYPE_SOFTIRQ,
|
||||
STACK_TYPE_SYSENTER,
|
||||
STACK_TYPE_EXCEPTION,
|
||||
STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
|
||||
};
|
||||
@ -28,6 +29,8 @@ struct stack_info {
|
||||
bool in_task_stack(unsigned long *stack, struct task_struct *task,
|
||||
struct stack_info *info);
|
||||
|
||||
bool in_sysenter_stack(unsigned long *stack, struct stack_info *info);
|
||||
|
||||
int get_stack_info(unsigned long *stack, struct task_struct *task,
|
||||
struct stack_info *info, unsigned long *visit_mask);
|
||||
|
||||
|
@ -79,10 +79,10 @@ do { \
|
||||
static inline void refresh_sysenter_cs(struct thread_struct *thread)
|
||||
{
|
||||
/* Only happens when SEP is enabled, no need to test "SEP"arately: */
|
||||
if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
|
||||
if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
|
||||
return;
|
||||
|
||||
this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
|
||||
this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
|
||||
wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
|
||||
}
|
||||
#endif
|
||||
@ -90,9 +90,11 @@ static inline void refresh_sysenter_cs(struct thread_struct *thread)
|
||||
/* This is used when switching tasks or entering/exiting vm86 mode. */
|
||||
static inline void update_sp0(struct task_struct *task)
|
||||
{
|
||||
/* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
|
||||
#ifdef CONFIG_X86_32
|
||||
load_sp0(task->thread.sp0);
|
||||
#else
|
||||
if (static_cpu_has(X86_FEATURE_XENPV))
|
||||
load_sp0(task_top_of_stack(task));
|
||||
#endif
|
||||
}
|
||||
|
@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
|
||||
#else /* !__ASSEMBLY__ */
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
|
||||
# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
|
||||
dotraplinkage void do_stack_segment(struct pt_regs *, long);
|
||||
#ifdef CONFIG_X86_64
|
||||
dotraplinkage void do_double_fault(struct pt_regs *, long);
|
||||
asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
|
||||
#endif
|
||||
dotraplinkage void do_general_protection(struct pt_regs *, long);
|
||||
dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
|
||||
|
@ -7,6 +7,9 @@
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/stacktrace.h>
|
||||
|
||||
#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
|
||||
#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
|
||||
|
||||
struct unwind_state {
|
||||
struct stack_info stack_info;
|
||||
unsigned long stack_mask;
|
||||
@ -52,6 +55,10 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
|
||||
}
|
||||
|
||||
#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
|
||||
/*
|
||||
* WARNING: The entire pt_regs may not be safe to dereference. In some cases,
|
||||
* only the iret frame registers are accessible. Use with caution!
|
||||
*/
|
||||
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
|
||||
{
|
||||
if (unwind_done(state))
|
||||
|
@ -93,4 +93,10 @@ void common(void) {
|
||||
|
||||
BLANK();
|
||||
DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
|
||||
|
||||
/* Layout info for cpu_entry_area */
|
||||
OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
|
||||
OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
|
||||
OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page);
|
||||
DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
|
||||
}
|
||||
|
@ -47,13 +47,8 @@ void foo(void)
|
||||
BLANK();
|
||||
|
||||
/* Offset from the sysenter stack to tss.sp0 */
|
||||
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
|
||||
offsetofend(struct tss_struct, SYSENTER_stack));
|
||||
|
||||
/* Offset from cpu_tss to SYSENTER_stack */
|
||||
OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
|
||||
/* Size of SYSENTER_stack */
|
||||
DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
|
||||
DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
|
||||
offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack));
|
||||
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
BLANK();
|
||||
|
@ -23,6 +23,9 @@ int main(void)
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
|
||||
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
|
||||
#ifdef CONFIG_DEBUG_ENTRY
|
||||
OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
|
||||
#endif
|
||||
BLANK();
|
||||
#endif
|
||||
|
||||
@ -63,6 +66,7 @@ int main(void)
|
||||
|
||||
OFFSET(TSS_ist, tss_struct, x86_tss.ist);
|
||||
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
|
||||
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
|
||||
BLANK();
|
||||
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
|
@ -476,8 +476,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
|
||||
return NULL; /* Not found */
|
||||
}
|
||||
|
||||
__u32 cpu_caps_cleared[NCAPINTS];
|
||||
__u32 cpu_caps_set[NCAPINTS];
|
||||
__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
|
||||
__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
|
||||
|
||||
void load_percpu_segment(int cpu)
|
||||
{
|
||||
@ -490,27 +490,116 @@ void load_percpu_segment(int cpu)
|
||||
load_stack_canary_segment();
|
||||
}
|
||||
|
||||
/* Setup the fixmap mapping only once per-processor */
|
||||
static inline void setup_fixmap_gdt(int cpu)
|
||||
#ifdef CONFIG_X86_32
|
||||
/* The 32-bit entry code needs to find cpu_entry_area. */
|
||||
DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Special IST stacks which the CPU switches to when it calls
|
||||
* an IST-marked descriptor entry. Up to 7 stacks (hardware
|
||||
* limit), all of them are 4K, except the debug stack which
|
||||
* is 8K.
|
||||
*/
|
||||
static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
|
||||
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
|
||||
[DEBUG_STACK - 1] = DEBUG_STKSZ
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
|
||||
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
|
||||
#endif
|
||||
|
||||
static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
|
||||
SYSENTER_stack_storage);
|
||||
|
||||
static void __init
|
||||
set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
|
||||
{
|
||||
for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
|
||||
__set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
|
||||
}
|
||||
|
||||
/* Setup the fixmap mappings only once per-processor */
|
||||
static void __init setup_cpu_entry_area(int cpu)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
/* On 64-bit systems, we use a read-only fixmap GDT. */
|
||||
pgprot_t prot = PAGE_KERNEL_RO;
|
||||
extern char _entry_trampoline[];
|
||||
|
||||
/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
|
||||
pgprot_t gdt_prot = PAGE_KERNEL_RO;
|
||||
pgprot_t tss_prot = PAGE_KERNEL_RO;
|
||||
#else
|
||||
/*
|
||||
* On native 32-bit systems, the GDT cannot be read-only because
|
||||
* our double fault handler uses a task gate, and entering through
|
||||
* a task gate needs to change an available TSS to busy. If the GDT
|
||||
* is read-only, that will triple fault.
|
||||
* a task gate needs to change an available TSS to busy. If the
|
||||
* GDT is read-only, that will triple fault. The TSS cannot be
|
||||
* read-only because the CPU writes to it on task switches.
|
||||
*
|
||||
* On Xen PV, the GDT must be read-only because the hypervisor requires
|
||||
* it.
|
||||
* On Xen PV, the GDT must be read-only because the hypervisor
|
||||
* requires it.
|
||||
*/
|
||||
pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
|
||||
pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
|
||||
PAGE_KERNEL_RO : PAGE_KERNEL;
|
||||
pgprot_t tss_prot = PAGE_KERNEL;
|
||||
#endif
|
||||
|
||||
__set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
|
||||
__set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
|
||||
set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
|
||||
per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
|
||||
PAGE_KERNEL);
|
||||
|
||||
/*
|
||||
* The Intel SDM says (Volume 3, 7.2.1):
|
||||
*
|
||||
* Avoid placing a page boundary in the part of the TSS that the
|
||||
* processor reads during a task switch (the first 104 bytes). The
|
||||
* processor may not correctly perform address translations if a
|
||||
* boundary occurs in this area. During a task switch, the processor
|
||||
* reads and writes into the first 104 bytes of each TSS (using
|
||||
* contiguous physical addresses beginning with the physical address
|
||||
* of the first byte of the TSS). So, after TSS access begins, if
|
||||
* part of the 104 bytes is not physically contiguous, the processor
|
||||
* will access incorrect information without generating a page-fault
|
||||
* exception.
|
||||
*
|
||||
* There are also a lot of errata involving the TSS spanning a page
|
||||
* boundary. Assert that we're not doing that.
|
||||
*/
|
||||
BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
|
||||
offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
|
||||
BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
|
||||
set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
|
||||
&per_cpu(cpu_tss_rw, cpu),
|
||||
sizeof(struct tss_struct) / PAGE_SIZE,
|
||||
tss_prot);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
|
||||
BUILD_BUG_ON(sizeof(exception_stacks) !=
|
||||
sizeof(((struct cpu_entry_area *)0)->exception_stacks));
|
||||
set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
|
||||
&per_cpu(exception_stacks, cpu),
|
||||
sizeof(exception_stacks) / PAGE_SIZE,
|
||||
PAGE_KERNEL);
|
||||
|
||||
__set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
|
||||
__pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
|
||||
#endif
|
||||
}
|
||||
|
||||
void __init setup_cpu_entry_areas(void)
|
||||
{
|
||||
unsigned int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
setup_cpu_entry_area(cpu);
|
||||
}
|
||||
|
||||
/* Load the original GDT from the per-cpu structure */
|
||||
@ -747,7 +836,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NCAPINTS; i++) {
|
||||
for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
|
||||
c->x86_capability[i] &= ~cpu_caps_cleared[i];
|
||||
c->x86_capability[i] |= cpu_caps_set[i];
|
||||
}
|
||||
@ -1250,7 +1339,7 @@ void enable_sep_cpu(void)
|
||||
return;
|
||||
|
||||
cpu = get_cpu();
|
||||
tss = &per_cpu(cpu_tss, cpu);
|
||||
tss = &per_cpu(cpu_tss_rw, cpu);
|
||||
|
||||
/*
|
||||
* We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
|
||||
@ -1259,11 +1348,7 @@ void enable_sep_cpu(void)
|
||||
|
||||
tss->x86_tss.ss1 = __KERNEL_CS;
|
||||
wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
|
||||
|
||||
wrmsr(MSR_IA32_SYSENTER_ESP,
|
||||
(unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
|
||||
0);
|
||||
|
||||
wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
|
||||
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
|
||||
|
||||
put_cpu();
|
||||
@ -1357,25 +1442,19 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
|
||||
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
|
||||
EXPORT_PER_CPU_SYMBOL(__preempt_count);
|
||||
|
||||
/*
|
||||
* Special IST stacks which the CPU switches to when it calls
|
||||
* an IST-marked descriptor entry. Up to 7 stacks (hardware
|
||||
* limit), all of them are 4K, except the debug stack which
|
||||
* is 8K.
|
||||
*/
|
||||
static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
|
||||
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
|
||||
[DEBUG_STACK - 1] = DEBUG_STKSZ
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
|
||||
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
|
||||
|
||||
/* May not be marked __init: used by software suspend */
|
||||
void syscall_init(void)
|
||||
{
|
||||
extern char _entry_trampoline[];
|
||||
extern char entry_SYSCALL_64_trampoline[];
|
||||
|
||||
int cpu = smp_processor_id();
|
||||
unsigned long SYSCALL64_entry_trampoline =
|
||||
(unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
|
||||
(entry_SYSCALL_64_trampoline - _entry_trampoline);
|
||||
|
||||
wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
|
||||
wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
|
||||
wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
|
||||
@ -1386,7 +1465,7 @@ void syscall_init(void)
|
||||
* AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
|
||||
*/
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
|
||||
#else
|
||||
wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
|
||||
@ -1530,7 +1609,7 @@ void cpu_init(void)
|
||||
if (cpu)
|
||||
load_ucode_ap();
|
||||
|
||||
t = &per_cpu(cpu_tss, cpu);
|
||||
t = &per_cpu(cpu_tss_rw, cpu);
|
||||
oist = &per_cpu(orig_ist, cpu);
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
@ -1569,7 +1648,7 @@ void cpu_init(void)
|
||||
* set up and load the per-CPU TSS
|
||||
*/
|
||||
if (!oist->ist[0]) {
|
||||
char *estacks = per_cpu(exception_stacks, cpu);
|
||||
char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
|
||||
|
||||
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
|
||||
estacks += exception_stack_sizes[v];
|
||||
@ -1580,7 +1659,7 @@ void cpu_init(void)
|
||||
}
|
||||
}
|
||||
|
||||
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
|
||||
t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
|
||||
|
||||
/*
|
||||
* <= is required because the CPU will access up to
|
||||
@ -1596,11 +1675,12 @@ void cpu_init(void)
|
||||
enter_lazy_tlb(&init_mm, me);
|
||||
|
||||
/*
|
||||
* Initialize the TSS. Don't bother initializing sp0, as the initial
|
||||
* task never enters user mode.
|
||||
* Initialize the TSS. sp0 points to the entry trampoline stack
|
||||
* regardless of what task is running.
|
||||
*/
|
||||
set_tss_desc(cpu, t);
|
||||
set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
|
||||
load_TR_desc();
|
||||
load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
|
||||
|
||||
load_mm_ldt(&init_mm);
|
||||
|
||||
@ -1612,7 +1692,6 @@ void cpu_init(void)
|
||||
if (is_uv_system())
|
||||
uv_cpu_init();
|
||||
|
||||
setup_fixmap_gdt(cpu);
|
||||
load_fixmap_gdt(cpu);
|
||||
}
|
||||
|
||||
@ -1622,7 +1701,7 @@ void cpu_init(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct task_struct *curr = current;
|
||||
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
|
||||
struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
|
||||
|
||||
wait_for_master_cpu(cpu);
|
||||
|
||||
@ -1657,12 +1736,12 @@ void cpu_init(void)
|
||||
* Initialize the TSS. Don't bother initializing sp0, as the initial
|
||||
* task never enters user mode.
|
||||
*/
|
||||
set_tss_desc(cpu, t);
|
||||
set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
|
||||
load_TR_desc();
|
||||
|
||||
load_mm_ldt(&init_mm);
|
||||
|
||||
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
|
||||
t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
|
||||
|
||||
#ifdef CONFIG_DOUBLEFAULT
|
||||
/* Set up doublefault TSS pointer in the GDT */
|
||||
@ -1674,7 +1753,6 @@ void cpu_init(void)
|
||||
|
||||
fpu__init_cpu();
|
||||
|
||||
setup_fixmap_gdt(cpu);
|
||||
load_fixmap_gdt(cpu);
|
||||
}
|
||||
#endif
|
||||
|
@ -50,8 +50,7 @@ static void doublefault_fn(void)
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
struct tss_struct doublefault_tss __cacheline_aligned = {
|
||||
.x86_tss = {
|
||||
struct x86_hw_tss doublefault_tss __cacheline_aligned = {
|
||||
.sp0 = STACK_START,
|
||||
.ss0 = __KERNEL_DS,
|
||||
.ldt = 0,
|
||||
@ -68,7 +67,6 @@ struct tss_struct doublefault_tss __cacheline_aligned = {
|
||||
.fs = __KERNEL_PERCPU,
|
||||
|
||||
.__cr3 = __pa_nodebug(swapper_pg_dir),
|
||||
}
|
||||
};
|
||||
|
||||
/* dummy for do_double_fault() call */
|
||||
|
@ -43,6 +43,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
|
||||
{
|
||||
struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id());
|
||||
|
||||
void *begin = ss;
|
||||
void *end = ss + 1;
|
||||
|
||||
if ((void *)stack < begin || (void *)stack >= end)
|
||||
return false;
|
||||
|
||||
info->type = STACK_TYPE_SYSENTER;
|
||||
info->begin = begin;
|
||||
info->end = end;
|
||||
info->next_sp = NULL;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void printk_stack_address(unsigned long address, int reliable,
|
||||
char *log_lvl)
|
||||
{
|
||||
@ -50,6 +68,28 @@ static void printk_stack_address(unsigned long address, int reliable,
|
||||
printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
|
||||
}
|
||||
|
||||
void show_iret_regs(struct pt_regs *regs)
|
||||
{
|
||||
printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
|
||||
printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
|
||||
regs->sp, regs->flags);
|
||||
}
|
||||
|
||||
static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
|
||||
{
|
||||
if (on_stack(info, regs, sizeof(*regs)))
|
||||
__show_regs(regs, 0);
|
||||
else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
|
||||
IRET_FRAME_SIZE)) {
|
||||
/*
|
||||
* When an interrupt or exception occurs in entry code, the
|
||||
* full pt_regs might not have been saved yet. In that case
|
||||
* just print the iret frame.
|
||||
*/
|
||||
show_iret_regs(regs);
|
||||
}
|
||||
}
|
||||
|
||||
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long *stack, char *log_lvl)
|
||||
{
|
||||
@ -71,31 +111,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
* - task stack
|
||||
* - interrupt stack
|
||||
* - HW exception stacks (double fault, nmi, debug, mce)
|
||||
* - SYSENTER stack
|
||||
*
|
||||
* x86-32 can have up to three stacks:
|
||||
* x86-32 can have up to four stacks:
|
||||
* - task stack
|
||||
* - softirq stack
|
||||
* - hardirq stack
|
||||
* - SYSENTER stack
|
||||
*/
|
||||
for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
|
||||
const char *stack_name;
|
||||
|
||||
if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
|
||||
/*
|
||||
* If we overflowed the task stack into a guard page, jump back
|
||||
* to the bottom of the usable stack.
|
||||
* We weren't on a valid stack. It's possible that
|
||||
* we overflowed a valid stack into a guard page.
|
||||
* See if the next page up is valid so that we can
|
||||
* generate some kind of backtrace if this happens.
|
||||
*/
|
||||
if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
|
||||
stack = task_stack_page(task);
|
||||
|
||||
stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
|
||||
if (get_stack_info(stack, task, &stack_info, &visit_mask))
|
||||
break;
|
||||
}
|
||||
|
||||
stack_name = stack_type_name(stack_info.type);
|
||||
if (stack_name)
|
||||
printk("%s <%s>\n", log_lvl, stack_name);
|
||||
|
||||
if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
|
||||
__show_regs(regs, 0);
|
||||
if (regs)
|
||||
show_regs_safe(&stack_info, regs);
|
||||
|
||||
/*
|
||||
* Scan the stack, printing any text addresses we find. At the
|
||||
@ -119,7 +163,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
|
||||
/*
|
||||
* Don't print regs->ip again if it was already printed
|
||||
* by __show_regs() below.
|
||||
* by show_regs_safe() below.
|
||||
*/
|
||||
if (regs && stack == ®s->ip)
|
||||
goto next;
|
||||
@ -155,8 +199,8 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
|
||||
/* if the frame has entry regs, print them */
|
||||
regs = unwind_get_entry_regs(&state);
|
||||
if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
|
||||
__show_regs(regs, 0);
|
||||
if (regs)
|
||||
show_regs_safe(&stack_info, regs);
|
||||
}
|
||||
|
||||
if (stack_name)
|
||||
|
@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
|
||||
if (type == STACK_TYPE_SOFTIRQ)
|
||||
return "SOFTIRQ";
|
||||
|
||||
if (type == STACK_TYPE_SYSENTER)
|
||||
return "SYSENTER";
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
|
||||
if (task != current)
|
||||
goto unknown;
|
||||
|
||||
if (in_sysenter_stack(stack, info))
|
||||
goto recursion_check;
|
||||
|
||||
if (in_hardirq_stack(stack, info))
|
||||
goto recursion_check;
|
||||
|
||||
|
@ -37,6 +37,9 @@ const char *stack_type_name(enum stack_type type)
|
||||
if (type == STACK_TYPE_IRQ)
|
||||
return "IRQ";
|
||||
|
||||
if (type == STACK_TYPE_SYSENTER)
|
||||
return "SYSENTER";
|
||||
|
||||
if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
|
||||
return exception_stack_names[type - STACK_TYPE_EXCEPTION];
|
||||
|
||||
@ -115,6 +118,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
|
||||
if (in_irq_stack(stack, info))
|
||||
goto recursion_check;
|
||||
|
||||
if (in_sysenter_stack(stack, info))
|
||||
goto recursion_check;
|
||||
|
||||
goto unknown;
|
||||
|
||||
recursion_check:
|
||||
|
@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
|
||||
* because the ->io_bitmap_max value must match the bitmap
|
||||
* contents:
|
||||
*/
|
||||
tss = &per_cpu(cpu_tss, get_cpu());
|
||||
tss = &per_cpu(cpu_tss_rw, get_cpu());
|
||||
|
||||
if (turn_on)
|
||||
bitmap_clear(t->io_bitmap_ptr, from, num);
|
||||
|
@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
|
||||
/* high bit used in ret_from_ code */
|
||||
unsigned vector = ~regs->orig_ax;
|
||||
|
||||
/*
|
||||
* NB: Unlike exception entries, IRQ entries do not reliably
|
||||
* handle context tracking in the low-level entry code. This is
|
||||
* because syscall entries execute briefly with IRQs on before
|
||||
* updating context tracking state, so we can take an IRQ from
|
||||
* kernel mode with CONTEXT_USER. The low-level entry code only
|
||||
* updates the context if we came from user mode, so we won't
|
||||
* switch to CONTEXT_KERNEL. We'll fix that once the syscall
|
||||
* code is cleaned up enough that we can cleanly defer enabling
|
||||
* IRQs.
|
||||
*/
|
||||
|
||||
entering_irq();
|
||||
|
||||
/* entering_irq() tells RCU that we're not quiescent. Check it. */
|
||||
|
@@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
if (regs->sp >= estack_top && regs->sp <= estack_bottom)
return;

WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
current->comm, curbase, regs->sp,
irq_stack_top, irq_stack_bottom,
estack_top, estack_bottom);
estack_top, estack_bottom, (void *)regs->ip);

if (sysctl_panic_on_stackoverflow)
panic("low stack detected by irq handler - check messages\n");
@@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");

DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
@@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
PATCH_SITE(pv_mmu_ops, read_cr2);
PATCH_SITE(pv_mmu_ops, read_cr3);
PATCH_SITE(pv_mmu_ops, write_cr3);
PATCH_SITE(pv_mmu_ops, flush_tlb_single);
PATCH_SITE(pv_cpu_ops, wbinvd);
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
@@ -47,7 +47,7 @@
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
.x86_tss = {
/*
* .sp0 is only used when entering ring 0 from a lower
@@ -56,6 +56,16 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
* Poison it.
*/
.sp0 = (1UL << (BITS_PER_LONG-1)) + 1,

#ifdef CONFIG_X86_64
/*
* .sp1 is cpu_current_top_of_stack. The init task never
* runs user code, but cpu_current_top_of_stack should still
* be well defined before the first context switch.
*/
.sp1 = TOP_OF_INIT_STACK,
#endif

#ifdef CONFIG_X86_32
.ss0 = __KERNEL_DS,
.ss1 = __KERNEL_CS,
@@ -71,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
*/
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
#endif
#ifdef CONFIG_X86_32
.SYSENTER_stack_canary = STACK_END_MAGIC,
#endif
};
EXPORT_PER_CPU_SYMBOL(cpu_tss);
EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);

DEFINE_PER_CPU(bool, __tss_limit_invalid);
EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
@@ -104,7 +111,7 @@ void exit_thread(struct task_struct *tsk)
struct fpu *fpu = &t->fpu;

if (bp) {
struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());

t->io_bitmap_ptr = NULL;
clear_thread_flag(TIF_IO_BITMAP);
@@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *prev_fpu = &prev->fpu;
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);

/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
@@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned int fsindex, gsindex;
unsigned int ds, cs, es;

printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip);
printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
regs->sp, regs->flags);
show_iret_regs(regs);

if (regs->orig_ax != -1)
pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
else
@@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
regs->r13, regs->r14, regs->r15);

if (!all)
return;

asm("movl %%ds,%0" : "=r" (ds));
asm("movl %%cs,%0" : "=r" (cs));
asm("movl %%es,%0" : "=r" (es));
@@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
rdmsrl(MSR_GS_BASE, gs);
rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

if (!all)
return;

cr0 = read_cr0();
cr2 = read_cr2();
cr3 = __read_cr3();
@@ -400,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *prev_fpu = &prev->fpu;
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);

WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
this_cpu_read(irq_count) != -1);
@@ -462,6 +461,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
* Switch the PDA and FPU contexts.
*/
this_cpu_write(current_task, next_p);
this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));

/* Reload sp0. */
update_sp0(next_p);
@@ -348,9 +348,15 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)

/*
* If IRET takes a non-IST fault on the espfix64 stack, then we
* end up promoting it to a doublefault. In that case, modify
* the stack to make it look like we just entered the #GP
* handler from user space, similar to bad_iret.
* end up promoting it to a doublefault. In that case, take
* advantage of the fact that we're not using the normal (TSS.sp0)
* stack right now. We can write a fake #GP(0) frame at TSS.sp0
* and then modify our own IRET frame so that, when we return,
* we land directly at the #GP(0) vector with the stack already
* set up according to its expectations.
*
* The net result is that our #GP handler will think that we
* entered from usermode with the bad user context.
*
* No need for ist_enter here because we don't use RCU.
*/
@@ -358,13 +364,26 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
regs->cs == __KERNEL_CS &&
regs->ip == (unsigned long)native_irq_return_iret)
{
struct pt_regs *normal_regs = task_pt_regs(current);
struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;

/* Fake a #GP(0) from userspace. */
memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */
/*
* regs->sp points to the failing IRET frame on the
* ESPFIX64 stack. Copy it to the entry stack. This fills
* in gpregs->ss through gpregs->ip.
*
*/
memmove(&gpregs->ip, (void *)regs->sp, 5*8);
gpregs->orig_ax = 0; /* Missing (lost) #GP error code */

/*
* Adjust our frame so that we return straight to the #GP
* vector with the expected RSP value. This is safe because
* we won't enable interupts or schedule before we invoke
* general_protection, so nothing will clobber the stack
* frame we just set up.
*/
regs->ip = (unsigned long)general_protection;
regs->sp = (unsigned long)&normal_regs->orig_ax;
regs->sp = (unsigned long)&gpregs->orig_ax;

return;
}
@@ -389,7 +408,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
*
* Processors update CR2 whenever a page fault is detected. If a
* second page fault occurs while an earlier page fault is being
* deliv- ered, the faulting linear address of the second fault will
* delivered, the faulting linear address of the second fault will
* overwrite the contents of CR2 (replacing the previous
* address). These updates to CR2 occur even if the page fault
* results in a double fault or occurs during the delivery of a
@@ -605,13 +624,14 @@ NOKPROBE_SYMBOL(do_int3);

#ifdef CONFIG_X86_64
/*
* Help handler running on IST stack to switch off the IST stack if the
* interrupted code was in user mode. The actual stack switch is done in
* entry_64.S
* Help handler running on a per-cpu (IST or entry trampoline) stack
* to switch to the normal thread stack if the interrupted code was in
* user mode. The actual stack switch is done in entry_64.S
*/
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
{
struct pt_regs *regs = task_pt_regs(current);
struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
if (regs != eregs)
*regs = *eregs;
return regs;
}
@@ -628,13 +648,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
/*
* This is called from entry_64.S early in handling a fault
* caused by a bad iret to user mode. To handle the fault
* correctly, we want move our stack frame to task_pt_regs
* and we want to pretend that the exception came from the
* iret target.
* correctly, we want to move our stack frame to where it would
* be had we entered directly on the entry stack (rather than
* just below the IRET frame) and we want to pretend that the
* exception came from the IRET target.
*/
struct bad_iret_stack *new_stack =
container_of(task_pt_regs(current),
struct bad_iret_stack, regs);
(struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;

/* Copy the IRET target to the new stack. */
memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
@@ -795,14 +815,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
debug_stack_usage_dec();

exit:
#if defined(CONFIG_X86_32)
/*
* This is the most likely code path that involves non-trivial use
* of the SYSENTER stack. Check that we haven't overrun it.
*/
WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
"Overran or corrupted SYSENTER stack\n");
#endif
ist_exit(regs);
}
NOKPROBE_SYMBOL(do_debug);
@@ -929,6 +941,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)

void __init trap_init(void)
{
/* Init cpu_entry_area before IST entries are set up */
setup_cpu_entry_areas();

idt_setup_traps();

/*
@@ -253,21 +253,14 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
return NULL;
}

static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
size_t len)
{
struct stack_info *info = &state->stack_info;
void *addr = (void *)_addr;

/*
* If the address isn't on the current stack, switch to the next one.
*
* We may have to traverse multiple stacks to deal with the possibility
* that info->next_sp could point to an empty stack and the address
* could be on a subsequent stack.
*/
while (!on_stack(info, (void *)addr, len))
if (get_stack_info(info->next_sp, state->task, info,
&state->stack_mask))
if (!on_stack(info, addr, len) &&
(get_stack_info(addr, state->task, info, &state->stack_mask)))
return false;

return true;
@@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
return true;
}

#define REGS_SIZE (sizeof(struct pt_regs))
#define SP_OFFSET (offsetof(struct pt_regs, sp))
#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))

static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
unsigned long *ip, unsigned long *sp, bool full)
unsigned long *ip, unsigned long *sp)
{
size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
struct pt_regs *regs = (struct pt_regs *)addr;

if (IS_ENABLED(CONFIG_X86_64)) {
if (!stack_access_ok(state, addr, regs_size))
/* x86-32 support will be more complicated due to the &regs->sp hack */
BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));

if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
return false;

*ip = regs->ip;
*sp = regs->sp;

return true;
}
}

if (!stack_access_ok(state, addr, sp_offset))
static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
unsigned long *ip, unsigned long *sp)
{
struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;

if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
return false;

*ip = regs->ip;

if (user_mode(regs)) {
if (!stack_access_ok(state, addr + sp_offset,
REGS_SIZE - SP_OFFSET))
return false;

*sp = regs->sp;
} else
*sp = (unsigned long)&regs->sp;

return true;
}
@@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
enum stack_type prev_type = state->stack_info.type;
struct orc_entry *orc;
struct pt_regs *ptregs;
bool indirect = false;

if (unwind_done(state))
@@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
break;

case ORC_TYPE_REGS:
if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
orc_warn("can't dereference registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip);
goto done;
@@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
break;

case ORC_TYPE_REGS_IRET:
if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
orc_warn("can't dereference iret registers at %p for ip %pB\n",
(void *)sp, (void *)orig_ip);
goto done;
}

ptregs = container_of((void *)sp, struct pt_regs, ip);
if ((unsigned long)ptregs >= prev_sp &&
on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
state->regs = ptregs;
state->regs = (void *)sp - IRET_FRAME_OFFSET;
state->full_regs = false;
} else
state->regs = NULL;

state->signal = true;
break;
@@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
}

if (get_stack_info((unsigned long *)state->sp, state->task,
&state->stack_info, &state->stack_mask))
&state->stack_info, &state->stack_mask)) {
/*
* We weren't on a valid stack. It's possible that
* we overflowed a valid stack into a guard page.
* See if the next page up is valid so that we can
* generate some kind of backtrace if this happens.
*/
void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
if (get_stack_info(next_page, state->task, &state->stack_info,
&state->stack_mask))
return;
}

/*
* The caller can provide the address of the first frame directly
@@ -107,6 +107,15 @@ SECTIONS
SOFTIRQENTRY_TEXT
*(.fixup)
*(.gnu.warning)

#ifdef CONFIG_X86_64
. = ALIGN(PAGE_SIZE);
_entry_trampoline = .;
*(.entry_trampoline)
. = ALIGN(PAGE_SIZE);
ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
#endif

/* End of text section */
_etext = .;
} :text = 0x9090
@ -2390,9 +2390,21 @@ static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, u64 smbase, int n)
|
||||
}
|
||||
|
||||
static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
|
||||
u64 cr0, u64 cr4)
|
||||
u64 cr0, u64 cr3, u64 cr4)
|
||||
{
|
||||
int bad;
|
||||
u64 pcid;
|
||||
|
||||
/* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */
|
||||
pcid = 0;
|
||||
if (cr4 & X86_CR4_PCIDE) {
|
||||
pcid = cr3 & 0xfff;
|
||||
cr3 &= ~0xfff;
|
||||
}
|
||||
|
||||
bad = ctxt->ops->set_cr(ctxt, 3, cr3);
|
||||
if (bad)
|
||||
return X86EMUL_UNHANDLEABLE;
|
||||
|
||||
/*
|
||||
* First enable PAE, long mode needs it before CR0.PG = 1 is set.
|
||||
@ -2411,6 +2423,12 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
|
||||
bad = ctxt->ops->set_cr(ctxt, 4, cr4);
|
||||
if (bad)
|
||||
return X86EMUL_UNHANDLEABLE;
|
||||
if (pcid) {
|
||||
bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
|
||||
if (bad)
|
||||
return X86EMUL_UNHANDLEABLE;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return X86EMUL_CONTINUE;
|
||||
@ -2421,11 +2439,11 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
|
||||
struct desc_struct desc;
|
||||
struct desc_ptr dt;
|
||||
u16 selector;
|
||||
u32 val, cr0, cr4;
|
||||
u32 val, cr0, cr3, cr4;
|
||||
int i;
|
||||
|
||||
cr0 = GET_SMSTATE(u32, smbase, 0x7ffc);
|
||||
ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u32, smbase, 0x7ff8));
|
||||
cr3 = GET_SMSTATE(u32, smbase, 0x7ff8);
|
||||
ctxt->eflags = GET_SMSTATE(u32, smbase, 0x7ff4) | X86_EFLAGS_FIXED;
|
||||
ctxt->_eip = GET_SMSTATE(u32, smbase, 0x7ff0);
|
||||
|
||||
@ -2467,14 +2485,14 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, u64 smbase)
|
||||
|
||||
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7ef8));
|
||||
|
||||
return rsm_enter_protected_mode(ctxt, cr0, cr4);
|
||||
return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
|
||||
}
|
||||
|
||||
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
|
||||
{
|
||||
struct desc_struct desc;
|
||||
struct desc_ptr dt;
|
||||
u64 val, cr0, cr4;
|
||||
u64 val, cr0, cr3, cr4;
|
||||
u32 base3;
|
||||
u16 selector;
|
||||
int i, r;
|
||||
@ -2491,7 +2509,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
|
||||
ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1);
|
||||
|
||||
cr0 = GET_SMSTATE(u64, smbase, 0x7f58);
|
||||
ctxt->ops->set_cr(ctxt, 3, GET_SMSTATE(u64, smbase, 0x7f50));
|
||||
cr3 = GET_SMSTATE(u64, smbase, 0x7f50);
|
||||
cr4 = GET_SMSTATE(u64, smbase, 0x7f48);
|
||||
ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smbase, 0x7f00));
|
||||
val = GET_SMSTATE(u64, smbase, 0x7ed0);
|
||||
@ -2519,7 +2537,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, u64 smbase)
|
||||
dt.address = GET_SMSTATE(u64, smbase, 0x7e68);
|
||||
ctxt->ops->set_gdt(ctxt, &dt);
|
||||
|
||||
r = rsm_enter_protected_mode(ctxt, cr0, cr4);
|
||||
r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
|
||||
if (r != X86EMUL_CONTINUE)
|
||||
return r;
|
||||
|
||||
|
@ -3395,7 +3395,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
if(make_mmu_pages_available(vcpu) < 0) {
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
return 1;
|
||||
return -ENOSPC;
|
||||
}
|
||||
sp = kvm_mmu_get_page(vcpu, 0, 0,
|
||||
vcpu->arch.mmu.shadow_root_level, 1, ACC_ALL);
|
||||
@ -3410,7 +3410,7 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
if (make_mmu_pages_available(vcpu) < 0) {
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
return 1;
|
||||
return -ENOSPC;
|
||||
}
|
||||
sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT),
|
||||
i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL);
|
||||
@ -3450,7 +3450,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
if (make_mmu_pages_available(vcpu) < 0) {
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
return 1;
|
||||
return -ENOSPC;
|
||||
}
|
||||
sp = kvm_mmu_get_page(vcpu, root_gfn, 0,
|
||||
vcpu->arch.mmu.shadow_root_level, 0, ACC_ALL);
|
||||
@ -3487,7 +3487,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
|
||||
spin_lock(&vcpu->kvm->mmu_lock);
|
||||
if (make_mmu_pages_available(vcpu) < 0) {
|
||||
spin_unlock(&vcpu->kvm->mmu_lock);
|
||||
return 1;
|
||||
return -ENOSPC;
|
||||
}
|
||||
sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL,
|
||||
0, ACC_ALL);
|
||||
|
@ -2302,7 +2302,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
* processors. See 22.2.4.
|
||||
*/
|
||||
vmcs_writel(HOST_TR_BASE,
|
||||
(unsigned long)this_cpu_ptr(&cpu_tss));
|
||||
(unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
|
||||
vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
|
||||
|
||||
/*
|
||||
|
@ -4384,7 +4384,7 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
|
||||
addr, n, v))
|
||||
&& kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
|
||||
break;
|
||||
trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
|
||||
trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, v);
|
||||
handled += n;
|
||||
addr += n;
|
||||
len -= n;
|
||||
@ -4643,7 +4643,7 @@ static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
|
||||
{
|
||||
if (vcpu->mmio_read_completed) {
|
||||
trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
|
||||
vcpu->mmio_fragments[0].gpa, *(u64 *)val);
|
||||
vcpu->mmio_fragments[0].gpa, val);
|
||||
vcpu->mmio_read_completed = 0;
|
||||
return 1;
|
||||
}
|
||||
@ -4665,14 +4665,14 @@ static int write_emulate(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
|
||||
static int write_mmio(struct kvm_vcpu *vcpu, gpa_t gpa, int bytes, void *val)
|
||||
{
|
||||
trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
|
||||
trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, val);
|
||||
return vcpu_mmio_write(vcpu, gpa, bytes, val);
|
||||
}
|
||||
|
||||
static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
|
||||
void *val, int bytes)
|
||||
{
|
||||
trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, 0);
|
||||
trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, bytes, gpa, NULL);
|
||||
return X86EMUL_IO_NEEDED;
|
||||
}
|
||||
|
||||
@ -7264,13 +7264,12 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
|
||||
|
||||
int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
{
|
||||
struct fpu *fpu = &current->thread.fpu;
|
||||
int r;
|
||||
|
||||
fpu__initialize(fpu);
|
||||
|
||||
kvm_sigset_activate(vcpu);
|
||||
|
||||
kvm_load_guest_fpu(vcpu);
|
||||
|
||||
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
|
||||
if (kvm_run->immediate_exit) {
|
||||
r = -EINTR;
|
||||
@ -7296,14 +7295,12 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
}
|
||||
}
|
||||
|
||||
kvm_load_guest_fpu(vcpu);
|
||||
|
||||
if (unlikely(vcpu->arch.complete_userspace_io)) {
|
||||
int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
|
||||
vcpu->arch.complete_userspace_io = NULL;
|
||||
r = cui(vcpu);
|
||||
if (r <= 0)
|
||||
goto out_fpu;
|
||||
goto out;
|
||||
} else
|
||||
WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
|
||||
|
||||
@ -7312,9 +7309,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
||||
else
|
||||
r = vcpu_run(vcpu);
|
||||
|
||||
out_fpu:
|
||||
kvm_put_guest_fpu(vcpu);
|
||||
out:
|
||||
kvm_put_guest_fpu(vcpu);
|
||||
post_kvm_run_save(vcpu);
|
||||
kvm_sigset_deactivate(vcpu);
|
||||
|
||||
@ -7384,7 +7380,7 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
|
||||
#endif
|
||||
|
||||
kvm_rip_write(vcpu, regs->rip);
|
||||
kvm_set_rflags(vcpu, regs->rflags);
|
||||
kvm_set_rflags(vcpu, regs->rflags | X86_EFLAGS_FIXED);
|
||||
|
||||
vcpu->arch.exception.pending = false;
|
||||
|
||||
@ -7498,6 +7494,29 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(kvm_task_switch);
|
||||
|
||||
int kvm_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
|
||||
{
|
||||
if ((sregs->efer & EFER_LME) && (sregs->cr0 & X86_CR0_PG_BIT)) {
|
||||
/*
|
||||
* When EFER.LME and CR0.PG are set, the processor is in
|
||||
* 64-bit mode (though maybe in a 32-bit code segment).
|
||||
* CR4.PAE and EFER.LMA must be set.
|
||||
*/
|
||||
if (!(sregs->cr4 & X86_CR4_PAE_BIT)
|
||||
|| !(sregs->efer & EFER_LMA))
|
||||
return -EINVAL;
|
||||
} else {
|
||||
/*
|
||||
* Not in 64-bit mode: EFER.LMA is clear and the code
|
||||
* segment cannot be 64-bit.
|
||||
*/
|
||||
if (sregs->efer & EFER_LMA || sregs->cs.l)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
||||
struct kvm_sregs *sregs)
|
||||
{
|
||||
@ -7510,6 +7529,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
|
||||
(sregs->cr4 & X86_CR4_OSXSAVE))
|
||||
return -EINVAL;
|
||||
|
||||
if (kvm_valid_sregs(vcpu, sregs))
|
||||
return -EINVAL;
|
||||
|
||||
apic_base_msr.data = sregs->apic_base;
|
||||
apic_base_msr.host_initiated = true;
|
||||
if (kvm_set_apic_base(vcpu, &apic_base_msr))
|
||||
|
@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
|
||||
delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
|
||||
|
||||
/*
|
||||
* Use cpu_tss as a cacheline-aligned, seldomly
|
||||
* Use cpu_tss_rw as a cacheline-aligned, seldomly
|
||||
* accessed per-cpu variable as the monitor target.
|
||||
*/
|
||||
__monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
|
||||
__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
|
||||
|
||||
/*
|
||||
* AMD, like Intel, supports the EAX hint and EAX=0xf
|
||||
|
@ -860,7 +860,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
|
||||
if (!printk_ratelimit())
|
||||
return;
|
||||
|
||||
printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
|
||||
printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
|
||||
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
|
||||
tsk->comm, task_pid_nr(tsk), address,
|
||||
(void *)regs->ip, (void *)regs->sp, error_code);
|
||||
|
@ -277,6 +277,7 @@ void __init kasan_early_init(void)
|
||||
void __init kasan_init(void)
|
||||
{
|
||||
int i;
|
||||
void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
|
||||
|
||||
#ifdef CONFIG_KASAN_INLINE
|
||||
register_die_notifier(&kasan_die_notifier);
|
||||
@ -329,8 +330,23 @@ void __init kasan_init(void)
|
||||
(unsigned long)kasan_mem_to_shadow(_end),
|
||||
early_pfn_to_nid(__pa(_stext)));
|
||||
|
||||
shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM);
|
||||
shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
|
||||
shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
|
||||
PAGE_SIZE);
|
||||
|
||||
shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE);
|
||||
shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
|
||||
shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
|
||||
PAGE_SIZE);
|
||||
|
||||
kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
|
||||
(void *)KASAN_SHADOW_END);
|
||||
shadow_cpu_entry_begin);
|
||||
|
||||
kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
|
||||
(unsigned long)shadow_cpu_entry_end, 0);
|
||||
|
||||
kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END);
|
||||
|
||||
load_cr3(init_top_pgt);
|
||||
__flush_tlb_all();
|
||||
|
@ -152,17 +152,19 @@ static void do_fpu_end(void)
|
||||
static void fix_processor_context(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
|
||||
#ifdef CONFIG_X86_64
|
||||
struct desc_struct *desc = get_cpu_gdt_rw(cpu);
|
||||
tss_desc tss;
|
||||
#endif
|
||||
set_tss_desc(cpu, t); /*
|
||||
* This just modifies memory; should not be
|
||||
* necessary. But... This is necessary, because
|
||||
* 386 hardware has concept of busy TSS or some
|
||||
* similar stupidity.
|
||||
|
||||
/*
|
||||
* We need to reload TR, which requires that we change the
|
||||
* GDT entry to indicate "available" first.
|
||||
*
|
||||
* XXX: This could probably all be replaced by a call to
|
||||
* force_reload_TR().
|
||||
*/
|
||||
set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
|
||||
|
@ -826,7 +826,7 @@ static void xen_load_sp0(unsigned long sp0)
|
||||
mcs = xen_mc_entry(0);
|
||||
MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
|
||||
xen_mc_issue(PARAVIRT_LAZY_CPU);
|
||||
this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
|
||||
this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
|
||||
}
|
||||
|
||||
void xen_set_iopl_mask(unsigned mask)
|
||||
|
@ -2272,7 +2272,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
|
||||
#endif
|
||||
case FIX_TEXT_POKE0:
|
||||
case FIX_TEXT_POKE1:
|
||||
case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
|
||||
case FIX_CPU_ENTRY_AREA_TOP ... FIX_CPU_ENTRY_AREA_BOTTOM:
|
||||
/* All local page mappings */
|
||||
pte = pfn_pte(phys, prot);
|
||||
break;
|
||||
|
@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
|
||||
bio->bi_disk = bio_src->bi_disk;
|
||||
bio->bi_partno = bio_src->bi_partno;
|
||||
bio_set_flag(bio, BIO_CLONED);
|
||||
if (bio_flagged(bio_src, BIO_THROTTLED))
|
||||
bio_set_flag(bio, BIO_THROTTLED);
|
||||
bio->bi_opf = bio_src->bi_opf;
|
||||
bio->bi_write_hint = bio_src->bi_write_hint;
|
||||
bio->bi_iter = bio_src->bi_iter;
|
||||
|
@ -12,22 +12,29 @@
|
||||
#include "blk.h"
|
||||
|
||||
/*
|
||||
* Append a bio to a passthrough request. Only works can be merged into
|
||||
* the request based on the driver constraints.
|
||||
* Append a bio to a passthrough request. Only works if the bio can be merged
|
||||
* into the request based on the driver constraints.
|
||||
*/
|
||||
int blk_rq_append_bio(struct request *rq, struct bio *bio)
|
||||
int blk_rq_append_bio(struct request *rq, struct bio **bio)
|
||||
{
|
||||
blk_queue_bounce(rq->q, &bio);
|
||||
struct bio *orig_bio = *bio;
|
||||
|
||||
blk_queue_bounce(rq->q, bio);
|
||||
|
||||
if (!rq->bio) {
|
||||
blk_rq_bio_prep(rq->q, rq, bio);
|
||||
blk_rq_bio_prep(rq->q, rq, *bio);
|
||||
} else {
|
||||
if (!ll_back_merge_fn(rq->q, rq, bio))
|
||||
if (!ll_back_merge_fn(rq->q, rq, *bio)) {
|
||||
if (orig_bio != *bio) {
|
||||
bio_put(*bio);
|
||||
*bio = orig_bio;
|
||||
}
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
rq->biotail->bi_next = bio;
|
||||
rq->biotail = bio;
|
||||
rq->__data_len += bio->bi_iter.bi_size;
|
||||
rq->biotail->bi_next = *bio;
|
||||
rq->biotail = *bio;
|
||||
rq->__data_len += (*bio)->bi_iter.bi_size;
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -73,14 +80,12 @@ static int __blk_rq_map_user_iov(struct request *rq,
|
||||
* We link the bounce buffer in and could have to traverse it
|
||||
* later so we have to get a ref to prevent it from being freed
|
||||
*/
|
||||
ret = blk_rq_append_bio(rq, bio);
|
||||
bio_get(bio);
|
||||
ret = blk_rq_append_bio(rq, &bio);
|
||||
if (ret) {
|
||||
bio_endio(bio);
|
||||
__blk_rq_unmap_user(orig_bio);
|
||||
bio_put(bio);
|
||||
return ret;
|
||||
}
|
||||
bio_get(bio);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -213,7 +218,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
|
||||
int reading = rq_data_dir(rq) == READ;
|
||||
unsigned long addr = (unsigned long) kbuf;
|
||||
int do_copy = 0;
|
||||
struct bio *bio;
|
||||
struct bio *bio, *orig_bio;
|
||||
int ret;
|
||||
|
||||
if (len > (queue_max_hw_sectors(q) << 9))
|
||||
@ -236,10 +241,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
|
||||
if (do_copy)
|
||||
rq->rq_flags |= RQF_COPY_USER;
|
||||
|
||||
ret = blk_rq_append_bio(rq, bio);
|
||||
orig_bio = bio;
|
||||
ret = blk_rq_append_bio(rq, &bio);
|
||||
if (unlikely(ret)) {
|
||||
/* request is too big */
|
||||
bio_put(bio);
|
||||
bio_put(orig_bio);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -2226,13 +2226,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
|
||||
out_unlock:
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
out:
|
||||
/*
|
||||
* As multiple blk-throtls may stack in the same issue path, we
|
||||
* don't want bios to leave with the flag set. Clear the flag if
|
||||
* being issued.
|
||||
*/
|
||||
if (!throttled)
|
||||
bio_clear_flag(bio, BIO_THROTTLED);
|
||||
bio_set_flag(bio, BIO_THROTTLED);
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
|
||||
if (throttled || !td->track_bio_latency)
|
||||
|
@ -200,6 +200,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
|
||||
unsigned i = 0;
|
||||
bool bounce = false;
|
||||
int sectors = 0;
|
||||
bool passthrough = bio_is_passthrough(*bio_orig);
|
||||
|
||||
bio_for_each_segment(from, *bio_orig, iter) {
|
||||
if (i++ < BIO_MAX_PAGES)
|
||||
@ -210,13 +211,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
|
||||
if (!bounce)
|
||||
return;
|
||||
|
||||
if (sectors < bio_sectors(*bio_orig)) {
|
||||
if (!passthrough && sectors < bio_sectors(*bio_orig)) {
|
||||
bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
|
||||
bio_chain(bio, *bio_orig);
|
||||
generic_make_request(*bio_orig);
|
||||
*bio_orig = bio;
|
||||
}
|
||||
bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
|
||||
bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
|
||||
bounce_bio_set);
|
||||
|
||||
bio_for_each_segment_all(to, bio, i) {
|
||||
struct page *page = to->bv_page;
|
||||
|
@ -100,9 +100,13 @@ struct kyber_hctx_data {
|
||||
unsigned int cur_domain;
|
||||
unsigned int batching;
|
||||
wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
|
||||
struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
|
||||
atomic_t wait_index[KYBER_NUM_DOMAINS];
|
||||
};
|
||||
|
||||
static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int flags,
|
||||
void *key);
|
||||
|
||||
static int rq_sched_domain(const struct request *rq)
|
||||
{
|
||||
unsigned int op = rq->cmd_flags;
|
||||
@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
|
||||
|
||||
for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
|
||||
INIT_LIST_HEAD(&khd->rqs[i]);
|
||||
init_waitqueue_func_entry(&khd->domain_wait[i],
|
||||
kyber_domain_wake);
|
||||
khd->domain_wait[i].private = hctx;
|
||||
INIT_LIST_HEAD(&khd->domain_wait[i].entry);
|
||||
atomic_set(&khd->wait_index[i], 0);
|
||||
}
|
||||
@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data *kqd,
|
||||
int nr;
|
||||
|
||||
nr = __sbitmap_queue_get(domain_tokens);
|
||||
if (nr >= 0)
|
||||
return nr;
|
||||
|
||||
/*
|
||||
* If we failed to get a domain token, make sure the hardware queue is
|
||||
* run when one becomes available. Note that this is serialized on
|
||||
* khd->lock, but we still need to be careful about the waker.
|
||||
*/
|
||||
if (list_empty_careful(&wait->entry)) {
|
||||
init_waitqueue_func_entry(wait, kyber_domain_wake);
|
||||
wait->private = hctx;
|
||||
if (nr < 0 && list_empty_careful(&wait->entry)) {
|
||||
ws = sbq_wait_ptr(domain_tokens,
|
||||
&khd->wait_index[sched_domain]);
|
||||
khd->domain_ws[sched_domain] = ws;
|
||||
add_wait_queue(&ws->wait, wait);
|
||||
|
||||
/*
|
||||
* Try again in case a token was freed before we got on the wait
|
||||
* queue. The waker may have already removed the entry from the
|
||||
* wait queue, but list_del_init() is okay with that.
|
||||
* queue.
|
||||
*/
|
||||
nr = __sbitmap_queue_get(domain_tokens);
|
||||
if (nr >= 0) {
|
||||
unsigned long flags;
|
||||
}
|
||||
|
||||
spin_lock_irqsave(&ws->wait.lock, flags);
|
||||
/*
|
||||
* If we got a token while we were on the wait queue, remove ourselves
|
||||
* from the wait queue to ensure that all wake ups make forward
|
||||
* progress. It's possible that the waker already deleted the entry
|
||||
* between the !list_empty_careful() check and us grabbing the lock, but
|
||||
* list_del_init() is okay with that.
|
||||
*/
|
||||
if (nr >= 0 && !list_empty_careful(&wait->entry)) {
|
||||
ws = khd->domain_ws[sched_domain];
|
||||
spin_lock_irq(&ws->wait.lock);
|
||||
list_del_init(&wait->entry);
|
||||
spin_unlock_irqrestore(&ws->wait.lock, flags);
|
||||
}
|
||||
spin_unlock_irq(&ws->wait.lock);
|
||||
}
|
||||
|
||||
return nr;
|
||||
}
|
||||
|
||||
|
@ -1007,7 +1007,7 @@ static ssize_t erst_reader(struct pstore_record *record)
|
||||
/* The record may be cleared by others, try read next record */
|
||||
if (len == -ENOENT)
|
||||
goto skip;
|
||||
else if (len < sizeof(*rcd)) {
|
||||
else if (len < 0 || len < sizeof(*rcd)) {
|
||||
rc = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
@ -1171,7 +1171,7 @@ int cppc_set_perf(int cpu, struct cppc_perf_ctrls *perf_ctrls)
|
||||
struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpu);
|
||||
struct cpc_register_resource *desired_reg;
|
||||
int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpu);
|
||||
struct cppc_pcc_data *pcc_ss_data = pcc_data[pcc_ss_id];
|
||||
struct cppc_pcc_data *pcc_ss_data;
|
||||
int ret = 0;
|
||||
|
||||
if (!cpc_desc || pcc_ss_id < 0) {
|
||||
|
@ -35,13 +35,13 @@ static inline u64 mb_per_tick(int mbps)
|
||||
struct nullb_cmd {
|
||||
struct list_head list;
|
||||
struct llist_node ll_list;
|
||||
call_single_data_t csd;
|
||||
struct __call_single_data csd;
|
||||
struct request *rq;
|
||||
struct bio *bio;
|
||||
unsigned int tag;
|
||||
blk_status_t error;
|
||||
struct nullb_queue *nq;
|
||||
struct hrtimer timer;
|
||||
blk_status_t error;
|
||||
};
|
||||
|
||||
struct nullb_queue {
|
||||
|
@ -22,6 +22,8 @@
|
||||
|
||||
#include "cpufreq_governor.h"
|
||||
|
||||
#define CPUFREQ_DBS_MIN_SAMPLING_INTERVAL (2 * TICK_NSEC / NSEC_PER_USEC)
|
||||
|
||||
static DEFINE_PER_CPU(struct cpu_dbs_info, cpu_dbs);
|
||||
|
||||
static DEFINE_MUTEX(gov_dbs_data_mutex);
|
||||
@ -47,11 +49,15 @@ ssize_t store_sampling_rate(struct gov_attr_set *attr_set, const char *buf,
|
||||
{
|
||||
struct dbs_data *dbs_data = to_dbs_data(attr_set);
|
||||
struct policy_dbs_info *policy_dbs;
|
||||
unsigned int sampling_interval;
|
||||
int ret;
|
||||
ret = sscanf(buf, "%u", &dbs_data->sampling_rate);
|
||||
if (ret != 1)
|
||||
|
||||
ret = sscanf(buf, "%u", &sampling_interval);
|
||||
if (ret != 1 || sampling_interval < CPUFREQ_DBS_MIN_SAMPLING_INTERVAL)
|
||||
return -EINVAL;
|
||||
|
||||
dbs_data->sampling_rate = sampling_interval;
|
||||
|
||||
/*
|
||||
* We are operating under dbs_data->mutex and so the list and its
|
||||
* entries can't be freed concurrently.
|
||||
@ -430,7 +436,14 @@ int cpufreq_dbs_governor_init(struct cpufreq_policy *policy)
|
||||
if (ret)
|
||||
goto free_policy_dbs_info;
|
||||
|
||||
dbs_data->sampling_rate = cpufreq_policy_transition_delay_us(policy);
|
||||
/*
|
||||
* The sampling interval should not be less than the transition latency
|
||||
* of the CPU and it also cannot be too small for dbs_update() to work
|
||||
* correctly.
|
||||
*/
|
||||
dbs_data->sampling_rate = max_t(unsigned int,
|
||||
CPUFREQ_DBS_MIN_SAMPLING_INTERVAL,
|
||||
cpufreq_policy_transition_delay_us(policy));
|
||||
|
||||
if (!have_governor_per_policy())
|
||||
gov->gdbs_data = dbs_data;
|
||||
|
@ -226,17 +226,18 @@ static void imx6q_opp_check_speed_grading(struct device *dev)
|
||||
val >>= OCOTP_CFG3_SPEED_SHIFT;
|
||||
val &= 0x3;
|
||||
|
||||
if ((val != OCOTP_CFG3_SPEED_1P2GHZ) &&
|
||||
of_machine_is_compatible("fsl,imx6q"))
|
||||
if (dev_pm_opp_disable(dev, 1200000000))
|
||||
dev_warn(dev, "failed to disable 1.2GHz OPP\n");
|
||||
if (val < OCOTP_CFG3_SPEED_996MHZ)
|
||||
if (dev_pm_opp_disable(dev, 996000000))
|
||||
dev_warn(dev, "failed to disable 996MHz OPP\n");
|
||||
if (of_machine_is_compatible("fsl,imx6q")) {
|
||||
|
||||
if (of_machine_is_compatible("fsl,imx6q") ||
|
||||
of_machine_is_compatible("fsl,imx6qp")) {
|
||||
if (val != OCOTP_CFG3_SPEED_852MHZ)
|
||||
if (dev_pm_opp_disable(dev, 852000000))
|
||||
dev_warn(dev, "failed to disable 852MHz OPP\n");
|
||||
if (val != OCOTP_CFG3_SPEED_1P2GHZ)
|
||||
if (dev_pm_opp_disable(dev, 1200000000))
|
||||
dev_warn(dev, "failed to disable 1.2GHz OPP\n");
|
||||
}
|
||||
iounmap(base);
|
||||
put_node:
|
||||
|
@ -708,7 +708,7 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
|
||||
unsigned long flags)
|
||||
{
|
||||
struct at_dma_chan *atchan = to_at_dma_chan(chan);
|
||||
struct data_chunk *first = xt->sgl;
|
||||
struct data_chunk *first;
|
||||
struct at_desc *desc = NULL;
|
||||
size_t xfer_count;
|
||||
unsigned int dwidth;
|
||||
@ -720,6 +720,8 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
|
||||
if (unlikely(!xt || xt->numf != 1 || !xt->frame_size))
|
||||
return NULL;
|
||||
|
||||
first = xt->sgl;
|
||||
|
||||
dev_info(chan2dev(chan),
|
||||
"%s: src=%pad, dest=%pad, numf=%d, frame_size=%d, flags=0x%lx\n",
|
||||
__func__, &xt->src_start, &xt->dst_start, xt->numf,
|
||||
|
@ -555,7 +555,7 @@ static int jz4740_dma_probe(struct platform_device *pdev)
|
||||
|
||||
ret = dma_async_device_register(dd);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto err_clk;
|
||||
|
||||
irq = platform_get_irq(pdev, 0);
|
||||
ret = request_irq(irq, jz4740_dma_irq, 0, dev_name(&pdev->dev), dmadev);
|
||||
@ -568,6 +568,8 @@ static int jz4740_dma_probe(struct platform_device *pdev)
|
||||
|
||||
err_unregister:
|
||||
dma_async_device_unregister(dd);
|
||||
err_clk:
|
||||
clk_disable_unprepare(dmadev->clk);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
@ -155,6 +155,12 @@ MODULE_PARM_DESC(run, "Run the test (default: false)");
|
||||
#define PATTERN_COUNT_MASK 0x1f
|
||||
#define PATTERN_MEMSET_IDX 0x01
|
||||
|
||||
/* poor man's completion - we want to use wait_event_freezable() on it */
|
||||
struct dmatest_done {
|
||||
bool done;
|
||||
wait_queue_head_t *wait;
|
||||
};
|
||||
|
||||
struct dmatest_thread {
|
||||
struct list_head node;
|
||||
struct dmatest_info *info;
|
||||
@ -165,6 +171,8 @@ struct dmatest_thread {
|
||||
u8 **dsts;
|
||||
u8 **udsts;
|
||||
enum dma_transaction_type type;
|
||||
wait_queue_head_t done_wait;
|
||||
struct dmatest_done test_done;
|
||||
bool done;
|
||||
};
|
||||
|
||||
@ -342,18 +350,25 @@ static unsigned int dmatest_verify(u8 **bufs, unsigned int start,
|
||||
return error_count;
|
||||
}
|
||||
|
||||
/* poor man's completion - we want to use wait_event_freezable() on it */
|
||||
struct dmatest_done {
|
||||
bool done;
|
||||
wait_queue_head_t *wait;
|
||||
};
|
||||
|
||||
static void dmatest_callback(void *arg)
|
||||
{
|
||||
struct dmatest_done *done = arg;
|
||||
|
||||
struct dmatest_thread *thread =
|
||||
container_of(arg, struct dmatest_thread, done_wait);
|
||||
if (!thread->done) {
|
||||
done->done = true;
|
||||
wake_up_all(done->wait);
|
||||
} else {
|
||||
/*
|
||||
* If thread->done, it means that this callback occurred
|
||||
* after the parent thread has cleaned up. This can
|
||||
* happen in the case that driver doesn't implement
|
||||
* the terminate_all() functionality and a dma operation
|
||||
* did not occur within the timeout period
|
||||
*/
|
||||
WARN(1, "dmatest: Kernel memory may be corrupted!!\n");
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned int min_odd(unsigned int x, unsigned int y)
|
||||
@ -424,9 +439,8 @@ static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len)
|
||||
*/
|
||||
static int dmatest_func(void *data)
|
||||
{
|
||||
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_wait);
|
||||
struct dmatest_thread *thread = data;
|
||||
struct dmatest_done done = { .wait = &done_wait };
|
||||
struct dmatest_done *done = &thread->test_done;
|
||||
struct dmatest_info *info;
|
||||
struct dmatest_params *params;
|
||||
struct dma_chan *chan;
|
||||
@ -673,9 +687,9 @@ static int dmatest_func(void *data)
|
||||
continue;
|
||||
}
|
||||
|
||||
done.done = false;
|
||||
done->done = false;
|
||||
tx->callback = dmatest_callback;
|
||||
tx->callback_param = &done;
|
||||
tx->callback_param = done;
|
||||
cookie = tx->tx_submit(tx);
|
||||
|
||||
if (dma_submit_error(cookie)) {
|
||||
@ -688,21 +702,12 @@ static int dmatest_func(void *data)
|
||||
}
|
||||
dma_async_issue_pending(chan);
|
||||
|
||||
wait_event_freezable_timeout(done_wait, done.done,
|
||||
wait_event_freezable_timeout(thread->done_wait, done->done,
|
||||
msecs_to_jiffies(params->timeout));
|
||||
|
||||
status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
|
||||
|
||||
if (!done.done) {
|
||||
/*
|
||||
* We're leaving the timed out dma operation with
|
||||
* dangling pointer to done_wait. To make this
|
||||
* correct, we'll need to allocate wait_done for
|
||||
* each test iteration and perform "who's gonna
|
||||
* free it this time?" dancing. For now, just
|
||||
* leave it dangling.
|
||||
*/
|
||||
WARN(1, "dmatest: Kernel stack may be corrupted!!\n");
|
||||
if (!done->done) {
|
||||
dmaengine_unmap_put(um);
|
||||
result("test timed out", total_tests, src_off, dst_off,
|
||||
len, 0);
|
||||
@ -789,7 +794,7 @@ static int dmatest_func(void *data)
|
||||
dmatest_KBs(runtime, total_len), ret);
|
||||
|
||||
/* terminate all transfers on specified channels */
|
||||
if (ret)
|
||||
if (ret || failed_tests)
|
||||
dmaengine_terminate_all(chan);
|
||||
|
||||
thread->done = true;
|
||||
@ -849,6 +854,8 @@ static int dmatest_add_threads(struct dmatest_info *info,
|
||||
thread->info = info;
|
||||
thread->chan = dtc->chan;
|
||||
thread->type = type;
|
||||
thread->test_done.wait = &thread->done_wait;
|
||||
init_waitqueue_head(&thread->done_wait);
|
||||
smp_wmb();
|
||||
thread->task = kthread_create(dmatest_func, thread, "%s-%s%u",
|
||||
dma_chan_name(chan), op, i);
|
||||
|
@ -863,11 +863,11 @@ static void fsl_edma_irq_exit(
|
||||
}
|
||||
}
|
||||
|
||||
static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma)
|
||||
static void fsl_disable_clocks(struct fsl_edma_engine *fsl_edma, int nr_clocks)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < DMAMUX_NR; i++)
|
||||
for (i = 0; i < nr_clocks; i++)
|
||||
clk_disable_unprepare(fsl_edma->muxclk[i]);
|
||||
}
|
||||
|
||||
@ -904,25 +904,25 @@ static int fsl_edma_probe(struct platform_device *pdev)
|
||||
|
||||
res = platform_get_resource(pdev, IORESOURCE_MEM, 1 + i);
|
||||
fsl_edma->muxbase[i] = devm_ioremap_resource(&pdev->dev, res);
|
||||
if (IS_ERR(fsl_edma->muxbase[i]))
|
||||
if (IS_ERR(fsl_edma->muxbase[i])) {
|
||||
/* on error: disable all previously enabled clks */
|
||||
fsl_disable_clocks(fsl_edma, i);
|
||||
return PTR_ERR(fsl_edma->muxbase[i]);
|
||||
}
|
||||
|
||||
sprintf(clkname, "dmamux%d", i);
|
||||
fsl_edma->muxclk[i] = devm_clk_get(&pdev->dev, clkname);
|
||||
if (IS_ERR(fsl_edma->muxclk[i])) {
|
||||
dev_err(&pdev->dev, "Missing DMAMUX block clock.\n");
|
||||
/* on error: disable all previously enabled clks */
|
||||
fsl_disable_clocks(fsl_edma, i);
|
||||
return PTR_ERR(fsl_edma->muxclk[i]);
|
||||
}
|
||||
|
||||
ret = clk_prepare_enable(fsl_edma->muxclk[i]);
|
||||
if (ret) {
|
||||
/* disable only clks which were enabled on error */
|
||||
for (; i >= 0; i--)
|
||||
clk_disable_unprepare(fsl_edma->muxclk[i]);
|
||||
|
||||
dev_err(&pdev->dev, "DMAMUX clk block failed.\n");
|
||||
return ret;
|
||||
}
|
||||
if (ret)
|
||||
/* on error: disable all previously enabled clks */
|
||||
fsl_disable_clocks(fsl_edma, i);
|
||||
|
||||
}
|
||||
|
||||
@ -976,7 +976,7 @@ static int fsl_edma_probe(struct platform_device *pdev)
|
||||
if (ret) {
|
||||
dev_err(&pdev->dev,
|
||||
"Can't register Freescale eDMA engine. (%d)\n", ret);
|
||||
fsl_disable_clocks(fsl_edma);
|
||||
fsl_disable_clocks(fsl_edma, DMAMUX_NR);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -985,7 +985,7 @@ static int fsl_edma_probe(struct platform_device *pdev)
|
||||
dev_err(&pdev->dev,
|
||||
"Can't register Freescale eDMA of_dma. (%d)\n", ret);
|
||||
dma_async_device_unregister(&fsl_edma->dma_dev);
|
||||
fsl_disable_clocks(fsl_edma);
|
||||
fsl_disable_clocks(fsl_edma, DMAMUX_NR);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -1015,7 +1015,7 @@ static int fsl_edma_remove(struct platform_device *pdev)
|
||||
fsl_edma_cleanup_vchan(&fsl_edma->dma_dev);
|
||||
of_dma_controller_free(np);
|
||||
dma_async_device_unregister(&fsl_edma->dma_dev);
|
||||
fsl_disable_clocks(fsl_edma);
|
||||
fsl_disable_clocks(fsl_edma, DMAMUX_NR);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -390,7 +390,7 @@ static int ioat_dma_self_test(struct ioatdma_device *ioat_dma)
|
||||
if (memcmp(src, dest, IOAT_TEST_SIZE)) {
|
||||
dev_err(dev, "Self-test copy failed compare, disabling\n");
|
||||
err = -ENODEV;
|
||||
goto free_resources;
|
||||
goto unmap_dma;
|
||||
}
|
||||
|
||||
unmap_dma:
|
||||
|
@ -377,6 +377,7 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
|
||||
u8 *ptr;
|
||||
u8 *rx_buf;
|
||||
u8 sum;
|
||||
u8 rx_byte;
|
||||
int ret = 0, final_ret;
|
||||
|
||||
len = cros_ec_prepare_tx(ec_dev, ec_msg);
|
||||
@ -421,24 +422,21 @@ static int cros_ec_pkt_xfer_spi(struct cros_ec_device *ec_dev,
|
||||
if (!ret) {
|
||||
/* Verify that EC can process command */
|
||||
for (i = 0; i < len; i++) {
|
||||
switch (rx_buf[i]) {
|
||||
case EC_SPI_PAST_END:
|
||||
case EC_SPI_RX_BAD_DATA:
|
||||
case EC_SPI_NOT_READY:
|
||||
ret = -EAGAIN;
|
||||
ec_msg->result = EC_RES_IN_PROGRESS;
|
||||
default:
|
||||
rx_byte = rx_buf[i];
|
||||
if (rx_byte == EC_SPI_PAST_END ||
|
||||
rx_byte == EC_SPI_RX_BAD_DATA ||
|
||||
rx_byte == EC_SPI_NOT_READY) {
|
||||
ret = -EREMOTEIO;
|
||||
break;
|
||||
}
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
ret = cros_ec_spi_receive_packet(ec_dev,
|
||||
ec_msg->insize + sizeof(*response));
|
||||
} else {
|
||||
else
|
||||
dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
|
||||
}
|
||||
|
||||
final_ret = terminate_request(ec_dev);
|
||||
|
||||
@ -508,6 +506,7 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
|
||||
int i, len;
|
||||
u8 *ptr;
|
||||
u8 *rx_buf;
|
||||
u8 rx_byte;
|
||||
int sum;
|
||||
int ret = 0, final_ret;
|
||||
|
||||
@ -544,24 +543,21 @@ static int cros_ec_cmd_xfer_spi(struct cros_ec_device *ec_dev,
|
||||
if (!ret) {
|
||||
/* Verify that EC can process command */
|
||||
for (i = 0; i < len; i++) {
|
||||
switch (rx_buf[i]) {
|
||||
case EC_SPI_PAST_END:
|
||||
case EC_SPI_RX_BAD_DATA:
|
||||
case EC_SPI_NOT_READY:
|
||||
ret = -EAGAIN;
|
||||
ec_msg->result = EC_RES_IN_PROGRESS;
|
||||
default:
|
||||
rx_byte = rx_buf[i];
|
||||
if (rx_byte == EC_SPI_PAST_END ||
|
||||
rx_byte == EC_SPI_RX_BAD_DATA ||
|
||||
rx_byte == EC_SPI_NOT_READY) {
|
||||
ret = -EREMOTEIO;
|
||||
break;
|
||||
}
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ret)
|
||||
ret = cros_ec_spi_receive_response(ec_dev,
|
||||
ec_msg->insize + EC_MSG_TX_PROTO_BYTES);
|
||||
} else {
|
||||
else
|
||||
dev_err(ec_dev->dev, "spi transfer failed: %d\n", ret);
|
||||
}
|
||||
|
||||
final_ret = terminate_request(ec_dev);
|
||||
|
||||
@ -667,6 +663,7 @@ static int cros_ec_spi_probe(struct spi_device *spi)
|
||||
sizeof(struct ec_response_get_protocol_info);
|
||||
ec_dev->dout_size = sizeof(struct ec_host_request);
|
||||
|
||||
ec_spi->last_transfer_ns = ktime_get_ns();
|
||||
|
||||
err = cros_ec_register(ec_dev);
|
||||
if (err) {
|
||||
|
@ -159,13 +159,18 @@ unsigned int twl4030_audio_get_mclk(void)
|
||||
EXPORT_SYMBOL_GPL(twl4030_audio_get_mclk);
|
||||
|
||||
static bool twl4030_audio_has_codec(struct twl4030_audio_data *pdata,
|
||||
struct device_node *node)
|
||||
struct device_node *parent)
|
||||
{
|
||||
struct device_node *node;
|
||||
|
||||
if (pdata && pdata->codec)
|
||||
return true;
|
||||
|
||||
if (of_find_node_by_name(node, "codec"))
|
||||
node = of_get_child_by_name(parent, "codec");
|
||||
if (node) {
|
||||
of_node_put(node);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
@ -97,12 +97,16 @@ static struct reg_sequence twl6040_patch[] = {
|
||||
};
|
||||
|
||||
|
||||
static bool twl6040_has_vibra(struct device_node *node)
|
||||
static bool twl6040_has_vibra(struct device_node *parent)
|
||||
{
|
||||
#ifdef CONFIG_OF
|
||||
if (of_find_node_by_name(node, "vibra"))
|
||||
struct device_node *node;
|
||||
|
||||
node = of_get_child_by_name(parent, "vibra");
|
||||
if (node) {
|
||||
of_node_put(node);
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -32,7 +32,7 @@
|
||||
#include <linux/pci.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/pti.h>
|
||||
#include <linux/intel-pti.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/uaccess.h>
|
||||
|
||||
|
@ -1114,7 +1114,7 @@ static int mtd_check_oob_ops(struct mtd_info *mtd, loff_t offs,
|
||||
if (!ops->oobbuf)
|
||||
ops->ooblen = 0;
|
||||
|
||||
if (offs < 0 || offs + ops->len >= mtd->size)
|
||||
if (offs < 0 || offs + ops->len > mtd->size)
|
||||
return -EINVAL;
|
||||
|
||||
if (ops->ooblen) {
|
||||
|
@ -1763,7 +1763,7 @@ static int brcmnand_read(struct mtd_info *mtd, struct nand_chip *chip,
|
||||
err = brcmstb_nand_verify_erased_page(mtd, chip, buf,
|
||||
addr);
|
||||
/* erased page bitflips corrected */
|
||||
if (err > 0)
|
||||
if (err >= 0)
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -253,9 +253,9 @@ static int gpio_nand_probe(struct platform_device *pdev)
|
||||
goto out_ce;
|
||||
}
|
||||
|
||||
gpiomtd->nwp = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
|
||||
if (IS_ERR(gpiomtd->nwp)) {
|
||||
ret = PTR_ERR(gpiomtd->nwp);
|
||||
gpiomtd->ale = devm_gpiod_get(dev, "ale", GPIOD_OUT_LOW);
|
||||
if (IS_ERR(gpiomtd->ale)) {
|
||||
ret = PTR_ERR(gpiomtd->ale);
|
||||
goto out_ce;
|
||||
}
|
||||
|
||||
|
@ -1067,9 +1067,6 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* handle the block mark swapping */
|
||||
block_mark_swapping(this, payload_virt, auxiliary_virt);
|
||||
|
||||
/* Loop over status bytes, accumulating ECC status. */
|
||||
status = auxiliary_virt + nfc_geo->auxiliary_status_offset;
|
||||
|
||||
@ -1158,6 +1155,9 @@ static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
|
||||
max_bitflips = max_t(unsigned int, max_bitflips, *status);
|
||||
}
|
||||
|
||||
/* handle the block mark swapping */
|
||||
block_mark_swapping(this, buf, auxiliary_virt);
|
||||
|
||||
if (oob_required) {
|
||||
/*
|
||||
* It's time to deliver the OOB bytes. See gpmi_ecc_read_oob()
|
||||
|
@ -159,6 +159,8 @@ struct arc_emac_priv {
|
||||
unsigned int link;
|
||||
unsigned int duplex;
|
||||
unsigned int speed;
|
||||
|
||||
unsigned int rx_missed_errors;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@@ -26,6 +26,8 @@

#include "emac.h"

static void arc_emac_restart(struct net_device *ndev);

/**
* arc_emac_tx_avail - Return the number of available slots in the tx ring.
* @priv: Pointer to ARC EMAC private data structure.

@@ -210,39 +212,48 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
continue;
}

pktlen = info & LEN_MASK;
stats->rx_packets++;
stats->rx_bytes += pktlen;
skb = rx_buff->skb;
skb_put(skb, pktlen);
skb->dev = ndev;
skb->protocol = eth_type_trans(skb, ndev);

dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);

/* Prepare the BD for next cycle */
rx_buff->skb = netdev_alloc_skb_ip_align(ndev,
EMAC_BUFFER_SIZE);
if (unlikely(!rx_buff->skb)) {
/* Prepare the BD for next cycle. netif_receive_skb()
* only if new skb was allocated and mapped to avoid holes
* in the RX fifo.
*/
skb = netdev_alloc_skb_ip_align(ndev, EMAC_BUFFER_SIZE);
if (unlikely(!skb)) {
if (net_ratelimit())
netdev_err(ndev, "cannot allocate skb\n");
/* Return ownership to EMAC */
rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
stats->rx_errors++;
/* Because receive_skb is below, increment rx_dropped */
stats->rx_dropped++;
continue;
}

/* receive_skb only if new skb was allocated to avoid holes */
netif_receive_skb(skb);

addr = dma_map_single(&ndev->dev, (void *)rx_buff->skb->data,
addr = dma_map_single(&ndev->dev, (void *)skb->data,
EMAC_BUFFER_SIZE, DMA_FROM_DEVICE);
if (dma_mapping_error(&ndev->dev, addr)) {
if (net_ratelimit())
netdev_err(ndev, "cannot dma map\n");
dev_kfree_skb(rx_buff->skb);
netdev_err(ndev, "cannot map dma buffer\n");
dev_kfree_skb(skb);
/* Return ownership to EMAC */
rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
stats->rx_errors++;
stats->rx_dropped++;
continue;
}

/* unmap previosly mapped skb */
dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);

pktlen = info & LEN_MASK;
stats->rx_packets++;
stats->rx_bytes += pktlen;
skb_put(rx_buff->skb, pktlen);
rx_buff->skb->dev = ndev;
rx_buff->skb->protocol = eth_type_trans(rx_buff->skb, ndev);

netif_receive_skb(rx_buff->skb);

rx_buff->skb = skb;
dma_unmap_addr_set(rx_buff, addr, addr);
dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE);

@@ -258,6 +269,53 @@ static int arc_emac_rx(struct net_device *ndev, int budget)
return work_done;
}

/**
* arc_emac_rx_miss_handle - handle R_MISS register
* @ndev: Pointer to the net_device structure.
*/
static void arc_emac_rx_miss_handle(struct net_device *ndev)
{
struct arc_emac_priv *priv = netdev_priv(ndev);
struct net_device_stats *stats = &ndev->stats;
unsigned int miss;

miss = arc_reg_get(priv, R_MISS);
if (miss) {
stats->rx_errors += miss;
stats->rx_missed_errors += miss;
priv->rx_missed_errors += miss;
}
}

/**
* arc_emac_rx_stall_check - check RX stall
* @ndev: Pointer to the net_device structure.
* @budget: How many BDs requested to process on 1 call.
* @work_done: How many BDs processed
*
* Under certain conditions EMAC stop reception of incoming packets and
* continuously increment R_MISS register instead of saving data into
* provided buffer. This function detect that condition and restart
* EMAC.
*/
static void arc_emac_rx_stall_check(struct net_device *ndev,
int budget, unsigned int work_done)
{
struct arc_emac_priv *priv = netdev_priv(ndev);
struct arc_emac_bd *rxbd;

if (work_done)
priv->rx_missed_errors = 0;

if (priv->rx_missed_errors && budget) {
rxbd = &priv->rxbd[priv->last_rx_bd];
if (le32_to_cpu(rxbd->info) & FOR_EMAC) {
arc_emac_restart(ndev);
priv->rx_missed_errors = 0;
}
}
}

/**
* arc_emac_poll - NAPI poll handler.
* @napi: Pointer to napi_struct structure.

@@ -272,6 +330,7 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
unsigned int work_done;

arc_emac_tx_clean(ndev);
arc_emac_rx_miss_handle(ndev);

work_done = arc_emac_rx(ndev, budget);
if (work_done < budget) {

@@ -279,6 +338,8 @@ static int arc_emac_poll(struct napi_struct *napi, int budget)
arc_reg_or(priv, R_ENABLE, RXINT_MASK | TXINT_MASK);
}

arc_emac_rx_stall_check(ndev, budget, work_done);

return work_done;
}

@@ -320,6 +381,8 @@ static irqreturn_t arc_emac_intr(int irq, void *dev_instance)
if (status & MSER_MASK) {
stats->rx_missed_errors += 0x100;
stats->rx_errors += 0x100;
priv->rx_missed_errors += 0x100;
napi_schedule(&priv->napi);
}

if (status & RXCR_MASK) {

@@ -732,6 +795,63 @@ static int arc_emac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
}

/**
* arc_emac_restart - Restart EMAC
* @ndev: Pointer to net_device structure.
*
* This function do hardware reset of EMAC in order to restore
* network packets reception.
*/
static void arc_emac_restart(struct net_device *ndev)
{
struct arc_emac_priv *priv = netdev_priv(ndev);
struct net_device_stats *stats = &ndev->stats;
int i;

if (net_ratelimit())
netdev_warn(ndev, "restarting stalled EMAC\n");

netif_stop_queue(ndev);

/* Disable interrupts */
arc_reg_clr(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);

/* Disable EMAC */
arc_reg_clr(priv, R_CTRL, EN_MASK);

/* Return the sk_buff to system */
arc_free_tx_queue(ndev);

/* Clean Tx BD's */
priv->txbd_curr = 0;
priv->txbd_dirty = 0;
memset(priv->txbd, 0, TX_RING_SZ);

for (i = 0; i < RX_BD_NUM; i++) {
struct arc_emac_bd *rxbd = &priv->rxbd[i];
unsigned int info = le32_to_cpu(rxbd->info);

if (!(info & FOR_EMAC)) {
stats->rx_errors++;
stats->rx_dropped++;
}
/* Return ownership to EMAC */
rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
}
priv->last_rx_bd = 0;

/* Make sure info is visible to EMAC before enable */
wmb();

/* Enable interrupts */
arc_reg_set(priv, R_ENABLE, RXINT_MASK | TXINT_MASK | ERR_MASK);

/* Enable EMAC */
arc_reg_or(priv, R_CTRL, EN_MASK);

netif_start_queue(ndev);
}

static const struct net_device_ops arc_emac_netdev_ops = {
.ndo_open = arc_emac_open,
.ndo_stop = arc_emac_stop,
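The reworked arc_emac_rx() loop above reorders the RX refill path: the replacement skb is allocated and DMA-mapped first, and only when both steps succeed is the filled skb unmapped, passed to the stack, and swapped out of the ring slot; on any failure the old buffer stays in place and the frame is counted as dropped, so the RX FIFO never gets a hole. A condensed sketch of that ordering, using the calls and fields visible in the hunk; the wrapper function and its signature are invented for illustration:

/* Condensed sketch of the ordering established above; not the driver's code. */
static int arc_emac_rx_refill_one(struct net_device *ndev,
				  struct buffer_state *rx_buff,
				  struct arc_emac_bd *rxbd,
				  unsigned int pktlen)
{
	struct sk_buff *skb;
	dma_addr_t addr;

	/* 1. Get and map the replacement buffer before touching the old one. */
	skb = netdev_alloc_skb_ip_align(ndev, EMAC_BUFFER_SIZE);
	if (unlikely(!skb))
		goto recycle;

	addr = dma_map_single(&ndev->dev, skb->data, EMAC_BUFFER_SIZE,
			      DMA_FROM_DEVICE);
	if (dma_mapping_error(&ndev->dev, addr)) {
		dev_kfree_skb(skb);
		goto recycle;
	}

	/* 2. Only now unmap the filled buffer and hand it to the stack. */
	dma_unmap_single(&ndev->dev, dma_unmap_addr(rx_buff, addr),
			 dma_unmap_len(rx_buff, len), DMA_FROM_DEVICE);
	skb_put(rx_buff->skb, pktlen);
	rx_buff->skb->protocol = eth_type_trans(rx_buff->skb, ndev);
	netif_receive_skb(rx_buff->skb);

	/* 3. Install the replacement and return the descriptor to the EMAC. */
	rx_buff->skb = skb;
	dma_unmap_addr_set(rx_buff, addr, addr);
	dma_unmap_len_set(rx_buff, len, EMAC_BUFFER_SIZE);
	rxbd->data = cpu_to_le32(addr);
	wmb();	/* descriptor data must be visible before ownership flips */
	rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
	return 0;

recycle:
	/* Keep the old buffer in the slot so the RX FIFO has no hole. */
	rxbd->info = cpu_to_le32(FOR_EMAC | EMAC_BUFFER_SIZE);
	ndev->stats.rx_errors++;
	ndev->stats.rx_dropped++;
	return -ENOMEM;
}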
@@ -14225,7 +14225,9 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
/* Reset PHY, otherwise the read DMA engine will be in a mode that
* breaks all requests to 256 bytes.
*/
if (tg3_asic_rev(tp) == ASIC_REV_57766)
if (tg3_asic_rev(tp) == ASIC_REV_57766 ||
tg3_asic_rev(tp) == ASIC_REV_5717 ||
tg3_asic_rev(tp) == ASIC_REV_5719)
reset_phy = true;

err = tg3_restart_hw(tp, reset_phy);
@@ -1214,6 +1214,10 @@ static void mvneta_port_disable(struct mvneta_port *pp)
val &= ~MVNETA_GMAC0_PORT_ENABLE;
mvreg_write(pp, MVNETA_GMAC_CTRL_0, val);

pp->link = 0;
pp->duplex = -1;
pp->speed = 0;

udelay(200);
}

@@ -1958,9 +1962,9 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,

if (!mvneta_rxq_desc_is_first_last(rx_status) ||
(rx_status & MVNETA_RXD_ERR_SUMMARY)) {
mvneta_rx_error(pp, rx_desc);
err_drop_frame:
dev->stats.rx_errors++;
mvneta_rx_error(pp, rx_desc);
/* leave the descriptor untouched */
continue;
}

@@ -3011,7 +3015,7 @@ static void mvneta_cleanup_rxqs(struct mvneta_port *pp)
{
int queue;

for (queue = 0; queue < txq_number; queue++)
for (queue = 0; queue < rxq_number; queue++)
mvneta_rxq_deinit(pp, &pp->rxqs[queue]);
}
@@ -1961,11 +1961,12 @@ static int mtk_hw_init(struct mtk_eth *eth)
/* set GE2 TUNE */
regmap_write(eth->pctl, GPIO_BIAS_CTRL, 0x0);

/* GE1, Force 1000M/FD, FC ON */
mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(0));

/* GE2, Force 1000M/FD, FC ON */
mtk_w32(eth, MAC_MCR_FIXED_LINK, MTK_MAC_MCR(1));
/* Set linkdown as the default for each GMAC. Its own MCR would be set
* up with the more appropriate value when mtk_phy_link_adjust call is
* being invoked.
*/
for (i = 0; i < MTK_MAC_COUNT; i++)
mtk_w32(eth, 0, MTK_MAC_MCR(i));

/* Indicates CDM to parse the MTK special tag from CPU
* which also is working out for untag packets.
@@ -362,7 +362,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
case MLX5_CMD_OP_ALLOC_Q_COUNTER:
case MLX5_CMD_OP_QUERY_Q_COUNTER:
case MLX5_CMD_OP_SET_RATE_LIMIT:
case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
case MLX5_CMD_OP_QUERY_RATE_LIMIT:
case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:

@@ -505,7 +505,7 @@ const char *mlx5_command_str(int command)
MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT);
MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
@@ -82,6 +82,9 @@
max_t(u32, MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(mdev), req)
#define MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 6)
#define MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) MLX5_MPWRQ_LOG_STRIDE_SZ(mdev, 8)
#define MLX5E_MPWQE_STRIDE_SZ(mdev, cqe_cmprs) \
(cqe_cmprs ? MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) : \
MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev))

#define MLX5_MPWRQ_LOG_WQE_SZ 18
#define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \

@@ -590,6 +593,7 @@ struct mlx5e_channel {
struct mlx5_core_dev *mdev;
struct hwtstamp_config *tstamp;
int ix;
int cpu;
};

struct mlx5e_channels {

@@ -935,8 +939,9 @@ void mlx5e_set_tx_cq_mode_params(struct mlx5e_params *params,
u8 cq_period_mode);
void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
u8 cq_period_mode);
void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params, u8 rq_type);
void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params,
u8 rq_type);

static inline bool mlx5e_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev)
{
@@ -274,6 +274,7 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
struct ieee_ets *ets)
{
bool have_ets_tc = false;
int bw_sum = 0;
int i;

@@ -288,11 +289,14 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
}

/* Validate Bandwidth Sum */
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS)
for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
have_ets_tc = true;
bw_sum += ets->tc_tx_bw[i];
}
}

if (bw_sum != 0 && bw_sum != 100) {
if (have_ets_tc && bw_sum != 100) {
netdev_err(netdev,
"Failed to validate ETS: BW sum is illegal\n");
return -EINVAL;
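The validation change above only enforces the 100% bandwidth sum when at least one traffic class actually selects the ETS transmission algorithm, which is what lets a configuration with no ETS TCs (bandwidth sum of 0) pass. A self-contained sketch of the same check; the array layout, constant value, and names are illustrative stand-ins, not the kernel's ieee_ets definitions:

#include <stdbool.h>
#include <stdio.h>

#define MAX_TCS 8
#define TSA_ETS 2	/* stand-in for IEEE_8021QAZ_TSA_ETS */

/* Mirrors the logic in the hunk above: sum bandwidth only over ETS TCs and
 * require exactly 100% only when such TCs exist.
 */
static bool ets_bw_valid(const int tsa[MAX_TCS], const int tx_bw[MAX_TCS])
{
	bool have_ets_tc = false;
	int bw_sum = 0;
	int i;

	for (i = 0; i < MAX_TCS; i++) {
		if (tsa[i] == TSA_ETS) {
			have_ets_tc = true;
			bw_sum += tx_bw[i];
		}
	}
	return !have_ets_tc || bw_sum == 100;
}

int main(void)
{
	int tsa[MAX_TCS]   = { TSA_ETS, TSA_ETS, 0, 0, 0, 0, 0, 0 };
	int tx_bw[MAX_TCS] = { 60, 40, 0, 0, 0, 0, 0, 0 };

	printf("valid: %d\n", ets_bw_valid(tsa, tx_bw)); /* prints 1 */
	return 0;
}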
@@ -1523,8 +1523,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
new_channels.params = priv->channels.params;
MLX5E_SET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_COMPRESS, new_val);

mlx5e_set_rq_type_params(priv->mdev, &new_channels.params,
new_channels.params.rq_wq_type);
new_channels.params.mpwqe_log_stride_sz =
MLX5E_MPWQE_STRIDE_SZ(priv->mdev, new_val);
new_channels.params.mpwqe_log_num_strides =
MLX5_MPWRQ_LOG_WQE_SZ - new_channels.params.mpwqe_log_stride_sz;

if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
priv->channels.params = new_channels.params;

@@ -1536,6 +1538,10 @@ int mlx5e_modify_rx_cqe_compression_locked(struct mlx5e_priv *priv, bool new_val
return err;

mlx5e_switch_priv_channels(priv, &new_channels, NULL);
mlx5e_dbg(DRV, priv, "MLX5E: RxCqeCmprss was turned %s\n",
MLX5E_GET_PFLAG(&priv->channels.params,
MLX5E_PFLAG_RX_CQE_COMPRESS) ? "ON" : "OFF");

return 0;
}
@@ -71,11 +71,6 @@ struct mlx5e_channel_param {
struct mlx5e_cq_param icosq_cq;
};

static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
{
return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
}

static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
{
return MLX5_CAP_GEN(mdev, striding_rq) &&

@@ -83,7 +78,7 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
MLX5_CAP_ETH(mdev, reg_umr_sq);
}

void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
void mlx5e_init_rq_type_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params, u8 rq_type)
{
params->rq_wq_type = rq_type;

@@ -93,10 +88,8 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
params->log_rq_size = is_kdump_kernel() ?
MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW :
MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
params->mpwqe_log_stride_sz =
MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS) ?
MLX5_MPWRQ_CQE_CMPRS_LOG_STRIDE_SZ(mdev) :
MLX5_MPWRQ_DEF_LOG_STRIDE_SZ(mdev);
params->mpwqe_log_stride_sz = MLX5E_MPWQE_STRIDE_SZ(mdev,
MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
params->mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
params->mpwqe_log_stride_sz;
break;

@@ -120,13 +113,14 @@ void mlx5e_set_rq_type_params(struct mlx5_core_dev *mdev,
MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS));
}

static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
static void mlx5e_set_rq_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
{
u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) &&
!params->xdp_prog && !MLX5_IPSEC_DEV(mdev) ?
MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
MLX5_WQ_TYPE_LINKED_LIST;
mlx5e_set_rq_type_params(mdev, params, rq_type);
mlx5e_init_rq_type_params(mdev, params, rq_type);
}

static void mlx5e_update_carrier(struct mlx5e_priv *priv)

@@ -444,17 +438,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
int mtt_sz = mlx5e_get_wqe_mtt_sz();
int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
int node = mlx5e_get_node(c->priv, c->ix);
int i;

rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
GFP_KERNEL, node);
GFP_KERNEL, cpu_to_node(c->cpu));
if (!rq->mpwqe.info)
goto err_out;

/* We allocate more than mtt_sz as we will align the pointer */
rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz,
GFP_KERNEL, node);
rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
cpu_to_node(c->cpu));
if (unlikely(!rq->mpwqe.mtt_no_align))
goto err_free_wqe_info;

@@ -562,7 +555,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
int err;
int i;

rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
rqp->wq.db_numa_node = cpu_to_node(c->cpu);

err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
&rq->wq_ctrl);

@@ -629,8 +622,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
default: /* MLX5_WQ_TYPE_LINKED_LIST */
rq->wqe.frag_info =
kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
GFP_KERNEL,
mlx5e_get_node(c->priv, c->ix));
GFP_KERNEL, cpu_to_node(c->cpu));
if (!rq->wqe.frag_info) {
err = -ENOMEM;
goto err_rq_wq_destroy;

@@ -1000,13 +992,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
sq->uar_map = mdev->mlx5e_res.bfreg.map;
sq->min_inline_mode = params->tx_min_inline_mode;

param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
if (err)
return err;
sq->wq.db = &sq->wq.db[MLX5_SND_DBR];

err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix));
err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
if (err)
goto err_sq_wq_destroy;

@@ -1053,13 +1045,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
sq->channel = c;
sq->uar_map = mdev->mlx5e_res.bfreg.map;

param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
if (err)
return err;
sq->wq.db = &sq->wq.db[MLX5_SND_DBR];

err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix));
err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
if (err)
goto err_sq_wq_destroy;

@@ -1126,13 +1118,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
if (MLX5_IPSEC_DEV(c->priv->mdev))
set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);

param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
param->wq.db_numa_node = cpu_to_node(c->cpu);
err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
if (err)
return err;
sq->wq.db = &sq->wq.db[MLX5_SND_DBR];

err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix));
err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
if (err)
goto err_sq_wq_destroy;

@@ -1504,8 +1496,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
struct mlx5_core_dev *mdev = c->priv->mdev;
int err;

param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix);
param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
param->wq.buf_numa_node = cpu_to_node(c->cpu);
param->wq.db_numa_node = cpu_to_node(c->cpu);
param->eq_ix = c->ix;

err = mlx5e_alloc_cq_common(mdev, param, cq);

@@ -1604,6 +1596,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
mlx5e_free_cq(cq);
}

static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
{
return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
}

static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
struct mlx5e_params *params,
struct mlx5e_channel_param *cparam)

@@ -1752,12 +1749,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
{
struct mlx5e_cq_moder icocq_moder = {0, 0};
struct net_device *netdev = priv->netdev;
int cpu = mlx5e_get_cpu(priv, ix);
struct mlx5e_channel *c;
unsigned int irq;
int err;
int eqn;

c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix));
c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
if (!c)
return -ENOMEM;

@@ -1765,6 +1763,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
c->mdev = priv->mdev;
c->tstamp = &priv->tstamp;
c->ix = ix;
c->cpu = cpu;
c->pdev = &priv->mdev->pdev->dev;
c->netdev = priv->netdev;
c->mkey_be = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);

@@ -1853,8 +1852,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
for (tc = 0; tc < c->num_tc; tc++)
mlx5e_activate_txqsq(&c->sq[tc]);
mlx5e_activate_rq(&c->rq);
netif_set_xps_queue(c->netdev,
mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
}

static void mlx5e_deactivate_channel(struct mlx5e_channel *c)

@@ -3679,6 +3677,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
struct sk_buff *skb,
netdev_features_t features)
{
unsigned int offset = 0;
struct udphdr *udph;
u8 proto;
u16 port;

@@ -3688,7 +3687,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
proto = ip_hdr(skb)->protocol;
break;
case htons(ETH_P_IPV6):
proto = ipv6_hdr(skb)->nexthdr;
proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
break;
default:
goto out;
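The en_main.c hunks above replace the per-channel NUMA-node lookup with an explicit CPU taken from the completion vector's affinity mask; that CPU is cached in c->cpu and reused both for NUMA-aware allocation and for XPS steering. A short sketch of how the CPU is derived and used for an allocation; the wrapper function is hypothetical, while the fields and calls are the ones visible in the hunks:

/* Sketch only: derive the channel's CPU from its IRQ affinity mask and
 * allocate on that CPU's NUMA node, as mlx5e_get_cpu()/cpu_to_node() above.
 */
static void *alloc_for_channel(struct mlx5e_priv *priv, int ix, size_t size)
{
	int cpu = cpumask_first(priv->mdev->priv.irq_info[ix].mask);

	return kzalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));
}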
@@ -466,7 +466,7 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
break;
case MLX5_EVENT_TYPE_CQ_ERROR:
cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrom 0x%x\n",
mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
cqn, eqe->data.cq_err.syndrome);
mlx5_cq_event(dev, cqn, eqe->type);
break;

@@ -775,7 +775,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
return err;
}

int mlx5_stop_eqs(struct mlx5_core_dev *dev)
void mlx5_stop_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = &dev->priv.eq_table;
int err;

@@ -784,22 +784,26 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev)
if (MLX5_CAP_GEN(dev, pg)) {
err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
if (err)
return err;
mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
err);
}
#endif

err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
if (err)
return err;
mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
err);

mlx5_destroy_unmap_eq(dev, &table->async_eq);
err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
if (err)
mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
err);
mlx5_cmd_use_polling(dev);

err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
if (err)
mlx5_cmd_use_events(dev);

return err;
mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
err);
}

int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
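mlx5_stop_eqs() above changes from int to void: a failure to destroy one EQ is now logged and teardown continues, instead of returning early and leaking the remaining EQs. The same best-effort teardown shape in a minimal, self-contained form (all names are illustrative):

#include <stdio.h>

/* Illustrative stand-ins for the individual teardown steps. */
static int destroy_a(void) { return 0; }
static int destroy_b(void) { return -5; }	/* pretend this one fails */
static int destroy_c(void) { return 0; }

/* Best-effort teardown: log failures but never return early, so later
 * resources are still released, which is the shape mlx5_stop_eqs() now has.
 */
static void stop_all(void)
{
	int err;

	if ((err = destroy_a()))
		fprintf(stderr, "failed to destroy a, err(%d)\n", err);
	if ((err = destroy_b()))
		fprintf(stderr, "failed to destroy b, err(%d)\n", err);
	if ((err = destroy_c()))
		fprintf(stderr, "failed to destroy c, err(%d)\n", err);
}

int main(void)
{
	stop_all();
	return 0;
}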
@@ -66,6 +66,9 @@ static int mlx5_fpga_mem_read_i2c(struct mlx5_fpga_device *fdev, size_t size,
u8 actual_size;
int err;

if (!size)
return -EINVAL;

if (!fdev->mdev)
return -ENOTCONN;

@@ -95,6 +98,9 @@ static int mlx5_fpga_mem_write_i2c(struct mlx5_fpga_device *fdev, size_t size,
u8 actual_size;
int err;

if (!size)
return -EINVAL;

if (!fdev->mdev)
return -ENOTCONN;
@@ -174,6 +174,8 @@ static void del_hw_fte(struct fs_node *node);
static void del_sw_flow_table(struct fs_node *node);
static void del_sw_flow_group(struct fs_node *node);
static void del_sw_fte(struct fs_node *node);
static void del_sw_prio(struct fs_node *node);
static void del_sw_ns(struct fs_node *node);
/* Delete rule (destination) is special case that
* requires to lock the FTE for all the deletion process.
*/

@@ -408,6 +410,16 @@ static inline struct mlx5_core_dev *get_dev(struct fs_node *node)
return NULL;
}

static void del_sw_ns(struct fs_node *node)
{
kfree(node);
}

static void del_sw_prio(struct fs_node *node)
{
kfree(node);
}

static void del_hw_flow_table(struct fs_node *node)
{
struct mlx5_flow_table *ft;

@@ -2064,7 +2076,7 @@ static struct fs_prio *fs_create_prio(struct mlx5_flow_namespace *ns,
return ERR_PTR(-ENOMEM);

fs_prio->node.type = FS_TYPE_PRIO;
tree_init_node(&fs_prio->node, NULL, NULL);
tree_init_node(&fs_prio->node, NULL, del_sw_prio);
tree_add_node(&fs_prio->node, &ns->node);
fs_prio->num_levels = num_levels;
fs_prio->prio = prio;

@@ -2090,7 +2102,7 @@ static struct mlx5_flow_namespace *fs_create_namespace(struct fs_prio *prio)
return ERR_PTR(-ENOMEM);

fs_init_namespace(ns);
tree_init_node(&ns->node, NULL, NULL);
tree_init_node(&ns->node, NULL, del_sw_ns);
tree_add_node(&ns->node, &prio->node);
list_add_tail(&ns->node.list, &prio->node.children);
@@ -241,7 +241,7 @@ static void print_health_info(struct mlx5_core_dev *dev)
u32 fw;
int i;

/* If the syndrom is 0, the device is OK and no need to print buffer */
/* If the syndrome is 0, the device is OK and no need to print buffer */
if (!ioread8(&h->synd))
return;
@@ -57,7 +57,7 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
struct mlx5e_params *params)
{
/* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
mlx5e_set_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);
mlx5e_init_rq_type_params(mdev, params, MLX5_WQ_TYPE_LINKED_LIST);

/* RQ size in ipoib by default is 512 */
params->log_rq_size = is_kdump_kernel() ?
@@ -317,9 +317,6 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
{
struct mlx5_priv *priv = &dev->priv;
struct mlx5_eq_table *table = &priv->eq_table;
struct irq_affinity irqdesc = {
.pre_vectors = MLX5_EQ_VEC_COMP_BASE,
};
int num_eqs = 1 << MLX5_CAP_GEN(dev, log_max_eq);
int nvec;

@@ -333,10 +330,9 @@ static int mlx5_alloc_irq_vectors(struct mlx5_core_dev *dev)
if (!priv->irq_info)
goto err_free_msix;

nvec = pci_alloc_irq_vectors_affinity(dev->pdev,
nvec = pci_alloc_irq_vectors(dev->pdev,
MLX5_EQ_VEC_COMP_BASE + 1, nvec,
PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
&irqdesc);
PCI_IRQ_MSIX);
if (nvec < 0)
return nvec;

@@ -622,6 +618,63 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev)
return (u64)timer_l | (u64)timer_h1 << 32;
}

static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
{
struct mlx5_priv *priv = &mdev->priv;
int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);

if (!zalloc_cpumask_var(&priv->irq_info[i].mask, GFP_KERNEL)) {
mlx5_core_warn(mdev, "zalloc_cpumask_var failed");
return -ENOMEM;
}

cpumask_set_cpu(cpumask_local_spread(i, priv->numa_node),
priv->irq_info[i].mask);

if (IS_ENABLED(CONFIG_SMP) &&
irq_set_affinity_hint(irq, priv->irq_info[i].mask))
mlx5_core_warn(mdev, "irq_set_affinity_hint failed, irq 0x%.4x", irq);

return 0;
}

static void mlx5_irq_clear_affinity_hint(struct mlx5_core_dev *mdev, int i)
{
struct mlx5_priv *priv = &mdev->priv;
int irq = pci_irq_vector(mdev->pdev, MLX5_EQ_VEC_COMP_BASE + i);

irq_set_affinity_hint(irq, NULL);
free_cpumask_var(priv->irq_info[i].mask);
}

static int mlx5_irq_set_affinity_hints(struct mlx5_core_dev *mdev)
{
int err;
int i;

for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++) {
err = mlx5_irq_set_affinity_hint(mdev, i);
if (err)
goto err_out;
}

return 0;

err_out:
for (i--; i >= 0; i--)
mlx5_irq_clear_affinity_hint(mdev, i);

return err;
}

static void mlx5_irq_clear_affinity_hints(struct mlx5_core_dev *mdev)
{
int i;

for (i = 0; i < mdev->priv.eq_table.num_comp_vectors; i++)
mlx5_irq_clear_affinity_hint(mdev, i);
}

int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn,
unsigned int *irqn)
{

@@ -1097,6 +1150,12 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
goto err_stop_eqs;
}

err = mlx5_irq_set_affinity_hints(dev);
if (err) {
dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n");
goto err_affinity_hints;
}

err = mlx5_init_fs(dev);
if (err) {
dev_err(&pdev->dev, "Failed to init flow steering\n");

@@ -1154,6 +1213,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
mlx5_cleanup_fs(dev);

err_fs:
mlx5_irq_clear_affinity_hints(dev);

err_affinity_hints:
free_comp_eqs(dev);

err_stop_eqs:

@@ -1222,6 +1284,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,

mlx5_sriov_detach(dev);
mlx5_cleanup_fs(dev);
mlx5_irq_clear_affinity_hints(dev);
free_comp_eqs(dev);
mlx5_stop_eqs(dev);
mlx5_put_uars_page(dev, priv->uar);
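The mlx5_load_one() hunks above slot the new affinity-hint setup into the existing goto-based unwind: each setup step gets an error label positioned so that a failure releases exactly what was initialized before it, in reverse order. A compact, self-contained sketch of that unwind pattern (all names are illustrative):

#include <stdio.h>

/* Toy stand-ins for the individual setup/teardown steps. */
static int setup_eqs(void)             { return 0; }
static int set_affinity_hints(void)    { return 0; }
static int init_fs(void)               { return -1; }	/* pretend this fails */
static void clear_affinity_hints(void) { puts("clear affinity hints"); }
static void free_eqs(void)             { puts("free eqs"); }

static int load_all(void)
{
	int err;

	err = setup_eqs();
	if (err)
		goto err_out;

	err = set_affinity_hints();
	if (err)
		goto err_affinity_hints;

	err = init_fs();
	if (err)
		goto err_fs;

	return 0;

err_fs:
	clear_affinity_hints();	/* undo the step that succeeded before init_fs */
err_affinity_hints:
	free_eqs();		/* undo setup_eqs() */
err_out:
	return err;
}

int main(void)
{
	printf("load_all() = %d\n", load_all());
	return 0;
}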
@@ -213,8 +213,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
err_cmd:
memset(din, 0, sizeof(din));
memset(dout, 0, sizeof(dout));
MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
return err;
}