mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
05ac653054
Under certain loads, this soft lockup has been observed: BUG: soft lockup - CPU#2 stuck for 22s! [ip6tables:1016] Modules linked in: ip6t_rpfilter ip6t_REJECT cfg80211 rfkill xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw vfat fat efivarfs xfs libcrc32c CPU: 2 PID: 1016 Comm: ip6tables Not tainted 3.13.0-0.rc7.30.sa2.aarch64 #1 task: fffffe03e81d1400 ti: fffffe03f01f8000 task.ti: fffffe03f01f8000 PC is at __cpu_flush_kern_tlb_range+0xc/0x40 LR is at __purge_vmap_area_lazy+0x28c/0x3ac pc : [<fffffe000009c5cc>] lr : [<fffffe0000182710>] pstate: 80000145 sp : fffffe03f01fbb70 x29: fffffe03f01fbb70 x28: fffffe03f01f8000 x27: fffffe0000b19000 x26: 00000000000000d0 x25: 000000000000001c x24: fffffe03f01fbc50 x23: fffffe03f01fbc58 x22: fffffe03f01fbc10 x21: fffffe0000b2a3f8 x20: 0000000000000802 x19: fffffe0000b2a3c8 x18: 000003fffdf52710 x17: 000003ff9d8bb910 x16: fffffe000050fbfc x15: 0000000000005735 x14: 000003ff9d7e1a5c x13: 0000000000000000 x12: 000003ff9d7e1a5c x11: 0000000000000007 x10: fffffe0000c09af0 x9 : fffffe0000ad1000 x8 : 000000000000005c x7 : fffffe03e8624000 x6 : 0000000000000000 x5 : 0000000000000000 x4 : 0000000000000000 x3 : fffffe0000c09cc8 x2 : 0000000000000000 x1 : 000fffffdfffca80 x0 : 000fffffcd742150 The __cpu_flush_kern_tlb_range() function looks like: ENTRY(__cpu_flush_kern_tlb_range) dsb sy lsr x0, x0, #12 lsr x1, x1, #12 1: tlbi vaae1is, x0 add x0, x0, #1 cmp x0, x1 b.lo 1b dsb sy isb ret ENDPROC(__cpu_flush_kern_tlb_range) The above soft lockup shows the PC at tlbi insn with: x0 = 0x000fffffcd742150 x1 = 0x000fffffdfffca80 So __cpu_flush_kern_tlb_range has 0x128ba930 tlbi flushes left after it has already been looping for 23 seconds!. 
Looking up one frame at __purge_vmap_area_lazy(), there is: ... list_for_each_entry_rcu(va, &vmap_area_list, list) { if (va->flags & VM_LAZY_FREE) { if (va->va_start < *start) *start = va->va_start; if (va->va_end > *end) *end = va->va_end; nr += (va->va_end - va->va_start) >> PAGE_SHIFT; list_add_tail(&va->purge_list, &valist); va->flags |= VM_LAZY_FREEING; va->flags &= ~VM_LAZY_FREE; } } ... if (nr || force_flush) flush_tlb_kernel_range(*start, *end); So if two areas are being freed, the range passed to flush_tlb_kernel_range() may be as large as the vmalloc space. For arm64, this is ~240GB for a 4k page size and ~2TB for a 64k page size. This patch works around this problem by adding a loop limit. If the range is larger than the limit, use flush_tlb_all() rather than flushing based on individual pages. The limit chosen is arbitrary as the TLB size is implementation-specific and not accessible in an architected way. The aim of the arbitrary limit is to avoid the soft lockup. Signed-off-by: Mark Salter <msalter@redhat.com> [catalin.marinas@arm.com: commit log update] [catalin.marinas@arm.com: marginal optimisation] [catalin.marinas@arm.com: changed to MAX_TLB_RANGE and added comment] Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
169 lines
4.4 KiB
C
169 lines
4.4 KiB
C
/*
|
|
* Based on arch/arm/include/asm/tlbflush.h
|
|
*
|
|
* Copyright (C) 1999-2003 Russell King
|
|
* Copyright (C) 2012 ARM Ltd.
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
#ifndef __ASM_TLBFLUSH_H
|
|
#define __ASM_TLBFLUSH_H
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#include <linux/sched.h>
|
|
#include <asm/cputype.h>
|
|
|
|
/*
 * Range flush for user mappings, defined outside this header. The inline
 * helpers visible in this header do not call it.
 * NOTE(review): arguments are presumably (start, end, vma) - confirm against
 * the definition.
 */
extern void __cpu_flush_user_tlb_range(unsigned long, unsigned long, struct vm_area_struct *);
|
|
/*
 * Range flush for kernel mappings, defined outside this header. The inline
 * helpers visible in this header do not call it.
 * NOTE(review): arguments are presumably (start, end) - confirm against the
 * definition.
 */
extern void __cpu_flush_kern_tlb_range(unsigned long, unsigned long);
|
|
|
|
/*
 * Defined elsewhere. struct cpu_tlb_fns is not declared in this header and
 * cpu_tlb is not referenced by the code visible here.
 */
extern struct cpu_tlb_fns cpu_tlb;
|
|
|
|
/*
|
|
* TLB Management
|
|
* ==============
|
|
*
|
|
* The arch/arm64/mm/tlb.S files implement these methods.
|
|
*
|
|
* The TLB specific code is expected to perform whatever tests it needs
|
|
* to determine if it should invalidate the TLB for each call. Start
|
|
* addresses are inclusive and end addresses are exclusive; it is safe to
|
|
* round these addresses down.
|
|
*
|
|
* flush_tlb_all()
|
|
*
|
|
* Invalidate the entire TLB.
|
|
*
|
|
* flush_tlb_mm(mm)
|
|
*
|
|
* Invalidate all TLB entries in a particular address space.
|
|
* - mm - mm_struct describing address space
|
|
*
|
|
* flush_tlb_range(vma,start,end)
|
|
*
|
|
* Invalidate a range of TLB entries in the specified address
|
|
* space.
|
|
* - vma - vm_area_struct whose vm_mm identifies the address space
|
|
* - start - start address (may not be aligned)
|
|
* - end - end address (exclusive, may not be aligned)
|
|
*
|
|
* flush_tlb_page(vma,vaddr)
|
|
*
|
|
* Invalidate the specified page in the specified address range.
|
|
* - vaddr - virtual address (may not be aligned)
|
|
* - vma - vma_struct describing address range
|
|
*
|
|
* flush_kern_tlb_page(kaddr)
|
|
*
|
|
* Invalidate the TLB entry for the specified page. The address
|
|
* will be in the kernels virtual memory space. Current uses
|
|
* only require the D-TLB to be invalidated.
|
|
* - kaddr - Kernel virtual memory address
|
|
*/
|
|
static inline void flush_tlb_all(void)
|
|
{
|
|
dsb(ishst);
|
|
asm("tlbi vmalle1is");
|
|
dsb(ish);
|
|
isb();
|
|
}
|
|
|
|
static inline void flush_tlb_mm(struct mm_struct *mm)
|
|
{
|
|
unsigned long asid = (unsigned long)ASID(mm) << 48;
|
|
|
|
dsb(ishst);
|
|
asm("tlbi aside1is, %0" : : "r" (asid));
|
|
dsb(ish);
|
|
}
|
|
|
|
static inline void flush_tlb_page(struct vm_area_struct *vma,
|
|
unsigned long uaddr)
|
|
{
|
|
unsigned long addr = uaddr >> 12 |
|
|
((unsigned long)ASID(vma->vm_mm) << 48);
|
|
|
|
dsb(ishst);
|
|
asm("tlbi vae1is, %0" : : "r" (addr));
|
|
dsb(ish);
|
|
}
|
|
|
|
static inline void __flush_tlb_range(struct vm_area_struct *vma,
|
|
unsigned long start, unsigned long end)
|
|
{
|
|
unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48;
|
|
unsigned long addr;
|
|
start = asid | (start >> 12);
|
|
end = asid | (end >> 12);
|
|
|
|
dsb(ishst);
|
|
for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
|
|
asm("tlbi vae1is, %0" : : "r"(addr));
|
|
dsb(ish);
|
|
}
|
|
|
|
static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long end)
|
|
{
|
|
unsigned long addr;
|
|
start >>= 12;
|
|
end >>= 12;
|
|
|
|
dsb(ishst);
|
|
for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
|
|
asm("tlbi vaae1is, %0" : : "r"(addr));
|
|
dsb(ish);
|
|
isb();
|
|
}
|
|
|
|
/*
|
|
* This is meant to avoid soft lock-ups on large TLB flushing ranges and not
|
|
* necessarily a performance improvement.
|
|
*/
|
|
/*
 * Largest range, in bytes (1024 << PAGE_SHIFT), that flush_tlb_range() and
 * flush_tlb_kernel_range() will invalidate page by page. Anything larger
 * falls back to flush_tlb_mm() / flush_tlb_all() respectively.
 */
#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT)
|
|
|
|
static inline void flush_tlb_range(struct vm_area_struct *vma,
|
|
unsigned long start, unsigned long end)
|
|
{
|
|
if ((end - start) <= MAX_TLB_RANGE)
|
|
__flush_tlb_range(vma, start, end);
|
|
else
|
|
flush_tlb_mm(vma->vm_mm);
|
|
}
|
|
|
|
static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
|
|
{
|
|
if ((end - start) <= MAX_TLB_RANGE)
|
|
__flush_tlb_kernel_range(start, end);
|
|
else
|
|
flush_tlb_all();
|
|
}
|
|
|
|
/*
|
|
* On AArch64, the cache coherency is handled via the set_pte_at() function.
|
|
*/
|
|
/*
 * update_mmu_cache - hook run after a user page table entry is updated.
 *
 * None of the arguments are used; the body is a single store barrier.
 */
static inline void update_mmu_cache(struct vm_area_struct *vma,
				    unsigned long addr, pte_t *ptep)
{
	/*
	 * User mappings get no DSB from set_pte(), so issue one here to make
	 * the page table write visible.
	 */
	dsb(ishst);
}
|
|
|
|
/* PMD-level counterpart of update_mmu_cache(): expands to an empty statement. */
#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
|
|
|
|
#endif
|
|
|
|
#endif
|