mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-03 15:16:45 +07:00
585fa72493
On the 34K the redundant cache operations were causing excessive stalls, resulting in realtime code running on the second VPE missing its deadline. For all other platforms this patch is just a significant performance improvement, as illustrated by the benchmark numbers below.

Processor, Processes - times in microseconds - smaller is better
------------------------------------------------------------------------------
Host                 OS  Mhz null null      open slct sig  sig  fork exec sh
                             call  I/O stat clos TCP  inst hndl proc proc proc
--------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
25Kf         2.6.18-rc4  533 0.49 1.16 7.57 33.4 30.5 1.34 12.4 5497 17.K 54.K
25Kf       2.6.18-rc4-p  533 0.49 1.16 6.68 23.0 30.7 1.36 8.55 5030 16.K 48.K
4Kc          2.6.18-rc4   80 4.21 15.0 131. 289. 261. 16.5 258. 18.K 70.K 227K
4Kc        2.6.18-rc4-p   80 4.34 13.1 128. 285. 262. 18.2 258. 12.K 52.K 176K
34Kc         2.6.18-rc4   40 5.01 14.0 61.6 90.0 477. 17.9 94.7 29.K 108K 342K
34Kc       2.6.18-rc4-p   40 4.98 13.9 61.2 89.7 475. 17.6 93.7 8758 44.K 158K
BCM1480      2.6.18-rc4  700 0.28 0.60 3.68 5.92 16.0 0.78 5.08 931. 3163 15.K
BCM1480    2.6.18-rc4-p  700 0.28 0.61 3.65 5.85 16.0 0.79 5.20 395. 1464 8385
TX49-16K     2.6.18-rc3  197 0.73 2.41 19.0 37.8 82.9 2.94 17.5 4438 14.K 56.K
TX49-16K   2.6.18-rc3-p  197 0.73 2.40 19.9 36.3 82.9 2.94 23.4 2577 9103 38.K
TX49-32K     2.6.18-rc3  396 0.36 1.19 6.80 11.8 41.0 1.46 8.17 2738 8465 32.K
TX49-32K   2.6.18-rc3-p  396 0.36 1.19 6.82 10.2 41.0 1.46 8.18 1330 4638 18.K

Original patch by me with enhancements by Atsushi Nemoto.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
357 lines
8.0 KiB
C
357 lines
8.0 KiB
C
/*
 * r2300.c: R2000 and R3000 specific mmu/cache code.
 *
 * Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
 *
 * with a lot of changes to make this thing work for R3000s
 * Tx39XX R4k style caches added. HK
 * Copyright (C) 1998, 1999, 2000 Harald Koerfgen
 * Copyright (C) 1998 Gleb Raiko & Vladimir Roganov
 * Copyright (C) 2001, 2004 Maciej W. Rozycki
 */
|
|
#include <linux/init.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/mm.h>
|
|
|
|
#include <asm/page.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/system.h>
|
|
#include <asm/isadep.h>
|
|
#include <asm/io.h>
|
|
#include <asm/bootinfo.h>
|
|
#include <asm/cpu.h>
|
|
|
|
/* Capacity of the primary instruction and data caches, in bytes. */
static unsigned long icache_size;
static unsigned long dcache_size;

/* Line length of the primary instruction and data caches, in bytes. */
static unsigned long icache_lsize;
static unsigned long dcache_lsize;
|
|
|
|
#undef DEBUG_CACHE
|
|
|
|
/*
 * Probe the size of the cache selected by ca_flags.
 *
 * ca_flags is OR-ed into the CP0 status register (with ST0_IEC cleared)
 * for the duration of the probe; the callers in this file pass ST0_ISC
 * for the data cache and ST0_ISC|ST0_SWC for the instruction cache.
 *
 * A test pattern is written to and read back from the start of KSEG0.
 * If it does not read back, or the status register reports ST0_CM, the
 * result is 0.  Otherwise zero is stored at every power-of-two word
 * offset from 128 up to 0x40000, -1 is stored at offset 0, and the
 * offsets are scanned upwards for the first one that no longer reads
 * zero (presumably the offset that wraps around onto offset 0).  If no
 * such offset is found the result is 0.
 *
 * The original status register value is restored before returning.
 *
 * Returns the detected size in bytes, or 0.
 */
unsigned long __init r3k_cache_size(unsigned long ca_flags)
{
	volatile unsigned long *base = (volatile unsigned long *) KSEG0;
	unsigned long saved_status, probe_status, readback;
	unsigned long words = 0;

	saved_status = read_c0_status();

	/* isolate cache space */
	write_c0_status((ca_flags | saved_status) & ~ST0_IEC);

	*base = 0xa5a55a5a;
	readback = *base;
	probe_status = read_c0_status();

	if (readback == 0xa5a55a5a && !(probe_status & ST0_CM)) {
		/* Clear every candidate wrap-around offset ... */
		for (words = 128; words <= 0x40000; words <<= 1)
			*(base + words) = 0;

		/* ... then mark offset 0 and look for where the mark shows up. */
		*base = -1;

		words = 128;
		while (words <= 0x40000 && *(base + words) == 0)
			words <<= 1;

		if (words > 0x40000)
			words = 0;
	}

	write_c0_status(saved_status);

	/* Offsets above are in words; report bytes. */
	return words * sizeof(*base);
}
|
|
|
|
/*
 * Probe the line size of the cache selected by ca_flags.
 *
 * ca_flags is OR-ed into the CP0 status register (with ST0_IEC cleared)
 * while probing, exactly as in r3k_cache_size().
 *
 * The first 128 words at KSEG0 are written with word stores, then a
 * single byte store is done to the first word.  Words at power-of-two
 * offsets (1, 2, 4, ...) are then read until the status register no
 * longer reports ST0_CM after the read; that offset is taken as the
 * line length in words (presumably the byte store invalidated exactly
 * one line, so the first read that does not miss lies outside it).
 * Finally a byte store is issued at every line-sized step across the
 * 128 words before the original status value is restored.
 *
 * Returns the detected line size in bytes.
 */
unsigned long __init r3k_cache_lsize(unsigned long ca_flags)
{
	volatile unsigned long *base = (volatile unsigned long *) KSEG0;
	unsigned long saved_status, words, idx;

	saved_status = read_c0_status();

	/* isolate cache space */
	write_c0_status((ca_flags | saved_status) & ~ST0_IEC);

	for (idx = 0; idx < 128; idx++)
		*(base + idx) = 0;
	*(volatile unsigned char *) base = 0;

	words = 1;
	while (words < 128) {
		/* Volatile read; only its effect on the status register matters. */
		*(base + words);
		if (!(read_c0_status() & ST0_CM))
			break;
		words <<= 1;
	}

	for (idx = 0; idx < 128; idx += words)
		*(volatile unsigned char *) (base + idx) = 0;

	write_c0_status(saved_status);

	/* The probe counted in words; report bytes. */
	return words * sizeof(*base);
}
|
|
|
|
/*
 * Fill in icache_size/icache_lsize and dcache_size/dcache_lsize.
 *
 * The data cache is probed with ST0_ISC, the instruction cache with
 * ST0_ISC|ST0_SWC.  A line size is only probed when the corresponding
 * size probe returned non-zero; otherwise the line size keeps its
 * previous value.
 */
static void __init r3k_probe_cache(void)
{
	const unsigned long dcache_flags = ST0_ISC;
	const unsigned long icache_flags = ST0_ISC | ST0_SWC;

	dcache_size = r3k_cache_size(dcache_flags);
	if (dcache_size != 0)
		dcache_lsize = r3k_cache_lsize(dcache_flags);

	icache_size = r3k_cache_size(icache_flags);
	if (icache_size != 0)
		icache_lsize = r3k_cache_lsize(icache_flags);
}
|
|
|
|
static void r3k_flush_icache_range(unsigned long start, unsigned long end)
|
|
{
|
|
unsigned long size, i, flags;
|
|
volatile unsigned char *p;
|
|
|
|
size = end - start;
|
|
if (size > icache_size || KSEGX(start) != KSEG0) {
|
|
start = KSEG0;
|
|
size = icache_size;
|
|
}
|
|
p = (char *)start;
|
|
|
|
flags = read_c0_status();
|
|
|
|
/* isolate cache space */
|
|
write_c0_status((ST0_ISC|ST0_SWC|flags)&~ST0_IEC);
|
|
|
|
for (i = 0; i < size; i += 0x080) {
|
|
asm ( "sb\t$0, 0x000(%0)\n\t"
|
|
"sb\t$0, 0x004(%0)\n\t"
|
|
"sb\t$0, 0x008(%0)\n\t"
|
|
"sb\t$0, 0x00c(%0)\n\t"
|
|
"sb\t$0, 0x010(%0)\n\t"
|
|
"sb\t$0, 0x014(%0)\n\t"
|
|
"sb\t$0, 0x018(%0)\n\t"
|
|
"sb\t$0, 0x01c(%0)\n\t"
|
|
"sb\t$0, 0x020(%0)\n\t"
|
|
"sb\t$0, 0x024(%0)\n\t"
|
|
"sb\t$0, 0x028(%0)\n\t"
|
|
"sb\t$0, 0x02c(%0)\n\t"
|
|
"sb\t$0, 0x030(%0)\n\t"
|
|
"sb\t$0, 0x034(%0)\n\t"
|
|
"sb\t$0, 0x038(%0)\n\t"
|
|
"sb\t$0, 0x03c(%0)\n\t"
|
|
"sb\t$0, 0x040(%0)\n\t"
|
|
"sb\t$0, 0x044(%0)\n\t"
|
|
"sb\t$0, 0x048(%0)\n\t"
|
|
"sb\t$0, 0x04c(%0)\n\t"
|
|
"sb\t$0, 0x050(%0)\n\t"
|
|
"sb\t$0, 0x054(%0)\n\t"
|
|
"sb\t$0, 0x058(%0)\n\t"
|
|
"sb\t$0, 0x05c(%0)\n\t"
|
|
"sb\t$0, 0x060(%0)\n\t"
|
|
"sb\t$0, 0x064(%0)\n\t"
|
|
"sb\t$0, 0x068(%0)\n\t"
|
|
"sb\t$0, 0x06c(%0)\n\t"
|
|
"sb\t$0, 0x070(%0)\n\t"
|
|
"sb\t$0, 0x074(%0)\n\t"
|
|
"sb\t$0, 0x078(%0)\n\t"
|
|
"sb\t$0, 0x07c(%0)\n\t"
|
|
: : "r" (p) );
|
|
p += 0x080;
|
|
}
|
|
|
|
write_c0_status(flags);
|
|
}
|
|
|
|
static void r3k_flush_dcache_range(unsigned long start, unsigned long end)
|
|
{
|
|
unsigned long size, i, flags;
|
|
volatile unsigned char *p;
|
|
|
|
size = end - start;
|
|
if (size > dcache_size || KSEGX(start) != KSEG0) {
|
|
start = KSEG0;
|
|
size = dcache_size;
|
|
}
|
|
p = (char *)start;
|
|
|
|
flags = read_c0_status();
|
|
|
|
/* isolate cache space */
|
|
write_c0_status((ST0_ISC|flags)&~ST0_IEC);
|
|
|
|
for (i = 0; i < size; i += 0x080) {
|
|
asm ( "sb\t$0, 0x000(%0)\n\t"
|
|
"sb\t$0, 0x004(%0)\n\t"
|
|
"sb\t$0, 0x008(%0)\n\t"
|
|
"sb\t$0, 0x00c(%0)\n\t"
|
|
"sb\t$0, 0x010(%0)\n\t"
|
|
"sb\t$0, 0x014(%0)\n\t"
|
|
"sb\t$0, 0x018(%0)\n\t"
|
|
"sb\t$0, 0x01c(%0)\n\t"
|
|
"sb\t$0, 0x020(%0)\n\t"
|
|
"sb\t$0, 0x024(%0)\n\t"
|
|
"sb\t$0, 0x028(%0)\n\t"
|
|
"sb\t$0, 0x02c(%0)\n\t"
|
|
"sb\t$0, 0x030(%0)\n\t"
|
|
"sb\t$0, 0x034(%0)\n\t"
|
|
"sb\t$0, 0x038(%0)\n\t"
|
|
"sb\t$0, 0x03c(%0)\n\t"
|
|
"sb\t$0, 0x040(%0)\n\t"
|
|
"sb\t$0, 0x044(%0)\n\t"
|
|
"sb\t$0, 0x048(%0)\n\t"
|
|
"sb\t$0, 0x04c(%0)\n\t"
|
|
"sb\t$0, 0x050(%0)\n\t"
|
|
"sb\t$0, 0x054(%0)\n\t"
|
|
"sb\t$0, 0x058(%0)\n\t"
|
|
"sb\t$0, 0x05c(%0)\n\t"
|
|
"sb\t$0, 0x060(%0)\n\t"
|
|
"sb\t$0, 0x064(%0)\n\t"
|
|
"sb\t$0, 0x068(%0)\n\t"
|
|
"sb\t$0, 0x06c(%0)\n\t"
|
|
"sb\t$0, 0x070(%0)\n\t"
|
|
"sb\t$0, 0x074(%0)\n\t"
|
|
"sb\t$0, 0x078(%0)\n\t"
|
|
"sb\t$0, 0x07c(%0)\n\t"
|
|
: : "r" (p) );
|
|
p += 0x080;
|
|
}
|
|
|
|
write_c0_status(flags);
|
|
}
|
|
|
|
static inline unsigned long get_phys_page (unsigned long addr,
|
|
struct mm_struct *mm)
|
|
{
|
|
pgd_t *pgd;
|
|
pud_t *pud;
|
|
pmd_t *pmd;
|
|
pte_t *pte;
|
|
unsigned long physpage;
|
|
|
|
pgd = pgd_offset(mm, addr);
|
|
pud = pud_offset(pgd, addr);
|
|
pmd = pmd_offset(pud, addr);
|
|
pte = pte_offset(pmd, addr);
|
|
|
|
if ((physpage = pte_val(*pte)) & _PAGE_VALID)
|
|
return KSEG0ADDR(physpage & PAGE_MASK);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * flush_cache_all hook (installed by r3k_cache_init): deliberately a
 * no-op in this implementation.
 */
static inline void r3k_flush_cache_all(void)
{
	/* intentionally empty */
}
|
|
|
|
static inline void r3k___flush_cache_all(void)
|
|
{
|
|
r3k_flush_dcache_range(KSEG0, KSEG0 + dcache_size);
|
|
r3k_flush_icache_range(KSEG0, KSEG0 + icache_size);
|
|
}
|
|
|
|
/*
 * flush_cache_mm hook (installed by r3k_cache_init): deliberately a
 * no-op; mm is ignored.
 */
static void r3k_flush_cache_mm(struct mm_struct *mm)
{
	/* intentionally empty */
}
|
|
|
|
/*
 * flush_cache_range hook (installed by r3k_cache_init): deliberately a
 * no-op; all arguments are ignored.
 */
static void r3k_flush_cache_range(struct vm_area_struct *vma,
				  unsigned long start, unsigned long end)
{
	/* intentionally empty */
}
|
|
|
|
/*
 * flush_cache_page hook (installed by r3k_cache_init): deliberately a
 * no-op; all arguments are ignored.
 */
static void r3k_flush_cache_page(struct vm_area_struct *vma,
				 unsigned long page, unsigned long pfn)
{
	/* intentionally empty */
}
|
|
|
|
/*
 * local_flush_data_cache_page hook (installed by r3k_cache_init):
 * deliberately a no-op; addr is ignored.
 */
static void local_r3k_flush_data_cache_page(unsigned long addr)
{
	/* intentionally empty */
}
|
|
|
|
/*
 * flush_data_cache_page hook (installed by r3k_cache_init):
 * deliberately a no-op; addr is ignored.
 */
static void r3k_flush_data_cache_page(unsigned long addr)
{
	/* intentionally empty */
}
|
|
|
|
static void r3k_flush_icache_page(struct vm_area_struct *vma, struct page *page)
|
|
{
|
|
struct mm_struct *mm = vma->vm_mm;
|
|
unsigned long physpage;
|
|
|
|
if (cpu_context(smp_processor_id(), mm) == 0)
|
|
return;
|
|
|
|
if (!(vma->vm_flags & VM_EXEC))
|
|
return;
|
|
|
|
#ifdef DEBUG_CACHE
|
|
printk("cpage[%d,%08lx]", cpu_context(smp_processor_id(), mm), page);
|
|
#endif
|
|
|
|
physpage = (unsigned long) page_address(page);
|
|
if (physpage)
|
|
r3k_flush_icache_range(physpage, physpage + PAGE_SIZE);
|
|
}
|
|
|
|
static void r3k_flush_cache_sigtramp(unsigned long addr)
|
|
{
|
|
unsigned long flags;
|
|
|
|
#ifdef DEBUG_CACHE
|
|
printk("csigtramp[%08lx]", addr);
|
|
#endif
|
|
|
|
flags = read_c0_status();
|
|
|
|
write_c0_status(flags&~ST0_IEC);
|
|
|
|
/* Fill the TLB to avoid an exception with caches isolated. */
|
|
asm ( "lw\t$0, 0x000(%0)\n\t"
|
|
"lw\t$0, 0x004(%0)\n\t"
|
|
: : "r" (addr) );
|
|
|
|
write_c0_status((ST0_ISC|ST0_SWC|flags)&~ST0_IEC);
|
|
|
|
asm ( "sb\t$0, 0x000(%0)\n\t"
|
|
"sb\t$0, 0x004(%0)\n\t"
|
|
: : "r" (addr) );
|
|
|
|
write_c0_status(flags);
|
|
}
|
|
|
|
/*
 * DMA cache maintenance hook: flush size bytes of the data cache
 * starting at start.  r3k_cache_init() installs this one routine for
 * the write-back, invalidate and write-back-invalidate hooks alike.
 *
 * A size of 0 is treated as a driver bug and triggers BUG_ON().
 * iob() is issued before the data cache range is flushed.
 */
static void r3k_dma_cache_wback_inv(unsigned long start, unsigned long size)
{
	const unsigned long end = start + size;

	/* Catch bad driver code */
	BUG_ON(size == 0);

	iob();
	r3k_flush_dcache_range(start, end);
}
|
|
|
|
/*
 * Set up cache handling for R2000/R3000 class CPUs.
 *
 * Probes the cache geometry, points the generic cache-flush and DMA
 * cache hooks at the r3k_* implementations in this file, prints the
 * detected instruction/data cache sizes and line sizes, and finally
 * calls build_clear_page() and build_copy_page().
 */
void __init r3k_cache_init(void)
{
	extern void build_clear_page(void);
	extern void build_copy_page(void);

	/* Geometry must be known before any flush routine can be used. */
	r3k_probe_cache();

	/* Whole-cache and address-space hooks. */
	flush_cache_all = r3k_flush_cache_all;
	__flush_cache_all = r3k___flush_cache_all;
	flush_cache_mm = r3k_flush_cache_mm;
	flush_cache_range = r3k_flush_cache_range;
	flush_cache_page = r3k_flush_cache_page;

	/* Instruction cache hooks. */
	__flush_icache_page = r3k_flush_icache_page;
	flush_icache_range = r3k_flush_icache_range;
	flush_cache_sigtramp = r3k_flush_cache_sigtramp;

	/* Data cache page hooks. */
	local_flush_data_cache_page = local_r3k_flush_data_cache_page;
	flush_data_cache_page = r3k_flush_data_cache_page;

	/* One routine serves all three DMA cache operations. */
	_dma_cache_wback_inv = r3k_dma_cache_wback_inv;
	_dma_cache_wback = r3k_dma_cache_wback_inv;
	_dma_cache_inv = r3k_dma_cache_wback_inv;

	printk("Primary instruction cache %ldkB, linesize %ld bytes.\n",
	       icache_size >> 10, icache_lsize);
	printk("Primary data cache %ldkB, linesize %ld bytes.\n",
	       dcache_size >> 10, dcache_lsize);

	build_clear_page();
	build_copy_page();
}
|