linux_dsm_epyc7002/arch/mips/mm/c-r3k.c
Ralf Baechle 585fa72493 [MIPS] Retire flush_icache_page from mm use.
On the 34K the redundant cache operations were causing excessive stalls
resulting in realtime code running on the second VPE missing its deadline.
For all other platforms this patch is just a significant performance
improvment as illustrated by below benchmark numbers.

Processor, Processes - times in microseconds - smaller is better
------------------------------------------------------------------------------
Host                 OS  Mhz null null      open slct sig  sig  fork exec sh
                             call  I/O stat clos TCP  inst hndl proc proc proc
--------- ------------- ---- ---- ---- ---- ---- ---- ---- ---- ---- ---- ----
25Kf      2.6.18-rc4     533 0.49 1.16 7.57 33.4 30.5 1.34 12.4 5497 17.K 54.K
25Kf      2.6.18-rc4-p   533 0.49 1.16 6.68 23.0 30.7 1.36 8.55 5030 16.K 48.K
4Kc       2.6.18-rc4      80 4.21 15.0 131. 289. 261. 16.5 258. 18.K 70.K 227K
4Kc       2.6.18-rc4-p    80 4.34 13.1 128. 285. 262. 18.2 258. 12.K 52.K 176K
34Kc      2.6.18-rc4      40 5.01 14.0 61.6 90.0 477. 17.9 94.7 29.K 108K 342K
34Kc      2.6.18-rc4-p    40 4.98 13.9 61.2 89.7 475. 17.6 93.7 8758 44.K 158K
BCM1480   2.6.18-rc4     700 0.28 0.60 3.68 5.92 16.0 0.78 5.08 931. 3163 15.K
BCM1480   2.6.18-rc4-p   700 0.28 0.61 3.65 5.85 16.0 0.79 5.20 395. 1464 8385
TX49-16K  2.6.18-rc3     197 0.73 2.41 19.0 37.8 82.9 2.94 17.5 4438 14.K 56.K
TX49-16K  2.6.18-rc3-p   197 0.73 2.40 19.9 36.3 82.9 2.94 23.4 2577 9103 38.K
TX49-32K  2.6.18-rc3     396 0.36 1.19 6.80 11.8 41.0 1.46 8.17 2738 8465 32.K
TX49-32K  2.6.18-rc3-p   396 0.36 1.19 6.82 10.2 41.0 1.46 8.18 1330 4638 18.K
    
Original patch by me with enhancements by Atsushi Nemoto.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp>
2006-09-27 13:37:34 +01:00

357 lines
8.0 KiB
C

/*
* r2300.c: R2000 and R3000 specific mmu/cache code.
*
* Copyright (C) 1996 David S. Miller (dm@engr.sgi.com)
*
* with a lot of changes to make this thing work for R3000s
* Tx39XX R4k style caches added. HK
* Copyright (C) 1998, 1999, 2000 Harald Koerfgen
* Copyright (C) 1998 Gleb Raiko & Vladimir Roganov
* Copyright (C) 2001, 2004 Maciej W. Rozycki
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <asm/system.h>
#include <asm/isadep.h>
#include <asm/io.h>
#include <asm/bootinfo.h>
#include <asm/cpu.h>
static unsigned long icache_size, dcache_size; /* Size in bytes */
static unsigned long icache_lsize, dcache_lsize; /* Size in bytes */
#undef DEBUG_CACHE
unsigned long __init r3k_cache_size(unsigned long ca_flags)
{
unsigned long flags, status, dummy, size;
volatile unsigned long *p;
p = (volatile unsigned long *) KSEG0;
flags = read_c0_status();
/* isolate cache space */
write_c0_status((ca_flags|flags)&~ST0_IEC);
*p = 0xa5a55a5a;
dummy = *p;
status = read_c0_status();
if (dummy != 0xa5a55a5a || (status & ST0_CM)) {
size = 0;
} else {
for (size = 128; size <= 0x40000; size <<= 1)
*(p + size) = 0;
*p = -1;
for (size = 128;
(size <= 0x40000) && (*(p + size) == 0);
size <<= 1)
;
if (size > 0x40000)
size = 0;
}
write_c0_status(flags);
return size * sizeof(*p);
}
unsigned long __init r3k_cache_lsize(unsigned long ca_flags)
{
unsigned long flags, status, lsize, i;
volatile unsigned long *p;
p = (volatile unsigned long *) KSEG0;
flags = read_c0_status();
/* isolate cache space */
write_c0_status((ca_flags|flags)&~ST0_IEC);
for (i = 0; i < 128; i++)
*(p + i) = 0;
*(volatile unsigned char *)p = 0;
for (lsize = 1; lsize < 128; lsize <<= 1) {
*(p + lsize);
status = read_c0_status();
if (!(status & ST0_CM))
break;
}
for (i = 0; i < 128; i += lsize)
*(volatile unsigned char *)(p + i) = 0;
write_c0_status(flags);
return lsize * sizeof(*p);
}
static void __init r3k_probe_cache(void)
{
dcache_size = r3k_cache_size(ST0_ISC);
if (dcache_size)
dcache_lsize = r3k_cache_lsize(ST0_ISC);
icache_size = r3k_cache_size(ST0_ISC|ST0_SWC);
if (icache_size)
icache_lsize = r3k_cache_lsize(ST0_ISC|ST0_SWC);
}
static void r3k_flush_icache_range(unsigned long start, unsigned long end)
{
unsigned long size, i, flags;
volatile unsigned char *p;
size = end - start;
if (size > icache_size || KSEGX(start) != KSEG0) {
start = KSEG0;
size = icache_size;
}
p = (char *)start;
flags = read_c0_status();
/* isolate cache space */
write_c0_status((ST0_ISC|ST0_SWC|flags)&~ST0_IEC);
for (i = 0; i < size; i += 0x080) {
asm ( "sb\t$0, 0x000(%0)\n\t"
"sb\t$0, 0x004(%0)\n\t"
"sb\t$0, 0x008(%0)\n\t"
"sb\t$0, 0x00c(%0)\n\t"
"sb\t$0, 0x010(%0)\n\t"
"sb\t$0, 0x014(%0)\n\t"
"sb\t$0, 0x018(%0)\n\t"
"sb\t$0, 0x01c(%0)\n\t"
"sb\t$0, 0x020(%0)\n\t"
"sb\t$0, 0x024(%0)\n\t"
"sb\t$0, 0x028(%0)\n\t"
"sb\t$0, 0x02c(%0)\n\t"
"sb\t$0, 0x030(%0)\n\t"
"sb\t$0, 0x034(%0)\n\t"
"sb\t$0, 0x038(%0)\n\t"
"sb\t$0, 0x03c(%0)\n\t"
"sb\t$0, 0x040(%0)\n\t"
"sb\t$0, 0x044(%0)\n\t"
"sb\t$0, 0x048(%0)\n\t"
"sb\t$0, 0x04c(%0)\n\t"
"sb\t$0, 0x050(%0)\n\t"
"sb\t$0, 0x054(%0)\n\t"
"sb\t$0, 0x058(%0)\n\t"
"sb\t$0, 0x05c(%0)\n\t"
"sb\t$0, 0x060(%0)\n\t"
"sb\t$0, 0x064(%0)\n\t"
"sb\t$0, 0x068(%0)\n\t"
"sb\t$0, 0x06c(%0)\n\t"
"sb\t$0, 0x070(%0)\n\t"
"sb\t$0, 0x074(%0)\n\t"
"sb\t$0, 0x078(%0)\n\t"
"sb\t$0, 0x07c(%0)\n\t"
: : "r" (p) );
p += 0x080;
}
write_c0_status(flags);
}
static void r3k_flush_dcache_range(unsigned long start, unsigned long end)
{
unsigned long size, i, flags;
volatile unsigned char *p;
size = end - start;
if (size > dcache_size || KSEGX(start) != KSEG0) {
start = KSEG0;
size = dcache_size;
}
p = (char *)start;
flags = read_c0_status();
/* isolate cache space */
write_c0_status((ST0_ISC|flags)&~ST0_IEC);
for (i = 0; i < size; i += 0x080) {
asm ( "sb\t$0, 0x000(%0)\n\t"
"sb\t$0, 0x004(%0)\n\t"
"sb\t$0, 0x008(%0)\n\t"
"sb\t$0, 0x00c(%0)\n\t"
"sb\t$0, 0x010(%0)\n\t"
"sb\t$0, 0x014(%0)\n\t"
"sb\t$0, 0x018(%0)\n\t"
"sb\t$0, 0x01c(%0)\n\t"
"sb\t$0, 0x020(%0)\n\t"
"sb\t$0, 0x024(%0)\n\t"
"sb\t$0, 0x028(%0)\n\t"
"sb\t$0, 0x02c(%0)\n\t"
"sb\t$0, 0x030(%0)\n\t"
"sb\t$0, 0x034(%0)\n\t"
"sb\t$0, 0x038(%0)\n\t"
"sb\t$0, 0x03c(%0)\n\t"
"sb\t$0, 0x040(%0)\n\t"
"sb\t$0, 0x044(%0)\n\t"
"sb\t$0, 0x048(%0)\n\t"
"sb\t$0, 0x04c(%0)\n\t"
"sb\t$0, 0x050(%0)\n\t"
"sb\t$0, 0x054(%0)\n\t"
"sb\t$0, 0x058(%0)\n\t"
"sb\t$0, 0x05c(%0)\n\t"
"sb\t$0, 0x060(%0)\n\t"
"sb\t$0, 0x064(%0)\n\t"
"sb\t$0, 0x068(%0)\n\t"
"sb\t$0, 0x06c(%0)\n\t"
"sb\t$0, 0x070(%0)\n\t"
"sb\t$0, 0x074(%0)\n\t"
"sb\t$0, 0x078(%0)\n\t"
"sb\t$0, 0x07c(%0)\n\t"
: : "r" (p) );
p += 0x080;
}
write_c0_status(flags);
}
static inline unsigned long get_phys_page (unsigned long addr,
struct mm_struct *mm)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
unsigned long physpage;
pgd = pgd_offset(mm, addr);
pud = pud_offset(pgd, addr);
pmd = pmd_offset(pud, addr);
pte = pte_offset(pmd, addr);
if ((physpage = pte_val(*pte)) & _PAGE_VALID)
return KSEG0ADDR(physpage & PAGE_MASK);
return 0;
}
static inline void r3k_flush_cache_all(void)
{
}
static inline void r3k___flush_cache_all(void)
{
r3k_flush_dcache_range(KSEG0, KSEG0 + dcache_size);
r3k_flush_icache_range(KSEG0, KSEG0 + icache_size);
}
static void r3k_flush_cache_mm(struct mm_struct *mm)
{
}
static void r3k_flush_cache_range(struct vm_area_struct *vma,
unsigned long start, unsigned long end)
{
}
static void r3k_flush_cache_page(struct vm_area_struct *vma, unsigned long page, unsigned long pfn)
{
}
static void local_r3k_flush_data_cache_page(unsigned long addr)
{
}
static void r3k_flush_data_cache_page(unsigned long addr)
{
}
static void r3k_flush_icache_page(struct vm_area_struct *vma, struct page *page)
{
struct mm_struct *mm = vma->vm_mm;
unsigned long physpage;
if (cpu_context(smp_processor_id(), mm) == 0)
return;
if (!(vma->vm_flags & VM_EXEC))
return;
#ifdef DEBUG_CACHE
printk("cpage[%d,%08lx]", cpu_context(smp_processor_id(), mm), page);
#endif
physpage = (unsigned long) page_address(page);
if (physpage)
r3k_flush_icache_range(physpage, physpage + PAGE_SIZE);
}
static void r3k_flush_cache_sigtramp(unsigned long addr)
{
unsigned long flags;
#ifdef DEBUG_CACHE
printk("csigtramp[%08lx]", addr);
#endif
flags = read_c0_status();
write_c0_status(flags&~ST0_IEC);
/* Fill the TLB to avoid an exception with caches isolated. */
asm ( "lw\t$0, 0x000(%0)\n\t"
"lw\t$0, 0x004(%0)\n\t"
: : "r" (addr) );
write_c0_status((ST0_ISC|ST0_SWC|flags)&~ST0_IEC);
asm ( "sb\t$0, 0x000(%0)\n\t"
"sb\t$0, 0x004(%0)\n\t"
: : "r" (addr) );
write_c0_status(flags);
}
static void r3k_dma_cache_wback_inv(unsigned long start, unsigned long size)
{
/* Catch bad driver code */
BUG_ON(size == 0);
iob();
r3k_flush_dcache_range(start, start + size);
}
void __init r3k_cache_init(void)
{
extern void build_clear_page(void);
extern void build_copy_page(void);
r3k_probe_cache();
flush_cache_all = r3k_flush_cache_all;
__flush_cache_all = r3k___flush_cache_all;
flush_cache_mm = r3k_flush_cache_mm;
flush_cache_range = r3k_flush_cache_range;
flush_cache_page = r3k_flush_cache_page;
__flush_icache_page = r3k_flush_icache_page;
flush_icache_range = r3k_flush_icache_range;
flush_cache_sigtramp = r3k_flush_cache_sigtramp;
local_flush_data_cache_page = local_r3k_flush_data_cache_page;
flush_data_cache_page = r3k_flush_data_cache_page;
_dma_cache_wback_inv = r3k_dma_cache_wback_inv;
_dma_cache_wback = r3k_dma_cache_wback_inv;
_dma_cache_inv = r3k_dma_cache_wback_inv;
printk("Primary instruction cache %ldkB, linesize %ld bytes.\n",
icache_size >> 10, icache_lsize);
printk("Primary data cache %ldkB, linesize %ld bytes.\n",
dcache_size >> 10, dcache_lsize);
build_clear_page();
build_copy_page();
}