sh: sh4_flush_cache_mm() optimizations.

The i-cache flush in the case of VM_EXEC was added way back when as a
sanity measure, and in practice we only care about evicting aliases from
the d-cache. As a result, it's possible to drop the i-cache flush
completely here.

Careful profiling has also shown that all of the work associated with
hunting down aliases and doing ranged flushing ends up generating more
overhead than simply blasting away the entire dcache, particularly when
there are many mm's to iterate over. As a result, just move back to
flush_dcache_all() in these cases, which restores the old behaviour and
vastly simplifies the path.
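
To put rough numbers on that trade-off, here is a small illustrative
sketch (userspace only; the cache geometry and mapping size below are
assumptions picked for illustration, not figures from the profiling
above). The point it shows: a full d-cache flush is bounded by the
cache size, while the ranged alias hunt scales with the number of
mapped pages that have to be walked.

/* Illustrative only: geometry and mapping size are assumed values. */
#include <stdio.h>

int main(void)
{
        const unsigned long dcache_size = 16 * 1024;   /* assumed total d-cache size */
        const unsigned long line_size   = 32;          /* assumed cache line size    */
        const unsigned long page_size   = 4096;
        const unsigned long mapped      = 64UL << 20;  /* hypothetical VMA coverage  */

        /* Full flush: cost is fixed by cache geometry, independent of the mm. */
        printf("flush_dcache_all(): ~%lu line operations\n",
               dcache_size / line_size);

        /* Ranged flush: one page-table lookup (and possible flush) per page. */
        printf("ranged alias hunt:  ~%lu PTE visits\n",
               mapped / page_size);

        return 0;
}

With these assumed numbers the ranged path visits roughly 32x more
entries than the full flush touches lines, before even counting the
per-mm iteration.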

Additionally, on platforms without aliases at all, this can simply be
nopped out. Presently we have the alias check in the SH-4 specific
version, but the same check applies to all of the platforms, so move it
up to a generic location. This cuts down quite a bit on superfluous
cacheop IPIs.
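
For reference, the condition behind "platforms without aliases": an
alias can only exist when a single cache way spans more than one page,
so that virtual-address bits above PAGE_SHIFT take part in set
selection. A minimal sketch of that idea follows (illustrative only,
not the kernel's actual probe code; the way sizes are assumptions).

#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)

int main(void)
{
        /* Assumed per-way sizes; real values come from CPU probing. */
        unsigned long way_sizes[] = { 4 * 1024, 16 * 1024 };
        unsigned int i;

        for (i = 0; i < 2; i++) {
                unsigned long way_size  = way_sizes[i];
                /* Page colours per way: 0 means no index bits above PAGE_SHIFT. */
                unsigned long n_aliases = (way_size > PAGE_SIZE) ?
                                          way_size / PAGE_SIZE : 0;

                printf("way %2luK: n_aliases = %lu -> %s\n", way_size / 1024,
                       n_aliases, n_aliases ? "flush needed" : "nop the flush");
        }

        return 0;
}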

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Paul Mundt 2009-09-09 14:04:06 +09:00
parent 682f88ab74
commit 654d364e26
2 changed files with 10 additions and 120 deletions


@@ -170,89 +170,13 @@ static void sh4_flush_cache_all(void *unused)
         flush_icache_all();
 }
 
-static void __flush_cache_mm(struct mm_struct *mm, unsigned long start,
-                             unsigned long end)
-{
-        unsigned long d = 0, p = start & PAGE_MASK;
-        unsigned long alias_mask = boot_cpu_data.dcache.alias_mask;
-        unsigned long n_aliases = boot_cpu_data.dcache.n_aliases;
-        unsigned long select_bit;
-        unsigned long all_aliases_mask;
-        unsigned long addr_offset;
-        pgd_t *dir;
-        pmd_t *pmd;
-        pud_t *pud;
-        pte_t *pte;
-        int i;
-
-        dir = pgd_offset(mm, p);
-        pud = pud_offset(dir, p);
-        pmd = pmd_offset(pud, p);
-        end = PAGE_ALIGN(end);
-
-        all_aliases_mask = (1 << n_aliases) - 1;
-
-        do {
-                if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) {
-                        p &= PMD_MASK;
-                        p += PMD_SIZE;
-                        pmd++;
-
-                        continue;
-                }
-
-                pte = pte_offset_kernel(pmd, p);
-
-                do {
-                        unsigned long phys;
-                        pte_t entry = *pte;
-
-                        if (!(pte_val(entry) & _PAGE_PRESENT)) {
-                                pte++;
-                                p += PAGE_SIZE;
-                                continue;
-                        }
-
-                        phys = pte_val(entry) & PTE_PHYS_MASK;
-
-                        if ((p ^ phys) & alias_mask) {
-                                d |= 1 << ((p & alias_mask) >> PAGE_SHIFT);
-                                d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT);
-
-                                if (d == all_aliases_mask)
-                                        goto loop_exit;
-                        }
-
-                        pte++;
-                        p += PAGE_SIZE;
-                } while (p < end && ((unsigned long)pte & ~PAGE_MASK));
-                pmd++;
-        } while (p < end);
-
-loop_exit:
-        addr_offset = 0;
-        select_bit = 1;
-
-        for (i = 0; i < n_aliases; i++) {
-                if (d & select_bit) {
-                        (*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE);
-                        wmb();
-                }
-
-                select_bit <<= 1;
-                addr_offset += PAGE_SIZE;
-        }
-}
-
 /*
  * Note : (RPC) since the caches are physically tagged, the only point
  * of flush_cache_mm for SH-4 is to get rid of aliases from the
  * D-cache. The assumption elsewhere, e.g. flush_cache_range, is that
  * lines can stay resident so long as the virtual address they were
  * accessed with (hence cache set) is in accord with the physical
- * address (i.e. tag). It's no different here. So I reckon we don't
- * need to flush the I-cache, since aliases don't matter for that. We
- * should try that.
+ * address (i.e. tag). It's no different here.
  *
  * Caller takes mm->mmap_sem.
  */
@@ -263,33 +187,7 @@ static void sh4_flush_cache_mm(void *arg)
         if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT)
                 return;
 
-        /*
-         * If cache is only 4k-per-way, there are never any 'aliases'. Since
-         * the cache is physically tagged, the data can just be left in there.
-         */
-        if (boot_cpu_data.dcache.n_aliases == 0)
-                return;
-
-        /*
-         * Don't bother groveling around the dcache for the VMA ranges
-         * if there are too many PTEs to make it worthwhile.
-         */
-        if (mm->nr_ptes >= MAX_DCACHE_PAGES)
-                flush_dcache_all();
-        else {
-                struct vm_area_struct *vma;
-
-                /*
-                 * In this case there are reasonably sized ranges to flush,
-                 * iterate through the VMA list and take care of any aliases.
-                 */
-                for (vma = mm->mmap; vma; vma = vma->vm_next)
-                        __flush_cache_mm(mm, vma->vm_start, vma->vm_end);
-        }
-
-        /* Only touch the icache if one of the VMAs has VM_EXEC set. */
-        if (mm->exec_vm)
-                flush_icache_all();
+        flush_dcache_all();
 }
 
 /*
@@ -372,24 +270,10 @@ static void sh4_flush_cache_range(void *args)
         if (boot_cpu_data.dcache.n_aliases == 0)
                 return;
 
-        /*
-         * Don't bother with the lookup and alias check if we have a
-         * wide range to cover, just blow away the dcache in its
-         * entirety instead. -- PFM.
-         */
-        if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES)
-                flush_dcache_all();
-        else
-                __flush_cache_mm(vma->vm_mm, start, end);
+        flush_dcache_all();
 
-        if (vma->vm_flags & VM_EXEC) {
-                /*
-                 * TODO: Is this required??? Need to look at how I-cache
-                 * coherency is assured when new programs are loaded to see if
-                 * this matters.
-                 */
+        if (vma->vm_flags & VM_EXEC)
                 flush_icache_all();
-        }
 }
 
 /**


@@ -164,11 +164,17 @@ void flush_cache_all(void)
 }
 
 void flush_cache_mm(struct mm_struct *mm)
 {
+        if (boot_cpu_data.dcache.n_aliases == 0)
+                return;
+
         cacheop_on_each_cpu(local_flush_cache_mm, mm, 1);
 }
 
 void flush_cache_dup_mm(struct mm_struct *mm)
 {
+        if (boot_cpu_data.dcache.n_aliases == 0)
+                return;
+
         cacheop_on_each_cpu(local_flush_cache_dup_mm, mm, 1);
 }
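
For context on the "superfluous cacheop IPIs" being cut down: the
cacheop_on_each_cpu() helper in this file broadcasts the flush to the
other online CPUs before running it locally, roughly along the lines of
the sketch below (a paraphrase from memory rather than a verbatim copy
of the in-tree helper). Bailing out on n_aliases == 0 before calling it
therefore skips the cross-call entirely on parts that have nothing to
flush.

#include <linux/smp.h>
#include <linux/preempt.h>

/*
 * Rough shape of the broadcast helper (paraphrased; details may differ
 * from the in-tree version). Everything past this point costs an IPI
 * per remote CPU, which is why the n_aliases == 0 bail-out now happens
 * before it is called.
 */
static inline void cacheop_on_each_cpu(void (*func)(void *info), void *info,
                                       int wait)
{
        preempt_disable();
        smp_call_function(func, info, wait);    /* IPI every other online CPU */
        func(info);                             /* then run the op locally    */
        preempt_enable();
}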