sh: sh4_flush_cache_mm() optimizations.

The i-cache flush in the case of VM_EXEC was added way back when as a
sanity measure, and in practice we only care about evicting aliases from
the d-cache. As a result, it's possible to drop the i-cache flush
completely here.

Careful profiling has also shown that all of the work associated with
hunting down aliases and doing ranged flushing ends up generating more
overhead than simply blasting away the entire dcache, particularly when
there are many mm's to iterate over. As a result, just move back to
flush_dcache_all() in these cases, which restores the old behaviour and
vastly simplifies the path.
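
To put rough numbers on that trade-off, here is a small illustrative
sketch (userspace only; the cache geometry and mapping size below are
assumptions picked for illustration, not figures from the profiling
above). The point it shows: a full d-cache flush is bounded by the
cache size, while the ranged alias hunt scales with the number of
mapped pages that have to be walked.

/* Illustrative only: geometry and mapping size are assumed values. */
#include <stdio.h>

int main(void)
{
        const unsigned long dcache_size = 16 * 1024;   /* assumed total d-cache size */
        const unsigned long line_size   = 32;          /* assumed cache line size    */
        const unsigned long page_size   = 4096;
        const unsigned long mapped      = 64UL << 20;  /* hypothetical VMA coverage  */

        /* Full flush: cost is fixed by cache geometry, independent of the mm. */
        printf("flush_dcache_all(): ~%lu line operations\n",
               dcache_size / line_size);

        /* Ranged flush: one page-table lookup (and possible flush) per page. */
        printf("ranged alias hunt:  ~%lu PTE visits\n",
               mapped / page_size);

        return 0;
}

With these assumed numbers the ranged path visits roughly 32x more
entries than the full flush touches lines, before even counting the
per-mm iteration.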

Additionally, on platforms without aliases at all, this can simply be
nopped out. Presently we have the alias check in the SH-4 specific
version, but the same check applies to all of the platforms, so move it
up to a generic location. This cuts down quite a bit on superfluous
cacheop IPIs.
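
For reference, the condition behind "platforms without aliases": an
alias can only exist when a single cache way spans more than one page,
so that virtual-address bits above PAGE_SHIFT take part in set
selection. A minimal sketch of that idea follows (illustrative only,
not the kernel's actual probe code; the way sizes are assumptions).

#include <stdio.h>

#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)

int main(void)
{
        /* Assumed per-way sizes; real values come from CPU probing. */
        unsigned long way_sizes[] = { 4 * 1024, 16 * 1024 };
        unsigned int i;

        for (i = 0; i < 2; i++) {
                unsigned long way_size  = way_sizes[i];
                /* Page colours per way: 0 means no index bits above PAGE_SHIFT. */
                unsigned long n_aliases = (way_size > PAGE_SIZE) ?
                                          way_size / PAGE_SIZE : 0;

                printf("way %2luK: n_aliases = %lu -> %s\n", way_size / 1024,
                       n_aliases, n_aliases ? "flush needed" : "nop the flush");
        }

        return 0;
}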

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Paul Mundt 2009-09-09 14:04:06 +09:00
parent 682f88ab74
commit 654d364e26
2 changed files with 10 additions and 120 deletions


@@ -170,89 +170,13 @@ static void sh4_flush_cache_all(void *unused)
         flush_icache_all();
 }
 
-static void __flush_cache_mm(struct mm_struct *mm, unsigned long start,
-                             unsigned long end)
-{
-        unsigned long d = 0, p = start & PAGE_MASK;
-        unsigned long alias_mask = boot_cpu_data.dcache.alias_mask;
-        unsigned long n_aliases = boot_cpu_data.dcache.n_aliases;
-        unsigned long select_bit;
-        unsigned long all_aliases_mask;
-        unsigned long addr_offset;
-        pgd_t *dir;
-        pmd_t *pmd;
-        pud_t *pud;
-        pte_t *pte;
-        int i;
-
-        dir = pgd_offset(mm, p);
-        pud = pud_offset(dir, p);
-        pmd = pmd_offset(pud, p);
-        end = PAGE_ALIGN(end);
-
-        all_aliases_mask = (1 << n_aliases) - 1;
-
-        do {
-                if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) {
-                        p &= PMD_MASK;
-                        p += PMD_SIZE;
-                        pmd++;
-
-                        continue;
-                }
-
-                pte = pte_offset_kernel(pmd, p);
-
-                do {
-                        unsigned long phys;
-                        pte_t entry = *pte;
-
-                        if (!(pte_val(entry) & _PAGE_PRESENT)) {
-                                pte++;
-                                p += PAGE_SIZE;
-                                continue;
-                        }
-
-                        phys = pte_val(entry) & PTE_PHYS_MASK;
-
-                        if ((p ^ phys) & alias_mask) {
-                                d |= 1 << ((p & alias_mask) >> PAGE_SHIFT);
-                                d |= 1 << ((phys & alias_mask) >> PAGE_SHIFT);
-
-                                if (d == all_aliases_mask)
-                                        goto loop_exit;
-                        }
-
-                        pte++;
-                        p += PAGE_SIZE;
-                } while (p < end && ((unsigned long)pte & ~PAGE_MASK));
-                pmd++;
-        } while (p < end);
-
-loop_exit:
-        addr_offset = 0;
-        select_bit = 1;
-
-        for (i = 0; i < n_aliases; i++) {
-                if (d & select_bit) {
-                        (*__flush_dcache_segment_fn)(addr_offset, PAGE_SIZE);
-                        wmb();
-                }
-
-                select_bit <<= 1;
-                addr_offset += PAGE_SIZE;
-        }
-}
-
 /*
  * Note : (RPC) since the caches are physically tagged, the only point
  * of flush_cache_mm for SH-4 is to get rid of aliases from the
  * D-cache. The assumption elsewhere, e.g. flush_cache_range, is that
  * lines can stay resident so long as the virtual address they were
  * accessed with (hence cache set) is in accord with the physical
- * address (i.e. tag). It's no different here. So I reckon we don't
- * need to flush the I-cache, since aliases don't matter for that. We
- * should try that.
+ * address (i.e. tag). It's no different here.
  *
  * Caller takes mm->mmap_sem.
  */
@@ -263,33 +187,7 @@ static void sh4_flush_cache_mm(void *arg)
         if (cpu_context(smp_processor_id(), mm) == NO_CONTEXT)
                 return;
 
-        /*
-         * If cache is only 4k-per-way, there are never any 'aliases'. Since
-         * the cache is physically tagged, the data can just be left in there.
-         */
-        if (boot_cpu_data.dcache.n_aliases == 0)
-                return;
-
-        /*
-         * Don't bother groveling around the dcache for the VMA ranges
-         * if there are too many PTEs to make it worthwhile.
-         */
-        if (mm->nr_ptes >= MAX_DCACHE_PAGES)
-                flush_dcache_all();
-        else {
-                struct vm_area_struct *vma;
-
-                /*
-                 * In this case there are reasonably sized ranges to flush,
-                 * iterate through the VMA list and take care of any aliases.
-                 */
-                for (vma = mm->mmap; vma; vma = vma->vm_next)
-                        __flush_cache_mm(mm, vma->vm_start, vma->vm_end);
-        }
-
-        /* Only touch the icache if one of the VMAs has VM_EXEC set. */
-        if (mm->exec_vm)
-                flush_icache_all();
+        flush_dcache_all();
 }
 
 /*
@@ -372,24 +270,10 @@ static void sh4_flush_cache_range(void *args)
         if (boot_cpu_data.dcache.n_aliases == 0)
                 return;
 
-        /*
-         * Don't bother with the lookup and alias check if we have a
-         * wide range to cover, just blow away the dcache in its
-         * entirety instead. -- PFM.
-         */
-        if (((end - start) >> PAGE_SHIFT) >= MAX_DCACHE_PAGES)
-                flush_dcache_all();
-        else
-                __flush_cache_mm(vma->vm_mm, start, end);
+        flush_dcache_all();
 
-        if (vma->vm_flags & VM_EXEC) {
-                /*
-                 * TODO: Is this required??? Need to look at how I-cache
-                 * coherency is assured when new programs are loaded to see if
-                 * this matters.
-                 */
+        if (vma->vm_flags & VM_EXEC)
                 flush_icache_all();
-        }
 }
 
 /**


@@ -164,11 +164,17 @@ void flush_cache_all(void)
 }
 
 void flush_cache_mm(struct mm_struct *mm)
 {
+        if (boot_cpu_data.dcache.n_aliases == 0)
+                return;
+
         cacheop_on_each_cpu(local_flush_cache_mm, mm, 1);
 }
 
 void flush_cache_dup_mm(struct mm_struct *mm)
 {
+        if (boot_cpu_data.dcache.n_aliases == 0)
+                return;
+
         cacheop_on_each_cpu(local_flush_cache_dup_mm, mm, 1);
 }
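
For context on the "superfluous cacheop IPIs" being cut down: the
cacheop_on_each_cpu() helper in this file broadcasts the flush to the
other online CPUs before running it locally, roughly along the lines of
the sketch below (a paraphrase from memory rather than a verbatim copy
of the in-tree helper). Bailing out on n_aliases == 0 before calling it
therefore skips the cross-call entirely on parts that have nothing to
flush.

#include <linux/smp.h>
#include <linux/preempt.h>

/*
 * Rough shape of the broadcast helper (paraphrased; details may differ
 * from the in-tree version). Everything past this point costs an IPI
 * per remote CPU, which is why the n_aliases == 0 bail-out now happens
 * before it is called.
 */
static inline void cacheop_on_each_cpu(void (*func)(void *info), void *info,
                                       int wait)
{
        preempt_disable();
        smp_call_function(func, info, wait);    /* IPI every other online CPU */
        func(info);                             /* then run the op locally    */
        preempt_enable();
}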