mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-25 13:20:52 +07:00
e269b08517
A profile of a network benchmark showed iommu_num_pages rather high up:

    0.52% iommu_num_pages

Looking at the profile, an integer divide is taking almost all of the time:

        % : c000000000376ea4 <.iommu_num_pages>:
     1.93 : c000000000376ea4: fb e1 ff f8 std r31,-8(r1)
     0.00 : c000000000376ea8: f8 21 ff c1 stdu r1,-64(r1)
     0.00 : c000000000376eac: 7c 3f 0b 78 mr r31,r1
     3.86 : c000000000376eb0: 38 84 ff ff addi r4,r4,-1
     0.00 : c000000000376eb4: 38 05 ff ff addi r0,r5,-1
     0.00 : c000000000376eb8: 7c 84 2a 14 add r4,r4,r5
    46.95 : c000000000376ebc: 7c 00 18 38 and r0,r0,r3
    45.66 : c000000000376ec0: 7c 84 02 14 add r4,r4,r0
     0.00 : c000000000376ec4: 7c 64 2b 92 divdu r3,r4,r5
     0.00 : c000000000376ec8: 38 3f 00 40 addi r1,r31,64
     0.00 : c000000000376ecc: eb e1 ff f8 ld r31,-8(r1)
     1.61 : c000000000376ed0: 4e 80 00 20 blr

Since every caller of iommu_num_pages passes in a constant power of two, we can inline this such that the divide is replaced by a shift. The entire function is only a few instructions once optimised, so it is a good candidate for inlining overall.

Signed-off-by: Anton Blanchard <anton@samba.org>
Cc: Akinobu Mita <akinobu.mita@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
41 lines
1006 B
C
41 lines
1006 B
C
/*
|
|
* IOMMU helper functions for the free area management
|
|
*/
|
|
|
|
#include <linux/module.h>
|
|
#include <linux/bitmap.h>
|
|
|
|
/*
 * iommu_is_span_boundary - test whether a run of entries crosses a boundary
 * @index: first entry of the run
 * @nr: number of entries in the run
 * @shift: offset added to @index before the boundary test
 * @boundary_size: boundary interval; BUG()s unless it is a power of two
 *
 * Returns non-zero when the @nr entries starting at @shift + @index run
 * past the end of the @boundary_size-sized segment in which they start,
 * and zero otherwise.
 */
int iommu_is_span_boundary(unsigned int index, unsigned int nr,
			   unsigned long shift,
			   unsigned long boundary_size)
{
	unsigned long offset;

	BUG_ON(!is_power_of_2(boundary_size));

	/* Position of the first entry inside its boundary segment. */
	offset = (shift + index) & (boundary_size - 1);

	return offset + nr > boundary_size;
}
|
|
|
|
/*
 * iommu_area_alloc - find and claim a free run of bits in an allocation bitmap
 * @map: bitmap to search and update
 * @size: search limit; only the first @size - 1 bits are considered
 * @start: bit number at which the search begins
 * @nr: number of contiguous bits wanted
 * @shift: offset added to a bit number before the boundary test
 * @boundary_size: boundary interval, must be a power of two
 * @align_mask: alignment mask handed to bitmap_find_next_zero_area()
 *
 * Searches @map for @nr consecutive clear bits that do not cross a
 * @boundary_size boundary (see iommu_is_span_boundary()), sets them and
 * returns the index of the first one.  Returns -1 (converted to
 * unsigned long) when no suitable run is found.
 */
unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
			       unsigned long start, unsigned int nr,
			       unsigned long shift, unsigned long boundary_size,
			       unsigned long align_mask)
{
	unsigned long index;

	/* We don't want the last of the limit */
	size -= 1;
again:
	index = bitmap_find_next_zero_area(map, size, start, nr, align_mask);
	if (index >= size)
		return -1;

	if (iommu_is_span_boundary(index, nr, shift, boundary_size)) {
		/*
		 * Any start position between this one and the end of the
		 * current boundary segment would cross the very same
		 * boundary, so resume at the first bit of the next segment
		 * rather than retrying one bit at a time.  The result is
		 * always greater than index, so the search makes progress.
		 * boundary_size is known to be a power of two here because
		 * iommu_is_span_boundary() has already checked it.
		 */
		start = ((shift + index) | (boundary_size - 1)) + 1 - shift;
		goto again;
	}

	bitmap_set(map, index, nr);
	return index;
}
EXPORT_SYMBOL(iommu_area_alloc);
|