mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-11-30 02:06:49 +07:00
lib/int_sqrt.c: optimize square root algorithm
Optimize the current version of the shift-and-subtract (hardware) algorithm, described by John von Newmann[1] and Guy L Steele. Iterating 1,000,000 times, perf shows for the current version: Performance counter stats for './sqrt-curr' (10 runs): 27.170996 task-clock # 0.979 CPUs utilized ( +- 3.19% ) 3 context-switches # 0.103 K/sec ( +- 4.76% ) 0 cpu-migrations # 0.004 K/sec ( +-100.00% ) 104 page-faults # 0.004 M/sec ( +- 0.16% ) 64,921,199 cycles # 2.389 GHz ( +- 0.03% ) 28,967,789 stalled-cycles-frontend # 44.62% frontend cycles idle ( +- 0.18% ) <not supported> stalled-cycles-backend 104,502,623 instructions # 1.61 insns per cycle # 0.28 stalled cycles per insn ( +- 0.00% ) 34,088,368 branches # 1254.587 M/sec ( +- 0.00% ) 4,901 branch-misses # 0.01% of all branches ( +- 1.32% ) 0.027763015 seconds time elapsed ( +- 3.22% ) And for the new version: Performance counter stats for './sqrt-new' (10 runs): 0.496869 task-clock # 0.519 CPUs utilized ( +- 2.38% ) 0 context-switches # 0.000 K/sec 0 cpu-migrations # 0.403 K/sec ( +-100.00% ) 104 page-faults # 0.209 M/sec ( +- 0.15% ) 590,760 cycles # 1.189 GHz ( +- 2.35% ) 395,053 stalled-cycles-frontend # 66.87% frontend cycles idle ( +- 3.67% ) <not supported> stalled-cycles-backend 398,963 instructions # 0.68 insns per cycle # 0.99 stalled cycles per insn ( +- 0.39% ) 70,228 branches # 141.341 M/sec ( +- 0.36% ) 3,364 branch-misses # 4.79% of all branches ( +- 5.45% ) 0.000957440 seconds time elapsed ( +- 2.42% ) Furthermore, this saves space in instruction text: text data bss dec hex filename 111 0 0 111 6f lib/int_sqrt-baseline.o 89 0 0 89 59 lib/int_sqrt.o [1] http://en.wikipedia.org/wiki/First_Draft_of_a_Report_on_the_EDVAC Signed-off-by: Davidlohr Bueso <davidlohr.bueso@hp.com> Reviewed-by: Jonathan Gonzalez <jgonzlez@linets.cl> Tested-by: Jonathan Gonzalez <jgonzlez@linets.cl> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
4d22f8c306
commit
30493cc9dd
@ -1,3 +1,9 @@
|
||||
/*
|
||||
* Copyright (C) 2013 Davidlohr Bueso <davidlohr.bueso@hp.com>
|
||||
*
|
||||
* Based on the shift-and-subtract algorithm for computing integer
|
||||
* square root from Guy L. Steele.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/export.h>
|
||||
@ -10,23 +16,23 @@
|
||||
*/
|
||||
unsigned long int_sqrt(unsigned long x)
|
||||
{
|
||||
unsigned long op, res, one;
|
||||
unsigned long b, m, y = 0;
|
||||
|
||||
op = x;
|
||||
res = 0;
|
||||
if (x <= 1)
|
||||
return x;
|
||||
|
||||
one = 1UL << (BITS_PER_LONG - 2);
|
||||
while (one > op)
|
||||
one >>= 2;
|
||||
m = 1UL << (BITS_PER_LONG - 2);
|
||||
while (m != 0) {
|
||||
b = y + m;
|
||||
y >>= 1;
|
||||
|
||||
while (one != 0) {
|
||||
if (op >= res + one) {
|
||||
op = op - (res + one);
|
||||
res = res + 2 * one;
|
||||
if (x >= b) {
|
||||
x -= b;
|
||||
y += m;
|
||||
}
|
||||
res /= 2;
|
||||
one /= 4;
|
||||
m >>= 2;
|
||||
}
|
||||
return res;
|
||||
|
||||
return y;
|
||||
}
|
||||
EXPORT_SYMBOL(int_sqrt);
|
||||
|
Loading…
Reference in New Issue
Block a user