common implementation of iterative div/mod
We have a few instances of the open-coded iterative div/mod loop, used when we don't expect the dividend to be much bigger than the divisor. Unfortunately modern gcc's have the tendency to strength-"reduce" this into a full mod operation, which isn't necessarily any faster, and even if it were, isn't available when gcc implements it as a libgcc helper call (the kernel does not link against libgcc).

The workaround is to put a dummy asm statement in the loop to prevent gcc from performing the transformation.

This patch creates a single implementation of this loop, and uses it to replace the open-coded versions I know about.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: john stultz <johnstul@us.ibm.com>
Cc: Segher Boessenkool <segher@kernel.crashing.org>
Cc: Christian Kujau <lists@nerdbynature.de>
Cc: Robert Hancock <hancockr@shaw.ca>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent 5e70b7f3c2
commit f595ec964d
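To make the failure mode concrete, here is a minimal userspace sketch of the open-coded loop and the empty asm() barrier that keeps gcc from strength-reducing it into a full 64-bit modulo. The constant value and the function name are made up for the demo; this is illustration only, not code from the patch.

#include <stdint.h>
#include <stdio.h>

#define NS_PER_TICK 10000000ULL         /* made-up value, demo only */

static uint32_t ticks_from_ns(uint64_t ns)      /* hypothetical helper */
{
        uint32_t ticks = 0;

        while (ns >= NS_PER_TICK) {
                /* Empty asm() with ns as an in/out operand: the compiler
                   must assume ns may change here, which defeats the
                   loop-to-modulo transformation. */
                asm("" : "+rm"(ns));
                ns -= NS_PER_TICK;
                ticks++;
        }
        return ticks;                   /* leftover remainder stays in ns */
}

int main(void)
{
        printf("%u\n", ticks_from_ns(35000000ULL));    /* prints 3 */
        return 0;
}

Comparing the generated assembly with and without the asm() line (for example, gcc -O2 -S) shows whether the loop survives; on 32-bit targets the collapsed form may turn into a __umoddi3 libgcc call.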
arch/x86/xen/time.c

@@ -12,6 +12,7 @@
 #include <linux/clocksource.h>
 #include <linux/clockchips.h>
 #include <linux/kernel_stat.h>
+#include <linux/math64.h>
 
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
@@ -150,11 +151,7 @@ static void do_stolen_accounting(void)
 	if (stolen < 0)
 		stolen = 0;
 
-	ticks = 0;
-	while (stolen >= NS_PER_TICK) {
-		ticks++;
-		stolen -= NS_PER_TICK;
-	}
+	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
 	__get_cpu_var(residual_stolen) = stolen;
 	account_steal_time(NULL, ticks);
 
@@ -166,11 +163,7 @@ static void do_stolen_accounting(void)
 	if (blocked < 0)
 		blocked = 0;
 
-	ticks = 0;
-	while (blocked >= NS_PER_TICK) {
-		ticks++;
-		blocked -= NS_PER_TICK;
-	}
+	ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
 	__get_cpu_var(residual_blocked) = blocked;
 	account_steal_time(idle_task(smp_processor_id()), ticks);
 }
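A small point about the call sites above, worth spelling out: the dividend is passed by value, so a caller can hand the same variable back as the remainder pointer, and iter_div_u64_rem(stolen, NS_PER_TICK, &stolen) leaves the residual nanoseconds in stolen. A userspace sketch of that contract, using a trivial stub with the same signature (all names and values here are made up):

#include <assert.h>
#include <stdint.h>

#define NS_PER_TICK 10000000ULL         /* made-up value, demo only */

/* Stub with the same contract as the kernel helper (quotient returned,
   remainder stored through the pointer); the real one loops instead. */
static uint32_t iter_div_stub(uint64_t dividend, uint32_t divisor,
                              uint64_t *remainder)
{
        *remainder = dividend % divisor;
        return (uint32_t)(dividend / divisor);
}

int main(void)
{
        uint64_t stolen = 35000000ULL;  /* hypothetical accumulated ns */

        /* The dividend is copied in, so reusing &stolen for the remainder
           is safe: after the call, stolen holds the leftover ns. */
        uint32_t ticks = iter_div_stub(stolen, NS_PER_TICK, &stolen);

        assert(ticks == 3 && stolen == 5000000ULL);
        return 0;
}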
include/linux/math64.h

@@ -81,4 +81,6 @@ static inline s64 div_s64(s64 dividend, s32 divisor)
 }
 #endif
 
+u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder);
+
 #endif /* _LINUX_MATH64_H */
include/linux/time.h

@@ -6,6 +6,7 @@
 #ifdef __KERNEL__
 # include <linux/cache.h>
 # include <linux/seqlock.h>
+# include <linux/math64.h>
 #endif
 
 #ifndef _STRUCT_TIMESPEC
@@ -172,15 +173,7 @@ extern struct timeval ns_to_timeval(const s64 nsec);
  */
 static inline void timespec_add_ns(struct timespec *a, u64 ns)
 {
-	ns += a->tv_nsec;
-	while(unlikely(ns >= NSEC_PER_SEC)) {
-		/* The following asm() prevents the compiler from
-		 * optimising this loop into a modulo operation. */
-		asm("" : "+r"(ns));
-
-		ns -= NSEC_PER_SEC;
-		a->tv_sec++;
-	}
+	a->tv_sec += iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns);
 	a->tv_nsec = ns;
 }
 #endif /* __KERNEL__ */
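The timespec_add_ns() change keeps the semantics of the removed loop: the incoming nanoseconds plus the current tv_nsec are split into whole seconds, added to tv_sec, and a sub-second remainder, stored back in tv_nsec. A minimal sketch of that normalisation, written with plain / and % for clarity (the toy struct and values are made up; the kernel helper gets the same quotient/remainder pair from iter_div_u64_rem() instead of a full 64-bit divide):

#include <assert.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

struct toy_timespec {                   /* stand-in for struct timespec */
        int64_t tv_sec;
        int64_t tv_nsec;
};

/* Same seconds/remainder split as the patched helper. */
static void toy_timespec_add_ns(struct toy_timespec *a, uint64_t ns)
{
        ns += a->tv_nsec;
        a->tv_sec += ns / NSEC_PER_SEC;         /* whole seconds */
        a->tv_nsec = ns % NSEC_PER_SEC;         /* leftover ns */
}

int main(void)
{
        struct toy_timespec t = { .tv_sec = 1, .tv_nsec = 900000000 };

        toy_timespec_add_ns(&t, 250000000);     /* add 0.25 s */
        assert(t.tv_sec == 2 && t.tv_nsec == 150000000);
        return 0;
}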
lib/div64.c (+23)

@@ -98,3 +98,26 @@ EXPORT_SYMBOL(div64_u64);
 #endif
 
 #endif /* BITS_PER_LONG == 32 */
+
+/*
+ * Iterative div/mod for use when dividend is not expected to be much
+ * bigger than divisor.
+ */
+u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
+{
+	u32 ret = 0;
+
+	while (dividend >= divisor) {
+		/* The following asm() prevents the compiler from
+		   optimising this loop into a modulo operation. */
+		asm("" : "+rm"(dividend));
+
+		dividend -= divisor;
+		ret++;
+	}
+
+	*remainder = dividend;
+
+	return ret;
+}
+EXPORT_SYMBOL(iter_div_u64_rem);
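The new helper is easy to sanity-check outside the kernel. A userspace harness along these lines (kernel u64/u32 types swapped for stdint equivalents, test values chosen arbitrarily and kept small enough that the quotient fits in a u32, matching the helper's intended use) compares it against plain / and %:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Copy of the new helper with the kernel types mapped to stdint types. */
static uint32_t iter_div_u64_rem(uint64_t dividend, uint32_t divisor,
                                 uint64_t *remainder)
{
        uint32_t ret = 0;

        while (dividend >= divisor) {
                asm("" : "+rm"(dividend));      /* keep the loop a loop */
                dividend -= divisor;
                ret++;
        }
        *remainder = dividend;
        return ret;
}

int main(void)
{
        const uint64_t dividends[] = { 0, 1, 999999999ULL, 3000000001ULL };
        const uint32_t divisor = 1000000000;    /* e.g. NSEC_PER_SEC */

        for (unsigned i = 0; i < sizeof(dividends) / sizeof(dividends[0]); i++) {
                uint64_t rem;
                uint32_t q = iter_div_u64_rem(dividends[i], divisor, &rem);

                assert(q == dividends[i] / divisor);
                assert(rem == dividends[i] % divisor);
        }
        printf("iter_div_u64_rem: ok\n");
        return 0;
}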