mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-28 11:18:45 +07:00
3dc167ba57
People report that utime and stime from /proc/<pid>/stat become very wrong when the numbers are big enough, especially if you watch these counters incrementally. Specifically, the current implementation of: stime*rtime/total, results in a saw-tooth function on top of the desired line, where the teeth grow in size the larger the values become. IOW, it has a relative error. The result is that, when watching incrementally as time progresses (for large values), we'll see periods of pure stime or utime increase, irrespective of the actual ratio we're striving for. Replace scale_stime() with a math64.h helper: mul_u64_u64_div_u64() that is far more accurate. This also allows architectures to override the implementation -- for instance they can opt for the old algorithm if this new one turns out to be too expensive for them. Signed-off-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lkml.kernel.org/r/20200519172506.GA317395@hirez.programming.kicks-ass.net
234 lines
5.0 KiB
C
234 lines
5.0 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
|
|
*
|
|
* Based on former do_div() implementation from asm-parisc/div64.h:
|
|
* Copyright (C) 1999 Hewlett-Packard Co
|
|
* Copyright (C) 1999 David Mosberger-Tang <davidm@hpl.hp.com>
|
|
*
|
|
*
|
|
* Generic C version of 64bit/32bit division and modulo, with
|
|
* 64bit result and 32bit remainder.
|
|
*
|
|
* The fast case for (n>>32 == 0) is handled inline by do_div().
|
|
*
|
|
* Code generated for this function might be very inefficient
|
|
* for some CPUs. __div64_32() can be overridden by linking arch-specific
|
|
* assembly versions such as arch/ppc/lib/div64.S and arch/sh/lib/div64.S
|
|
* or by defining a preprocessor macro in arch/include/asm/div64.h.
|
|
*/
|
|
|
|
#include <linux/export.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/math64.h>
|
|
|
|
/* Not needed on 64bit architectures */
|
|
#if BITS_PER_LONG == 32
|
|
|
|
#ifndef __div64_32
|
|
uint32_t __attribute__((weak)) __div64_32(uint64_t *n, uint32_t base)
|
|
{
|
|
uint64_t rem = *n;
|
|
uint64_t b = base;
|
|
uint64_t res, d = 1;
|
|
uint32_t high = rem >> 32;
|
|
|
|
/* Reduce the thing a bit first */
|
|
res = 0;
|
|
if (high >= base) {
|
|
high /= base;
|
|
res = (uint64_t) high << 32;
|
|
rem -= (uint64_t) (high*base) << 32;
|
|
}
|
|
|
|
while ((int64_t)b > 0 && b < rem) {
|
|
b = b+b;
|
|
d = d+d;
|
|
}
|
|
|
|
do {
|
|
if (rem >= b) {
|
|
rem -= b;
|
|
res += d;
|
|
}
|
|
b >>= 1;
|
|
d >>= 1;
|
|
} while (d);
|
|
|
|
*n = res;
|
|
return rem;
|
|
}
|
|
EXPORT_SYMBOL(__div64_32);
|
|
#endif
|
|
|
|
/**
|
|
* div_s64_rem - signed 64bit divide with 64bit divisor and remainder
|
|
* @dividend: 64bit dividend
|
|
* @divisor: 64bit divisor
|
|
* @remainder: 64bit remainder
|
|
*/
|
|
#ifndef div_s64_rem
|
|
s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder)
|
|
{
|
|
u64 quotient;
|
|
|
|
if (dividend < 0) {
|
|
quotient = div_u64_rem(-dividend, abs(divisor), (u32 *)remainder);
|
|
*remainder = -*remainder;
|
|
if (divisor > 0)
|
|
quotient = -quotient;
|
|
} else {
|
|
quotient = div_u64_rem(dividend, abs(divisor), (u32 *)remainder);
|
|
if (divisor < 0)
|
|
quotient = -quotient;
|
|
}
|
|
return quotient;
|
|
}
|
|
EXPORT_SYMBOL(div_s64_rem);
|
|
#endif
|
|
|
|
/**
|
|
* div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder
|
|
* @dividend: 64bit dividend
|
|
* @divisor: 64bit divisor
|
|
* @remainder: 64bit remainder
|
|
*
|
|
* This implementation is a comparable to algorithm used by div64_u64.
|
|
* But this operation, which includes math for calculating the remainder,
|
|
* is kept distinct to avoid slowing down the div64_u64 operation on 32bit
|
|
* systems.
|
|
*/
|
|
#ifndef div64_u64_rem
|
|
u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder)
|
|
{
|
|
u32 high = divisor >> 32;
|
|
u64 quot;
|
|
|
|
if (high == 0) {
|
|
u32 rem32;
|
|
quot = div_u64_rem(dividend, divisor, &rem32);
|
|
*remainder = rem32;
|
|
} else {
|
|
int n = fls(high);
|
|
quot = div_u64(dividend >> n, divisor >> n);
|
|
|
|
if (quot != 0)
|
|
quot--;
|
|
|
|
*remainder = dividend - quot * divisor;
|
|
if (*remainder >= divisor) {
|
|
quot++;
|
|
*remainder -= divisor;
|
|
}
|
|
}
|
|
|
|
return quot;
|
|
}
|
|
EXPORT_SYMBOL(div64_u64_rem);
|
|
#endif
|
|
|
|
/**
|
|
* div64_u64 - unsigned 64bit divide with 64bit divisor
|
|
* @dividend: 64bit dividend
|
|
* @divisor: 64bit divisor
|
|
*
|
|
* This implementation is a modified version of the algorithm proposed
|
|
* by the book 'Hacker's Delight'. The original source and full proof
|
|
* can be found here and is available for use without restriction.
|
|
*
|
|
* 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt'
|
|
*/
|
|
#ifndef div64_u64
|
|
u64 div64_u64(u64 dividend, u64 divisor)
|
|
{
|
|
u32 high = divisor >> 32;
|
|
u64 quot;
|
|
|
|
if (high == 0) {
|
|
quot = div_u64(dividend, divisor);
|
|
} else {
|
|
int n = fls(high);
|
|
quot = div_u64(dividend >> n, divisor >> n);
|
|
|
|
if (quot != 0)
|
|
quot--;
|
|
if ((dividend - quot * divisor) >= divisor)
|
|
quot++;
|
|
}
|
|
|
|
return quot;
|
|
}
|
|
EXPORT_SYMBOL(div64_u64);
|
|
#endif
|
|
|
|
/**
|
|
* div64_s64 - signed 64bit divide with 64bit divisor
|
|
* @dividend: 64bit dividend
|
|
* @divisor: 64bit divisor
|
|
*/
|
|
#ifndef div64_s64
|
|
s64 div64_s64(s64 dividend, s64 divisor)
|
|
{
|
|
s64 quot, t;
|
|
|
|
quot = div64_u64(abs(dividend), abs(divisor));
|
|
t = (dividend ^ divisor) >> 63;
|
|
|
|
return (quot ^ t) - t;
|
|
}
|
|
EXPORT_SYMBOL(div64_s64);
|
|
#endif
|
|
|
|
#endif /* BITS_PER_LONG == 32 */
|
|
|
|
/*
|
|
* Iterative div/mod for use when dividend is not expected to be much
|
|
* bigger than divisor.
|
|
*/
|
|
u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
|
|
{
|
|
return __iter_div_u64_rem(dividend, divisor, remainder);
|
|
}
|
|
EXPORT_SYMBOL(iter_div_u64_rem);
|
|
|
|
#ifndef mul_u64_u64_div_u64
|
|
u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
|
|
{
|
|
u64 res = 0, div, rem;
|
|
int shift;
|
|
|
|
/* can a * b overflow ? */
|
|
if (ilog2(a) + ilog2(b) > 62) {
|
|
/*
|
|
* (b * a) / c is equal to
|
|
*
|
|
* (b / c) * a +
|
|
* (b % c) * a / c
|
|
*
|
|
* if nothing overflows. Can the 1st multiplication
|
|
* overflow? Yes, but we do not care: this can only
|
|
* happen if the end result can't fit in u64 anyway.
|
|
*
|
|
* So the code below does
|
|
*
|
|
* res = (b / c) * a;
|
|
* b = b % c;
|
|
*/
|
|
div = div64_u64_rem(b, c, &rem);
|
|
res = div * a;
|
|
b = rem;
|
|
|
|
shift = ilog2(a) + ilog2(b) - 62;
|
|
if (shift > 0) {
|
|
/* drop precision */
|
|
b >>= shift;
|
|
c >>= shift;
|
|
if (!c)
|
|
return res;
|
|
}
|
|
}
|
|
|
|
return res + div64_u64(a * b, c);
|
|
}
|
|
#endif
|