mirror of
https://github.com/AuxXxilium/linux_dsm_epyc7002.git
synced 2024-12-05 17:06:40 +07:00
b1b5f65e53
Systems with unmaskable interrupts such as SMIs may massively underestimate loops_per_jiffy, and fail to converge anywhere near the real value. A case seen on x86_64 was an initial estimate of 256<<12, which converged to 511<<12 where the real value should have been over 630<<12. This admittedly requires bypassing the TSC calibration (lpj_fine), and a failure to settle in the direct calibration too, but is physically possible. This failure does not depend on my previous calibration optimisation, but by luck is easy to fix with the optimisation in place with a trivial retry loop. In the context of the optimised converging method, as we can no longer trust the starting estimate, enlarge the search bounds exponentially so that the number of retries is logarithmically bounded. [akpm@linux-foundation.org: mention x86_64 SMIs in comment] Signed-off-by: Phil Carmody <ext-phil.2.carmody@nokia.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: "H. Peter Anvin" <hpa@zytor.com> Tested-by: Stephen Boyd <sboyd@codeaurora.org> Cc: Greg KH <greg@kroah.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
215 lines
5.8 KiB
C
215 lines
5.8 KiB
C
/* calibrate.c: default delay calibration
 *
 * Excised from init/main.c
 * Copyright (C) 1991, 1992 Linus Torvalds
 */
|
|
|
|
#include <linux/jiffies.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/init.h>
|
|
#include <linux/timex.h>
|
|
#include <linux/smp.h>
|
|
|
|
/*
 * Loops-per-jiffy value that is never assigned in this file.  When it is
 * non-zero, calibrate_delay() adopts it on its first (not yet printed)
 * call instead of running a measurement.
 */
unsigned long lpj_fine;

/*
 * Loops-per-jiffy value parsed from the "lpj=" boot option by
 * lpj_setup().  When it is non-zero, calibrate_delay() uses it as-is and
 * skips every calibration routine.
 */
unsigned long preset_lpj;
|
|
static int __init lpj_setup(char *str)
|
|
{
|
|
preset_lpj = simple_strtoul(str,NULL,0);
|
|
return 1;
|
|
}
|
|
|
|
__setup("lpj=", lpj_setup);
|
|
|
|
#ifdef ARCH_HAS_READ_CURRENT_TIMER
|
|
|
|
/* This routine uses the read_current_timer() routine and gets the
|
|
* loops per jiffy directly, instead of guessing it using delay().
|
|
* Also, this code tries to handle non-maskable asynchronous events
|
|
* (like SMIs)
|
|
*/
|
|
#define DELAY_CALIBRATION_TICKS ((HZ < 100) ? 1 : (HZ/100))
|
|
#define MAX_DIRECT_CALIBRATION_RETRIES 5
|
|
|
|
static unsigned long __cpuinit calibrate_delay_direct(void)
|
|
{
|
|
unsigned long pre_start, start, post_start;
|
|
unsigned long pre_end, end, post_end;
|
|
unsigned long start_jiffies;
|
|
unsigned long timer_rate_min, timer_rate_max;
|
|
unsigned long good_timer_sum = 0;
|
|
unsigned long good_timer_count = 0;
|
|
int i;
|
|
|
|
if (read_current_timer(&pre_start) < 0 )
|
|
return 0;
|
|
|
|
/*
|
|
* A simple loop like
|
|
* while ( jiffies < start_jiffies+1)
|
|
* start = read_current_timer();
|
|
* will not do. As we don't really know whether jiffy switch
|
|
* happened first or timer_value was read first. And some asynchronous
|
|
* event can happen between these two events introducing errors in lpj.
|
|
*
|
|
* So, we do
|
|
* 1. pre_start <- When we are sure that jiffy switch hasn't happened
|
|
* 2. check jiffy switch
|
|
* 3. start <- timer value before or after jiffy switch
|
|
* 4. post_start <- When we are sure that jiffy switch has happened
|
|
*
|
|
* Note, we don't know anything about order of 2 and 3.
|
|
* Now, by looking at post_start and pre_start difference, we can
|
|
* check whether any asynchronous event happened or not
|
|
*/
|
|
|
|
for (i = 0; i < MAX_DIRECT_CALIBRATION_RETRIES; i++) {
|
|
pre_start = 0;
|
|
read_current_timer(&start);
|
|
start_jiffies = jiffies;
|
|
while (time_before_eq(jiffies, start_jiffies + 1)) {
|
|
pre_start = start;
|
|
read_current_timer(&start);
|
|
}
|
|
read_current_timer(&post_start);
|
|
|
|
pre_end = 0;
|
|
end = post_start;
|
|
while (time_before_eq(jiffies, start_jiffies + 1 +
|
|
DELAY_CALIBRATION_TICKS)) {
|
|
pre_end = end;
|
|
read_current_timer(&end);
|
|
}
|
|
read_current_timer(&post_end);
|
|
|
|
timer_rate_max = (post_end - pre_start) /
|
|
DELAY_CALIBRATION_TICKS;
|
|
timer_rate_min = (pre_end - post_start) /
|
|
DELAY_CALIBRATION_TICKS;
|
|
|
|
/*
|
|
* If the upper limit and lower limit of the timer_rate is
|
|
* >= 12.5% apart, redo calibration.
|
|
*/
|
|
if (pre_start != 0 && pre_end != 0 &&
|
|
(timer_rate_max - timer_rate_min) < (timer_rate_max >> 3)) {
|
|
good_timer_count++;
|
|
good_timer_sum += timer_rate_max;
|
|
}
|
|
}
|
|
|
|
if (good_timer_count)
|
|
return (good_timer_sum/good_timer_count);
|
|
|
|
printk(KERN_WARNING "calibrate_delay_direct() failed to get a good "
|
|
"estimate for loops_per_jiffy.\nProbably due to long platform interrupts. Consider using \"lpj=\" boot option.\n");
|
|
return 0;
|
|
}
|
|
#else
|
|
/*
 * Variant built when ARCH_HAS_READ_CURRENT_TIMER is not defined: no
 * measurement is attempted and 0 is returned unconditionally.
 */
static unsigned long __cpuinit calibrate_delay_direct(void)
{
	return 0;
}
|
|
#endif
|
|
|
|
/*
|
|
* This is the number of bits of precision for the loops_per_jiffy. Each
|
|
* time we refine our estimate after the first takes 1.5/HZ seconds, so try
|
|
* to start with a good estimate.
|
|
* For the boot cpu we can skip the delay calibration and assign it a value
|
|
* calculated based on the timer frequency.
|
|
* For the rest of the CPUs we cannot assume that the timer frequency is same as
|
|
* the cpu frequency, hence do the calibration for those.
|
|
*/
|
|
#define LPS_PREC 8
|
|
|
|
static unsigned long __cpuinit calibrate_delay_converge(void)
|
|
{
|
|
/* First stage - slowly accelerate to find initial bounds */
|
|
unsigned long lpj, lpj_base, ticks, loopadd, loopadd_base, chop_limit;
|
|
int trials = 0, band = 0, trial_in_band = 0;
|
|
|
|
lpj = (1<<12);
|
|
|
|
/* wait for "start of" clock tick */
|
|
ticks = jiffies;
|
|
while (ticks == jiffies)
|
|
; /* nothing */
|
|
/* Go .. */
|
|
ticks = jiffies;
|
|
do {
|
|
if (++trial_in_band == (1<<band)) {
|
|
++band;
|
|
trial_in_band = 0;
|
|
}
|
|
__delay(lpj * band);
|
|
trials += band;
|
|
} while (ticks == jiffies);
|
|
/*
|
|
* We overshot, so retreat to a clear underestimate. Then estimate
|
|
* the largest likely undershoot. This defines our chop bounds.
|
|
*/
|
|
trials -= band;
|
|
loopadd_base = lpj * band;
|
|
lpj_base = lpj * trials;
|
|
|
|
recalibrate:
|
|
lpj = lpj_base;
|
|
loopadd = loopadd_base;
|
|
|
|
/*
|
|
* Do a binary approximation to get lpj set to
|
|
* equal one clock (up to LPS_PREC bits)
|
|
*/
|
|
chop_limit = lpj >> LPS_PREC;
|
|
while (loopadd > chop_limit) {
|
|
lpj += loopadd;
|
|
ticks = jiffies;
|
|
while (ticks == jiffies)
|
|
; /* nothing */
|
|
ticks = jiffies;
|
|
__delay(lpj);
|
|
if (jiffies != ticks) /* longer than 1 tick */
|
|
lpj -= loopadd;
|
|
loopadd >>= 1;
|
|
}
|
|
/*
|
|
* If we incremented every single time possible, presume we've
|
|
* massively underestimated initially, and retry with a higher
|
|
* start, and larger range. (Only seen on x86_64, due to SMIs)
|
|
*/
|
|
if (lpj + loopadd * 2 == lpj_base + loopadd_base * 2) {
|
|
lpj_base = lpj;
|
|
loopadd_base <<= 2;
|
|
goto recalibrate;
|
|
}
|
|
|
|
return lpj;
|
|
}
|
|
|
|
/*
 * Set loops_per_jiffy, choosing the first source that applies:
 *   1. preset_lpj, if non-zero;
 *   2. lpj_fine, if non-zero and nothing has been printed yet;
 *   3. calibrate_delay_direct(), if it returns non-zero;
 *   4. calibrate_delay_converge() otherwise.
 *
 * The progress message and the BogoMIPS summary are emitted only while
 * the function-local "printed" flag is still false; the flag is set at
 * the end of every call.
 */
void __cpuinit calibrate_delay(void)
{
	static bool printed;

	if (preset_lpj) {
		loops_per_jiffy = preset_lpj;
		if (!printed)
			pr_info("Calibrating delay loop (skipped) "
				"preset value.. ");
	} else if ((!printed) && lpj_fine) {
		loops_per_jiffy = lpj_fine;
		pr_info("Calibrating delay loop (skipped), "
			"value calculated using timer frequency.. ");
	} else if ((loops_per_jiffy = calibrate_delay_direct()) != 0) {
		if (!printed)
			pr_info("Calibrating delay using timer "
				"specific routine.. ");
	} else {
		/* Announce before measuring: the converge routine busy-waits. */
		if (!printed)
			pr_info("Calibrating delay loop... ");
		loops_per_jiffy = calibrate_delay_converge();
	}
	if (!printed)
		pr_cont("%lu.%02lu BogoMIPS (lpj=%lu)\n",
			loops_per_jiffy/(500000/HZ),
			(loops_per_jiffy/(5000/HZ)) % 100, loops_per_jiffy);

	printed = true;
}
|