x86/tsc: Introduce early tsc clocksource

Without TSC_KNOWN_FREQ the TSC clocksource is registered so late that the
kernel first switches to the HPET. Using HPET on large CPU count machines is
undesirable.

Therefore register a tsc-early clocksource using the preliminary tsc_khz
from quick calibration. Then when the final TSC calibration is done, it
can switch to the tuned frequency.

The only notably problem is that the real tsc clocksource must be marked
with CLOCK_SOURCE_VALID_FOR_HRES, otherwise it will not be selected when
unregistering tsc-early. tsc-early cannot be left registered, because then
the clocksource code would fall back to it when we tsc clocksource is
marked unstable later.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: len.brown@intel.com
Cc: rui.zhang@intel.com
Cc: Len Brown <lenb@kernel.org>
Link: https://lkml.kernel.org/r/20171222092243.431585460@infradead.org
This commit is contained in:
Peter Zijlstra 2017-12-22 10:20:13 +01:00 committed by Thomas Gleixner
parent 6d671e1b85
commit aa83c45762

View File

@ -1006,8 +1006,6 @@ static void __init detect_art(void)
/* clocksource code */ /* clocksource code */
static struct clocksource clocksource_tsc;
static void tsc_resume(struct clocksource *cs) static void tsc_resume(struct clocksource *cs)
{ {
tsc_verify_tsc_adjust(true); tsc_verify_tsc_adjust(true);
@ -1058,12 +1056,31 @@ static void tsc_cs_tick_stable(struct clocksource *cs)
/* /*
* .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc() * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc()
*/ */
static struct clocksource clocksource_tsc_early = {
.name = "tsc-early",
.rating = 299,
.read = read_tsc,
.mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_MUST_VERIFY,
.archdata = { .vclock_mode = VCLOCK_TSC },
.resume = tsc_resume,
.mark_unstable = tsc_cs_mark_unstable,
.tick_stable = tsc_cs_tick_stable,
};
/*
* Must mark VALID_FOR_HRES early such that when we unregister tsc_early
* this one will immediately take over. We will only register if TSC has
* been found good.
*/
static struct clocksource clocksource_tsc = { static struct clocksource clocksource_tsc = {
.name = "tsc", .name = "tsc",
.rating = 300, .rating = 300,
.read = read_tsc, .read = read_tsc,
.mask = CLOCKSOURCE_MASK(64), .mask = CLOCKSOURCE_MASK(64),
.flags = CLOCK_SOURCE_IS_CONTINUOUS | .flags = CLOCK_SOURCE_IS_CONTINUOUS |
CLOCK_SOURCE_VALID_FOR_HRES |
CLOCK_SOURCE_MUST_VERIFY, CLOCK_SOURCE_MUST_VERIFY,
.archdata = { .vclock_mode = VCLOCK_TSC }, .archdata = { .vclock_mode = VCLOCK_TSC },
.resume = tsc_resume, .resume = tsc_resume,
@ -1187,8 +1204,8 @@ static void tsc_refine_calibration_work(struct work_struct *work)
int cpu; int cpu;
/* Don't bother refining TSC on unstable systems */ /* Don't bother refining TSC on unstable systems */
if (check_tsc_unstable()) if (tsc_unstable)
goto out; return;
/* /*
* Since the work is started early in boot, we may be * Since the work is started early in boot, we may be
@ -1240,9 +1257,13 @@ static void tsc_refine_calibration_work(struct work_struct *work)
set_cyc2ns_scale(tsc_khz, cpu, tsc_stop); set_cyc2ns_scale(tsc_khz, cpu, tsc_stop);
out: out:
if (tsc_unstable)
return;
if (boot_cpu_has(X86_FEATURE_ART)) if (boot_cpu_has(X86_FEATURE_ART))
art_related_clocksource = &clocksource_tsc; art_related_clocksource = &clocksource_tsc;
clocksource_register_khz(&clocksource_tsc, tsc_khz); clocksource_register_khz(&clocksource_tsc, tsc_khz);
clocksource_unregister(&clocksource_tsc_early);
} }
@ -1251,13 +1272,11 @@ static int __init init_tsc_clocksource(void)
if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz) if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz)
return 0; return 0;
if (check_tsc_unstable())
return 0;
if (tsc_clocksource_reliable) if (tsc_clocksource_reliable)
clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
/* lower the rating if we already know its unstable: */
if (check_tsc_unstable()) {
clocksource_tsc.rating = 0;
clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
}
if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3)) if (boot_cpu_has(X86_FEATURE_NONSTOP_TSC_S3))
clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP; clocksource_tsc.flags |= CLOCK_SOURCE_SUSPEND_NONSTOP;
@ -1270,6 +1289,7 @@ static int __init init_tsc_clocksource(void)
if (boot_cpu_has(X86_FEATURE_ART)) if (boot_cpu_has(X86_FEATURE_ART))
art_related_clocksource = &clocksource_tsc; art_related_clocksource = &clocksource_tsc;
clocksource_register_khz(&clocksource_tsc, tsc_khz); clocksource_register_khz(&clocksource_tsc, tsc_khz);
clocksource_unregister(&clocksource_tsc_early);
return 0; return 0;
} }
@ -1374,9 +1394,12 @@ void __init tsc_init(void)
check_system_tsc_reliable(); check_system_tsc_reliable();
if (unsynchronized_tsc()) if (unsynchronized_tsc()) {
mark_tsc_unstable("TSCs unsynchronized"); mark_tsc_unstable("TSCs unsynchronized");
return;
}
clocksource_register_khz(&clocksource_tsc_early, tsc_khz);
detect_art(); detect_art();
} }