From dc949530e194827349e2be3c8371febb6c1ff3cf Mon Sep 17 00:00:00 2001 From: Steven Noonan Date: Wed, 17 Nov 2021 00:25:26 -0800 Subject: [PATCH 10/21] x86: implement tsc=directsync for systems without IA32_TSC_ADJUST Signed-off-by: Steven Noonan [fwd-port to v6.5] Signed-off-by: Cristian Ciocaltea --- .../admin-guide/kernel-parameters.txt | 2 + arch/x86/include/asm/tsc.h | 1 + arch/x86/kernel/tsc.c | 3 ++ arch/x86/kernel/tsc_sync.c | 48 +++++++++++++++---- 4 files changed, 44 insertions(+), 10 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 11e57ba2985c..127021ae9d71 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6868,6 +6868,8 @@ This will be suppressed by an earlier tsc=nowatchdog and can be overridden by a later tsc=nowatchdog. A console message will flag any such suppression or overriding. + [x86] directsync: attempt to sync the tsc via direct + writes if MSR_IA32_TSC_ADJUST isn't available tsc_early_khz= [X86,EARLY] Skip early TSC calibration and use the given value instead. Useful when the early TSC frequency discovery diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 405efb3e4996..eda44749b34c 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h @@ -42,6 +42,7 @@ extern unsigned long native_calibrate_tsc(void); extern unsigned long long native_sched_clock_from_tsc(u64 tsc); extern int tsc_clocksource_reliable; +extern int tsc_allow_direct_sync; #ifdef CONFIG_X86_TSC extern bool tsc_async_resets; #else diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 06b170759e5b..a306acecff7c 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -47,6 +47,7 @@ static unsigned int __initdata tsc_early_khz; static DEFINE_STATIC_KEY_FALSE_RO(__use_tsc); int tsc_clocksource_reliable; +int tsc_allow_direct_sync; static int __read_mostly tsc_force_recalibrate; @@ -339,6 +340,8 @@ static int __init tsc_setup(char *str) else tsc_as_watchdog = 1; } + if (!strcmp(str, "directsync")) + tsc_allow_direct_sync = 1; return 1; } diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 4334033658ed..3cc241632221 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -33,6 +33,8 @@ struct tsc_adjust { static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust); static struct timer_list tsc_sync_check_timer; +extern int tsc_allow_direct_sync; + /* * TSC's on different sockets may be reset asynchronously. * This may cause the TSC ADJUST value on socket 0 to be NOT 0. @@ -338,6 +340,8 @@ static cycles_t check_tsc_warp(unsigned int timeout) */ static inline unsigned int loop_timeout(int cpu) { + if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST)) + return 30; return (cpumask_weight(topology_core_cpumask(cpu)) > 1) ? 2 : 20; } @@ -358,13 +362,16 @@ static void check_tsc_sync_source(void *__cpu) /* * Set the maximum number of test runs to - * 1 if the CPU does not provide the TSC_ADJUST MSR - * 3 if the MSR is available, so the target can try to adjust + * 5 if we can write TSC_ADJUST to compensate + * 1000 if we are allowed to write to the TSC MSR to compensate + * 1 if we cannot write MSRs to synchronize TSCs */ - if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST)) - atomic_set(&test_runs, 1); - else + if (boot_cpu_has(X86_FEATURE_TSC_ADJUST)) atomic_set(&test_runs, 3); + else if (tsc_allow_direct_sync) + atomic_set(&test_runs, 1000); + else + atomic_set(&test_runs, 1); retry: /* Wait for the target to start. */ while (atomic_read(&start_count) != cpus - 1) @@ -425,6 +432,21 @@ static void check_tsc_sync_source(void *__cpu) goto retry; } +static inline cycles_t write_tsc_adjustment(s64 adjustment) +{ + cycles_t adjval, nextval; + + rdmsrl(MSR_IA32_TSC, adjval); + adjval += adjustment; + wrmsrl(MSR_IA32_TSC, adjval); + rdmsrl(MSR_IA32_TSC, nextval); + + /* + * Estimated clock cycle overhead for wrmsr + rdmsr + */ + return nextval - adjval; +} + /* * Freshly booted CPUs call into this: */ @@ -432,7 +454,7 @@ void check_tsc_sync_target(void) { struct tsc_adjust *cur = this_cpu_ptr(&tsc_adjust); unsigned int cpu = smp_processor_id(); - cycles_t cur_max_warp, gbl_max_warp; + cycles_t cur_max_warp, gbl_max_warp, est_overhead = 0; int cpus = 2; /* Also aborts if there is no TSC. */ @@ -513,12 +535,18 @@ void check_tsc_sync_target(void) * value is used. In the worst case the adjustment needs to go * through a 3rd run for fine tuning. */ - cur->adjusted += cur_max_warp; + if (boot_cpu_has(X86_FEATURE_TSC_ADJUST)) { + cur->adjusted += cur_max_warp + est_overhead; - pr_warn("TSC ADJUST compensate: CPU%u observed %lld warp. Adjust: %lld\n", - cpu, cur_max_warp, cur->adjusted); + pr_warn("TSC ADJUST compensate: CPU%u observed %lld warp. Adjust: %lld\n", + cpu, cur_max_warp, cur->adjusted); - wrmsrl(MSR_IA32_TSC_ADJUST, cur->adjusted); + wrmsrl(MSR_IA32_TSC_ADJUST, cur->adjusted); + } else { + pr_debug("TSC direct sync: CPU%u observed %lld warp. Overhead: %lld\n", + cpu, cur_max_warp, est_overhead); + est_overhead = write_tsc_adjustment(cur_max_warp + est_overhead); + } goto retry; } -- 2.45.2