From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Jan Beulich" Subject: Re: [PATCH] x86: don't write_tsc() non-zero values on CPUs updating only the lower 32 bits Date: Thu, 14 Apr 2011 08:28:50 +0100 Message-ID: <4DA6BE52020000780003B87D@vpn.id2.novell.com> References: <4DA6BBD1020000780003B865@vpn.id2.novell.com> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: quoted-printable Return-path: In-Reply-To: <4DA6BBD1020000780003B865@vpn.id2.novell.com> Content-Disposition: inline List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: "xen-devel@lists.xensource.com" Cc: "winston.l.wang" List-Id: xen-devel@lists.xenproject.org >>> On 14.04.11 at 09:18, "Jan Beulich" wrote: > This means suppressing the uses in time_calibration_tsc_rendezvous(), > cstate_restore_tsc(), and synchronize_tsc_slave(), and fixes a boot > hang of Linux Dom0 when loading processor.ko on such systems that > have support for C states above C1. >=20 > Signed-off-by: Jan Beulich A note on backporting to 4.1 and 4.0 (the problem was really found on 4.0.x): An additional adjustment to hpet_disable_legacy_broadcast() is necessary here, as other than in -unstable it isn't prepared to be called before hpet_broadcast_init(), e.g.: --- a/xen/arch/x86/hpet.c +++ b/xen/arch/x86/hpet.c @@ -632,6 +632,9 @@ void hpet_disable_legacy_broadcast(void) u32 cfg; unsigned long flags; =20 + if ( !legacy_hpet_event.shift ) + return; + spin_lock_irqsave(&legacy_hpet_event.lock, flags); =20 legacy_hpet_event.flags |=3D HPET_EVT_DISABLE; Additionally, the disabling of TSC sync isn't necessary on 4.0 - zero gets written into the TSC MSR there. Jan > --- a/xen/arch/x86/acpi/cpu_idle.c > +++ b/xen/arch/x86/acpi/cpu_idle.c > @@ -1098,3 +1098,7 @@ void cpuidle_disable_deep_cstate(void) > hpet_disable_legacy_broadcast(); > } > =20 > +bool_t cpuidle_using_deep_cstate(void) > +{ > + return xen_cpuidle && max_cstate > (local_apic_timer_c2_ok ? 2 : = 1); > +} > --- a/xen/arch/x86/smpboot.c > +++ b/xen/arch/x86/smpboot.c > @@ -41,6 +41,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -124,6 +125,12 @@ static void smp_store_cpu_info(int id) > ; > } > =20 > +/* > + * TSC's upper 32 bits can't be written in earlier CPUs (before > + * Prescott), there is no way to resync one AP against BP. > + */ > +bool_t disable_tsc_sync; > + > static atomic_t tsc_count; > static uint64_t tsc_value; > static cpumask_t tsc_sync_cpu_mask; > @@ -132,6 +139,9 @@ static void synchronize_tsc_master(unsig > { > unsigned int i; > =20 > + if ( disable_tsc_sync ) > + return; > + > if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) && > !cpu_isset(slave, tsc_sync_cpu_mask) ) > return; > @@ -153,6 +163,9 @@ static void synchronize_tsc_slave(unsign > { > unsigned int i; > =20 > + if ( disable_tsc_sync ) > + return; > + > if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) && > !cpu_isset(slave, tsc_sync_cpu_mask) ) > return; > --- a/xen/arch/x86/time.c > +++ b/xen/arch/x86/time.c > @@ -21,6 +21,7 @@ > #include > #include > #include > +#include > #include > #include > #include > @@ -1385,6 +1386,9 @@ void init_percpu_time(void) > /* Late init function (after all CPUs are booted). */ > int __init init_xen_time(void) > { > + u64 tsc, tmp; > + const char *what =3D NULL; > + > if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) ) > { > /* > @@ -1398,6 +1402,45 @@ int __init init_xen_time(void) > setup_clear_cpu_cap(X86_FEATURE_TSC_RELIABLE); > } > =20 > + /* > + * On certain older Intel CPUs writing the TSC MSR clears the upper > + * 32 bits. Obviously we must not use write_tsc() on such CPUs. > + * > + * Additionally, AMD specifies that being able to write the TSC MSR > + * is not an architectural feature (but, other than their manual = says, > + * also cannot be determined from CPUID bits). > + */ > + rdtscll(tsc); > + if ( wrmsr_safe(MSR_IA32_TSC, (u32)tsc) =3D=3D 0 ) > + { > + u64 tmp2; > + > + rdtscll(tmp2); > + write_tsc(tsc | (1ULL << 32)); > + rdtscll(tmp); > + if ( ABS((s64)tmp - (s64)tmp2) < (1LL << 31) ) > + what =3D "only partially"; > + } > + else > + what =3D "not"; > + if ( what ) > + { > + printk(XENLOG_WARNING "TSC %s writable\n", what); > + > + /* time_calibration_tsc_rendezvous() must not be used */ > + if ( !boot_cpu_has(X86_FEATURE_TSC_RELIABLE) ) > + setup_clear_cpu_cap(X86_FEATURE_CONSTANT_TSC); > + > + /* cstate_restore_tsc() must not be used (or do nothing) */ > + if ( !boot_cpu_has(X86_FEATURE_NONSTOP_TSC) ) > + cpuidle_disable_deep_cstate(); > + > + /* synchronize_tsc_slave() must do nothing */ > + disable_tsc_sync =3D 1; > + } > + else > + write_tsc(tsc + 4 * (s32)(tmp - tsc)); > + > /* If we have constant-rate TSCs then scale factor can be shared. = */ > if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) ) > { > @@ -1451,7 +1494,7 @@ static int _disable_pit_irq(void(*hpet_b > * XXX dom0 may rely on RTC interrupt delivery, so only enable > * hpet_broadcast if FSB mode available or if force_hpet_broadcast. > */ > - if ( xen_cpuidle && !boot_cpu_has(X86_FEATURE_ARAT) ) > + if ( cpuidle_using_deep_cstate() && !boot_cpu_has(X86_FEATURE_ARAT) = ) > { > hpet_broadcast_setup(); > if ( !hpet_broadcast_is_available() ) > --- a/xen/include/asm-x86/setup.h > +++ b/xen/include/asm-x86/setup.h > @@ -4,7 +4,6 @@ > #include > =20 > extern bool_t early_boot; > -extern s8 xen_cpuidle; > extern unsigned long xenheap_initial_phys_start; > =20 > void init_done(void); > --- a/xen/include/asm-x86/time.h > +++ b/xen/include/asm-x86/time.h > @@ -24,6 +24,8 @@ > =20 > typedef u64 cycles_t; > =20 > +extern bool_t disable_tsc_sync; > + > static inline cycles_t get_cycles(void) > { > cycles_t c; > --- a/xen/include/xen/cpuidle.h > +++ b/xen/include/xen/cpuidle.h > @@ -85,7 +85,10 @@ struct cpuidle_governor > void (*reflect) (struct acpi_processor_power *dev); > }; > =20 > +extern s8 xen_cpuidle; > extern struct cpuidle_governor *cpuidle_current_governor; > + > +bool_t cpuidle_using_deep_cstate(void); > void cpuidle_disable_deep_cstate(void); > =20 > extern void cpuidle_wakeup_mwait(cpumask_t *mask); >=20 >=20 >=20 > _______________________________________________ > Xen-devel mailing list > Xen-devel@lists.xensource.com=20 > http://lists.xensource.com/xen-devel=20