From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Cooper Subject: [PATCH] x86: Adjust rdtsc inline assembly Date: Wed, 18 Feb 2015 12:25:18 +0000 Message-ID: <1424262318-3405-1-git-send-email-andrew.cooper3@citrix.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Xen-devel Cc: Andrew Cooper , Keir Fraser , Jan Beulich List-Id: xen-devel@lists.xenproject.org Currently there are three related rdtsc macros, all of which are lowercase and not obviously macros, which write by value to their parameters. This is non-intuitive to program which, being contrary to C semantics for code appearing to be a regular function call. It is also causes Coverity to conclude that __udelay() has an infinite loop, as all of its loop conditions are constant. Two of these macros (rdtsc() and rdtscl()) have only a handful of uses while the vast majority of code uses the rdtscll() variant. rdtsc() and rdtscll() are equivalent, while rdtscl() discards the high word. Replace all 3 macros with a static inline which returns the complete tsc. Most of this patch is a mechanical change of - rdtscll($FOO); + $FOO = rdtsc(); And a diff of the generated assembly confirms that this is no change at all. The single use of the old rdtsc() in emulate_privileged_op() is altered to use the new rdtsc() and the rdmsr_writeback path to set eax/edx appropriately. The pair of use of rdtscl() in __udelay() are extended to use full 64bit values, which makes the overflow edge condition (and early exit from the loop) far rarer. Signed-off-by: Andrew Cooper CC: Keir Fraser CC: Jan Beulich --- xen/arch/x86/apic.c | 4 ++-- xen/arch/x86/cpu/mcheck/mce.c | 2 +- xen/arch/x86/delay.c | 4 ++-- xen/arch/x86/hvm/hvm.c | 4 ++-- xen/arch/x86/hvm/save.c | 4 ++-- xen/arch/x86/hvm/svm/svm.c | 2 +- xen/arch/x86/platform_hypercall.c | 4 ++-- xen/arch/x86/smpboot.c | 2 +- xen/arch/x86/time.c | 31 +++++++++++++++---------------- xen/arch/x86/traps.c | 5 ++++- xen/include/asm-x86/msr.h | 15 ++++++--------- xen/include/asm-x86/time.h | 4 +--- 12 files changed, 39 insertions(+), 42 deletions(-) diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c index 39cd9e5..3217bdf 100644 --- a/xen/arch/x86/apic.c +++ b/xen/arch/x86/apic.c @@ -1148,7 +1148,7 @@ static int __init calibrate_APIC_clock(void) * We wrapped around just now. Let's start: */ if (cpu_has_tsc) - rdtscll(t1); + t1 = rdtsc(); tt1 = apic_read(APIC_TMCCT); /* @@ -1159,7 +1159,7 @@ static int __init calibrate_APIC_clock(void) tt2 = apic_read(APIC_TMCCT); if (cpu_has_tsc) - rdtscll(t2); + t2 = rdtsc(); /* * The APIC bus clock counter is 32 bits only, it diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c index 05a86fb..3a3b4dc 100644 --- a/xen/arch/x86/cpu/mcheck/mce.c +++ b/xen/arch/x86/cpu/mcheck/mce.c @@ -235,7 +235,7 @@ static void mca_init_bank(enum mca_source who, if (who == MCA_CMCI_HANDLER) { mib->mc_ctrl2 = mca_rdmsr(MSR_IA32_MC0_CTL2 + bank); - rdtscll(mib->mc_tsc); + mib->mc_tsc = rdtsc(); } } diff --git a/xen/arch/x86/delay.c b/xen/arch/x86/delay.c index bc1772e..ef6bc5d 100644 --- a/xen/arch/x86/delay.c +++ b/xen/arch/x86/delay.c @@ -21,10 +21,10 @@ void __udelay(unsigned long usecs) unsigned long ticks = usecs * (cpu_khz / 1000); unsigned long s, e; - rdtscl(s); + s = rdtsc(); do { rep_nop(); - rdtscl(e); + e = rdtsc(); } while ((e-s) < ticks); } diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c index a52c6e0..72e383f 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -292,7 +292,7 @@ void hvm_set_guest_tsc_fixed(struct vcpu *v, u64 guest_tsc, u64 at_tsc) } else { - rdtscll(tsc); + tsc = rdtsc(); } delta_tsc = guest_tsc - tsc; @@ -326,7 +326,7 @@ u64 hvm_get_guest_tsc_fixed(struct vcpu *v, uint64_t at_tsc) } else { - rdtscll(tsc); + tsc = rdtsc(); } return tsc + v->arch.hvm_vcpu.cache_tsc_offset; diff --git a/xen/arch/x86/hvm/save.c b/xen/arch/x86/hvm/save.c index 6af19be..61f780d 100644 --- a/xen/arch/x86/hvm/save.c +++ b/xen/arch/x86/hvm/save.c @@ -36,7 +36,7 @@ void arch_hvm_save(struct domain *d, struct hvm_save_header *hdr) hdr->gtsc_khz = d->arch.tsc_khz; /* Time when saving started */ - rdtscll(d->arch.hvm_domain.sync_tsc); + d->arch.hvm_domain.sync_tsc = rdtsc(); } int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr) @@ -71,7 +71,7 @@ int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr) hvm_set_rdtsc_exiting(d, 1); /* Time when restore started */ - rdtscll(d->arch.hvm_domain.sync_tsc); + d->arch.hvm_domain.sync_tsc = rdtsc(); /* VGA state is not saved/restored, so we nobble the cache. */ d->arch.hvm_domain.stdvga.cache = 0; diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c index 018dd70..c83c483 100644 --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -805,7 +805,7 @@ static void svm_set_tsc_offset(struct vcpu *v, u64 offset, u64 at_tsc) if ( at_tsc ) host_tsc = at_tsc; else - rdtscll(host_tsc); + host_tsc = rdtsc(); offset = svm_get_tsc_offset(host_tsc, guest_tsc, vcpu_tsc_ratio(v)); } diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c index b427852..11c510d 100644 --- a/xen/arch/x86/platform_hypercall.c +++ b/xen/arch/x86/platform_hypercall.c @@ -146,7 +146,7 @@ static void resource_access(void *info) { unsigned long flags = 0; /* - * If next entry is MSR_IA32_TSC read, then the actual rdtscll + * If next entry is MSR_IA32_TSC read, then the actual rdtsc * is performed together with current entry, with IRQ disabled. */ bool_t read_tsc = (i < ra->nr_done - 1 && @@ -159,7 +159,7 @@ static void resource_access(void *info) if ( unlikely(read_tsc) ) { - rdtscll(tsc); + tsc = rdtsc(); local_irq_restore(flags); } } diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index c54be7e..7ae561c 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -142,7 +142,7 @@ static void synchronize_tsc_master(unsigned int slave) for ( i = 1; i <= 5; i++ ) { - rdtscll(tsc_value); + tsc_value = rdtsc(); wmb(); atomic_inc(&tsc_count); while ( atomic_read(&tsc_count) != (i<<1) ) diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c index b609938..11c9189 100644 --- a/xen/arch/x86/time.c +++ b/xen/arch/x86/time.c @@ -261,10 +261,10 @@ static u64 init_pit_and_calibrate_tsc(void) outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */ outb(CALIBRATE_LATCH >> 8, PIT_CH2); /* MSB of count */ - rdtscll(start); + start = rdtsc(); for ( count = 0; (inb(0x61) & 0x20) == 0; count++ ) continue; - rdtscll(end); + end = rdtsc(); /* Error if the CTC doesn't behave itself. */ if ( count == 0 ) @@ -764,7 +764,7 @@ s_time_t get_s_time_fixed(u64 at_tsc) if ( at_tsc ) tsc = at_tsc; else - rdtscll(tsc); + tsc = rdtsc(); delta = tsc - t->local_tsc_stamp; now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale); @@ -971,7 +971,7 @@ int cpu_frequency_change(u64 freq) /* TSC-extrapolated time may be bogus after frequency change. */ /*t->stime_local_stamp = get_s_time();*/ t->stime_local_stamp = t->stime_master_stamp; - rdtscll(curr_tsc); + curr_tsc = rdtsc(); t->local_tsc_stamp = curr_tsc; set_time_scale(&t->tsc_scale, freq); local_irq_enable(); @@ -1307,7 +1307,7 @@ static void time_calibration_tsc_rendezvous(void *_r) if ( r->master_stime == 0 ) { r->master_stime = read_platform_stime(); - rdtscll(r->master_tsc_stamp); + r->master_tsc_stamp = rdtsc(); } atomic_inc(&r->semaphore); @@ -1333,7 +1333,7 @@ static void time_calibration_tsc_rendezvous(void *_r) } } - rdtscll(c->local_tsc_stamp); + c->local_tsc_stamp = rdtsc(); c->stime_local_stamp = get_s_time(); c->stime_master_stamp = r->master_stime; @@ -1363,7 +1363,7 @@ static void time_calibration_std_rendezvous(void *_r) mb(); /* receive signal /then/ read r->master_stime */ } - rdtscll(c->local_tsc_stamp); + c->local_tsc_stamp = rdtsc(); c->stime_local_stamp = get_s_time(); c->stime_master_stamp = r->master_stime; @@ -1397,7 +1397,7 @@ void init_percpu_time(void) t->tsc_scale = per_cpu(cpu_time, 0).tsc_scale; local_irq_save(flags); - rdtscll(t->local_tsc_stamp); + t->local_tsc_stamp = rdtsc(); now = read_platform_stime(); local_irq_restore(flags); @@ -1426,13 +1426,13 @@ static void __init tsc_check_writability(void) if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) ) return; - rdtscll(tsc); + tsc = rdtsc(); if ( wrmsr_safe(MSR_IA32_TSC, 0) == 0 ) { uint64_t tmp, tmp2; - rdtscll(tmp2); + tmp2 = rdtsc(); write_tsc(tsc | (1ULL << 32)); - rdtscll(tmp); + tmp = rdtsc(); if ( ABS((s64)tmp - (s64)tmp2) < (1LL << 31) ) what = "only partially"; } @@ -1868,7 +1868,7 @@ void tsc_get_info(struct domain *d, uint32_t *tsc_mode, *gtsc_khz = d->arch.tsc_khz; break; } - rdtscll(tsc); + tsc = rdtsc(); *elapsed_nsec = scale_delta(tsc, &d->arch.vtsc_to_ns); *gtsc_khz = cpu_khz; break; @@ -1880,7 +1880,7 @@ void tsc_get_info(struct domain *d, uint32_t *tsc_mode, } else { - rdtscll(tsc); + tsc = rdtsc(); *elapsed_nsec = scale_delta(tsc, &d->arch.vtsc_to_ns) - d->arch.vtsc_offset; *gtsc_khz = 0; /* ignored by tsc_set_info */ @@ -1973,8 +1973,7 @@ void tsc_set_info(struct domain *d, else { /* when using native TSC, offset is nsec relative to power-on * of physical machine */ - uint64_t tsc = 0; - rdtscll(tsc); + uint64_t tsc = rdtsc(); d->arch.vtsc_offset = scale_delta(tsc,&d->arch.vtsc_to_ns) - elapsed_nsec; } @@ -1994,7 +1993,7 @@ void tsc_set_info(struct domain *d, * call set_tsc_offset() later from hvm_vcpu_reset_state() and they * will sync their TSC to BSP's sync_tsc. */ - rdtscll(d->arch.hvm_domain.sync_tsc); + d->arch.hvm_domain.sync_tsc = rdtsc(); hvm_funcs.set_tsc_offset(d->vcpu[0], d->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset, d->arch.hvm_domain.sync_tsc); diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index 057a7af..14e2563 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -2663,7 +2663,10 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) if ( v->domain->arch.vtsc ) pv_soft_rdtsc(v, regs, 0); else - rdtsc(regs->eax, regs->edx); + { + val = rdtsc(); + goto rdmsr_writeback; + } break; case 0x32: /* RDMSR */ diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h index 52cae4b..3072f29 100644 --- a/xen/include/asm-x86/msr.h +++ b/xen/include/asm-x86/msr.h @@ -71,17 +71,14 @@ static inline int wrmsr_safe(unsigned int msr, uint64_t val) return _rc; } -#define rdtsc(low,high) \ - __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)) +static inline uint64_t rdtsc(void) +{ + uint32_t low, high; -#define rdtscl(low) \ - __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx") + __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high)); -#define rdtscll(val) do { \ - unsigned int _eax, _edx; \ - asm volatile("rdtsc" : "=a" (_eax), "=d" (_edx)); \ - (val) = ((unsigned long)_eax) | (((unsigned long)_edx)<<32); \ -} while(0) + return (uint64_t)high << 32 | low; +} #define __write_tsc(val) wrmsrl(MSR_IA32_TSC, val) #define write_tsc(val) ({ \ diff --git a/xen/include/asm-x86/time.h b/xen/include/asm-x86/time.h index c4d82f6..39d6bf3 100644 --- a/xen/include/asm-x86/time.h +++ b/xen/include/asm-x86/time.h @@ -28,9 +28,7 @@ static inline cycles_t get_cycles(void) { - cycles_t c; - rdtscll(c); - return c; + return rdtsc(); } unsigned long -- 1.7.10.4