* [PATCH] x86: Adjust rdtsc inline assembly
From: Andrew Cooper @ 2015-02-18 12:25 UTC
  To: Xen-devel; +Cc: Andrew Cooper, Keir Fraser, Jan Beulich

Currently there are three related rdtsc macros, all of which are lowercase,
not obviously macros, and write to parameters which appear to be passed by
value.

This is non-intuitive to program with, being contrary to C semantics for
what appears to be a regular function call.  It also causes Coverity to
conclude that __udelay() has an infinite loop, as all of its loop conditions
appear constant to the analyser.
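
For illustration, the shape of the pattern Coverity trips over (condensed
from the __udelay() hunk below, not verbatim):

    unsigned long s, e;

    rdtscl(s);                 /* looks like a by-value call, but writes s */
    do {
        rep_nop();
        rdtscl(e);             /* likewise writes e via its "=a" output */
    } while ( (e - s) < ticks );  /* conditions appear constant to Coverity */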

Two of these macros (rdtsc() and rdtscl()) have only a handful of uses while
the vast majority of code uses the rdtscll() variant.  rdtsc() and rdtscll()
are equivalent, while rdtscl() discards the high word.

Replace all 3 macros with a static inline which returns the complete tsc.

Most of this patch is a mechanical change of

  - rdtscll($FOO);
  + $FOO = rdtsc();

A diff of the generated assembly confirms that the transformation results in
no change at all.

The single use of the old rdtsc() in emulate_privileged_op() is altered to use
the new rdtsc() and the rdmsr_writeback path to set eax/edx appropriately.

The pair of uses of rdtscl() in __udelay() is extended to use full 64bit
values, which makes the overflow edge condition (and the resulting early
exit from the loop) far rarer.
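
To make that edge condition concrete (a worked example, not part of the
patch): with the old 32bit reads held in 64bit variables, a wrap of the low
word mid-delay, e.g. s = 0xfffff000 and e = 0x00001000, gives
e - s = 0xffffffff00002000, which compares >= ticks and ends the loop almost
immediately.  With full 64bit values the same subtraction is simply 0x2000.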

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Keir Fraser <keir@xen.org>
CC: Jan Beulich <JBeulich@suse.com>
---
 xen/arch/x86/apic.c               |    4 ++--
 xen/arch/x86/cpu/mcheck/mce.c     |    2 +-
 xen/arch/x86/delay.c              |    4 ++--
 xen/arch/x86/hvm/hvm.c            |    4 ++--
 xen/arch/x86/hvm/save.c           |    4 ++--
 xen/arch/x86/hvm/svm/svm.c        |    2 +-
 xen/arch/x86/platform_hypercall.c |    4 ++--
 xen/arch/x86/smpboot.c            |    2 +-
 xen/arch/x86/time.c               |   31 +++++++++++++++----------------
 xen/arch/x86/traps.c              |    5 ++++-
 xen/include/asm-x86/msr.h         |   15 ++++++---------
 xen/include/asm-x86/time.h        |    4 +---
 12 files changed, 39 insertions(+), 42 deletions(-)

diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c
index 39cd9e5..3217bdf 100644
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -1148,7 +1148,7 @@ static int __init calibrate_APIC_clock(void)
      * We wrapped around just now. Let's start:
      */
     if (cpu_has_tsc)
-        rdtscll(t1);
+        t1 = rdtsc();
     tt1 = apic_read(APIC_TMCCT);
 
     /*
@@ -1159,7 +1159,7 @@ static int __init calibrate_APIC_clock(void)
 
     tt2 = apic_read(APIC_TMCCT);
     if (cpu_has_tsc)
-        rdtscll(t2);
+        t2 = rdtsc();
 
     /*
      * The APIC bus clock counter is 32 bits only, it
diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c
index 05a86fb..3a3b4dc 100644
--- a/xen/arch/x86/cpu/mcheck/mce.c
+++ b/xen/arch/x86/cpu/mcheck/mce.c
@@ -235,7 +235,7 @@ static void mca_init_bank(enum mca_source who,
 
     if (who == MCA_CMCI_HANDLER) {
         mib->mc_ctrl2 = mca_rdmsr(MSR_IA32_MC0_CTL2 + bank);
-        rdtscll(mib->mc_tsc);
+        mib->mc_tsc = rdtsc();
     }
 }
 
diff --git a/xen/arch/x86/delay.c b/xen/arch/x86/delay.c
index bc1772e..ef6bc5d 100644
--- a/xen/arch/x86/delay.c
+++ b/xen/arch/x86/delay.c
@@ -21,10 +21,10 @@ void __udelay(unsigned long usecs)
     unsigned long ticks = usecs * (cpu_khz / 1000);
     unsigned long s, e;
 
-    rdtscl(s);
+    s = rdtsc();
     do
     {
         rep_nop();
-        rdtscl(e);
+        e = rdtsc();
     } while ((e-s) < ticks);
 }
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index a52c6e0..72e383f 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -292,7 +292,7 @@ void hvm_set_guest_tsc_fixed(struct vcpu *v, u64 guest_tsc, u64 at_tsc)
     }
     else
     {
-        rdtscll(tsc);
+        tsc = rdtsc();
     }
 
     delta_tsc = guest_tsc - tsc;
@@ -326,7 +326,7 @@ u64 hvm_get_guest_tsc_fixed(struct vcpu *v, uint64_t at_tsc)
     }
     else
     {
-        rdtscll(tsc);
+        tsc = rdtsc();
     }
 
     return tsc + v->arch.hvm_vcpu.cache_tsc_offset;
diff --git a/xen/arch/x86/hvm/save.c b/xen/arch/x86/hvm/save.c
index 6af19be..61f780d 100644
--- a/xen/arch/x86/hvm/save.c
+++ b/xen/arch/x86/hvm/save.c
@@ -36,7 +36,7 @@ void arch_hvm_save(struct domain *d, struct hvm_save_header *hdr)
     hdr->gtsc_khz = d->arch.tsc_khz;
 
     /* Time when saving started */
-    rdtscll(d->arch.hvm_domain.sync_tsc);
+    d->arch.hvm_domain.sync_tsc = rdtsc();
 }
 
 int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
@@ -71,7 +71,7 @@ int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
         hvm_set_rdtsc_exiting(d, 1);
 
     /* Time when restore started  */
-    rdtscll(d->arch.hvm_domain.sync_tsc);
+    d->arch.hvm_domain.sync_tsc = rdtsc();
 
     /* VGA state is not saved/restored, so we nobble the cache. */
     d->arch.hvm_domain.stdvga.cache = 0;
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 018dd70..c83c483 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -805,7 +805,7 @@ static void svm_set_tsc_offset(struct vcpu *v, u64 offset, u64 at_tsc)
         if ( at_tsc )
             host_tsc = at_tsc;
         else
-            rdtscll(host_tsc);
+            host_tsc = rdtsc();
         offset = svm_get_tsc_offset(host_tsc, guest_tsc, vcpu_tsc_ratio(v));
     }
 
diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
index b427852..11c510d 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -146,7 +146,7 @@ static void resource_access(void *info)
             {
                 unsigned long flags = 0;
                 /*
-                 * If next entry is MSR_IA32_TSC read, then the actual rdtscll
+                 * If next entry is MSR_IA32_TSC read, then the actual rdtsc
                  * is performed together with current entry, with IRQ disabled.
                  */
                 bool_t read_tsc = (i < ra->nr_done - 1 &&
@@ -159,7 +159,7 @@ static void resource_access(void *info)
 
                 if ( unlikely(read_tsc) )
                 {
-                    rdtscll(tsc);
+                    tsc = rdtsc();
                     local_irq_restore(flags);
                 }
             }
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index c54be7e..7ae561c 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -142,7 +142,7 @@ static void synchronize_tsc_master(unsigned int slave)
 
     for ( i = 1; i <= 5; i++ )
     {
-        rdtscll(tsc_value);
+        tsc_value = rdtsc();
         wmb();
         atomic_inc(&tsc_count);
         while ( atomic_read(&tsc_count) != (i<<1) )
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
index b609938..11c9189 100644
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -261,10 +261,10 @@ static u64 init_pit_and_calibrate_tsc(void)
     outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
     outb(CALIBRATE_LATCH >> 8, PIT_CH2);   /* MSB of count */
 
-    rdtscll(start);
+    start = rdtsc();
     for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
         continue;
-    rdtscll(end);
+    end = rdtsc();
 
     /* Error if the CTC doesn't behave itself. */
     if ( count == 0 )
@@ -764,7 +764,7 @@ s_time_t get_s_time_fixed(u64 at_tsc)
     if ( at_tsc )
         tsc = at_tsc;
     else
-        rdtscll(tsc);
+        tsc = rdtsc();
     delta = tsc - t->local_tsc_stamp;
     now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
 
@@ -971,7 +971,7 @@ int cpu_frequency_change(u64 freq)
     /* TSC-extrapolated time may be bogus after frequency change. */
     /*t->stime_local_stamp = get_s_time();*/
     t->stime_local_stamp = t->stime_master_stamp;
-    rdtscll(curr_tsc);
+    curr_tsc = rdtsc();
     t->local_tsc_stamp = curr_tsc;
     set_time_scale(&t->tsc_scale, freq);
     local_irq_enable();
@@ -1307,7 +1307,7 @@ static void time_calibration_tsc_rendezvous(void *_r)
             if ( r->master_stime == 0 )
             {
                 r->master_stime = read_platform_stime();
-                rdtscll(r->master_tsc_stamp);
+                r->master_tsc_stamp = rdtsc();
             }
             atomic_inc(&r->semaphore);
 
@@ -1333,7 +1333,7 @@ static void time_calibration_tsc_rendezvous(void *_r)
         }
     }
 
-    rdtscll(c->local_tsc_stamp);
+    c->local_tsc_stamp = rdtsc();
     c->stime_local_stamp = get_s_time();
     c->stime_master_stamp = r->master_stime;
 
@@ -1363,7 +1363,7 @@ static void time_calibration_std_rendezvous(void *_r)
         mb(); /* receive signal /then/ read r->master_stime */
     }
 
-    rdtscll(c->local_tsc_stamp);
+    c->local_tsc_stamp = rdtsc();
     c->stime_local_stamp = get_s_time();
     c->stime_master_stamp = r->master_stime;
 
@@ -1397,7 +1397,7 @@ void init_percpu_time(void)
     t->tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
 
     local_irq_save(flags);
-    rdtscll(t->local_tsc_stamp);
+    t->local_tsc_stamp = rdtsc();
     now = read_platform_stime();
     local_irq_restore(flags);
 
@@ -1426,13 +1426,13 @@ static void __init tsc_check_writability(void)
     if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
         return;
 
-    rdtscll(tsc);
+    tsc = rdtsc();
     if ( wrmsr_safe(MSR_IA32_TSC, 0) == 0 )
     {
         uint64_t tmp, tmp2;
-        rdtscll(tmp2);
+        tmp2 = rdtsc();
         write_tsc(tsc | (1ULL << 32));
-        rdtscll(tmp);
+        tmp = rdtsc();
         if ( ABS((s64)tmp - (s64)tmp2) < (1LL << 31) )
             what = "only partially";
     }
@@ -1868,7 +1868,7 @@ void tsc_get_info(struct domain *d, uint32_t *tsc_mode,
             *gtsc_khz = d->arch.tsc_khz;
             break;
         }
-        rdtscll(tsc);
+        tsc = rdtsc();
         *elapsed_nsec = scale_delta(tsc, &d->arch.vtsc_to_ns);
         *gtsc_khz = cpu_khz;
         break;
@@ -1880,7 +1880,7 @@ void tsc_get_info(struct domain *d, uint32_t *tsc_mode,
         }
         else
         {
-            rdtscll(tsc);
+            tsc = rdtsc();
             *elapsed_nsec = scale_delta(tsc, &d->arch.vtsc_to_ns) -
                             d->arch.vtsc_offset;
             *gtsc_khz = 0; /* ignored by tsc_set_info */
@@ -1973,8 +1973,7 @@ void tsc_set_info(struct domain *d,
         else {
             /* when using native TSC, offset is nsec relative to power-on
              * of physical machine */
-            uint64_t tsc = 0;
-            rdtscll(tsc);
+            uint64_t tsc = rdtsc();
             d->arch.vtsc_offset = scale_delta(tsc,&d->arch.vtsc_to_ns) -
                                   elapsed_nsec;
         }
@@ -1994,7 +1993,7 @@ void tsc_set_info(struct domain *d,
              * call set_tsc_offset() later from hvm_vcpu_reset_state() and they
              * will sync their TSC to BSP's sync_tsc.
              */
-            rdtscll(d->arch.hvm_domain.sync_tsc);
+            d->arch.hvm_domain.sync_tsc = rdtsc();
             hvm_funcs.set_tsc_offset(d->vcpu[0],
                                      d->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset,
                                      d->arch.hvm_domain.sync_tsc);
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 057a7af..14e2563 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2663,7 +2663,10 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
         if ( v->domain->arch.vtsc )
             pv_soft_rdtsc(v, regs, 0);
         else
-            rdtsc(regs->eax, regs->edx);
+        {
+            val = rdtsc();
+            goto rdmsr_writeback;
+        }
         break;
 
     case 0x32: /* RDMSR */
diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h
index 52cae4b..3072f29 100644
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -71,17 +71,14 @@ static inline int wrmsr_safe(unsigned int msr, uint64_t val)
     return _rc;
 }
 
-#define rdtsc(low,high) \
-     __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
+static inline uint64_t rdtsc(void)
+{
+    uint32_t low, high;
 
-#define rdtscl(low) \
-     __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
+    __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
 
-#define rdtscll(val) do { \
-     unsigned int _eax, _edx; \
-     asm volatile("rdtsc" : "=a" (_eax), "=d" (_edx)); \
-     (val) = ((unsigned long)_eax) | (((unsigned long)_edx)<<32); \
-} while(0)
+    return (uint64_t)high << 32 | low;
+}
 
 #define __write_tsc(val) wrmsrl(MSR_IA32_TSC, val)
 #define write_tsc(val) ({                                       \
diff --git a/xen/include/asm-x86/time.h b/xen/include/asm-x86/time.h
index c4d82f6..39d6bf3 100644
--- a/xen/include/asm-x86/time.h
+++ b/xen/include/asm-x86/time.h
@@ -28,9 +28,7 @@
 
 static inline cycles_t get_cycles(void)
 {
-    cycles_t c;
-    rdtscll(c);
-    return c;
+    return rdtsc();
 }
 
 unsigned long
-- 
1.7.10.4


* Re: [PATCH] x86: Adjust rdtsc inline assembly
From: Jan Beulich @ 2015-02-18 13:11 UTC
  To: Andrew Cooper; +Cc: Keir Fraser, Xen-devel

>>> On 18.02.15 at 13:25, <andrew.cooper3@citrix.com> wrote:
> The single use of the old rdtsc() in emulate_privileged_op() is altered to use
> the new rdtsc() and the rdmsr_writeback path to set eax/edx appropriately.

I'm not entirely sure about this one - the current code is surely slightly
faster than the replacement. The question is how much this matters. I'd
suggest being on the safe side and simply open-coding the asm() there.
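
For concreteness, such open-coding would be essentially what the old macro
expanded to at that call site (a sketch, untested):

    asm volatile ( "rdtsc" : "=a" (regs->eax), "=d" (regs->edx) );
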
If you decide to follow that, there are a few more cosmetic things which
otherwise I would adjust while committing:

> @@ -1426,13 +1426,13 @@ static void __init tsc_check_writability(void)
>      if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
>          return;
>  
> -    rdtscll(tsc);
> +    tsc = rdtsc();
>      if ( wrmsr_safe(MSR_IA32_TSC, 0) == 0 )
>      {
>          uint64_t tmp, tmp2;
> -        rdtscll(tmp2);
> +        tmp2 = rdtsc();

Blank line between declaration and statements. And perhaps the
assignment could become the variable's initializer.

> @@ -1973,8 +1973,7 @@ void tsc_set_info(struct domain *d,
>          else {
>              /* when using native TSC, offset is nsec relative to power-on
>               * of physical machine */
> -            uint64_t tsc = 0;
> -            rdtscll(tsc);
> +            uint64_t tsc = rdtsc();
>              d->arch.vtsc_offset = scale_delta(tsc,&d->arch.vtsc_to_ns) -

Blank line missing again.

> --- a/xen/include/asm-x86/msr.h
> +++ b/xen/include/asm-x86/msr.h
> @@ -71,17 +71,14 @@ static inline int wrmsr_safe(unsigned int msr, uint64_t val)
>      return _rc;
>  }
>  
> -#define rdtsc(low,high) \
> -     __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
> +static inline uint64_t rdtsc(void)
> +{
> +    uint32_t low, high;
>  
> -#define rdtscl(low) \
> -     __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
> +    __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
>  
> -#define rdtscll(val) do { \
> -     unsigned int _eax, _edx; \
> -     asm volatile("rdtsc" : "=a" (_eax), "=d" (_edx)); \
> -     (val) = ((unsigned long)_eax) | (((unsigned long)_edx)<<32); \
> -} while(0)
> +    return (uint64_t)high << 32 | low;

Parentheses around the << please.

Jan


* Re: [PATCH] x86: Adjust rdtsc inline assembly
From: Andrew Cooper @ 2015-02-18 13:59 UTC
  To: Jan Beulich; +Cc: Keir Fraser, Xen-devel

On 18/02/15 13:11, Jan Beulich wrote:
>>>> On 18.02.15 at 13:25, <andrew.cooper3@citrix.com> wrote:
>> The single use of the old rdtsc() in emulate_privileged_op() is altered to use
>> the new rdtsc() and the rdmsr_writeback path to set eax/edx appropriately.
> I'm not entirely sure about this one - the current code surely is slightly
> faster than the replacement. Question is how much this matters. I'd
> suggest to be on the safe side and simply open-code the asm() there.

It is a matter of 4 instructions on a fault slowpath, which is only ever
executed in the unlikely case that vtsc is enabled on a domain, or the
kernel has set CR4.TSD.

I would not lose sleep over the introduced inefficiency.

~Andrew


* [PATCH v2] x86: Adjust rdtsc inline assembly
From: Andrew Cooper @ 2015-02-18 14:05 UTC
  To: Xen-devel; +Cc: Andrew Cooper, Keir Fraser, Jan Beulich

Currently there are three related rdtsc macros, all of which are lowercase,
not obviously macros, and write to parameters which appear to be passed by
value.

This is non-intuitive to program with, being contrary to C semantics for
what appears to be a regular function call.  It also causes Coverity to
conclude that __udelay() has an infinite loop, as all of its loop conditions
appear constant to the analyser.

Two of these macros (rdtsc() and rdtscl()) have only a handful of uses while
the vast majority of code uses the rdtscll() variant.  rdtsc() and rdtscll()
are equivalent, while rdtscl() discards the high word.

Replace all 3 macros with a static inline which returns the complete tsc.

Most of this patch is a mechanical change of

  - rdtscll($FOO);
  + $FOO = rdtsc();

A diff of the generated assembly confirms that the transformation results in
no change at all.

The single use of the old rdtsc() in emulate_privileged_op() is altered to use
the new rdtsc() and the rdmsr_writeback path to set eax/edx appropriately.

The pair of uses of rdtscl() in __udelay() is extended to use full 64bit
values, which makes the overflow edge condition (and the resulting early
exit from the loop) far rarer.

Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
CC: Keir Fraser <keir@xen.org>
CC: Jan Beulich <JBeulich@suse.com>

---
v2: Style adjustments, suggested by Jan
---
 xen/arch/x86/apic.c               |    4 ++--
 xen/arch/x86/cpu/mcheck/mce.c     |    2 +-
 xen/arch/x86/delay.c              |    4 ++--
 xen/arch/x86/hvm/hvm.c            |    4 ++--
 xen/arch/x86/hvm/save.c           |    4 ++--
 xen/arch/x86/hvm/svm/svm.c        |    2 +-
 xen/arch/x86/platform_hypercall.c |    4 ++--
 xen/arch/x86/smpboot.c            |    2 +-
 xen/arch/x86/time.c               |   34 ++++++++++++++++------------------
 xen/arch/x86/traps.c              |    5 ++++-
 xen/include/asm-x86/msr.h         |   15 ++++++---------
 xen/include/asm-x86/time.h        |    4 +---
 12 files changed, 40 insertions(+), 44 deletions(-)

diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c
index 39cd9e5..3217bdf 100644
--- a/xen/arch/x86/apic.c
+++ b/xen/arch/x86/apic.c
@@ -1148,7 +1148,7 @@ static int __init calibrate_APIC_clock(void)
      * We wrapped around just now. Let's start:
      */
     if (cpu_has_tsc)
-        rdtscll(t1);
+        t1 = rdtsc();
     tt1 = apic_read(APIC_TMCCT);
 
     /*
@@ -1159,7 +1159,7 @@ static int __init calibrate_APIC_clock(void)
 
     tt2 = apic_read(APIC_TMCCT);
     if (cpu_has_tsc)
-        rdtscll(t2);
+        t2 = rdtsc();
 
     /*
      * The APIC bus clock counter is 32 bits only, it
diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c
index 05a86fb..3a3b4dc 100644
--- a/xen/arch/x86/cpu/mcheck/mce.c
+++ b/xen/arch/x86/cpu/mcheck/mce.c
@@ -235,7 +235,7 @@ static void mca_init_bank(enum mca_source who,
 
     if (who == MCA_CMCI_HANDLER) {
         mib->mc_ctrl2 = mca_rdmsr(MSR_IA32_MC0_CTL2 + bank);
-        rdtscll(mib->mc_tsc);
+        mib->mc_tsc = rdtsc();
     }
 }
 
diff --git a/xen/arch/x86/delay.c b/xen/arch/x86/delay.c
index bc1772e..ef6bc5d 100644
--- a/xen/arch/x86/delay.c
+++ b/xen/arch/x86/delay.c
@@ -21,10 +21,10 @@ void __udelay(unsigned long usecs)
     unsigned long ticks = usecs * (cpu_khz / 1000);
     unsigned long s, e;
 
-    rdtscl(s);
+    s = rdtsc();
     do
     {
         rep_nop();
-        rdtscl(e);
+        e = rdtsc();
     } while ((e-s) < ticks);
 }
diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
index a52c6e0..72e383f 100644
--- a/xen/arch/x86/hvm/hvm.c
+++ b/xen/arch/x86/hvm/hvm.c
@@ -292,7 +292,7 @@ void hvm_set_guest_tsc_fixed(struct vcpu *v, u64 guest_tsc, u64 at_tsc)
     }
     else
     {
-        rdtscll(tsc);
+        tsc = rdtsc();
     }
 
     delta_tsc = guest_tsc - tsc;
@@ -326,7 +326,7 @@ u64 hvm_get_guest_tsc_fixed(struct vcpu *v, uint64_t at_tsc)
     }
     else
     {
-        rdtscll(tsc);
+        tsc = rdtsc();
     }
 
     return tsc + v->arch.hvm_vcpu.cache_tsc_offset;
diff --git a/xen/arch/x86/hvm/save.c b/xen/arch/x86/hvm/save.c
index 6af19be..61f780d 100644
--- a/xen/arch/x86/hvm/save.c
+++ b/xen/arch/x86/hvm/save.c
@@ -36,7 +36,7 @@ void arch_hvm_save(struct domain *d, struct hvm_save_header *hdr)
     hdr->gtsc_khz = d->arch.tsc_khz;
 
     /* Time when saving started */
-    rdtscll(d->arch.hvm_domain.sync_tsc);
+    d->arch.hvm_domain.sync_tsc = rdtsc();
 }
 
 int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
@@ -71,7 +71,7 @@ int arch_hvm_load(struct domain *d, struct hvm_save_header *hdr)
         hvm_set_rdtsc_exiting(d, 1);
 
     /* Time when restore started  */
-    rdtscll(d->arch.hvm_domain.sync_tsc);
+    d->arch.hvm_domain.sync_tsc = rdtsc();
 
     /* VGA state is not saved/restored, so we nobble the cache. */
     d->arch.hvm_domain.stdvga.cache = 0;
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 018dd70..c83c483 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -805,7 +805,7 @@ static void svm_set_tsc_offset(struct vcpu *v, u64 offset, u64 at_tsc)
         if ( at_tsc )
             host_tsc = at_tsc;
         else
-            rdtscll(host_tsc);
+            host_tsc = rdtsc();
         offset = svm_get_tsc_offset(host_tsc, guest_tsc, vcpu_tsc_ratio(v));
     }
 
diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
index b427852..11c510d 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -146,7 +146,7 @@ static void resource_access(void *info)
             {
                 unsigned long flags = 0;
                 /*
-                 * If next entry is MSR_IA32_TSC read, then the actual rdtscll
+                 * If next entry is MSR_IA32_TSC read, then the actual rdtsc
                  * is performed together with current entry, with IRQ disabled.
                  */
                 bool_t read_tsc = (i < ra->nr_done - 1 &&
@@ -159,7 +159,7 @@ static void resource_access(void *info)
 
                 if ( unlikely(read_tsc) )
                 {
-                    rdtscll(tsc);
+                    tsc = rdtsc();
                     local_irq_restore(flags);
                 }
             }
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index c54be7e..7ae561c 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -142,7 +142,7 @@ static void synchronize_tsc_master(unsigned int slave)
 
     for ( i = 1; i <= 5; i++ )
     {
-        rdtscll(tsc_value);
+        tsc_value = rdtsc();
         wmb();
         atomic_inc(&tsc_count);
         while ( atomic_read(&tsc_count) != (i<<1) )
diff --git a/xen/arch/x86/time.c b/xen/arch/x86/time.c
index b609938..994aa1a 100644
--- a/xen/arch/x86/time.c
+++ b/xen/arch/x86/time.c
@@ -261,10 +261,10 @@ static u64 init_pit_and_calibrate_tsc(void)
     outb(CALIBRATE_LATCH & 0xff, PIT_CH2); /* LSB of count */
     outb(CALIBRATE_LATCH >> 8, PIT_CH2);   /* MSB of count */
 
-    rdtscll(start);
+    start = rdtsc();
     for ( count = 0; (inb(0x61) & 0x20) == 0; count++ )
         continue;
-    rdtscll(end);
+    end = rdtsc();
 
     /* Error if the CTC doesn't behave itself. */
     if ( count == 0 )
@@ -764,7 +764,7 @@ s_time_t get_s_time_fixed(u64 at_tsc)
     if ( at_tsc )
         tsc = at_tsc;
     else
-        rdtscll(tsc);
+        tsc = rdtsc();
     delta = tsc - t->local_tsc_stamp;
     now = t->stime_local_stamp + scale_delta(delta, &t->tsc_scale);
 
@@ -971,7 +971,7 @@ int cpu_frequency_change(u64 freq)
     /* TSC-extrapolated time may be bogus after frequency change. */
     /*t->stime_local_stamp = get_s_time();*/
     t->stime_local_stamp = t->stime_master_stamp;
-    rdtscll(curr_tsc);
+    curr_tsc = rdtsc();
     t->local_tsc_stamp = curr_tsc;
     set_time_scale(&t->tsc_scale, freq);
     local_irq_enable();
@@ -1307,7 +1307,7 @@ static void time_calibration_tsc_rendezvous(void *_r)
             if ( r->master_stime == 0 )
             {
                 r->master_stime = read_platform_stime();
-                rdtscll(r->master_tsc_stamp);
+                r->master_tsc_stamp = rdtsc();
             }
             atomic_inc(&r->semaphore);
 
@@ -1333,7 +1333,7 @@ static void time_calibration_tsc_rendezvous(void *_r)
         }
     }
 
-    rdtscll(c->local_tsc_stamp);
+    c->local_tsc_stamp = rdtsc();
     c->stime_local_stamp = get_s_time();
     c->stime_master_stamp = r->master_stime;
 
@@ -1363,7 +1363,7 @@ static void time_calibration_std_rendezvous(void *_r)
         mb(); /* receive signal /then/ read r->master_stime */
     }
 
-    rdtscll(c->local_tsc_stamp);
+    c->local_tsc_stamp = rdtsc();
     c->stime_local_stamp = get_s_time();
     c->stime_master_stamp = r->master_stime;
 
@@ -1397,7 +1397,7 @@ void init_percpu_time(void)
     t->tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
 
     local_irq_save(flags);
-    rdtscll(t->local_tsc_stamp);
+    t->local_tsc_stamp = rdtsc();
     now = read_platform_stime();
     local_irq_restore(flags);
 
@@ -1426,13 +1426,13 @@ static void __init tsc_check_writability(void)
     if ( boot_cpu_has(X86_FEATURE_TSC_RELIABLE) )
         return;
 
-    rdtscll(tsc);
+    tsc = rdtsc();
     if ( wrmsr_safe(MSR_IA32_TSC, 0) == 0 )
     {
-        uint64_t tmp, tmp2;
-        rdtscll(tmp2);
+        uint64_t tmp, tmp2 = rdtsc();
+
         write_tsc(tsc | (1ULL << 32));
-        rdtscll(tmp);
+        tmp = rdtsc();
         if ( ABS((s64)tmp - (s64)tmp2) < (1LL << 31) )
             what = "only partially";
     }
@@ -1868,7 +1868,7 @@ void tsc_get_info(struct domain *d, uint32_t *tsc_mode,
             *gtsc_khz = d->arch.tsc_khz;
             break;
         }
-        rdtscll(tsc);
+        tsc = rdtsc();
         *elapsed_nsec = scale_delta(tsc, &d->arch.vtsc_to_ns);
         *gtsc_khz = cpu_khz;
         break;
@@ -1880,7 +1880,7 @@ void tsc_get_info(struct domain *d, uint32_t *tsc_mode,
         }
         else
         {
-            rdtscll(tsc);
+            tsc = rdtsc();
             *elapsed_nsec = scale_delta(tsc, &d->arch.vtsc_to_ns) -
                             d->arch.vtsc_offset;
             *gtsc_khz = 0; /* ignored by tsc_set_info */
@@ -1973,9 +1973,7 @@ void tsc_set_info(struct domain *d,
         else {
             /* when using native TSC, offset is nsec relative to power-on
              * of physical machine */
-            uint64_t tsc = 0;
-            rdtscll(tsc);
-            d->arch.vtsc_offset = scale_delta(tsc,&d->arch.vtsc_to_ns) -
+            d->arch.vtsc_offset = scale_delta(rdtsc(), &d->arch.vtsc_to_ns) -
                                   elapsed_nsec;
         }
         break;
@@ -1994,7 +1992,7 @@ void tsc_set_info(struct domain *d,
              * call set_tsc_offset() later from hvm_vcpu_reset_state() and they
              * will sync their TSC to BSP's sync_tsc.
              */
-            rdtscll(d->arch.hvm_domain.sync_tsc);
+            d->arch.hvm_domain.sync_tsc = rdtsc();
             hvm_funcs.set_tsc_offset(d->vcpu[0],
                                      d->vcpu[0]->arch.hvm_vcpu.cache_tsc_offset,
                                      d->arch.hvm_domain.sync_tsc);
diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
index 057a7af..14e2563 100644
--- a/xen/arch/x86/traps.c
+++ b/xen/arch/x86/traps.c
@@ -2663,7 +2663,10 @@ static int emulate_privileged_op(struct cpu_user_regs *regs)
         if ( v->domain->arch.vtsc )
             pv_soft_rdtsc(v, regs, 0);
         else
-            rdtsc(regs->eax, regs->edx);
+        {
+            val = rdtsc();
+            goto rdmsr_writeback;
+        }
         break;
 
     case 0x32: /* RDMSR */
diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h
index 52cae4b..4f233d5 100644
--- a/xen/include/asm-x86/msr.h
+++ b/xen/include/asm-x86/msr.h
@@ -71,17 +71,14 @@ static inline int wrmsr_safe(unsigned int msr, uint64_t val)
     return _rc;
 }
 
-#define rdtsc(low,high) \
-     __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high))
+static inline uint64_t rdtsc(void)
+{
+    uint32_t low, high;
 
-#define rdtscl(low) \
-     __asm__ __volatile__("rdtsc" : "=a" (low) : : "edx")
+    __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
 
-#define rdtscll(val) do { \
-     unsigned int _eax, _edx; \
-     asm volatile("rdtsc" : "=a" (_eax), "=d" (_edx)); \
-     (val) = ((unsigned long)_eax) | (((unsigned long)_edx)<<32); \
-} while(0)
+    return ((uint64_t)high << 32) | low;
+}
 
 #define __write_tsc(val) wrmsrl(MSR_IA32_TSC, val)
 #define write_tsc(val) ({                                       \
diff --git a/xen/include/asm-x86/time.h b/xen/include/asm-x86/time.h
index c4d82f6..39d6bf3 100644
--- a/xen/include/asm-x86/time.h
+++ b/xen/include/asm-x86/time.h
@@ -28,9 +28,7 @@
 
 static inline cycles_t get_cycles(void)
 {
-    cycles_t c;
-    rdtscll(c);
-    return c;
+    return rdtsc();
 }
 
 unsigned long
-- 
1.7.10.4

