* [PATCH v2 1/2] pvclock: introduce seqcount-like API
2016-06-15 12:46 [PATCH v2 0/2] pvclock: more code cleanup Paolo Bonzini
@ 2016-06-15 12:46 ` Paolo Bonzini
2016-06-15 12:46 ` [PATCH v2 2/2] x86: vdso: use __pvclock_read_cycles Paolo Bonzini
2016-06-27 14:16 ` [PATCH v2 0/2] pvclock: more code cleanup Paolo Bonzini
2 siblings, 0 replies; 4+ messages in thread
From: Paolo Bonzini @ 2016-06-15 12:46 UTC (permalink / raw)
To: linux-kernel, kvm; +Cc: x86, Andy Lutomirski, Ingo Molnar, Minfei Huang
The version field in struct pvclock_vcpu_time_info basically implements
a seqcount. Wrap it with the usual read_begin and read_retry functions,
and use these APIs instead of peppering the code with smp_rmb()s.
While at it, change it to the more pedantically correct virt_rmb().
With this change, __pvclock_read_cycles can be simplified noticeably.
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Minfei Huang <mnghuan@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
arch/x86/entry/vdso/vclock_gettime.c | 9 ++-------
arch/x86/include/asm/pvclock.h | 39 +++++++++++++++++++++---------------
arch/x86/kernel/pvclock.c | 17 ++++++----------
3 files changed, 31 insertions(+), 34 deletions(-)
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 2f02d23a05ef..db1e3b4c3693 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -123,9 +123,7 @@ static notrace cycle_t vread_pvclock(int *mode)
*/
do {
- version = pvti->version;
-
- smp_rmb();
+ version = pvclock_read_begin(pvti);
if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
*mode = VCLOCK_NONE;
@@ -137,10 +135,7 @@ static notrace cycle_t vread_pvclock(int *mode)
pvti_tsc_shift = pvti->tsc_shift;
pvti_system_time = pvti->system_time;
pvti_tsc = pvti->tsc_timestamp;
-
- /* Make sure that the version double-check is last. */
- smp_rmb();
- } while (unlikely((version & 1) || version != pvti->version));
+ } while (pvclock_read_retry(pvti, version));
delta = tsc - pvti_tsc;
ret = pvti_system_time +
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 7c1c89598688..0ee92db1e9f3 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -25,6 +25,24 @@ void pvclock_resume(void);
void pvclock_touch_watchdogs(void);
+static __always_inline
+unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src)
+{
+ unsigned version = src->version & ~1;
+ /* Make sure that the version is read before the data. */
+ virt_rmb();
+ return version;
+}
+
+static __always_inline
+bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src,
+ unsigned version)
+{
+ /* Make sure that the version is re-read after the data. */
+ virt_rmb();
+ return version != src->version;
+}
+
/*
* Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
* yielding a 64-bit result.
@@ -69,23 +87,12 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
}
static __always_inline
-unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
- cycle_t *cycles, u8 *flags)
+cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src)
{
- unsigned version;
- cycle_t offset;
- u64 delta;
-
- version = src->version;
- /* Make the latest version visible */
- smp_rmb();
-
- delta = rdtsc_ordered() - src->tsc_timestamp;
- offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
- src->tsc_shift);
- *cycles = src->system_time + offset;
- *flags = src->flags;
- return version;
+ u64 delta = rdtsc_ordered() - src->tsc_timestamp;
+ cycle_t offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+ src->tsc_shift);
+ return src->system_time + offset;
}
struct pvclock_vsyscall_time_info {
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 06c58ce46762..3599404e3089 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -64,14 +64,9 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
u8 flags;
do {
- version = src->version;
- /* Make the latest version visible */
- smp_rmb();
-
+ version = pvclock_read_begin(src);
flags = src->flags;
- /* Make sure that the version double-check is last. */
- smp_rmb();
- } while ((src->version & 1) || version != src->version);
+ } while (pvclock_read_retry(src, version));
return flags & valid_flags;
}
@@ -84,10 +79,10 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
u8 flags;
do {
- version = __pvclock_read_cycles(src, &ret, &flags);
- /* Make sure that the version double-check is last. */
- smp_rmb();
- } while ((src->version & 1) || version != src->version);
+ version = pvclock_read_begin(src);
+ ret = __pvclock_read_cycles(src);
+ flags = src->flags;
+ } while (pvclock_read_retry(src, version));
if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
src->flags &= ~PVCLOCK_GUEST_STOPPED;
--
1.8.3.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v2 2/2] x86: vdso: use __pvclock_read_cycles
2016-06-15 12:46 [PATCH v2 0/2] pvclock: more code cleanup Paolo Bonzini
2016-06-15 12:46 ` [PATCH v2 1/2] pvclock: introduce seqcount-like API Paolo Bonzini
@ 2016-06-15 12:46 ` Paolo Bonzini
2016-06-27 14:16 ` [PATCH v2 0/2] pvclock: more code cleanup Paolo Bonzini
2 siblings, 0 replies; 4+ messages in thread
From: Paolo Bonzini @ 2016-06-15 12:46 UTC (permalink / raw)
To: linux-kernel, kvm; +Cc: x86, Andy Lutomirski, Ingo Molnar, Minfei Huang
The new simplified __pvclock_read_cycles does the same computation
as vread_pvclock, except that (because it takes the pvclock_vcpu_time_info
pointer) it has to be moved inside the loop. Since the loop is expected to
never roll, this makes no difference.
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Minfei Huang <mnghuan@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
If acked I'd like to take this through the KVM tree.
arch/x86/entry/vdso/vclock_gettime.c | 16 +++-------------
1 file changed, 3 insertions(+), 13 deletions(-)
diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index db1e3b4c3693..94d54d0defa7 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -96,9 +96,8 @@ static notrace cycle_t vread_pvclock(int *mode)
{
const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
cycle_t ret;
- u64 tsc, pvti_tsc;
- u64 last, delta, pvti_system_time;
- u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;
+ u64 last;
+ u32 version;
/*
* Note: The kernel and hypervisor must guarantee that cpu ID
@@ -130,18 +129,9 @@ static notrace cycle_t vread_pvclock(int *mode)
return 0;
}
- tsc = rdtsc_ordered();
- pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
- pvti_tsc_shift = pvti->tsc_shift;
- pvti_system_time = pvti->system_time;
- pvti_tsc = pvti->tsc_timestamp;
+ ret = __pvclock_read_cycles(pvti);
} while (pvclock_read_retry(pvti, version));
- delta = tsc - pvti_tsc;
- ret = pvti_system_time +
- pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
- pvti_tsc_shift);
-
/* refer to vread_tsc() comment for rationale */
last = gtod->cycle_last;
--
1.8.3.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH v2 0/2] pvclock: more code cleanup
2016-06-15 12:46 [PATCH v2 0/2] pvclock: more code cleanup Paolo Bonzini
2016-06-15 12:46 ` [PATCH v2 1/2] pvclock: introduce seqcount-like API Paolo Bonzini
2016-06-15 12:46 ` [PATCH v2 2/2] x86: vdso: use __pvclock_read_cycles Paolo Bonzini
@ 2016-06-27 14:16 ` Paolo Bonzini
2 siblings, 0 replies; 4+ messages in thread
From: Paolo Bonzini @ 2016-06-27 14:16 UTC (permalink / raw)
To: linux-kernel, kvm; +Cc: Andy Lutomirski, Ingo Molnar, x86, Minfei Huang
On 15/06/2016 14:46, Paolo Bonzini wrote:
> Patch 1 simplifies the pvclock.h API by moving seqcount logic into
> two new functions pvclock_read_begin and pvclock_read_retry.
> Patch 2 uses the new simplified API in the vDSO.
Andy, I've now benchmarked the patches.
Patch 2 introduces no meaningful difference, however patch 1 makes
clock_gettime slower by about 3%. I can get this back with:
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index 0ee92db1e9f3..d019f0cc80ec 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -40,7 +40,7 @@ bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src,
{
/* Make sure that the version is re-read after the data. */
virt_rmb();
- return version != src->version;
+ return unlikely(version != src->version);
}
/*
... which for whatever reason makes GCC inline more aggressively.
I'm going to send v3.
Paolo
> Paolo
>
> Paolo Bonzini (2):
> pvclock: introduce seqcount-like API
> x86: vdso: use __pvclock_read_cycles
>
> arch/x86/entry/vdso/vclock_gettime.c | 25 +++++------------------
> arch/x86/include/asm/pvclock.h | 39 +++++++++++++++++++++---------------
> arch/x86/kernel/pvclock.c | 17 ++++++----------
> 3 files changed, 34 insertions(+), 47 deletions(-)
>
^ permalink raw reply related [flat|nested] 4+ messages in thread