kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: [PATCH kvm-unit-test 5/6] Add a simple kvmclock driver
       [not found] <206398826.826221283138725330.JavaMail.root@zmail05.collab.prod.int.phx2.redhat.com>
@ 2010-08-30  3:25 ` Jason Wang
  0 siblings, 0 replies; 3+ messages in thread
From: Jason Wang @ 2010-08-30  3:25 UTC (permalink / raw)
  To: Glauber Costa; +Cc: mtosatti, avi, kvm


----- "Glauber Costa" <glommer@redhat.com> wrote:

> On Fri, Aug 27, 2010 at 01:49:45PM +0800, Jason Wang wrote:
> > +#define unlikely(x)	__builtin_expect(!!(x), 0)
> > +
> > +struct pvclock_vcpu_time_info hv_clock[MAX_CPU];
> this structure has to be 4-byte aligned. Let the compiler
> help you guarantee it here.
> 
> > +#define MAX_CPU 4
> > +
> Any particular reason for the number 4? I'd like
> to see these tests running in really big boxes,
> where things get really interesting.

Just because the max cpu num was limited by smp initialization code. I
would change it to 64.


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH kvm-unit-test 5/6] Add a simple kvmclock driver
  2010-08-27  5:49 ` [PATCH kvm-unit-test 5/6] Add a simple kvmclock driver Jason Wang
@ 2010-08-27 11:31   ` Glauber Costa
  0 siblings, 0 replies; 3+ messages in thread
From: Glauber Costa @ 2010-08-27 11:31 UTC (permalink / raw)
  To: Jason Wang; +Cc: mtosatti, avi, kvm

On Fri, Aug 27, 2010 at 01:49:45PM +0800, Jason Wang wrote:
> +#define unlikely(x)	__builtin_expect(!!(x), 0)
> +
> +struct pvclock_vcpu_time_info hv_clock[MAX_CPU];
this structure has to be 4-byte aligned. Let the compiler
help you guarantee it here.

> +#define MAX_CPU 4
> +
Any particular reason for the number 4? I'd like
to see these tests running in really big boxes,
where things get really interesting.

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH kvm-unit-test 5/6] Add a simple kvmclock driver
  2010-08-27  5:49 [PATCH kvm-unit-test 0/6] Kvmclock test Jason Wang
@ 2010-08-27  5:49 ` Jason Wang
  2010-08-27 11:31   ` Glauber Costa
  0 siblings, 1 reply; 3+ messages in thread
From: Jason Wang @ 2010-08-27  5:49 UTC (permalink / raw)
  To: mtosatti, avi, kvm; +Cc: glommer

Most of the code was borrowed from arch/x86/kernel/kvmclock.c. A
special bit, PVCLOCK_RAW_CYCLE_BIT, is used to tell the driver
to return unadjusted cycles.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 x86/kvmclock.c |  166 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 x86/kvmclock.h |   53 ++++++++++++++++++
 2 files changed, 219 insertions(+), 0 deletions(-)
 create mode 100644 x86/kvmclock.c
 create mode 100644 x86/kvmclock.h

diff --git a/x86/kvmclock.c b/x86/kvmclock.c
new file mode 100644
index 0000000..6bfc858
--- /dev/null
+++ b/x86/kvmclock.c
@@ -0,0 +1,166 @@
+#include "libcflat.h"
+#include "smp.h"
+#include "atomic.h"
+#include "processor.h"
+#include "kvmclock.h"
+
+#define unlikely(x)	__builtin_expect(!!(x), 0)
+/* Both structs are packed (alignment 1); the MSRs need 4-byte aligned addresses. */
+struct pvclock_vcpu_time_info hv_clock[MAX_CPU] __attribute__((aligned(4)));
+struct pvclock_wall_clock wall_clock __attribute__((aligned(4)));
+static unsigned char valid_flags = 0;	/* set by pvclock_set_flags() */
+static atomic64_t last_value = ATOMIC64_INIT(0);	/* advanced by pvclock_clocksource_read() */
+
+/*
+ * Shift delta by 'shift' bits (right if negative, left otherwise), then
+ * scale it by the 32-bit fraction mul_frac: result = (delta * mul_frac) >> 32.
+ */
+static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
+{
+	u64 product;
+#ifdef __i386__
+	u32 tmp1, tmp2;		/* scratch outputs, tied to inputs "1" and "2" below */
+#endif
+
+	if (shift < 0)
+		delta >>= -shift;
+	else
+		delta <<= shift;
+
+#ifdef __i386__
+	__asm__ (
+		"mul  %5       ; "	/* edx:eax = low32(delta) * mul_frac */
+		"mov  %4,%%eax ; "	/* eax = high32(delta) */
+		"mov  %%edx,%4 ; "	/* save upper half of the low product */
+		"mul  %5       ; "	/* edx:eax = high32(delta) * mul_frac */
+		"xor  %5,%5    ; "	/* zero, so the adc below adds only the carry */
+		"add  %4,%%eax ; "	/* add the saved upper half */
+		"adc  %5,%%edx ; "	/* propagate the carry into edx */
+		: "=A" (product), "=r" (tmp1), "=r" (tmp2)
+		: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
+#elif defined(__x86_64__)
+	__asm__ (
+		"mul %%rdx ; shrd $32,%%rdx,%%rax"	/* rdx:rax = delta * mul_frac, then >> 32 into rax */
+		: "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
+#else
+#error implement me!
+#endif
+
+	return product;
+}
+
+/* TSC ticks since shadow->tsc_timestamp, scaled with the shadow's multiplier and shift. */
+static u64 pvclock_get_nsec_offset(struct pvclock_shadow_time *shadow)
+{
+	return scale_delta(rdtsc() - shadow->tsc_timestamp, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
+}
+
+/*
+ * Reads a consistent set of time-base values from the hypervisor-written
+ * src into the shadow area dst; returns the version the copy was taken at.
+ */
+static unsigned pvclock_get_time_values(struct pvclock_shadow_time *dst,
+					struct pvclock_vcpu_time_info *src)
+{
+	do {
+		dst->version = src->version;
+		rmb();		/* fetch version before data */
+		dst->tsc_timestamp     = src->tsc_timestamp;
+		dst->system_timestamp  = src->system_time;
+		dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
+		dst->tsc_shift         = src->tsc_shift;	/* signed char widened to int */
+		dst->flags             = src->flags;
+		rmb();		/* test version after fetching data */
+	} while ((src->version & 1) || (dst->version != src->version));	/* retry if odd or changed */
+
+	return dst->version;
+}
+
+/* Read src's clock; unless a flag permits a raw return, clamp so results never go backwards. */
+cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
+{
+	struct pvclock_shadow_time shadow;
+	unsigned version;
+	cycle_t ret, offset;
+	u64 last;
+	do {	/* recompute until src->version still matches the snapshot */
+		version = pvclock_get_time_values(&shadow, src);
+		barrier();
+		offset = pvclock_get_nsec_offset(&shadow);
+		ret = shadow.system_timestamp + offset;
+		barrier();
+	} while (version != src->version);
+
+	if ((valid_flags & PVCLOCK_RAW_CYCLE_BIT) ||	/* raw value requested: skip the clamp below */
+            ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
+             (shadow.flags & PVCLOCK_TSC_STABLE_BIT)))
+                return ret;
+
+	/*
+	 * Assumption here is that last_value, a global accumulator, always goes
+	 * forward. If we are less than that, we should not be much smaller.
+	 * We assume there is an error margin we're inside, and then the correction
+	 * does not sacrifice accuracy.
+	 *
+	 * For reads: global may have changed between test and return,
+	 * but this means someone else poked the clock at a later time.
+	 * We just need to make sure we are not seeing a backwards event.
+	 *
+	 * For updates: last_value = ret is not enough, since two vcpus could be
+	 * updating at the same time, and one of them could be slightly behind,
+	 * making the assumption that last_value always goes forward fail to hold.
+	 */
+	last = atomic64_read(&last_value);
+	do {
+		if (ret < last)
+			return last;	/* never report less than the value read from last_value */
+		last = atomic64_cmpxchg(&last_value, last, ret);
+	} while (unlikely(last != ret));
+
+	return ret;
+}
+
+/* Read the kvmclock value for the calling cpu from its hv_clock[] slot. */
+cycle_t kvm_clock_read(void)
+{
+        /*
+         * smp_id() picks this cpu's slot; it is not range-checked here, so
+         * callers must not run on a cpu numbered MAX_CPU or above.
+         */
+        int index = smp_id();
+        return pvclock_clocksource_read(&hv_clock[index]);
+}
+
+/* Register the calling cpu's hv_clock[] slot through MSR_KVM_SYSTEM_TIME; 'data' is unused. */
+void kvm_clock_init(void *data)
+{
+        int index = smp_id();
+        struct pvclock_vcpu_time_info *hvc = &hv_clock[index];
+        printf("kvm-clock: cpu %d, msr 0x:%lx \n", index, (unsigned long)hvc);	/* %lx needs an unsigned long, not a pointer */
+        wrmsr(MSR_KVM_SYSTEM_TIME, (unsigned long)hvc | 1);	/* bit 0 set: enable flag per the KVM MSR ABI */
+}
+
+void kvm_clock_clear(void *data)
+{
+        wrmsr(MSR_KVM_SYSTEM_TIME, 0LL);	/* write 0: no address, bit 0 clear (kvm_clock_init() sets it); 'data' is unused */
+}
+
+/* Hand &wall_clock to the host, then copy a consistent sec/nsec pair into *ts. */
+void kvm_get_wallclock(struct timespec *ts)
+{
+        u32 version;
+        /* Cast via unsigned long (pointer-sized on i386 and x86_64), as kvm_clock_init() does. */
+        wrmsr(MSR_KVM_WALL_CLOCK, (unsigned long)&wall_clock);
+        do {
+                version = wall_clock.version;
+                rmb();		/* fetch version before time */
+                ts->sec = wall_clock.sec;
+                ts->nsec = wall_clock.nsec;
+                rmb();		/* fetch time before checking version */
+        } while ((wall_clock.version & 1) || (version != wall_clock.version));	/* retry if odd or changed */
+}
+
+void pvclock_set_flags(unsigned char flags)
+{
+        valid_flags = flags;	/* replaces (does not OR into) the mask tested by pvclock_clocksource_read() */
+}
diff --git a/x86/kvmclock.h b/x86/kvmclock.h
new file mode 100644
index 0000000..e3a1c02
--- /dev/null
+++ b/x86/kvmclock.h
@@ -0,0 +1,53 @@
+#ifndef KVMCLOCK_H
+#define KVMCLOCK_H
+
+#define MSR_KVM_WALL_CLOCK  0x11	/* written with &wall_clock in kvm_get_wallclock() */
+#define MSR_KVM_SYSTEM_TIME 0x12	/* written with a hv_clock[] slot address | 1, or with 0 */
+
+#define MAX_CPU 4			/* size of hv_clock[]; smp_id() indexes it unchecked */
+
+#define PVCLOCK_TSC_STABLE_BIT (1 << 0)	/* honoured only if set in both valid_flags and the per-cpu flags */
+#define PVCLOCK_RAW_CYCLE_BIT (1 << 7) /* Get raw cycle */
+
+# define NSEC_PER_SEC			1000000000ULL
+
+typedef u64 cycle_t;
+
+struct pvclock_vcpu_time_info {
+        u32   version;            /* readers retry while this is odd or changes */
+        u32   pad0;
+        u64   tsc_timestamp;      /* subtracted from rdtsc() to get the TSC delta */
+        u64   system_time;        /* base value the scaled TSC delta is added to */
+        u32   tsc_to_system_mul;  /* passed to scale_delta() as mul_frac */
+        signed char    tsc_shift; /* passed to scale_delta() as shift */
+        u8    flags;              /* tested against PVCLOCK_TSC_STABLE_BIT */
+        u8    pad[2];
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct pvclock_wall_clock {
+	u32   version;	/* kvm_get_wallclock() retries while this is odd or changes */
+	u32   sec;	/* copied to timespec.sec */
+	u32   nsec;	/* copied to timespec.nsec */
+} __attribute__((__packed__));
+
+struct pvclock_shadow_time {
+        u64 tsc_timestamp;     /* TSC at last update of time vals.  */
+        u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
+        u32 tsc_to_nsec_mul;   /* Copy of tsc_to_system_mul.        */
+        int tsc_shift;         /* Copy of tsc_shift, widened to int. */
+        u32 version;           /* Version the copy was taken at.    */
+        u8  flags;             /* Copy of the per-cpu flags byte.   */
+};
+
+struct timespec {
+        u32   sec;    /* filled from wall_clock.sec by kvm_get_wallclock() */
+        u32   nsec;   /* filled from wall_clock.nsec by kvm_get_wallclock() */
+};
+
+void pvclock_set_flags(unsigned char flags);	/* replace the flag mask used by the clock reader */
+cycle_t kvm_clock_read(void);			/* (void): a real prototype, so stray arguments are diagnosed */
+void kvm_get_wallclock(struct timespec *ts);	/* fill *ts from the host-provided wall clock */
+void kvm_clock_init(void *data);		/* register the calling cpu's clock area; data unused */
+void kvm_clock_clear(void *data);		/* write 0 to MSR_KVM_SYSTEM_TIME; data unused */
+
+#endif


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2010-08-30  3:25 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <206398826.826221283138725330.JavaMail.root@zmail05.collab.prod.int.phx2.redhat.com>
2010-08-30  3:25 ` [PATCH kvm-unit-test 5/6] Add a simple kvmclock driver Jason Wang
2010-08-27  5:49 [PATCH kvm-unit-test 0/6] Kvmclock test Jason Wang
2010-08-27  5:49 ` [PATCH kvm-unit-test 5/6] Add a simple kvmclock driver Jason Wang
2010-08-27 11:31   ` Glauber Costa

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).