kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Denis Plotnikov <dplotnikov@virtuozzo.com>
To: pbonzini@redhat.com, rkrcmar@redhat.com, kvm@vger.kernel.org
Cc: rkagan@virtuozzo.com, den@virtuozzo.com, svt-core@lists.sw.ru
Subject: [PATCH v3 2/6] KVM: x86: switch to masterclock update using timekeeper functionality
Date: Sat, 29 Jul 2017 15:35:07 +0300	[thread overview]
Message-ID: <1501331711-12961-3-git-send-email-dplotnikov@virtuozzo.com> (raw)
In-Reply-To: <1501331711-12961-1-git-send-email-dplotnikov@virtuozzo.com>

It is reasonable to switch KVM to a simpler, more effective
and conceptually correct scheme of dealing with the data needed
for kvm masterclock values calculation.

With the current scheme, KVM needs to keep an up-to-date copy of
some timekeeper data to provide a guest using kvmclock with the necessary
information.

This is not:
    - simple
        KVM has to have a lot of code to do that, instead KVM could use
        a timekeeper function to get all the data it needs
    - effective
        the copy of the data used for time data calculation is updated
        every time it changes, although this is not necessary since
	the updated piece of time data is needed at certain moments only
        (e.g. masterclock updating); instead KVM can request this data
        directly from the timekeeper at the moments when it's really needed
    - conceptually correct
        doing the work (calculating the time data) which another part
	of the system (the timekeeper) has been designed for and is able to do
        is not the proper way; instead, delegate the work to the proper part

This patch switches KVM to using the improved timekeeper function for
the kvm masterclock time data.

Removing the leftovers of the old scheme is a matter for the next patches.

Signed-off-by: Denis Plotnikov <dplotnikov@virtuozzo.com>
---
 arch/x86/kernel/kvmclock.c  | 14 ++++++++++++--
 arch/x86/kernel/tsc.c       |  6 ++++++
 arch/x86/kvm/x86.c          | 26 ++++++++++++++++++--------
 include/linux/clocksource.h |  3 +++
 include/linux/timekeeping.h |  2 ++
 kernel/time/timekeeping.c   | 21 +++++++++++++++++++--
 6 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index f5cfc5d..52156d9 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -82,7 +82,7 @@ static int kvm_set_wallclock(const struct timespec *now)
 	return -1;
 }
 
-static u64 kvm_clock_read(void)
+static inline u64 __kvm_clock_read(u64 *cycles)
 {
 	struct pvclock_vcpu_time_info *src;
 	u64 ret;
@@ -91,10 +91,14 @@ static u64 kvm_clock_read(void)
 	preempt_disable_notrace();
 	cpu = smp_processor_id();
 	src = &hv_clock[cpu].pvti;
-	ret = pvclock_clocksource_read(src, NULL);
+	ret = pvclock_clocksource_read(src, cycles);
 	preempt_enable_notrace();
 	return ret;
 }
+static u64 kvm_clock_read(void)
+{
+	return __kvm_clock_read(NULL);
+}
 
 static u64 kvm_clock_get_cycles(struct clocksource *cs)
 {
@@ -177,9 +181,15 @@ bool kvm_check_and_clear_guest_paused(void)
 	return ret;
 }
 
+static void kvm_clock_read_with_cycles(u64 *cycles, u64 *cycles_stamp)
+{
+	*cycles = __kvm_clock_read(cycles_stamp);
+}
+
 struct clocksource kvm_clock = {
 	.name = "kvm-clock",
 	.read = kvm_clock_get_cycles,
+	.read_with_cycles = kvm_clock_read_with_cycles,
 	.rating = 400,
 	.mask = CLOCKSOURCE_MASK(64),
 	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 796d96b..5d655af 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1015,6 +1015,11 @@ static u64 read_tsc(struct clocksource *cs)
 	return (u64)rdtsc_ordered();
 }
 
+static bool is_tsc_stable(void)
+{
+	return !tsc_unstable;
+}
+
 static void tsc_cs_mark_unstable(struct clocksource *cs)
 {
 	if (tsc_unstable)
@@ -1043,6 +1048,7 @@ static struct clocksource clocksource_tsc = {
 	.name                   = "tsc",
 	.rating                 = 300,
 	.read                   = read_tsc,
+	.is_stable		= is_tsc_stable,
 	.mask                   = CLOCKSOURCE_MASK(64),
 	.flags                  = CLOCK_SOURCE_IS_CONTINUOUS |
 				  CLOCK_SOURCE_MUST_VERIFY,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6c97c82..496e731 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1643,22 +1643,32 @@ static int do_realtime(struct timespec *ts, u64 *cycle_now)
 /* returns true if host is using tsc clocksource */
 static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now)
 {
-	/* checked again under seqlock below */
-	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
-		return false;
+	struct system_time_snapshot systime_snapshot;
+
+	ktime_get_snapshot(&systime_snapshot);
+
+	if (systime_snapshot.cs_stable) {
+		*kernel_ns = ktime_to_ns(systime_snapshot.boot);
+		*cycle_now = systime_snapshot.cycles;
+	}
 
-	return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC;
+	return systime_snapshot.cs_stable;
 }
 
 /* returns true if host is using tsc clocksource */
 static bool kvm_get_walltime_and_clockread(struct timespec *ts,
 					   u64 *cycle_now)
 {
-	/* checked again under seqlock below */
-	if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC)
-		return false;
+	struct system_time_snapshot systime_snapshot;
+
+	ktime_get_snapshot(&systime_snapshot);
+
+	if (systime_snapshot.cs_stable) {
+		*ts = ktime_to_timespec(systime_snapshot.real);
+		*cycle_now = systime_snapshot.cycles;
+	}
 
-	return do_realtime(ts, cycle_now) == VCLOCK_TSC;
+	return systime_snapshot.cs_stable;
 }
 #endif
 
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index a78cb18..f849b91 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -49,6 +49,7 @@ struct module;
  *				The ideal clocksource. A must-use where
  *				available.
  * @read:		returns a cycle value, passes clocksource as argument
+ * @read_with_cycles
  * @enable:		optional function to enable the clocksource
  * @disable:		optional function to disable the clocksource
  * @mask:		bitmask for two's complement
@@ -78,6 +79,8 @@ struct module;
  */
 struct clocksource {
 	u64 (*read)(struct clocksource *cs);
+	void (*read_with_cycles)(u64 *cycles, u64 *cycles_stamp);
+	bool (*is_stable)(void);
 	u64 mask;
 	u32 mult;
 	u32 shift;
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index ddc229f..21917fa 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -290,8 +290,10 @@ struct system_time_snapshot {
 	u64		cycles;
 	ktime_t		real;
 	ktime_t		raw;
+	ktime_t		boot;
 	unsigned int	clock_was_set_seq;
 	u8		cs_was_changed_seq;
+	bool		cs_stable;
 };
 
 /*
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index cedafa0..a2bfc12 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -953,27 +953,44 @@ void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
 	unsigned long seq;
 	ktime_t base_raw;
 	ktime_t base_real;
+	ktime_t base_boot;
 	u64 nsec_raw;
 	u64 nsec_real;
 	u64 now;
+	struct clocksource *clock;
 
 	WARN_ON_ONCE(timekeeping_suspended);
 
 	do {
 		seq = read_seqcount_begin(&tk_core.seq);
-		now = tk_clock_read(&tk->tkr_mono);
+		clock = tk->tkr_mono.clock;
+
+		if (clock->is_stable)
+			systime_snapshot->cs_stable = clock->is_stable();
+		else
+			systime_snapshot->cs_stable = false;
+
+		if (clock->read_with_cycles) {
+			clock->read_with_cycles(
+				&now, &systime_snapshot->cycles);
+		} else {
+			now = tk_clock_read(&tk->tkr_mono);
+			systime_snapshot->cycles = now;
+		}
 		systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
 		systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
 		base_real = ktime_add(tk->tkr_mono.base,
 				      tk_core.timekeeper.offs_real);
 		base_raw = tk->tkr_raw.base;
+		base_boot = ktime_add(tk->tkr_mono.base,
+				      tk_core.timekeeper.offs_boot);
 		nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now);
 		nsec_raw  = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
 	} while (read_seqcount_retry(&tk_core.seq, seq));
 
-	systime_snapshot->cycles = now;
 	systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
 	systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
+	systime_snapshot->boot = ktime_add_ns(base_boot, nsec_real);
 }
 EXPORT_SYMBOL_GPL(ktime_get_snapshot);
 
-- 
2.7.4

  parent reply	other threads:[~2017-07-29 12:35 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-29 12:35 [PATCH v3 0/6] make L2's kvm-clock stable, get rid of pvclock_gtod Denis Plotnikov
2017-07-29 12:35 ` [PATCH v3 1/6] pvclock: add parameter to store cycles stamp to pvclock reading function Denis Plotnikov
2017-07-31 14:08   ` Paolo Bonzini
2017-07-29 12:35 ` Denis Plotnikov [this message]
2017-07-31 14:20   ` [PATCH v3 2/6] KVM: x86: switch to masterclock update using timekeeper functionality Paolo Bonzini
2017-08-01  9:30     ` Denis Plotnikov
2017-08-01 10:03       ` Paolo Bonzini
2017-08-01 10:16         ` Paolo Bonzini
2017-08-01 12:11         ` Denis Plotnikov
2017-08-01 12:28           ` Denis Plotnikov
2017-08-01 12:41           ` Paolo Bonzini
2017-08-01 12:46             ` Denis Plotnikov
2017-08-01 17:47               ` Radim Krčmář
2017-07-29 12:35 ` [PATCH v3 3/6] timekeeper: add clocksource change notifier Denis Plotnikov
2017-07-29 12:35 ` [PATCH v3 4/6] KVM: x86: remove not used pvclock_gtod_copy Denis Plotnikov
2017-07-29 12:35 ` [PATCH v3 5/6] pvclock: add clocksource change notification on changing of tsc stable bit Denis Plotnikov
2017-07-31 14:21   ` Paolo Bonzini
2017-07-29 12:35 ` [PATCH v3 6/6] kvmclock: add the clocksource stability querying function Denis Plotnikov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1501331711-12961-3-git-send-email-dplotnikov@virtuozzo.com \
    --to=dplotnikov@virtuozzo.com \
    --cc=den@virtuozzo.com \
    --cc=kvm@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=rkagan@virtuozzo.com \
    --cc=rkrcmar@redhat.com \
    --cc=svt-core@lists.sw.ru \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).