All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Jan Beulich" <JBeulich@suse.com>
To: Dongli Zhang <dongli.zhang@oracle.com>
Cc: xen.list@daevel.fr, peterz@infradead.org,
	dario.faggioli@citrix.com, bevan@bi-co.net,
	linux-kernel@vger.kernel.org, xen-devel@lists.xen.org,
	mingo@redhat.com, joao.m.martins@oracle.com
Subject: Re: [PATCH 1/1] sched/cputime: do not decrease steal time after live migration on xen
Date: Tue, 10 Oct 2017 04:07:34 -0600	[thread overview]
Message-ID: <59DCB80602000078001844B7__21937.2667874724$1507630115$gmane$org@prv-mh.provo.novell.com> (raw)
In-Reply-To: <1507626848-24148-1-git-send-email-dongli.zhang@oracle.com>

>>> On 10.10.17 at 11:14, <dongli.zhang@oracle.com> wrote:
> --- a/kernel/sched/cputime.c
> +++ b/kernel/sched/cputime.c
> @@ -238,10 +238,17 @@ static __always_inline u64 steal_account_process_time(u64 maxtime)
>  {
>  #ifdef CONFIG_PARAVIRT
>  	if (static_key_false(&paravirt_steal_enabled)) {
> -		u64 steal;
> +		u64 steal, steal_time;
> +		s64 steal_delta;
> +
> +		steal_time = paravirt_steal_clock(smp_processor_id());
> +		steal = steal_delta = steal_time - this_rq()->prev_steal_time;
> +
> +		if (unlikely(steal_delta < 0)) {
> +			this_rq()->prev_steal_time = steal_time;
> +			return 0;
> +		}
>  
> -		steal = paravirt_steal_clock(smp_processor_id());
> -		steal -= this_rq()->prev_steal_time;
>  		steal = min(steal, maxtime);
>  		account_steal_time(steal);
>  		this_rq()->prev_steal_time += steal;

While I can see this making the issue less pronounced, I don't see
how it fully addresses it: Why would only a negative delta represent
a discontinuity? In our old XenoLinux-derived kernel we had the
change below (unlikely to be upstreamable as is, so just to give you
an idea).

Jan

--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -112,6 +112,47 @@ static inline void task_group_account_fi
 	cpuacct_account_field(p, index, tmp);
 }
 
+#if !defined(CONFIG_XEN) || defined(CONFIG_VIRT_CPU_ACCOUNTING)
+# define _cputime_adjust(t) (t)
+#else
+# include <linux/syscore_ops.h>
+# define NS_PER_TICK (1000000000 / HZ)
+
+static DEFINE_PER_CPU(u64, steal_snapshot);
+static DEFINE_PER_CPU(unsigned int, steal_residual);
+
+static u64 _cputime_adjust(u64 t)
+{
+	u64 s = this_vcpu_read(runstate.time[RUNSTATE_runnable]);
+	unsigned long adj = div_u64_rem(s - __this_cpu_read(steal_snapshot)
+					  + __this_cpu_read(steal_residual),
+					NS_PER_TICK,
+					this_cpu_ptr(&steal_residual));
+
+	__this_cpu_write(steal_snapshot, s);
+	if (t < jiffies_to_nsecs(adj))
+		return 0;
+
+	return t - jiffies_to_nsecs(adj);
+}
+
+static void steal_resume(void)
+{
+	_cputime_adjust((1ULL << 63) - 1);
+}
+
+static struct syscore_ops steal_syscore_ops = {
+	.resume	= steal_resume,
+};
+
+static int __init steal_register(void)
+{
+	register_syscore_ops(&steal_syscore_ops);
+	return 0;
+}
+core_initcall(steal_register);
+#endif
+
 /*
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
@@ -128,7 +169,7 @@ void account_user_time(struct task_struc
 	index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
 
 	/* Add user time to cpustat. */
-	task_group_account_field(p, index, cputime);
+	task_group_account_field(p, index, _cputime_adjust(cputime));
 
 	/* Account for user time used */
 	acct_account_cputime(p);
@@ -172,7 +213,7 @@ void account_system_index_time(struct ta
 	account_group_system_time(p, cputime);
 
 	/* Add system time to cpustat. */
-	task_group_account_field(p, index, cputime);
+	task_group_account_field(p, index, _cputime_adjust(cputime));
 
 	/* Account for system time used */
 	acct_account_cputime(p);
@@ -224,9 +265,9 @@ void account_idle_time(u64 cputime)
 	struct rq *rq = this_rq();
 
 	if (atomic_read(&rq->nr_iowait) > 0)
-		cpustat[CPUTIME_IOWAIT] += cputime;
+		cpustat[CPUTIME_IOWAIT] += _cputime_adjust(cputime);
 	else
-		cpustat[CPUTIME_IDLE] += cputime;
+		cpustat[CPUTIME_IDLE] += _cputime_adjust(cputime);
 }
 
 /*





_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
https://lists.xen.org/xen-devel

  reply	other threads:[~2017-10-10 10:07 UTC|newest]

Thread overview: 18+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-10-10  9:14 [PATCH 1/1] sched/cputime: do not decrease steal time after live migration on xen Dongli Zhang
2017-10-10 10:07 ` Jan Beulich [this message]
2017-10-10 10:07 ` [Xen-devel] " Jan Beulich
2017-10-10 10:59 ` Ingo Molnar
2017-10-10 10:59 ` Ingo Molnar
2017-10-10 12:42   ` Stanislaw Gruszka
2017-10-10 12:42   ` Stanislaw Gruszka
2017-10-10 12:48     ` Peter Zijlstra
2017-10-10 12:48     ` Peter Zijlstra
2017-10-10 14:01       ` Rik van Riel
2017-10-10 14:01       ` Rik van Riel
2017-10-11  7:47         ` Dongli Zhang
2017-10-11  7:47         ` Dongli Zhang
2017-10-11  7:29     ` Dongli Zhang
2017-10-11  7:29     ` Dongli Zhang
2017-10-10 11:58 ` Peter Zijlstra
2017-10-10 11:58 ` Peter Zijlstra
2017-10-10  9:14 Dongli Zhang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='59DCB80602000078001844B7__21937.2667874724$1507630115$gmane$org@prv-mh.provo.novell.com' \
    --to=jbeulich@suse.com \
    --cc=bevan@bi-co.net \
    --cc=dario.faggioli@citrix.com \
    --cc=dongli.zhang@oracle.com \
    --cc=joao.m.martins@oracle.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=xen-devel@lists.xen.org \
    --cc=xen.list@daevel.fr \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.