* [PATCH v3 1/2] x86, msr: allow rdmsr_safe_on_cpu() to schedule
@ 2018-03-23 21:58 Eric Dumazet
  2018-03-23 21:58 ` [PATCH v3 2/2] x86, cpuid: allow cpuid_read() " Eric Dumazet
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Eric Dumazet @ 2018-03-23 21:58 UTC (permalink / raw)
  To: x86
  Cc: lkml, Eric Dumazet, Eric Dumazet, H. Peter Anvin,
	Thomas Gleixner, Ingo Molnar, Borislav Petkov, Hugh Dickins

I noticed high latencies caused by a daemon periodically reading
various MSRs on all CPUs. KASAN kernels would see ~10ms latencies
simply reading one MSR. Even without KASAN, sending an IPI to a CPU
in a deep sleep state, or to one blocking hard IRQs in a long section,
then waiting for the answer can consume hundreds of usec.

Convert rdmsr_safe_on_cpu() to use a completion instead
of busy polling.

Overall daemon CPU usage was reduced by 35%,
and latencies caused by msr_read() disappeared.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Hugh Dickins <hughd@google.com>
---
 arch/x86/lib/msr-smp.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c
index 693cce0be82dffb822cecd0c7e38d2821aff896c..761ba062afdaf7f7d0603ed94ed6cc3e46b37f76 100644
--- a/arch/x86/lib/msr-smp.c
+++ b/arch/x86/lib/msr-smp.c
@@ -2,6 +2,7 @@
 #include <linux/export.h>
 #include <linux/preempt.h>
 #include <linux/smp.h>
+#include <linux/completion.h>
 #include <asm/msr.h>
 
 static void __rdmsr_on_cpu(void *info)
@@ -143,13 +144,19 @@ void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
 }
 EXPORT_SYMBOL(wrmsr_on_cpus);
 
+struct msr_info_completion {
+	struct msr_info		msr;
+	struct completion	done;
+};
+
 /* These "safe" variants are slower and should be used when the target MSR
    may not actually exist. */
 static void __rdmsr_safe_on_cpu(void *info)
 {
-	struct msr_info *rv = info;
+	struct msr_info_completion *rv = info;
 
-	rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h);
+	rv->msr.err = rdmsr_safe(rv->msr.msr_no, &rv->msr.reg.l, &rv->msr.reg.h);
+	complete(&rv->done);
 }
 
 static void __wrmsr_safe_on_cpu(void *info)
@@ -161,17 +168,26 @@ static void __wrmsr_safe_on_cpu(void *info)
 
 int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 {
+	struct msr_info_completion rv;
+	call_single_data_t csd = {
+		.func	= __rdmsr_safe_on_cpu,
+		.info	= &rv,
+	};
 	int err;
-	struct msr_info rv;
 
 	memset(&rv, 0, sizeof(rv));
+	init_completion(&rv.done);
+	rv.msr.msr_no = msr_no;
 
-	rv.msr_no = msr_no;
-	err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
-	*l = rv.reg.l;
-	*h = rv.reg.h;
+	err = smp_call_function_single_async(cpu, &csd);
+	if (!err) {
+		wait_for_completion(&rv.done);
+		err = rv.msr.err;
+	}
+	*l = rv.msr.reg.l;
+	*h = rv.msr.reg.h;
 
-	return err ? err : rv.err;
+	return err;
 }
 EXPORT_SYMBOL(rdmsr_safe_on_cpu);
 
-- 
2.17.0.rc0.231.g781580f067-goog
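
For context, a minimal userspace sketch (illustrative, not part of the
patch) of the access pattern that ends up in rdmsr_safe_on_cpu(): each
8-byte pread() on /dev/cpu/N/msr is served by msr_read() in the kernel.
The MSR number (0xE8, IA32_APERF) is only an example; the read needs
root and the msr module loaded.

#include <fcntl.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0)
		return 1;
	/* The file offset selects the MSR number to read. */
	if (pread(fd, &val, sizeof(val), 0xE8) != sizeof(val)) {
		close(fd);
		return 1;
	}
	printf("IA32_APERF on cpu0: 0x%" PRIx64 "\n", val);
	close(fd);
	return 0;
}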


* [PATCH v3 2/2] x86, cpuid: allow cpuid_read() to schedule
  2018-03-23 21:58 [PATCH v3 1/2] x86, msr: allow rdmsr_safe_on_cpu() to schedule Eric Dumazet
@ 2018-03-23 21:58 ` Eric Dumazet
  2018-03-23 22:17   ` H. Peter Anvin
  2018-03-27 10:10   ` [tip:x86/cleanups] x86/cpuid: Allow " tip-bot for Eric Dumazet
  2018-03-24  8:09 ` [PATCH v3 1/2] x86, msr: allow rdmsr_safe_on_cpu() " Ingo Molnar
  2018-03-27 10:10 ` [tip:x86/cleanups] x86/msr: Allow " tip-bot for Eric Dumazet
  2 siblings, 2 replies; 15+ messages in thread
From: Eric Dumazet @ 2018-03-23 21:58 UTC (permalink / raw)
  To: x86
  Cc: lkml, Eric Dumazet, Eric Dumazet, H. Peter Anvin,
	Thomas Gleixner, Borislav Petkov, Ingo Molnar, Hugh Dickins

I noticed high latencies caused by a daemon periodically reading various
MSRs and CPUID on all CPUs. KASAN kernels would see ~10ms latencies
simply reading one CPUID. Even without KASAN, sending an IPI to a CPU
in a deep sleep state, or to one blocking hard IRQs in a long section,
then waiting for the answer can consume hundreds of usec or more.

Switching to smp_call_function_single_async() and a completion
allows the caller to reschedule instead of burning CPU cycles.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
---
 arch/x86/kernel/cpuid.c | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 0931a105ffe16cde4640e759efa600b23a756d84..1d300f96df4b316dbe3392c8221467cfd8593272 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -40,6 +40,7 @@
 #include <linux/notifier.h>
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
+#include <linux/completion.h>
 
 #include <asm/processor.h>
 #include <asm/msr.h>
@@ -47,19 +48,27 @@
 static struct class *cpuid_class;
 static enum cpuhp_state cpuhp_cpuid_state;
 
+struct cpuid_regs_done {
+	struct cpuid_regs regs;
+	struct completion done;
+};
+
 static void cpuid_smp_cpuid(void *cmd_block)
 {
-	struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block;
+	struct cpuid_regs_done *cmd = cmd_block;
+
+	cpuid_count(cmd->regs.eax, cmd->regs.ecx,
+		    &cmd->regs.eax, &cmd->regs.ebx,
+		    &cmd->regs.ecx, &cmd->regs.edx);
 
-	cpuid_count(cmd->eax, cmd->ecx,
-		    &cmd->eax, &cmd->ebx, &cmd->ecx, &cmd->edx);
+	complete(&cmd->done);
 }
 
 static ssize_t cpuid_read(struct file *file, char __user *buf,
 			  size_t count, loff_t *ppos)
 {
 	char __user *tmp = buf;
-	struct cpuid_regs cmd;
+	struct cpuid_regs_done cmd;
 	int cpu = iminor(file_inode(file));
 	u64 pos = *ppos;
 	ssize_t bytes = 0;
@@ -68,19 +77,28 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
 	if (count % 16)
 		return -EINVAL;	/* Invalid chunk size */
 
+	init_completion(&cmd.done);
 	for (; count; count -= 16) {
-		cmd.eax = pos;
-		cmd.ecx = pos >> 32;
-		err = smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1);
+		call_single_data_t csd = {
+			.func = cpuid_smp_cpuid,
+			.info = &cmd,
+		};
+
+		cmd.regs.eax = pos;
+		cmd.regs.ecx = pos >> 32;
+
+		err = smp_call_function_single_async(cpu, &csd);
 		if (err)
 			break;
-		if (copy_to_user(tmp, &cmd, 16)) {
+		wait_for_completion(&cmd.done);
+		if (copy_to_user(tmp, &cmd.regs, 16)) {
 			err = -EFAULT;
 			break;
 		}
 		tmp += 16;
 		bytes += 16;
 		*ppos = ++pos;
+		reinit_completion(&cmd.done);
 	}
 
 	return bytes ? bytes : err;
-- 
2.17.0.rc0.231.g781580f067-goog
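
For reference, a minimal sketch (illustrative, not part of the patch) of
the /dev/cpu/N/cpuid read path this patch touches: the 64-bit file
offset encodes the CPUID leaf in its low 32 bits (EAX) and the subleaf
in its high 32 bits (ECX), and every 16-byte read returns EAX, EBX, ECX
and EDX.

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint32_t regs[4];
	uint32_t leaf = 0, subleaf = 0;	/* leaf 0 = vendor string, as an example */
	uint64_t off = ((uint64_t)subleaf << 32) | leaf; /* assumes 64-bit off_t */
	int fd = open("/dev/cpu/0/cpuid", O_RDONLY);

	if (fd < 0)
		return 1;
	/* One 16-byte chunk per (leaf, subleaf) pair. */
	if (pread(fd, regs, sizeof(regs), (off_t)off) != sizeof(regs)) {
		close(fd);
		return 1;
	}
	printf("eax=%08x ebx=%08x ecx=%08x edx=%08x\n",
	       regs[0], regs[1], regs[2], regs[3]);
	close(fd);
	return 0;
}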


* Re: [PATCH v3 2/2] x86, cpuid: allow cpuid_read() to schedule
  2018-03-23 21:58 ` [PATCH v3 2/2] x86, cpuid: allow cpuid_read() " Eric Dumazet
@ 2018-03-23 22:17   ` H. Peter Anvin
  2018-03-24  1:01     ` Eric Dumazet
  2018-03-27 10:10   ` [tip:x86/cleanups] x86/cpuid: Allow " tip-bot for Eric Dumazet
  1 sibling, 1 reply; 15+ messages in thread
From: H. Peter Anvin @ 2018-03-23 22:17 UTC (permalink / raw)
  To: Eric Dumazet, x86
  Cc: lkml, Eric Dumazet, Thomas Gleixner, Borislav Petkov,
	Ingo Molnar, Hugh Dickins

On 03/23/18 14:58, Eric Dumazet wrote:
> I noticed high latencies caused by a daemon periodically reading various
> MSRs and CPUID on all CPUs. KASAN kernels would see ~10ms latencies
> simply reading one CPUID. Even without KASAN, sending an IPI to a CPU
> in a deep sleep state, or to one blocking hard IRQs in a long section,
> then waiting for the answer can consume hundreds of usec or more.
> 
> Switching to smp_call_function_single_async() and a completion
> allows the caller to reschedule instead of burning CPU cycles.

That being said, the Right Way for a daemon to read multiple MSRs and
CPUIDs on multiple CPUs is to spawn a thread for each CPU and use CPU
affinity to lock them down.  No IPI is needed to access MSRs on the
current CPU, and CPUID doesn't even need kernel entry.

	-hpa
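
A minimal sketch of that per-CPU-thread approach (illustrative only):
pin a thread to each CPU with pthread_setaffinity_np(), execute CPUID
directly via the compiler's <cpuid.h> helpers (no kernel entry at all),
and read MSRs through the local /dev/cpu/N/msr node so the kernel
serves the request on the current CPU without a cross-CPU IPI.

#define _GNU_SOURCE
#include <cpuid.h>	/* __cpuid_count() from GCC/clang */
#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

static void *percpu_worker(void *arg)
{
	long cpu = (long)arg;
	unsigned int eax, ebx, ecx, edx;
	uint64_t msr_val;
	char path[64];
	cpu_set_t set;
	int fd;

	CPU_ZERO(&set);
	CPU_SET(cpu, &set);
	pthread_setaffinity_np(pthread_self(), sizeof(set), &set);

	/* CPUID: pure userspace, no system call. Leaf 6 is an example. */
	__cpuid_count(6, 0, eax, ebx, ecx, edx);

	/* MSR: still a syscall, but served locally, so no IPI is needed. */
	snprintf(path, sizeof(path), "/dev/cpu/%ld/msr", cpu);
	fd = open(path, O_RDONLY);
	if (fd >= 0) {
		pread(fd, &msr_val, sizeof(msr_val), 0xE8); /* IA32_APERF */
		close(fd);
	}
	return NULL;
}

int main(void)
{
	long cpu, ncpus = sysconf(_SC_NPROCESSORS_ONLN);
	pthread_t tid;

	for (cpu = 0; cpu < ncpus; cpu++) {
		pthread_create(&tid, NULL, percpu_worker, (void *)cpu);
		pthread_join(tid, NULL);
	}
	return 0;
}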


* Re: [PATCH v3 2/2] x86, cpuid: allow cpuid_read() to schedule
  2018-03-23 22:17   ` H. Peter Anvin
@ 2018-03-24  1:01     ` Eric Dumazet
  2018-03-25 22:11       ` H. Peter Anvin
  0 siblings, 1 reply; 15+ messages in thread
From: Eric Dumazet @ 2018-03-24  1:01 UTC (permalink / raw)
  To: H. Peter Anvin, Eric Dumazet, x86
  Cc: lkml, Thomas Gleixner, Borislav Petkov, Ingo Molnar, Hugh Dickins



On 03/23/2018 03:17 PM, H. Peter Anvin wrote:
> On 03/23/18 14:58, Eric Dumazet wrote:
>> I noticed high latencies caused by a daemon periodically reading various
>> MSRs and CPUID on all CPUs. KASAN kernels would see ~10ms latencies
>> simply reading one CPUID. Even without KASAN, sending an IPI to a CPU
>> in a deep sleep state, or to one blocking hard IRQs in a long section,
>> then waiting for the answer can consume hundreds of usec or more.
>>
>> Switching to smp_call_function_single_async() and a completion
>> allows the caller to reschedule instead of burning CPU cycles.
> 
> That being said, the Right Way for a daemon to read multiple MSRs and
> CPUIDs on multiple CPUs is to spawn a thread for each CPU and use CPU
> affinity to lock them down.  No IPI is needed to access MSRs on the
> current CPU, and CPUID doesn't even need kernel entry.

Indeed, assuming a daemon can have threads running on all CPUs :/

Some environments like to partition CPUs for different jobs/containers.

Yes, we can avoid IPIs by carefully redesigning these user programs.


* Re: [PATCH v3 1/2] x86, msr: allow rdmsr_safe_on_cpu() to schedule
  2018-03-23 21:58 [PATCH v3 1/2] x86, msr: allow rdmsr_safe_on_cpu() to schedule Eric Dumazet
  2018-03-23 21:58 ` [PATCH v3 2/2] x86, cpuid: allow cpuid_read() " Eric Dumazet
@ 2018-03-24  8:09 ` Ingo Molnar
  2018-03-24 10:50   ` Thomas Gleixner
  2018-03-24 14:29   ` Eric Dumazet
  2018-03-27 10:10 ` [tip:x86/cleanups] x86/msr: Allow " tip-bot for Eric Dumazet
  2 siblings, 2 replies; 15+ messages in thread
From: Ingo Molnar @ 2018-03-24  8:09 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: x86, lkml, Eric Dumazet, H. Peter Anvin, Thomas Gleixner,
	Ingo Molnar, Borislav Petkov, Hugh Dickins, Peter Zijlstra


* Eric Dumazet <edumazet@google.com> wrote:

> I noticed high latencies caused by a daemon periodically reading
> various MSRs on all CPUs. KASAN kernels would see ~10ms latencies
> simply reading one MSR. Even without KASAN, sending an IPI to a CPU
> in a deep sleep state, or to one blocking hard IRQs in a long section,
> then waiting for the answer can consume hundreds of usec.
> 
> Convert rdmsr_safe_on_cpu() to use a completion instead
> of busy polling.
> 
> Overall daemon CPU usage was reduced by 35%,
> and latencies caused by msr_read() disappeared.

What "daemon" is this and why is it reading MSRs?

Thanks,

	Ingo


* Re: [PATCH v3 1/2] x86, msr: allow rdmsr_safe_on_cpu() to schedule
  2018-03-24  8:09 ` [PATCH v3 1/2] x86, msr: allow rdmsr_safe_on_cpu() " Ingo Molnar
@ 2018-03-24 10:50   ` Thomas Gleixner
  2018-03-24 14:29   ` Eric Dumazet
  1 sibling, 0 replies; 15+ messages in thread
From: Thomas Gleixner @ 2018-03-24 10:50 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Eric Dumazet, x86, lkml, Eric Dumazet, H. Peter Anvin,
	Ingo Molnar, Borislav Petkov, Hugh Dickins, Peter Zijlstra

On Sat, 24 Mar 2018, Ingo Molnar wrote:
> * Eric Dumazet <edumazet@google.com> wrote:
> 
> > I noticed high latencies caused by a daemon periodically reading
> > various MSRs on all CPUs. KASAN kernels would see ~10ms latencies
> > simply reading one MSR. Even without KASAN, sending an IPI to a CPU
> > in a deep sleep state, or to one blocking hard IRQs in a long section,
> > then waiting for the answer can consume hundreds of usec.
> > 
> > Convert rdmsr_safe_on_cpu() to use a completion instead
> > of busy polling.
> > 
> > Overall daemon CPU usage was reduced by 35%,
> > and latencies caused by msr_read() disappeared.
> 
> What "daemon" is this and why is it reading MSRs?

Why? Just because it can ....


* Re: [PATCH v3 1/2] x86, msr: allow rdmsr_safe_on_cpu() to schedule
  2018-03-24  8:09 ` [PATCH v3 1/2] x86, msr: allow rdmsr_safe_on_cpu() " Ingo Molnar
  2018-03-24 10:50   ` Thomas Gleixner
@ 2018-03-24 14:29   ` Eric Dumazet
  2018-03-25 14:12     ` Borislav Petkov
  1 sibling, 1 reply; 15+ messages in thread
From: Eric Dumazet @ 2018-03-24 14:29 UTC (permalink / raw)
  To: Ingo Molnar, Eric Dumazet
  Cc: x86, lkml, H. Peter Anvin, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Hugh Dickins, Peter Zijlstra



On 03/24/2018 01:09 AM, Ingo Molnar wrote:
> 
> * Eric Dumazet <edumazet@google.com> wrote:
> 
>> I noticed high latencies caused by a daemon periodically reading
>> various MSRs on all CPUs. KASAN kernels would see ~10ms latencies
>> simply reading one MSR. Even without KASAN, sending an IPI to a CPU
>> in a deep sleep state, or to one blocking hard IRQs in a long section,
>> then waiting for the answer can consume hundreds of usec.
>>
>> Convert rdmsr_safe_on_cpu() to use a completion instead
>> of busy polling.
>>
>> Overall daemon CPU usage was reduced by 35%,
>> and latencies caused by msr_read() disappeared.
> 
> What "daemon" is this and why is it reading MSRs?

It is named gsysd, "Google System Tool", a daemon+CLI that runs
on all machines in production to provide a generic interface
for interacting with the system hardware.

I am not sure if this answers your question; I could probably
give a rough estimate of the MWh this daemon consumes across the planet
if that helps.

Note that the source of the problem is not reading the MSR, but having CPUs
block hard IRQs for a long time.

Ingo, it looks like any loop protected by unlock_task_sighand() might be the main
offender.

Application writers seem to love getrusage(), for example.
Can we rewrite it to not block hard IRQs?

Thanks !


* Re: [PATCH v3 1/2] x86, msr: allow rdmsr_safe_on_cpu() to schedule
  2018-03-24 14:29   ` Eric Dumazet
@ 2018-03-25 14:12     ` Borislav Petkov
  2018-03-26  1:21       ` H. Peter Anvin
  2018-03-26  6:40       ` Ingo Molnar
  0 siblings, 2 replies; 15+ messages in thread
From: Borislav Petkov @ 2018-03-25 14:12 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Ingo Molnar, Eric Dumazet, x86, lkml, H. Peter Anvin,
	Thomas Gleixner, Ingo Molnar, Hugh Dickins, Peter Zijlstra

On Sat, Mar 24, 2018 at 07:29:48AM -0700, Eric Dumazet wrote:
> It is named gsysd, "Google System Tool", a daemon+CLI that runs
> on all machines in production to provide a generic interface
> for interacting with the system hardware.

So I'm wondering if poking at the hardware like that is really an optimal
design. Maybe it would be cleaner if the OS provided properly
abstracted sysfs interfaces instead of raw MSRs, for a couple of
reasons:

* different vendors have different MSR ranges giving the same info, so
instead of differentiating that in your daemon, we can do that nicely in
the kernel.

* exposing raw MSRs instead of having clearly defined sysfs files is
always a pain when a new CPU decides to change those MSRs. Hiding that
change in the OS is always easier.

* periodically polling MSRs which don't change that often is, of course,
wasting power, so reading a cached result is leaner.

* <another reason which I'll think of after hitting send... :\ >

In general, we should never have exposed that raw MSR access, but
it is too late now - that ship has sailed. We can still try to design
new interfaces more cleanly, though.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.


* Re: [PATCH v3 2/2] x86, cpuid: allow cpuid_read() to schedule
  2018-03-24  1:01     ` Eric Dumazet
@ 2018-03-25 22:11       ` H. Peter Anvin
  0 siblings, 0 replies; 15+ messages in thread
From: H. Peter Anvin @ 2018-03-25 22:11 UTC (permalink / raw)
  To: Eric Dumazet, Eric Dumazet, x86
  Cc: lkml, Thomas Gleixner, Borislav Petkov, Ingo Molnar, Hugh Dickins

On 03/23/18 18:01, Eric Dumazet wrote:
> 
> Indeed, assuming a daemon can have threads running on all CPUs :/
> 
> Some environments like to partition CPUs for different jobs/containers.
> 

Then it doesn't need to run on all CPUs... just the ones that
particular daemon is "responsible" for.

	-hpa


* Re: [PATCH v3 1/2] x86, msr: allow rdmsr_safe_on_cpu() to schedule
  2018-03-25 14:12     ` Borislav Petkov
@ 2018-03-26  1:21       ` H. Peter Anvin
  2018-03-26  6:40       ` Ingo Molnar
  1 sibling, 0 replies; 15+ messages in thread
From: H. Peter Anvin @ 2018-03-26  1:21 UTC (permalink / raw)
  To: Borislav Petkov, Eric Dumazet
  Cc: Ingo Molnar, Eric Dumazet, x86, lkml, Thomas Gleixner,
	Ingo Molnar, Hugh Dickins, Peter Zijlstra

On 03/25/18 07:12, Borislav Petkov wrote:
> 
> So I'm wondering if poking at the hardware like that is really an optimal
> design. Maybe it would be cleaner if the OS provided properly
> abstracted sysfs interfaces instead of raw MSRs, for a couple of
> reasons:
> 

It's most definitely not.

	-hpa


* Re: [PATCH v3 1/2] x86, msr: allow rdmsr_safe_on_cpu() to schedule
  2018-03-25 14:12     ` Borislav Petkov
  2018-03-26  1:21       ` H. Peter Anvin
@ 2018-03-26  6:40       ` Ingo Molnar
       [not found]         ` <CANn89iKy_jVpBvAebJFm1UKVKfG=p+R4B1tXmC4waeK7YzZh2g@mail.gmail.com>
  1 sibling, 1 reply; 15+ messages in thread
From: Ingo Molnar @ 2018-03-26  6:40 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Eric Dumazet, Eric Dumazet, x86, lkml, H. Peter Anvin,
	Thomas Gleixner, Ingo Molnar, Hugh Dickins, Peter Zijlstra


* Borislav Petkov <bp@alien8.de> wrote:

> On Sat, Mar 24, 2018 at 07:29:48AM -0700, Eric Dumazet wrote:
> > It is named gsysd, "Google System Tool", a daemon+CLI that runs
> > on all machines in production to provide a generic interface
> > for interacting with the system hardware.
> 
> So I'm wondering if poking at the hardware like that is really an optimal
> design. Maybe it would be cleaner if the OS provided properly
> abstracted sysfs interfaces instead of raw MSRs.

It's not ideal to read /dev/msr.

A SysFS interface to enumerate 'system statistics' MSRs already exists via perf, 
under:

  /sys/bus/event_source/devices/msr/events/

This is for simple platform specific hardware statistics counters.

This is implemented in arch/x86/events/msr.c, with a number of MSRs already 
abstracted out:

enum perf_msr_id {
        PERF_MSR_TSC                    = 0,
        PERF_MSR_APERF                  = 1,
        PERF_MSR_MPERF                  = 2,
        PERF_MSR_PPERF                  = 3,
        PERF_MSR_SMI                    = 4,
        PERF_MSR_PTSC                   = 5,
        PERF_MSR_IRPERF                 = 6,
        PERF_MSR_THERM                  = 7,
        PERF_MSR_THERM_SNAP             = 8,
        PERF_MSR_THERM_UNIT             = 9,
        PERF_MSR_EVENT_MAX,
};

It's very easy to add new MSRs:

 9ae21dd66b97: perf/x86/msr: Add support for MSR_IA32_THERM_STATUS
 aaf248848db5: perf/x86/msr: Add AMD IRPERF (Instructions Retired) performance counter
 8a2242618477: perf/x86/msr: Add AMD PTSC (Performance Time-Stamp Counter) support

This commit added an MSR to msr-index and added MSR event support for it as well 
with proper CPU-ID dependent enumeration:

 8a2242618477: perf/x86/msr: Add AMD PTSC (Performance Time-Stamp Counter) support

 arch/x86/events/msr.c              | 8 ++++++++
 arch/x86/include/asm/cpufeatures.h | 1 +
 arch/x86/include/asm/msr-index.h   | 1 +
 3 files changed, 10 insertions(+)

More complex MSR value encodings are supported as well - see for example 
MSR_IA32_THERM_STATUS, which is encoded as:

               /* if valid, extract digital readout, otherwise set to -1 */
               now = now & (1ULL << 31) ? (now >> 16) & 0x3f :  -1;
               local64_set(&event->count, now);

If an MSR counter is a plain integer counter then no such code has to be added.

Only those MSRs that are valid on a system are 'live', and thus tooling can 
enumerate them programmatically and has easy symbolic access to them:

  galatea:~> perf list | grep msr/

  msr/aperf/                                         [Kernel PMU event]
  msr/cpu_thermal_margin/                            [Kernel PMU event]
  msr/mperf/                                         [Kernel PMU event]
  msr/smi/                                           [Kernel PMU event]
  msr/tsc/                                           [Kernel PMU event]

These can be used as simple counters that are read - and it's using perf not 
/dev/msr so they are fast and scalable - but the full set of perf features is also 
available, so we can do:

 galatea:~> perf stat -a -e msr/aperf/ sleep 1

 Performance counter stats for 'system wide':

       354,442,500      msr/aperf/                                                  

       1.001918252 seconds time elapsed

This interface is vastly superior to /dev/msr: it does not have the various 
usability, scalability and security limitations of /dev/msr.

/dev/msr is basically for debugging.

If performance matters then the relevant MSR events should be added and gsysd 
should be updated to support MSR events.

Thanks,

	Ingo
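
To make that concrete, here is a minimal sketch of counting the
msr/aperf/ event with perf_event_open(). The PMU type is read from
sysfs at runtime because dynamic PMU ids differ per system; the config
value 0x1 is an assumption that should really be parsed from
/sys/bus/event_source/devices/msr/events/aperf ("event=0x01").

#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	unsigned int type;
	uint64_t count;
	int fd;
	FILE *f = fopen("/sys/bus/event_source/devices/msr/type", "r");

	if (!f || fscanf(f, "%u", &type) != 1)
		return 1;
	fclose(f);

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = type;	/* dynamic type of the "msr" PMU */
	attr.config = 0x1;	/* aperf; assumed, see the sysfs note above */

	/* System-wide counter on CPU 0; needs perf privileges. */
	fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
	if (fd < 0)
		return 1;
	sleep(1);
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("msr/aperf/ on cpu0 over ~1s: %llu\n",
		       (unsigned long long)count);
	close(fd);
	return 0;
}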


* Re: [PATCH v3 1/2] x86, msr: allow rdmsr_safe_on_cpu() to schedule
       [not found]         ` <CANn89iKy_jVpBvAebJFm1UKVKfG=p+R4B1tXmC4waeK7YzZh2g@mail.gmail.com>
@ 2018-03-26 14:23           ` Thomas Gleixner
  2018-03-27  9:36             ` Ingo Molnar
  0 siblings, 1 reply; 15+ messages in thread
From: Thomas Gleixner @ 2018-03-26 14:23 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Ingo Molnar, Borislav Petkov, Eric Dumazet,
	the arch/x86 maintainers, LKML, H. Peter Anvin, Ingo Molnar,
	Hugh Dickins, a.p.zijlstra

On Mon, 26 Mar 2018, Eric Dumazet wrote:
> On Sun, Mar 25, 2018 at 11:40 PM Ingo Molnar <mingo@kernel.org> wrote:
> > If performance matters then the relevant MSR events should be added and
> > gsysd should be updated to support MSR events.
> 
> Thanks for the hints, Ingo. I have forwarded them.
> 
> I am pretty sure gsysd was designed and written years before perf even
> existed, so I am not sure all this will ever be implemented.
> 
> I guess I will apply the kernel patches locally for the time being.

I'm still considering merging them because there is no real reason not to
do so. The interfaces exist and are used. Reducing their impact is
certainly worthwhile.

Thanks,

	tglx


* Re: [PATCH v3 1/2] x86, msr: allow rdmsr_safe_on_cpu() to schedule
  2018-03-26 14:23           ` Thomas Gleixner
@ 2018-03-27  9:36             ` Ingo Molnar
  0 siblings, 0 replies; 15+ messages in thread
From: Ingo Molnar @ 2018-03-27  9:36 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Eric Dumazet, Borislav Petkov, Eric Dumazet,
	the arch/x86 maintainers, LKML, H. Peter Anvin, Ingo Molnar,
	Hugh Dickins, a.p.zijlstra


* Thomas Gleixner <tglx@linutronix.de> wrote:

> On Mon, 26 Mar 2018, Eric Dumazet wrote:
> > On Sun, Mar 25, 2018 at 11:40 PM Ingo Molnar <mingo@kernel.org> wrote:
> > > If performance matters then the relevant MSR events should be added and
> > > gsysd should be updated to support MSR events.
> > 
> > Thanks for the hints, Ingo. I have forwarded them.
> > 
> > I am pretty sure gsysd was designed and written years before perf even
> > existed, so I am not sure all this will ever be implemented.
> > 
> > I guess I will apply the kernel patches locally for the time being.
> 
> I'm still considering merging them because there is no real reason not to
> do so. The interfaces exist and are used. Reducing their impact is
> certainly worthwhile.

Ok, that's a good point, and the patches themselves are sane:

  Acked-by: Ingo Molnar <mingo@kernel.org>

Thanks,

	Ingo


* [tip:x86/cleanups] x86/msr: Allow rdmsr_safe_on_cpu() to schedule
  2018-03-23 21:58 [PATCH v3 1/2] x86, msr: allow rdmsr_safe_on_cpu() to schedule Eric Dumazet
  2018-03-23 21:58 ` [PATCH v3 2/2] x86, cpuid: allow cpuid_read() " Eric Dumazet
  2018-03-24  8:09 ` [PATCH v3 1/2] x86, msr: allow rdmsr_safe_on_cpu() " Ingo Molnar
@ 2018-03-27 10:10 ` tip-bot for Eric Dumazet
  2 siblings, 0 replies; 15+ messages in thread
From: tip-bot for Eric Dumazet @ 2018-03-27 10:10 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: mingo, bp, eric.dumazet, linux-kernel, hpa, tglx, edumazet, hughd

Commit-ID:  07cde313b2d21f728cec2836db7cdb55476f7a26
Gitweb:     https://git.kernel.org/tip/07cde313b2d21f728cec2836db7cdb55476f7a26
Author:     Eric Dumazet <edumazet@google.com>
AuthorDate: Fri, 23 Mar 2018 14:58:17 -0700
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 27 Mar 2018 12:01:47 +0200

x86/msr: Allow rdmsr_safe_on_cpu() to schedule

High latencies can be observed, caused by a daemon periodically reading
various MSRs on all CPUs. On KASAN-enabled kernels, ~10ms latencies can be
observed simply reading one MSR. Even without KASAN, sending an IPI to a
CPU which is in a deep sleep state or in a long hard IRQ disabled section,
then waiting for the answer, can consume hundreds of microseconds.

All usage sites are in preemptible context, so convert rdmsr_safe_on_cpu()
to use a completion instead of busy polling.

Overall daemon CPU usage was reduced by 35%, and latencies caused by
msr_read() disappeared.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Link: https://lkml.kernel.org/r/20180323215818.127774-1-edumazet@google.com

---
 arch/x86/lib/msr-smp.c | 32 ++++++++++++++++++++++++--------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/arch/x86/lib/msr-smp.c b/arch/x86/lib/msr-smp.c
index 693cce0be82d..761ba062afda 100644
--- a/arch/x86/lib/msr-smp.c
+++ b/arch/x86/lib/msr-smp.c
@@ -2,6 +2,7 @@
 #include <linux/export.h>
 #include <linux/preempt.h>
 #include <linux/smp.h>
+#include <linux/completion.h>
 #include <asm/msr.h>
 
 static void __rdmsr_on_cpu(void *info)
@@ -143,13 +144,19 @@ void wrmsr_on_cpus(const struct cpumask *mask, u32 msr_no, struct msr *msrs)
 }
 EXPORT_SYMBOL(wrmsr_on_cpus);
 
+struct msr_info_completion {
+	struct msr_info		msr;
+	struct completion	done;
+};
+
 /* These "safe" variants are slower and should be used when the target MSR
    may not actually exist. */
 static void __rdmsr_safe_on_cpu(void *info)
 {
-	struct msr_info *rv = info;
+	struct msr_info_completion *rv = info;
 
-	rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h);
+	rv->msr.err = rdmsr_safe(rv->msr.msr_no, &rv->msr.reg.l, &rv->msr.reg.h);
+	complete(&rv->done);
 }
 
 static void __wrmsr_safe_on_cpu(void *info)
@@ -161,17 +168,26 @@ static void __wrmsr_safe_on_cpu(void *info)
 
 int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h)
 {
+	struct msr_info_completion rv;
+	call_single_data_t csd = {
+		.func	= __rdmsr_safe_on_cpu,
+		.info	= &rv,
+	};
 	int err;
-	struct msr_info rv;
 
 	memset(&rv, 0, sizeof(rv));
+	init_completion(&rv.done);
+	rv.msr.msr_no = msr_no;
 
-	rv.msr_no = msr_no;
-	err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1);
-	*l = rv.reg.l;
-	*h = rv.reg.h;
+	err = smp_call_function_single_async(cpu, &csd);
+	if (!err) {
+		wait_for_completion(&rv.done);
+		err = rv.msr.err;
+	}
+	*l = rv.msr.reg.l;
+	*h = rv.msr.reg.h;
 
-	return err ? err : rv.err;
+	return err;
 }
 EXPORT_SYMBOL(rdmsr_safe_on_cpu);
 


* [tip:x86/cleanups] x86/cpuid: Allow cpuid_read() to schedule
  2018-03-23 21:58 ` [PATCH v3 2/2] x86, cpuid: allow cpuid_read() " Eric Dumazet
  2018-03-23 22:17   ` H. Peter Anvin
@ 2018-03-27 10:10   ` tip-bot for Eric Dumazet
  1 sibling, 0 replies; 15+ messages in thread
From: tip-bot for Eric Dumazet @ 2018-03-27 10:10 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: hughd, edumazet, linux-kernel, eric.dumazet, mingo, tglx, hpa, bp

Commit-ID:  67bbd7a8d6bcdc44cc27105ae8c374e9176ceaf1
Gitweb:     https://git.kernel.org/tip/67bbd7a8d6bcdc44cc27105ae8c374e9176ceaf1
Author:     Eric Dumazet <edumazet@google.com>
AuthorDate: Fri, 23 Mar 2018 14:58:18 -0700
Committer:  Thomas Gleixner <tglx@linutronix.de>
CommitDate: Tue, 27 Mar 2018 12:01:48 +0200

x86/cpuid: Allow cpuid_read() to schedule

High latencies can be observed, caused by a daemon periodically reading
CPUID on all CPUs. On KASAN-enabled kernels, ~10ms latencies can be
observed. Even without KASAN, sending an IPI to a CPU which is in a deep
sleep state or in a long hard IRQ disabled section, then waiting for the
answer, can consume hundreds of microseconds.

cpuid_read() is invoked in preemptible context, so it can be converted to
sleep instead of busy waiting.

Switching to smp_call_function_single_async() and a completion allows
rescheduling, and reduces CPU usage and latencies.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Link: https://lkml.kernel.org/r/20180323215818.127774-2-edumazet@google.com

---
 arch/x86/kernel/cpuid.c | 34 ++++++++++++++++++++++++++--------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 0931a105ffe1..1d300f96df4b 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -40,6 +40,7 @@
 #include <linux/notifier.h>
 #include <linux/uaccess.h>
 #include <linux/gfp.h>
+#include <linux/completion.h>
 
 #include <asm/processor.h>
 #include <asm/msr.h>
@@ -47,19 +48,27 @@
 static struct class *cpuid_class;
 static enum cpuhp_state cpuhp_cpuid_state;
 
+struct cpuid_regs_done {
+	struct cpuid_regs regs;
+	struct completion done;
+};
+
 static void cpuid_smp_cpuid(void *cmd_block)
 {
-	struct cpuid_regs *cmd = (struct cpuid_regs *)cmd_block;
+	struct cpuid_regs_done *cmd = cmd_block;
+
+	cpuid_count(cmd->regs.eax, cmd->regs.ecx,
+		    &cmd->regs.eax, &cmd->regs.ebx,
+		    &cmd->regs.ecx, &cmd->regs.edx);
 
-	cpuid_count(cmd->eax, cmd->ecx,
-		    &cmd->eax, &cmd->ebx, &cmd->ecx, &cmd->edx);
+	complete(&cmd->done);
 }
 
 static ssize_t cpuid_read(struct file *file, char __user *buf,
 			  size_t count, loff_t *ppos)
 {
 	char __user *tmp = buf;
-	struct cpuid_regs cmd;
+	struct cpuid_regs_done cmd;
 	int cpu = iminor(file_inode(file));
 	u64 pos = *ppos;
 	ssize_t bytes = 0;
@@ -68,19 +77,28 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
 	if (count % 16)
 		return -EINVAL;	/* Invalid chunk size */
 
+	init_completion(&cmd.done);
 	for (; count; count -= 16) {
-		cmd.eax = pos;
-		cmd.ecx = pos >> 32;
-		err = smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1);
+		call_single_data_t csd = {
+			.func = cpuid_smp_cpuid,
+			.info = &cmd,
+		};
+
+		cmd.regs.eax = pos;
+		cmd.regs.ecx = pos >> 32;
+
+		err = smp_call_function_single_async(cpu, &csd);
 		if (err)
 			break;
-		if (copy_to_user(tmp, &cmd, 16)) {
+		wait_for_completion(&cmd.done);
+		if (copy_to_user(tmp, &cmd.regs, 16)) {
 			err = -EFAULT;
 			break;
 		}
 		tmp += 16;
 		bytes += 16;
 		*ppos = ++pos;
+		reinit_completion(&cmd.done);
 	}
 
 	return bytes ? bytes : err;

