All of lore.kernel.org
 help / color / mirror / Atom feed
From: Borislav Petkov <bp@alien8.de>
To: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>,
	Oleg Nesterov <oleg@redhat.com>, Rik van Riel <riel@redhat.com>,
	x86@kernel.org, linux-kernel@vger.kernel.org,
	Linus Torvalds <torvalds@linux-foundation.org>
Subject: Re: [RFC PATCH] x86, fpu: Use eagerfpu by default on all CPUs
Date: Sat, 21 Feb 2015 22:36:25 +0100	[thread overview]
Message-ID: <20150221213625.GD32073@pd.tnic> (raw)
In-Reply-To: <20150221192352.GA10027@gmail.com>

On Sat, Feb 21, 2015 at 08:23:52PM +0100, Ingo Molnar wrote:
> to switch between the modes?

I went all out and added a debugfs file (see the patch at the end) which
counts FPU saves. Then I ran this script:

---
#!/bin/bash

D="/sys/kernel/debug/fpu/eager"

echo "Lazy FPU: "
echo 0 > $D
echo -n "  FPU saves before: "; cat $D

perf stat -a -e task-clock,cycles,instructions,branch-misses,cache-misses,faults,context-switches,migrations --sync --pre ~/bin/pre-build-kernel.sh make -s -j12

echo -n "  FPU saves after: "; cat $D

echo ""
echo "Eager FPU: "
echo 1 > $D
echo -n "  FPU saves before: "; cat $D

perf stat -a -e task-clock,cycles,instructions,branch-misses,cache-misses,faults,context-switches,migrations --sync --pre ~/bin/pre-build-kernel.sh make -s -j12

echo -n "  FPU saves after: "; cat $D
---

which spat out this:

Lazy FPU:
  FPU saves before: 3
Setup is 16252 bytes (padded to 16384 bytes).
System is 4222 kB
CRC c79a13ab
Kernel: arch/x86/boot/bzImage is ready  (#41)

 Performance counter stats for 'system wide':

    1315527.989020      task-clock (msec)         #    6.003 CPUs utilized           [100.00%]
 3,042,312,057,208      cycles                    #    2.313 GHz                     [100.00%]
 2,790,807,863,402      instructions              #    0.92  insns per cycle         [100.00%]
    31,658,299,111      branch-misses             #   24.065 M/sec                   [100.00%]
    27,504,255,277      cache-misses              #   20.907 M/sec
        26,802,015      faults                    #    0.020 M/sec                   [100.00%]
         1,248,899      context-switches          #    0.949 K/sec                   [100.00%]
            69,553      migrations                #    0.053 K/sec

     219.127929718 seconds time elapsed

  FPU saves after: 704186

Eager FPU:
  FPU saves before: 4
Setup is 16252 bytes (padded to 16384 bytes).
System is 4222 kB
CRC 6767bb2e
Kernel: arch/x86/boot/bzImage is ready  (#42)

 Performance counter stats for 'system wide':

    1321651.543922      task-clock (msec)         #    6.003 CPUs utilized           [100.00%]
 3,044,403,437,364      cycles                    #    2.303 GHz                     [100.00%]
 2,790,835,886,565      instructions              #    0.92  insns per cycle         [100.00%]
    31,638,090,259      branch-misses             #   23.938 M/sec                   [100.00%]
    27,491,643,095      cache-misses              #   20.801 M/sec
        26,869,732      faults                    #    0.020 M/sec                   [100.00%]
         1,252,034      context-switches          #    0.947 K/sec                   [100.00%]
            69,247      migrations                #    0.052 K/sec

     220.148034331 seconds time elapsed

  FPU saves after: 901638

---
So we have about a one-second slowdown and roughly 200K more FPU saves in eager mode.

Provided I haven't made a mistake, it looks like the increase in cycles is
mirrored in the elapsed time being 1 second longer. I haven't done the
--repeat 10 thing again; maybe I should do that too, just to be fair, as
this is a single run.

---
 arch/x86/include/asm/fpu-internal.h |  4 ++++
 arch/x86/kernel/xsave.c             | 47 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index e97622f57722..7141f353e960 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -38,6 +38,8 @@ int ia32_setup_frame(int sig, struct ksignal *ksig,
 # define ia32_setup_rt_frame	__setup_rt_frame
 #endif
 
+
+extern unsigned long fpu_saved;
 extern unsigned int mxcsr_feature_mask;
 extern void fpu_init(void);
 extern void eager_fpu_init(void);
@@ -242,6 +244,8 @@ static inline void fpu_fxsave(struct fpu *fpu)
  */
 static inline int fpu_save_init(struct fpu *fpu)
 {
+	fpu_saved++;
+
 	if (use_xsave()) {
 		fpu_xsave(fpu);
 
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 0de1fae2bdf0..029de8b629d0 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -14,6 +14,8 @@
 #include <asm/sigframe.h>
 #include <asm/xcr.h>
 
+#include <linux/debugfs.h>
+
 /*
  * Supported feature mask by the CPU and the kernel.
  */
@@ -638,7 +640,7 @@ static void __init xstate_enable_boot_cpu(void)
 	setup_init_fpu_buf();
 
 	/* Auto enable eagerfpu for xsaveopt */
-	if (cpu_has_xsaveopt && eagerfpu != DISABLE)
+	if (eagerfpu != DISABLE)
 		eagerfpu = ENABLE;
 
 	if (pcntxt_mask & XSTATE_EAGER) {
@@ -739,3 +741,46 @@ void *get_xsave_addr(struct xsave_struct *xsave, int xstate)
 	return (void *)xsave + xstate_comp_offsets[feature];
 }
 EXPORT_SYMBOL_GPL(get_xsave_addr);
+
+unsigned long fpu_saved;
+
+static int eager_get(void *data, u64 *val)
+{
+	*val = fpu_saved;
+
+	return 0;
+}
+
+static int eager_set(void *data, u64 val)
+{
+	if (val)
+		setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
+	else
+		setup_clear_cpu_cap(X86_FEATURE_EAGER_FPU);
+
+	fpu_saved = 0;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(eager_fops, eager_get, eager_set, "%llu\n");
+
+static int __init setup_eagerfpu_knob(void)
+{
+	static struct dentry *d_eager, *f_eager;
+
+	d_eager = debugfs_create_dir("fpu", NULL);
+	if (!d_eager) {
+		pr_err("Error creating fpu debugfs dir\n");
+		return -ENOMEM;
+	}
+
+	f_eager = debugfs_create_file("eager", 0644, d_eager, NULL, &eager_fops);
+	if (!f_eager) {
+		pr_err("Error creating fpu debugfs node\n");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+late_initcall(setup_eagerfpu_knob);
-- 
2.2.0.33.gc18b867

-- 
Regards/Gruss,
    Boris.

ECO tip #101: Trim your mails when you reply.
--

  reply	other threads:[~2015-02-21 21:37 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-02-20 18:58 [RFC PATCH] x86, fpu: Use eagerfpu by default on all CPUs Andy Lutomirski
2015-02-20 19:05 ` Borislav Petkov
2015-02-21  9:31 ` Ingo Molnar
2015-02-21 16:38   ` Borislav Petkov
2015-02-21 17:29     ` Borislav Petkov
2015-02-21 18:39       ` Ingo Molnar
2015-02-21 19:15         ` Borislav Petkov
2015-02-21 19:23           ` Ingo Molnar
2015-02-21 21:36             ` Borislav Petkov [this message]
2015-02-22  8:18               ` Ingo Molnar
2015-02-22  8:22                 ` Ingo Molnar
2015-02-22 10:48                 ` Borislav Petkov
2015-02-22 12:50                 ` Borislav Petkov
2015-02-22 12:57                   ` Ingo Molnar
2015-02-22 13:21                     ` Borislav Petkov
2015-02-22  0:34       ` Maciej W. Rozycki
2015-02-22  2:18         ` Andy Lutomirski
2015-02-22 11:06           ` Borislav Petkov
2015-02-23  1:45             ` Rik van Riel
2015-02-23  5:22               ` Andy Lutomirski
2015-02-23 12:51                 ` Rik van Riel
2015-02-23 15:03                   ` Borislav Petkov
2015-02-23 15:51                     ` Rik van Riel
2015-02-23 18:06                       ` Borislav Petkov
2015-02-23 21:17           ` Maciej W. Rozycki
2015-02-23 21:21             ` Rik van Riel
2015-02-23 22:14               ` Linus Torvalds
2015-02-24  0:56                 ` Maciej W. Rozycki
2015-02-24  0:59                   ` Andy Lutomirski
2015-02-23 22:27               ` Maciej W. Rozycki
2015-02-23 23:44                 ` Andy Lutomirski
2015-02-24  2:14                   ` Maciej W. Rozycki
2015-02-24  2:31                     ` Andy Lutomirski
2015-02-24 14:43                       ` Rik van Riel
2015-02-21 18:34     ` Ingo Molnar
2015-02-23 14:59 ` Oleg Nesterov
2015-02-23 15:11   ` Borislav Petkov
2015-02-23 15:53     ` Rik van Riel
2015-02-23 18:40       ` Oleg Nesterov
2015-02-24 19:15 ` Denys Vlasenko
2015-02-25  0:07   ` Andy Lutomirski
2015-02-25 10:37     ` Borislav Petkov
2015-02-25 10:50       ` Ingo Molnar
2015-02-25 10:45     ` Ingo Molnar
2015-02-25 17:12 ` Some results (was: Re: [RFC PATCH] x86, fpu: Use eagerfpu by default on all CPUs) Borislav Petkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150221213625.GD32073@pd.tnic \
    --to=bp@alien8.de \
    --cc=linux-kernel@vger.kernel.org \
    --cc=luto@amacapital.net \
    --cc=mingo@kernel.org \
    --cc=oleg@redhat.com \
    --cc=riel@redhat.com \
    --cc=torvalds@linux-foundation.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.