All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Alex Bennée" <alex.bennee@linaro.org>
To: qemu-devel@nongnu.org
Cc: "Alex Bennée" <alex.bennee@linaro.org>,
	"Richard Henderson" <richard.henderson@linaro.org>,
	"BALATON Zoltan" <balaton@eik.bme.hu>,
	"Aurelien Jarno" <aurelien@aurel32.net>,
	"Peter Maydell" <peter.maydell@linaro.org>
Subject: [RFC PATCH] softfloat: use QEMU_FLATTEN to avoid mistaken isra inlining
Date: Tue, 23 May 2023 14:11:07 +0100	[thread overview]
Message-ID: <20230523131107.3680641-1-alex.bennee@linaro.org> (raw)

Balton discovered that asserts for the extract/deposit calls had a
significant impact on a lame benchmark on qemu-ppc. Replicating with:

  ./qemu-ppc64 ~/lsrc/tests/lame.git-svn/builds/ppc64/frontend/lame \
    -h pts-trondheim-3.wav pts-trondheim-3.mp3

showed up the pack/unpack routines not eliding the assert checks as it
should have done causing them to prominently figure in the profile:

  11.44%  qemu-ppc64  qemu-ppc64               [.] unpack_raw64.isra.0
  11.03%  qemu-ppc64  qemu-ppc64               [.] parts64_uncanon_normal
   8.26%  qemu-ppc64  qemu-ppc64               [.] helper_compute_fprf_float64
   6.75%  qemu-ppc64  qemu-ppc64               [.] do_float_check_status
   5.34%  qemu-ppc64  qemu-ppc64               [.] parts64_muladd
   4.75%  qemu-ppc64  qemu-ppc64               [.] pack_raw64.isra.0
   4.38%  qemu-ppc64  qemu-ppc64               [.] parts64_canonicalize
   3.62%  qemu-ppc64  qemu-ppc64               [.] float64r32_round_pack_canonical

After this patch the same test runs 31 seconds faster with a profile
where the generated code dominates more:

+   14.12%     0.00%  qemu-ppc64  [unknown]                [.] 0x0000004000619420
+   13.30%     0.00%  qemu-ppc64  [unknown]                [.] 0x0000004000616850
+   12.58%    12.19%  qemu-ppc64  qemu-ppc64               [.] parts64_uncanon_normal
+   10.62%     0.00%  qemu-ppc64  [unknown]                [.] 0x000000400061bf70
+    9.91%     9.73%  qemu-ppc64  qemu-ppc64               [.] helper_compute_fprf_float64
+    7.84%     7.82%  qemu-ppc64  qemu-ppc64               [.] do_float_check_status
+    6.47%     5.78%  qemu-ppc64  qemu-ppc64               [.] parts64_canonicalize.constprop.0
+    6.46%     0.00%  qemu-ppc64  [unknown]                [.] 0x0000004000620130
+    6.42%     0.00%  qemu-ppc64  [unknown]                [.] 0x0000004000619400
+    6.17%     6.04%  qemu-ppc64  qemu-ppc64               [.] parts64_muladd
+    5.85%     0.00%  qemu-ppc64  [unknown]                [.] 0x00000040006167e0
+    5.74%     0.00%  qemu-ppc64  [unknown]                [.] 0x0000b693fcffffd3
+    5.45%     4.78%  qemu-ppc64  qemu-ppc64               [.] float64r32_round_pack_canonical

Suggested-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <ec9cfe5a-d5f2-466d-34dc-c35817e7e010@linaro.org>
[AJB: Patchified rth's suggestion]
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Cc: BALATON Zoltan <balaton@eik.bme.hu>
---
 fpu/softfloat.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 108f9cb224..42e6c188b4 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -593,27 +593,27 @@ static void unpack_raw64(FloatParts64 *r, const FloatFmt *fmt, uint64_t raw)
     };
 }
 
-static inline void float16_unpack_raw(FloatParts64 *p, float16 f)
+static void QEMU_FLATTEN float16_unpack_raw(FloatParts64 *p, float16 f)
 {
     unpack_raw64(p, &float16_params, f);
 }
 
-static inline void bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
+static void QEMU_FLATTEN bfloat16_unpack_raw(FloatParts64 *p, bfloat16 f)
 {
     unpack_raw64(p, &bfloat16_params, f);
 }
 
-static inline void float32_unpack_raw(FloatParts64 *p, float32 f)
+static void QEMU_FLATTEN float32_unpack_raw(FloatParts64 *p, float32 f)
 {
     unpack_raw64(p, &float32_params, f);
 }
 
-static inline void float64_unpack_raw(FloatParts64 *p, float64 f)
+static void QEMU_FLATTEN float64_unpack_raw(FloatParts64 *p, float64 f)
 {
     unpack_raw64(p, &float64_params, f);
 }
 
-static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
+static void QEMU_FLATTEN floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
 {
     *p = (FloatParts128) {
         .cls = float_class_unclassified,
@@ -623,7 +623,7 @@ static void floatx80_unpack_raw(FloatParts128 *p, floatx80 f)
     };
 }
 
-static void float128_unpack_raw(FloatParts128 *p, float128 f)
+static void QEMU_FLATTEN float128_unpack_raw(FloatParts128 *p, float128 f)
 {
     const int f_size = float128_params.frac_size - 64;
     const int e_size = float128_params.exp_size;
@@ -650,27 +650,27 @@ static uint64_t pack_raw64(const FloatParts64 *p, const FloatFmt *fmt)
     return ret;
 }
 
-static inline float16 float16_pack_raw(const FloatParts64 *p)
+static float16 QEMU_FLATTEN float16_pack_raw(const FloatParts64 *p)
 {
     return make_float16(pack_raw64(p, &float16_params));
 }
 
-static inline bfloat16 bfloat16_pack_raw(const FloatParts64 *p)
+static bfloat16 QEMU_FLATTEN bfloat16_pack_raw(const FloatParts64 *p)
 {
     return pack_raw64(p, &bfloat16_params);
 }
 
-static inline float32 float32_pack_raw(const FloatParts64 *p)
+static float32 QEMU_FLATTEN float32_pack_raw(const FloatParts64 *p)
 {
     return make_float32(pack_raw64(p, &float32_params));
 }
 
-static inline float64 float64_pack_raw(const FloatParts64 *p)
+static float64 QEMU_FLATTEN float64_pack_raw(const FloatParts64 *p)
 {
     return make_float64(pack_raw64(p, &float64_params));
 }
 
-static float128 float128_pack_raw(const FloatParts128 *p)
+static float128 QEMU_FLATTEN float128_pack_raw(const FloatParts128 *p)
 {
     const int f_size = float128_params.frac_size - 64;
     const int e_size = float128_params.exp_size;
-- 
2.39.2



             reply	other threads:[~2023-05-23 13:11 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-23 13:11 Alex Bennée [this message]
2023-05-23 13:57 ` [RFC PATCH] softfloat: use QEMU_FLATTEN to avoid mistaken isra inlining BALATON Zoltan
2023-05-23 14:33   ` Richard Henderson
2023-05-23 17:51     ` BALATON Zoltan
2023-05-25 13:22     ` Paolo Bonzini
2023-05-25 13:30       ` Richard Henderson
2023-05-25 23:15       ` BALATON Zoltan
2023-05-25 13:30     ` Paolo Bonzini
2023-05-25 23:19       ` BALATON Zoltan
2023-05-26 11:56   ` BALATON Zoltan
2023-06-22 20:55   ` BALATON Zoltan
2023-06-23  5:50     ` Richard Henderson
2023-05-23 14:18 ` Philippe Mathieu-Daudé
2023-05-23 15:34 ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230523131107.3680641-1-alex.bennee@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=aurelien@aurel32.net \
    --cc=balaton@eik.bme.hu \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.