All of lore.kernel.org
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: bharata@linux.ibm.com, alex.bennee@linaro.org, david@redhat.com
Subject: [PATCH v2 07/10] softfloat: Use x86_64 assembly for {add, sub}{192, 256}
Date: Fri, 25 Sep 2020 08:20:44 -0700	[thread overview]
Message-ID: <20200925152047.709901-8-richard.henderson@linaro.org> (raw)
In-Reply-To: <20200925152047.709901-1-richard.henderson@linaro.org>

The compiler cannot chain more than two additions together through the
carry flag.  Use inline assembly for the 3- and 4-word additions,
subtractions and negation.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/fpu/softfloat-macros.h | 18 ++++++++++++++++--
 fpu/softfloat.c                | 29 +++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/include/fpu/softfloat-macros.h b/include/fpu/softfloat-macros.h
index 95d88d05b8..99fa124e56 100644
--- a/include/fpu/softfloat-macros.h
+++ b/include/fpu/softfloat-macros.h
@@ -436,6 +436,13 @@ static inline void
      uint64_t *z2Ptr
  )
 {
+#ifdef __x86_64__
+    asm("add %5, %2\n\t"
+        "adc %4, %1\n\t"
+        "adc %3, %0"
+        : "=&r"(*z0Ptr), "=&r"(*z1Ptr), "=&r"(*z2Ptr)
+        : "rm"(b0), "rm"(b1), "rm"(b2), "0"(a0), "1"(a1), "2"(a2));
+#else
     uint64_t z0, z1, z2;
     int8_t carry0, carry1;
 
@@ -450,7 +457,7 @@ static inline void
     *z2Ptr = z2;
     *z1Ptr = z1;
     *z0Ptr = z0;
-
+#endif
 }
 
 /*----------------------------------------------------------------------------
@@ -494,6 +501,13 @@ static inline void
      uint64_t *z2Ptr
  )
 {
+#ifdef __x86_64__
+    asm("sub %5, %2\n\t"
+        "sbb %4, %1\n\t"
+        "sbb %3, %0"
+        : "=&r"(*z0Ptr), "=&r"(*z1Ptr), "=&r"(*z2Ptr)
+        : "rm"(b0), "rm"(b1), "rm"(b2), "0"(a0), "1"(a1), "2"(a2));
+#else
     uint64_t z0, z1, z2;
     int8_t borrow0, borrow1;
 
@@ -508,7 +522,7 @@ static inline void
     *z2Ptr = z2;
     *z1Ptr = z1;
     *z0Ptr = z0;
-
+#endif
 }
 
 /*----------------------------------------------------------------------------
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 49de31fec2..54d0b210ac 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -7340,6 +7340,15 @@ static inline void shift256RightJamming(UInt256 *p, unsigned count)
 /* R = A - B */
 static void sub256(UInt256 *r, UInt256 *a, UInt256 *b)
 {
+#if defined(__x86_64__)
+    asm("sub %7, %3\n\t"
+        "sbb %6, %2\n\t"
+        "sbb %5, %1\n\t"
+        "sbb %4, %0"
+        : "=&r"(r->w[0]), "=&r"(r->w[1]), "=&r"(r->w[2]), "=&r"(r->w[3])
+        : "rme"(b->w[0]), "rme"(b->w[1]), "rme"(b->w[2]), "rme"(b->w[3]),
+            "0"(a->w[0]),   "1"(a->w[1]),   "2"(a->w[2]),   "3"(a->w[3]));
+#else
     bool borrow = false;
 
     for (int i = 3; i >= 0; --i) {
@@ -7355,11 +7364,21 @@ static void sub256(UInt256 *r, UInt256 *a, UInt256 *b)
         }
         r->w[i] = rt;
     }
+#endif
 }
 
 /* A = -A */
 static void neg256(UInt256 *a)
 {
+#if defined(__x86_64__)
+    asm("negq %3\n\t"
+        "sbb %6, %2\n\t"
+        "sbb %5, %1\n\t"
+        "sbb %4, %0"
+        : "=&r"(a->w[0]), "=&r"(a->w[1]), "=&r"(a->w[2]), "+rm"(a->w[3])
+        : "rme"(a->w[0]), "rme"(a->w[1]), "rme"(a->w[2]),
+          "0"(0), "1"(0), "2"(0));
+#else
     /*
      * Recall that -X - 1 = ~X, and that since this is negation,
      * once we find a non-zero number, all subsequent words will
@@ -7388,11 +7407,20 @@ static void neg256(UInt256 *a)
     a->w[1] = ~a->w[1];
  not0:
     a->w[0] = ~a->w[0];
+#endif
 }
 
 /* A += B */
 static void add256(UInt256 *a, UInt256 *b)
 {
+#if defined(__x86_64__)
+    asm("add %7, %3\n\t"
+        "adc %6, %2\n\t"
+        "adc %5, %1\n\t"
+        "adc %4, %0"
+        :  "+r"(a->w[0]),  "+r"(a->w[1]),  "+r"(a->w[2]),  "+r"(a->w[3])
+        : "rme"(b->w[0]), "rme"(b->w[1]), "rme"(b->w[2]), "rme"(b->w[3]));
+#else
     bool carry = false;
 
     for (int i = 3; i >= 0; --i) {
@@ -7407,6 +7435,7 @@ static void add256(UInt256 *a, UInt256 *b)
         }
         a->w[i] = at;
     }
+#endif
 }
 
 float128 float128_muladd(float128 a_f, float128 b_f, float128 c_f,
-- 
2.25.1



  parent reply	other threads:[~2020-09-25 15:23 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-25 15:20 [PATCH v2 00/10] softfloat: Implement float128_muladd Richard Henderson
2020-09-25 15:20 ` [PATCH v2 01/10] softfloat: Use mulu64 for mul64To128 Richard Henderson
2020-10-15 19:08   ` Alex Bennée
2020-09-25 15:20 ` [PATCH v2 02/10] softfloat: Use int128.h for some operations Richard Henderson
2020-10-15 19:10   ` Alex Bennée
2020-09-25 15:20 ` [PATCH v2 03/10] softfloat: Tidy a * b + inf return Richard Henderson
2020-10-16  9:40   ` Alex Bennée
2020-10-16 17:04   ` Philippe Mathieu-Daudé
2020-09-25 15:20 ` [PATCH v2 04/10] softfloat: Add float_cmask and constants Richard Henderson
2020-10-16  9:44   ` Alex Bennée
2020-09-25 15:20 ` [PATCH v2 05/10] softfloat: Inline pick_nan_muladd into its caller Richard Henderson
2020-10-16 16:20   ` Alex Bennée
2020-10-16 16:36     ` Richard Henderson
2020-10-18 21:06       ` [PATCH] softfpu: Generalize pick_nan_muladd to opaque structures Richard Henderson
2020-10-19  9:57         ` Alex Bennée
2020-09-25 15:20 ` [PATCH v2 06/10] softfloat: Implement float128_muladd Richard Henderson
2020-10-16 16:31   ` Alex Bennée
2020-10-16 16:55     ` Richard Henderson
2020-09-25 15:20 ` Richard Henderson [this message]
2020-09-25 15:20 ` [PATCH v2 08/10] softfloat: Use x86_64 assembly for sh[rl]_double Richard Henderson
2020-09-25 15:20 ` [PATCH v2 09/10] softfloat: Use aarch64 assembly for {add, sub}{192, 256} Richard Henderson
2020-09-25 15:20 ` [PATCH v2 10/10] softfloat: Use ppc64 " Richard Henderson
2020-10-15 17:23 ` [PATCH v2 00/10] softfloat: Implement float128_muladd Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200925152047.709901-8-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=alex.bennee@linaro.org \
    --cc=bharata@linux.ibm.com \
    --cc=david@redhat.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.