All of lore.kernel.org
 help / color / mirror / Atom feed
From: Luis Pires <luis.pires@eldorado.org.br>
To: qemu-devel@nongnu.org, qemu-ppc@nongnu.org
Cc: Luis Pires <luis.pires@eldorado.org.br>,
	richard.henderson@linaro.org, groug@kaod.org,
	david@gibson.dropbear.id.au
Subject: [PATCH 04/19] host-utils: add 128-bit quotient support to divu128/divs128
Date: Tue, 24 Aug 2021 11:27:15 -0300	[thread overview]
Message-ID: <20210824142730.102421-5-luis.pires@eldorado.org.br> (raw)
In-Reply-To: <20210824142730.102421-1-luis.pires@eldorado.org.br>

These will be used to implement new decimal floating point
instructions from Power ISA 3.1.

A new argument, prem, was added to divu128/divs128 to receive the
remainder, freeing up phigh to receive the high 64 bits of the
quotient.

For scenarios supported by the previous implementation
(<= 64-bit quotient) with large (> 64-bit) dividends, testing showed
that:
- when the dividend is much larger than the divisor, the performance of
the new implementation is equivalent to the old one.
- as the dividend and the divisor get closer in magnitude (e.g. a 65-bit
dividend and a 64-bit divisor), the performance is significantly improved,
due to the smaller number of shift-subtract iterations.

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
---
 include/hw/clock.h        |   8 +--
 include/qemu/host-utils.h |  20 ++++--
 target/ppc/int_helper.c   |  13 ++--
 util/host-utils.c         | 128 +++++++++++++++++++++++++++-----------
 4 files changed, 113 insertions(+), 56 deletions(-)

diff --git a/include/hw/clock.h b/include/hw/clock.h
index 5a40a076aa..2f162f7a6f 100644
--- a/include/hw/clock.h
+++ b/include/hw/clock.h
@@ -319,12 +319,8 @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
     if (clk->period == 0) {
         return 0;
     }
-    /*
-     * BUG: when CONFIG_INT128 is not defined, the current implementation of
-     * divu128 does not return a valid truncated quotient, so the result will
-     * be wrong.
-     */
-    divu128(&lo, &hi, clk->period);
+
+    divu128(&lo, &hi, NULL, clk->period);
     return lo;
 }
 
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index 97a3fbb06a..8e8cab9a3e 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -52,26 +52,34 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
     return (__int128_t)a * b / c;
 }
 
-static inline void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
+static inline void divu128(uint64_t *plow, uint64_t *phigh, uint64_t *prem,
+                           uint64_t divisor)
 {
     __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
     __uint128_t result = dividend / divisor;
     *plow = result;
-    *phigh = dividend % divisor;
+    *phigh = result >> 64;
+    if (prem) {
+        *prem = dividend % divisor;
+    }
 }
 
-static inline void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
+static inline void divs128(uint64_t *plow, int64_t *phigh, int64_t *prem,
+                           int64_t divisor)
 {
     __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
     __int128_t result = dividend / divisor;
     *plow = result;
-    *phigh = dividend % divisor;
+    *phigh = result >> 64;
+    if (prem) {
+        *prem = dividend % divisor;
+    }
 }
 #else
 void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
 void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
+void divu128(uint64_t *plow, uint64_t *phigh, uint64_t *prem, uint64_t divisor);
+void divs128(uint64_t *plow, int64_t *phigh, int64_t *prem, int64_t divisor);
 
 static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
 {
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 227e2ce0ec..53b234d808 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -108,7 +108,7 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
         overflow = 1;
         rt = 0; /* Undefined */
     } else {
-        divu128(&rt, &ra, rb);
+        divu128(&rt, &ra, NULL, rb);
     }
 
     if (oe) {
@@ -120,7 +120,7 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
 
 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
 {
-    int64_t rt = 0;
+    uint64_t rt = 0;
     int64_t ra = (int64_t)rau;
     int64_t rb = (int64_t)rbu;
     int overflow = 0;
@@ -129,7 +129,7 @@ uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
         overflow = 1;
         rt = 0; /* Undefined */
     } else {
-        divs128(&rt, &ra, rb);
+        divs128(&rt, &ra, NULL, rb);
     }
 
     if (oe) {
@@ -2524,6 +2524,7 @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
     int cr;
     uint64_t lo_value;
     uint64_t hi_value;
+    uint64_t rem;
     ppc_avr_t ret = { .u64 = { 0, 0 } };
 
     if (b->VsrSD(0) < 0) {
@@ -2559,10 +2560,10 @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
          * In that case, we leave r unchanged.
          */
     } else {
-        divu128(&lo_value, &hi_value, 1000000000000000ULL);
+        divu128(&lo_value, &hi_value, &rem, 1000000000000000ULL);
 
-        for (i = 1; i < 16; hi_value /= 10, i++) {
-            bcd_put_digit(&ret, hi_value % 10, i);
+        for (i = 1; i < 16; rem /= 10, i++) {
+            bcd_put_digit(&ret, rem % 10, i);
         }
 
         for (; i < 32; lo_value /= 10, i++) {
diff --git a/util/host-utils.c b/util/host-utils.c
index ff75fdf1e1..80f5a45cbd 100644
--- a/util/host-utils.c
+++ b/util/host-utils.c
@@ -87,75 +87,127 @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
 }
 
 /*
- * Unsigned 128-by-64 division. Returns quotient via plow and
- * remainder via phigh.
- * The result must fit in 64 bits (plow) - otherwise, the result
- * is undefined.
- * This function will cause a division by zero if passed a zero divisor.
+ * Unsigned 128-by-64 division.
+ * Returns quotient via plow and phigh.
+ * Optionally (if prem != NULL), returns the remainder via prem.
  */
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
+void divu128(uint64_t *plow, uint64_t *phigh, uint64_t *prem, uint64_t divisor)
 {
     uint64_t dhi = *phigh;
     uint64_t dlo = *plow;
+    uint64_t result_bit;
+    uint64_t carry_bit = 0;
     unsigned i;
-    uint64_t carry = 0;
+    int dividend_lz_bits, divisor_lz_bits;
+    int diff_lz_bits;
 
     if (divisor == 0) {
         /* intentionally cause a division by 0 */
         *plow = 1 / divisor;
     } else if (dhi == 0) {
         *plow  = dlo / divisor;
-        *phigh = dlo % divisor;
+        *phigh = 0;
+        if (prem) {
+            *prem = dlo % divisor;
+        }
     } else {
+        dividend_lz_bits = clz64(dhi);
+        divisor_lz_bits = clz64(divisor);
+        diff_lz_bits = dividend_lz_bits - divisor_lz_bits;
 
-        for (i = 0; i < 64; i++) {
-            carry = dhi >> 63;
-            dhi = (dhi << 1) | (dlo >> 63);
-            if (carry || (dhi >= divisor)) {
+        /*
+         * Move relevant bits of dividend and divisor all the way to the left
+         */
+        if (dividend_lz_bits > 0) {
+            /* 0 < dividend_lz_bits < 64 */
+            dhi = dhi << dividend_lz_bits | dlo >> (64 - dividend_lz_bits);
+            dlo = dlo << dividend_lz_bits;
+        }
+        if (divisor_lz_bits > 0) {
+            /* 0 < divisor_lz_bits < 64 */
+            divisor = divisor << divisor_lz_bits;
+        }
+
+        for (i = 0; i < 65 - diff_lz_bits; i++) {
+            if (carry_bit || (dhi >= divisor)) {
                 dhi -= divisor;
-                carry = 1;
+                result_bit = 1;
             } else {
-                carry = 0;
+                result_bit = 0;
             }
-            dlo = (dlo << 1) | carry;
+
+            carry_bit = dhi >> 63;
+            dhi = (dhi << 1) | (dlo >> 63);
+            dlo = (dlo << 1) | result_bit;
         }
 
+        if (prem) {
+            if (divisor_lz_bits == 63) {
+                *prem = carry_bit;
+            } else {
+                *prem = carry_bit << (63 - divisor_lz_bits) |
+                    dhi >> (divisor_lz_bits + 1);
+            }
+        }
         *plow = dlo;
-        *phigh = dhi;
+        if (diff_lz_bits <= 0) {
+            *phigh = dhi & (0xffffffffffffffffULL >> (63 + diff_lz_bits));
+        } else {
+            *phigh = 0;
+        }
     }
 }
 
 /*
- * Signed 128-by-64 division. Returns quotient via plow and
- * remainder via phigh.
- * The result must fit in 64 bits (plow) - otherwise, the result
- * is undefined.
- * This function will cause a division by zero if passed a zero divisor.
+ * Signed 128-by-64 division.
+ * Returns quotient via plow and phigh.
+ * Optionally (if prem != NULL), returns the remainder via prem.
  */
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
+void divs128(uint64_t *plow, int64_t *phigh, int64_t *prem, int64_t divisor)
 {
-    int sgn_dvdnd = *phigh < 0;
-    int sgn_divsr = divisor < 0;
+    int neg_quotient = 0, neg_remainder = 0;
+    uint64_t unsig_hi = *phigh, unsig_lo = *plow;
+    uint64_t rem;
 
-    if (sgn_dvdnd) {
-        *plow = ~(*plow);
-        *phigh = ~(*phigh);
-        if (*plow == (int64_t)-1) {
-            *plow = 0;
-            (*phigh)++;
-         } else {
-            (*plow)++;
-         }
+    if (*phigh < 0) {
+        neg_quotient = ~neg_quotient;
+        neg_remainder = ~neg_remainder;
+
+        if (unsig_lo == 0) {
+            unsig_hi = -unsig_hi;
+        } else {
+            unsig_hi = ~unsig_hi;
+            unsig_lo = -unsig_lo;
+        }
     }
 
-    if (sgn_divsr) {
-        divisor = 0 - divisor;
+    if (divisor < 0) {
+        neg_quotient = ~neg_quotient;
+
+        divisor = -divisor;
     }
 
-    divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
+    divu128(&unsig_lo, &unsig_hi, &rem, (uint64_t)divisor);
 
-    if (sgn_dvdnd  ^ sgn_divsr) {
-        *plow = 0 - *plow;
+    if (neg_quotient) {
+        if (unsig_lo == 0) {
+            *phigh = -unsig_hi;
+            *plow = 0;
+        } else {
+            *phigh = ~unsig_hi;
+            *plow = -unsig_lo;
+        }
+    } else {
+        *phigh = unsig_hi;
+        *plow = unsig_lo;
+    }
+
+    if (prem) {
+        if (neg_remainder) {
+            *prem = -rem;
+        } else {
+            *prem = rem;
+        }
     }
 }
 #endif
-- 
2.25.1



  parent reply	other threads:[~2021-08-24 14:30 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-24 14:27 [PATCH 00/19] target/ppc: DFP instructions using decodetree Luis Pires
2021-08-24 14:27 ` [PATCH 01/19] host-utils: Fix overflow detection in divu128() Luis Pires
2021-08-24 14:27 ` [PATCH 02/19] host-utils: move abs64() to host-utils Luis Pires
2021-08-25  3:43   ` David Gibson
2021-08-25 12:48     ` Luis Fernando Fujita Pires
2021-08-25 20:26       ` Eduardo Habkost
2021-08-25 20:37         ` Luis Fernando Fujita Pires
2021-08-25 21:18           ` Eduardo Habkost
2021-08-25 21:27             ` Philippe Mathieu-Daudé
2021-08-27 14:28               ` Luis Fernando Fujita Pires
2021-08-24 14:27 ` [PATCH 03/19] host-utils: move checks out of divu128/divs128 Luis Pires
2021-08-24 14:27 ` Luis Pires [this message]
2021-08-24 14:27 ` [PATCH 05/19] host-utils: add unit tests for divu128/divs128 Luis Pires
2021-08-24 14:27 ` [PATCH 06/19] libdecnumber: introduce decNumberFrom[U]Int128 Luis Pires
2021-08-24 14:27 ` [PATCH 07/19] target/ppc: Move REQUIRE_ALTIVEC/VECTOR to translate.c Luis Pires
2021-08-25  3:46   ` David Gibson
2021-08-24 14:27 ` [PATCH 08/19] target/ppc: Introduce REQUIRE_FPU Luis Pires
2021-08-25  3:46   ` David Gibson
2021-08-24 14:27 ` [PATCH 09/19] target/ppc: Implement DCFFIXQQ Luis Pires
2021-08-24 14:27 ` [PATCH 10/19] host-utils: Introduce mulu128 Luis Pires
2021-08-24 14:27 ` [PATCH 11/19] libdecnumber: Introduce decNumberIntegralToInt128 Luis Pires
2021-08-24 14:27 ` [PATCH 12/19] target/ppc: Implement DCTFIXQQ Luis Pires
2021-08-24 14:27 ` [PATCH 13/19] target/ppc: Move dtstdc[q]/dtstdg[q] to decodetree Luis Pires
2021-08-25 13:15   ` Philippe Mathieu-Daudé
2021-08-24 14:27 ` [PATCH 14/19] target/ppc: Move d{add, sub, mul, div, iex}[q] " Luis Pires
2021-08-25 13:16   ` Philippe Mathieu-Daudé
2021-08-24 14:27 ` [PATCH 15/19] target/ppc: Move dcmp{u, o}[q], dts{tex, tsf, tsfi}[q] " Luis Pires
2021-08-24 14:27 ` [PATCH 16/19] target/ppc: Move dquai[q], drint{x,n}[q] " Luis Pires
2021-08-25 13:18   ` Philippe Mathieu-Daudé
2021-08-24 14:27 ` [PATCH 17/19] target/ppc: Move dqua[q], drrnd[q] " Luis Pires
2021-08-25 13:18   ` Philippe Mathieu-Daudé
2021-08-24 14:27 ` [PATCH 18/19] target/ppc: Move dct{dp, qpq}, dr{sp, dpq}, dc{f, t}fix[q], dxex[q] " Luis Pires
2021-08-25 13:19   ` Philippe Mathieu-Daudé
2021-08-24 14:27 ` [PATCH 19/19] target/ppc: Move ddedpd[q], denbcd[q], dscli[q], dscri[q] " Luis Pires
2021-08-25 13:20   ` Philippe Mathieu-Daudé

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210824142730.102421-5-luis.pires@eldorado.org.br \
    --to=luis.pires@eldorado.org.br \
    --cc=david@gibson.dropbear.id.au \
    --cc=groug@kaod.org \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.