From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: Luis Pires <luis.pires@eldorado.org.br>
Subject: [PULL 04/56] host-utils: add 128-bit quotient support to divu128/divs128
Date: Wed, 27 Oct 2021 19:40:39 -0700
Message-ID: <20211028024131.1492790-5-richard.henderson@linaro.org>
In-Reply-To: <20211028024131.1492790-1-richard.henderson@linaro.org>

From: Luis Pires <luis.pires@eldorado.org.br>

These will be used to implement new decimal floating point
instructions from Power ISA 3.1.

The remainder is now returned directly by divu128/divs128,
freeing up phigh to receive the high 64 bits of the quotient.
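
As an illustration (hypothetical caller code, not part of this patch),
the new calling convention for the unsigned variant looks like this:

    /* Illustrative only: divide a 128-bit value by 10 */
    uint64_t lo = 0xffffffffffffffffULL;   /* low 64 bits of the dividend */
    uint64_t hi = 0x1ULL;                  /* high 64 bits of the dividend */
    uint64_t rem = divu128(&lo, &hi, 10);
    /* hi:lo now holds the full 128-bit quotient, rem the remainder */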

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20211025191154.350831-4-luis.pires@eldorado.org.br>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
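A quick sanity check of the new contract (illustrative only, not part of
this patch; assumes a host compiler with __int128 support and a build
inside the QEMU tree so that qemu/host-utils.h is available):

    #include "qemu/osdep.h"
    #include "qemu/host-utils.h"

    /* Compare divu128 against the compiler's native 128-bit division.
     * divisor must be non-zero. */
    static void check_divu128(uint64_t hi, uint64_t lo, uint64_t divisor)
    {
        __uint128_t dividend = ((__uint128_t)hi << 64) | lo;
        uint64_t qlo = lo, qhi = hi;
        uint64_t rem = divu128(&qlo, &qhi, divisor);

        g_assert((((__uint128_t)qhi << 64) | qlo) == dividend / divisor);
        g_assert(rem == dividend % divisor);
    }
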
 include/hw/clock.h        |   6 +-
 include/qemu/host-utils.h |  20 ++++--
 target/ppc/int_helper.c   |   9 +--
 util/host-utils.c         | 133 +++++++++++++++++++++++++-------------
 4 files changed, 108 insertions(+), 60 deletions(-)

diff --git a/include/hw/clock.h b/include/hw/clock.h
index 7443e6c4ab..5c927cee7f 100644
--- a/include/hw/clock.h
+++ b/include/hw/clock.h
@@ -323,11 +323,7 @@ static inline uint64_t clock_ns_to_ticks(const Clock *clk, uint64_t ns)
     if (clk->period == 0) {
         return 0;
     }
-    /*
-     * BUG: when CONFIG_INT128 is not defined, the current implementation of
-     * divu128 does not return a valid truncated quotient, so the result will
-     * be wrong.
-     */
+
     divu128(&lo, &hi, clk->period);
     return lo;
 }
diff --git a/include/qemu/host-utils.h b/include/qemu/host-utils.h
index 08a17e16e5..a3a7ced78d 100644
--- a/include/qemu/host-utils.h
+++ b/include/qemu/host-utils.h
@@ -56,26 +56,32 @@ static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
     return (__int128_t)a * b / c;
 }
 
-static inline void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
+static inline uint64_t divu128(uint64_t *plow, uint64_t *phigh,
+                               uint64_t divisor)
 {
     __uint128_t dividend = ((__uint128_t)*phigh << 64) | *plow;
     __uint128_t result = dividend / divisor;
+
     *plow = result;
-    *phigh = dividend % divisor;
+    *phigh = result >> 64;
+    return dividend % divisor;
 }
 
-static inline void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
+static inline int64_t divs128(uint64_t *plow, int64_t *phigh,
+                              int64_t divisor)
 {
-    __int128_t dividend = ((__int128_t)*phigh << 64) | (uint64_t)*plow;
+    __int128_t dividend = ((__int128_t)*phigh << 64) | *plow;
     __int128_t result = dividend / divisor;
+
     *plow = result;
-    *phigh = dividend % divisor;
+    *phigh = result >> 64;
+    return dividend % divisor;
 }
 #else
 void muls64(uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b);
 void mulu64(uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b);
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor);
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor);
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor);
 
 static inline uint64_t muldiv64(uint64_t a, uint32_t b, uint32_t c)
 {
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index 510faf24cf..eeb7781a9e 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -120,7 +120,7 @@ uint64_t helper_divdeu(CPUPPCState *env, uint64_t ra, uint64_t rb, uint32_t oe)
 
 uint64_t helper_divde(CPUPPCState *env, uint64_t rau, uint64_t rbu, uint32_t oe)
 {
-    int64_t rt = 0;
+    uint64_t rt = 0;
     int64_t ra = (int64_t)rau;
     int64_t rb = (int64_t)rbu;
     int overflow = 0;
@@ -2506,6 +2506,7 @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
     int cr;
     uint64_t lo_value;
     uint64_t hi_value;
+    uint64_t rem;
     ppc_avr_t ret = { .u64 = { 0, 0 } };
 
     if (b->VsrSD(0) < 0) {
@@ -2541,10 +2542,10 @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
          * In that case, we leave r unchanged.
          */
     } else {
-        divu128(&lo_value, &hi_value, 1000000000000000ULL);
+        rem = divu128(&lo_value, &hi_value, 1000000000000000ULL);
 
-        for (i = 1; i < 16; hi_value /= 10, i++) {
-            bcd_put_digit(&ret, hi_value % 10, i);
+        for (i = 1; i < 16; rem /= 10, i++) {
+            bcd_put_digit(&ret, rem % 10, i);
         }
 
         for (; i < 32; lo_value /= 10, i++) {
diff --git a/util/host-utils.c b/util/host-utils.c
index 701a371843..bcc772b8ec 100644
--- a/util/host-utils.c
+++ b/util/host-utils.c
@@ -87,72 +87,117 @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
 }
 
 /*
- * Unsigned 128-by-64 division. Returns quotient via plow and
- * remainder via phigh.
- * The result must fit in 64 bits (plow) - otherwise, the result
- * is undefined.
- * This function will cause a division by zero if passed a zero divisor.
+ * Unsigned 128-by-64 division.
+ * Returns the quotient via plow and phigh.
+ * Returns the remainder via the function return value.
+ * This function will cause a division by zero if passed a zero divisor.
  */
-void divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
+uint64_t divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
 {
     uint64_t dhi = *phigh;
     uint64_t dlo = *plow;
-    unsigned i;
-    uint64_t carry = 0;
+    uint64_t rem, dhighest;
+    int sh;
 
     if (divisor == 0 || dhi == 0) {
         *plow  = dlo / divisor;
-        *phigh = dlo % divisor;
+        *phigh = 0;
+        return dlo % divisor;
     } else {
+        sh = clz64(divisor);
 
-        for (i = 0; i < 64; i++) {
-            carry = dhi >> 63;
-            dhi = (dhi << 1) | (dlo >> 63);
-            if (carry || (dhi >= divisor)) {
-                dhi -= divisor;
-                carry = 1;
-            } else {
-                carry = 0;
+        if (dhi < divisor) {
+            if (sh != 0) {
+                /* normalize the divisor, shifting the dividend accordingly */
+                divisor <<= sh;
+                dhi = (dhi << sh) | (dlo >> (64 - sh));
+                dlo <<= sh;
             }
-            dlo = (dlo << 1) | carry;
+
+            *phigh = 0;
+            *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
+        } else {
+            if (sh != 0) {
+                /* normalize the divisor, shifting the dividend accordingly */
+                divisor <<= sh;
+                dhighest = dhi >> (64 - sh);
+                dhi = (dhi << sh) | (dlo >> (64 - sh));
+                dlo <<= sh;
+
+                *phigh = udiv_qrnnd(&dhi, dhighest, dhi, divisor);
+            } else {
+                /*
+                 * Here dhi >= divisor.
+                 * Since the MSB of divisor is set (sh == 0),
+                 * (dhi - divisor) < divisor.
+                 *
+                 * Thus, the high part of the quotient is 1, and we can
+                 * calculate the low part with a single call to udiv_qrnnd
+                 * after subtracting divisor from dhi.
+                 */
+                dhi -= divisor;
+                *phigh = 1;
+            }
+
+            *plow = udiv_qrnnd(&rem, dhi, dlo, divisor);
         }
 
-        *plow = dlo;
-        *phigh = dhi;
+        /*
+         * Since the dividend/divisor might have been normalized,
+         * the remainder might also have to be shifted back.
+         */
+        return rem >> sh;
     }
 }
 
 /*
- * Signed 128-by-64 division. Returns quotient via plow and
- * remainder via phigh.
- * The result must fit in 64 bits (plow) - otherwise, the result
- * is undefined.
- * This function will cause a division by zero if passed a zero divisor.
+ * Signed 128-by-64 division.
+ * Returns the quotient via plow and phigh.
+ * Returns the remainder via the function return value.
  */
-void divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
+int64_t divs128(uint64_t *plow, int64_t *phigh, int64_t divisor)
 {
-    int sgn_dvdnd = *phigh < 0;
-    int sgn_divsr = divisor < 0;
+    bool neg_quotient = false, neg_remainder = false;
+    uint64_t unsig_hi = *phigh, unsig_lo = *plow;
+    uint64_t rem;
 
-    if (sgn_dvdnd) {
-        *plow = ~(*plow);
-        *phigh = ~(*phigh);
-        if (*plow == (int64_t)-1) {
+    if (*phigh < 0) {
+        neg_quotient = !neg_quotient;
+        neg_remainder = !neg_remainder;
+
+        if (unsig_lo == 0) {
+            unsig_hi = -unsig_hi;
+        } else {
+            unsig_hi = ~unsig_hi;
+            unsig_lo = -unsig_lo;
+        }
+    }
+
+    if (divisor < 0) {
+        neg_quotient = !neg_quotient;
+
+        divisor = -divisor;
+    }
+
+    rem = divu128(&unsig_lo, &unsig_hi, (uint64_t)divisor);
+
+    if (neg_quotient) {
+        if (unsig_lo == 0) {
+            *phigh = -unsig_hi;
             *plow = 0;
-            (*phigh)++;
-         } else {
-            (*plow)++;
-         }
+        } else {
+            *phigh = ~unsig_hi;
+            *plow = -unsig_lo;
+        }
+    } else {
+        *phigh = unsig_hi;
+        *plow = unsig_lo;
     }
 
-    if (sgn_divsr) {
-        divisor = 0 - divisor;
-    }
-
-    divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
-
-    if (sgn_dvdnd  ^ sgn_divsr) {
-        *plow = 0 - *plow;
+    if (neg_remainder) {
+        return -rem;
+    } else {
+        return rem;
     }
 }
 #endif
-- 
2.25.1


