qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] target/ppc: fix setting of CR flags in bcdcfsq
@ 2021-08-23 15:02 Luis Pires
  2021-08-25  3:12 ` David Gibson
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Luis Pires @ 2021-08-23 15:02 UTC (permalink / raw)
  To: qemu-devel, qemu-ppc; +Cc: Luis Pires, david

According to the ISA, CR should be set based on the source value, and
not on the packed decimal result.
The way this was implemented would cause GT, LT and EQ to be set
incorrectly when the source value was too large and the 31 least
significant digits of the packed decimal result ended up being all zero.
This would happen for source values of +/-10^31, +/-10^32, etc.

The new implementation fixes this and also skips the result calculation
altogether in case of src overflow.

Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>
---
 target/ppc/int_helper.c | 61 ++++++++++++++++++++++++++++++++---------
 1 file changed, 48 insertions(+), 13 deletions(-)

diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index efa833ef64..de56056429 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -2498,10 +2498,26 @@ uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
     return cr;
 }
 
+/**
+ * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
+ *
+ * Returns:
+ * > 0 if ahi|alo > bhi|blo,
+ * 0 if ahi|alo == bhi|blo,
+ * < 0 if ahi|alo < bhi|blo
+ */
+static inline int ucmp128(uint64_t alo, uint64_t ahi,
+                          uint64_t blo, uint64_t bhi)
+{
+    return (ahi == bhi) ?
+        (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
+        (ahi > bhi ? 1 : -1);
+}
+
 uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
 {
     int i;
-    int cr = 0;
+    int cr;
     uint64_t lo_value;
     uint64_t hi_value;
     ppc_avr_t ret = { .u64 = { 0, 0 } };
@@ -2510,28 +2526,47 @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
         lo_value = -b->VsrSD(1);
         hi_value = ~b->VsrD(0) + !lo_value;
         bcd_put_digit(&ret, 0xD, 0);
+
+        cr = CRF_LT;
     } else {
         lo_value = b->VsrD(1);
         hi_value = b->VsrD(0);
         bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
-    }
 
-    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
-            lo_value > 9999999999999999ULL) {
-        cr = CRF_SO;
+        if (hi_value == 0 && lo_value == 0) {
+            cr = CRF_EQ;
+        } else {
+            cr = CRF_GT;
+        }
     }
 
-    for (i = 1; i < 16; hi_value /= 10, i++) {
-        bcd_put_digit(&ret, hi_value % 10, i);
-    }
+    /*
+     * Check src limits: abs(src) <= 10^31 - 1
+     *
+     * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
+     */
+    if (ucmp128(lo_value, hi_value,
+                0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
+        cr |= CRF_SO;
 
-    for (; i < 32; lo_value /= 10, i++) {
-        bcd_put_digit(&ret, lo_value % 10, i);
-    }
+        /*
+         * According to the ISA, if src wouldn't fit in the destination
+         * register, the result is undefined.
+         * In that case, we leave r unchanged.
+         */
+    } else {
+        divu128(&lo_value, &hi_value, 1000000000000000ULL);
 
-    cr |= bcd_cmp_zero(&ret);
+        for (i = 1; i < 16; hi_value /= 10, i++) {
+            bcd_put_digit(&ret, hi_value % 10, i);
+        }
 
-    *r = ret;
+        for (; i < 32; lo_value /= 10, i++) {
+            bcd_put_digit(&ret, lo_value % 10, i);
+        }
+
+        *r = ret;
+    }
 
     return cr;
 }
-- 
2.25.1



^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] target/ppc: fix setting of CR flags in bcdcfsq
  2021-08-23 15:02 [PATCH] target/ppc: fix setting of CR flags in bcdcfsq Luis Pires
@ 2021-08-25  3:12 ` David Gibson
  2021-09-05 10:47 ` Richard Henderson
  2021-09-06  2:55 ` David Gibson
  2 siblings, 0 replies; 4+ messages in thread
From: David Gibson @ 2021-08-25  3:12 UTC (permalink / raw)
  To: Luis Pires; +Cc: Richard Henderson, qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 3946 bytes --]

On Mon, Aug 23, 2021 at 12:02:35PM -0300, Luis Pires wrote:
> According to the ISA, CR should be set based on the source value, and
> not on the packed decimal result.
> The way this was implemented would cause GT, LT and EQ to be set
> incorrectly when the source value was too large and the 31 least
> significant digits of the packed decimal result ended up being all zero.
> This would happen for source values of +/-10^31, +/-10^32, etc.
> 
> The new implementation fixes this and also skips the result calculation
> altogether in case of src overflow.
> 
> Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>

CCing Richard Henderson.

In general Richard's probably a better person than me to review actual
ppc TCG mechanics.

> ---
>  target/ppc/int_helper.c | 61 ++++++++++++++++++++++++++++++++---------
>  1 file changed, 48 insertions(+), 13 deletions(-)
> 
> diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
> index efa833ef64..de56056429 100644
> --- a/target/ppc/int_helper.c
> +++ b/target/ppc/int_helper.c
> @@ -2498,10 +2498,26 @@ uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
>      return cr;
>  }
>  
> +/**
> + * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
> + *
> + * Returns:
> + * > 0 if ahi|alo > bhi|blo,
> + * 0 if ahi|alo == bhi|blo,
> + * < 0 if ahi|alo < bhi|blo
> + */
> +static inline int ucmp128(uint64_t alo, uint64_t ahi,
> +                          uint64_t blo, uint64_t bhi)
> +{
> +    return (ahi == bhi) ?
> +        (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
> +        (ahi > bhi ? 1 : -1);
> +}
> +
>  uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
>  {
>      int i;
> -    int cr = 0;
> +    int cr;
>      uint64_t lo_value;
>      uint64_t hi_value;
>      ppc_avr_t ret = { .u64 = { 0, 0 } };
> @@ -2510,28 +2526,47 @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
>          lo_value = -b->VsrSD(1);
>          hi_value = ~b->VsrD(0) + !lo_value;
>          bcd_put_digit(&ret, 0xD, 0);
> +
> +        cr = CRF_LT;
>      } else {
>          lo_value = b->VsrD(1);
>          hi_value = b->VsrD(0);
>          bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
> -    }
>  
> -    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
> -            lo_value > 9999999999999999ULL) {
> -        cr = CRF_SO;
> +        if (hi_value == 0 && lo_value == 0) {
> +            cr = CRF_EQ;
> +        } else {
> +            cr = CRF_GT;
> +        }
>      }
>  
> -    for (i = 1; i < 16; hi_value /= 10, i++) {
> -        bcd_put_digit(&ret, hi_value % 10, i);
> -    }
> +    /*
> +     * Check src limits: abs(src) <= 10^31 - 1
> +     *
> +     * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
> +     */
> +    if (ucmp128(lo_value, hi_value,
> +                0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
> +        cr |= CRF_SO;
>  
> -    for (; i < 32; lo_value /= 10, i++) {
> -        bcd_put_digit(&ret, lo_value % 10, i);
> -    }
> +        /*
> +         * According to the ISA, if src wouldn't fit in the destination
> +         * register, the result is undefined.
> +         * In that case, we leave r unchanged.
> +         */
> +    } else {
> +        divu128(&lo_value, &hi_value, 1000000000000000ULL);
>  
> -    cr |= bcd_cmp_zero(&ret);
> +        for (i = 1; i < 16; hi_value /= 10, i++) {
> +            bcd_put_digit(&ret, hi_value % 10, i);
> +        }
>  
> -    *r = ret;
> +        for (; i < 32; lo_value /= 10, i++) {
> +            bcd_put_digit(&ret, lo_value % 10, i);
> +        }
> +
> +        *r = ret;
> +    }
>  
>      return cr;
>  }

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] target/ppc: fix setting of CR flags in bcdcfsq
  2021-08-23 15:02 [PATCH] target/ppc: fix setting of CR flags in bcdcfsq Luis Pires
  2021-08-25  3:12 ` David Gibson
@ 2021-09-05 10:47 ` Richard Henderson
  2021-09-06  2:55 ` David Gibson
  2 siblings, 0 replies; 4+ messages in thread
From: Richard Henderson @ 2021-09-05 10:47 UTC (permalink / raw)
  To: Luis Pires, qemu-devel, qemu-ppc; +Cc: david

On 8/23/21 5:02 PM, Luis Pires wrote:
> According to the ISA, CR should be set based on the source value, and
> not on the packed decimal result.
> The way this was implemented would cause GT, LT and EQ to be set
> incorrectly when the source value was too large and the 31 least
> significant digits of the packed decimal result ended up being all zero.
> This would happen for source values of +/-10^31, +/-10^32, etc.
> 
> The new implementation fixes this and also skips the result calculation
> altogether in case of src overflow.
> 
> Signed-off-by: Luis Pires<luis.pires@eldorado.org.br>
> ---
>   target/ppc/int_helper.c | 61 ++++++++++++++++++++++++++++++++---------
>   1 file changed, 48 insertions(+), 13 deletions(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>

r~


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] target/ppc: fix setting of CR flags in bcdcfsq
  2021-08-23 15:02 [PATCH] target/ppc: fix setting of CR flags in bcdcfsq Luis Pires
  2021-08-25  3:12 ` David Gibson
  2021-09-05 10:47 ` Richard Henderson
@ 2021-09-06  2:55 ` David Gibson
  2 siblings, 0 replies; 4+ messages in thread
From: David Gibson @ 2021-09-06  2:55 UTC (permalink / raw)
  To: Luis Pires; +Cc: qemu-ppc, qemu-devel

[-- Attachment #1: Type: text/plain, Size: 3859 bytes --]

On Mon, Aug 23, 2021 at 12:02:35PM -0300, Luis Pires wrote:
> According to the ISA, CR should be set based on the source value, and
> not on the packed decimal result.
> The way this was implemented would cause GT, LT and EQ to be set
> incorrectly when the source value was too large and the 31 least
> significant digits of the packed decimal result ended up being all zero.
> This would happen for source values of +/-10^31, +/-10^32, etc.
> 
> The new implementation fixes this and also skips the result calculation
> altogether in case of src overflow.
> 
> Signed-off-by: Luis Pires <luis.pires@eldorado.org.br>

Applied to ppc-for-6.2, thanks.

> ---
>  target/ppc/int_helper.c | 61 ++++++++++++++++++++++++++++++++---------
>  1 file changed, 48 insertions(+), 13 deletions(-)
> 
> diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
> index efa833ef64..de56056429 100644
> --- a/target/ppc/int_helper.c
> +++ b/target/ppc/int_helper.c
> @@ -2498,10 +2498,26 @@ uint32_t helper_bcdctz(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
>      return cr;
>  }
>  
> +/**
> + * Compare 2 128-bit unsigned integers, passed in as unsigned 64-bit pairs
> + *
> + * Returns:
> + * > 0 if ahi|alo > bhi|blo,
> + * 0 if ahi|alo == bhi|blo,
> + * < 0 if ahi|alo < bhi|blo
> + */
> +static inline int ucmp128(uint64_t alo, uint64_t ahi,
> +                          uint64_t blo, uint64_t bhi)
> +{
> +    return (ahi == bhi) ?
> +        (alo > blo ? 1 : (alo == blo ? 0 : -1)) :
> +        (ahi > bhi ? 1 : -1);
> +}
> +
>  uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
>  {
>      int i;
> -    int cr = 0;
> +    int cr;
>      uint64_t lo_value;
>      uint64_t hi_value;
>      ppc_avr_t ret = { .u64 = { 0, 0 } };
> @@ -2510,28 +2526,47 @@ uint32_t helper_bcdcfsq(ppc_avr_t *r, ppc_avr_t *b, uint32_t ps)
>          lo_value = -b->VsrSD(1);
>          hi_value = ~b->VsrD(0) + !lo_value;
>          bcd_put_digit(&ret, 0xD, 0);
> +
> +        cr = CRF_LT;
>      } else {
>          lo_value = b->VsrD(1);
>          hi_value = b->VsrD(0);
>          bcd_put_digit(&ret, bcd_preferred_sgn(0, ps), 0);
> -    }
>  
> -    if (divu128(&lo_value, &hi_value, 1000000000000000ULL) ||
> -            lo_value > 9999999999999999ULL) {
> -        cr = CRF_SO;
> +        if (hi_value == 0 && lo_value == 0) {
> +            cr = CRF_EQ;
> +        } else {
> +            cr = CRF_GT;
> +        }
>      }
>  
> -    for (i = 1; i < 16; hi_value /= 10, i++) {
> -        bcd_put_digit(&ret, hi_value % 10, i);
> -    }
> +    /*
> +     * Check src limits: abs(src) <= 10^31 - 1
> +     *
> +     * 10^31 - 1 = 0x0000007e37be2022 c0914b267fffffff
> +     */
> +    if (ucmp128(lo_value, hi_value,
> +                0xc0914b267fffffffULL, 0x7e37be2022ULL) > 0) {
> +        cr |= CRF_SO;
>  
> -    for (; i < 32; lo_value /= 10, i++) {
> -        bcd_put_digit(&ret, lo_value % 10, i);
> -    }
> +        /*
> +         * According to the ISA, if src wouldn't fit in the destination
> +         * register, the result is undefined.
> +         * In that case, we leave r unchanged.
> +         */
> +    } else {
> +        divu128(&lo_value, &hi_value, 1000000000000000ULL);
>  
> -    cr |= bcd_cmp_zero(&ret);
> +        for (i = 1; i < 16; hi_value /= 10, i++) {
> +            bcd_put_digit(&ret, hi_value % 10, i);
> +        }
>  
> -    *r = ret;
> +        for (; i < 32; lo_value /= 10, i++) {
> +            bcd_put_digit(&ret, lo_value % 10, i);
> +        }
> +
> +        *r = ret;
> +    }
>  
>      return cr;
>  }

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-09-06  3:04 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-23 15:02 [PATCH] target/ppc: fix setting of CR flags in bcdcfsq Luis Pires
2021-08-25  3:12 ` David Gibson
2021-09-05 10:47 ` Richard Henderson
2021-09-06  2:55 ` David Gibson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).