All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] target/ppc: Merge COMPUTE_CLASS and COMPUTE_FPRF
@ 2023-05-23 20:25 Richard Henderson
  2023-05-23 22:50 ` BALATON Zoltan
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Richard Henderson @ 2023-05-23 20:25 UTC (permalink / raw)
  To: qemu-devel; +Cc: alex.bennee, qemu-ppc, balaton

Instead of computing an artifical "class" bitmask then
converting that to the fprf value, compute the final
value from the start.

Reorder the tests to check the most likely cases first.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 target/ppc/fpu_helper.c | 78 ++++++++++++-----------------------------
 1 file changed, 22 insertions(+), 56 deletions(-)

diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
index a66e16c212..03150a0f10 100644
--- a/target/ppc/fpu_helper.c
+++ b/target/ppc/fpu_helper.c
@@ -141,62 +141,28 @@ static inline int ppc_float64_get_unbiased_exp(float64 f)
     return ((f >> 52) & 0x7FF) - 1023;
 }
 
-/* Classify a floating-point number.  */
-enum {
-    is_normal   = 1,
-    is_zero     = 2,
-    is_denormal = 4,
-    is_inf      = 8,
-    is_qnan     = 16,
-    is_snan     = 32,
-    is_neg      = 64,
-};
-
-#define COMPUTE_CLASS(tp)                                      \
-static int tp##_classify(tp arg)                               \
-{                                                              \
-    int ret = tp##_is_neg(arg) * is_neg;                       \
-    if (unlikely(tp##_is_any_nan(arg))) {                      \
-        float_status dummy = { };  /* snan_bit_is_one = 0 */   \
-        ret |= (tp##_is_signaling_nan(arg, &dummy)             \
-                ? is_snan : is_qnan);                          \
-    } else if (unlikely(tp##_is_infinity(arg))) {              \
-        ret |= is_inf;                                         \
-    } else if (tp##_is_zero(arg)) {                            \
-        ret |= is_zero;                                        \
-    } else if (tp##_is_zero_or_denormal(arg)) {                \
-        ret |= is_denormal;                                    \
-    } else {                                                   \
-        ret |= is_normal;                                      \
-    }                                                          \
-    return ret;                                                \
-}
-
-COMPUTE_CLASS(float16)
-COMPUTE_CLASS(float32)
-COMPUTE_CLASS(float64)
-COMPUTE_CLASS(float128)
-
-static void set_fprf_from_class(CPUPPCState *env, int class)
-{
-    static const uint8_t fprf[6][2] = {
-        { 0x04, 0x08 },  /* normalized */
-        { 0x02, 0x12 },  /* zero */
-        { 0x14, 0x18 },  /* denormalized */
-        { 0x05, 0x09 },  /* infinity */
-        { 0x11, 0x11 },  /* qnan */
-        { 0x00, 0x00 },  /* snan -- flags are undefined */
-    };
-    bool isneg = class & is_neg;
-
-    env->fpscr &= ~FP_FPRF;
-    env->fpscr |= fprf[ctz32(class)][isneg] << FPSCR_FPRF;
-}
-
-#define COMPUTE_FPRF(tp)                                \
-void helper_compute_fprf_##tp(CPUPPCState *env, tp arg) \
-{                                                       \
-    set_fprf_from_class(env, tp##_classify(arg));       \
+#define COMPUTE_FPRF(tp)                                          \
+void helper_compute_fprf_##tp(CPUPPCState *env, tp arg)           \
+{                                                                 \
+    bool neg = tp##_is_neg(arg);                                  \
+    target_ulong fprf;                                            \
+    if (likely(tp##_is_normal(arg))) {                            \
+        fprf = neg ? 0x08 << FPSCR_FPRF : 0x04 << FPSCR_FPRF;     \
+    } else if (tp##_is_zero(arg)) {                               \
+        fprf = neg ? 0x12 << FPSCR_FPRF : 0x02 << FPSCR_FPRF;     \
+    } else if (tp##_is_zero_or_denormal(arg)) {                   \
+        fprf = neg ? 0x18 << FPSCR_FPRF : 0x14 << FPSCR_FPRF;     \
+    } else if (tp##_is_infinity(arg)) {                           \
+        fprf = neg ? 0x09 << FPSCR_FPRF : 0x05 << FPSCR_FPRF;     \
+    } else {                                                      \
+        float_status dummy = { };  /* snan_bit_is_one = 0 */      \
+        if (tp##_is_signaling_nan(arg, &dummy)) {                 \
+            fprf = 0x00 << FPSCR_FPRF;                            \
+        } else {                                                  \
+            fprf = 0x11 << FPSCR_FPRF;                            \
+        }                                                         \
+    }                                                             \
+    env->fpscr = (env->fpscr & ~FP_FPRF) | fprf;                  \
 }
 
 COMPUTE_FPRF(float16)
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] target/ppc: Merge COMPUTE_CLASS and COMPUTE_FPRF
  2023-05-23 20:25 [PATCH] target/ppc: Merge COMPUTE_CLASS and COMPUTE_FPRF Richard Henderson
@ 2023-05-23 22:50 ` BALATON Zoltan
  2023-05-23 23:02 ` BALATON Zoltan
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: BALATON Zoltan @ 2023-05-23 22:50 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, alex.bennee, qemu-ppc

On Tue, 23 May 2023, Richard Henderson wrote:
> Instead of computing an artifical "class" bitmask then
> converting that to the fprf value, compute the final
> value from the start.
>
> Reorder the tests to check the most likely cases first.

This seems to work in that it makes the function go down in the profile, 
but it does not seem to gain much speed; that is still about the same. The 
profile looks like this with this patch (above these there are only 
cpu_exec TCG related functions and muladd, fmadds helper but all with 
less self percent, the ones with above 5% self count are all here):

children   self   command          symbol
10.54%     3.08%  qemu-system-ppc  float64r32_round_pack_canonical
  8.67%     0.43%  qemu-system-ppc  parts64_uncanon
  8.50%     0.62%  qemu-system-ppc  helper_fmuls
  7.96%     7.96%  qemu-system-ppc  parts64_uncanon_normal
  7.84%     1.78%  qemu-system-ppc  float64r32_mul
  7.77%     6.69%  qemu-system-ppc  parts64_muladd
  7.72%     1.88%  qemu-system-ppc  helper_float_check_status
  7.70%     7.20%  qemu-system-ppc  helper_compute_fprf_float64
  6.67%     0.48%  qemu-system-ppc  helper_fadds

Regards,
BALATON Zoltan

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/ppc/fpu_helper.c | 78 ++++++++++++-----------------------------
> 1 file changed, 22 insertions(+), 56 deletions(-)
>
> diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
> index a66e16c212..03150a0f10 100644
> --- a/target/ppc/fpu_helper.c
> +++ b/target/ppc/fpu_helper.c
> @@ -141,62 +141,28 @@ static inline int ppc_float64_get_unbiased_exp(float64 f)
>     return ((f >> 52) & 0x7FF) - 1023;
> }
>
> -/* Classify a floating-point number.  */
> -enum {
> -    is_normal   = 1,
> -    is_zero     = 2,
> -    is_denormal = 4,
> -    is_inf      = 8,
> -    is_qnan     = 16,
> -    is_snan     = 32,
> -    is_neg      = 64,
> -};
> -
> -#define COMPUTE_CLASS(tp)                                      \
> -static int tp##_classify(tp arg)                               \
> -{                                                              \
> -    int ret = tp##_is_neg(arg) * is_neg;                       \
> -    if (unlikely(tp##_is_any_nan(arg))) {                      \
> -        float_status dummy = { };  /* snan_bit_is_one = 0 */   \
> -        ret |= (tp##_is_signaling_nan(arg, &dummy)             \
> -                ? is_snan : is_qnan);                          \
> -    } else if (unlikely(tp##_is_infinity(arg))) {              \
> -        ret |= is_inf;                                         \
> -    } else if (tp##_is_zero(arg)) {                            \
> -        ret |= is_zero;                                        \
> -    } else if (tp##_is_zero_or_denormal(arg)) {                \
> -        ret |= is_denormal;                                    \
> -    } else {                                                   \
> -        ret |= is_normal;                                      \
> -    }                                                          \
> -    return ret;                                                \
> -}
> -
> -COMPUTE_CLASS(float16)
> -COMPUTE_CLASS(float32)
> -COMPUTE_CLASS(float64)
> -COMPUTE_CLASS(float128)
> -
> -static void set_fprf_from_class(CPUPPCState *env, int class)
> -{
> -    static const uint8_t fprf[6][2] = {
> -        { 0x04, 0x08 },  /* normalized */
> -        { 0x02, 0x12 },  /* zero */
> -        { 0x14, 0x18 },  /* denormalized */
> -        { 0x05, 0x09 },  /* infinity */
> -        { 0x11, 0x11 },  /* qnan */
> -        { 0x00, 0x00 },  /* snan -- flags are undefined */
> -    };
> -    bool isneg = class & is_neg;
> -
> -    env->fpscr &= ~FP_FPRF;
> -    env->fpscr |= fprf[ctz32(class)][isneg] << FPSCR_FPRF;
> -}
> -
> -#define COMPUTE_FPRF(tp)                                \
> -void helper_compute_fprf_##tp(CPUPPCState *env, tp arg) \
> -{                                                       \
> -    set_fprf_from_class(env, tp##_classify(arg));       \
> +#define COMPUTE_FPRF(tp)                                          \
> +void helper_compute_fprf_##tp(CPUPPCState *env, tp arg)           \
> +{                                                                 \
> +    bool neg = tp##_is_neg(arg);                                  \
> +    target_ulong fprf;                                            \
> +    if (likely(tp##_is_normal(arg))) {                            \
> +        fprf = neg ? 0x08 << FPSCR_FPRF : 0x04 << FPSCR_FPRF;     \
> +    } else if (tp##_is_zero(arg)) {                               \
> +        fprf = neg ? 0x12 << FPSCR_FPRF : 0x02 << FPSCR_FPRF;     \
> +    } else if (tp##_is_zero_or_denormal(arg)) {                   \
> +        fprf = neg ? 0x18 << FPSCR_FPRF : 0x14 << FPSCR_FPRF;     \
> +    } else if (tp##_is_infinity(arg)) {                           \
> +        fprf = neg ? 0x09 << FPSCR_FPRF : 0x05 << FPSCR_FPRF;     \
> +    } else {                                                      \
> +        float_status dummy = { };  /* snan_bit_is_one = 0 */      \
> +        if (tp##_is_signaling_nan(arg, &dummy)) {                 \
> +            fprf = 0x00 << FPSCR_FPRF;                            \
> +        } else {                                                  \
> +            fprf = 0x11 << FPSCR_FPRF;                            \
> +        }                                                         \
> +    }                                                             \
> +    env->fpscr = (env->fpscr & ~FP_FPRF) | fprf;                  \
> }
>
> COMPUTE_FPRF(float16)
>


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] target/ppc: Merge COMPUTE_CLASS and COMPUTE_FPRF
  2023-05-23 20:25 [PATCH] target/ppc: Merge COMPUTE_CLASS and COMPUTE_FPRF Richard Henderson
  2023-05-23 22:50 ` BALATON Zoltan
@ 2023-05-23 23:02 ` BALATON Zoltan
  2023-05-23 23:06   ` Richard Henderson
  2023-05-24 13:18 ` Alex Bennée
  2023-05-28 10:05 ` Daniel Henrique Barboza
  3 siblings, 1 reply; 7+ messages in thread
From: BALATON Zoltan @ 2023-05-23 23:02 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, alex.bennee, qemu-ppc

On Tue, 23 May 2023, Richard Henderson wrote:
> Instead of computing an artifical "class" bitmask then
> converting that to the fprf value, compute the final
> value from the start.
>
> Reorder the tests to check the most likely cases first.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/ppc/fpu_helper.c | 78 ++++++++++++-----------------------------
> 1 file changed, 22 insertions(+), 56 deletions(-)
>
> diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
> index a66e16c212..03150a0f10 100644
> --- a/target/ppc/fpu_helper.c
> +++ b/target/ppc/fpu_helper.c
> @@ -141,62 +141,28 @@ static inline int ppc_float64_get_unbiased_exp(float64 f)
>     return ((f >> 52) & 0x7FF) - 1023;
> }
>
> -/* Classify a floating-point number.  */
> -enum {
> -    is_normal   = 1,
> -    is_zero     = 2,
> -    is_denormal = 4,
> -    is_inf      = 8,
> -    is_qnan     = 16,
> -    is_snan     = 32,
> -    is_neg      = 64,
> -};
> -
> -#define COMPUTE_CLASS(tp)                                      \
> -static int tp##_classify(tp arg)                               \
> -{                                                              \
> -    int ret = tp##_is_neg(arg) * is_neg;                       \
> -    if (unlikely(tp##_is_any_nan(arg))) {                      \
> -        float_status dummy = { };  /* snan_bit_is_one = 0 */   \
> -        ret |= (tp##_is_signaling_nan(arg, &dummy)             \
> -                ? is_snan : is_qnan);                          \
> -    } else if (unlikely(tp##_is_infinity(arg))) {              \
> -        ret |= is_inf;                                         \
> -    } else if (tp##_is_zero(arg)) {                            \
> -        ret |= is_zero;                                        \
> -    } else if (tp##_is_zero_or_denormal(arg)) {                \
> -        ret |= is_denormal;                                    \
> -    } else {                                                   \
> -        ret |= is_normal;                                      \
> -    }                                                          \
> -    return ret;                                                \
> -}
> -
> -COMPUTE_CLASS(float16)
> -COMPUTE_CLASS(float32)
> -COMPUTE_CLASS(float64)
> -COMPUTE_CLASS(float128)
> -
> -static void set_fprf_from_class(CPUPPCState *env, int class)
> -{
> -    static const uint8_t fprf[6][2] = {
> -        { 0x04, 0x08 },  /* normalized */
> -        { 0x02, 0x12 },  /* zero */
> -        { 0x14, 0x18 },  /* denormalized */
> -        { 0x05, 0x09 },  /* infinity */
> -        { 0x11, 0x11 },  /* qnan */
> -        { 0x00, 0x00 },  /* snan -- flags are undefined */
> -    };
> -    bool isneg = class & is_neg;
> -
> -    env->fpscr &= ~FP_FPRF;
> -    env->fpscr |= fprf[ctz32(class)][isneg] << FPSCR_FPRF;
> -}
> -
> -#define COMPUTE_FPRF(tp)                                \
> -void helper_compute_fprf_##tp(CPUPPCState *env, tp arg) \
> -{                                                       \
> -    set_fprf_from_class(env, tp##_classify(arg));       \
> +#define COMPUTE_FPRF(tp)                                          \
> +void helper_compute_fprf_##tp(CPUPPCState *env, tp arg)           \
> +{                                                                 \
> +    bool neg = tp##_is_neg(arg);                                  \
> +    target_ulong fprf;                                            \
> +    if (likely(tp##_is_normal(arg))) {                            \
> +        fprf = neg ? 0x08 << FPSCR_FPRF : 0x04 << FPSCR_FPRF;     \
> +    } else if (tp##_is_zero(arg)) {                               \
> +        fprf = neg ? 0x12 << FPSCR_FPRF : 0x02 << FPSCR_FPRF;     \
> +    } else if (tp##_is_zero_or_denormal(arg)) {                   \
> +        fprf = neg ? 0x18 << FPSCR_FPRF : 0x14 << FPSCR_FPRF;     \
> +    } else if (tp##_is_infinity(arg)) {                           \
> +        fprf = neg ? 0x09 << FPSCR_FPRF : 0x05 << FPSCR_FPRF;     \
> +    } else {                                                      \
> +        float_status dummy = { };  /* snan_bit_is_one = 0 */      \
> +        if (tp##_is_signaling_nan(arg, &dummy)) {                 \
> +            fprf = 0x00 << FPSCR_FPRF;                            \
> +        } else {                                                  \
> +            fprf = 0x11 << FPSCR_FPRF;                            \

If everything above is always shifted by FPSCR_FPRF then maybe it's easier 
to read with doing the shift once below and not in every case above.

Regards,
BALATON Zoltan

> +        }                                                         \
> +    }                                                             \
> +    env->fpscr = (env->fpscr & ~FP_FPRF) | fprf;                  \
> }
>
> COMPUTE_FPRF(float16)
>


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] target/ppc: Merge COMPUTE_CLASS and COMPUTE_FPRF
  2023-05-23 23:02 ` BALATON Zoltan
@ 2023-05-23 23:06   ` Richard Henderson
  2023-05-24  8:52     ` BALATON Zoltan
  0 siblings, 1 reply; 7+ messages in thread
From: Richard Henderson @ 2023-05-23 23:06 UTC (permalink / raw)
  To: BALATON Zoltan; +Cc: qemu-devel, alex.bennee, qemu-ppc

On 5/23/23 16:02, BALATON Zoltan wrote:
> On Tue, 23 May 2023, Richard Henderson wrote:
>> Instead of computing an artifical "class" bitmask then
>> converting that to the fprf value, compute the final
>> value from the start.
>>
>> Reorder the tests to check the most likely cases first.
>>
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>> target/ppc/fpu_helper.c | 78 ++++++++++++-----------------------------
>> 1 file changed, 22 insertions(+), 56 deletions(-)
>>
>> diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
>> index a66e16c212..03150a0f10 100644
>> --- a/target/ppc/fpu_helper.c
>> +++ b/target/ppc/fpu_helper.c
>> @@ -141,62 +141,28 @@ static inline int ppc_float64_get_unbiased_exp(float64 f)
>>     return ((f >> 52) & 0x7FF) - 1023;
>> }
>>
>> -/* Classify a floating-point number.  */
>> -enum {
>> -    is_normal   = 1,
>> -    is_zero     = 2,
>> -    is_denormal = 4,
>> -    is_inf      = 8,
>> -    is_qnan     = 16,
>> -    is_snan     = 32,
>> -    is_neg      = 64,
>> -};
>> -
>> -#define COMPUTE_CLASS(tp)                                      \
>> -static int tp##_classify(tp arg)                               \
>> -{                                                              \
>> -    int ret = tp##_is_neg(arg) * is_neg;                       \
>> -    if (unlikely(tp##_is_any_nan(arg))) {                      \
>> -        float_status dummy = { };  /* snan_bit_is_one = 0 */   \
>> -        ret |= (tp##_is_signaling_nan(arg, &dummy)             \
>> -                ? is_snan : is_qnan);                          \
>> -    } else if (unlikely(tp##_is_infinity(arg))) {              \
>> -        ret |= is_inf;                                         \
>> -    } else if (tp##_is_zero(arg)) {                            \
>> -        ret |= is_zero;                                        \
>> -    } else if (tp##_is_zero_or_denormal(arg)) {                \
>> -        ret |= is_denormal;                                    \
>> -    } else {                                                   \
>> -        ret |= is_normal;                                      \
>> -    }                                                          \
>> -    return ret;                                                \
>> -}
>> -
>> -COMPUTE_CLASS(float16)
>> -COMPUTE_CLASS(float32)
>> -COMPUTE_CLASS(float64)
>> -COMPUTE_CLASS(float128)
>> -
>> -static void set_fprf_from_class(CPUPPCState *env, int class)
>> -{
>> -    static const uint8_t fprf[6][2] = {
>> -        { 0x04, 0x08 },  /* normalized */
>> -        { 0x02, 0x12 },  /* zero */
>> -        { 0x14, 0x18 },  /* denormalized */
>> -        { 0x05, 0x09 },  /* infinity */
>> -        { 0x11, 0x11 },  /* qnan */
>> -        { 0x00, 0x00 },  /* snan -- flags are undefined */
>> -    };
>> -    bool isneg = class & is_neg;
>> -
>> -    env->fpscr &= ~FP_FPRF;
>> -    env->fpscr |= fprf[ctz32(class)][isneg] << FPSCR_FPRF;
>> -}
>> -
>> -#define COMPUTE_FPRF(tp)                                \
>> -void helper_compute_fprf_##tp(CPUPPCState *env, tp arg) \
>> -{                                                       \
>> -    set_fprf_from_class(env, tp##_classify(arg));       \
>> +#define COMPUTE_FPRF(tp)                                          \
>> +void helper_compute_fprf_##tp(CPUPPCState *env, tp arg)           \
>> +{                                                                 \
>> +    bool neg = tp##_is_neg(arg);                                  \
>> +    target_ulong fprf;                                            \
>> +    if (likely(tp##_is_normal(arg))) {                            \
>> +        fprf = neg ? 0x08 << FPSCR_FPRF : 0x04 << FPSCR_FPRF;     \
>> +    } else if (tp##_is_zero(arg)) {                               \
>> +        fprf = neg ? 0x12 << FPSCR_FPRF : 0x02 << FPSCR_FPRF;     \
>> +    } else if (tp##_is_zero_or_denormal(arg)) {                   \
>> +        fprf = neg ? 0x18 << FPSCR_FPRF : 0x14 << FPSCR_FPRF;     \
>> +    } else if (tp##_is_infinity(arg)) {                           \
>> +        fprf = neg ? 0x09 << FPSCR_FPRF : 0x05 << FPSCR_FPRF;     \
>> +    } else {                                                      \
>> +        float_status dummy = { };  /* snan_bit_is_one = 0 */      \
>> +        if (tp##_is_signaling_nan(arg, &dummy)) {                 \
>> +            fprf = 0x00 << FPSCR_FPRF;                            \
>> +        } else {                                                  \
>> +            fprf = 0x11 << FPSCR_FPRF;                            \
> 
> If everything above is always shifted by FPSCR_FPRF then maybe it's easier to read with 
> doing the shift once below and not in every case above.

I'm trying to make sure that the compiler generates all constants, instead of having a 
runtime shift of a constant ...

>> +    env->fpscr = (env->fpscr & ~FP_FPRF) | fprf;                  \

... here.


r~



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] target/ppc: Merge COMPUTE_CLASS and COMPUTE_FPRF
  2023-05-23 23:06   ` Richard Henderson
@ 2023-05-24  8:52     ` BALATON Zoltan
  0 siblings, 0 replies; 7+ messages in thread
From: BALATON Zoltan @ 2023-05-24  8:52 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, alex.bennee, qemu-ppc

[-- Attachment #1: Type: text/plain, Size: 6755 bytes --]

On Tue, 23 May 2023, Richard Henderson wrote:
> On 5/23/23 16:02, BALATON Zoltan wrote:
>> On Tue, 23 May 2023, Richard Henderson wrote:
>>> Instead of computing an artifical "class" bitmask then
>>> converting that to the fprf value, compute the final
>>> value from the start.
>>> 
>>> Reorder the tests to check the most likely cases first.
>>> 
>>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>>> ---
>>> target/ppc/fpu_helper.c | 78 ++++++++++++-----------------------------
>>> 1 file changed, 22 insertions(+), 56 deletions(-)
>>> 
>>> diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
>>> index a66e16c212..03150a0f10 100644
>>> --- a/target/ppc/fpu_helper.c
>>> +++ b/target/ppc/fpu_helper.c
>>> @@ -141,62 +141,28 @@ static inline int 
>>> ppc_float64_get_unbiased_exp(float64 f)
>>>     return ((f >> 52) & 0x7FF) - 1023;
>>> }
>>> 
>>> -/* Classify a floating-point number.  */
>>> -enum {
>>> -    is_normal   = 1,
>>> -    is_zero     = 2,
>>> -    is_denormal = 4,
>>> -    is_inf      = 8,
>>> -    is_qnan     = 16,
>>> -    is_snan     = 32,
>>> -    is_neg      = 64,
>>> -};
>>> -
>>> -#define COMPUTE_CLASS(tp)                                      \
>>> -static int tp##_classify(tp arg)                               \
>>> -{                                                              \
>>> -    int ret = tp##_is_neg(arg) * is_neg;                       \
>>> -    if (unlikely(tp##_is_any_nan(arg))) {                      \
>>> -        float_status dummy = { };  /* snan_bit_is_one = 0 */   \
>>> -        ret |= (tp##_is_signaling_nan(arg, &dummy)             \
>>> -                ? is_snan : is_qnan);                          \
>>> -    } else if (unlikely(tp##_is_infinity(arg))) {              \
>>> -        ret |= is_inf;                                         \
>>> -    } else if (tp##_is_zero(arg)) {                            \
>>> -        ret |= is_zero;                                        \
>>> -    } else if (tp##_is_zero_or_denormal(arg)) {                \
>>> -        ret |= is_denormal;                                    \
>>> -    } else {                                                   \
>>> -        ret |= is_normal;                                      \
>>> -    }                                                          \
>>> -    return ret;                                                \
>>> -}
>>> -
>>> -COMPUTE_CLASS(float16)
>>> -COMPUTE_CLASS(float32)
>>> -COMPUTE_CLASS(float64)
>>> -COMPUTE_CLASS(float128)
>>> -
>>> -static void set_fprf_from_class(CPUPPCState *env, int class)
>>> -{
>>> -    static const uint8_t fprf[6][2] = {
>>> -        { 0x04, 0x08 },  /* normalized */
>>> -        { 0x02, 0x12 },  /* zero */
>>> -        { 0x14, 0x18 },  /* denormalized */
>>> -        { 0x05, 0x09 },  /* infinity */
>>> -        { 0x11, 0x11 },  /* qnan */
>>> -        { 0x00, 0x00 },  /* snan -- flags are undefined */
>>> -    };
>>> -    bool isneg = class & is_neg;
>>> -
>>> -    env->fpscr &= ~FP_FPRF;
>>> -    env->fpscr |= fprf[ctz32(class)][isneg] << FPSCR_FPRF;
>>> -}
>>> -
>>> -#define COMPUTE_FPRF(tp)                                \
>>> -void helper_compute_fprf_##tp(CPUPPCState *env, tp arg) \
>>> -{                                                       \
>>> -    set_fprf_from_class(env, tp##_classify(arg));       \
>>> +#define COMPUTE_FPRF(tp)                                          \
>>> +void helper_compute_fprf_##tp(CPUPPCState *env, tp arg)           \
>>> +{                                                                 \
>>> +    bool neg = tp##_is_neg(arg);                                  \
>>> +    target_ulong fprf;                                            \
>>> +    if (likely(tp##_is_normal(arg))) {                            \
>>> +        fprf = neg ? 0x08 << FPSCR_FPRF : 0x04 << FPSCR_FPRF;     \
>>> +    } else if (tp##_is_zero(arg)) {                               \
>>> +        fprf = neg ? 0x12 << FPSCR_FPRF : 0x02 << FPSCR_FPRF;     \
>>> +    } else if (tp##_is_zero_or_denormal(arg)) {                   \
>>> +        fprf = neg ? 0x18 << FPSCR_FPRF : 0x14 << FPSCR_FPRF;     \
>>> +    } else if (tp##_is_infinity(arg)) {                           \
>>> +        fprf = neg ? 0x09 << FPSCR_FPRF : 0x05 << FPSCR_FPRF;     \
>>> +    } else {                                                      \
>>> +        float_status dummy = { };  /* snan_bit_is_one = 0 */      \
>>> +        if (tp##_is_signaling_nan(arg, &dummy)) {                 \
>>> +            fprf = 0x00 << FPSCR_FPRF;                            \
>>> +        } else {                                                  \
>>> +            fprf = 0x11 << FPSCR_FPRF;                            \
>> 
>> If everything above is always shifted by FPSCR_FPRF then maybe it's easier 
>> to read with doing the shift once below and not in every case above.
>
> I'm trying to make sure that the compiler generates all constants, instead of 
> having a runtime shift of a constant ...

Didn't think about that. Maybe adding a macro for it could make it more 
readable then, or noting it in a comment or in the commit message might 
be worth it.

Regards,
BALATON Zoltan

>>> +    env->fpscr = (env->fpscr & ~FP_FPRF) | fprf;                  \
>
> ... here.
>
>
> r~
>
>
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] target/ppc: Merge COMPUTE_CLASS and COMPUTE_FPRF
  2023-05-23 20:25 [PATCH] target/ppc: Merge COMPUTE_CLASS and COMPUTE_FPRF Richard Henderson
  2023-05-23 22:50 ` BALATON Zoltan
  2023-05-23 23:02 ` BALATON Zoltan
@ 2023-05-24 13:18 ` Alex Bennée
  2023-05-28 10:05 ` Daniel Henrique Barboza
  3 siblings, 0 replies; 7+ messages in thread
From: Alex Bennée @ 2023-05-24 13:18 UTC (permalink / raw)
  To: Richard Henderson; +Cc: qemu-devel, qemu-ppc, balaton


Richard Henderson <richard.henderson@linaro.org> writes:

> Instead of computing an artifical "class" bitmask then
> converting that to the fprf value, compute the final
> value from the start.
>
> Reorder the tests to check the most likely cases first.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

We see a slight performance boost although my baseline run was a bit
noisy:

➜  # before
🕙07:31:14 alex@zen:qemu.git/builds/all  (b44e6e6) (REBASING 1/2) [$?] took 10m 
➜  hyperfine -w 2 "./qemu-ppc64 ~/lsrc/tests/lame.git-svn/builds/ppc64/frontend/lame -h pts-trondheim-3.wav pts-trondheim-3.mp3"
Benchmark 1: ./qemu-ppc64 ~/lsrc/tests/lame.git-svn/builds/ppc64/frontend/lame -h pts-trondheim-3.wav pts-trondheim-3.mp3
  Time (mean ± σ):     649.858 s ± 58.351 s    [User: 649.272 s, System: 0.448 s]
  Range (min … max):   616.148 s … 764.585 s    10 runs

# after

hyperfine "./qemu-ppc64 ~/lsrc/tests/lame.git-svn/builds/ppc64/frontend/lame -h pts-trondheim-3.wav pts-trondheim-3.mp3"
Benchmark 1: ./qemu-ppc64 ~/lsrc/tests/lame.git-svn/builds/ppc64/frontend/lame -h pts-trondheim-3.wav pts-trondheim-3.mp3
  Time (mean ± σ):     599.968 s ±  8.014 s    [User: 599.656 s, System: 0.181 s]
  Range (min … max):   591.171 s … 615.912 s    10 runs

Anyway

Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] target/ppc: Merge COMPUTE_CLASS and COMPUTE_FPRF
  2023-05-23 20:25 [PATCH] target/ppc: Merge COMPUTE_CLASS and COMPUTE_FPRF Richard Henderson
                   ` (2 preceding siblings ...)
  2023-05-24 13:18 ` Alex Bennée
@ 2023-05-28 10:05 ` Daniel Henrique Barboza
  3 siblings, 0 replies; 7+ messages in thread
From: Daniel Henrique Barboza @ 2023-05-28 10:05 UTC (permalink / raw)
  To: Richard Henderson, qemu-devel; +Cc: alex.bennee, qemu-ppc, balaton



On 5/23/23 17:25, Richard Henderson wrote:
> Instead of computing an artifical "class" bitmask then
> converting that to the fprf value, compute the final
> value from the start.
> 
> Reorder the tests to check the most likely cases first.

Queued in ppc-next ("artifical" typo fixed).


Thanks,

Daniel

> 
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>   target/ppc/fpu_helper.c | 78 ++++++++++++-----------------------------
>   1 file changed, 22 insertions(+), 56 deletions(-)
> 
> diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c
> index a66e16c212..03150a0f10 100644
> --- a/target/ppc/fpu_helper.c
> +++ b/target/ppc/fpu_helper.c
> @@ -141,62 +141,28 @@ static inline int ppc_float64_get_unbiased_exp(float64 f)
>       return ((f >> 52) & 0x7FF) - 1023;
>   }
>   
> -/* Classify a floating-point number.  */
> -enum {
> -    is_normal   = 1,
> -    is_zero     = 2,
> -    is_denormal = 4,
> -    is_inf      = 8,
> -    is_qnan     = 16,
> -    is_snan     = 32,
> -    is_neg      = 64,
> -};
> -
> -#define COMPUTE_CLASS(tp)                                      \
> -static int tp##_classify(tp arg)                               \
> -{                                                              \
> -    int ret = tp##_is_neg(arg) * is_neg;                       \
> -    if (unlikely(tp##_is_any_nan(arg))) {                      \
> -        float_status dummy = { };  /* snan_bit_is_one = 0 */   \
> -        ret |= (tp##_is_signaling_nan(arg, &dummy)             \
> -                ? is_snan : is_qnan);                          \
> -    } else if (unlikely(tp##_is_infinity(arg))) {              \
> -        ret |= is_inf;                                         \
> -    } else if (tp##_is_zero(arg)) {                            \
> -        ret |= is_zero;                                        \
> -    } else if (tp##_is_zero_or_denormal(arg)) {                \
> -        ret |= is_denormal;                                    \
> -    } else {                                                   \
> -        ret |= is_normal;                                      \
> -    }                                                          \
> -    return ret;                                                \
> -}
> -
> -COMPUTE_CLASS(float16)
> -COMPUTE_CLASS(float32)
> -COMPUTE_CLASS(float64)
> -COMPUTE_CLASS(float128)
> -
> -static void set_fprf_from_class(CPUPPCState *env, int class)
> -{
> -    static const uint8_t fprf[6][2] = {
> -        { 0x04, 0x08 },  /* normalized */
> -        { 0x02, 0x12 },  /* zero */
> -        { 0x14, 0x18 },  /* denormalized */
> -        { 0x05, 0x09 },  /* infinity */
> -        { 0x11, 0x11 },  /* qnan */
> -        { 0x00, 0x00 },  /* snan -- flags are undefined */
> -    };
> -    bool isneg = class & is_neg;
> -
> -    env->fpscr &= ~FP_FPRF;
> -    env->fpscr |= fprf[ctz32(class)][isneg] << FPSCR_FPRF;
> -}
> -
> -#define COMPUTE_FPRF(tp)                                \
> -void helper_compute_fprf_##tp(CPUPPCState *env, tp arg) \
> -{                                                       \
> -    set_fprf_from_class(env, tp##_classify(arg));       \
> +#define COMPUTE_FPRF(tp)                                          \
> +void helper_compute_fprf_##tp(CPUPPCState *env, tp arg)           \
> +{                                                                 \
> +    bool neg = tp##_is_neg(arg);                                  \
> +    target_ulong fprf;                                            \
> +    if (likely(tp##_is_normal(arg))) {                            \
> +        fprf = neg ? 0x08 << FPSCR_FPRF : 0x04 << FPSCR_FPRF;     \
> +    } else if (tp##_is_zero(arg)) {                               \
> +        fprf = neg ? 0x12 << FPSCR_FPRF : 0x02 << FPSCR_FPRF;     \
> +    } else if (tp##_is_zero_or_denormal(arg)) {                   \
> +        fprf = neg ? 0x18 << FPSCR_FPRF : 0x14 << FPSCR_FPRF;     \
> +    } else if (tp##_is_infinity(arg)) {                           \
> +        fprf = neg ? 0x09 << FPSCR_FPRF : 0x05 << FPSCR_FPRF;     \
> +    } else {                                                      \
> +        float_status dummy = { };  /* snan_bit_is_one = 0 */      \
> +        if (tp##_is_signaling_nan(arg, &dummy)) {                 \
> +            fprf = 0x00 << FPSCR_FPRF;                            \
> +        } else {                                                  \
> +            fprf = 0x11 << FPSCR_FPRF;                            \
> +        }                                                         \
> +    }                                                             \
> +    env->fpscr = (env->fpscr & ~FP_FPRF) | fprf;                  \
>   }
>   
>   COMPUTE_FPRF(float16)


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2023-05-28 10:06 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-23 20:25 [PATCH] target/ppc: Merge COMPUTE_CLASS and COMPUTE_FPRF Richard Henderson
2023-05-23 22:50 ` BALATON Zoltan
2023-05-23 23:02 ` BALATON Zoltan
2023-05-23 23:06   ` Richard Henderson
2023-05-24  8:52     ` BALATON Zoltan
2023-05-24 13:18 ` Alex Bennée
2023-05-28 10:05 ` Daniel Henrique Barboza

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.