Re: [RFC PATCH] softfloat: use QEMU_FLATTEN to avoid mistaken isra inlining

From: BALATON Zoltan <balaton@eik.bme.hu>
To: Paolo Bonzini <pbonzini@redhat.com>
Cc: "Richard Henderson" <richard.henderson@linaro.org>,
	"Alex Bennée" <alex.bennee@linaro.org>,
	qemu-devel@nongnu.org, "Aurelien Jarno" <aurelien@aurel32.net>,
	"Peter Maydell" <peter.maydell@linaro.org>
Subject: Re: [RFC PATCH] softfloat: use QEMU_FLATTEN to avoid mistaken isra inlining
Date: Fri, 26 May 2023 01:15:23 +0200 (CEST)	[thread overview]
Message-ID: <78e0f562-e79d-8171-b1b5-6ba6ce819c51@eik.bme.hu> (raw)
In-Reply-To: <77189848-d618-9f55-4ae9-92756e635371@redhat.com>

[-- Attachment #1: Type: text/plain, Size: 5184 bytes --]

On Thu, 25 May 2023, Paolo Bonzini wrote:
> On 5/23/23 16:33, Richard Henderson wrote:
>> The tests are poorly ordered, testing many unlikely things before the most 
>> likely thing (normal).  A better ordering would be
>>
>>      if (likely(tp##_is_normal(arg))) {
>>      } else if (tp##_is_zero(arg)) {
>>      } else if (tp##_is_zero_or_denormal(arg)) {
>>      } else if (tp##_is_infinity(arg)) {
>>      } else {
>>          // nan case
>>      }
>> 
>> Secondly, we compute the classify bitmask, and then deconstruct the mask 
>> again in set_fprf_from_class.  Since we don't use the classify bitmask for 
>> anything else, better would be to compute the fprf value directly in the 
>> if-ladder.
>
> So something like this:
>
> diff --git a/target/ppc/fpu_helper.c b/target/ppc/fpu_helper.c

I've tested this too (patch did not apply for some reason, had to apply by 
hand) but Richard's patch seems to get better results. This one also helps 
but takes a few seconds more than with Richard's patch and with that this 
function gets 1% lower in profile.

Regards,
BALATON Zoltan

> index a66e16c2128c..daed97ca178e 100644
> --- a/target/ppc/fpu_helper.c
> +++ b/target/ppc/fpu_helper.c
> @@ -141,62 +141,30 @@ static inline int ppc_float64_get_unbiased_exp(float64 
> f)
>     return ((f >> 52) & 0x7FF) - 1023;
> }
> -/* Classify a floating-point number.  */
> -enum {
> -    is_normal   = 1,
> -    is_zero     = 2,
> -    is_denormal = 4,
> -    is_inf      = 8,
> -    is_qnan     = 16,
> -    is_snan     = 32,
> -    is_neg      = 64,
> -};
> -
> -#define COMPUTE_CLASS(tp)                                      \
> -static int tp##_classify(tp arg)                               \
> -{                                                              \
> -    int ret = tp##_is_neg(arg) * is_neg;                       \
> -    if (unlikely(tp##_is_any_nan(arg))) {                      \
> -        float_status dummy = { };  /* snan_bit_is_one = 0 */   \
> -        ret |= (tp##_is_signaling_nan(arg, &dummy)             \
> -                ? is_snan : is_qnan);                          \
> -    } else if (unlikely(tp##_is_infinity(arg))) {              \
> -        ret |= is_inf;                                         \
> -    } else if (tp##_is_zero(arg)) {                            \
> -        ret |= is_zero;                                        \
> -    } else if (tp##_is_zero_or_denormal(arg)) {                \
> -        ret |= is_denormal;                                    \
> -    } else {                                                   \
> -        ret |= is_normal;                                      \
> -    }                                                          \
> -    return ret;                                                \
> -}
> -
> -COMPUTE_CLASS(float16)
> -COMPUTE_CLASS(float32)
> -COMPUTE_CLASS(float64)
> -COMPUTE_CLASS(float128)
> -
> -static void set_fprf_from_class(CPUPPCState *env, int class)
> +static void set_fprf(CPUPPCState *env, uint8_t ret)
> {
> -    static const uint8_t fprf[6][2] = {
> -        { 0x04, 0x08 },  /* normalized */
> -        { 0x02, 0x12 },  /* zero */
> -        { 0x14, 0x18 },  /* denormalized */
> -        { 0x05, 0x09 },  /* infinity */
> -        { 0x11, 0x11 },  /* qnan */
> -        { 0x00, 0x00 },  /* snan -- flags are undefined */
> -    };
> -    bool isneg = class & is_neg;
> -
>     env->fpscr &= ~FP_FPRF;
> -    env->fpscr |= fprf[ctz32(class)][isneg] << FPSCR_FPRF;
> +    env->fpscr |= ret << FPSCR_FPRF;
> }
> -#define COMPUTE_FPRF(tp)                                \
> -void helper_compute_fprf_##tp(CPUPPCState *env, tp arg) \
> -{                                                       \
> -    set_fprf_from_class(env, tp##_classify(arg));       \
> +#define COMPUTE_FPRF(tp)                                       \
> +void helper_compute_fprf_##tp(CPUPPCState *env, tp arg)        \
> +{                                                              \
> +    int ret;                                                   \
> +    if (tp##_is_normal(arg)) {                                 \
> +        ret = 0x0408;                                          \
> +    } else if (tp##_is_zero(arg)) {                            \
> +        ret = 0x0212;                                          \
> +    } else if (tp##_is_zero_or_denormal(arg)) {                \
> +        ret = 0x1418;                                          \
> +    } else if (unlikely(tp##_is_infinity(arg))) {              \
> +        ret = 0x0509;                                          \
> +    } else {                                                   \
> +        float_status dummy = { };  /* snan_bit_is_one = 0 */   \
> +        ret = (tp##_is_signaling_nan(arg, &dummy)              \
> +               ? 0x0000 : 0x1111);                             \
> +    }                                                          \
> +    set_fprf(env, tp##_is_neg(arg) ? (uint8_t)ret : ret >> 8); \
> }
>  COMPUTE_FPRF(float16)
>
>
> Not tested beyond compilation, but if Zoltan reports that it helps
> I can write a commit message and submit it.
>
> Paolo
>
>