All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] powerpc/math-emu: Update macros from gmp-6.1.2
@ 2018-11-14  2:44 Joel Stanley
  2018-11-19 18:53 ` Nick Desaulniers
  0 siblings, 1 reply; 7+ messages in thread
From: Joel Stanley @ 2018-11-14  2:44 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Nick Desaulniers

The add_ssaaaa, sub_ddmmss, umul_ppmm and udiv_qrnnd macros originate
from GMP's longlong.h.
This was found when compiling with clang:

   arch/powerpc/math-emu/fnmsub.c:46:2: error: invalid use of a cast in a
   inline asm context requiring an l-value: remove the cast or build with
   -fheinous-gnu-extensions
           FP_ADD_D(R, T, B);
           ^~~~~~~~~~~~~~~~~
   ...

   ./arch/powerpc/include/asm/sfp-machine.h:283:27: note: expanded from
   macro 'sub_ddmmss'
                  : "=r" ((USItype)(sh)),                                  \
                          ~~~~~~~~~~^~~

Segher points out: this was fixed in GCC over 16 years ago
( https://gcc.gnu.org/r56600 ), and in GMP (where it comes from)
presumably before that.

Update to the latest version in order to git rid of the invalid casts.

The only functional change I noticed was this in udiv_qrnnd.

    __r1 = (n1) % __d1;
    __q1 = (n1) / __d1;

Becomes this:

    __q1 = (n1) / __d1;
    __r1 = (n1) - __q1 * __d1;

This is equivalent as it instead of calculating the remainder
using modulo, it uses the result of integer division to subtract the
count of 'whole' d1 from r1.

Link: https://github.com/ClangBuiltLinux/linux/issues/260
Signed-off-by: Joel Stanley <joel@jms.id.au>
---
v1:
 https://lore.kernel.org/linuxppc-dev/20181102033713.31916-1-joel@jms.id.au/
v2:
 Instead of setting the -fheinous-gnu-extensions for clang, fix the code.

 arch/powerpc/include/asm/sfp-machine.h | 99 +++++++++-----------------
 1 file changed, 32 insertions(+), 67 deletions(-)

diff --git a/arch/powerpc/include/asm/sfp-machine.h b/arch/powerpc/include/asm/sfp-machine.h
index d89beaba26ff..a6353d9dd5ba 100644
--- a/arch/powerpc/include/asm/sfp-machine.h
+++ b/arch/powerpc/include/asm/sfp-machine.h
@@ -213,30 +213,18 @@
  * respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
  * (i.e. carry out) is not stored anywhere, and is lost.
  */
-#define add_ssaaaa(sh, sl, ah, al, bh, bl)				\
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   do {									\
     if (__builtin_constant_p (bh) && (bh) == 0)				\
-      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"		\
-	     : "=r" ((USItype)(sh)),					\
-	       "=&r" ((USItype)(sl))					\
-	     : "%r" ((USItype)(ah)),					\
-	       "%r" ((USItype)(al)),					\
-	       "rI" ((USItype)(bl)));					\
-    else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0)		\
-      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"		\
-	     : "=r" ((USItype)(sh)),					\
-	       "=&r" ((USItype)(sl))					\
-	     : "%r" ((USItype)(ah)),					\
-	       "%r" ((USItype)(al)),					\
-	       "rI" ((USItype)(bl)));					\
+      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"			\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));	\
+    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
+      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"			\
+	     : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));	\
     else								\
-      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"		\
-	     : "=r" ((USItype)(sh)),					\
-	       "=&r" ((USItype)(sl))					\
-	     : "%r" ((USItype)(ah)),					\
-	       "r" ((USItype)(bh)),					\
-	       "%r" ((USItype)(al)),					\
-	       "rI" ((USItype)(bl)));					\
+      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"			\
+	     : "=r" (sh), "=&r" (sl)					\
+	     : "r" (ah), "r" (bh), "%r" (al), "rI" (bl));		\
   } while (0)
 
 /* sub_ddmmss is used in op-2.h and udivmodti4.c and should be equivalent to
@@ -248,44 +236,24 @@
  * and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
  * and is lost.
  */
-#define sub_ddmmss(sh, sl, ah, al, bh, bl)				\
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {									\
     if (__builtin_constant_p (ah) && (ah) == 0)				\
-      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"	\
-	       : "=r" ((USItype)(sh)),					\
-		 "=&r" ((USItype)(sl))					\
-	       : "r" ((USItype)(bh)),					\
-		 "rI" ((USItype)(al)),					\
-		 "r" ((USItype)(bl)));					\
-    else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0)		\
-      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"	\
-	       : "=r" ((USItype)(sh)),					\
-		 "=&r" ((USItype)(sl))					\
-	       : "r" ((USItype)(bh)),					\
-		 "rI" ((USItype)(al)),					\
-		 "r" ((USItype)(bl)));					\
+      __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"			\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)		\
+      __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"			\
+	       : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
     else if (__builtin_constant_p (bh) && (bh) == 0)			\
-      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"		\
-	       : "=r" ((USItype)(sh)),					\
-		 "=&r" ((USItype)(sl))					\
-	       : "r" ((USItype)(ah)),					\
-		 "rI" ((USItype)(al)),					\
-		 "r" ((USItype)(bl)));					\
-    else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0)		\
-      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"		\
-	       : "=r" ((USItype)(sh)),					\
-		 "=&r" ((USItype)(sl))					\
-	       : "r" ((USItype)(ah)),					\
-		 "rI" ((USItype)(al)),					\
-		 "r" ((USItype)(bl)));					\
+      __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"			\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
+    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)		\
+      __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"			\
+	       : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
     else								\
-      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"	\
-	       : "=r" ((USItype)(sh)),					\
-		 "=&r" ((USItype)(sl))					\
-	       : "r" ((USItype)(ah)),					\
-		 "r" ((USItype)(bh)),					\
-		 "rI" ((USItype)(al)),					\
-		 "r" ((USItype)(bl)));					\
+      __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"			\
+	       : "=r" (sh), "=&r" (sl)					\
+	       : "r" (ah), "r" (bh), "rI" (al), "r" (bl));		\
   } while (0)
 
 /* asm fragments for mul and div */
@@ -294,13 +262,10 @@
  * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype
  * word product in HIGH_PROD and LOW_PROD.
  */
-#define umul_ppmm(ph, pl, m0, m1)					\
+#define umul_ppmm(ph, pl, m0, m1) \
   do {									\
     USItype __m0 = (m0), __m1 = (m1);					\
-    __asm__ ("mulhwu %0,%1,%2"						\
-	     : "=r" ((USItype)(ph))					\
-	     : "%r" (__m0),						\
-               "r" (__m1));						\
+    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));	\
     (pl) = __m0 * __m1;							\
   } while (0)
 
@@ -312,28 +277,28 @@
  * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
  * UDIV_NEEDS_NORMALIZATION is defined to 1.
  */
-#define udiv_qrnnd(q, r, n1, n0, d)					\
+#define udiv_qrnnd(q, r, n1, n0, d) \
   do {									\
     UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;			\
     __d1 = __ll_highpart (d);						\
     __d0 = __ll_lowpart (d);						\
 									\
-    __r1 = (n1) % __d1;							\
     __q1 = (n1) / __d1;							\
-    __m = (UWtype) __q1 * __d0;						\
+    __r1 = (n1) - __q1 * __d1;						\
+    __m = __q1 * __d0;							\
     __r1 = __r1 * __ll_B | __ll_highpart (n0);				\
     if (__r1 < __m)							\
       {									\
 	__q1--, __r1 += (d);						\
-	if (__r1 >= (d)) /* we didn't get carry when adding to __r1 */	\
+	if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
 	  if (__r1 < __m)						\
 	    __q1--, __r1 += (d);					\
       }									\
     __r1 -= __m;							\
 									\
-    __r0 = __r1 % __d1;							\
     __q0 = __r1 / __d1;							\
-    __m = (UWtype) __q0 * __d0;						\
+    __r0 = __r1  - __q0 * __d1;						\
+    __m = __q0 * __d0;							\
     __r0 = __r0 * __ll_B | __ll_lowpart (n0);				\
     if (__r0 < __m)							\
       {									\
@@ -344,7 +309,7 @@
       }									\
     __r0 -= __m;							\
 									\
-    (q) = (UWtype) __q1 * __ll_B | __q0;				\
+    (q) = __q1 * __ll_B | __q0;						\
     (r) = __r0;								\
   } while (0)
 
-- 
2.19.1


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] powerpc/math-emu: Update macros from gmp-6.1.2
  2018-11-14  2:44 [PATCH v2] powerpc/math-emu: Update macros from gmp-6.1.2 Joel Stanley
@ 2018-11-19 18:53 ` Nick Desaulniers
  2018-11-19 23:15   ` Joel Stanley
  0 siblings, 1 reply; 7+ messages in thread
From: Nick Desaulniers @ 2018-11-19 18:53 UTC (permalink / raw)
  To: joel; +Cc: linuxppc-dev

On Tue, Nov 13, 2018 at 6:44 PM Joel Stanley <joel@jms.id.au> wrote:
>
> The add_ssaaaa, sub_ddmmss, umul_ppmm and udiv_qrnnd macros originate
> from GMP's longlong.h.
> This was found when compiling with clang:
>
>    arch/powerpc/math-emu/fnmsub.c:46:2: error: invalid use of a cast in a
>    inline asm context requiring an l-value: remove the cast or build with
>    -fheinous-gnu-extensions
>            FP_ADD_D(R, T, B);
>            ^~~~~~~~~~~~~~~~~
>    ...
>
>    ./arch/powerpc/include/asm/sfp-machine.h:283:27: note: expanded from
>    macro 'sub_ddmmss'
>                   : "=r" ((USItype)(sh)),                                  \
>                           ~~~~~~~~~~^~~
>
> Segher points out: this was fixed in GCC over 16 years ago
> ( https://gcc.gnu.org/r56600 ), and in GMP (where it comes from)
> presumably before that.
>
> Update to the latest version in order to git rid of the invalid casts.

Hi Joel, thanks for this patch.  It's a much better fix IMO than v1.
One nit below.

>
> The only functional change I noticed was this in udiv_qrnnd.
>
>     __r1 = (n1) % __d1;
>     __q1 = (n1) / __d1;
>
> Becomes this:
>
>     __q1 = (n1) / __d1;
>     __r1 = (n1) - __q1 * __d1;
>
> This is equivalent as it instead of calculating the remainder
> using modulo, it uses the result of integer division to subtract the
> count of 'whole' d1 from r1.

I don't understand this; why was this functional change made?

>
> Link: https://github.com/ClangBuiltLinux/linux/issues/260
> Signed-off-by: Joel Stanley <joel@jms.id.au>
> ---
> v1:
>  https://lore.kernel.org/linuxppc-dev/20181102033713.31916-1-joel@jms.id.au/
> v2:
>  Instead of setting the -fheinous-gnu-extensions for clang, fix the code.
>
>  arch/powerpc/include/asm/sfp-machine.h | 99 +++++++++-----------------
>  1 file changed, 32 insertions(+), 67 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/sfp-machine.h b/arch/powerpc/include/asm/sfp-machine.h
> index d89beaba26ff..a6353d9dd5ba 100644
> --- a/arch/powerpc/include/asm/sfp-machine.h
> +++ b/arch/powerpc/include/asm/sfp-machine.h
> @@ -213,30 +213,18 @@
>   * respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
>   * (i.e. carry out) is not stored anywhere, and is lost.
>   */
> -#define add_ssaaaa(sh, sl, ah, al, bh, bl)                             \
> +#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
>    do {                                                                 \
>      if (__builtin_constant_p (bh) && (bh) == 0)                                \
> -      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"          \
> -            : "=r" ((USItype)(sh)),                                    \
> -              "=&r" ((USItype)(sl))                                    \
> -            : "%r" ((USItype)(ah)),                                    \
> -              "%r" ((USItype)(al)),                                    \
> -              "rI" ((USItype)(bl)));                                   \
> -    else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0)         \
> -      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"          \
> -            : "=r" ((USItype)(sh)),                                    \
> -              "=&r" ((USItype)(sl))                                    \
> -            : "%r" ((USItype)(ah)),                                    \
> -              "%r" ((USItype)(al)),                                    \
> -              "rI" ((USItype)(bl)));                                   \
> +      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"                       \
> +            : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
> +    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)                \
> +      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"                       \
> +            : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
>      else                                                               \
> -      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"         \
> -            : "=r" ((USItype)(sh)),                                    \
> -              "=&r" ((USItype)(sl))                                    \
> -            : "%r" ((USItype)(ah)),                                    \
> -              "r" ((USItype)(bh)),                                     \
> -              "%r" ((USItype)(al)),                                    \
> -              "rI" ((USItype)(bl)));                                   \
> +      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"                     \
> +            : "=r" (sh), "=&r" (sl)                                    \
> +            : "r" (ah), "r" (bh), "%r" (al), "rI" (bl));               \
>    } while (0)
>
>  /* sub_ddmmss is used in op-2.h and udivmodti4.c and should be equivalent to
> @@ -248,44 +236,24 @@
>   * and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
>   * and is lost.
>   */
> -#define sub_ddmmss(sh, sl, ah, al, bh, bl)                             \
> +#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
>    do {                                                                 \
>      if (__builtin_constant_p (ah) && (ah) == 0)                                \
> -      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"      \
> -              : "=r" ((USItype)(sh)),                                  \
> -                "=&r" ((USItype)(sl))                                  \
> -              : "r" ((USItype)(bh)),                                   \
> -                "rI" ((USItype)(al)),                                  \
> -                "r" ((USItype)(bl)));                                  \
> -    else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0)         \
> -      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"      \
> -              : "=r" ((USItype)(sh)),                                  \
> -                "=&r" ((USItype)(sl))                                  \
> -              : "r" ((USItype)(bh)),                                   \
> -                "rI" ((USItype)(al)),                                  \
> -                "r" ((USItype)(bl)));                                  \
> +      __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"                     \
> +              : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
> +    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)                \
> +      __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"                     \
> +              : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
>      else if (__builtin_constant_p (bh) && (bh) == 0)                   \
> -      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"                \
> -              : "=r" ((USItype)(sh)),                                  \
> -                "=&r" ((USItype)(sl))                                  \
> -              : "r" ((USItype)(ah)),                                   \
> -                "rI" ((USItype)(al)),                                  \
> -                "r" ((USItype)(bl)));                                  \
> -    else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0)         \
> -      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"                \
> -              : "=r" ((USItype)(sh)),                                  \
> -                "=&r" ((USItype)(sl))                                  \
> -              : "r" ((USItype)(ah)),                                   \
> -                "rI" ((USItype)(al)),                                  \
> -                "r" ((USItype)(bl)));                                  \
> +      __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"                      \
> +              : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
> +    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)                \
> +      __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"                      \
> +              : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
>      else                                                               \
> -      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"     \
> -              : "=r" ((USItype)(sh)),                                  \
> -                "=&r" ((USItype)(sl))                                  \
> -              : "r" ((USItype)(ah)),                                   \
> -                "r" ((USItype)(bh)),                                   \
> -                "rI" ((USItype)(al)),                                  \
> -                "r" ((USItype)(bl)));                                  \
> +      __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"                   \
> +              : "=r" (sh), "=&r" (sl)                                  \
> +              : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
>    } while (0)
>
>  /* asm fragments for mul and div */
> @@ -294,13 +262,10 @@
>   * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype
>   * word product in HIGH_PROD and LOW_PROD.
>   */
> -#define umul_ppmm(ph, pl, m0, m1)                                      \
> +#define umul_ppmm(ph, pl, m0, m1) \
>    do {                                                                 \
>      USItype __m0 = (m0), __m1 = (m1);                                  \
> -    __asm__ ("mulhwu %0,%1,%2"                                         \
> -            : "=r" ((USItype)(ph))                                     \
> -            : "%r" (__m0),                                             \
> -               "r" (__m1));                                            \
> +    __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (m0), "r" (m1));     \
>      (pl) = __m0 * __m1;                                                        \
>    } while (0)
>
> @@ -312,28 +277,28 @@
>   * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
>   * UDIV_NEEDS_NORMALIZATION is defined to 1.
>   */
> -#define udiv_qrnnd(q, r, n1, n0, d)                                    \
> +#define udiv_qrnnd(q, r, n1, n0, d) \
>    do {                                                                 \
>      UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;                    \
>      __d1 = __ll_highpart (d);                                          \
>      __d0 = __ll_lowpart (d);                                           \
>                                                                         \
> -    __r1 = (n1) % __d1;                                                        \
>      __q1 = (n1) / __d1;                                                        \
> -    __m = (UWtype) __q1 * __d0;                                                \
> +    __r1 = (n1) - __q1 * __d1;                                         \
> +    __m = __q1 * __d0;                                                 \
>      __r1 = __r1 * __ll_B | __ll_highpart (n0);                         \
>      if (__r1 < __m)                                                    \
>        {                                                                        \
>         __q1--, __r1 += (d);                                            \
> -       if (__r1 >= (d)) /* we didn't get carry when adding to __r1 */  \
> +       if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
>           if (__r1 < __m)                                               \
>             __q1--, __r1 += (d);                                        \
>        }                                                                        \
>      __r1 -= __m;                                                       \
>                                                                         \
> -    __r0 = __r1 % __d1;                                                        \
>      __q0 = __r1 / __d1;                                                        \
> -    __m = (UWtype) __q0 * __d0;                                                \
> +    __r0 = __r1  - __q0 * __d1;                                                \
> +    __m = __q0 * __d0;                                                 \
>      __r0 = __r0 * __ll_B | __ll_lowpart (n0);                          \
>      if (__r0 < __m)                                                    \
>        {                                                                        \
> @@ -344,7 +309,7 @@
>        }                                                                        \
>      __r0 -= __m;                                                       \
>                                                                         \
> -    (q) = (UWtype) __q1 * __ll_B | __q0;                               \
> +    (q) = __q1 * __ll_B | __q0;                                                \
>      (r) = __r0;                                                                \
>    } while (0)


This appears to now differ from the upstream source:
https://github.com/gcc-mirror/gcc/blob/f7289f563a5e447ac21a5901ed75aad0dbd37732/include/longlong.h#L1679
If we're going to borrow implementations from GCC, let's borrow the
same implementation. Otherwise it's hard to have confidence in this
part of the patch.
-- 
Thanks,
~Nick Desaulniers

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] powerpc/math-emu: Update macros from gmp-6.1.2
  2018-11-19 18:53 ` Nick Desaulniers
@ 2018-11-19 23:15   ` Joel Stanley
  2018-11-20 18:20     ` Segher Boessenkool
  0 siblings, 1 reply; 7+ messages in thread
From: Joel Stanley @ 2018-11-19 23:15 UTC (permalink / raw)
  To: Nick Desaulniers; +Cc: linuxppc-dev

On Tue, 20 Nov 2018 at 05:24, Nick Desaulniers <ndesaulniers@google.com> wrote:
> >
> > The only functional change I noticed was this in udiv_qrnnd.
> >
> >     __r1 = (n1) % __d1;
> >     __q1 = (n1) / __d1;
> >
> > Becomes this:
> >
> >     __q1 = (n1) / __d1;
> >     __r1 = (n1) - __q1 * __d1;
> >
> > This is equivalent as it instead of calculating the remainder
> > using modulo, it uses the result of integer division to subtract the
> > count of 'whole' d1 from r1.
>
> I don't understand this; why was this functional change made?

I couldn't see why. It pre-dates GMP's mecurial history that  .
                                              \
> > -    (q) = (UWtype) __q1 * __ll_B | __q0;                               \
> > +    (q) = __q1 * __ll_B | __q0;                                                \
> >      (r) = __r0;                                                                \
> >    } while (0)
>
> This appears to now differ from the upstream source:
> https://github.com/gcc-mirror/gcc/blob/f7289f563a5e447ac21a5901ed75aad0dbd37732/include/longlong.h#L1679

AIUI the upstream source is GMP:

 https://gmplib.org/repo/gmp/file/tip/longlong.h

> If we're going to borrow implementations from GCC, let's borrow the
> same implementation. Otherwise it's hard to have confidence in this
> part of the patch.

I agree we should use the upstream source.

Segher, which tree contains the One True Upstream for longlong.h?

Cheers,

Joel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] powerpc/math-emu: Update macros from gmp-6.1.2
  2018-11-19 23:15   ` Joel Stanley
@ 2018-11-20 18:20     ` Segher Boessenkool
  2018-11-20 18:45       ` Nick Desaulniers
  0 siblings, 1 reply; 7+ messages in thread
From: Segher Boessenkool @ 2018-11-20 18:20 UTC (permalink / raw)
  To: Joel Stanley; +Cc: linuxppc-dev, Nick Desaulniers

On Tue, Nov 20, 2018 at 09:45:33AM +1030, Joel Stanley wrote:
> On Tue, 20 Nov 2018 at 05:24, Nick Desaulniers <ndesaulniers@google.com> wrote:
> > >
> > > The only functional change I noticed was this in udiv_qrnnd.
> > >
> > >     __r1 = (n1) % __d1;
> > >     __q1 = (n1) / __d1;
> > >
> > > Becomes this:
> > >
> > >     __q1 = (n1) / __d1;
> > >     __r1 = (n1) - __q1 * __d1;
> > >
> > > This is equivalent as it instead of calculating the remainder
> > > using modulo, it uses the result of integer division to subtract the
> > > count of 'whole' d1 from r1.
> >
> > I don't understand this; why was this functional change made?
> 
> I couldn't see why. It pre-dates GMP's mecurial history that  .

Perhaps it is to make it more likely only a single divide machine
instruction results.  Maybe for -O0, maybe for older GCC, maybe for GCC
targets that do not get this right.  It of course means exactly the same.

> AIUI the upstream source is GMP:
> 
>  https://gmplib.org/repo/gmp/file/tip/longlong.h
> 
> > If we're going to borrow implementations from GCC, let's borrow the
> > same implementation. Otherwise it's hard to have confidence in this
> > part of the patch.
> 
> I agree we should use the upstream source.
> 
> Segher, which tree contains the One True Upstream for longlong.h?

You should probably get your updates from the same place as was used to
get the file in the first place.  Or, don't worry so much about it, how
often is this updated?  Once in ten years, or twenty?

If you do not want to include bigger (maybe irrelevant) changes from
upstream, you have already forked, effectively.


Segher

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] powerpc/math-emu: Update macros from gmp-6.1.2
  2018-11-20 18:20     ` Segher Boessenkool
@ 2018-11-20 18:45       ` Nick Desaulniers
  2018-11-20 20:15         ` Segher Boessenkool
  0 siblings, 1 reply; 7+ messages in thread
From: Nick Desaulniers @ 2018-11-20 18:45 UTC (permalink / raw)
  To: segher; +Cc: linuxppc-dev, joel

On Tue, Nov 20, 2018 at 10:20 AM Segher Boessenkool
<segher@kernel.crashing.org> wrote:
>
> On Tue, Nov 20, 2018 at 09:45:33AM +1030, Joel Stanley wrote:
> > On Tue, 20 Nov 2018 at 05:24, Nick Desaulniers <ndesaulniers@google.com> wrote:
> > > >
> > > > The only functional change I noticed was this in udiv_qrnnd.
> > AIUI the upstream source is GMP:
> >
> >  https://gmplib.org/repo/gmp/file/tip/longlong.h
> >
> > > If we're going to borrow implementations from GCC, let's borrow the
> > > same implementation. Otherwise it's hard to have confidence in this
> > > part of the patch.
> >
> > I agree we should use the upstream source.
> >
> > Segher, which tree contains the One True Upstream for longlong.h?
>
> You should probably get your updates from the same place as was used to
> get the file in the first place.

So sounds like GMP is the source to take then.  Joel, would you mind
sending a v3 with just the commit message updated to reflect the fact
that GCC and GMP differ in implementations, and this is taken from
GMP, maybe with some links?  I think that will provide more context
for future travelers.  I'd be happy to add my reviewed-by tag to that.

-- 
Thanks,
~Nick Desaulniers

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] powerpc/math-emu: Update macros from gmp-6.1.2
  2018-11-20 18:45       ` Nick Desaulniers
@ 2018-11-20 20:15         ` Segher Boessenkool
  2018-11-20 21:28           ` Nick Desaulniers
  0 siblings, 1 reply; 7+ messages in thread
From: Segher Boessenkool @ 2018-11-20 20:15 UTC (permalink / raw)
  To: Nick Desaulniers; +Cc: linuxppc-dev, joel

On Tue, Nov 20, 2018 at 10:45:19AM -0800, Nick Desaulniers wrote:
> On Tue, Nov 20, 2018 at 10:20 AM Segher Boessenkool
> <segher@kernel.crashing.org> wrote:
> > > Segher, which tree contains the One True Upstream for longlong.h?
> >
> > You should probably get your updates from the same place as was used to
> > get the file in the first place.
> 
> So sounds like GMP is the source to take then.

I'm pretty sure the kernel got it from GCC.  How much does the kernel
one differ from current GCC?

FWIW, the GCC version still uses this "abhorrent GCC extension" (or what
were those words) for most targets, just not for powerpc.


Segher

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] powerpc/math-emu: Update macros from gmp-6.1.2
  2018-11-20 20:15         ` Segher Boessenkool
@ 2018-11-20 21:28           ` Nick Desaulniers
  0 siblings, 0 replies; 7+ messages in thread
From: Nick Desaulniers @ 2018-11-20 21:28 UTC (permalink / raw)
  To: segher; +Cc: linuxppc-dev, joel

On Tue, Nov 20, 2018 at 12:15 PM Segher Boessenkool
<segher@kernel.crashing.org> wrote:
>
> On Tue, Nov 20, 2018 at 10:45:19AM -0800, Nick Desaulniers wrote:
> > On Tue, Nov 20, 2018 at 10:20 AM Segher Boessenkool
> > <segher@kernel.crashing.org> wrote:
> > > > Segher, which tree contains the One True Upstream for longlong.h?
> > >
> > > You should probably get your updates from the same place as was used to
> > > get the file in the first place.
> >
> > So sounds like GMP is the source to take then.
>
> I'm pretty sure the kernel got it from GCC.
> How much does the kernel
> one differ from current GCC?

Compare udiv_qrnnd implementations from:
gcc: https://github.com/gcc-mirror/gcc/blob/f7289f563a5e447ac21a5901ed75aad0dbd37732/include/longlong.h#L1679
gmp: https://gmplib.org/repo/gmp/file/tip/longlong.h#l2063

The kernel currently is using the GCC impl, which Joel's patch here
changes it to the GMP impl.

If you prefer the kernel to keep the kernel keep the GCC derivative,
then Joel can drop the change to udiv_qrnnd() as it does not contain
inline assembly relevant to the original issue.
-- 
Thanks,
~Nick Desaulniers

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-11-20 21:31 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-14  2:44 [PATCH v2] powerpc/math-emu: Update macros from gmp-6.1.2 Joel Stanley
2018-11-19 18:53 ` Nick Desaulniers
2018-11-19 23:15   ` Joel Stanley
2018-11-20 18:20     ` Segher Boessenkool
2018-11-20 18:45       ` Nick Desaulniers
2018-11-20 20:15         ` Segher Boessenkool
2018-11-20 21:28           ` Nick Desaulniers

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.