linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Re: do_div64 generic
       [not found] <3F1360F4.2040602@mvista.com>
@ 2003-07-15  5:17 ` Bernardo Innocenti
  2003-07-15  5:38   ` Andrew Morton
  0 siblings, 1 reply; 15+ messages in thread
From: Bernardo Innocenti @ 2003-07-15  5:17 UTC (permalink / raw)
  To: george anzinger; +Cc: linux-kernel, Andrew Morton, Russell King, Linus Torvalds

On Tuesday 15 July 2003 04:03, george anzinger wrote:

> As part of the timer conversion and POSIX timers the following was
> added to the x86 kernels do_div64().  What we want is a u64/u32 that
> also returns the remainder.  (As in nsecs/NSEC_PER_SEC returns seconds
> and nsecs to fill in the timespec structure.)

I see. do_div() which updates the dividend in-place did not quite apply
to the usage pattern of the new timer code.

> The using code tests for the existance of div_long_long_rem and uses
> the longer do_div() if it does not exist.
>
> Could you consider adding this to the generic code?
>
> /*
>   * (long)X = ((long long)divs) / (long)div
>   * (long)rem = ((long long)divs) % (long)div
>   *
>   * Warning, this will do an exception if X overflows.
>   */
> #define div_long_long_rem(a,b,c) div_ll_X_l_rem(a,b,c)
>
> extern inline long
> div_ll_X_l_rem(long long divs, long div, long *rem)
> {
> 	long dum2;
>        __asm__("divl %2":"=a"(dum2), "=d"(*rem)
>
>        :	"rm"(div), "A"(divs));
>
> 	return dum2;
>
> }

Here's a patch that takes care of all architectures. Being somewhat
invasive, I'd like to have this code tested on a few more archs
before asking Linus to apply. Could you please try it on your targets?

--------------------------------------------------------------------------

 - div_long_long_rem(): add a generic version in asm-generic/div64.h;

 - add div_long_long_rem() implemented in terms of do_div() for those
   architectures that define their own assembly optimized version;

 - asm-arm/div64.h:do_div(): add missing parenthesis for macro args

 - kill dupes of div_long_long_rem() defined on-the-fly in
   linux/time.h and kernel/posix-timers.c;

 - make div_long_long_rem() arguments consistent with do_div() and
   its current usage (it's u64/u32 -> u32/u32).

 - add casts where a signed integer was being passed for the remainder;

Applies to 2.6.0-test1. Tested on i386 and m68knommu. Sorry if it
breaks anything else (it shouldn't, but...).


diff -Nrup linux-2.6.0-test1.orig/include/asm-generic/div64.h linux-2.6.0-test1/include/asm-generic/div64.h
--- linux-2.6.0-test1.orig/include/asm-generic/div64.h	2003-07-14 05:37:32.000000000 +0200
+++ linux-2.6.0-test1/include/asm-generic/div64.h	2003-07-15 04:56:08.000000000 +0200
@@ -4,6 +4,7 @@
  * Copyright (C) 2003 Bernardo Innocenti <bernie@develer.com>
  * Based on former asm-ppc/div64.h and asm-m68knommu/div64.h
  *
+ *
  * The semantics of do_div() are:
  *
  * uint32_t do_div(uint64_t *n, uint32_t base)
@@ -15,6 +16,17 @@
  *
  * NOTE: macro parameter n is evaluated multiple times,
  *       beware of side effects!
+ *
+ *
+ * Semantics for div_long_long_rem():
+ *
+ * uint32_t div_long_long_rem(uint64_t n, uint32_t base, uint32_t *rem)
+ * {
+ * 	*rem = n % base;
+ * 	return n / base;
+ * }
+ *
+ * NOTE: this will do an exception if result overflows.
  */
 
 #include <linux/types.h>
@@ -30,6 +42,13 @@
 	__rem;							\
  })
 
+extern inline uint32_t
+div_long_long_rem(uint64_t n, uint32_t base, uint32_t *rem)
+{
+	*rem = n % base;
+	return n / base;
+}
+
 #elif BITS_PER_LONG == 32
 
 extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
@@ -49,6 +68,18 @@ extern uint32_t __div64_32(uint64_t *div
 	__rem;						\
  })
 
+extern inline uint32_t
+div_long_long_rem(uint64_t n, uint32_t base, uint32_t *rem)
+{
+	if (likely((n >> 32) == 0)) {
+		*rem = ((uint32_t)n) % base;
+		return ((uint32_t)n) / base;
+	} else {
+		*rem = __div64_32(&n, base);
+		return n;
+	}
+}
+
 #else /* BITS_PER_LONG == ?? */
 
 # error do_div() does not yet support the C64
@@ -56,3 +87,4 @@ extern uint32_t __div64_32(uint64_t *div
 #endif /* BITS_PER_LONG */
 
 #endif /* _ASM_GENERIC_DIV64_H */
+
diff -Nrup linux-2.6.0-test1.orig/include/asm-arm/div64.h linux-2.6.0-test1/include/asm-arm/div64.h
--- linux-2.6.0-test1.orig/include/asm-arm/div64.h	2003-07-14 05:33:50.000000000 +0200
+++ linux-2.6.0-test1/include/asm-arm/div64.h	2003-07-15 05:23:42.000000000 +0200
@@ -1,18 +1,32 @@
 #ifndef __ASM_ARM_DIV64
 #define __ASM_ARM_DIV64
 
+#include <linux/types.h>
+
 /* We're not 64-bit, but... */
 #define do_div(n,base)						\
 ({								\
-	register int __res asm("r2") = base;			\
-	register unsigned long long __n asm("r0") = n;		\
+	register int __res asm("r2") = (base);			\
+	register unsigned long long __n asm("r0") = (n);	\
 	asm("bl do_div64"					\
 		: "=r" (__n), "=r" (__res)			\
 		: "0" (__n), "1" (__res)			\
 		: "r3", "ip", "lr", "cc");			\
-	n = __n;						\
+	(n) = __n;						\
 	__res;							\
 })
 
+/*
+ * See <asm-generic/div64.h> for specifications.
+ *
+ * FIXME: performance could be improved by writing specific asm
+ */
+extern inline uint32_t
+div_long_long_rem(uint64_t n, uint32_t base, uint32_t *rem)
+{
+	*rem = do_div(n, base);
+	return n;
+}
+
 #endif
 
diff -Nrup linux-2.6.0-test1.orig/include/asm-i386/div64.h linux-2.6.0-test1/include/asm-i386/div64.h
--- linux-2.6.0-test1.orig/include/asm-i386/div64.h	2003-07-14 05:28:55.000000000 +0200
+++ linux-2.6.0-test1/include/asm-i386/div64.h	2003-07-15 06:02:42.000000000 +0200
@@ -1,6 +1,12 @@
 #ifndef __I386_DIV64
 #define __I386_DIV64
 
+#include <linux/types.h>
+
+/* See asm-generic/div64.h for semantics of
+ * do_div() and div_long_long_rem()
+ */
+
 #define do_div(n,base) ({ \
 	unsigned long __upper, __low, __high, __mod; \
 	asm("":"=a" (__low), "=d" (__high):"A" (n)); \
@@ -14,22 +20,14 @@
 	__mod; \
 })
 
-/*
- * (long)X = ((long long)divs) / (long)div
- * (long)rem = ((long long)divs) % (long)div
- *
- * Warning, this will do an exception if X overflows.
- */
-#define div_long_long_rem(a,b,c) div_ll_X_l_rem(a,b,c)
-
-extern inline long
-div_ll_X_l_rem(long long divs, long div, long *rem)
+extern inline uint32_t
+div_long_long_rem(uint64_t divs, uint32_t div, uint32_t *rem)
 {
-	long dum2;
-      __asm__("divl %2":"=a"(dum2), "=d"(*rem)
-      :	"rm"(div), "A"(divs));
-
-	return dum2;
+	uint32_t result;
+	__asm__("divl %2":"=a"(result), "=d"(*rem)
+		: "rm"(div), "A"(divs));
 
+	return result;
 }
+
 #endif
diff -Nrup linux-2.6.0-test1.orig/include/asm-m68k/div64.h linux-2.6.0-test1/include/asm-m68k/div64.h
--- linux-2.6.0-test1.orig/include/asm-m68k/div64.h	2003-07-14 05:37:59.000000000 +0200
+++ linux-2.6.0-test1/include/asm-m68k/div64.h	2003-07-15 05:23:53.000000000 +0200
@@ -1,6 +1,8 @@
 #ifndef _M68K_DIV64_H
 #define _M68K_DIV64_H
 
+#include <linux/types.h>
+
 /* n = n / base; return rem; */
 
 #define do_div(n, base) ({					\
@@ -23,4 +25,16 @@
 	__rem;							\
 })
 
+/*
+ * See <asm-generic/div64.h> for specifications.
+ *
+ * FIXME: performance could be improved by writing specific asm
+ */
+extern inline uint32_t
+div_long_long_rem(uint64_t n, uint32_t base, uint32_t *rem)
+{
+	*rem = do_div(n, base);
+	return n;
+}
+
 #endif /* _M68K_DIV64_H */
diff -Nrup linux-2.6.0-test1.orig/include/asm-mips/div64.h linux-2.6.0-test1/include/asm-mips/div64.h
--- linux-2.6.0-test1.orig/include/asm-mips/div64.h	2003-07-14 05:36:33.000000000 +0200
+++ linux-2.6.0-test1/include/asm-mips/div64.h	2003-07-15 05:24:16.000000000 +0200
@@ -8,6 +8,8 @@
 #ifndef _ASM_DIV64_H
 #define _ASM_DIV64_H
 
+#include <linux/types.h>
+
 /*
  * No traps on overflows for any of these...
  */
@@ -73,4 +75,16 @@
 	(n) = __quot; \
 	__mod; })
 
+/*
+ * See <asm-generic/div64.h> for specifications.
+ *
+ * FIXME: performance could be improved by writing specific asm
+ */
+extern inline uint32_t
+div_long_long_rem(uint64_t n, uint32_t base, uint32_t *rem)
+{
+	*rem = do_div(n, base);
+	return n;
+}
+
 #endif /* _ASM_DIV64_H */
diff -Nrup linux-2.6.0-test1.orig/include/asm-s390/div64.h linux-2.6.0-test1/include/asm-s390/div64.h
--- linux-2.6.0-test1.orig/include/asm-s390/div64.h	2003-07-14 05:35:52.000000000 +0200
+++ linux-2.6.0-test1/include/asm-s390/div64.h	2003-07-15 05:24:28.000000000 +0200
@@ -3,6 +3,8 @@
 
 #ifndef __s390x__
 
+#include <linux/types.h>
+
 /* for do_div "base" needs to be smaller than 2^31-1 */
 #define do_div(n, base) ({                                      \
 	unsigned long long __n = (n);				\
@@ -42,6 +44,18 @@
         __r;                                                    \
 })
 
+/*
+ * See <asm-generic/div64.h> for specifications.
+ *
+ * FIXME: performance could be improved by writing specific asm
+ */
+extern inline uint32_t
+div_long_long_rem(uint64_t n, uint32_t base, uint32_t *rem)
+{
+	*rem = do_div(n, base);
+	return n;
+}
+
 #else /* __s390x__ */
 #include <asm-generic/div64.h>
 #endif /* __s390x__ */
diff -Nrup linux-2.6.0-test1.orig/include/linux/time.h linux-2.6.0-test1/include/linux/time.h
--- linux-2.6.0-test1.orig/include/linux/time.h	2003-07-14 05:35:16.000000000 +0200
+++ linux-2.6.0-test1/include/linux/time.h	2003-07-15 05:20:26.000000000 +0200
@@ -28,14 +28,6 @@ struct timezone {
 #include <linux/seqlock.h>
 #include <linux/timex.h>
 #include <asm/div64.h>
-#ifndef div_long_long_rem
-
-#define div_long_long_rem(dividend,divisor,remainder) ({ \
-		       u64 result = dividend;		\
-		       *remainder = do_div(result,divisor); \
-		       result; })
-
-#endif
 
 /*
  * Have the 32 bit jiffies value wrap 5 minutes after boot
@@ -122,7 +114,7 @@ jiffies_to_timespec(unsigned long jiffie
 	 * one divide.
 	 */
 	u64 nsec = (u64)jiffies * TICK_NSEC; 
-	value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &value->tv_nsec);
+	value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, (uint32_t *)&value->tv_nsec);
 }
 
 /* Same for "timeval" */
@@ -150,7 +142,7 @@ jiffies_to_timeval(unsigned long jiffies
 	 * one divide.
 	 */
 	u64 nsec = (u64)jiffies * TICK_NSEC; 
-	value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &value->tv_usec);
+	value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, (uint32_t *)&value->tv_usec);
 	value->tv_usec /= NSEC_PER_USEC;
 }
 
diff -Nrup linux-2.6.0-test1.orig/kernel/posix-timers.c linux-2.6.0-test1/kernel/posix-timers.c
--- linux-2.6.0-test1.orig/kernel/posix-timers.c	2003-07-14 05:34:32.000000000 +0200
+++ linux-2.6.0-test1/kernel/posix-timers.c	2003-07-15 05:21:24.000000000 +0200
@@ -23,16 +23,8 @@
 #include <linux/idr.h>
 #include <linux/posix-timers.h>
 #include <linux/wait.h>
-
-#ifndef div_long_long_rem
 #include <asm/div64.h>
 
-#define div_long_long_rem(dividend,divisor,remainder) ({ \
-		       u64 result = dividend;		\
-		       *remainder = do_div(result,divisor); \
-		       result; })
-
-#endif
 #define CLOCK_REALTIME_RES TICK_NSEC  // In nano seconds.
 
 static inline u64  mpy_l_X_l_ll(unsigned long mpy1,unsigned long mpy2)
@@ -1226,7 +1218,7 @@ do_clock_nanosleep(clockid_t which_clock
 		left *= TICK_NSEC;
 		tsave->tv_sec = div_long_long_rem(left, 
 						  NSEC_PER_SEC, 
-						  &tsave->tv_nsec);
+						  (uint32_t *)&tsave->tv_nsec);
 		restart_block->fn = clock_nanosleep_restart;
 		restart_block->arg0 = which_clock;
 		restart_block->arg1 = (unsigned long)tsave;

-- 
  // Bernardo Innocenti - Develer S.r.l., R&D dept.
\X/  http://www.develer.com/

Please don't send Word attachments - http://www.gnu.org/philosophy/no-word-attachments.html



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: do_div64 generic
  2003-07-15  5:17 ` do_div64 generic Bernardo Innocenti
@ 2003-07-15  5:38   ` Andrew Morton
  2003-07-15  6:23     ` Bernardo Innocenti
  0 siblings, 1 reply; 15+ messages in thread
From: Andrew Morton @ 2003-07-15  5:38 UTC (permalink / raw)
  To: Bernardo Innocenti; +Cc: george, linux-kernel, akpm, rmk, torvalds

Bernardo Innocenti <bernie@develer.com> wrote:
>
> I see. do_div() which updates the dividend in-place did not quite apply
>  to the usage pattern of the new timer code.

This is getting silly.  The timer stuff has been an ongoing bug factory and
now it is colluding with the do_div() saga.

Can we just get the things working, without mangling every architecture
again?

>  > The using code tests for the existance of div_long_long_rem and uses
>  > the longer do_div() if it does not exist.
>  >
>  > Could you consider adding this to the generic code?
>  >
>  > /*
>  >   * (long)X = ((long long)divs) / (long)div
>  >   * (long)rem = ((long long)divs) % (long)div
>  >   *
>  >   * Warning, this will do an exception if X overflows.
>  >   */
>  > #define div_long_long_rem(a,b,c) div_ll_X_l_rem(a,b,c)
>  >
>  > extern inline long
>  > div_ll_X_l_rem(long long divs, long div, long *rem)
>  > {
>  > 	long dum2;
>  >        __asm__("divl %2":"=a"(dum2), "=d"(*rem)
>  >
>  >        :	"rm"(div), "A"(divs));
>  >
>  > 	return dum2;
>  >
>  > }
> 
>  Here's a patch that takes care of all architectures.

AFAICT, we can just rework posix-timers.c to use the standard do_div() and
be done with it, can we not?  ie: no div_long_long_rem(), no
div_ll_X_l_rem().  Just do_div().

Please use `static inline', not `extern inline', btw.



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: do_div64 generic
  2003-07-15  5:38   ` Andrew Morton
@ 2003-07-15  6:23     ` Bernardo Innocenti
  2003-07-15  6:30       ` Andrew Morton
  2003-07-15 21:52       ` george anzinger
  0 siblings, 2 replies; 15+ messages in thread
From: Bernardo Innocenti @ 2003-07-15  6:23 UTC (permalink / raw)
  To: Andrew Morton; +Cc: george, linux-kernel, akpm, rmk, torvalds

On Tuesday 15 July 2003 07:38, Andrew Morton wrote:

> >  Here's a patch that takes care of all architectures.
>
> AFAICT, we can just rework posix-timers.c to use the standard do_div() and
> be done with it, can we not?  ie: no div_long_long_rem(), no
> div_ll_X_l_rem().  Just do_div().

We could, and it would be easy and almost as efficient in all places
where div_long_long_rem() is being used:

   value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &value->tv_nsec);

becomes:

   value->tv_nsec = do_div(nsec, NSEC_PER_SEC);
   value->tv_sec = nsec;

George, do you agree? May I go on and post a patch killing
div_long_long_rem() everywhere?

> Please use `static inline', not `extern inline', btw.

Oops. Fixed. I had just copied it over from asm-i386/div64.h.

Is it worth posting a big patch to replace all remaining
occurrences of 'extern inline' all over the kernel?

I'd also like to point out that __inline__ is often being
used inconsistently. We should be using __inline__ rather
than inline in public headers needed by glibc for apps
compiled with -ansi. Since it's so ugly, it shouldn't
be used in other places.

-- 
  // Bernardo Innocenti - Develer S.r.l., R&D dept.
\X/  http://www.develer.com/

Please don't send Word attachments - http://www.gnu.org/philosophy/no-word-attachments.html



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: do_div64 generic
  2003-07-15  6:23     ` Bernardo Innocenti
@ 2003-07-15  6:30       ` Andrew Morton
  2003-07-15 21:52       ` george anzinger
  1 sibling, 0 replies; 15+ messages in thread
From: Andrew Morton @ 2003-07-15  6:30 UTC (permalink / raw)
  To: Bernardo Innocenti; +Cc: george, linux-kernel, rmk, torvalds

Bernardo Innocenti <bernie@develer.com> wrote:
>
> Is it worth posting a big patch to replace all remaining
>  occurrences of 'extern inline' all over the kernel?

Not really.  It's a 400k patch.  I have a script which does it.  For those
rare occasions when one wants to build the kernel with -fnoinline.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: do_div64 generic
  2003-07-15  6:23     ` Bernardo Innocenti
  2003-07-15  6:30       ` Andrew Morton
@ 2003-07-15 21:52       ` george anzinger
  2003-07-15 22:06         ` Andrew Morton
  1 sibling, 1 reply; 15+ messages in thread
From: george anzinger @ 2003-07-15 21:52 UTC (permalink / raw)
  To: Bernardo Innocenti; +Cc: Andrew Morton, linux-kernel, akpm, rmk, torvalds

Bernardo Innocenti wrote:
> On Tuesday 15 July 2003 07:38, Andrew Morton wrote:
> 
> 
>>> Here's a patch that takes care of all architectures.
>>
>>AFAICT, we can just rework posix-timers.c to use the standard do_div() and
>>be done with it, can we not?  ie: no div_long_long_rem(), no
>>div_ll_X_l_rem().  Just do_div().
> 
> 
> We could, and it would be easy and almost as efficient in all places
> where div_long_long_rem() is being used:
> 
>    value->tv_sec = div_long_long_rem(nsec, NSEC_PER_SEC, &value->tv_nsec);
> 
> becomes:
> 
>    value->tv_nsec = do_div(nsec, NSEC_PER_SEC);
>    value->tv_sec = nsec;
> 
> George, do you agree? May I go on and post a patch killing
> div_long_long_rem() everywhere?

The issue is that div is a very long instruction and the do_div() 
thing uses 2 or three of them, while the div_long_long_rem() is just 
1.  Also, a lot of archs already have the required div by a different 
name.  It all boils down to a performance thing.

-g
> 
> 
>>Please use `static inline', not `extern inline', btw.
> 
> 
> Oops. Fixed. I had just copied it over from asm-i386/div64.h.
> 
> Is it worth posting a big patch to replace all remaining
> occurrences of 'extern inline' all over the kernel?
> 
> I'd also like to point out that __inline__ is often being
> used inconsistently. We should be using __inline__ rather
> than inline in public headers needed by glibc for apps
> compiled with -ansi. Since it's so ugly, it shouldn't
> be used in other places.
> 

-- 
George Anzinger   george@mvista.com
High-res-timers:  http://sourceforge.net/projects/high-res-timers/
Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: do_div64 generic
  2003-07-15 21:52       ` george anzinger
@ 2003-07-15 22:06         ` Andrew Morton
  2003-07-16  0:07           ` george anzinger
  0 siblings, 1 reply; 15+ messages in thread
From: Andrew Morton @ 2003-07-15 22:06 UTC (permalink / raw)
  To: george anzinger; +Cc: bernie, linux-kernel, rmk, torvalds

george anzinger <george@mvista.com> wrote:
>
> > George, do you agree? May I go on and post a patch killing
> > div_long_long_rem() everywhere?
> 
> The issue is that div is a very long instruction and the do_div() 
> thing uses 2 or three of them, while the div_long_long_rem() is just 
> 1.  Also, a lot of archs already have the required div by a different 
> name.  It all boils down to a performance thing.

It is only used in nanosleep(), and then only in the case where the sleep
terminated early.

If someone is calling nanosleep() so frequently for this to matter, the
time spent in divide is the least of their problems.  Unless you have some
real-worldish benchmarks to demonstrate otherwise?

You know what they say about premtur optmstns, and having to propagate
funky new divide primitives across N architectures is indeed evil.

Bernardo, can you do the patch please?

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: do_div64 generic
  2003-07-15 22:06         ` Andrew Morton
@ 2003-07-16  0:07           ` george anzinger
  2003-07-16 18:33             ` Bernardo Innocenti
  0 siblings, 1 reply; 15+ messages in thread
From: george anzinger @ 2003-07-16  0:07 UTC (permalink / raw)
  To: Andrew Morton; +Cc: bernie, linux-kernel, rmk, torvalds

Andrew Morton wrote:
> george anzinger <george@mvista.com> wrote:
> 
>>>George, do you agree? May I go on and post a patch killing
>>>div_long_long_rem() everywhere?
>>
>>The issue is that div is a very long instruction and the do_div() 
>>thing uses 2 or three of them, while the div_long_long_rem() is just 
>>1.  Also, a lot of archs already have the required div by a different 
>>name.  It all boils down to a performance thing.
> 
> 
> It is only used in nanosleep(), and then only in the case where the sleep
> terminated early.
> 
> If someone is calling nanosleep() so frequently for this to matter, the
> time spent in divide is the least of their problems.  Unless you have some
> real-worldish benchmarks to demonstrate otherwise?

It is also used in the jiffies to timespec and jiffies to timeval code 
in timer.h, if memory serves.
> 
> You know what they say about premtur optmstns, and having to propagate
> funky new divide primitives across N architectures is indeed evil.

Hm.  I only want the simple div.  64-bit/32-bit in two 32-bit results. 
  Is this funky?  And the "evil" #ifdef allows archs to not do it.
> 
> Bernardo, can you do the patch please?
> 
> 

-- 
George Anzinger   george@mvista.com
High-res-timers:  http://sourceforge.net/projects/high-res-timers/
Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: do_div64 generic
  2003-07-16  0:07           ` george anzinger
@ 2003-07-16 18:33             ` Bernardo Innocenti
  2003-07-17 21:10               ` Bernardo Innocenti
  0 siblings, 1 reply; 15+ messages in thread
From: Bernardo Innocenti @ 2003-07-16 18:33 UTC (permalink / raw)
  To: george anzinger, Andrew Morton; +Cc: linux-kernel, rmk, torvalds

On Wednesday 16 July 2003 02:07, george anzinger wrote:

> > It is only used in nanosleep(), and then only in the case where the sleep
> > terminated early.
> >
> > If someone is calling nanosleep() so frequently for this to matter, the
> > time spent in divide is the least of their problems.  Unless you have
> > some real-worldish benchmarks to demonstrate otherwise?
>
> It is also used in the jiffies to timespec and jiffies to timeval code
> in timer.h, if memory serves.

 It's true that it's being used in more places than Andrew initially thought,
but a quick scan for jiffies_to_timeval() and jiffies_to_timespec() indicates
that it's not being used in any place where performance matters too much.

 I still consider do_div() a big kludge to workaround extremely poor code
generated by GCC. If we allow this, I see no reason for not allowing a very
similar kludge which provides even better performance.

 I agree with Andrew that we've run out of time for breaking things again,
so I won't be suggesting to implement a whole new set of 64bit operations
and use them all over the kernel. However, the div_long_long_rem() function
has been _already_ in use on i386 for some time, therefore the safest fix
we can do at this point is adding generic support for all other archs.


> > You know what they say about premtur optmstns, and having to propagate
> > funky new divide primitives across N architectures is indeed evil.
>
> Hm.  I only want the simple div.  64-bit/32-bit in two 32-bit results.
>   Is this funky?  And the "evil" #ifdef allows archs to not do it.

The div_long_long_rem() interface is even somewhat cleaner than do_div() and
can be implemented easily on architectures providing a 64/32 -> 32q/32r operation
(most 32bit processors do have it).

> > Bernardo, can you do the patch please?

 I would be glad to do it once the discussion has settled, whatever
the final decision will be. Just don't make me do it twice, please ;-)

-- 
  // Bernardo Innocenti - Develer S.r.l., R&D dept.
\X/  http://www.develer.com/

Please don't send Word attachments - http://www.gnu.org/philosophy/no-word-attachments.html



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: do_div64 generic
  2003-07-16 18:33             ` Bernardo Innocenti
@ 2003-07-17 21:10               ` Bernardo Innocenti
  2003-07-17 21:16                 ` Andrew Morton
  2003-07-17 23:09                 ` george anzinger
  0 siblings, 2 replies; 15+ messages in thread
From: Bernardo Innocenti @ 2003-07-17 21:10 UTC (permalink / raw)
  To: george anzinger, Andrew Morton; +Cc: linux-kernel, rmk, torvalds

On Wednesday 16 July 2003 20:33, Bernardo Innocenti wrote:

> > > Bernardo, can you do the patch please?
>
>  I would be glad to do it once the discussion has settled, whatever
> the final decision will be. Just don't make me do it twice, please ;-)

So far nobody have commented and the problem is still unaddressed.
What shall I do? As far as I can tell, our options are:

1) add surrogates of div_long_long_rem() in asm-generic/div64.h and in
   all other archs that have their own optimized versions of do_div().
   I already have a patch for this, but it has been tested only on i386
   and m68knommu.

2) replace all uses of div_long_long_rem() (I see onlt 4 of them in
   2.6.0-test1) with do_div(). This is slightly less efficient, but
   easier to maintain in the long term.

I shall note that I _hate_ fixing compiler problems in the kernel. The
real fix I'm dreaming involves adding specialized patterns in GCC to
generate an optimal instruction sequence for all these cases.

Of course we should realize that we need to support older versions of
GCC and, even if we didn't, we can't wait for the next GCC release :-)

So, if we're going to live with do_div(), I think we could as well
have a set of macros for the most frequent cases. I've just spotted
another candidate in kernel/posix-timers.c: mpy_l_X_l_ll().

This is not a third option for fixing our immediate problem: it's
just an idea for future improvement.

Andrew, George, please comment.

-- 
  // Bernardo Innocenti - Develer S.r.l., R&D dept.
\X/  http://www.develer.com/

Please don't send Word attachments - http://www.gnu.org/philosophy/no-word-attachments.html



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: do_div64 generic
  2003-07-17 21:10               ` Bernardo Innocenti
@ 2003-07-17 21:16                 ` Andrew Morton
  2003-07-17 22:43                   ` Bernardo Innocenti
  2003-07-17 23:10                   ` george anzinger
  2003-07-17 23:09                 ` george anzinger
  1 sibling, 2 replies; 15+ messages in thread
From: Andrew Morton @ 2003-07-17 21:16 UTC (permalink / raw)
  To: Bernardo Innocenti; +Cc: george, linux-kernel, rmk, torvalds

Bernardo Innocenti <bernie@develer.com> wrote:
>
> 2) replace all uses of div_long_long_rem() (I see onlt 4 of them in
>    2.6.0-test1) with do_div(). This is slightly less efficient, but
>    easier to maintain in the long term.

Ths one's OK by me.  Let's just fix the bug with minimum risk and churn.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: do_div64 generic
  2003-07-17 21:16                 ` Andrew Morton
@ 2003-07-17 22:43                   ` Bernardo Innocenti
  2003-07-17 23:10                   ` george anzinger
  1 sibling, 0 replies; 15+ messages in thread
From: Bernardo Innocenti @ 2003-07-17 22:43 UTC (permalink / raw)
  To: Andrew Morton; +Cc: george, linux-kernel, rmk, torvalds

On Thursday 17 July 2003 23:16, Andrew Morton wrote:
> Bernardo Innocenti <bernie@develer.com> wrote:
> > 2) replace all uses of div_long_long_rem() (I see onlt 4 of them in
> >    2.6.0-test1) with do_div(). This is slightly less efficient, but
> >    easier to maintain in the long term.
>
> Ths one's OK by me.  Let's just fix the bug with minimum risk and churn.

Ok, I will be preparing a patch tomorrow unless there are more objections.

-- 
  // Bernardo Innocenti - Develer S.r.l., R&D dept.
\X/  http://www.develer.com/

Please don't send Word attachments - http://www.gnu.org/philosophy/no-word-attachments.html



^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: do_div64 generic
  2003-07-17 21:10               ` Bernardo Innocenti
  2003-07-17 21:16                 ` Andrew Morton
@ 2003-07-17 23:09                 ` george anzinger
  1 sibling, 0 replies; 15+ messages in thread
From: george anzinger @ 2003-07-17 23:09 UTC (permalink / raw)
  To: Bernardo Innocenti; +Cc: Andrew Morton, linux-kernel, rmk, torvalds

Bernardo Innocenti wrote:
> On Wednesday 16 July 2003 20:33, Bernardo Innocenti wrote:
> 
> 
>>>>Bernardo, can you do the patch please?
>>
>> I would be glad to do it once the discussion has settled, whatever
>>the final decision will be. Just don't make me do it twice, please ;-)
> 
> 
> So far nobody have commented and the problem is still unaddressed.
> What shall I do? As far as I can tell, our options are:
> 
> 1) add surrogates of div_long_long_rem() in asm-generic/div64.h and in
>    all other archs that have their own optimized versions of do_div().
>    I already have a patch for this, but it has been tested only on i386
>    and m68knommu.
> 
> 2) replace all uses of div_long_long_rem() (I see onlt 4 of them in
>    2.6.0-test1) with do_div(). This is slightly less efficient, but
>    easier to maintain in the long term.  

Actually, the macro to do this is already there.  Is there a real 
reason not to use it.  The using code sure looks cleaner with it.
> 
> I shall note that I _hate_ fixing compiler problems in the kernel. The
> real fix I'm dreaming involves adding specialized patterns in GCC to
> generate an optimal instruction sequence for all these cases.

I would love to get to the instruction via normal C.
> 
> Of course we should realize that we need to support older versions of
> GCC and, even if we didn't, we can't wait for the next GCC release :-)
> 
> So, if we're going to live with do_div(), I think we could as well
> have a set of macros for the most frequent cases. I've just spotted
> another candidate in kernel/posix-timers.c: mpy_l_X_l_ll().

The mpy_l_X_l_ll() is there because it is so easy to get it wrong.  It 
is standard C (well gcc) but if you don't get the casting just right 
it will throw away the high bits.
> 
> This is not a third option for fixing our immediate problem: it's
> just an idea for future improvement.
> 
> Andrew, George, please comment.

Is there any need to change any thing at all?  Or maybe a comment 
somewhere on what direction we would like things to go.

When I look at the div code on the risc machines I begin to really 
understand why gcc avoids the div instruction so actively.  (It 
optimizes away almost all divides by constants.)
> 

-- 
George Anzinger   george@mvista.com
High-res-timers:  http://sourceforge.net/projects/high-res-timers/
Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: do_div64 generic
  2003-07-17 21:16                 ` Andrew Morton
  2003-07-17 22:43                   ` Bernardo Innocenti
@ 2003-07-17 23:10                   ` george anzinger
  2003-07-18  3:19                     ` Andrew Morton
  1 sibling, 1 reply; 15+ messages in thread
From: george anzinger @ 2003-07-17 23:10 UTC (permalink / raw)
  To: Andrew Morton; +Cc: Bernardo Innocenti, linux-kernel, rmk, torvalds

Andrew Morton wrote:
> Bernardo Innocenti <bernie@develer.com> wrote:
> 
>>2) replace all uses of div_long_long_rem() (I see onlt 4 of them in
>>   2.6.0-test1) with do_div(). This is slightly less efficient, but
>>   easier to maintain in the long term.
> 
> 
> Ths one's OK by me.  Let's just fix the bug with minimum risk and churn.

Uh, bug?  I was not aware that there was a bug.  As far as I know, 
nothing is broken.

> 
> 

-- 
George Anzinger   george@mvista.com
High-res-timers:  http://sourceforge.net/projects/high-res-timers/
Preemption patch: http://www.kernel.org/pub/linux/kernel/people/rml


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: do_div64 generic
  2003-07-17 23:10                   ` george anzinger
@ 2003-07-18  3:19                     ` Andrew Morton
  2003-07-18  4:14                       ` Bernardo Innocenti
  0 siblings, 1 reply; 15+ messages in thread
From: Andrew Morton @ 2003-07-18  3:19 UTC (permalink / raw)
  To: george anzinger; +Cc: bernie, linux-kernel, rmk, torvalds

george anzinger <george@mvista.com> wrote:
>
> > Ths one's OK by me.  Let's just fix the bug with minimum risk and churn.
> 
>  Uh, bug?  I was not aware that there was a bug.  As far as I know, 
>  nothing is broken.

wtf?  Then why are people running around brandishing big scary patches
at me?


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: do_div64 generic
  2003-07-18  3:19                     ` Andrew Morton
@ 2003-07-18  4:14                       ` Bernardo Innocenti
  0 siblings, 0 replies; 15+ messages in thread
From: Bernardo Innocenti @ 2003-07-18  4:14 UTC (permalink / raw)
  To: Andrew Morton; +Cc: george anzinger, linux-kernel, rmk, torvalds

Andrew Morton wrote:

>>>Ths one's OK by me.  Let's just fix the bug with minimum risk and churn.
>>
>> Uh, bug?  I was not aware that there was a bug.  As far as I know, 
>> nothing is broken.
> 
> wtf?  Then why are people running around brandishing big scary patches
> at me?

ROTFL! :-)

I've been asked by George to add generic support for the existing
div_long_long_rem() inline function.

The patch might be big, but not that much scary... I'm just replacing
two local redefinitions of the macro with a single inline in
asm-generic/div64.h. It's more a cleanup than a bug fix.

The patch also takes care of the few archs that wrote their own
optimized versions of do_div(). For those, I've implemented
div_long_long_rem() in terms of do_div() because it might still
be better than the generic version. I've left a FIXME there to
let the arch maintainers know that these could be better optimized.

And, as a special bonus, the patch also adds missing parentheses
around do_div() arguments in asm-arm/div64.h. This is the only real
bug being fixed.




^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2003-07-18  3:57 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <3F1360F4.2040602@mvista.com>
2003-07-15  5:17 ` do_div64 generic Bernardo Innocenti
2003-07-15  5:38   ` Andrew Morton
2003-07-15  6:23     ` Bernardo Innocenti
2003-07-15  6:30       ` Andrew Morton
2003-07-15 21:52       ` george anzinger
2003-07-15 22:06         ` Andrew Morton
2003-07-16  0:07           ` george anzinger
2003-07-16 18:33             ` Bernardo Innocenti
2003-07-17 21:10               ` Bernardo Innocenti
2003-07-17 21:16                 ` Andrew Morton
2003-07-17 22:43                   ` Bernardo Innocenti
2003-07-17 23:10                   ` george anzinger
2003-07-18  3:19                     ` Andrew Morton
2003-07-18  4:14                       ` Bernardo Innocenti
2003-07-17 23:09                 ` george anzinger

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).