linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] __div64_const32(): improve the generic C version
@ 2019-08-21  3:05 Nicolas Pitre
  2019-08-30  2:59 ` Nicolas Pitre
  2019-08-30 19:50 ` Arnd Bergmann
  0 siblings, 2 replies; 3+ messages in thread
From: Nicolas Pitre @ 2019-08-21  3:05 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: linux-kernel

Let's rework the generic C implementation of __arch_xprod_64() to avoid
large immediate values and convert some 64-bit variables to 32-bit ones
when possible. This allows gcc to produce smaller and better code. This
even produces optimal code on RISC-V.

Signed-off-by: Nicolas Pitre <nico@fluxnic.net>

diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h
index dc9726fdac..33358245b4 100644
--- a/include/asm-generic/div64.h
+++ b/include/asm-generic/div64.h
@@ -178,7 +178,8 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
 	uint32_t m_hi = m >> 32;
 	uint32_t n_lo = n;
 	uint32_t n_hi = n >> 32;
-	uint64_t res, tmp;
+	uint64_t res;
+	uint32_t res_lo, res_hi, tmp;
 
 	if (!bias) {
 		res = ((uint64_t)m_lo * n_lo) >> 32;
@@ -187,8 +188,9 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
 		res = (m + (uint64_t)m_lo * n_lo) >> 32;
 	} else {
 		res = m + (uint64_t)m_lo * n_lo;
-		tmp = (res < m) ? (1ULL << 32) : 0;
-		res = (res >> 32) + tmp;
+		res_lo = res >> 32;
+		res_hi = (res_lo < m_hi);
+		res = res_lo | ((uint64_t)res_hi << 32);
 	}
 
 	if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
@@ -197,10 +199,12 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
 		res += (uint64_t)m_hi * n_lo;
 		res >>= 32;
 	} else {
-		tmp = res += (uint64_t)m_lo * n_hi;
+		res += (uint64_t)m_lo * n_hi;
+		tmp = res >> 32;
 		res += (uint64_t)m_hi * n_lo;
-		tmp = (res < tmp) ? (1ULL << 32) : 0;
-		res = (res >> 32) + tmp;
+		res_lo = res >> 32;
+		res_hi = (res_lo < tmp);
+		res = res_lo | ((uint64_t)res_hi << 32);
 	}
 
 	res += (uint64_t)m_hi * n_hi;

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] __div64_const32(): improve the generic C version
  2019-08-21  3:05 [PATCH] __div64_const32(): improve the generic C version Nicolas Pitre
@ 2019-08-30  2:59 ` Nicolas Pitre
  2019-08-30 19:50 ` Arnd Bergmann
  1 sibling, 0 replies; 3+ messages in thread
From: Nicolas Pitre @ 2019-08-30  2:59 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: linux-kernel


Ping.

On Tue, 20 Aug 2019, Nicolas Pitre wrote:

> Let's rework that code to avoid large immediate values and convert some
> 64-bit variables to 32-bit ones when possible. This allows gcc to
> produce smaller and better code. This even produces optimal code on
> RISC-V.
> 
> Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
> 
> diff --git a/include/asm-generic/div64.h b/include/asm-generic/div64.h
> index dc9726fdac..33358245b4 100644
> --- a/include/asm-generic/div64.h
> +++ b/include/asm-generic/div64.h
> @@ -178,7 +178,8 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
>  	uint32_t m_hi = m >> 32;
>  	uint32_t n_lo = n;
>  	uint32_t n_hi = n >> 32;
> -	uint64_t res, tmp;
> +	uint64_t res;
> +	uint32_t res_lo, res_hi, tmp;
>  
>  	if (!bias) {
>  		res = ((uint64_t)m_lo * n_lo) >> 32;
> @@ -187,8 +188,9 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
>  		res = (m + (uint64_t)m_lo * n_lo) >> 32;
>  	} else {
>  		res = m + (uint64_t)m_lo * n_lo;
> -		tmp = (res < m) ? (1ULL << 32) : 0;
> -		res = (res >> 32) + tmp;
> +		res_lo = res >> 32;
> +		res_hi = (res_lo < m_hi);
> +		res = res_lo | ((uint64_t)res_hi << 32);
>  	}
>  
>  	if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
> @@ -197,10 +199,12 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
>  		res += (uint64_t)m_hi * n_lo;
>  		res >>= 32;
>  	} else {
> -		tmp = res += (uint64_t)m_lo * n_hi;
> +		res += (uint64_t)m_lo * n_hi;
> +		tmp = res >> 32;
>  		res += (uint64_t)m_hi * n_lo;
> -		tmp = (res < tmp) ? (1ULL << 32) : 0;
> -		res = (res >> 32) + tmp;
> +		res_lo = res >> 32;
> +		res_hi = (res_lo < tmp);
> +		res = res_lo | ((uint64_t)res_hi << 32);
>  	}
>  
>  	res += (uint64_t)m_hi * n_hi;
> 

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] __div64_const32(): improve the generic C version
  2019-08-21  3:05 [PATCH] __div64_const32(): improve the generic C version Nicolas Pitre
  2019-08-30  2:59 ` Nicolas Pitre
@ 2019-08-30 19:50 ` Arnd Bergmann
  1 sibling, 0 replies; 3+ messages in thread
From: Arnd Bergmann @ 2019-08-30 19:50 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: Linux Kernel Mailing List

On Wed, Aug 21, 2019 at 5:05 AM Nicolas Pitre <nico@fluxnic.net> wrote:
>
> Let's rework that code to avoid large immediate values and convert some
> 64-bit variables to 32-bit ones when possible. This allows gcc to
> produce smaller and better code. This even produces optimal code on
> RISC-V.
>
> Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
>

Applied, thanks!

      Arnd

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2019-08-30 19:50 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-08-21  3:05 [PATCH] __div64_const32(): improve the generic C version Nicolas Pitre
2019-08-30  2:59 ` Nicolas Pitre
2019-08-30 19:50 ` Arnd Bergmann

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).