All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Jan Beulich <JBeulich@suse.com>,
	xen-devel <xen-devel@lists.xenproject.org>
Cc: Ian Campbell <Ian.Campbell@eu.citrix.com>,
	Keir Fraser <keir@xen.org>, Tim Deegan <tim@xen.org>,
	Ian Jackson <Ian.Jackson@eu.citrix.com>,
	Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Subject: Re: [PATCH 1/2] make fls() and ffs() consistent across architectures
Date: Fri, 23 Jan 2015 14:38:21 +0000	[thread overview]
Message-ID: <54C25CDD.4080200@citrix.com> (raw)
In-Reply-To: <54C10B4F0200007800058252@mail.emea.novell.com>

On 22/01/15 13:38, Jan Beulich wrote:
> Their parameter types differed between ARM and x86.
>
> Along with generalizing the functions this fixes
> - x86's non-long functions having long parameter types
> - ARM's ffs() using a long intermediate variable
> - generic_fls64() being broken when the upper half of the input is
>   non-zero
> - common (and in one case also ARM) code using fls() when flsl() was
>   meant
>
> Also drop ARM's constant_fls() in favor of the identical generic_fls().
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

The refactoring looks fine, so Reviewed-by: Andrew Cooper
<andrew.cooper3@citrix.com>

As for the x86 asm itself, what about
http://lkml.kernel.org/r/5058741E020000780009C014@nat28.tlf.novell.com ?
It would appear that the same optimisation is relevant for Xens __ffs()

~Andrew

>
> --- unstable.orig/xen/common/page_alloc.c	2014-11-07 17:16:38.000000000 +0100
> +++ unstable/xen/common/page_alloc.c	2015-01-21 08:53:14.000000000 +0100
> @@ -278,7 +278,7 @@ unsigned long __init alloc_boot_pages(
>  
>  #define bits_to_zone(b) (((b) < (PAGE_SHIFT + 1)) ? 1 : ((b) - PAGE_SHIFT))
>  #define page_to_zone(pg) (is_xen_heap_page(pg) ? MEMZONE_XEN :  \
> -                          (fls(page_to_mfn(pg)) ? : 1))
> +                          (flsl(page_to_mfn(pg)) ? : 1))
>  
>  typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
>  static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
> @@ -1259,7 +1259,7 @@ void __init end_boot_allocator(void)
>      {
>  #ifdef CONFIG_X86
>          dma_bitsize = min_t(unsigned int,
> -                            fls(NODE_DATA(0)->node_spanned_pages) - 1
> +                            flsl(NODE_DATA(0)->node_spanned_pages) - 1
>                              + PAGE_SHIFT - 2,
>                              32);
>  #else
> @@ -1544,7 +1544,7 @@ static unsigned int __read_mostly xenhea
>  
>  void __init xenheap_max_mfn(unsigned long mfn)
>  {
> -    xenheap_bits = fls(mfn) + PAGE_SHIFT;
> +    xenheap_bits = flsl(mfn) + PAGE_SHIFT;
>  }
>  
>  void init_xenheap_pages(paddr_t ps, paddr_t pe)
> --- unstable.orig/xen/common/xmalloc_tlsf.c	2015-01-22 13:28:41.000000000 +0100
> +++ unstable/xen/common/xmalloc_tlsf.c	2015-01-21 08:53:05.000000000 +0100
> @@ -138,9 +138,9 @@ static inline void MAPPING_SEARCH(unsign
>      }
>      else
>      {
> -        t = (1 << (fls(*r) - 1 - MAX_LOG2_SLI)) - 1;
> +        t = (1 << (flsl(*r) - 1 - MAX_LOG2_SLI)) - 1;
>          *r = *r + t;
> -        *fl = fls(*r) - 1;
> +        *fl = flsl(*r) - 1;
>          *sl = (*r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI;
>          *fl -= FLI_OFFSET;
>          /*if ((*fl -= FLI_OFFSET) < 0) // FL will be always >0!
> @@ -164,7 +164,7 @@ static inline void MAPPING_INSERT(unsign
>      }
>      else
>      {
> -        *fl = fls(r) - 1;
> +        *fl = flsl(r) - 1;
>          *sl = (r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI;
>          *fl -= FLI_OFFSET;
>      }
> --- unstable.orig/xen/include/asm-arm/arm32/bitops.h	2015-01-22 13:28:41.000000000 +0100
> +++ unstable/xen/include/asm-arm/arm32/bitops.h	2015-01-21 10:06:01.000000000 +0100
> @@ -15,6 +15,8 @@ extern int _test_and_change_bit(int nr, 
>  #define test_and_clear_bit(n,p)   _test_and_clear_bit(n,p)
>  #define test_and_change_bit(n,p)  _test_and_change_bit(n,p)
>  
> +#define flsl fls
> +
>  /*
>   * Little endian assembly bitops.  nr = 0 -> byte 0 bit 0.
>   */
> --- unstable.orig/xen/include/asm-arm/arm64/bitops.h	2015-01-22 13:28:41.000000000 +0100
> +++ unstable/xen/include/asm-arm/arm64/bitops.h	2015-01-21 08:28:43.000000000 +0100
> @@ -32,6 +32,17 @@ static /*__*/always_inline unsigned long
>   */
>  #define ffz(x)  __ffs(~(x))
>  
> +static inline int flsl(unsigned long x)
> +{
> +        int ret;
> +
> +        if (__builtin_constant_p(x))
> +               return generic_flsl(x);
> +
> +        asm("clz\t%0, %1" : "=r" (ret) : "r" (x));
> +        return BITS_PER_LONG - ret;
> +}
> +
>  /* Based on linux/include/asm-generic/bitops/find.h */
>  
>  #ifndef find_next_bit
> --- unstable.orig/xen/include/asm-arm/bitops.h	2015-01-22 13:28:41.000000000 +0100
> +++ unstable/xen/include/asm-arm/bitops.h	2015-01-22 13:30:17.000000000 +0100
> @@ -99,46 +99,17 @@ static inline int test_bit(int nr, const
>          return 1UL & (p[BIT_WORD(nr)] >> (nr & (BITS_PER_WORD-1)));
>  }
>  
> -static inline int constant_fls(int x)
> -{
> -        int r = 32;
> -
> -        if (!x)
> -                return 0;
> -        if (!(x & 0xffff0000u)) {
> -                x <<= 16;
> -                r -= 16;
> -        }
> -        if (!(x & 0xff000000u)) {
> -                x <<= 8;
> -                r -= 8;
> -        }
> -        if (!(x & 0xf0000000u)) {
> -                x <<= 4;
> -                r -= 4;
> -        }
> -        if (!(x & 0xc0000000u)) {
> -                x <<= 2;
> -                r -= 2;
> -        }
> -        if (!(x & 0x80000000u)) {
> -                x <<= 1;
> -                r -= 1;
> -        }
> -        return r;
> -}
> -
>  /*
>   * On ARMv5 and above those functions can be implemented around
>   * the clz instruction for much better code efficiency.
>   */
>  
> -static inline int fls(int x)
> +static inline int fls(unsigned int x)
>  {
>          int ret;
>  
>          if (__builtin_constant_p(x))
> -               return constant_fls(x);
> +               return generic_fls(x);
>  
>          asm("clz\t%0, %1" : "=r" (ret) : "r" (x));
>          ret = BITS_PER_LONG - ret;
> @@ -146,7 +117,8 @@ static inline int fls(int x)
>  }
>  
>  
> -#define ffs(x) ({ unsigned long __t = (x); fls(__t & -__t); })
> +#define ffs(x) ({ unsigned int __t = (x); fls(__t & -__t); })
> +#define ffsl(x) ({ unsigned long __t = (x); flsl(__t & -__t); })
>  
>  /**
>   * find_first_set_bit - find the first set bit in @word
> @@ -157,7 +129,7 @@ static inline int fls(int x)
>   */
>  static inline unsigned int find_first_set_bit(unsigned long word)
>  {
> -        return ffs(word) - 1;
> +        return ffsl(word) - 1;
>  }
>  
>  /**
> --- unstable.orig/xen/include/asm-x86/bitops.h	2014-09-15 15:42:35.000000000 +0200
> +++ unstable/xen/include/asm-x86/bitops.h	2015-01-22 13:30:02.000000000 +0100
> @@ -401,7 +401,7 @@ static inline unsigned int find_first_se
>   *
>   * This is defined the same way as the libc and compiler builtin ffs routines.
>   */
> -static inline int ffs(unsigned long x)
> +static inline int ffsl(unsigned long x)
>  {
>      long r;
>  
> @@ -412,13 +412,24 @@ static inline int ffs(unsigned long x)
>      return (int)r+1;
>  }
>  
> +static inline int ffs(unsigned int x)
> +{
> +    int r;
> +
> +    asm ( "bsf %1,%0\n\t"
> +          "jnz 1f\n\t"
> +          "mov $-1,%0\n"
> +          "1:" : "=r" (r) : "rm" (x));
> +    return r + 1;
> +}
> +
>  /**
>   * fls - find last bit set
>   * @x: the word to search
>   *
>   * This is defined the same way as ffs.
>   */
> -static inline int fls(unsigned long x)
> +static inline int flsl(unsigned long x)
>  {
>      long r;
>  
> @@ -429,8 +440,16 @@ static inline int fls(unsigned long x)
>      return (int)r+1;
>  }
>  
> -#define ffs64 ffs
> -#define fls64 fls
> +static inline int fls(unsigned int x)
> +{
> +    int r;
> +
> +    asm ( "bsr %1,%0\n\t"
> +          "jnz 1f\n\t"
> +          "mov $-1,%0\n"
> +          "1:" : "=r" (r) : "rm" (x));
> +    return r + 1;
> +}
>  
>  /**
>   * hweightN - returns the hamming weight of a N-bit word
> --- unstable.orig/xen/include/xen/bitops.h	2015-01-22 13:28:41.000000000 +0100
> +++ unstable/xen/include/xen/bitops.h	2015-01-22 13:30:46.000000000 +0100
> @@ -70,20 +70,52 @@ static __inline__ int generic_fls(int x)
>      return r;
>  }
>  
> +#if BITS_PER_LONG == 64
> +
> +static inline int generic_ffsl(unsigned long x)
> +{
> +    return !x || (u32)x ? generic_ffs(x) : generic_ffs(x >> 32) + 32;
> +}
> +
> +static inline int generic_flsl(unsigned long x)
> +{
> +    u32 h = x >> 32;
> +
> +    return h ? generic_fls(h) + 32 : generic_fls(x);
> +}
> +
> +#else
> +# define generic_ffsl generic_ffs
> +# define generic_flsl generic_fls
> +#endif
> +
>  /*
>   * Include this here because some architectures need generic_ffs/fls in
>   * scope
>   */
>  #include <asm/bitops.h>
>  
> -
> +#if BITS_PER_LONG == 64
> +# define fls64 flsl
> +# define ffs64 ffsl
> +#else
> +# ifndef ffs64
> +static inline int generic_ffs64(__u64 x)
> +{
> +    return !x || (__u32)x ? ffs(x) : ffs(x >> 32) + 32;
> +}
> +#  define ffs64 generic_ffs64
> +# endif
> +# ifndef fls64
>  static inline int generic_fls64(__u64 x)
>  {
>      __u32 h = x >> 32;
> -    if (h)
> -        return fls(x) + 32;
> -    return fls(x);
> +
> +    return h ? fls(h) + 32 : fls(x);
>  }
> +#  define fls64 generic_fls64
> +# endif
> +#endif
>  
>  static __inline__ int get_bitmask_order(unsigned int count)
>  {
>
>

  reply	other threads:[~2015-01-23 14:38 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-01-22 13:26 [PATCH 0/2] fls() / ffs() adjustments Jan Beulich
2015-01-22 13:38 ` [PATCH 1/2] make fls() and ffs() consistent across architectures Jan Beulich
2015-01-23 14:38   ` Andrew Cooper [this message]
2015-01-23 14:53     ` Jan Beulich
2015-01-22 13:38 ` [PATCH 2/2] arm64: fix fls() Jan Beulich
2015-01-22 14:01   ` Tim Deegan
2015-01-22 14:05     ` Jan Beulich
2015-01-22 14:29       ` Tim Deegan
2015-01-22 14:59 ` [PATCH 0/2] fls() / ffs() adjustments Ian Campbell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=54C25CDD.4080200@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=Ian.Campbell@eu.citrix.com \
    --cc=Ian.Jackson@eu.citrix.com \
    --cc=JBeulich@suse.com \
    --cc=keir@xen.org \
    --cc=stefano.stabellini@eu.citrix.com \
    --cc=tim@xen.org \
    --cc=xen-devel@lists.xenproject.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.