From mboxrd@z Thu Jan 1 00:00:00 1970 From: Andrew Cooper Subject: Re: [PATCH 1/2] make fls() and ffs() consistent across architectures Date: Fri, 23 Jan 2015 14:38:21 +0000 Message-ID: <54C25CDD.4080200@citrix.com> References: <54C108910200007800058233@mail.emea.novell.com> <54C10B4F0200007800058252@mail.emea.novell.com> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit Return-path: Received: from mail6.bemta5.messagelabs.com ([195.245.231.135]) by lists.xen.org with esmtp (Exim 4.72) (envelope-from ) id 1YEfNV-0002R3-0m for xen-devel@lists.xenproject.org; Fri, 23 Jan 2015 14:38:29 +0000 In-Reply-To: <54C10B4F0200007800058252@mail.emea.novell.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: Jan Beulich , xen-devel Cc: Ian Campbell , Keir Fraser , Tim Deegan , Ian Jackson , Stefano Stabellini List-Id: xen-devel@lists.xenproject.org On 22/01/15 13:38, Jan Beulich wrote: > Their parameter types differed between ARM and x86. > > Along with generalizing the functions this fixes > - x86's non-long functions having long parameter types > - ARM's ffs() using a long intermediate variable > - generic_fls64() being broken when the upper half of the input is > non-zero > - common (and in one case also ARM) code using fls() when flsl() was > meant > > Also drop ARM's constant_fls() in favor of the identical generic_fls(). > > Signed-off-by: Jan Beulich The refactoring looks fine, so Reviewed-by: Andrew Cooper As for the x86 asm itself, what about http://lkml.kernel.org/r/5058741E020000780009C014@nat28.tlf.novell.com ? 
It would appear that the same optimisation is relevant for Xen's __ffs() ~Andrew > > --- unstable.orig/xen/common/page_alloc.c 2014-11-07 17:16:38.000000000 +0100 > +++ unstable/xen/common/page_alloc.c 2015-01-21 08:53:14.000000000 +0100 > @@ -278,7 +278,7 @@ unsigned long __init alloc_boot_pages( > > #define bits_to_zone(b) (((b) < (PAGE_SHIFT + 1)) ? 1 : ((b) - PAGE_SHIFT)) > #define page_to_zone(pg) (is_xen_heap_page(pg) ? MEMZONE_XEN : \ > - (fls(page_to_mfn(pg)) ? : 1)) > + (flsl(page_to_mfn(pg)) ? : 1)) > > typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1]; > static heap_by_zone_and_order_t *_heap[MAX_NUMNODES]; > @@ -1259,7 +1259,7 @@ void __init end_boot_allocator(void) > { > #ifdef CONFIG_X86 > dma_bitsize = min_t(unsigned int, > - fls(NODE_DATA(0)->node_spanned_pages) - 1 > + flsl(NODE_DATA(0)->node_spanned_pages) - 1 > + PAGE_SHIFT - 2, > 32); > #else > @@ -1544,7 +1544,7 @@ static unsigned int __read_mostly xenhea > > void __init xenheap_max_mfn(unsigned long mfn) > { > - xenheap_bits = fls(mfn) + PAGE_SHIFT; > + xenheap_bits = flsl(mfn) + PAGE_SHIFT; > } > > void init_xenheap_pages(paddr_t ps, paddr_t pe) > --- unstable.orig/xen/common/xmalloc_tlsf.c 2015-01-22 13:28:41.000000000 +0100 > +++ unstable/xen/common/xmalloc_tlsf.c 2015-01-21 08:53:05.000000000 +0100 > @@ -138,9 +138,9 @@ static inline void MAPPING_SEARCH(unsign > } > else > { > - t = (1 << (fls(*r) - 1 - MAX_LOG2_SLI)) - 1; > + t = (1 << (flsl(*r) - 1 - MAX_LOG2_SLI)) - 1; > *r = *r + t; > - *fl = fls(*r) - 1; > + *fl = flsl(*r) - 1; > *sl = (*r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI; > *fl -= FLI_OFFSET; > /*if ((*fl -= FLI_OFFSET) < 0) // FL will be always >0! 
> @@ -164,7 +164,7 @@ static inline void MAPPING_INSERT(unsign > } > else > { > - *fl = fls(r) - 1; > + *fl = flsl(r) - 1; > *sl = (r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI; > *fl -= FLI_OFFSET; > } > --- unstable.orig/xen/include/asm-arm/arm32/bitops.h 2015-01-22 13:28:41.000000000 +0100 > +++ unstable/xen/include/asm-arm/arm32/bitops.h 2015-01-21 10:06:01.000000000 +0100 > @@ -15,6 +15,8 @@ extern int _test_and_change_bit(int nr, > #define test_and_clear_bit(n,p) _test_and_clear_bit(n,p) > #define test_and_change_bit(n,p) _test_and_change_bit(n,p) > > +#define flsl fls > + > /* > * Little endian assembly bitops. nr = 0 -> byte 0 bit 0. > */ > --- unstable.orig/xen/include/asm-arm/arm64/bitops.h 2015-01-22 13:28:41.000000000 +0100 > +++ unstable/xen/include/asm-arm/arm64/bitops.h 2015-01-21 08:28:43.000000000 +0100 > @@ -32,6 +32,17 @@ static /*__*/always_inline unsigned long > */ > #define ffz(x) __ffs(~(x)) > > +static inline int flsl(unsigned long x) > +{ > + int ret; > + > + if (__builtin_constant_p(x)) > + return generic_flsl(x); > + > + asm("clz\t%0, %1" : "=r" (ret) : "r" (x)); > + return BITS_PER_LONG - ret; > +} > + > /* Based on linux/include/asm-generic/bitops/find.h */ > > #ifndef find_next_bit > --- unstable.orig/xen/include/asm-arm/bitops.h 2015-01-22 13:28:41.000000000 +0100 > +++ unstable/xen/include/asm-arm/bitops.h 2015-01-22 13:30:17.000000000 +0100 > @@ -99,46 +99,17 @@ static inline int test_bit(int nr, const > return 1UL & (p[BIT_WORD(nr)] >> (nr & (BITS_PER_WORD-1))); > } > > -static inline int constant_fls(int x) > -{ > - int r = 32; > - > - if (!x) > - return 0; > - if (!(x & 0xffff0000u)) { > - x <<= 16; > - r -= 16; > - } > - if (!(x & 0xff000000u)) { > - x <<= 8; > - r -= 8; > - } > - if (!(x & 0xf0000000u)) { > - x <<= 4; > - r -= 4; > - } > - if (!(x & 0xc0000000u)) { > - x <<= 2; > - r -= 2; > - } > - if (!(x & 0x80000000u)) { > - x <<= 1; > - r -= 1; > - } > - return r; > -} > - > /* > * On ARMv5 and above those functions can be 
implemented around > * the clz instruction for much better code efficiency. > */ > > -static inline int fls(int x) > +static inline int fls(unsigned int x) > { > int ret; > > if (__builtin_constant_p(x)) > - return constant_fls(x); > + return generic_fls(x); > > asm("clz\t%0, %1" : "=r" (ret) : "r" (x)); > ret = BITS_PER_LONG - ret; > @@ -146,7 +117,8 @@ static inline int fls(int x) > } > > > -#define ffs(x) ({ unsigned long __t = (x); fls(__t & -__t); }) > +#define ffs(x) ({ unsigned int __t = (x); fls(__t & -__t); }) > +#define ffsl(x) ({ unsigned long __t = (x); flsl(__t & -__t); }) > > /** > * find_first_set_bit - find the first set bit in @word > @@ -157,7 +129,7 @@ static inline int fls(int x) > */ > static inline unsigned int find_first_set_bit(unsigned long word) > { > - return ffs(word) - 1; > + return ffsl(word) - 1; > } > > /** > --- unstable.orig/xen/include/asm-x86/bitops.h 2014-09-15 15:42:35.000000000 +0200 > +++ unstable/xen/include/asm-x86/bitops.h 2015-01-22 13:30:02.000000000 +0100 > @@ -401,7 +401,7 @@ static inline unsigned int find_first_se > * > * This is defined the same way as the libc and compiler builtin ffs routines. > */ > -static inline int ffs(unsigned long x) > +static inline int ffsl(unsigned long x) > { > long r; > > @@ -412,13 +412,24 @@ static inline int ffs(unsigned long x) > return (int)r+1; > } > > +static inline int ffs(unsigned int x) > +{ > + int r; > + > + asm ( "bsf %1,%0\n\t" > + "jnz 1f\n\t" > + "mov $-1,%0\n" > + "1:" : "=r" (r) : "rm" (x)); > + return r + 1; > +} > + > /** > * fls - find last bit set > * @x: the word to search > * > * This is defined the same way as ffs. 
> */ > -static inline int fls(unsigned long x) > +static inline int flsl(unsigned long x) > { > long r; > > @@ -429,8 +440,16 @@ static inline int fls(unsigned long x) > return (int)r+1; > } > > -#define ffs64 ffs > -#define fls64 fls > +static inline int fls(unsigned int x) > +{ > + int r; > + > + asm ( "bsr %1,%0\n\t" > + "jnz 1f\n\t" > + "mov $-1,%0\n" > + "1:" : "=r" (r) : "rm" (x)); > + return r + 1; > +} > > /** > * hweightN - returns the hamming weight of a N-bit word > --- unstable.orig/xen/include/xen/bitops.h 2015-01-22 13:28:41.000000000 +0100 > +++ unstable/xen/include/xen/bitops.h 2015-01-22 13:30:46.000000000 +0100 > @@ -70,20 +70,52 @@ static __inline__ int generic_fls(int x) > return r; > } > > +#if BITS_PER_LONG == 64 > + > +static inline int generic_ffsl(unsigned long x) > +{ > + return !x || (u32)x ? generic_ffs(x) : generic_ffs(x >> 32) + 32; > +} > + > +static inline int generic_flsl(unsigned long x) > +{ > + u32 h = x >> 32; > + > + return h ? generic_fls(h) + 32 : generic_fls(x); > +} > + > +#else > +# define generic_ffsl generic_ffs > +# define generic_flsl generic_fls > +#endif > + > /* > * Include this here because some architectures need generic_ffs/fls in > * scope > */ > #include > > - > +#if BITS_PER_LONG == 64 > +# define fls64 flsl > +# define ffs64 ffsl > +#else > +# ifndef ffs64 > +static inline int generic_ffs64(__u64 x) > +{ > + return !x || (__u32)x ? ffs(x) : ffs(x >> 32) + 32; > +} > +# define ffs64 generic_ffs64 > +# endif > +# ifndef fls64 > static inline int generic_fls64(__u64 x) > { > __u32 h = x >> 32; > - if (h) > - return fls(x) + 32; > - return fls(x); > + > + return h ? fls(h) + 32 : fls(x); > } > +# define fls64 generic_fls64 > +# endif > +#endif > > static __inline__ int get_bitmask_order(unsigned int count) > { > >