From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Jan Beulich" Subject: [PATCH 1/2] make fls() and ffs() consistent across architectures Date: Thu, 22 Jan 2015 13:38:07 +0000 Message-ID: <54C10B4F0200007800058252@mail.emea.novell.com> References: <54C108910200007800058233@mail.emea.novell.com> Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="=__Part2E1BB72F.2__=" Return-path: Received: from mail6.bemta4.messagelabs.com ([85.158.143.247]) by lists.xen.org with esmtp (Exim 4.72) (envelope-from ) id 1YEHxa-00032U-AA for xen-devel@lists.xenproject.org; Thu, 22 Jan 2015 13:38:10 +0000 In-Reply-To: <54C108910200007800058233@mail.emea.novell.com> List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xen.org Errors-To: xen-devel-bounces@lists.xen.org To: xen-devel Cc: Keir Fraser , Stefano Stabellini , Andrew Cooper , Tim Deegan , Ian Campbell , Ian Jackson List-Id: xen-devel@lists.xenproject.org This is a MIME message. If you are reading this text, you may want to consider changing to a mail reader or gateway that understands how to properly handle MIME multipart messages. --=__Part2E1BB72F.2__= Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: quoted-printable Content-Disposition: inline Their parameter types differed between ARM and x86. Along with generalizing the functions this fixes - x86's non-long functions having long parameter types - ARM's ffs() using a long intermediate variable - generic_fls64() being broken when the upper half of the input is non-zero - common (and in one case also ARM) code using fls() when flsl() was meant Also drop ARM's constant_fls() in favor of the identical generic_fls(). Signed-off-by: Jan Beulich --- unstable.orig/xen/common/page_alloc.c 2014-11-07 17:16:38.0000000= 00 +0100 +++ unstable/xen/common/page_alloc.c 2015-01-21 08:53:14.000000000 = +0100 @@ -278,7 +278,7 @@ unsigned long __init alloc_boot_pages( =20 #define bits_to_zone(b) (((b) < (PAGE_SHIFT + 1)) ? 
1 : ((b) - PAGE_SHIFT)= ) #define page_to_zone(pg) (is_xen_heap_page(pg) ? MEMZONE_XEN : \ - (fls(page_to_mfn(pg)) ? : 1)) + (flsl(page_to_mfn(pg)) ? : 1)) =20 typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER= +1]; static heap_by_zone_and_order_t *_heap[MAX_NUMNODES]; @@ -1259,7 +1259,7 @@ void __init end_boot_allocator(void) { #ifdef CONFIG_X86 dma_bitsize =3D min_t(unsigned int, - fls(NODE_DATA(0)->node_spanned_pages) - 1 + flsl(NODE_DATA(0)->node_spanned_pages) - 1 + PAGE_SHIFT - 2, 32); #else @@ -1544,7 +1544,7 @@ static unsigned int __read_mostly xenhea =20 void __init xenheap_max_mfn(unsigned long mfn) { - xenheap_bits =3D fls(mfn) + PAGE_SHIFT; + xenheap_bits =3D flsl(mfn) + PAGE_SHIFT; } =20 void init_xenheap_pages(paddr_t ps, paddr_t pe) --- unstable.orig/xen/common/xmalloc_tlsf.c 2015-01-22 13:28:41.0000000= 00 +0100 +++ unstable/xen/common/xmalloc_tlsf.c 2015-01-21 08:53:05.000000000 = +0100 @@ -138,9 +138,9 @@ static inline void MAPPING_SEARCH(unsign } else { - t =3D (1 << (fls(*r) - 1 - MAX_LOG2_SLI)) - 1; + t =3D (1 << (flsl(*r) - 1 - MAX_LOG2_SLI)) - 1; *r =3D *r + t; - *fl =3D fls(*r) - 1; + *fl =3D flsl(*r) - 1; *sl =3D (*r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI; *fl -=3D FLI_OFFSET; /*if ((*fl -=3D FLI_OFFSET) < 0) // FL will be always >0! @@ -164,7 +164,7 @@ static inline void MAPPING_INSERT(unsign } else { - *fl =3D fls(r) - 1; + *fl =3D flsl(r) - 1; *sl =3D (r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI; *fl -=3D FLI_OFFSET; } --- unstable.orig/xen/include/asm-arm/arm32/bitops.h 2015-01-22 = 13:28:41.000000000 +0100 +++ unstable/xen/include/asm-arm/arm32/bitops.h 2015-01-21 10:06:01.0000000= 00 +0100 @@ -15,6 +15,8 @@ extern int _test_and_change_bit(int nr,=20 #define test_and_clear_bit(n,p) _test_and_clear_bit(n,p) #define test_and_change_bit(n,p) _test_and_change_bit(n,p) =20 +#define flsl fls + /* * Little endian assembly bitops. nr =3D 0 -> byte 0 bit 0. 
*/ --- unstable.orig/xen/include/asm-arm/arm64/bitops.h 2015-01-22 = 13:28:41.000000000 +0100 +++ unstable/xen/include/asm-arm/arm64/bitops.h 2015-01-21 08:28:43.0000000= 00 +0100 @@ -32,6 +32,17 @@ static /*__*/always_inline unsigned long */ #define ffz(x) __ffs(~(x)) =20 +static inline int flsl(unsigned long x) +{ + int ret; + + if (__builtin_constant_p(x)) + return generic_flsl(x); + + asm("clz\t%0, %1" : "=3Dr" (ret) : "r" (x)); + return BITS_PER_LONG - ret; +} + /* Based on linux/include/asm-generic/bitops/find.h */ =20 #ifndef find_next_bit --- unstable.orig/xen/include/asm-arm/bitops.h 2015-01-22 13:28:41.0000000= 00 +0100 +++ unstable/xen/include/asm-arm/bitops.h 2015-01-22 13:30:17.0000000= 00 +0100 @@ -99,46 +99,17 @@ static inline int test_bit(int nr, const return 1UL & (p[BIT_WORD(nr)] >> (nr & (BITS_PER_WORD-1))); } =20 -static inline int constant_fls(int x) -{ - int r =3D 32; - - if (!x) - return 0; - if (!(x & 0xffff0000u)) { - x <<=3D 16; - r -=3D 16; - } - if (!(x & 0xff000000u)) { - x <<=3D 8; - r -=3D 8; - } - if (!(x & 0xf0000000u)) { - x <<=3D 4; - r -=3D 4; - } - if (!(x & 0xc0000000u)) { - x <<=3D 2; - r -=3D 2; - } - if (!(x & 0x80000000u)) { - x <<=3D 1; - r -=3D 1; - } - return r; -} - /* * On ARMv5 and above those functions can be implemented around * the clz instruction for much better code efficiency. 
*/ =20 -static inline int fls(int x) +static inline int fls(unsigned int x) { int ret; =20 if (__builtin_constant_p(x)) - return constant_fls(x); + return generic_fls(x); =20 asm("clz\t%0, %1" : "=3Dr" (ret) : "r" (x)); ret =3D BITS_PER_LONG - ret; @@ -146,7 +117,8 @@ static inline int fls(int x) } =20 =20 -#define ffs(x) ({ unsigned long __t =3D (x); fls(__t & -__t); }) +#define ffs(x) ({ unsigned int __t =3D (x); fls(__t & -__t); }) +#define ffsl(x) ({ unsigned long __t =3D (x); flsl(__t & -__t); }) =20 /** * find_first_set_bit - find the first set bit in @word @@ -157,7 +129,7 @@ static inline int fls(int x) */ static inline unsigned int find_first_set_bit(unsigned long word) { - return ffs(word) - 1; + return ffsl(word) - 1; } =20 /** --- unstable.orig/xen/include/asm-x86/bitops.h 2014-09-15 15:42:35.0000000= 00 +0200 +++ unstable/xen/include/asm-x86/bitops.h 2015-01-22 13:30:02.0000000= 00 +0100 @@ -401,7 +401,7 @@ static inline unsigned int find_first_se * * This is defined the same way as the libc and compiler builtin ffs = routines. */ -static inline int ffs(unsigned long x) +static inline int ffsl(unsigned long x) { long r; =20 @@ -412,13 +412,24 @@ static inline int ffs(unsigned long x) return (int)r+1; } =20 +static inline int ffs(unsigned int x) +{ + int r; + + asm ( "bsf %1,%0\n\t" + "jnz 1f\n\t" + "mov $-1,%0\n" + "1:" : "=3Dr" (r) : "rm" (x)); + return r + 1; +} + /** * fls - find last bit set * @x: the word to search * * This is defined the same way as ffs. 
*/ -static inline int fls(unsigned long x) +static inline int flsl(unsigned long x) { long r; =20 @@ -429,8 +440,16 @@ static inline int fls(unsigned long x) return (int)r+1; } =20 -#define ffs64 ffs -#define fls64 fls +static inline int fls(unsigned int x) +{ + int r; + + asm ( "bsr %1,%0\n\t" + "jnz 1f\n\t" + "mov $-1,%0\n" + "1:" : "=3Dr" (r) : "rm" (x)); + return r + 1; +} =20 /** * hweightN - returns the hamming weight of a N-bit word --- unstable.orig/xen/include/xen/bitops.h 2015-01-22 13:28:41.0000000= 00 +0100 +++ unstable/xen/include/xen/bitops.h 2015-01-22 13:30:46.000000000 = +0100 @@ -70,20 +70,52 @@ static __inline__ int generic_fls(int x) return r; } =20 +#if BITS_PER_LONG =3D=3D 64 + +static inline int generic_ffsl(unsigned long x) +{ + return !x || (u32)x ? generic_ffs(x) : generic_ffs(x >> 32) + 32; +} + +static inline int generic_flsl(unsigned long x) +{ + u32 h =3D x >> 32; + + return h ? generic_fls(h) + 32 : generic_fls(x); +} + +#else +# define generic_ffsl generic_ffs +# define generic_flsl generic_fls +#endif + /* * Include this here because some architectures need generic_ffs/fls in * scope */ #include <asm/bitops.h> =20 - +#if BITS_PER_LONG =3D=3D 64 +# define fls64 flsl +# define ffs64 ffsl +#else +# ifndef ffs64 +static inline int generic_ffs64(__u64 x) +{ + return !x || (__u32)x ? ffs(x) : ffs(x >> 32) + 32; +} +# define ffs64 generic_ffs64 +# endif +# ifndef fls64 static inline int generic_fls64(__u64 x) { __u32 h =3D x >> 32; - if (h) - return fls(x) + 32; - return fls(x); + + return h ? 
fls(h) + 32 : fls(x); } +# define fls64 generic_fls64 +# endif +#endif =20 static __inline__ int get_bitmask_order(unsigned int count) { --=__Part2E1BB72F.2__= Content-Type: text/plain; name="fls-ffs.patch" Content-Transfer-Encoding: quoted-printable Content-Disposition: attachment; filename="fls-ffs.patch" make fls() and ffs() consistent across architectures=0A=0ATheir parameter = types differed between ARM and x86.=0A=0AAlong with generalizing the = functions this fixes=0A- x86's non-long functions having long parameter = types=0A- ARM's ffs() using a long intermediate variable=0A- generic_fls64(= ) being broken when the upper half of the input is=0A non-zero=0A- common = (and in one case also ARM) code using fls() when flsl() was=0A meant=0A=0A= Also drop ARM's constant_fls() in favor of the identical generic_fls().=0A= =0ASigned-off-by: Jan Beulich =0A=0A--- unstable.orig/xe= n/common/page_alloc.c 2014-11-07 17:16:38.000000000 +0100=0A+++ = unstable/xen/common/page_alloc.c 2015-01-21 08:53:14.000000000 = +0100=0A@@ -278,7 +278,7 @@ unsigned long __init alloc_boot_pages(=0A =0A = #define bits_to_zone(b) (((b) < (PAGE_SHIFT + 1)) ? 1 : ((b) - PAGE_SHIFT))= =0A #define page_to_zone(pg) (is_xen_heap_page(pg) ? MEMZONE_XEN : \=0A- = (fls(page_to_mfn(pg)) ? : 1))=0A+ = (flsl(page_to_mfn(pg)) ? 
: 1))=0A =0A typedef struct page_list_head= heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];=0A static heap_by_zone_an= d_order_t *_heap[MAX_NUMNODES];=0A@@ -1259,7 +1259,7 @@ void __init = end_boot_allocator(void)=0A {=0A #ifdef CONFIG_X86=0A = dma_bitsize =3D min_t(unsigned int,=0A- = fls(NODE_DATA(0)->node_spanned_pages) - 1=0A+ = flsl(NODE_DATA(0)->node_spanned_pages) - 1=0A = + PAGE_SHIFT - 2,=0A 32);=0A #else=0A@@ = -1544,7 +1544,7 @@ static unsigned int __read_mostly xenhea=0A =0A void = __init xenheap_max_mfn(unsigned long mfn)=0A {=0A- xenheap_bits =3D = fls(mfn) + PAGE_SHIFT;=0A+ xenheap_bits =3D flsl(mfn) + PAGE_SHIFT;=0A = }=0A =0A void init_xenheap_pages(paddr_t ps, paddr_t pe)=0A--- unstable.ori= g/xen/common/xmalloc_tlsf.c 2015-01-22 13:28:41.000000000 +0100=0A+++ = unstable/xen/common/xmalloc_tlsf.c 2015-01-21 08:53:05.000000000 = +0100=0A@@ -138,9 +138,9 @@ static inline void MAPPING_SEARCH(unsign=0A = }=0A else=0A {=0A- t =3D (1 << (fls(*r) - 1 - MAX_LOG2_SLI)= ) - 1;=0A+ t =3D (1 << (flsl(*r) - 1 - MAX_LOG2_SLI)) - 1;=0A = *r =3D *r + t;=0A- *fl =3D fls(*r) - 1;=0A+ *fl =3D = flsl(*r) - 1;=0A *sl =3D (*r >> (*fl - MAX_LOG2_SLI)) - MAX_SLI;=0A= *fl -=3D FLI_OFFSET;=0A /*if ((*fl -=3D FLI_OFFSET) < 0) = // FL will be always >0!=0A@@ -164,7 +164,7 @@ static inline void = MAPPING_INSERT(unsign=0A }=0A else=0A {=0A- *fl =3D = fls(r) - 1;=0A+ *fl =3D flsl(r) - 1;=0A *sl =3D (r >> (*fl = - MAX_LOG2_SLI)) - MAX_SLI;=0A *fl -=3D FLI_OFFSET;=0A }=0A--- = unstable.orig/xen/include/asm-arm/arm32/bitops.h 2015-01-22 = 13:28:41.000000000 +0100=0A+++ unstable/xen/include/asm-arm/arm32/bitops.h = 2015-01-21 10:06:01.000000000 +0100=0A@@ -15,6 +15,8 @@ extern int = _test_and_change_bit(int nr, =0A #define test_and_clear_bit(n,p) = _test_and_clear_bit(n,p)=0A #define test_and_change_bit(n,p) _test_and_cha= nge_bit(n,p)=0A =0A+#define flsl fls=0A+=0A /*=0A * Little endian = assembly bitops. 
nr =3D 0 -> byte 0 bit 0.=0A */=0A--- unstable.orig/xen/= include/asm-arm/arm64/bitops.h 2015-01-22 13:28:41.000000000 +0100=0A+++ = unstable/xen/include/asm-arm/arm64/bitops.h 2015-01-21 08:28:43.0000000= 00 +0100=0A@@ -32,6 +32,17 @@ static /*__*/always_inline unsigned long=0A = */=0A #define ffz(x) __ffs(~(x))=0A =0A+static inline int flsl(unsigned = long x)=0A+{=0A+ int ret;=0A+=0A+ if (__builtin_constant_p(x)= )=0A+ return generic_flsl(x);=0A+=0A+ asm("clz\t%0, = %1" : "=3Dr" (ret) : "r" (x));=0A+ return BITS_PER_LONG - = ret;=0A+}=0A+=0A /* Based on linux/include/asm-generic/bitops/find.h */=0A = =0A #ifndef find_next_bit=0A--- unstable.orig/xen/include/asm-arm/bitops.h = 2015-01-22 13:28:41.000000000 +0100=0A+++ unstable/xen/include/asm-arm/bito= ps.h 2015-01-22 13:30:17.000000000 +0100=0A@@ -99,46 +99,17 @@ static = inline int test_bit(int nr, const=0A return 1UL & (p[BIT_WORD(nr)] = >> (nr & (BITS_PER_WORD-1)));=0A }=0A =0A-static inline int constant_fls(in= t x)=0A-{=0A- int r =3D 32;=0A-=0A- if (!x)=0A- = return 0;=0A- if (!(x & 0xffff0000u)) {=0A- x = <<=3D 16;=0A- r -=3D 16;=0A- }=0A- if (!(x & = 0xff000000u)) {=0A- x <<=3D 8;=0A- r -=3D = 8;=0A- }=0A- if (!(x & 0xf0000000u)) {=0A- x = <<=3D 4;=0A- r -=3D 4;=0A- }=0A- if (!(x & = 0xc0000000u)) {=0A- x <<=3D 2;=0A- r -=3D = 2;=0A- }=0A- if (!(x & 0x80000000u)) {=0A- x = <<=3D 1;=0A- r -=3D 1;=0A- }=0A- return = r;=0A-}=0A-=0A /*=0A * On ARMv5 and above those functions can be = implemented around=0A * the clz instruction for much better code = efficiency.=0A */=0A =0A-static inline int fls(int x)=0A+static inline = int fls(unsigned int x)=0A {=0A int ret;=0A =0A if = (__builtin_constant_p(x))=0A- return constant_fls(x);=0A+ = return generic_fls(x);=0A =0A asm("clz\t%0, %1" : = "=3Dr" (ret) : "r" (x));=0A ret =3D BITS_PER_LONG - ret;=0A@@ = -146,7 +117,8 @@ static inline int fls(int x)=0A }=0A =0A =0A-#define = ffs(x) ({ unsigned long __t =3D (x); fls(__t & -__t); })=0A+#define ffs(x) = ({ unsigned int 
__t =3D (x); fls(__t & -__t); })=0A+#define ffsl(x) ({ = unsigned long __t =3D (x); flsl(__t & -__t); })=0A =0A /**=0A * find_first= _set_bit - find the first set bit in @word=0A@@ -157,7 +129,7 @@ static = inline int fls(int x)=0A */=0A static inline unsigned int find_first_set_b= it(unsigned long word)=0A {=0A- return ffs(word) - 1;=0A+ = return ffsl(word) - 1;=0A }=0A =0A /**=0A--- unstable.orig/xen/include/asm-= x86/bitops.h 2014-09-15 15:42:35.000000000 +0200=0A+++ unstable/xen/incl= ude/asm-x86/bitops.h 2015-01-22 13:30:02.000000000 +0100=0A@@ -401,7 = +401,7 @@ static inline unsigned int find_first_se=0A *=0A * This is = defined the same way as the libc and compiler builtin ffs routines.=0A = */=0A-static inline int ffs(unsigned long x)=0A+static inline int = ffsl(unsigned long x)=0A {=0A long r;=0A =0A@@ -412,13 +412,24 @@ = static inline int ffs(unsigned long x)=0A return (int)r+1;=0A }=0A = =0A+static inline int ffs(unsigned int x)=0A+{=0A+ int r;=0A+=0A+ = asm ( "bsf %1,%0\n\t"=0A+ "jnz 1f\n\t"=0A+ "mov = $-1,%0\n"=0A+ "1:" : "=3Dr" (r) : "rm" (x));=0A+ return r + = 1;=0A+}=0A+=0A /**=0A * fls - find last bit set=0A * @x: the word to = search=0A *=0A * This is defined the same way as ffs.=0A */=0A-static = inline int fls(unsigned long x)=0A+static inline int flsl(unsigned long = x)=0A {=0A long r;=0A =0A@@ -429,8 +440,16 @@ static inline int = fls(unsigned long x)=0A return (int)r+1;=0A }=0A =0A-#define ffs64 = ffs=0A-#define fls64 fls=0A+static inline int fls(unsigned int x)=0A+{=0A+ = int r;=0A+=0A+ asm ( "bsr %1,%0\n\t"=0A+ "jnz 1f\n\t"=0A+ = "mov $-1,%0\n"=0A+ "1:" : "=3Dr" (r) : "rm" (x));=0A+ = return r + 1;=0A+}=0A =0A /**=0A * hweightN - returns the hamming weight = of a N-bit word=0A--- unstable.orig/xen/include/xen/bitops.h 2015-01-22 = 13:28:41.000000000 +0100=0A+++ unstable/xen/include/xen/bitops.h = 2015-01-22 13:30:46.000000000 +0100=0A@@ -70,20 +70,52 @@ static __inline__= int generic_fls(int x)=0A return r;=0A }=0A =0A+#if BITS_PER_LONG = 
=3D=3D 64=0A+=0A+static inline int generic_ffsl(unsigned long x)=0A+{=0A+ = return !x || (u32)x ? generic_ffs(x) : generic_ffs(x >> 32) + 32;=0A+}=0A= +=0A+static inline int generic_flsl(unsigned long x)=0A+{=0A+ u32 h =3D = x >> 32;=0A+=0A+ return h ? generic_fls(h) + 32 : generic_fls(x);=0A+}= =0A+=0A+#else=0A+# define generic_ffsl generic_ffs=0A+# define generic_flsl= generic_fls=0A+#endif=0A+=0A /*=0A * Include this here because some = architectures need generic_ffs/fls in=0A * scope=0A */=0A #include <asm/bitops.h>= =0A =0A-=0A+#if BITS_PER_LONG =3D=3D 64=0A+# define fls64 = flsl=0A+# define ffs64 ffsl=0A+#else=0A+# ifndef ffs64=0A+static inline = int generic_ffs64(__u64 x)=0A+{=0A+ return !x || (__u32)x ? ffs(x) : = ffs(x >> 32) + 32;=0A+}=0A+# define ffs64 generic_ffs64=0A+# endif=0A+# = ifndef fls64=0A static inline int generic_fls64(__u64 x)=0A {=0A __u32 = h =3D x >> 32;=0A- if (h)=0A- return fls(x) + 32;=0A- return = fls(x);=0A+=0A+ return h ? fls(h) + 32 : fls(x);=0A }=0A+# define = fls64 generic_fls64=0A+# endif=0A+#endif=0A =0A static __inline__ int = get_bitmask_order(unsigned int count)=0A {=0A --=__Part2E1BB72F.2__= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Disposition: inline _______________________________________________ Xen-devel mailing list Xen-devel@lists.xen.org http://lists.xen.org/xen-devel --=__Part2E1BB72F.2__=--