All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH] xen: comment opaque expression in __page_to_virt
@ 2012-08-29 15:45 Ian Jackson
  2012-08-31 10:55 ` Jan Beulich
  0 siblings, 1 reply; 5+ messages in thread
From: Ian Jackson @ 2012-08-29 15:45 UTC (permalink / raw)
  To: xen-devel; +Cc: Ian Campbell

mm.h's __page_to_virt has a rather opaque expression.  Comment it.

The diff below shows the effect that the extra division and
multiplication have on gcc's output; the "+" lines are the result of
compiling
    return (void *)(DIRECTMAP_VIRT_START +
                    ((unsigned long)pg - FRAMETABLE_VIRT_START) /
                    (sizeof(*pg) ) *
                    (PAGE_SIZE )
                    );
instead.

NB that this patch is an RFC because I don't actually know whether
what I wrote in the comment about x86 performance, and about the
purpose of the code, is correct.  Jan, please confirm/deny/correct as
appropriate.

Reported-By: Ian Campbell <ian.campbell@citrix.com>
Cc: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>

--- page_alloc.tmp.mariner.31972.s	2012-08-29 16:32:44.000000000 +0100
+++ page_alloc.tmp.mariner.31960.s	2012-08-29 16:32:09.000000000 +0100
@@ -5338,15 +5338,15 @@
 # 325 "/u/iwj/work/xen-unstable-tools.hg/xen/include/asm/mm.h" 1
 	ud2 ; ret $1303; movl $.LC31, %esp; movl $.LC41, %esp
 # 0 "" 2
-	.loc 10 327 0
+	.loc 10 333 0
 #NO_APP
-	movl	$3, %ebx
+	movl	$24, %ebx
 .LVL543:
 	movl	$0, %edx
 	divl	%ebx
-	addl	$8355840, %eax
+	addl	$1044480, %eax
 	movl	%eax, %ebx
-	sall	$9, %ebx
+	sall	$12, %ebx
 .LBE737:
 .LBE736:
 	.loc 1 1179 0
@@ -5368,13 +5368,13 @@
 .LBE739:
 .LBB741:
 .LBB738:
-	.loc 10 327 0
+	.loc 10 333 0
 	movl	$-1431655765, %edx
 	mull	%edx
-	shrl	%edx
-	leal	8355840(%edx), %ebx
+	shrl	$4, %edx
+	leal	1044480(%edx), %ebx
 .LVL545:
-	sall	$9, %ebx
+	sall	$12, %ebx
 .LBE738:
 .LBE741:
 	.loc 1 1179 0

diff -r a0b5f8102a00 xen/include/asm-x86/mm.h
--- a/xen/include/asm-x86/mm.h	Tue Aug 28 22:40:45 2012 +0100
+++ b/xen/include/asm-x86/mm.h	Wed Aug 29 16:44:58 2012 +0100
@@ -323,6 +323,13 @@ static inline struct page_info *__virt_t
 static inline void *__page_to_virt(const struct page_info *pg)
 {
     ASSERT((unsigned long)pg - FRAMETABLE_VIRT_START < FRAMETABLE_VIRT_END);
+    /* (sizeof(*pg) & -sizeof(*pg)) selects the LS bit of sizeof(*pg).
+     * The division and re-multiplication arranges to do the easy part
+     * of the division with a shift, and then puts the shifted-out
+     * power of 2 back again in the multiplication.  This is
+     * beneficial because with gcc (at least with 4.4.5) it generates
+     * a division by 3 instead of a division by 24, which is faster.
+     */
     return (void *)(DIRECTMAP_VIRT_START +
                     ((unsigned long)pg - FRAMETABLE_VIRT_START) /
                     (sizeof(*pg) / (sizeof(*pg) & -sizeof(*pg))) *

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC PATCH] xen: comment opaque expression in __page_to_virt
  2012-08-29 15:45 [RFC PATCH] xen: comment opaque expression in __page_to_virt Ian Jackson
@ 2012-08-31 10:55 ` Jan Beulich
  2012-08-31 14:36   ` Ian Jackson
  0 siblings, 1 reply; 5+ messages in thread
From: Jan Beulich @ 2012-08-31 10:55 UTC (permalink / raw)
  To: Ian Jackson; +Cc: Ian Campbell, xen-devel

>>> On 29.08.12 at 17:45, Ian Jackson <Ian.Jackson@eu.citrix.com> wrote:
> mm.h's __page_to_virt has a rather opaque expression.  Comment it.
> 
> The diff below shows the effect that the extra division and
> multiplication has on gcc's output; the "-" lines are the result of
> compiling
>     return (void *)(DIRECTMAP_VIRT_START +
>                     ((unsigned long)pg - FRAMETABLE_VIRT_START) /
>                     (sizeof(*pg) ) *
>                     (PAGE_SIZE )
>                     );
> instead.
> 
> NB that this patch is an RFC because I don't actually know whether
> what I wrote in the comment about x86 performance, and the purpose, of
> the code, is correct.  Jan, please confirm/deny/correct as
> appropriate.
> 
> Reported-By: Ian Campbell <ian.campbell@citrix.com>
> Cc: Jan Beulich <jbeulich@novell.com>
> Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
> 
> --- page_alloc.tmp.mariner.31972.s	2012-08-29 16:32:44.000000000 +0100
> +++ page_alloc.tmp.mariner.31960.s	2012-08-29 16:32:09.000000000 +0100
> @@ -5338,15 +5338,15 @@
>  # 325 "/u/iwj/work/xen-unstable-tools.hg/xen/include/asm/mm.h" 1
>  	ud2 ; ret $1303; movl $.LC31, %esp; movl $.LC41, %esp
>  # 0 "" 2
> -	.loc 10 327 0
> +	.loc 10 333 0
>  #NO_APP
> -	movl	$3, %ebx
> +	movl	$24, %ebx
>  .LVL543:
>  	movl	$0, %edx
>  	divl	%ebx
> -	addl	$8355840, %eax
> +	addl	$1044480, %eax
>  	movl	%eax, %ebx
> -	sall	$9, %ebx
> +	sall	$12, %ebx
>  .LBE737:
>  .LBE736:
>  	.loc 1 1179 0
> @@ -5368,13 +5368,13 @@
>  .LBE739:
>  .LBB741:
>  .LBB738:
> -	.loc 10 327 0
> +	.loc 10 333 0
>  	movl	$-1431655765, %edx
>  	mull	%edx
> -	shrl	%edx
> -	leal	8355840(%edx), %ebx
> +	shrl	$4, %edx
> +	leal	1044480(%edx), %ebx
>  .LVL545:
> -	sall	$9, %ebx
> +	sall	$12, %ebx
>  .LBE738:
>  .LBE741:
>  	.loc 1 1179 0
> 
> diff -r a0b5f8102a00 xen/include/asm-x86/mm.h
> --- a/xen/include/asm-x86/mm.h	Tue Aug 28 22:40:45 2012 +0100
> +++ b/xen/include/asm-x86/mm.h	Wed Aug 29 16:44:58 2012 +0100
> @@ -323,6 +323,13 @@ static inline struct page_info *__virt_t
>  static inline void *__page_to_virt(const struct page_info *pg)
>  {
>      ASSERT((unsigned long)pg - FRAMETABLE_VIRT_START < FRAMETABLE_VIRT_END);
> +    /* (sizeof(*pg) & -sizeof(*pg)) selects the LS bit of sizeof(*pg).
> +     * The division and re-multiplication arranges to do the easy part
> +     * of the division with a shift, and then puts the shifted-out
> +     * power of 2 back again in the multiplication.  This is
> +     * beneficial because with gcc (at least with 4.4.5) it generates
> +     * a division by 3 instead of a division by 8 which is faster.
> +     */

No, that's not precise. There's really not much of a win to be had
on 32-bit (division by 3 and division by 24 (sizeof(struct page_info))
should be the same in speed).

The win is on x86-64, where sizeof(struct page_info) is a power
of 2, and hence the pair of shifts (right, then left) can be reduced
to a single one.

Yet (for obvious reasons) the code ought not to break anything
even if the size of the structure were to change on x86-64, hence
it needs to be that complex (and can't be broken into separate,
simpler implementations for 32- and 64-bit).

Jan

>      return (void *)(DIRECTMAP_VIRT_START +
>                      ((unsigned long)pg - FRAMETABLE_VIRT_START) /
>                      (sizeof(*pg) / (sizeof(*pg) & -sizeof(*pg))) *

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC PATCH] xen: comment opaque expression in __page_to_virt
  2012-08-31 10:55 ` Jan Beulich
@ 2012-08-31 14:36   ` Ian Jackson
  2012-08-31 14:53     ` [PATCH] " Jan Beulich
  0 siblings, 1 reply; 5+ messages in thread
From: Ian Jackson @ 2012-08-31 14:36 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Ian Campbell, xen-devel

Jan Beulich writes ("Re: [RFC PATCH] xen: comment opaque expression in __page_to_virt"):
> No, that's not precise. There's really not much of a win to be had
> on 32-bit (division by 3 and division by 24 (sizeof(struct page_info))
> should be the same in speed.
> 
> The win is on x86-64, where sizeof(struct page_info) is a power
> of 2, and hence the pair of shifts (right, then left) can be reduced
> to a single one.
> 
> Yet (for obvious reasons) the code ought to not break anything
> if even on x86-64 the size of the structure would change, hence
> it needs to be that complex (and can't be broken into separate,
> simpler implementations for 32- and 64-bits).

Thanks.  Do you want to post a revised version of my patch, or shall I
do so?  (If I should, please confirm that I may put your s-o-b on it
for your wording above.)

Ian.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH] Re: [RFC PATCH] xen: comment opaque expression in __page_to_virt
  2012-08-31 14:36   ` Ian Jackson
@ 2012-08-31 14:53     ` Jan Beulich
  2012-08-31 15:50       ` Keir Fraser
  0 siblings, 1 reply; 5+ messages in thread
From: Jan Beulich @ 2012-08-31 14:53 UTC (permalink / raw)
  To: Ian Jackson; +Cc: Ian Campbell, xen-devel

>>> On 31.08.12 at 16:36, Ian Jackson <Ian.Jackson@eu.citrix.com> wrote:
> Jan Beulich writes ("Re: [RFC PATCH] xen: comment opaque expression in 
> __page_to_virt"):
>> No, that's not precise. There's really not much of a win to be had
>> on 32-bit (division by 3 and division by 24 (sizeof(struct page_info))
>> should be the same in speed.
>> 
>> The win is on x86-64, where sizeof(struct page_info) is a power
>> of 2, and hence the pair of shifts (right, then left) can be reduced
>> to a single one.
>> 
>> Yet (for obvious reasons) the code ought to not break anything
>> if even on x86-64 the size of the structure would change, hence
>> it needs to be that complex (and can't be broken into separate,
>> simpler implementations for 32- and 64-bits).
> 
> Thanks.  Do you want to post a revised version of my patch or shall I
> do so ?  (If so please confirm that I should put your s-o-b on it for
> your wording above.)

x86: comment opaque expression in __page_to_virt()

mm.h's __page_to_virt() has a rather opaque expression. Comment it.

Reported-By: Ian Campbell <ian.campbell@citrix.com>
Suggested-by: Ian Jackson <ian.jackson@eu.citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>

--- 2012-08-08.orig/xen/include/asm-x86/mm.h	2012-06-20 17:34:02.000000000 +0200
+++ 2012-08-08/xen/include/asm-x86/mm.h	2012-08-31 16:50:50.000000000 +0200
@@ -323,6 +323,12 @@ static inline struct page_info *__virt_t
 static inline void *__page_to_virt(const struct page_info *pg)
 {
     ASSERT((unsigned long)pg - FRAMETABLE_VIRT_START < FRAMETABLE_VIRT_END);
+    /*
+     * (sizeof(*pg) & -sizeof(*pg)) selects the LS bit of sizeof(*pg). The
+     * division and re-multiplication avoids one shift when sizeof(*pg) is a
+     * power of two (otherwise there would be a right shift followed by a
+     * left shift, which the compiler can't know it can fold into one).
+     */
     return (void *)(DIRECTMAP_VIRT_START +
                     ((unsigned long)pg - FRAMETABLE_VIRT_START) /
                     (sizeof(*pg) / (sizeof(*pg) & -sizeof(*pg))) *

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] Re: [RFC PATCH] xen: comment opaque expression in __page_to_virt
  2012-08-31 14:53     ` [PATCH] " Jan Beulich
@ 2012-08-31 15:50       ` Keir Fraser
  0 siblings, 0 replies; 5+ messages in thread
From: Keir Fraser @ 2012-08-31 15:50 UTC (permalink / raw)
  To: Jan Beulich, Ian Jackson; +Cc: Ian Campbell, xen-devel

On 31/08/2012 15:53, "Jan Beulich" <JBeulich@suse.com> wrote:

>>>> On 31.08.12 at 16:36, Ian Jackson <Ian.Jackson@eu.citrix.com> wrote:
>> Jan Beulich writes ("Re: [RFC PATCH] xen: comment opaque expression in
>> __page_to_virt"):
>>> No, that's not precise. There's really not much of a win to be had
>>> on 32-bit (division by 3 and division by 24 (sizeof(struct page_info))
>>> should be the same in speed.
>>> 
>>> The win is on x86-64, where sizeof(struct page_info) is a power
>>> of 2, and hence the pair of shifts (right, then left) can be reduced
>>> to a single one.
>>> 
>>> Yet (for obvious reasons) the code ought to not break anything
>>> if even on x86-64 the size of the structure would change, hence
>>> it needs to be that complex (and can't be broken into separate,
>>> simpler implementations for 32- and 64-bits).
>> 
>> Thanks.  Do you want to post a revised version of my patch or shall I
>> do so ?  (If so please confirm that I should put your s-o-b on it for
>> your wording above.)
> 
> x86: comment opaque expression in __page_to_virt()
> 
> mm.h's __page_to_virt() has a rather opaque expression. Comment it.
> 
> Reported-By: Ian Campbell <ian.campbell@citrix.com>
> Suggested-by: Ian Jackson <ian.jackson@eu.citrix.com>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>

Acked-by: Keir Fraser <keir@xen.org>

> --- 2012-08-08.orig/xen/include/asm-x86/mm.h 2012-06-20 17:34:02.000000000
> +0200
> +++ 2012-08-08/xen/include/asm-x86/mm.h 2012-08-31 16:50:50.000000000 +0200
> @@ -323,6 +323,12 @@ static inline struct page_info *__virt_t
>  static inline void *__page_to_virt(const struct page_info *pg)
>  {
>      ASSERT((unsigned long)pg - FRAMETABLE_VIRT_START < FRAMETABLE_VIRT_END);
> +    /*
> +     * (sizeof(*pg) & -sizeof(*pg)) selects the LS bit of sizeof(*pg). The
> +     * division and re-multiplication avoids one shift when sizeof(*pg) is a
> +     * power of two (otherwise there would be a right shift followed by a
> +     * left shift, which the compiler can't know it can fold into one).
> +     */
>      return (void *)(DIRECTMAP_VIRT_START +
>                      ((unsigned long)pg - FRAMETABLE_VIRT_START) /
>                      (sizeof(*pg) / (sizeof(*pg) & -sizeof(*pg))) *
> 
> 
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2012-08-31 15:50 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-08-29 15:45 [RFC PATCH] xen: comment opaque expression in __page_to_virt Ian Jackson
2012-08-31 10:55 ` Jan Beulich
2012-08-31 14:36   ` Ian Jackson
2012-08-31 14:53     ` [PATCH] " Jan Beulich
2012-08-31 15:50       ` Keir Fraser

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.