All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] virtio_ring: Update weak barriers to use dma_wmb/rmb
@ 2015-04-08  0:47 Alexander Duyck
  2015-04-08  8:42   ` Michael S. Tsirkin
                   ` (2 more replies)
  0 siblings, 3 replies; 12+ messages in thread
From: Alexander Duyck @ 2015-04-08  0:47 UTC (permalink / raw)
  To: linux-kernel, virtualization; +Cc: rusty, mst

This change makes it so that instead of using smp_wmb/rmb, which varies
depending on the kernel configuration, we can use dma_wmb/rmb, which for
most architectures should be equal to or slightly more strict than
smp_wmb/rmb.

The advantage to this is that these barriers are available to uniprocessor
builds as well so the performance should improve under such a
configuration.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
---
 include/linux/virtio_ring.h |   23 ++++-------------------
 1 file changed, 4 insertions(+), 19 deletions(-)

diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index 67e06fe18c03..8e50888a6d59 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -21,19 +21,20 @@
  * actually quite cheap.
  */
 
-#ifdef CONFIG_SMP
 static inline void virtio_mb(bool weak_barriers)
 {
+#ifdef CONFIG_SMP
 	if (weak_barriers)
 		smp_mb();
 	else
+#endif
 		mb();
 }
 
 static inline void virtio_rmb(bool weak_barriers)
 {
 	if (weak_barriers)
-		smp_rmb();
+		dma_rmb();
 	else
 		rmb();
 }
@@ -41,26 +42,10 @@ static inline void virtio_rmb(bool weak_barriers)
 static inline void virtio_wmb(bool weak_barriers)
 {
 	if (weak_barriers)
-		smp_wmb();
+		dma_wmb();
 	else
 		wmb();
 }
-#else
-static inline void virtio_mb(bool weak_barriers)
-{
-	mb();
-}
-
-static inline void virtio_rmb(bool weak_barriers)
-{
-	rmb();
-}
-
-static inline void virtio_wmb(bool weak_barriers)
-{
-	wmb();
-}
-#endif
 
 struct virtio_device;
 struct virtqueue;


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH] virtio_ring: Update weak barriers to use dma_wmb/rmb
  2015-04-08  0:47 [PATCH] virtio_ring: Update weak barriers to use dma_wmb/rmb Alexander Duyck
@ 2015-04-08  8:42   ` Michael S. Tsirkin
  2015-04-10  3:17 ` Rusty Russell
  2015-04-10  3:17 ` Rusty Russell
  2 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2015-04-08  8:42 UTC (permalink / raw)
  To: Alexander Duyck; +Cc: linux-kernel, virtualization, rusty

On Tue, Apr 07, 2015 at 05:47:42PM -0700, Alexander Duyck wrote:
> This change makes it so that instead of using smp_wmb/rmb which varies
> depending on the kernel configuration we can can use dma_wmb/rmb which for
> most architectures should be equal to or slightly more strict than
> smp_wmb/rmb.
> 
> The advantage to this is that these barriers are available to uniprocessor
> builds as well so the performance should improve under such a
> configuration.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>

Well the generic implementation has:
#ifndef dma_rmb
#define dma_rmb()       rmb()
#endif

#ifndef dma_wmb
#define dma_wmb()       wmb()
#endif

So for these arches you are slightly speeding up UP but slightly hurting SMP -
I think we did benchmark the difference as measurable in the past.

Additionally, isn't this relying on undocumented behaviour?
The documentation says:
	"These are for use with consistent memory"
and virtio does not bother to request consistent memory
allocations.

One wonders whether these will always be strong enough.



> ---
>  include/linux/virtio_ring.h |   23 ++++-------------------
>  1 file changed, 4 insertions(+), 19 deletions(-)
> 
> diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
> index 67e06fe18c03..8e50888a6d59 100644
> --- a/include/linux/virtio_ring.h
> +++ b/include/linux/virtio_ring.h
> @@ -21,19 +21,20 @@
>   * actually quite cheap.
>   */
>  
> -#ifdef CONFIG_SMP
>  static inline void virtio_mb(bool weak_barriers)
>  {
> +#ifdef CONFIG_SMP
>  	if (weak_barriers)
>  		smp_mb();
>  	else
> +#endif
>  		mb();
>  }
>  
>  static inline void virtio_rmb(bool weak_barriers)
>  {
>  	if (weak_barriers)
> -		smp_rmb();
> +		dma_rmb();
>  	else
>  		rmb();
>  }
> @@ -41,26 +42,10 @@ static inline void virtio_rmb(bool weak_barriers)
>  static inline void virtio_wmb(bool weak_barriers)
>  {
>  	if (weak_barriers)
> -		smp_wmb();
> +		dma_wmb();
>  	else
>  		wmb();
>  }
> -#else
> -static inline void virtio_mb(bool weak_barriers)
> -{
> -	mb();
> -}
> -
> -static inline void virtio_rmb(bool weak_barriers)
> -{
> -	rmb();
> -}
> -
> -static inline void virtio_wmb(bool weak_barriers)
> -{
> -	wmb();
> -}
> -#endif
>  
>  struct virtio_device;
>  struct virtqueue;

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] virtio_ring: Update weak barriers to use dma_wmb/rmb
@ 2015-04-08  8:42   ` Michael S. Tsirkin
  0 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2015-04-08  8:42 UTC (permalink / raw)
  To: Alexander Duyck; +Cc: linux-kernel, virtualization

On Tue, Apr 07, 2015 at 05:47:42PM -0700, Alexander Duyck wrote:
> This change makes it so that instead of using smp_wmb/rmb which varies
> depending on the kernel configuration we can can use dma_wmb/rmb which for
> most architectures should be equal to or slightly more strict than
> smp_wmb/rmb.
> 
> The advantage to this is that these barriers are available to uniprocessor
> builds as well so the performance should improve under such a
> configuration.
> 
> Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>

Well the generic implementation has:
#ifndef dma_rmb
#define dma_rmb()       rmb()
#endif

#ifndef dma_wmb
#define dma_wmb()       wmb()
#endif

So for these arches you are slightly speeding up UP but slightly hurting SMP -
I think we did benchmark the difference as measurable in the past.

Additionally, isn't this relying on undocumented behaviour?
The documentation says:
	"These are for use with consistent memory"
and virtio does not bother to request consistent memory
allocations.

One wonders whether these will always be strong enough.



> ---
>  include/linux/virtio_ring.h |   23 ++++-------------------
>  1 file changed, 4 insertions(+), 19 deletions(-)
> 
> diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
> index 67e06fe18c03..8e50888a6d59 100644
> --- a/include/linux/virtio_ring.h
> +++ b/include/linux/virtio_ring.h
> @@ -21,19 +21,20 @@
>   * actually quite cheap.
>   */
>  
> -#ifdef CONFIG_SMP
>  static inline void virtio_mb(bool weak_barriers)
>  {
> +#ifdef CONFIG_SMP
>  	if (weak_barriers)
>  		smp_mb();
>  	else
> +#endif
>  		mb();
>  }
>  
>  static inline void virtio_rmb(bool weak_barriers)
>  {
>  	if (weak_barriers)
> -		smp_rmb();
> +		dma_rmb();
>  	else
>  		rmb();
>  }
> @@ -41,26 +42,10 @@ static inline void virtio_rmb(bool weak_barriers)
>  static inline void virtio_wmb(bool weak_barriers)
>  {
>  	if (weak_barriers)
> -		smp_wmb();
> +		dma_wmb();
>  	else
>  		wmb();
>  }
> -#else
> -static inline void virtio_mb(bool weak_barriers)
> -{
> -	mb();
> -}
> -
> -static inline void virtio_rmb(bool weak_barriers)
> -{
> -	rmb();
> -}
> -
> -static inline void virtio_wmb(bool weak_barriers)
> -{
> -	wmb();
> -}
> -#endif
>  
>  struct virtio_device;
>  struct virtqueue;

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] virtio_ring: Update weak barriers to use dma_wmb/rmb
  2015-04-08  8:42   ` Michael S. Tsirkin
@ 2015-04-08 14:41     ` Alexander Duyck
  -1 siblings, 0 replies; 12+ messages in thread
From: Alexander Duyck @ 2015-04-08 14:41 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: linux-kernel, virtualization, rusty


On 04/08/2015 01:42 AM, Michael S. Tsirkin wrote:
> On Tue, Apr 07, 2015 at 05:47:42PM -0700, Alexander Duyck wrote:
>> This change makes it so that instead of using smp_wmb/rmb which varies
>> depending on the kernel configuration we can can use dma_wmb/rmb which for
>> most architectures should be equal to or slightly more strict than
>> smp_wmb/rmb.
>>
>> The advantage to this is that these barriers are available to uniprocessor
>> builds as well so the performance should improve under such a
>> configuration.
>>
>> Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
> Well the generic implementation has:
> #ifndef dma_rmb
> #define dma_rmb()       rmb()
> #endif
>
> #ifndef dma_wmb
> #define dma_wmb()       wmb()
> #endif
>
> So for these arches you are slightly speeding up UP but slightly hurting SMP -
> I think we did benchmark the difference as measureable in the past.

The generic implementation for the smp_ barriers does the same thing 
when CONFIG_SMP is defined.  The only spot where there should be an 
appreciable difference between the two is on ARM where we define the 
dma_ barriers as being in the outer shareable domain, and for the smp_ 
barriers they are inner shareable domain.

> Additionally, isn't this relying on undocumented behaviour?
> The documentation says:
> 	"These are for use with consistent memory"
> and virtio does not bother to request consistent memory
> allocations.

Consistent in this case represents memory that exists within one 
coherency domain.  So in the case of x86 for instance this represents 
writes only to system memory.  If you mix writes to system memory and 
device memory (PIO) then you should be using the full wmb/rmb to 
guarantee ordering between the two memories.

> One wonders whether these will always be strong enough.

For the purposes of weak barriers they should be, and they are only 
slightly stronger than SMP in one case so odds are strength will not be 
the issue.  As far as speed I would suspect that the difference between 
inner and outer shareable domain should be negligible compared to the 
difference between a dsb() and a dmb().

- Alex

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] virtio_ring: Update weak barriers to use dma_wmb/rmb
@ 2015-04-08 14:41     ` Alexander Duyck
  0 siblings, 0 replies; 12+ messages in thread
From: Alexander Duyck @ 2015-04-08 14:41 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: linux-kernel, virtualization


On 04/08/2015 01:42 AM, Michael S. Tsirkin wrote:
> On Tue, Apr 07, 2015 at 05:47:42PM -0700, Alexander Duyck wrote:
>> This change makes it so that instead of using smp_wmb/rmb which varies
>> depending on the kernel configuration we can can use dma_wmb/rmb which for
>> most architectures should be equal to or slightly more strict than
>> smp_wmb/rmb.
>>
>> The advantage to this is that these barriers are available to uniprocessor
>> builds as well so the performance should improve under such a
>> configuration.
>>
>> Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
> Well the generic implementation has:
> #ifndef dma_rmb
> #define dma_rmb()       rmb()
> #endif
>
> #ifndef dma_wmb
> #define dma_wmb()       wmb()
> #endif
>
> So for these arches you are slightly speeding up UP but slightly hurting SMP -
> I think we did benchmark the difference as measureable in the past.

The generic implementation for the smp_ barriers does the same thing 
when CONFIG_SMP is defined.  The only spot where there should be an 
appreciable difference between the two is on ARM where we define the 
dma_ barriers as being in the outer shareable domain, and for the smp_ 
barriers they are inner shareable domain.

> Additionally, isn't this relying on undocumented behaviour?
> The documentation says:
> 	"These are for use with consistent memory"
> and virtio does not bother to request consistent memory
> allocations.

Consistent in this case represents memory that exists within one 
coherency domain.  So in the case of x86 for instance this represents 
writes only to system memory.  If you mix writes to system memory and 
device memory (PIO) then you should be using the full wmb/rmb to 
guarantee ordering between the two memories.

> One wonders whether these will always be strong enough.

For the purposes of weak barriers they should be, and they are only 
slightly stronger than SMP in one case so odds are strength will not be 
the issue.  As far as speed I would suspect that the difference between 
inner and outer shareable domain should be negligible compared to the 
difference between a dsb() and a dmb().

- Alex

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] virtio_ring: Update weak barriers to use dma_wmb/rmb
  2015-04-08 14:41     ` Alexander Duyck
@ 2015-04-08 18:37       ` Michael S. Tsirkin
  -1 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2015-04-08 18:37 UTC (permalink / raw)
  To: Alexander Duyck; +Cc: linux-kernel, virtualization, rusty

On Wed, Apr 08, 2015 at 07:41:49AM -0700, Alexander Duyck wrote:
> 
> On 04/08/2015 01:42 AM, Michael S. Tsirkin wrote:
> >On Tue, Apr 07, 2015 at 05:47:42PM -0700, Alexander Duyck wrote:
> >>This change makes it so that instead of using smp_wmb/rmb which varies
> >>depending on the kernel configuration we can can use dma_wmb/rmb which for
> >>most architectures should be equal to or slightly more strict than
> >>smp_wmb/rmb.
> >>
> >>The advantage to this is that these barriers are available to uniprocessor
> >>builds as well so the performance should improve under such a
> >>configuration.
> >>
> >>Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
> >Well the generic implementation has:
> >#ifndef dma_rmb
> >#define dma_rmb()       rmb()
> >#endif
> >
> >#ifndef dma_wmb
> >#define dma_wmb()       wmb()
> >#endif
> >
> >So for these arches you are slightly speeding up UP but slightly hurting SMP -
> >I think we did benchmark the difference as measureable in the past.
> 
> The generic implementation for the smp_ barriers does the same thing when
> CONFIG_SMP is defined.  The only spot where there should be an appreciable
> difference between the two is on ARM where we define the dma_ barriers as
> being in the outer shareable domain, and for the smp_ barriers they are
> inner shareable domain.
> 
> >Additionally, isn't this relying on undocumented behaviour?
> >The documentation says:
> >	"These are for use with consistent memory"
> >and virtio does not bother to request consistent memory
> >allocations.
> 
> Consistent in this case represents memory that exists within one coherency
> domain.  So in the case of x86 for instance this represents writes only to
> system memory.  If you mix writes to system memory and device memory (PIO)
> then you should be using the full wmb/rmb to guarantee ordering between the
> two memories.
> 
> >One wonders whether these will always be strong enough.
> 
> For the purposes of weak barriers they should be, and they are only slightly
> stronger than SMP in one case so odds are strength will not be the issue.
> As far as speed I would suspect that the difference between inner and outer
> shareable domain should be negligible compared to the difference between a
> dsb() and a dmb().
> 
> - Alex

Maybe it's safe, and maybe there's no performance impact.  But what's
the purpose of the patch?  From the commit log, It sounds like it's an
optimization, but it's not an obvious win, and it's not accompanied by
any numbers.


-- 
MST

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] virtio_ring: Update weak barriers to use dma_wmb/rmb
@ 2015-04-08 18:37       ` Michael S. Tsirkin
  0 siblings, 0 replies; 12+ messages in thread
From: Michael S. Tsirkin @ 2015-04-08 18:37 UTC (permalink / raw)
  To: Alexander Duyck; +Cc: linux-kernel, virtualization

On Wed, Apr 08, 2015 at 07:41:49AM -0700, Alexander Duyck wrote:
> 
> On 04/08/2015 01:42 AM, Michael S. Tsirkin wrote:
> >On Tue, Apr 07, 2015 at 05:47:42PM -0700, Alexander Duyck wrote:
> >>This change makes it so that instead of using smp_wmb/rmb which varies
> >>depending on the kernel configuration we can can use dma_wmb/rmb which for
> >>most architectures should be equal to or slightly more strict than
> >>smp_wmb/rmb.
> >>
> >>The advantage to this is that these barriers are available to uniprocessor
> >>builds as well so the performance should improve under such a
> >>configuration.
> >>
> >>Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
> >Well the generic implementation has:
> >#ifndef dma_rmb
> >#define dma_rmb()       rmb()
> >#endif
> >
> >#ifndef dma_wmb
> >#define dma_wmb()       wmb()
> >#endif
> >
> >So for these arches you are slightly speeding up UP but slightly hurting SMP -
> >I think we did benchmark the difference as measureable in the past.
> 
> The generic implementation for the smp_ barriers does the same thing when
> CONFIG_SMP is defined.  The only spot where there should be an appreciable
> difference between the two is on ARM where we define the dma_ barriers as
> being in the outer shareable domain, and for the smp_ barriers they are
> inner shareable domain.
> 
> >Additionally, isn't this relying on undocumented behaviour?
> >The documentation says:
> >	"These are for use with consistent memory"
> >and virtio does not bother to request consistent memory
> >allocations.
> 
> Consistent in this case represents memory that exists within one coherency
> domain.  So in the case of x86 for instance this represents writes only to
> system memory.  If you mix writes to system memory and device memory (PIO)
> then you should be using the full wmb/rmb to guarantee ordering between the
> two memories.
> 
> >One wonders whether these will always be strong enough.
> 
> For the purposes of weak barriers they should be, and they are only slightly
> stronger than SMP in one case so odds are strength will not be the issue.
> As far as speed I would suspect that the difference between inner and outer
> shareable domain should be negligible compared to the difference between a
> dsb() and a dmb().
> 
> - Alex

Maybe it's safe, and maybe there's no performance impact.  But what's
the purpose of the patch?  From the commit log, it sounds like it's an
optimization, but it's not an obvious win, and it's not accompanied by
any numbers.


-- 
MST

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] virtio_ring: Update weak barriers to use dma_wmb/rmb
  2015-04-08 18:37       ` Michael S. Tsirkin
  (?)
@ 2015-04-08 20:31       ` Alexander Duyck
  -1 siblings, 0 replies; 12+ messages in thread
From: Alexander Duyck @ 2015-04-08 20:31 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: linux-kernel, virtualization, rusty

On 04/08/2015 11:37 AM, Michael S. Tsirkin wrote:
> On Wed, Apr 08, 2015 at 07:41:49AM -0700, Alexander Duyck wrote:
>> On 04/08/2015 01:42 AM, Michael S. Tsirkin wrote:
>>> On Tue, Apr 07, 2015 at 05:47:42PM -0700, Alexander Duyck wrote:
>>>> This change makes it so that instead of using smp_wmb/rmb which varies
>>>> depending on the kernel configuration we can can use dma_wmb/rmb which for
>>>> most architectures should be equal to or slightly more strict than
>>>> smp_wmb/rmb.
>>>>
>>>> The advantage to this is that these barriers are available to uniprocessor
>>>> builds as well so the performance should improve under such a
>>>> configuration.
>>>>
>>>> Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
>>> Well the generic implementation has:
>>> #ifndef dma_rmb
>>> #define dma_rmb()       rmb()
>>> #endif
>>>
>>> #ifndef dma_wmb
>>> #define dma_wmb()       wmb()
>>> #endif
>>>
>>> So for these arches you are slightly speeding up UP but slightly hurting SMP -
>>> I think we did benchmark the difference as measureable in the past.
>> The generic implementation for the smp_ barriers does the same thing when
>> CONFIG_SMP is defined.  The only spot where there should be an appreciable
>> difference between the two is on ARM where we define the dma_ barriers as
>> being in the outer shareable domain, and for the smp_ barriers they are
>> inner shareable domain.
>>
>>> Additionally, isn't this relying on undocumented behaviour?
>>> The documentation says:
>>> 	"These are for use with consistent memory"
>>> and virtio does not bother to request consistent memory
>>> allocations.
>> Consistent in this case represents memory that exists within one coherency
>> domain.  So in the case of x86 for instance this represents writes only to
>> system memory.  If you mix writes to system memory and device memory (PIO)
>> then you should be using the full wmb/rmb to guarantee ordering between the
>> two memories.
>>
>>> One wonders whether these will always be strong enough.
>> For the purposes of weak barriers they should be, and they are only slightly
>> stronger than SMP in one case so odds are strength will not be the issue.
>> As far as speed I would suspect that the difference between inner and outer
>> shareable domain should be negligible compared to the difference between a
>> dsb() and a dmb().
>>
>> - Alex
> Maybe it's safe, and maybe there's no performance impact.  But what's
> the purpose of the patch?  From the commit log, It sounds like it's an
> optimization, but it's not an obvious win, and it's not accompanied by
> any numbers.

The win would be that non-SMP should get the same performance from the 
barriers as SMP.  Based on the numbers for commit 7b21e34fd1c2 ("virtio: 
harsher barriers for rpmsg.") it sounds like the gains could be pretty 
significant (TCP_RR test improved by 35% CPU, 14% throughput).  The idea 
is to get the same benefits in a uniprocessor environment.  If needed I 
can gather the data for x86 for SMP and non-SMP, however I had 
considered the patch to be low hanging fruit on that architecture since 
the smp_ and dma_ barriers are the same.

The performance numbers that I would like to collect but can't would be 
on ARMv7 or later as that is the only spot where the smp_ and dma_ 
barriers differ in any significant way, however I don't have an ARM 
platform that I could test this patch on to generate such data.

- Alex

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] virtio_ring: Update weak barriers to use dma_wmb/rmb
  2015-04-08 18:37       ` Michael S. Tsirkin
  (?)
  (?)
@ 2015-04-08 20:31       ` Alexander Duyck
  -1 siblings, 0 replies; 12+ messages in thread
From: Alexander Duyck @ 2015-04-08 20:31 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: linux-kernel, virtualization

On 04/08/2015 11:37 AM, Michael S. Tsirkin wrote:
> On Wed, Apr 08, 2015 at 07:41:49AM -0700, Alexander Duyck wrote:
>> On 04/08/2015 01:42 AM, Michael S. Tsirkin wrote:
>>> On Tue, Apr 07, 2015 at 05:47:42PM -0700, Alexander Duyck wrote:
>>>> This change makes it so that instead of using smp_wmb/rmb which varies
>>>> depending on the kernel configuration we can can use dma_wmb/rmb which for
>>>> most architectures should be equal to or slightly more strict than
>>>> smp_wmb/rmb.
>>>>
>>>> The advantage to this is that these barriers are available to uniprocessor
>>>> builds as well so the performance should improve under such a
>>>> configuration.
>>>>
>>>> Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
>>> Well the generic implementation has:
>>> #ifndef dma_rmb
>>> #define dma_rmb()       rmb()
>>> #endif
>>>
>>> #ifndef dma_wmb
>>> #define dma_wmb()       wmb()
>>> #endif
>>>
>>> So for these arches you are slightly speeding up UP but slightly hurting SMP -
>>> I think we did benchmark the difference as measureable in the past.
>> The generic implementation for the smp_ barriers does the same thing when
>> CONFIG_SMP is defined.  The only spot where there should be an appreciable
>> difference between the two is on ARM where we define the dma_ barriers as
>> being in the outer shareable domain, and for the smp_ barriers they are
>> inner shareable domain.
>>
>>> Additionally, isn't this relying on undocumented behaviour?
>>> The documentation says:
>>> 	"These are for use with consistent memory"
>>> and virtio does not bother to request consistent memory
>>> allocations.
>> Consistent in this case represents memory that exists within one coherency
>> domain.  So in the case of x86 for instance this represents writes only to
>> system memory.  If you mix writes to system memory and device memory (PIO)
>> then you should be using the full wmb/rmb to guarantee ordering between the
>> two memories.
>>
>>> One wonders whether these will always be strong enough.
>> For the purposes of weak barriers they should be, and they are only slightly
>> stronger than SMP in one case so odds are strength will not be the issue.
>> As far as speed I would suspect that the difference between inner and outer
>> shareable domain should be negligible compared to the difference between a
>> dsb() and a dmb().
>>
>> - Alex
> Maybe it's safe, and maybe there's no performance impact.  But what's
> the purpose of the patch?  From the commit log, It sounds like it's an
> optimization, but it's not an obvious win, and it's not accompanied by
> any numbers.

The win would be that non-SMP should get the same performance from the 
barriers as SMP.  Based on the numbers for commit 7b21e34fd1c2 ("virtio: 
harsher barriers for rpmsg.") it sounds like the gains could be pretty 
significant (TCP_RR test improved by 35% CPU, 14% throughput).  The idea 
is to get the same benefits in a uniprocessor environment.  If needed I 
can gather the data for x86 for SMP and non-SMP, however I had 
considered the patch to be low hanging fruit on that architecture since 
the smp_ and dma_ barriers are the same.

The performance numbers that I would like to collect but can't would be 
on ARMv7 or later as that is the only spot where the smp_ and dma_ 
barriers differ in any significant way, however I don't have an ARM 
platform that I could test this patch on to generate such data.

- Alex

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] virtio_ring: Update weak barriers to use dma_wmb/rmb
  2015-04-08  0:47 [PATCH] virtio_ring: Update weak barriers to use dma_wmb/rmb Alexander Duyck
  2015-04-08  8:42   ` Michael S. Tsirkin
  2015-04-10  3:17 ` Rusty Russell
@ 2015-04-10  3:17 ` Rusty Russell
  2 siblings, 0 replies; 12+ messages in thread
From: Rusty Russell @ 2015-04-10  3:17 UTC (permalink / raw)
  To: Alexander Duyck, linux-kernel, virtualization; +Cc: mst

Alexander Duyck <alexander.h.duyck@redhat.com> writes:
> This change makes it so that instead of using smp_wmb/rmb which varies
> depending on the kernel configuration we can can use dma_wmb/rmb which for
> most architectures should be equal to or slightly more strict than
> smp_wmb/rmb.
>
> The advantage to this is that these barriers are available to uniprocessor
> builds as well so the performance should improve under such a
> configuration.
>
> Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>

This seems OK to me, since it's really as much a cleanup as anything,
but like you I do wonder if there's benefit on ARM in practice.

Applied, thanks.

Cheers,
Rusty.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH] virtio_ring: Update weak barriers to use dma_wmb/rmb
  2015-04-08  0:47 [PATCH] virtio_ring: Update weak barriers to use dma_wmb/rmb Alexander Duyck
  2015-04-08  8:42   ` Michael S. Tsirkin
@ 2015-04-10  3:17 ` Rusty Russell
  2015-04-10  3:17 ` Rusty Russell
  2 siblings, 0 replies; 12+ messages in thread
From: Rusty Russell @ 2015-04-10  3:17 UTC (permalink / raw)
  To: Alexander Duyck, linux-kernel, virtualization; +Cc: mst

Alexander Duyck <alexander.h.duyck@redhat.com> writes:
> This change makes it so that instead of using smp_wmb/rmb which varies
> depending on the kernel configuration we can can use dma_wmb/rmb which for
> most architectures should be equal to or slightly more strict than
> smp_wmb/rmb.
>
> The advantage to this is that these barriers are available to uniprocessor
> builds as well so the performance should improve under such a
> configuration.
>
> Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>

This seems OK to me, since it's really as much a cleanup as anything,
but like you I do wonder if there's benefit on ARM in practice.

Applied, thanks.

Cheers,
Rusty.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH] virtio_ring: Update weak barriers to use dma_wmb/rmb
@ 2015-04-08  0:47 Alexander Duyck
  0 siblings, 0 replies; 12+ messages in thread
From: Alexander Duyck @ 2015-04-08  0:47 UTC (permalink / raw)
  To: linux-kernel, virtualization; +Cc: mst

This change makes it so that instead of using smp_wmb/rmb, which varies
depending on the kernel configuration, we can use dma_wmb/rmb, which for
most architectures should be equal to or slightly more strict than
smp_wmb/rmb.

The advantage to this is that these barriers are available to uniprocessor
builds as well so the performance should improve under such a
configuration.

Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
---
 include/linux/virtio_ring.h |   23 ++++-------------------
 1 file changed, 4 insertions(+), 19 deletions(-)

diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index 67e06fe18c03..8e50888a6d59 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -21,19 +21,20 @@
  * actually quite cheap.
  */
 
-#ifdef CONFIG_SMP
 static inline void virtio_mb(bool weak_barriers)
 {
+#ifdef CONFIG_SMP
 	if (weak_barriers)
 		smp_mb();
 	else
+#endif
 		mb();
 }
 
 static inline void virtio_rmb(bool weak_barriers)
 {
 	if (weak_barriers)
-		smp_rmb();
+		dma_rmb();
 	else
 		rmb();
 }
@@ -41,26 +42,10 @@ static inline void virtio_rmb(bool weak_barriers)
 static inline void virtio_wmb(bool weak_barriers)
 {
 	if (weak_barriers)
-		smp_wmb();
+		dma_wmb();
 	else
 		wmb();
 }
-#else
-static inline void virtio_mb(bool weak_barriers)
-{
-	mb();
-}
-
-static inline void virtio_rmb(bool weak_barriers)
-{
-	rmb();
-}
-
-static inline void virtio_wmb(bool weak_barriers)
-{
-	wmb();
-}
-#endif
 
 struct virtio_device;
 struct virtqueue;

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2015-04-11  0:31 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-08  0:47 [PATCH] virtio_ring: Update weak barriers to use dma_wmb/rmb Alexander Duyck
2015-04-08  8:42 ` Michael S. Tsirkin
2015-04-08  8:42   ` Michael S. Tsirkin
2015-04-08 14:41   ` Alexander Duyck
2015-04-08 14:41     ` Alexander Duyck
2015-04-08 18:37     ` Michael S. Tsirkin
2015-04-08 18:37       ` Michael S. Tsirkin
2015-04-08 20:31       ` Alexander Duyck
2015-04-08 20:31       ` Alexander Duyck
2015-04-10  3:17 ` Rusty Russell
2015-04-10  3:17 ` Rusty Russell
  -- strict thread matches above, loose matches on Subject: below --
2015-04-08  0:47 Alexander Duyck

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.