All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] vhost-net: fall back to vmalloc if high-order allocation fails
@ 2013-01-23 20:46 Romain Francoise
  2013-01-23 21:04 ` Michael S. Tsirkin
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Romain Francoise @ 2013-01-23 20:46 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm, netdev, linux-kernel

Creating a vhost-net device allocates an object large enough (34320 bytes
on x86-64) to trigger an order-4 allocation, which may fail if memory if
fragmented:

 libvirtd: page allocation failure: order:4, mode:0x2000d0
 ...
 SLAB: Unable to allocate memory on node 0 (gfp=0xd0)
   cache: size-65536, object size: 65536, order: 4
   node 0: slabs: 8/8, objs: 8/8, free: 0

In that situation, rather than forcing the caller to use regular
virtio-net, try to allocate the descriptor with vmalloc().

Signed-off-by: Romain Francoise <romain@orebokech.com>
---
 drivers/vhost/net.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index ebd08b2..1ded79b 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -18,6 +18,7 @@
 #include <linux/rcupdate.h>
 #include <linux/file.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 
 #include <linux/net.h>
 #include <linux/if_packet.h>
@@ -603,12 +604,23 @@ static void handle_rx_net(struct vhost_work *work)
 	handle_rx(net);
 }
 
+static void vhost_net_kvfree(void *addr)
+{
+	if (is_vmalloc_addr(addr))
+		vfree(addr);
+	else
+		kfree(addr);
+}
+
 static int vhost_net_open(struct inode *inode, struct file *f)
 {
-	struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
+	struct vhost_net *n;
 	struct vhost_dev *dev;
 	int r;
 
+	n = kmalloc(sizeof *n, GFP_KERNEL | __GFP_NOWARN);
+	if (!n)
+		n = vmalloc(sizeof *n);
 	if (!n)
 		return -ENOMEM;
 
@@ -617,7 +629,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 	n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick;
 	r = vhost_dev_init(dev, n->vqs, VHOST_NET_VQ_MAX);
 	if (r < 0) {
-		kfree(n);
+		vhost_net_kvfree(n);
 		return r;
 	}
 
@@ -719,7 +731,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
 	/* We do an extra flush before freeing memory,
 	 * since jobs can re-queue themselves. */
 	vhost_net_flush(n);
-	kfree(n);
+	vhost_net_kvfree(n);
 	return 0;
 }
 
-- 
1.8.1.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] vhost-net: fall back to vmalloc if high-order allocation fails
  2013-01-23 20:46 [PATCH] vhost-net: fall back to vmalloc if high-order allocation fails Romain Francoise
@ 2013-01-23 21:04 ` Michael S. Tsirkin
  2013-01-28  3:11   ` David Miller
  2013-06-28  7:16   ` Romain Francoise
  2013-01-24  9:45 ` David Laight
  2013-01-25  3:03 ` Cong Wang
  2 siblings, 2 replies; 10+ messages in thread
From: Michael S. Tsirkin @ 2013-01-23 21:04 UTC (permalink / raw)
  To: Romain Francoise; +Cc: kvm, netdev, linux-kernel

On Wed, Jan 23, 2013 at 09:46:47PM +0100, Romain Francoise wrote:
> Creating a vhost-net device allocates an object large enough (34320 bytes
> on x86-64) to trigger an order-4 allocation, which may fail if memory if
> fragmented:
> 
>  libvirtd: page allocation failure: order:4, mode:0x2000d0
>  ...
>  SLAB: Unable to allocate memory on node 0 (gfp=0xd0)
>    cache: size-65536, object size: 65536, order: 4
>    node 0: slabs: 8/8, objs: 8/8, free: 0
> 
> In that situation, rather than forcing the caller to use regular
> virtio-net, try to allocate the descriptor with vmalloc().
> 
> Signed-off-by: Romain Francoise <romain@orebokech.com>

Thanks for the patch.
Hmm, I haven't seen this.
Maybe we should try and reduce our memory usage,
I will look into this.

> ---
>  drivers/vhost/net.c | 18 +++++++++++++++---
>  1 file changed, 15 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index ebd08b2..1ded79b 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -18,6 +18,7 @@
>  #include <linux/rcupdate.h>
>  #include <linux/file.h>
>  #include <linux/slab.h>
> +#include <linux/vmalloc.h>
>  
>  #include <linux/net.h>
>  #include <linux/if_packet.h>
> @@ -603,12 +604,23 @@ static void handle_rx_net(struct vhost_work *work)
>  	handle_rx(net);
>  }
>  
> +static void vhost_net_kvfree(void *addr)
> +{
> +	if (is_vmalloc_addr(addr))
> +		vfree(addr);
> +	else
> +		kfree(addr);
> +}
> +
>  static int vhost_net_open(struct inode *inode, struct file *f)
>  {
> -	struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
> +	struct vhost_net *n;
>  	struct vhost_dev *dev;
>  	int r;
>  
> +	n = kmalloc(sizeof *n, GFP_KERNEL | __GFP_NOWARN);
> +	if (!n)
> +		n = vmalloc(sizeof *n);
>  	if (!n)
>  		return -ENOMEM;
>  
> @@ -617,7 +629,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
>  	n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick;
>  	r = vhost_dev_init(dev, n->vqs, VHOST_NET_VQ_MAX);
>  	if (r < 0) {
> -		kfree(n);
> +		vhost_net_kvfree(n);
>  		return r;
>  	}
>  
> @@ -719,7 +731,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
>  	/* We do an extra flush before freeing memory,
>  	 * since jobs can re-queue themselves. */
>  	vhost_net_flush(n);
> -	kfree(n);
> +	vhost_net_kvfree(n);
>  	return 0;
>  }
>  
> -- 
> 1.8.1.1

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] vhost-net: fall back to vmalloc if high-order allocation fails
  2013-01-23 20:46 [PATCH] vhost-net: fall back to vmalloc if high-order allocation fails Romain Francoise
  2013-01-23 21:04 ` Michael S. Tsirkin
@ 2013-01-24  9:45 ` David Laight
  2013-01-24 10:06   ` Michael S. Tsirkin
  2013-01-25  3:03 ` Cong Wang
  2 siblings, 1 reply; 10+ messages in thread
From: David Laight @ 2013-01-24  9:45 UTC (permalink / raw)
  To: Romain Francoise, Michael S. Tsirkin; +Cc: kvm, netdev, linux-kernel

> +	n = kmalloc(sizeof *n, GFP_KERNEL | __GFP_NOWARN);
> +	if (!n)
> +		n = vmalloc(sizeof *n);

I'm slightly confused by this construct.
I thought kmalloc(... GFP_KERNEL) would sleep waiting for
memory (rather than return NULL).

I realise that (for multi-page sizes) that kmalloc() and
vmalloc() both need to find a contiguous block of kernel
virtual addresses - in different address ranges, and
that vmalloc() then has to find physical memory pages
(which will not be contiguous).

I think this means that kmalloc() is likely to be faster
(if it doesn't have to sleep), but that vmalloc() is
allocating from a much larger resource.

This make me that that large allocations that are not
temporary should probably be allocated with vmalloc().

Is there a 'NO_SLEEP' flag to kmalloc()? is that all
GFP_ATOMIC requests? If so you might try a non-sleeping
kmalloc() with a vmalloc() if it fails.

This all looks as though there should be a GFP_NONCONTIG
flag (or similar) so that kmalloc() can make a decision itself.

Of at least a wrapper - like the one for free().

	David




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] vhost-net: fall back to vmalloc if high-order allocation fails
  2013-01-24  9:45 ` David Laight
@ 2013-01-24 10:06   ` Michael S. Tsirkin
  2013-01-24 10:37     ` David Laight
  0 siblings, 1 reply; 10+ messages in thread
From: Michael S. Tsirkin @ 2013-01-24 10:06 UTC (permalink / raw)
  To: David Laight; +Cc: Romain Francoise, kvm, netdev, linux-kernel

On Thu, Jan 24, 2013 at 09:45:50AM -0000, David Laight wrote:
> > +	n = kmalloc(sizeof *n, GFP_KERNEL | __GFP_NOWARN);
> > +	if (!n)
> > +		n = vmalloc(sizeof *n);
> 
> I'm slightly confused by this construct.
> I thought kmalloc(... GFP_KERNEL) would sleep waiting for
> memory (rather than return NULL).
> 
> I realise that (for multi-page sizes) that kmalloc() and
> vmalloc() both need to find a contiguous block of kernel
> virtual addresses - in different address ranges, and
> that vmalloc() then has to find physical memory pages
> (which will not be contiguous).
> 
> I think this means that kmalloc() is likely to be faster
> (if it doesn't have to sleep), but that vmalloc() is
> allocating from a much larger resource.
> 
> This make me that that large allocations that are not
> temporary should probably be allocated with vmalloc().

vmalloc has some issues for example afaik it's not backed by
a huge page so  I think its use puts more stress on the TLB cache.

> Is there a 'NO_SLEEP' flag to kmalloc()? is that all
> GFP_ATOMIC requests? If so you might try a non-sleeping
> kmalloc() with a vmalloc() if it fails.
> 
> This all looks as though there should be a GFP_NONCONTIG
> flag (or similar) so that kmalloc() can make a decision itself.
> 
> Of at least a wrapper - like the one for free().
> 
> 	David
> 
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] vhost-net: fall back to vmalloc if high-order allocation fails
  2013-01-24 10:06   ` Michael S. Tsirkin
@ 2013-01-24 10:37     ` David Laight
  0 siblings, 0 replies; 10+ messages in thread
From: David Laight @ 2013-01-24 10:37 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Romain Francoise, kvm, netdev, linux-kernel

> > I think this means that kmalloc() is likely to be faster
> > (if it doesn't have to sleep), but that vmalloc() is
> > allocating from a much larger resource.
> >
> > This make me that that large allocations that are not
> > temporary should probably be allocated with vmalloc().
> 
> vmalloc has some issues for example afaik it's not backed by
> a huge page so  I think its use puts more stress on the TLB cache.

Thinks further ...
64bit systems are likely to have enough kernel VA to be able
to map all of physical memory into contiguous VA.

I don't know if Linux does that, but I know code to map it was added
to NetBSD amd64 in order to speed up kernel accesses to 'random'
pages - it might have been partially backed out due to bugs!

If physical memory is mapped like that then kmalloc() requests
can any of physical memory and be unlikely to fail - since user
pages can be moved in order to generate contiguous free blocks.

Doesn't help with 32bit systems - they had to stop mapping all
of physical memory into kernel space a long time ago.

	David




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] vhost-net: fall back to vmalloc if high-order allocation fails
  2013-01-23 20:46 [PATCH] vhost-net: fall back to vmalloc if high-order allocation fails Romain Francoise
  2013-01-23 21:04 ` Michael S. Tsirkin
  2013-01-24  9:45 ` David Laight
@ 2013-01-25  3:03 ` Cong Wang
  2013-01-25  5:15   ` Jason Wang
  2 siblings, 1 reply; 10+ messages in thread
From: Cong Wang @ 2013-01-25  3:03 UTC (permalink / raw)
  To: kvm; +Cc: netdev, linux-kernel

["Followup-To:" header set to gmane.linux.network.]
On Wed, 23 Jan 2013 at 20:46 GMT, Romain Francoise <romain@orebokech.com> wrote:
> Creating a vhost-net device allocates an object large enough (34320 bytes
> on x86-64) to trigger an order-4 allocation, which may fail if memory if
> fragmented:
>
>  libvirtd: page allocation failure: order:4, mode:0x2000d0
>  ...
>  SLAB: Unable to allocate memory on node 0 (gfp=0xd0)
>    cache: size-65536, object size: 65536, order: 4
>    node 0: slabs: 8/8, objs: 8/8, free: 0
>
> In that situation, rather than forcing the caller to use regular
> virtio-net, try to allocate the descriptor with vmalloc().
>


The real problem is vhost_net struct is really big, it
should be reduced rather than workarounded like this.


> +static void vhost_net_kvfree(void *addr)
> +{
> +	if (is_vmalloc_addr(addr))
> +		vfree(addr);
> +	else
> +		kfree(addr);
> +}
> +

This kind of stuff should really go to mm, not netdev.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] vhost-net: fall back to vmalloc if high-order allocation fails
  2013-01-25  3:03 ` Cong Wang
@ 2013-01-25  5:15   ` Jason Wang
  0 siblings, 0 replies; 10+ messages in thread
From: Jason Wang @ 2013-01-25  5:15 UTC (permalink / raw)
  To: Cong Wang; +Cc: kvm, netdev, linux-kernel

On Friday, January 25, 2013 03:03:13 AM Cong Wang wrote:
> ["Followup-To:" header set to gmane.linux.network.]
> 
> On Wed, 23 Jan 2013 at 20:46 GMT, Romain Francoise <romain@orebokech.com> 
wrote:
> > Creating a vhost-net device allocates an object large enough (34320 bytes
> > on x86-64) to trigger an order-4 allocation, which may fail if memory if
> > 
> > fragmented:
> >  libvirtd: page allocation failure: order:4, mode:0x2000d0
> >  ...
> >  SLAB: Unable to allocate memory on node 0 (gfp=0xd0)
> >  
> >    cache: size-65536, object size: 65536, order: 4
> >    node 0: slabs: 8/8, objs: 8/8, free: 0
> > 
> > In that situation, rather than forcing the caller to use regular
> > virtio-net, try to allocate the descriptor with vmalloc().
> 
> The real problem is vhost_net struct is really big, it
> should be reduced rather than workarounded like this.
> 

Looks like iov in vhost_virtqueue is a little big:

    struct iovec iov[UIO_MAXIOV];

Maybe we can use pointer and allocate it like indirect in 
vhost_dev_alloc_iovecs().

> > +static void vhost_net_kvfree(void *addr)
> > +{
> > +	if (is_vmalloc_addr(addr))
> > +		vfree(addr);
> > +	else
> > +		kfree(addr);
> > +}
> > +
> 
> This kind of stuff should really go to mm, not netdev.
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] vhost-net: fall back to vmalloc if high-order allocation fails
  2013-01-23 21:04 ` Michael S. Tsirkin
@ 2013-01-28  3:11   ` David Miller
  2013-01-28  9:23     ` Romain Francoise
  2013-06-28  7:16   ` Romain Francoise
  1 sibling, 1 reply; 10+ messages in thread
From: David Miller @ 2013-01-28  3:11 UTC (permalink / raw)
  To: mst; +Cc: romain, kvm, netdev, linux-kernel

From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 23 Jan 2013 23:04:11 +0200

> Maybe we should try and reduce our memory usage,
> I will look into this.

As has been pointed out, 32K of the size is from those iovecs in
the queues.

The size of this structure is frankly offensive, and even if you add
some levels of indirection even just one iovec chunk is 16K on 64-bit
which is in my opinion still unacceptably large.

TCP sockets aren't even %25 the size of this beast. :-)

I'm not going to apply this vmalloc patch, because if I apply it the
fundamental problem here just gets swept under the carpet even longer.

Sorry.



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] vhost-net: fall back to vmalloc if high-order allocation fails
  2013-01-28  3:11   ` David Miller
@ 2013-01-28  9:23     ` Romain Francoise
  0 siblings, 0 replies; 10+ messages in thread
From: Romain Francoise @ 2013-01-28  9:23 UTC (permalink / raw)
  To: David Miller; +Cc: mst, kvm, netdev, linux-kernel

David Miller <davem@davemloft.net> writes:

> I'm not going to apply this vmalloc patch, because if I apply it the
> fundamental problem here just gets swept under the carpet even longer.

No problem, I'll keep this as a local change until vhost-net's allocation
strategy gains some sanity.

Thanks.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] vhost-net: fall back to vmalloc if high-order allocation fails
  2013-01-23 21:04 ` Michael S. Tsirkin
  2013-01-28  3:11   ` David Miller
@ 2013-06-28  7:16   ` Romain Francoise
  1 sibling, 0 replies; 10+ messages in thread
From: Romain Francoise @ 2013-06-28  7:16 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: kvm, netdev, linux-kernel

"Michael S. Tsirkin" <mst@redhat.com> writes:

> On Wed, Jan 23, 2013 at 09:46:47PM +0100, Romain Francoise wrote:
>> Creating a vhost-net device allocates an object large enough (34320 bytes
>> on x86-64) to trigger an order-4 allocation, which may fail if memory if
>> fragmented:
>> 
>>  libvirtd: page allocation failure: order:4, mode:0x2000d0
>>  ...
>>  SLAB: Unable to allocate memory on node 0 (gfp=0xd0)
>>    cache: size-65536, object size: 65536, order: 4
>>    node 0: slabs: 8/8, objs: 8/8, free: 0
>> 
>> In that situation, rather than forcing the caller to use regular
>> virtio-net, try to allocate the descriptor with vmalloc().
>> 
>> Signed-off-by: Romain Francoise <romain@orebokech.com>

> Thanks for the patch.
> Hmm, I haven't seen this.
> Maybe we should try and reduce our memory usage,
> I will look into this.

Did you get a chance to investigate this? I'm still getting the same
allocation failures with v3.10-rc7 after reverting my local patch.

Thanks.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2013-06-28  7:25 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-01-23 20:46 [PATCH] vhost-net: fall back to vmalloc if high-order allocation fails Romain Francoise
2013-01-23 21:04 ` Michael S. Tsirkin
2013-01-28  3:11   ` David Miller
2013-01-28  9:23     ` Romain Francoise
2013-06-28  7:16   ` Romain Francoise
2013-01-24  9:45 ` David Laight
2013-01-24 10:06   ` Michael S. Tsirkin
2013-01-24 10:37     ` David Laight
2013-01-25  3:03 ` Cong Wang
2013-01-25  5:15   ` Jason Wang

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.