All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH 0/1] macvtap TX zero copy between guest and host kernel
@ 2010-09-13 20:43 Shirley Ma
  2010-09-13 20:47 ` RFC PATCH 1/2] macvtap: A new sock zero copy flag Shirley Ma
                   ` (2 more replies)
  0 siblings, 3 replies; 47+ messages in thread
From: Shirley Ma @ 2010-09-13 20:43 UTC (permalink / raw)
  To: Avi Kivity, Arnd Bergmann, mst; +Cc: xiaohui.xin, netdev, kvm, linux-kernel

This patch induces a new sock flag ZEROCOPY to avoid copy between
userspace and kernel. macvtap is the first user of zero copy between
guest and host kernel. It only uses when the lower device supports high
memory DMA. The first set of patch only addresses transmission TX side.
The test has shown big improvement on either CPU utilization reduction
or BW increase on 10GbE Intel NIC. Performance data will be submitted in
the coming email.

thanks
Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

* RFC PATCH 1/2] macvtap: A new sock zero copy flag
  2010-09-13 20:43 [RFC PATCH 0/1] macvtap TX zero copy between guest and host kernel Shirley Ma
@ 2010-09-13 20:47 ` Shirley Ma
  2010-09-13 20:48 ` [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel Shirley Ma
  2010-09-14 12:05 ` [RFC PATCH 0/1] macvtap " Michael S. Tsirkin
  2 siblings, 0 replies; 47+ messages in thread
From: Shirley Ma @ 2010-09-13 20:47 UTC (permalink / raw)
  To: Arnd Bergmann, mst, Avi Kivity; +Cc: xiaohui.xin, netdev, kvm, linux-kernel

/* Add a new flag to support sock zero copy from user space to kernel */

Signed-off-by: Shirley Ma <xma@us.ibm.com>
---

 include/net/sock.h |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index adab9dc..80172de 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -543,6 +543,7 @@ enum sock_flags {
 	SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */
 	SOCK_FASYNC, /* fasync() active */
 	SOCK_RXQ_OVFL,
+	SOCK_ZEROCOPY, /* zerocopy from user space to kernel */
 };
 
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)



^ permalink raw reply	[flat|nested] 47+ messages in thread

* [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-13 20:43 [RFC PATCH 0/1] macvtap TX zero copy between guest and host kernel Shirley Ma
  2010-09-13 20:47 ` RFC PATCH 1/2] macvtap: A new sock zero copy flag Shirley Ma
@ 2010-09-13 20:48 ` Shirley Ma
  2010-09-14  3:17   ` David Miller
  2010-09-14 12:05 ` [RFC PATCH 0/1] macvtap " Michael S. Tsirkin
  2 siblings, 1 reply; 47+ messages in thread
From: Shirley Ma @ 2010-09-13 20:48 UTC (permalink / raw)
  To: Avi Kivity, Arnd Bergmann, mst; +Cc: xiaohui.xin, netdev, kvm, linux-kernel

Add zero copy feature between userspace and kernel in macvtap when lower device supports
high memory DMA.


Signed-off-by: Shirley Ma <xma@us.ibm.com>
---

 drivers/net/macvtap.c |  136 +++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 126 insertions(+), 10 deletions(-)

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 3b1c54a..186cde1 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -274,6 +274,7 @@ static int macvtap_open(struct inode *inode, struct file *file)
 	struct net *net = current->nsproxy->net_ns;
 	struct net_device *dev = dev_get_by_index(net, iminor(inode));
 	struct macvtap_queue *q;
+	struct macvlan_dev *vlan = netdev_priv(dev);
 	int err;
 
 	err = -ENODEV;
@@ -302,6 +303,17 @@ static int macvtap_open(struct inode *inode, struct file *file)
 	q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP;
 	q->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
 
+	/*
+	 * so far only VM uses macvtap, enable zero copy between guest
+	 * kernel and host kernel when lower device supports high memory
+	 * DMA
+	 */
+	if (vlan) {
+		if ((vlan->lowerdev->features & NETIF_F_HIGHDMA) &&
+		    (vlan->lowerdev->features & NETIF_F_SG))
+			sock_set_flag(&q->sk, SOCK_ZEROCOPY);
+	}
+
 	err = macvtap_set_queue(dev, file, q);
 	if (err)
 		sock_put(&q->sk);
@@ -343,6 +355,24 @@ out:
 	return mask;
 }
 
+#define GOODCOPY_LEN  (L1_CACHE_BYTES < 64 ? 64 : L1_CACHE_BYTES)
+
+static inline struct sk_buff *macvtap_alloc_skb_goodcopy(struct sock *sk,
+						 size_t prepad, size_t copy,
+						 int noblock, int *err)
+{
+	struct sk_buff *skb;
+
+	skb = sock_alloc_send_pskb(sk, prepad + copy, 0, noblock, err);
+	if (!skb)
+		return NULL;
+	skb_reserve(skb, prepad);
+	skb_put(skb, copy);
+
+	return skb;
+
+}
+
 static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad,
 						size_t len, size_t linear,
 						int noblock, int *err)
@@ -447,15 +477,91 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
 	return 0;
 }
 
+/* set skb frags from iovec, this can move to core network code for reuse */
+static int set_sg_from_iovec_zerocopy(struct sk_buff *skb,
+				      const struct iovec *from, int offset,
+				      size_t count)
+{
+	int len = iov_length(from, count) - offset;
+	int copy = skb_headlen(skb);
+	int size, offset1 = 0;
+	int i = 0;
+	skb_frag_t *f;
+
+	/* Skip over from offset */
+	while (offset >= from->iov_len) {
+		offset -= from->iov_len;
+		++from;
+		--count;
+	}
+
+	/* copy up to skb headlen */
+	while (copy > 0) {
+		size = min_t(unsigned int, copy, from->iov_len - offset);
+		if (copy_from_user(skb->data + offset1, from->iov_base + offset,
+				   size))
+			return -EFAULT;
+		if (copy > size) {
+			++from;
+			--count;
+		}
+		copy -= size;
+		offset1 += size;
+		offset = 0;
+	}
+
+	if (len == offset1)
+		return 0;
+
+	while (count--) {
+		struct page *page[MAX_SKB_FRAGS];
+		int num_pages;
+		unsigned long base;
+
+		len = from->iov_len - offset1;
+		if (!len) {
+			offset1 = 0;
+			++from;
+			continue;
+		}
+		base = (unsigned long)from->iov_base + offset1;
+		size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
+		num_pages = get_user_pages_fast(base, size, 0, &page[i]);
+		if ((num_pages != size) ||
+		    (num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags))
+			/* put_page is in skb free */
+			return -EFAULT;
+		while (len) {
+			f = &skb_shinfo(skb)->frags[i];
+			f->page = page[i];
+			f->page_offset = base & ~PAGE_MASK;
+			f->size = min_t(int, len, PAGE_SIZE - f->page_offset);
+			skb->data_len += f->size;
+			skb->len += f->size;
+			skb->truesize += f->size;
+			skb_shinfo(skb)->nr_frags++;
+			/* increase sk_wmem_alloc */
+			if (skb->sk && skb->destructor == sock_wfree)
+				atomic_add(f->size, &skb->sk->sk_wmem_alloc);
+			base += f->size;
+			len -= f->size;
+			i++;
+		}
+		offset1 = 0;
+		++from;
+	}
+	return 0;
+}
 
 /* Get packet from user space buffer */
 static ssize_t macvtap_get_user(struct macvtap_queue *q,
-				const struct iovec *iv, size_t count,
-				int noblock)
+				const struct iovec *iv,
+				unsigned long total_len,
+				unsigned long count, int noblock)
 {
 	struct sk_buff *skb;
 	struct macvlan_dev *vlan;
-	size_t len = count;
+	unsigned len = total_len;
 	int err;
 	struct virtio_net_hdr vnet_hdr = { 0 };
 	int vnet_hdr_len = 0;
@@ -485,12 +591,22 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q,
 	if (unlikely(len < ETH_HLEN))
 		goto err;
 
-	skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, len, vnet_hdr.hdr_len,
-				noblock, &err);
+	if (sock_flag(&q->sk, SOCK_ZEROCOPY)) {
+		int copy = len > 2 * GOODCOPY_LEN ? GOODCOPY_LEN : len;
+		skb = macvtap_alloc_skb_goodcopy(&q->sk, NET_IP_ALIGN, copy,
+						 noblock, &err);
+	} else
+		skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, len,
+					vnet_hdr.hdr_len, noblock, &err);
 	if (!skb)
 		goto err;
 
-	err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, len);
+	if (sock_flag(&q->sk, SOCK_ZEROCOPY))
+		err = set_sg_from_iovec_zerocopy(skb, iv, vnet_hdr_len, count);
+	else
+		err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len,
+						   len);
+
 	if (err)
 		goto err_kfree;
 
@@ -512,7 +628,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q,
 		kfree_skb(skb);
 	rcu_read_unlock_bh();
 
-	return count;
+	return total_len;
 
 err_kfree:
 	kfree_skb(skb);
@@ -534,8 +650,8 @@ static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv,
 	ssize_t result = -ENOLINK;
 	struct macvtap_queue *q = file->private_data;
 
-	result = macvtap_get_user(q, iv, iov_length(iv, count),
-			      file->f_flags & O_NONBLOCK);
+	result = macvtap_get_user(q, iv, iov_length(iv, count), count,
+				  file->f_flags & O_NONBLOCK);
 	return result;
 }
 
@@ -748,7 +864,7 @@ static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock,
 			   struct msghdr *m, size_t total_len)
 {
 	struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
-	return macvtap_get_user(q, m->msg_iov, total_len,
+	return macvtap_get_user(q, m->msg_iov, total_len, m->msg_iovlen,
 			    m->msg_flags & MSG_DONTWAIT);
 }
 



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-13 20:48 ` [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel Shirley Ma
@ 2010-09-14  3:17   ` David Miller
  2010-09-14  9:12     ` Avi Kivity
  0 siblings, 1 reply; 47+ messages in thread
From: David Miller @ 2010-09-14  3:17 UTC (permalink / raw)
  To: mashirle; +Cc: avi, arnd, mst, xiaohui.xin, netdev, kvm, linux-kernel

From: Shirley Ma <mashirle@us.ibm.com>
Date: Mon, 13 Sep 2010 13:48:03 -0700

> +		base = (unsigned long)from->iov_base + offset1;
> +		size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
> +		num_pages = get_user_pages_fast(base, size, 0, &page[i]);
> +		if ((num_pages != size) ||
> +		    (num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags))
> +			/* put_page is in skb free */
> +			return -EFAULT;

What keeps the user from writing to these pages in it's address space
after the write call returns?

A write() return of success means:

	"I wrote what you gave to me"

not

	"I wrote what you gave to me, oh and BTW don't touch these
         pages for a while."

In fact "a while" isn't even defined in any way, as there is no way
for the write() invoker to know when the networking card is done with
those pages.

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-14  3:17   ` David Miller
@ 2010-09-14  9:12     ` Avi Kivity
  2010-09-14 15:05       ` Shirley Ma
  0 siblings, 1 reply; 47+ messages in thread
From: Avi Kivity @ 2010-09-14  9:12 UTC (permalink / raw)
  To: David Miller; +Cc: mashirle, arnd, mst, xiaohui.xin, netdev, kvm, linux-kernel

  On 09/14/2010 05:17 AM, David Miller wrote:
> From: Shirley Ma<mashirle@us.ibm.com>
> Date: Mon, 13 Sep 2010 13:48:03 -0700
>
>> +		base = (unsigned long)from->iov_base + offset1;
>> +		size = ((base&  ~PAGE_MASK) + len + ~PAGE_MASK)>>  PAGE_SHIFT;
>> +		num_pages = get_user_pages_fast(base, size, 0,&page[i]);
>> +		if ((num_pages != size) ||
>> +		    (num_pages>  MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags))
>> +			/* put_page is in skb free */
>> +			return -EFAULT;
> What keeps the user from writing to these pages in it's address space
> after the write call returns?
>
> A write() return of success means:
>
> 	"I wrote what you gave to me"
>
> not
>
> 	"I wrote what you gave to me, oh and BTW don't touch these
>           pages for a while."
>
> In fact "a while" isn't even defined in any way, as there is no way
> for the write() invoker to know when the networking card is done with
> those pages.

That's what io_submit() is for.  Then io_getevents() tells you what "a 
while" actually was.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 0/1] macvtap TX zero copy between guest and host kernel
  2010-09-13 20:43 [RFC PATCH 0/1] macvtap TX zero copy between guest and host kernel Shirley Ma
  2010-09-13 20:47 ` RFC PATCH 1/2] macvtap: A new sock zero copy flag Shirley Ma
  2010-09-13 20:48 ` [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel Shirley Ma
@ 2010-09-14 12:05 ` Michael S. Tsirkin
  2010-09-14 15:15   ` Shirley Ma
  2 siblings, 1 reply; 47+ messages in thread
From: Michael S. Tsirkin @ 2010-09-14 12:05 UTC (permalink / raw)
  To: Shirley Ma
  Cc: Avi Kivity, Arnd Bergmann, xiaohui.xin, netdev, kvm, linux-kernel

On Mon, Sep 13, 2010 at 01:43:00PM -0700, Shirley Ma wrote:
> This patch induces a new sock flag ZEROCOPY to avoid copy between
> userspace and kernel. macvtap is the first user of zero copy between
> guest and host kernel. It only uses when the lower device supports high
> memory DMA. The first set of patch only addresses transmission TX side.
> The test has shown big improvement on either CPU utilization reduction
> or BW increase on 10GbE Intel NIC. Performance data will be submitted in
> the coming email.
> 
> thanks
> Shirley

While others pointed out correctness issues with the patch,
I would still like to see the performance numbers, just so we
understand what's possible.

-- 
MST

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-14  9:12     ` Avi Kivity
@ 2010-09-14 15:05       ` Shirley Ma
  2010-09-14 15:21         ` Arnd Bergmann
  2010-09-15  1:50         ` Xin, Xiaohui
  0 siblings, 2 replies; 47+ messages in thread
From: Shirley Ma @ 2010-09-14 15:05 UTC (permalink / raw)
  To: Avi Kivity
  Cc: David Miller, arnd, mst, xiaohui.xin, netdev, kvm, linux-kernel

On Tue, 2010-09-14 at 11:12 +0200, Avi Kivity wrote:
> >> +            base = (unsigned long)from->iov_base + offset1;
> >> +            size = ((base&  ~PAGE_MASK) + len + ~PAGE_MASK)>>
> PAGE_SHIFT;
> >> +            num_pages = get_user_pages_fast(base, size,
> 0,&page[i]);
> >> +            if ((num_pages != size) ||
> >> +                (num_pages>  MAX_SKB_FRAGS -
> skb_shinfo(skb)->nr_frags))
> >> +                    /* put_page is in skb free */
> >> +                    return -EFAULT;
> > What keeps the user from writing to these pages in it's address
> space
> > after the write call returns?
> >
> > A write() return of success means:
> >
> >       "I wrote what you gave to me"
> >
> > not
> >
> >       "I wrote what you gave to me, oh and BTW don't touch these
> >           pages for a while."
> >
> > In fact "a while" isn't even defined in any way, as there is no way
> > for the write() invoker to know when the networking card is done
> with
> > those pages.
> 
> That's what io_submit() is for.  Then io_getevents() tells you what
> "a 
> while" actually was.

This macvtap zero copy uses iov buffers from vhost ring, which is
allocated from guest kernel. In host kernel, vhost calls macvtap
sendmsg. macvtap sendmsg calls get_user_pages_fast to pin these buffers'
pages for zero copy.

The patch is relying on how vhost handle these buffers. I need to look
at vhost code (qemu) first for addressing the questions here.

Thanks
Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 0/1] macvtap TX zero copy between guest and host kernel
  2010-09-14 12:05 ` [RFC PATCH 0/1] macvtap " Michael S. Tsirkin
@ 2010-09-14 15:15   ` Shirley Ma
  0 siblings, 0 replies; 47+ messages in thread
From: Shirley Ma @ 2010-09-14 15:15 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Avi Kivity, Arnd Bergmann, xiaohui.xin, netdev, kvm, linux-kernel

Hello Miachel,

On Tue, 2010-09-14 at 14:05 +0200, Michael S. Tsirkin wrote:
> While others pointed out correctness issues with the patch,
> I would still like to see the performance numbers, just so we
> understand what's possible.

The performance looks good, it either saves the host CPU utilization the
guest is running on (by 8-10% in 8 cpus) or gain high BW w/i more guest
CPU utilization when host utilization is similar or less than before.
And I run 32 netperf instants and didn't hit any problem.

Here are output from host perf top: (I am upgrading my guest to most
recent kernel now to collect perf top data.) My guest has 2 vcpus, host
has 8 cpus.

Please let me know what performance data you would like to see. I will
run more

w/o zero copy patch:

-----------------------------------------------------------------------------------------------------------------------------------------------------------
   PerfTop:    1708 irqs/sec  kernel:63.7%  exact:  0.0% [1000Hz cycles],  (all, 8 CPUs)
-----------------------------------------------------------------------------------------------------------------------------------------------------------

             samples  pcnt function                     DSO
             _______ _____ ____________________________ __________________________________________________________

             6842.00 47.4% copy_user_generic_string     /lib/modules/2.6.36-rc3+/build/vmlinux
              329.00  2.3% get_page_from_freelist       /lib/modules/2.6.36-rc3+/build/vmlinux
              307.00  2.1% list_del                     /lib/modules/2.6.36-rc3+/build/vmlinux
              289.00  2.0% alloc_pages_current          /lib/modules/2.6.36-rc3+/build/vmlinux
              283.00  2.0% __alloc_pages_nodemask       /lib/modules/2.6.36-rc3+/build/vmlinux
              234.00  1.6% ixgbe_xmit_frame             /lib/modules/2.6.36-rc3+/kernel/drivers/net/ixgbe/ixgbe.ko
              232.00  1.6% vmx_vcpu_run                 /lib/modules/2.6.36-rc3+/kernel/arch/x86/kvm/kvm-intel.ko
              210.00  1.5% schedule                     /lib/modules/2.6.36-rc3+/build/vmlinux
              173.00  1.2% _cond_resched                /lib/modules/2.6.36-rc3+/build/vmlinux


w/i zero copy patch:

-------------------------------------------------------------------------------
   PerfTop:    1108 irqs/sec  kernel:43.0%  exact:  0.0% [1000Hz cycles],  (all, 8 CPUs)
-------------------------------------------------------------------------------

             samples  pcnt function                 DSO
             _______ _____ ________________________ ___________

              281.00  5.1% copy_user_generic_string [kernel]
              235.00  4.3% vmx_vcpu_run             [kvm_intel]
              228.00  4.1% gup_pte_range            [kernel]
              211.00  3.8% tg_shares_up             [kernel]
              179.00  3.2% schedule                 [kernel]
              148.00  2.7% _raw_spin_lock_irqsave   [kernel]
              139.00  2.5% iommu_no_mapping         [kernel]
              124.00  2.2% ixgbe_xmit_frame         [ixgbe]
              123.00  2.2% kvm_arch_vcpu_ioctl_run  [kvm]
              122.00  2.2% _raw_spin_lock           [kernel]
              113.00  2.1% put_page                 [kernel]
               92.00  1.7% vhost_get_vq_desc        [vhost_net]
               81.00  1.5% get_user_pages_fast      [kernel]
               81.00  1.5% memcpy_fromiovec         [kernel]
               80.00  1.5% translate_desc           [vhost_net]

w/i zero copy patch, and NIC IRQ cpu affinity (netper/netserver on cpu 0, interrupts on cpu1)

[root@localhost ~]# netperf -H 10.0.4.74 -c -C -l 60 -T0,0 -- -m 65536
TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.0.4.74 (10.0.4.74) port 0 AF_INET : cpu bind
Recv   Send    Send                          Utilization       Service Demand
Socket Socket  Message  Elapsed              Send     Recv     Send    Recv
Size   Size    Size     Time     Throughput  local    remote   local   remote
bytes  bytes   bytes    secs.    10^6bits/s  % S      % S      us/KB   us/KB

 87380  16384  65536    60.00      9384.25   53.92    13.62    0.941   0.951
[root@localhost ~]#







^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-14 15:05       ` Shirley Ma
@ 2010-09-14 15:21         ` Arnd Bergmann
  2010-09-14 15:22           ` Michael S. Tsirkin
  2010-09-15  1:56           ` Xin, Xiaohui
  2010-09-15  1:50         ` Xin, Xiaohui
  1 sibling, 2 replies; 47+ messages in thread
From: Arnd Bergmann @ 2010-09-14 15:21 UTC (permalink / raw)
  To: Shirley Ma
  Cc: Avi Kivity, David Miller, mst, xiaohui.xin, netdev, kvm, linux-kernel

On Tuesday 14 September 2010, Shirley Ma wrote:
> On Tue, 2010-09-14 at 11:12 +0200, Avi Kivity wrote:
>
> > That's what io_submit() is for.  Then io_getevents() tells you what
> > "a 
> > while" actually was.
> 
> This macvtap zero copy uses iov buffers from vhost ring, which is
> allocated from guest kernel. In host kernel, vhost calls macvtap
> sendmsg. macvtap sendmsg calls get_user_pages_fast to pin these buffers'
> pages for zero copy.
> 
> The patch is relying on how vhost handle these buffers. I need to look
> at vhost code (qemu) first for addressing the questions here.

I guess the best solution would be to make macvtap_aio_write return
-EIOCBQUEUED when a packet gets passed down to the adapter, and
call aio_complete when the adapter is done with it.

This would change the regular behavior of macvtap into a model where
every write on the file blocks until the packet has left the machine,
which gives us better flow control, but does slow down the traffic
when we only put one packet at a time into the queue.

It also allows the user to call io_submit instead of write in order
to do an asynchronous submission as Avi was suggesting.

	Arnd

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-14 15:21         ` Arnd Bergmann
@ 2010-09-14 15:22           ` Michael S. Tsirkin
  2010-09-14 16:00             ` Shirley Ma
  2010-09-15  1:56           ` Xin, Xiaohui
  1 sibling, 1 reply; 47+ messages in thread
From: Michael S. Tsirkin @ 2010-09-14 15:22 UTC (permalink / raw)
  To: Arnd Bergmann
  Cc: Shirley Ma, Avi Kivity, David Miller, xiaohui.xin, netdev, kvm,
	linux-kernel

On Tue, Sep 14, 2010 at 05:21:13PM +0200, Arnd Bergmann wrote:
> On Tuesday 14 September 2010, Shirley Ma wrote:
> > On Tue, 2010-09-14 at 11:12 +0200, Avi Kivity wrote:
> >
> > > That's what io_submit() is for.  Then io_getevents() tells you what
> > > "a 
> > > while" actually was.
> > 
> > This macvtap zero copy uses iov buffers from vhost ring, which is
> > allocated from guest kernel. In host kernel, vhost calls macvtap
> > sendmsg. macvtap sendmsg calls get_user_pages_fast to pin these buffers'
> > pages for zero copy.
> > 
> > The patch is relying on how vhost handle these buffers. I need to look
> > at vhost code (qemu) first for addressing the questions here.
> 
> I guess the best solution would be to make macvtap_aio_write return
> -EIOCBQUEUED when a packet gets passed down to the adapter, and
> call aio_complete when the adapter is done with it.
> 
> This would change the regular behavior of macvtap into a model where
> every write on the file blocks until the packet has left the machine,
> which gives us better flow control, but does slow down the traffic
> when we only put one packet at a time into the queue.
> 
> It also allows the user to call io_submit instead of write in order
> to do an asynchronous submission as Avi was suggesting.
> 
> 	Arnd

I would expect this to hurt performance significantly.
We could do this for asynchronous requests only to avoid the
slowdown.

-- 
MST

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-14 15:22           ` Michael S. Tsirkin
@ 2010-09-14 16:00             ` Shirley Ma
  2010-09-14 16:29               ` Michael S. Tsirkin
  0 siblings, 1 reply; 47+ messages in thread
From: Shirley Ma @ 2010-09-14 16:00 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Arnd Bergmann, Avi Kivity, David Miller, xiaohui.xin, netdev,
	kvm, linux-kernel

On Tue, 2010-09-14 at 17:22 +0200, Michael S. Tsirkin wrote:
> I would expect this to hurt performance significantly.
> We could do this for asynchronous requests only to avoid the
> slowdown. 

Is kiocb in sendmsg helpful here? It is not used now.

Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-14 16:00             ` Shirley Ma
@ 2010-09-14 16:29               ` Michael S. Tsirkin
  2010-09-14 17:02                 ` Shirley Ma
  2010-09-15  2:46                 ` Xin, Xiaohui
  0 siblings, 2 replies; 47+ messages in thread
From: Michael S. Tsirkin @ 2010-09-14 16:29 UTC (permalink / raw)
  To: Shirley Ma
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Tue, Sep 14, 2010 at 09:00:25AM -0700, Shirley Ma wrote:
> On Tue, 2010-09-14 at 17:22 +0200, Michael S. Tsirkin wrote:
> > I would expect this to hurt performance significantly.
> > We could do this for asynchronous requests only to avoid the
> > slowdown. 
> 
> Is kiocb in sendmsg helpful here? It is not used now.
> 
> Shirley

Precisely. This is what the patch from Xin Xiaohui does.  That code
already seems to do most of what you are trying to do, right?

The main thing missing seems to be macvtap integration, so that we can fall back
on data copy if zero copy is unavailable?
How hard would it be to basically link the mp and macvtap modules
together to get us this functionality? Anyone?


-- 
MST

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-14 16:29               ` Michael S. Tsirkin
@ 2010-09-14 17:02                 ` Shirley Ma
  2010-09-14 18:27                   ` Michael S. Tsirkin
  2010-09-15  2:46                 ` Xin, Xiaohui
  1 sibling, 1 reply; 47+ messages in thread
From: Shirley Ma @ 2010-09-14 17:02 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Tue, 2010-09-14 at 18:29 +0200, Michael S. Tsirkin wrote:
> Precisely. This is what the patch from Xin Xiaohui does.  That code
> already seems to do most of what you are trying to do, right?

I thought host pins guest kernel buffer pages was good enough for TX
thought I didn't look up xiaohui's vhost asycn io patch in details.

What's the performance data Xiaohui got from using kiocb? I haven't seen
any performance number from him yet.

> The main thing missing seems to be macvtap integration, so that we can
> fall back
> on data copy if zero copy is unavailable?
> How hard would it be to basically link the mp and macvtap modules
> together to get us this functionality? Anyone? 

The simple integration is using macvtap + xiaohui's vhost asycn io
patch. I can make a try for TX only.

Thanks
Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-14 17:02                 ` Shirley Ma
@ 2010-09-14 18:27                   ` Michael S. Tsirkin
  2010-09-14 18:49                     ` Shirley Ma
  0 siblings, 1 reply; 47+ messages in thread
From: Michael S. Tsirkin @ 2010-09-14 18:27 UTC (permalink / raw)
  To: Shirley Ma
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Tue, Sep 14, 2010 at 10:02:25AM -0700, Shirley Ma wrote:
> On Tue, 2010-09-14 at 18:29 +0200, Michael S. Tsirkin wrote:
> > Precisely. This is what the patch from Xin Xiaohui does.  That code
> > already seems to do most of what you are trying to do, right?
> 
> I thought host pins guest kernel buffer pages was good enough for TX
> thought I didn't look up xiaohui's vhost asycn io patch in details.

As others said, the harder issues for TX are in determining that it's safe
to unpin the memory, and how much memory is it safe to pin to beging
with.  For RX we have some more complexity.

> What's the performance data Xiaohui got from using kiocb? I haven't seen
> any performance number from him yet.
> 
> > The main thing missing seems to be macvtap integration, so that we can
> > fall back
> > on data copy if zero copy is unavailable?
> > How hard would it be to basically link the mp and macvtap modules
> > together to get us this functionality? Anyone? 
> 
> The simple integration is using macvtap + xiaohui's vhost asycn io
> patch. I can make a try for TX only.
> 
> Thanks
> Shirley

Well it's up to you of course, but this is not what I would try:
if you look at the
patchset vhost changes is not the largest part of it,
so this sounds a bit like effort duplication.

TX only is also much less interesting than full zero copy.

I think that you should be able to simply combine
the two drivers together, add an ioctl to
enable/disable zero copy mode of operation.

-- 
MST

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-14 18:27                   ` Michael S. Tsirkin
@ 2010-09-14 18:49                     ` Shirley Ma
  2010-09-14 19:01                       ` Michael S. Tsirkin
  0 siblings, 1 reply; 47+ messages in thread
From: Shirley Ma @ 2010-09-14 18:49 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Tue, 2010-09-14 at 20:27 +0200, Michael S. Tsirkin wrote:
> As others said, the harder issues for TX are in determining that it's
> safe
> to unpin the memory, and how much memory is it safe to pin to beging
> with.  For RX we have some more complexity.

I think unpin the memory is in kfree_skb() whenever the last reference
is gone for TX. What we discussed about here is when/how vhost get
notified to update ring buffer descriptors. Do I misunderstand something
here? 

> Well it's up to you of course, but this is not what I would try:
> if you look at the
> patchset vhost changes is not the largest part of it,
> so this sounds a bit like effort duplication.
> 
> TX only is also much less interesting than full zero copy.

It's not true. From the performance, TX only has gained big improvement.
We need to identify how much gain from TX zero copy, and how much gain
from RX zero copy.

> I think that you should be able to simply combine
> the two drivers together, add an ioctl to
> enable/disable zero copy mode of operation. 

That could work. But what's the purpose to have two drivers if one
driver can handle it?

Thanks
Shirley



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-14 18:49                     ` Shirley Ma
@ 2010-09-14 19:01                       ` Michael S. Tsirkin
  2010-09-14 19:20                         ` Shirley Ma
  2010-09-14 19:36                         ` Shirley Ma
  0 siblings, 2 replies; 47+ messages in thread
From: Michael S. Tsirkin @ 2010-09-14 19:01 UTC (permalink / raw)
  To: Shirley Ma
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Tue, Sep 14, 2010 at 11:49:03AM -0700, Shirley Ma wrote:
> On Tue, 2010-09-14 at 20:27 +0200, Michael S. Tsirkin wrote:
> > As others said, the harder issues for TX are in determining that it's
> > safe
> > to unpin the memory, and how much memory is it safe to pin to beging
> > with.  For RX we have some more complexity.
> 
> I think unpin the memory is in kfree_skb() whenever the last reference
> is gone for TX. What we discussed about here is when/how vhost get
> notified to update ring buffer descriptors. Do I misunderstand something
> here? 

Right, that's a better way to put it.

> > Well it's up to you of course, but this is not what I would try:
> > if you look at the
> > patchset vhost changes is not the largest part of it,
> > so this sounds a bit like effort duplication.
> > 
> > TX only is also much less interesting than full zero copy.
> 
> It's not true. From the performance, TX only has gained big improvement.
> We need to identify how much gain from TX zero copy, and how much gain
> from RX zero copy.

I was speaking from the code point of view:
since we'll want both TX and RX eventually it's nice to
see that some thought was given to RX even if we only merge
TX as a first step.

>From the product POV, RX is already slower (more interrupts, etc)
than TX so speeding it up might be more important,
but I agree, every bit helps.

> > I think that you should be able to simply combine
> > the two drivers together, add an ioctl to
> > enable/disable zero copy mode of operation. 
> 
> That could work. But what's the purpose to have two drivers if one
> driver can handle it?
> 
> Thanks
> Shirley

This was just an idea: I thought it's a good way for people interested
in this zero copy thing to combine forces and avoid making
the same mistakes, but it's not a must of course.

-- 
MST

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-14 19:01                       ` Michael S. Tsirkin
@ 2010-09-14 19:20                         ` Shirley Ma
  2010-09-15  5:31                           ` Michael S. Tsirkin
  2010-09-14 19:36                         ` Shirley Ma
  1 sibling, 1 reply; 47+ messages in thread
From: Shirley Ma @ 2010-09-14 19:20 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Tue, 2010-09-14 at 21:01 +0200, Michael S. Tsirkin wrote:
> On Tue, Sep 14, 2010 at 11:49:03AM -0700, Shirley Ma wrote:
> > On Tue, 2010-09-14 at 20:27 +0200, Michael S. Tsirkin wrote:
> > > As others said, the harder issues for TX are in determining that
> it's
> > > safe
> > > to unpin the memory, and how much memory is it safe to pin to
> beging
> > > with.  For RX we have some more complexity.
> > 
> > I think unpin the memory is in kfree_skb() whenever the last
> reference
> > is gone for TX. What we discussed about here is when/how vhost get
> > notified to update ring buffer descriptors. Do I misunderstand
> something
> > here? 
> 
> Right, that's a better way to put it. 

That's how this macvtap patch did. For how much pinned pages,it is
limited by sk_wmem_alloc size in this patch.

thanks
Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-14 19:01                       ` Michael S. Tsirkin
  2010-09-14 19:20                         ` Shirley Ma
@ 2010-09-14 19:36                         ` Shirley Ma
  2010-09-15  5:12                           ` Michael S. Tsirkin
  1 sibling, 1 reply; 47+ messages in thread
From: Shirley Ma @ 2010-09-14 19:36 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Tue, 2010-09-14 at 21:01 +0200, Michael S. Tsirkin wrote:
> > > I think that you should be able to simply combine
> > > the two drivers together, add an ioctl to
> > > enable/disable zero copy mode of operation. 
> > 
> > That could work. But what's the purpose to have two drivers if one
> > driver can handle it?
> > 
> > Thanks
> > Shirley
> 
> This was just an idea: I thought it's a good way for people interested
> in this zero copy thing to combine forces and avoid making
> the same mistakes, but it's not a must of course. 

Ok, I will make a simple patch by reusing Xiaohui's some vhost code on
handling vhost_add_used_and_signal() to see any performance changes.

The interesting thing here when I run 32 instances netperf/netserver I
didn't see any issue w/i this patch.

Thanks
Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-14 15:05       ` Shirley Ma
  2010-09-14 15:21         ` Arnd Bergmann
@ 2010-09-15  1:50         ` Xin, Xiaohui
  2010-09-15  2:40           ` Shirley Ma
  1 sibling, 1 reply; 47+ messages in thread
From: Xin, Xiaohui @ 2010-09-15  1:50 UTC (permalink / raw)
  To: Shirley Ma, Avi Kivity; +Cc: David Miller, arnd, mst, netdev, kvm, linux-kernel

>From: Shirley Ma [mailto:mashirle@us.ibm.com]
>Sent: Tuesday, September 14, 2010 11:05 PM
>To: Avi Kivity
>Cc: David Miller; arnd@arndb.de; mst@redhat.com; Xin, Xiaohui; netdev@vger.kernel.org;
>kvm@vger.kernel.org; linux-kernel@vger.kernel.org
>Subject: Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
>
>On Tue, 2010-09-14 at 11:12 +0200, Avi Kivity wrote:
>> >> +            base = (unsigned long)from->iov_base + offset1;
>> >> +            size = ((base&  ~PAGE_MASK) + len + ~PAGE_MASK)>>
>> PAGE_SHIFT;
>> >> +            num_pages = get_user_pages_fast(base, size,
>> 0,&page[i]);
>> >> +            if ((num_pages != size) ||
>> >> +                (num_pages>  MAX_SKB_FRAGS -
>> skb_shinfo(skb)->nr_frags))
>> >> +                    /* put_page is in skb free */
>> >> +                    return -EFAULT;
>> > What keeps the user from writing to these pages in it's address
>> space
>> > after the write call returns?
>> >
>> > A write() return of success means:
>> >
>> >       "I wrote what you gave to me"
>> >
>> > not
>> >
>> >       "I wrote what you gave to me, oh and BTW don't touch these
>> >           pages for a while."
>> >
>> > In fact "a while" isn't even defined in any way, as there is no way
>> > for the write() invoker to know when the networking card is done
>> with
>> > those pages.
>>
>> That's what io_submit() is for.  Then io_getevents() tells you what
>> "a
>> while" actually was.
>
>This macvtap zero copy uses iov buffers from vhost ring, which is
>allocated from guest kernel. In host kernel, vhost calls macvtap
>sendmsg. macvtap sendmsg calls get_user_pages_fast to pin these buffers'
>pages for zero copy.
>
>The patch is relying on how vhost handle these buffers. I need to look
>at vhost code (qemu) first for addressing the questions here.
>
>Thanks
>Shirley

I think what David said is what we have thought before in mp device.
Since we are not sure the exact time the tx buffer was wrote though DMA operation.
But the deadline is when the tx buffer was freed. So we only notify the vhost stuff
about the write when tx buffer freed. But the deadline is maybe too late for performance.

Thanks
Xiaohui 


^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-14 15:21         ` Arnd Bergmann
  2010-09-14 15:22           ` Michael S. Tsirkin
@ 2010-09-15  1:56           ` Xin, Xiaohui
  1 sibling, 0 replies; 47+ messages in thread
From: Xin, Xiaohui @ 2010-09-15  1:56 UTC (permalink / raw)
  To: Arnd Bergmann, Shirley Ma
  Cc: Avi Kivity, David Miller, mst, netdev, kvm, linux-kernel

>From: Arnd Bergmann [mailto:arnd@arndb.de]
>Sent: Tuesday, September 14, 2010 11:21 PM
>To: Shirley Ma
>Cc: Avi Kivity; David Miller; mst@redhat.com; Xin, Xiaohui; netdev@vger.kernel.org;
>kvm@vger.kernel.org; linux-kernel@vger.kernel.org
>Subject: Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
>
>On Tuesday 14 September 2010, Shirley Ma wrote:
>> On Tue, 2010-09-14 at 11:12 +0200, Avi Kivity wrote:
>>
>> > That's what io_submit() is for.  Then io_getevents() tells you what
>> > "a
>> > while" actually was.
>>
>> This macvtap zero copy uses iov buffers from vhost ring, which is
>> allocated from guest kernel. In host kernel, vhost calls macvtap
>> sendmsg. macvtap sendmsg calls get_user_pages_fast to pin these buffers'
>> pages for zero copy.
>>
>> The patch is relying on how vhost handle these buffers. I need to look
>> at vhost code (qemu) first for addressing the questions here.
>
>I guess the best solution would be to make macvtap_aio_write return
>-EIOCBQUEUED when a packet gets passed down to the adapter, and
>call aio_complete when the adapter is done with it.
>
>This would change the regular behavior of macvtap into a model where
>every write on the file blocks until the packet has left the machine,
>which gives us better flow control, but does slow down the traffic
>when we only put one packet at a time into the queue.
>
>It also allows the user to call io_submit instead of write in order
>to do an asynchronous submission as Avi was suggesting.
>

But currently, this patch is communicated with vhost-net, which is almost
in the kernel side. If it uses aio stuff, it should be communicate with user
space Backend. 
 
>	Arnd

^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-15  1:50         ` Xin, Xiaohui
@ 2010-09-15  2:40           ` Shirley Ma
  2010-09-15  2:55             ` Xin, Xiaohui
  2010-09-15  5:27             ` Michael S. Tsirkin
  0 siblings, 2 replies; 47+ messages in thread
From: Shirley Ma @ 2010-09-15  2:40 UTC (permalink / raw)
  To: Xin, Xiaohui
  Cc: Avi Kivity, David Miller, arnd, mst, netdev, kvm, linux-kernel

On Wed, 2010-09-15 at 09:50 +0800, Xin, Xiaohui wrote:
> I think what David said is what we have thought before in mp device.
> Since we are not sure the exact time the tx buffer was wrote though
> DMA operation.
> But the deadline is when the tx buffer was freed. So we only notify
> the vhost stuff
> about the write when tx buffer freed. But the deadline is maybe too
> late for performance.

Have you tried it? If so what's the performance penalty you have seen by
notifying vhost when tx buffer freed?

I am thinking to have a callback in skb destructor,
vhost_add_used_and_signal gets updated when skb is actually freed, vhost
vq & head need to be passed to the callback. This might requires vhost
ring size is at least as big as the lower device driver. 

Thanks
Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-14 16:29               ` Michael S. Tsirkin
  2010-09-14 17:02                 ` Shirley Ma
@ 2010-09-15  2:46                 ` Xin, Xiaohui
  2010-09-15  9:58                   ` Michael S. Tsirkin
  1 sibling, 1 reply; 47+ messages in thread
From: Xin, Xiaohui @ 2010-09-15  2:46 UTC (permalink / raw)
  To: Michael S. Tsirkin, Shirley Ma
  Cc: Arnd Bergmann, Avi Kivity, David Miller, netdev, kvm, linux-kernel

>From: Michael S. Tsirkin [mailto:mst@redhat.com]
>Sent: Wednesday, September 15, 2010 12:30 AM
>To: Shirley Ma
>Cc: Arnd Bergmann; Avi Kivity; Xin, Xiaohui; David Miller; netdev@vger.kernel.org;
>kvm@vger.kernel.org; linux-kernel@vger.kernel.org
>Subject: Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
>
>On Tue, Sep 14, 2010 at 09:00:25AM -0700, Shirley Ma wrote:
>> On Tue, 2010-09-14 at 17:22 +0200, Michael S. Tsirkin wrote:
>> > I would expect this to hurt performance significantly.
>> > We could do this for asynchronous requests only to avoid the
>> > slowdown.
>>
>> Is kiocb in sendmsg helpful here? It is not used now.
>>
>> Shirley
>
>Precisely. This is what the patch from Xin Xiaohui does.  That code
>already seems to do most of what you are trying to do, right?
>
>The main thing missing seems to be macvtap integration, so that we can fall back
>on data copy if zero copy is unavailable?
>How hard would it be to basically link the mp and macvtap modules
>together to get us this functionality? Anyone?
>
Michael,
Is to support macvtap with zero-copy through mp device the functionality
you mentioned above?
Before Shirley Ma has suggested to move the zero-copy functionality into
tun/tap device or macvtap device. How do you think about that? I suspect
there will be a lot of duplicate code in that three drivers except we can extract
code of zero-copy into kernel APIs and vhost APIs.
Do you think that's worth to do and help current process which is blocked too
long than I expected?

>
>--
>MST

^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-15  2:40           ` Shirley Ma
@ 2010-09-15  2:55             ` Xin, Xiaohui
  2010-09-15  5:27             ` Michael S. Tsirkin
  1 sibling, 0 replies; 47+ messages in thread
From: Xin, Xiaohui @ 2010-09-15  2:55 UTC (permalink / raw)
  To: Shirley Ma; +Cc: Avi Kivity, David Miller, arnd, mst, netdev, kvm, linux-kernel

>From: Shirley Ma [mailto:mashirle@us.ibm.com]
>Sent: Wednesday, September 15, 2010 10:41 AM
>To: Xin, Xiaohui
>Cc: Avi Kivity; David Miller; arnd@arndb.de; mst@redhat.com; netdev@vger.kernel.org;
>kvm@vger.kernel.org; linux-kernel@vger.kernel.org
>Subject: RE: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
>
>On Wed, 2010-09-15 at 09:50 +0800, Xin, Xiaohui wrote:
>> I think what David said is what we have thought before in mp device.
>> Since we are not sure the exact time the tx buffer was wrote though
>> DMA operation.
>> But the deadline is when the tx buffer was freed. So we only notify
>> the vhost stuff
>> about the write when tx buffer freed. But the deadline is maybe too
>> late for performance.
>
>Have you tried it? If so what's the performance penalty you have seen by
>notifying vhost when tx buffer freed?
>

We did not try it before, as we cared RX side more.

>I am thinking to have a callback in skb destructor,
>vhost_add_used_and_signal gets updated when skb is actually freed, vhost
>vq & head need to be passed to the callback. This might requires vhost
>ring size is at least as big as the lower device driver.
>

That's almost the same what we have done except we use destructor_arg and
another callback..

>Thanks
>Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-14 19:36                         ` Shirley Ma
@ 2010-09-15  5:12                           ` Michael S. Tsirkin
  2010-09-15  6:21                             ` Shirley Ma
  0 siblings, 1 reply; 47+ messages in thread
From: Michael S. Tsirkin @ 2010-09-15  5:12 UTC (permalink / raw)
  To: Shirley Ma
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Tue, Sep 14, 2010 at 12:36:23PM -0700, Shirley Ma wrote:
> On Tue, 2010-09-14 at 21:01 +0200, Michael S. Tsirkin wrote:
> > > > I think that you should be able to simply combine
> > > > the two drivers together, add an ioctl to
> > > > enable/disable zero copy mode of operation. 
> > > 
> > > That could work. But what's the purpose to have two drivers if one
> > > driver can handle it?
> > > 
> > > Thanks
> > > Shirley
> > 
> > This was just an idea: I thought it's a good way for people interested
> > in this zero copy thing to combine forces and avoid making
> > the same mistakes, but it's not a must of course. 
> 
> Ok, I will make a simple patch by reusing Xiaohui's some vhost code on
> handling vhost_add_used_and_signal() to see any performance changes.
> 
> The interesting thing here when I run 32 instances netperf/netserver I
> didn't see any issue w/i this patch.
> 
> Thanks
> Shirley

Yes, I agree this patch is useful for demo purposes:
simple, and shows what kind of performance gains
we can expect for TX.

-- 
MST

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-15  2:40           ` Shirley Ma
  2010-09-15  2:55             ` Xin, Xiaohui
@ 2010-09-15  5:27             ` Michael S. Tsirkin
  2010-09-15  6:17               ` Shirley Ma
  1 sibling, 1 reply; 47+ messages in thread
From: Michael S. Tsirkin @ 2010-09-15  5:27 UTC (permalink / raw)
  To: Shirley Ma
  Cc: Xin, Xiaohui, Avi Kivity, David Miller, arnd, netdev, kvm, linux-kernel

On Tue, Sep 14, 2010 at 07:40:52PM -0700, Shirley Ma wrote:
> On Wed, 2010-09-15 at 09:50 +0800, Xin, Xiaohui wrote:
> > I think what David said is what we have thought before in mp device.
> > Since we are not sure the exact time the tx buffer was wrote though
> > DMA operation.
> > But the deadline is when the tx buffer was freed. So we only notify
> > the vhost stuff
> > about the write when tx buffer freed. But the deadline is maybe too
> > late for performance.
> 
> Have you tried it? If so what's the performance penalty you have seen by
> notifying vhost when tx buffer freed?
> 
> I am thinking to have a callback in skb destructor,
> vhost_add_used_and_signal gets updated when skb is actually freed, vhost
> vq & head need to be passed to the callback. This might requires vhost
> ring size is at least as big as the lower device driver. 
> 
> Thanks
> Shirley

For some of the issues, try following the discussion around
net: af_packet: don't call tpacket_destruct_skb() until the skb is sent
out.

Summary: it's difficult to do correctly generally. Limiting ourselves
to transmit on specific devices might make it possible.

-- 
MST

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-14 19:20                         ` Shirley Ma
@ 2010-09-15  5:31                           ` Michael S. Tsirkin
  0 siblings, 0 replies; 47+ messages in thread
From: Michael S. Tsirkin @ 2010-09-15  5:31 UTC (permalink / raw)
  To: Shirley Ma
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Tue, Sep 14, 2010 at 12:20:29PM -0700, Shirley Ma wrote:
> On Tue, 2010-09-14 at 21:01 +0200, Michael S. Tsirkin wrote:
> > On Tue, Sep 14, 2010 at 11:49:03AM -0700, Shirley Ma wrote:
> > > On Tue, 2010-09-14 at 20:27 +0200, Michael S. Tsirkin wrote:
> > > > As others said, the harder issues for TX are in determining that
> > it's
> > > > safe
> > > > to unpin the memory, and how much memory is it safe to pin to
> > beging
> > > > with.  For RX we have some more complexity.
> > > 
> > > I think unpin the memory is in kfree_skb() whenever the last
> > reference
> > > is gone for TX. What we discussed about here is when/how vhost get
> > > notified to update ring buffer descriptors. Do I misunderstand
> > something
> > > here? 
> > 
> > Right, that's a better way to put it. 
> 
> That's how this macvtap patch did. For how much pinned pages,it is
> limited by sk_wmem_alloc size in this patch.
> 
> thanks
> Shirley

Except that you seem to pin full pages but account sub-page size
in wmem.

-- 
MST

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-15  5:27             ` Michael S. Tsirkin
@ 2010-09-15  6:17               ` Shirley Ma
  0 siblings, 0 replies; 47+ messages in thread
From: Shirley Ma @ 2010-09-15  6:17 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Xin, Xiaohui, Avi Kivity, David Miller, arnd, netdev, kvm, linux-kernel

On Wed, 2010-09-15 at 07:27 +0200, Michael S. Tsirkin wrote:
> For some of the issues, try following the discussion around
> net: af_packet: don't call tpacket_destruct_skb() until the skb is
> sent
> out.
> 
> Summary: it's difficult to do correctly generally. Limiting ourselves
> to transmit on specific devices might make it possible.

Thanks for the tips.

Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-15  5:12                           ` Michael S. Tsirkin
@ 2010-09-15  6:21                             ` Shirley Ma
  2010-09-15 10:10                               ` Michael S. Tsirkin
  0 siblings, 1 reply; 47+ messages in thread
From: Shirley Ma @ 2010-09-15  6:21 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Wed, 2010-09-15 at 07:12 +0200, Michael S. Tsirkin wrote:
> Yes, I agree this patch is useful for demo purposes:
> simple, and shows what kind of performance gains
> we can expect for TX. 

Any other issue you can see in this patch beside vhost descriptors
update?  Don't you think once I address vhost_add_used_and_signal update
issue, it is a simple and complete patch for macvtap TX zero copy?

Thanks
Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-15  2:46                 ` Xin, Xiaohui
@ 2010-09-15  9:58                   ` Michael S. Tsirkin
  2010-09-16  8:18                     ` Xin, Xiaohui
  0 siblings, 1 reply; 47+ messages in thread
From: Michael S. Tsirkin @ 2010-09-15  9:58 UTC (permalink / raw)
  To: Xin, Xiaohui
  Cc: Shirley Ma, Arnd Bergmann, Avi Kivity, David Miller, netdev, kvm,
	linux-kernel

On Wed, Sep 15, 2010 at 10:46:02AM +0800, Xin, Xiaohui wrote:
> >From: Michael S. Tsirkin [mailto:mst@redhat.com]
> >Sent: Wednesday, September 15, 2010 12:30 AM
> >To: Shirley Ma
> >Cc: Arnd Bergmann; Avi Kivity; Xin, Xiaohui; David Miller; netdev@vger.kernel.org;
> >kvm@vger.kernel.org; linux-kernel@vger.kernel.org
> >Subject: Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
> >
> >On Tue, Sep 14, 2010 at 09:00:25AM -0700, Shirley Ma wrote:
> >> On Tue, 2010-09-14 at 17:22 +0200, Michael S. Tsirkin wrote:
> >> > I would expect this to hurt performance significantly.
> >> > We could do this for asynchronous requests only to avoid the
> >> > slowdown.
> >>
> >> Is kiocb in sendmsg helpful here? It is not used now.
> >>
> >> Shirley
> >
> >Precisely. This is what the patch from Xin Xiaohui does.  That code
> >already seems to do most of what you are trying to do, right?
> >
> >The main thing missing seems to be macvtap integration, so that we can fall back
> >on data copy if zero copy is unavailable?
> >How hard would it be to basically link the mp and macvtap modules
> >together to get us this functionality? Anyone?
> >
> Michael,
> Is to support macvtap with zero-copy through mp device the functionality
> you mentioned above?

I have trouble parsing the above question.  At some point Arnd suggested
that the mp device functionality would fit nicely as part of the macvtap
driver.  It seems to make sense superficially, the advantage if it
worked would be that we would get zero copy (mostly) transparently.

Do you agree with this goal?

> Before Shirley Ma has suggested to move the zero-copy functionality into
> tun/tap device or macvtap device. How do you think about that?
> I suspect
> there will be a lot of duplicate code in that three drivers except we can extract
> code of zero-copy into kernel APIs and vhost APIs.


tap would be very hard at this point as it does not bind to a device.
macvtap might work, we mainly need to figure out a way to detect that
device can do zero copy so the right mode is used.  I think a first step
could be to simply link mp code into macvtap module, pass necessary
ioctls on, then move some code around as necessary.  This might get rid
of code duplication nicely.


> Do you think that's worth to do and help current process which is blocked too
> long than I expected?

I think it's nice to have.

And if done hopefully this will get the folk working on the macvtap
driver to review the code, which will help find all issues faster.

I think if you post some performance numbers,
this will also help get people excited and looking at the code.

I also don't see the process as completely blocked, each review round points
out more issues: we aren't going back and forth changing
same lines again and again, are we?

One thing that might help is increase the frequency of updates,
try sending them out sooner.
On the other hand 10 new patches each revision is a lot:
if there is a part of patchset that has stabilised you can split it out,
post once and keep posting the changing part separately.

I hope these suggestions help.

> >
> >--
> >MST

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-15  6:21                             ` Shirley Ma
@ 2010-09-15 10:10                               ` Michael S. Tsirkin
  2010-09-15 14:52                                 ` Shirley Ma
  0 siblings, 1 reply; 47+ messages in thread
From: Michael S. Tsirkin @ 2010-09-15 10:10 UTC (permalink / raw)
  To: Shirley Ma
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Tue, Sep 14, 2010 at 11:21:15PM -0700, Shirley Ma wrote:
> On Wed, 2010-09-15 at 07:12 +0200, Michael S. Tsirkin wrote:
> > Yes, I agree this patch is useful for demo purposes:
> > simple, and shows what kind of performance gains
> > we can expect for TX. 
> 
> Any other issue you can see in this patch beside vhost descriptors
> update?

Another issue is that macvtap can be bound to almost
anything, including e.g. a tap device or a bridge,
which might hang on to skb fragments for unlimited time.
Zero copy TX won't easily work there.
I can imagine either somehow triggering a data copy after the
fact (hard), or detecting such devices and avoiding
zero copy (unfortunate for guest to guest, and drivers
will need tuning).

>  Don't you think once I address vhost_add_used_and_signal update
> issue, it is a simple and complete patch for macvtap TX zero copy?
> 
> Thanks
> Shirley

I like the fact that the patch is simple. Unfortunately
I suspect it'll stop being simple by the time it's complete :)

-- 
MST

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-15 10:10                               ` Michael S. Tsirkin
@ 2010-09-15 14:52                                 ` Shirley Ma
  2010-09-15 15:04                                   ` Michael S. Tsirkin
  2010-09-29  3:24                                   ` Shirley Ma
  0 siblings, 2 replies; 47+ messages in thread
From: Shirley Ma @ 2010-09-15 14:52 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Wed, 2010-09-15 at 12:10 +0200, Michael S. Tsirkin wrote:
> Another issue is that macvtap can be bound to almost
> anything, including e.g. a tap device or a bridge,
> which might hang on to skb fragments for unlimited time.
> Zero copy TX won't easily work there.
> I can imagine either somehow triggering a data copy after the
> fact (hard), or detecting such devices and avoiding
> zero copy (unfortunate for guest to guest, and drivers
> will need tuning).

So far macvtap zero copy patch is limited to lower devices supports high
memory DMA, it doesn't apply to a tap device or a bridge.

> >  Don't you think once I address vhost_add_used_and_signal update
> > issue, it is a simple and complete patch for macvtap TX zero copy?
> > 
> > Thanks
> > Shirley
> 
> I like the fact that the patch is simple. Unfortunately
> I suspect it'll stop being simple by the time it's complete :) 

I can make a try. :)

Thanks
Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-15 14:52                                 ` Shirley Ma
@ 2010-09-15 15:04                                   ` Michael S. Tsirkin
  2010-09-15 15:39                                     ` Michael S. Tsirkin
  2010-09-29  3:24                                   ` Shirley Ma
  1 sibling, 1 reply; 47+ messages in thread
From: Michael S. Tsirkin @ 2010-09-15 15:04 UTC (permalink / raw)
  To: Shirley Ma
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Wed, Sep 15, 2010 at 07:52:34AM -0700, Shirley Ma wrote:
> On Wed, 2010-09-15 at 12:10 +0200, Michael S. Tsirkin wrote:
> > Another issue is that macvtap can be bound to almost
> > anything, including e.g. a tap device or a bridge,
> > which might hang on to skb fragments for unlimited time.
> > Zero copy TX won't easily work there.
> > I can imagine either somehow triggering a data copy after the
> > fact (hard), or detecting such devices and avoiding
> > zero copy (unfortunate for guest to guest, and drivers
> > will need tuning).
> 
> So far macvtap zero copy patch is limited to lower devices supports high
> memory DMA, it doesn't apply to a tap device or a bridge.

Okay. What about macvtap in bridge mode?

> > >  Don't you think once I address vhost_add_used_and_signal update
> > > issue, it is a simple and complete patch for macvtap TX zero copy?
> > > 
> > > Thanks
> > > Shirley
> > 
> > I like the fact that the patch is simple. Unfortunately
> > I suspect it'll stop being simple by the time it's complete :) 
> 
> I can make a try. :)
> 
> Thanks
> Shirley

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-15 15:04                                   ` Michael S. Tsirkin
@ 2010-09-15 15:39                                     ` Michael S. Tsirkin
  2010-09-15 17:00                                       ` Shirley Ma
  0 siblings, 1 reply; 47+ messages in thread
From: Michael S. Tsirkin @ 2010-09-15 15:39 UTC (permalink / raw)
  To: Shirley Ma
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Wed, Sep 15, 2010 at 05:04:12PM +0200, Michael S. Tsirkin wrote:
> On Wed, Sep 15, 2010 at 07:52:34AM -0700, Shirley Ma wrote:
> > On Wed, 2010-09-15 at 12:10 +0200, Michael S. Tsirkin wrote:
> > > Another issue is that macvtap can be bound to almost
> > > anything, including e.g. a tap device or a bridge,
> > > which might hang on to skb fragments for unlimited time.
> > > Zero copy TX won't easily work there.
> > > I can imagine either somehow triggering a data copy after the
> > > fact (hard), or detecting such devices and avoiding
> > > zero copy (unfortunate for guest to guest, and drivers
> > > will need tuning).
> > 
> > So far macvtap zero copy patch is limited to lower devices supports high
> > memory DMA, it doesn't apply to a tap device or a bridge.
> 
> Okay. What about macvtap in bridge mode?

In fact, I rechecked: both bridge and loopback have NETIF_F_HIGHDMA set.
So maybe we should check NETIF_F_NETNS_LOCAL ...

macvtap in bridged mode is interesting as well.

> > > >  Don't you think once I address vhost_add_used_and_signal update
> > > > issue, it is a simple and complete patch for macvtap TX zero copy?
> > > > 
> > > > Thanks
> > > > Shirley
> > > 
> > > I like the fact that the patch is simple. Unfortunately
> > > I suspect it'll stop being simple by the time it's complete :) 
> > 
> > I can make a try. :)
> > 
> > Thanks
> > Shirley

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-15 15:39                                     ` Michael S. Tsirkin
@ 2010-09-15 17:00                                       ` Shirley Ma
  2010-09-15 17:30                                         ` Michael S. Tsirkin
  0 siblings, 1 reply; 47+ messages in thread
From: Shirley Ma @ 2010-09-15 17:00 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Wed, 2010-09-15 at 17:39 +0200, Michael S. Tsirkin wrote:
> In fact, I rechecked: both bridge and loopback have NETIF_F_HIGHDMA
> set.
> So maybe we should check NETIF_F_NETNS_LOCAL ...
> 
> macvtap in bridged mode is interesting as well. 

I found that too, just wondered which flag to use is better. :)

Thanks
Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-15 17:00                                       ` Shirley Ma
@ 2010-09-15 17:30                                         ` Michael S. Tsirkin
  2010-09-15 18:48                                           ` Shirley Ma
  0 siblings, 1 reply; 47+ messages in thread
From: Michael S. Tsirkin @ 2010-09-15 17:30 UTC (permalink / raw)
  To: Shirley Ma
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Wed, Sep 15, 2010 at 10:00:04AM -0700, Shirley Ma wrote:
> On Wed, 2010-09-15 at 17:39 +0200, Michael S. Tsirkin wrote:
> > In fact, I rechecked: both bridge and loopback have NETIF_F_HIGHDMA
> > set.
> > So maybe we should check NETIF_F_NETNS_LOCAL ...
> > 
> > macvtap in bridged mode is interesting as well. 
> 
> I found that too, just wondered which flag to use is better. :)
> 
> Thanks
> Shirley

At some level NETIF_F_NETNS_LOCAL makes sense: local packets
can get anywhere. OTOH one wonders whether there might be other
issues, e.g. in theory devices could hang on to frag pages
just by doing get_page.  There might be other issues.
Maybe we are better off white-listing known-good drivers
with a new flag?

-- 
MST

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-15 17:30                                         ` Michael S. Tsirkin
@ 2010-09-15 18:48                                           ` Shirley Ma
  0 siblings, 0 replies; 47+ messages in thread
From: Shirley Ma @ 2010-09-15 18:48 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Wed, 2010-09-15 at 19:30 +0200, Michael S. Tsirkin wrote:
> At some level NETIF_F_NETNS_LOCAL makes sense: local packets
> can get anywhere. OTOH one wonders whether there might be other
> issues, e.g. in theory devices could hang on to frag pages
> just by doing get_page.  There might be other issues.
> Maybe we are better off white-listing known-good drivers
> with a new flag? 

Sounds reasonable to me if there is no such a flag to be easily used.

Thanks
Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

* RE: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-15  9:58                   ` Michael S. Tsirkin
@ 2010-09-16  8:18                     ` Xin, Xiaohui
  2010-09-16 10:02                       ` Michael S. Tsirkin
  0 siblings, 1 reply; 47+ messages in thread
From: Xin, Xiaohui @ 2010-09-16  8:18 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Shirley Ma, Arnd Bergmann, Avi Kivity, David Miller, netdev, kvm,
	linux-kernel

>From: Michael S. Tsirkin [mailto:mst@redhat.com]
>Sent: Wednesday, September 15, 2010 5:59 PM
>To: Xin, Xiaohui
>Cc: Shirley Ma; Arnd Bergmann; Avi Kivity; David Miller; netdev@vger.kernel.org;
>kvm@vger.kernel.org; linux-kernel@vger.kernel.org
>Subject: Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
>
>On Wed, Sep 15, 2010 at 10:46:02AM +0800, Xin, Xiaohui wrote:
>> >From: Michael S. Tsirkin [mailto:mst@redhat.com]
>> >Sent: Wednesday, September 15, 2010 12:30 AM
>> >To: Shirley Ma
>> >Cc: Arnd Bergmann; Avi Kivity; Xin, Xiaohui; David Miller; netdev@vger.kernel.org;
>> >kvm@vger.kernel.org; linux-kernel@vger.kernel.org
>> >Subject: Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
>> >
>> >On Tue, Sep 14, 2010 at 09:00:25AM -0700, Shirley Ma wrote:
>> >> On Tue, 2010-09-14 at 17:22 +0200, Michael S. Tsirkin wrote:
>> >> > I would expect this to hurt performance significantly.
>> >> > We could do this for asynchronous requests only to avoid the
>> >> > slowdown.
>> >>
>> >> Is kiocb in sendmsg helpful here? It is not used now.
>> >>
>> >> Shirley
>> >
>> >Precisely. This is what the patch from Xin Xiaohui does.  That code
>> >already seems to do most of what you are trying to do, right?
>> >
>> >The main thing missing seems to be macvtap integration, so that we can fall back
>> >on data copy if zero copy is unavailable?
>> >How hard would it be to basically link the mp and macvtap modules
>> >together to get us this functionality? Anyone?
>> >
>> Michael,
>> Is to support macvtap with zero-copy through mp device the functionality
>> you mentioned above?
>
>I have trouble parsing the above question.  At some point Arnd suggested
>that the mp device functionality would fit nicely as part of the macvtap
>driver.  It seems to make sense superficially, the advantage if it
>worked would be that we would get zero copy (mostly) transparently.
>
>Do you agree with this goal?
>

I would say yes.

>> Before Shirley Ma has suggested to move the zero-copy functionality into
>> tun/tap device or macvtap device. How do you think about that?
>> I suspect
>> there will be a lot of duplicate code in that three drivers except we can extract
>> code of zero-copy into kernel APIs and vhost APIs.
>
>
>tap would be very hard at this point as it does not bind to a device.
>macvtap might work, we mainly need to figure out a way to detect that
>device can do zero copy so the right mode is used.  I think a first step
>could be to simply link mp code into macvtap module, pass necessary
>ioctls on, then move some code around as necessary.  This might get rid
>of code duplication nicely.

I'll look into this to see how much effort would be.

>
>
>> Do you think that's worth to do and help current process which is blocked too
>> long than I expected?
>
>I think it's nice to have.
>
>And if done hopefully this will get the folk working on the macvtap
>driver to review the code, which will help find all issues faster.
>
>I think if you post some performance numbers,
>this will also help get people excited and looking at the code.
>

The performance data I have posted before is compared with raw socket on vhost-net.
But currently, the raw socket backend is removed from the qemu side.
So I can only compare with tap on vhost-net. But unfortunately, I missed something
that I even can't bring it up. I was blocked by this for a time.

>I also don't see the process as completely blocked, each review round points
>out more issues: we aren't going back and forth changing
>same lines again and again, are we?
>
>One thing that might help is increase the frequency of updates,
>try sending them out sooner.
>On the other hand 10 new patches each revision is a lot:
>if there is a part of patchset that has stabilised you can split it out,
>post once and keep posting the changing part separately.
>
>I hope these suggestions help.

Thanks, Michael!

>
>> >
>> >--
>> >MST

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-16  8:18                     ` Xin, Xiaohui
@ 2010-09-16 10:02                       ` Michael S. Tsirkin
  0 siblings, 0 replies; 47+ messages in thread
From: Michael S. Tsirkin @ 2010-09-16 10:02 UTC (permalink / raw)
  To: Xin, Xiaohui
  Cc: Shirley Ma, Arnd Bergmann, Avi Kivity, David Miller, netdev, kvm,
	linux-kernel

On Thu, Sep 16, 2010 at 04:18:10PM +0800, Xin, Xiaohui wrote:
> >From: Michael S. Tsirkin [mailto:mst@redhat.com]
> >Sent: Wednesday, September 15, 2010 5:59 PM
> >To: Xin, Xiaohui
> >Cc: Shirley Ma; Arnd Bergmann; Avi Kivity; David Miller; netdev@vger.kernel.org;
> >kvm@vger.kernel.org; linux-kernel@vger.kernel.org
> >Subject: Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
> >
> >On Wed, Sep 15, 2010 at 10:46:02AM +0800, Xin, Xiaohui wrote:
> >> >From: Michael S. Tsirkin [mailto:mst@redhat.com]
> >> >Sent: Wednesday, September 15, 2010 12:30 AM
> >> >To: Shirley Ma
> >> >Cc: Arnd Bergmann; Avi Kivity; Xin, Xiaohui; David Miller; netdev@vger.kernel.org;
> >> >kvm@vger.kernel.org; linux-kernel@vger.kernel.org
> >> >Subject: Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
> >> >
> >> >On Tue, Sep 14, 2010 at 09:00:25AM -0700, Shirley Ma wrote:
> >> >> On Tue, 2010-09-14 at 17:22 +0200, Michael S. Tsirkin wrote:
> >> >> > I would expect this to hurt performance significantly.
> >> >> > We could do this for asynchronous requests only to avoid the
> >> >> > slowdown.
> >> >>
> >> >> Is kiocb in sendmsg helpful here? It is not used now.
> >> >>
> >> >> Shirley
> >> >
> >> >Precisely. This is what the patch from Xin Xiaohui does.  That code
> >> >already seems to do most of what you are trying to do, right?
> >> >
> >> >The main thing missing seems to be macvtap integration, so that we can fall back
> >> >on data copy if zero copy is unavailable?
> >> >How hard would it be to basically link the mp and macvtap modules
> >> >together to get us this functionality? Anyone?
> >> >
> >> Michael,
> >> Is to support macvtap with zero-copy through mp device the functionality
> >> you mentioned above?
> >
> >I have trouble parsing the above question.  At some point Arnd suggested
> >that the mp device functionality would fit nicely as part of the macvtap
> >driver.  It seems to make sense superficially, the advantage if it
> >worked would be that we would get zero copy (mostly) transparently.
> >
> >Do you agree with this goal?
> >
> 
> I would say yes.

In that case, it's a blocker for upstream merge because this change
affects userspace.

> >> Before Shirley Ma has suggested to move the zero-copy functionality into
> >> tun/tap device or macvtap device. How do you think about that?
> >> I suspect
> >> there will be a lot of duplicate code in that three drivers except we can extract
> >> code of zero-copy into kernel APIs and vhost APIs.
> >
> >
> >tap would be very hard at this point as it does not bind to a device.
> >macvtap might work, we mainly need to figure out a way to detect that
> >device can do zero copy so the right mode is used.  I think a first step
> >could be to simply link mp code into macvtap module, pass necessary
> >ioctls on, then move some code around as necessary.  This might get rid
> >of code duplication nicely.
> 
> I'll look into this to see how much effort would be.
> 
> >
> >
> >> Do you think that's worth to do and help current process which is blocked too
> >> long than I expected?
> >
> >I think it's nice to have.
> >
> >And if done hopefully this will get the folk working on the macvtap
> >driver to review the code, which will help find all issues faster.
> >
> >I think if you post some performance numbers,
> >this will also help get people excited and looking at the code.
> >
> 
> The performance data I have posted before is compared with raw socket on vhost-net.
> But currently, the raw socket backend is removed from the qemu side.
> So I can only compare with tap on vhost-net. But unfortunately, I missed something
> that I even can't bring it up. I was blocked by this for a time.

Hey, maybe you are seeing the bug that was reported recently.
Could you try tcpdump -i on the tap interface in host and ethX on guest
and tell me what you see?
If you see packet in guest but not in host, could you try
adding printks in vhost handle_tx to see whether it gets called
and if yes where it fails?

> >I also don't see the process as completely blocked, each review round points
> >out more issues: we aren't going back and forth changing
> >same lines again and again, are we?
> >
> >One thing that might help is increase the frequency of updates,
> >try sending them out sooner.
> >On the other hand 10 new patches each revision is a lot:
> >if there is a part of patchset that has stabilised you can split it out,
> >post once and keep posting the changing part separately.
> >
> >I hope these suggestions help.
> 
> Thanks, Michael!
> 
> >
> >> >
> >> >--
> >> >MST

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-15 14:52                                 ` Shirley Ma
  2010-09-15 15:04                                   ` Michael S. Tsirkin
@ 2010-09-29  3:24                                   ` Shirley Ma
  2010-09-29  8:16                                     ` Michael S. Tsirkin
  2010-09-29 15:14                                     ` Michael S. Tsirkin
  1 sibling, 2 replies; 47+ messages in thread
From: Shirley Ma @ 2010-09-29  3:24 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

Hello Michael,

On Wed, 2010-09-15 at 07:52 -0700, Shirley Ma wrote:
> > >  Don't you think once I address vhost_add_used_and_signal update
> > > issue, it is a simple and complete patch for macvtap TX zero copy?
> > > 
> > > Thanks
> > > Shirley
> > 
> > I like the fact that the patch is simple. Unfortunately
> > I suspect it'll stop being simple by the time it's complete :) 
> 
> I can make a try. :)

I compared several approaches for addressing the issue being raised here
on how/when to update vhost_add_used_and_signal. The simple approach I
have found is:

1. Adding completion field in struct virtqueue;
2. when it is a zero copy packet, put vhost thread wait for completion
to update vhost_add_used_and_signal;
3. passing vq from vhost to macvtap as skb destruct_arg;
4. when skb is freed for the last reference, signal vq completion

The test results show same performance as the original patch. How do you
think? If it sounds good to you. I will resubmit this reversion patch.
The patch still keeps as simple as it was before. :)

Thanks
Shirley



^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-29  3:24                                   ` Shirley Ma
@ 2010-09-29  8:16                                     ` Michael S. Tsirkin
  2010-09-29  8:28                                       ` Michael S. Tsirkin
                                                         ` (2 more replies)
  2010-09-29 15:14                                     ` Michael S. Tsirkin
  1 sibling, 3 replies; 47+ messages in thread
From: Michael S. Tsirkin @ 2010-09-29  8:16 UTC (permalink / raw)
  To: Shirley Ma
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Tue, Sep 28, 2010 at 08:24:29PM -0700, Shirley Ma wrote:
> Hello Michael,
> 
> On Wed, 2010-09-15 at 07:52 -0700, Shirley Ma wrote:
> > > >  Don't you think once I address vhost_add_used_and_signal update
> > > > issue, it is a simple and complete patch for macvtap TX zero copy?
> > > > 
> > > > Thanks
> > > > Shirley
> > > 
> > > I like the fact that the patch is simple. Unfortunately
> > > I suspect it'll stop being simple by the time it's complete :) 
> > 
> > I can make a try. :)
> 
> I compared several approaches for addressing the issue being raised here
> on how/when to update vhost_add_used_and_signal. The simple approach I
> have found is:
> 
> 1. Adding completion field in struct virtqueue;
> 2. when it is a zero copy packet, put vhost thread wait for completion
> to update vhost_add_used_and_signal;
> 3. passing vq from vhost to macvtap as skb destruct_arg;
> 4. when skb is freed for the last reference, signal vq completion
> The test results show same performance as the original patch. How do you
> think? If it sounds good to you. I will resubmit this reversion patch.
> The patch still keeps as simple as it was before. :)
> 
> Thanks
> Shirley

If you look at dev_hard_start_xmit you will see a call
to skb_orphan_try which often calls the skb destructor.
So I suspect this is almost equivalent to your original patch,
and has the same correctness issue.

-- 
MST

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-29  8:16                                     ` Michael S. Tsirkin
@ 2010-09-29  8:28                                       ` Michael S. Tsirkin
  2010-09-29 14:33                                         ` Shirley Ma
  2010-09-29 14:56                                         ` Shirley Ma
  2010-09-29 14:31                                       ` Shirley Ma
  2010-09-29 14:37                                       ` Shirley Ma
  2 siblings, 2 replies; 47+ messages in thread
From: Michael S. Tsirkin @ 2010-09-29  8:28 UTC (permalink / raw)
  To: Shirley Ma
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Wed, Sep 29, 2010 at 10:16:45AM +0200, Michael S. Tsirkin wrote:
> On Tue, Sep 28, 2010 at 08:24:29PM -0700, Shirley Ma wrote:
> > Hello Michael,
> > 
> > On Wed, 2010-09-15 at 07:52 -0700, Shirley Ma wrote:
> > > > >  Don't you think once I address vhost_add_used_and_signal update
> > > > > issue, it is a simple and complete patch for macvtap TX zero copy?
> > > > > 
> > > > > Thanks
> > > > > Shirley
> > > > 
> > > > I like the fact that the patch is simple. Unfortunately
> > > > I suspect it'll stop being simple by the time it's complete :) 
> > > 
> > > I can make a try. :)
> > 
> > I compared several approaches for addressing the issue being raised here
> > on how/when to update vhost_add_used_and_signal. The simple approach I
> > have found is:
> > 
> > 1. Adding completion field in struct virtqueue;
> > 2. when it is a zero copy packet, put vhost thread wait for completion
> > to update vhost_add_used_and_signal;
> > 3. passing vq from vhost to macvtap as skb destruct_arg;
> > 4. when skb is freed for the last reference, signal vq completion
> > The test results show same performance as the original patch. How do you
> > think? If it sounds good to you. I will resubmit this reversion patch.
> > The patch still keeps as simple as it was before. :)
> > 
> > Thanks
> > Shirley
> 
> If you look at dev_hard_start_xmit you will see a call
> to skb_orphan_try which often calls the skb destructor.
> So I suspect this is almost equivalent to your original patch,
> and has the same correctness issue.

So you could try doing skb_tx(skb)->prevent_sk_orphan = 1
just to see what will happen. Might be interesting - just
make sure the device doesn't orphan the skb first thing.
I suspect lack of parallelism will result in bad throughput
esp for small messages.

Note this still won't make it correct (this has module unloading
issue, and devices might still orphan skb, clone it, or hang on to
paged data in some other way) but at least closer.

I think you should try testing with guest to external communication,
this will uncover some of these correctness issues for you.
I think netperf also has some flag to check data, might
be a good idea to use it for testing.

> -- 
> MST

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-29  8:16                                     ` Michael S. Tsirkin
  2010-09-29  8:28                                       ` Michael S. Tsirkin
@ 2010-09-29 14:31                                       ` Shirley Ma
  2010-09-29 14:37                                       ` Shirley Ma
  2 siblings, 0 replies; 47+ messages in thread
From: Shirley Ma @ 2010-09-29 14:31 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Wed, 2010-09-29 at 10:16 +0200, Michael S. Tsirkin wrote:
> If you look at dev_hard_start_xmit you will see a call
> to skb_orphan_try which often calls the skb destructor.
> So I suspect this is almost equivalent to your original patch,
> and has the same correctness issue.

I forgot to mention, in skb_orphan_try, I still kept the skb->sk
reference to sk, I assign skb->sk to NULL when the DMA done.

Thanks
Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-29  8:28                                       ` Michael S. Tsirkin
@ 2010-09-29 14:33                                         ` Shirley Ma
  2010-09-29 14:56                                         ` Shirley Ma
  1 sibling, 0 replies; 47+ messages in thread
From: Shirley Ma @ 2010-09-29 14:33 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Wed, 2010-09-29 at 10:28 +0200, Michael S. Tsirkin wrote:
> I think you should try testing with guest to external communication,
> this will uncover some of these correctness issues for you.
> I think netperf also has some flag to check data, might
> be a good idea to use it for testing.

I always tested guest to remote host, remote host to guest. What other
external communication do you suggest here?

Thanks
Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-29  8:16                                     ` Michael S. Tsirkin
  2010-09-29  8:28                                       ` Michael S. Tsirkin
  2010-09-29 14:31                                       ` Shirley Ma
@ 2010-09-29 14:37                                       ` Shirley Ma
  2 siblings, 0 replies; 47+ messages in thread
From: Shirley Ma @ 2010-09-29 14:37 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Wed, 2010-09-29 at 10:16 +0200, Michael S. Tsirkin wrote:
> > I compared several approaches for addressing the issue being raised
> here
> > on how/when to update vhost_add_used_and_signal. The simple approach
> I
> > have found is:
> > 
> > 1. Adding completion field in struct virtqueue;
> > 2. when it is a zero copy packet, put vhost thread wait for
> completion
> > to update vhost_add_used_and_signal;
> > 3. passing vq from vhost to macvtap as skb destruct_arg;
> > 4. when skb is freed for the last reference, signal vq completion
> > The test results show same performance as the original patch. How do
> you
> > think? If it sounds good to you. I will resubmit this reversion
> patch.
> > The patch still keeps as simple as it was before. :)
> > 
> > Thanks
> > Shirley
> 
> If you look at dev_hard_start_xmit you will see a call
> to skb_orphan_try which often calls the skb destructor.
> So I suspect this is almost equivalent to your original patch,
> and has the same correctness issue. 

If I didn't address this, then vhost net will wait for ever with wait
for completion :))


Thanks
Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-29  8:28                                       ` Michael S. Tsirkin
  2010-09-29 14:33                                         ` Shirley Ma
@ 2010-09-29 14:56                                         ` Shirley Ma
  1 sibling, 0 replies; 47+ messages in thread
From: Shirley Ma @ 2010-09-29 14:56 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Wed, 2010-09-29 at 10:28 +0200, Michael S. Tsirkin wrote:
> > > 1. Adding completion field in struct virtqueue;
> > > 2. when it is a zero copy packet, put vhost thread wait for
> completion
> > > to update vhost_add_used_and_signal;
> > > 3. passing vq from vhost to macvtap as skb destruct_arg;
> > > 4. when skb is freed for the last reference, signal vq completion
> > > The test results show same performance as the original patch. How
> do you
> > > think? If it sounds good to you. I will resubmit this reversion
> patch.
> > > The patch still keeps as simple as it was before. :)
> > > 
> > > Thanks
> > > Shirley
> > 
> > If you look at dev_hard_start_xmit you will see a call
> > to skb_orphan_try which often calls the skb destructor.
> > So I suspect this is almost equivalent to your original patch,
> > and has the same correctness issue.
> 
> So you could try doing skb_tx(skb)->prevent_sk_orphan = 1
> just to see what will happen. Might be interesting - just
> make sure the device doesn't orphan the skb first thing.
> I suspect lack of parallelism will result in bad throughput
> esp for small messages.
> 
> Note this still won't make it correct (this has module unloading
> issue, and devices might still orphan skb, clone it, or hang on to
> paged data in some other way) but at least closer. 

For message size smaller than 128, it still does copy. I tested some
small message size, I didn't see any regression. I will run more test to
focus on small message size between 128 - 4K.

I don't need prevent_sk_orphan since in skb_release_data for last
reference, I just need the ZEROCOPY flag from that sock to signal a
completion.

Thanks
Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-29  3:24                                   ` Shirley Ma
  2010-09-29  8:16                                     ` Michael S. Tsirkin
@ 2010-09-29 15:14                                     ` Michael S. Tsirkin
  2010-09-29 15:23                                       ` Shirley Ma
  1 sibling, 1 reply; 47+ messages in thread
From: Michael S. Tsirkin @ 2010-09-29 15:14 UTC (permalink / raw)
  To: Shirley Ma
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Tue, Sep 28, 2010 at 08:24:29PM -0700, Shirley Ma wrote:
> Hello Michael,
> 
> On Wed, 2010-09-15 at 07:52 -0700, Shirley Ma wrote:
> > > >  Don't you think once I address vhost_add_used_and_signal update
> > > > issue, it is a simple and complete patch for macvtap TX zero copy?
> > > > 
> > > > Thanks
> > > > Shirley
> > > 
> > > I like the fact that the patch is simple. Unfortunately
> > > I suspect it'll stop being simple by the time it's complete :) 
> > 
> > I can make a try. :)
> 
> I compared several approaches for addressing the issue being raised here
> on how/when to update vhost_add_used_and_signal. The simple approach I
> have found is:
> 
> 1. Adding completion field in struct virtqueue;
> 2. when it is a zero copy packet, put vhost thread wait for completion
> to update vhost_add_used_and_signal;
> 3. passing vq from vhost to macvtap as skb destruct_arg;
> 4. when skb is freed for the last reference, signal vq completion
> 
> The test results show same performance as the original patch. How do you
> think? If it sounds good to you. I will resubmit this reversion patch.
> The patch still keeps as simple as it was before. :)
> 
> Thanks
> Shirley

I guess I just don't understand what your patch does.
If you send it, I can take a look.

-- 
MST

^ permalink raw reply	[flat|nested] 47+ messages in thread

* Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
  2010-09-29 15:14                                     ` Michael S. Tsirkin
@ 2010-09-29 15:23                                       ` Shirley Ma
  0 siblings, 0 replies; 47+ messages in thread
From: Shirley Ma @ 2010-09-29 15:23 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Arnd Bergmann, Avi Kivity, Xin, Xiaohui, David Miller, netdev,
	kvm, linux-kernel

On Wed, 2010-09-29 at 17:14 +0200, Michael S. Tsirkin wrote:
> I guess I just don't understand what your patch does.
> If you send it, I can take a look.

Ok, I will collect more performance data and send the patch.

Thanks
Shirley


^ permalink raw reply	[flat|nested] 47+ messages in thread

end of thread, other threads:[~2010-09-29 15:56 UTC | newest]

Thread overview: 47+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-09-13 20:43 [RFC PATCH 0/1] macvtap TX zero copy between guest and host kernel Shirley Ma
2010-09-13 20:47 ` RFC PATCH 1/2] macvtap: A new sock zero copy flag Shirley Ma
2010-09-13 20:48 ` [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel Shirley Ma
2010-09-14  3:17   ` David Miller
2010-09-14  9:12     ` Avi Kivity
2010-09-14 15:05       ` Shirley Ma
2010-09-14 15:21         ` Arnd Bergmann
2010-09-14 15:22           ` Michael S. Tsirkin
2010-09-14 16:00             ` Shirley Ma
2010-09-14 16:29               ` Michael S. Tsirkin
2010-09-14 17:02                 ` Shirley Ma
2010-09-14 18:27                   ` Michael S. Tsirkin
2010-09-14 18:49                     ` Shirley Ma
2010-09-14 19:01                       ` Michael S. Tsirkin
2010-09-14 19:20                         ` Shirley Ma
2010-09-15  5:31                           ` Michael S. Tsirkin
2010-09-14 19:36                         ` Shirley Ma
2010-09-15  5:12                           ` Michael S. Tsirkin
2010-09-15  6:21                             ` Shirley Ma
2010-09-15 10:10                               ` Michael S. Tsirkin
2010-09-15 14:52                                 ` Shirley Ma
2010-09-15 15:04                                   ` Michael S. Tsirkin
2010-09-15 15:39                                     ` Michael S. Tsirkin
2010-09-15 17:00                                       ` Shirley Ma
2010-09-15 17:30                                         ` Michael S. Tsirkin
2010-09-15 18:48                                           ` Shirley Ma
2010-09-29  3:24                                   ` Shirley Ma
2010-09-29  8:16                                     ` Michael S. Tsirkin
2010-09-29  8:28                                       ` Michael S. Tsirkin
2010-09-29 14:33                                         ` Shirley Ma
2010-09-29 14:56                                         ` Shirley Ma
2010-09-29 14:31                                       ` Shirley Ma
2010-09-29 14:37                                       ` Shirley Ma
2010-09-29 15:14                                     ` Michael S. Tsirkin
2010-09-29 15:23                                       ` Shirley Ma
2010-09-15  2:46                 ` Xin, Xiaohui
2010-09-15  9:58                   ` Michael S. Tsirkin
2010-09-16  8:18                     ` Xin, Xiaohui
2010-09-16 10:02                       ` Michael S. Tsirkin
2010-09-15  1:56           ` Xin, Xiaohui
2010-09-15  1:50         ` Xin, Xiaohui
2010-09-15  2:40           ` Shirley Ma
2010-09-15  2:55             ` Xin, Xiaohui
2010-09-15  5:27             ` Michael S. Tsirkin
2010-09-15  6:17               ` Shirley Ma
2010-09-14 12:05 ` [RFC PATCH 0/1] macvtap " Michael S. Tsirkin
2010-09-14 15:15   ` Shirley Ma

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.