* [PATCH v2 net-next 0/7] xen-netback: guest rx side refactor
@ 2016-10-04  9:29 Paul Durrant
  2016-10-04  9:29 ` [PATCH v2 net-next 1/7] xen-netback: separate guest side rx code into separate module Paul Durrant
                   ` (15 more replies)
  0 siblings, 16 replies; 40+ messages in thread
From: Paul Durrant @ 2016-10-04  9:29 UTC (permalink / raw)
  To: netdev, xen-devel; +Cc: Paul Durrant

This series refactors the guest rx side of xen-netback:

- The code is moved into its own source module.

- The prefix variant of GSO handling is retired (since it is no longer
  in common use, and alternatives exist).

- The code is then simplified and modified to improve performance.

v2:
- Rebased onto refreshed net-next

David Vrabel (4):
  xen-netback: refactor guest rx
  xen-netback: immediately wake tx queue when guest rx queue has space
  xen-netback: process guest rx packets in batches
  xen-netback: batch copies for multiple to-guest rx packets

Paul Durrant (2):
  xen-netback: separate guest side rx code into separate module
  xen-netback: retire guest rx side prefix GSO feature

Ross Lagerwall (1):
  xen/netback: add fraglist support for to-guest rx

 drivers/net/xen-netback/Makefile    |   2 +-
 drivers/net/xen-netback/common.h    |  25 +-
 drivers/net/xen-netback/interface.c |   6 +-
 drivers/net/xen-netback/netback.c   | 754 ------------------------------------
 drivers/net/xen-netback/rx.c        | 628 ++++++++++++++++++++++++++++++
 drivers/net/xen-netback/xenbus.c    |  21 -
 6 files changed, 643 insertions(+), 793 deletions(-)
 create mode 100644 drivers/net/xen-netback/rx.c

-- 
2.1.4


* [PATCH v2 net-next 1/7] xen-netback: separate guest side rx code into separate module
  2016-10-04  9:29 [PATCH v2 net-next 0/7] xen-netback: guest rx side refactor Paul Durrant
@ 2016-10-04  9:29 ` Paul Durrant
  2016-10-04  9:29 ` Paul Durrant
                   ` (14 subsequent siblings)
  15 siblings, 0 replies; 40+ messages in thread
From: Paul Durrant @ 2016-10-04  9:29 UTC (permalink / raw)
  To: netdev, xen-devel; +Cc: Paul Durrant, Wei Liu

The netback source module has become very large and somewhat confusing.
This patch simply moves all code related to the backend-to-frontend
(i.e. guest side rx) data-path into a separate rx source module.

This patch contains no functional change; it is purely code movement,
plus minimal changes to avoid patch style-check issues.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
---
Cc: Wei Liu <wei.liu2@citrix.com>
---
 drivers/net/xen-netback/Makefile  |   2 +-
 drivers/net/xen-netback/netback.c | 754 ------------------------------------
 drivers/net/xen-netback/rx.c      | 789 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 790 insertions(+), 755 deletions(-)
 create mode 100644 drivers/net/xen-netback/rx.c

diff --git a/drivers/net/xen-netback/Makefile b/drivers/net/xen-netback/Makefile
index 11e02be..d49798a 100644
--- a/drivers/net/xen-netback/Makefile
+++ b/drivers/net/xen-netback/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_XEN_NETDEV_BACKEND) := xen-netback.o
 
-xen-netback-y := netback.o xenbus.o interface.o hash.o
+xen-netback-y := netback.o xenbus.o interface.o hash.o rx.o
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 3d0c989..47b4810 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -106,13 +106,6 @@ static void push_tx_responses(struct xenvif_queue *queue);
 
 static inline int tx_work_todo(struct xenvif_queue *queue);
 
-static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
-					     u16      id,
-					     s8       st,
-					     u16      offset,
-					     u16      size,
-					     u16      flags);
-
 static inline unsigned long idx_to_pfn(struct xenvif_queue *queue,
 				       u16 idx)
 {
@@ -155,571 +148,11 @@ static inline pending_ring_idx_t pending_index(unsigned i)
 	return i & (MAX_PENDING_REQS-1);
 }
 
-static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
-{
-	RING_IDX prod, cons;
-	struct sk_buff *skb;
-	int needed;
-
-	skb = skb_peek(&queue->rx_queue);
-	if (!skb)
-		return false;
-
-	needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
-	if (skb_is_gso(skb))
-		needed++;
-	if (skb->sw_hash)
-		needed++;
-
-	do {
-		prod = queue->rx.sring->req_prod;
-		cons = queue->rx.req_cons;
-
-		if (prod - cons >= needed)
-			return true;
-
-		queue->rx.sring->req_event = prod + 1;
-
-		/* Make sure event is visible before we check prod
-		 * again.
-		 */
-		mb();
-	} while (queue->rx.sring->req_prod != prod);
-
-	return false;
-}
-
-void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&queue->rx_queue.lock, flags);
-
-	__skb_queue_tail(&queue->rx_queue, skb);
-
-	queue->rx_queue_len += skb->len;
-	if (queue->rx_queue_len > queue->rx_queue_max)
-		netif_tx_stop_queue(netdev_get_tx_queue(queue->vif->dev, queue->id));
-
-	spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
-}
-
-static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
-{
-	struct sk_buff *skb;
-
-	spin_lock_irq(&queue->rx_queue.lock);
-
-	skb = __skb_dequeue(&queue->rx_queue);
-	if (skb)
-		queue->rx_queue_len -= skb->len;
-
-	spin_unlock_irq(&queue->rx_queue.lock);
-
-	return skb;
-}
-
-static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue)
-{
-	spin_lock_irq(&queue->rx_queue.lock);
-
-	if (queue->rx_queue_len < queue->rx_queue_max)
-		netif_tx_wake_queue(netdev_get_tx_queue(queue->vif->dev, queue->id));
-
-	spin_unlock_irq(&queue->rx_queue.lock);
-}
-
-
-static void xenvif_rx_queue_purge(struct xenvif_queue *queue)
-{
-	struct sk_buff *skb;
-	while ((skb = xenvif_rx_dequeue(queue)) != NULL)
-		kfree_skb(skb);
-}
-
-static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
-{
-	struct sk_buff *skb;
-
-	for(;;) {
-		skb = skb_peek(&queue->rx_queue);
-		if (!skb)
-			break;
-		if (time_before(jiffies, XENVIF_RX_CB(skb)->expires))
-			break;
-		xenvif_rx_dequeue(queue);
-		kfree_skb(skb);
-	}
-}
-
-struct netrx_pending_operations {
-	unsigned copy_prod, copy_cons;
-	unsigned meta_prod, meta_cons;
-	struct gnttab_copy *copy;
-	struct xenvif_rx_meta *meta;
-	int copy_off;
-	grant_ref_t copy_gref;
-};
-
-static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif_queue *queue,
-						 struct netrx_pending_operations *npo)
-{
-	struct xenvif_rx_meta *meta;
-	struct xen_netif_rx_request req;
-
-	RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
-
-	meta = npo->meta + npo->meta_prod++;
-	meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
-	meta->gso_size = 0;
-	meta->size = 0;
-	meta->id = req.id;
-
-	npo->copy_off = 0;
-	npo->copy_gref = req.gref;
-
-	return meta;
-}
-
-struct gop_frag_copy {
-	struct xenvif_queue *queue;
-	struct netrx_pending_operations *npo;
-	struct xenvif_rx_meta *meta;
-	int head;
-	int gso_type;
-	int protocol;
-	int hash_present;
-
-	struct page *page;
-};
-
-static void xenvif_setup_copy_gop(unsigned long gfn,
-				  unsigned int offset,
-				  unsigned int *len,
-				  struct gop_frag_copy *info)
-{
-	struct gnttab_copy *copy_gop;
-	struct xen_page_foreign *foreign;
-	/* Convenient aliases */
-	struct xenvif_queue *queue = info->queue;
-	struct netrx_pending_operations *npo = info->npo;
-	struct page *page = info->page;
-
-	BUG_ON(npo->copy_off > MAX_BUFFER_OFFSET);
-
-	if (npo->copy_off == MAX_BUFFER_OFFSET)
-		info->meta = get_next_rx_buffer(queue, npo);
-
-	if (npo->copy_off + *len > MAX_BUFFER_OFFSET)
-		*len = MAX_BUFFER_OFFSET - npo->copy_off;
-
-	copy_gop = npo->copy + npo->copy_prod++;
-	copy_gop->flags = GNTCOPY_dest_gref;
-	copy_gop->len = *len;
-
-	foreign = xen_page_foreign(page);
-	if (foreign) {
-		copy_gop->source.domid = foreign->domid;
-		copy_gop->source.u.ref = foreign->gref;
-		copy_gop->flags |= GNTCOPY_source_gref;
-	} else {
-		copy_gop->source.domid = DOMID_SELF;
-		copy_gop->source.u.gmfn = gfn;
-	}
-	copy_gop->source.offset = offset;
-
-	copy_gop->dest.domid = queue->vif->domid;
-	copy_gop->dest.offset = npo->copy_off;
-	copy_gop->dest.u.ref = npo->copy_gref;
-
-	npo->copy_off += *len;
-	info->meta->size += *len;
-
-	if (!info->head)
-		return;
-
-	/* Leave a gap for the GSO descriptor. */
-	if ((1 << info->gso_type) & queue->vif->gso_mask)
-		queue->rx.req_cons++;
-
-	/* Leave a gap for the hash extra segment. */
-	if (info->hash_present)
-		queue->rx.req_cons++;
-
-	info->head = 0; /* There must be something in this buffer now */
-}
-
-static void xenvif_gop_frag_copy_grant(unsigned long gfn,
-				       unsigned offset,
-				       unsigned int len,
-				       void *data)
-{
-	unsigned int bytes;
-
-	while (len) {
-		bytes = len;
-		xenvif_setup_copy_gop(gfn, offset, &bytes, data);
-		offset += bytes;
-		len -= bytes;
-	}
-}
-
-/*
- * Set up the grant operations for this fragment. If it's a flipping
- * interface, we also set up the unmap request from here.
- */
-static void xenvif_gop_frag_copy(struct xenvif_queue *queue, struct sk_buff *skb,
-				 struct netrx_pending_operations *npo,
-				 struct page *page, unsigned long size,
-				 unsigned long offset, int *head)
-{
-	struct gop_frag_copy info = {
-		.queue = queue,
-		.npo = npo,
-		.head = *head,
-		.gso_type = XEN_NETIF_GSO_TYPE_NONE,
-		/* xenvif_set_skb_hash() will have either set a s/w
-		 * hash or cleared the hash depending on
-		 * whether the the frontend wants a hash for this skb.
-		 */
-		.hash_present = skb->sw_hash,
-	};
-	unsigned long bytes;
-
-	if (skb_is_gso(skb)) {
-		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
-			info.gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
-		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
-			info.gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
-	}
-
-	/* Data must not cross a page boundary. */
-	BUG_ON(size + offset > PAGE_SIZE<<compound_order(page));
-
-	info.meta = npo->meta + npo->meta_prod - 1;
-
-	/* Skip unused frames from start of page */
-	page += offset >> PAGE_SHIFT;
-	offset &= ~PAGE_MASK;
-
-	while (size > 0) {
-		BUG_ON(offset >= PAGE_SIZE);
-
-		bytes = PAGE_SIZE - offset;
-		if (bytes > size)
-			bytes = size;
-
-		info.page = page;
-		gnttab_foreach_grant_in_range(page, offset, bytes,
-					      xenvif_gop_frag_copy_grant,
-					      &info);
-		size -= bytes;
-		offset = 0;
-
-		/* Next page */
-		if (size) {
-			BUG_ON(!PageCompound(page));
-			page++;
-		}
-	}
-
-	*head = info.head;
-}
-
-/*
- * Prepare an SKB to be transmitted to the frontend.
- *
- * This function is responsible for allocating grant operations, meta
- * structures, etc.
- *
- * It returns the number of meta structures consumed. The number of
- * ring slots used is always equal to the number of meta slots used
- * plus the number of GSO descriptors used. Currently, we use either
- * zero GSO descriptors (for non-GSO packets) or one descriptor (for
- * frontend-side LRO).
- */
-static int xenvif_gop_skb(struct sk_buff *skb,
-			  struct netrx_pending_operations *npo,
-			  struct xenvif_queue *queue)
-{
-	struct xenvif *vif = netdev_priv(skb->dev);
-	int nr_frags = skb_shinfo(skb)->nr_frags;
-	int i;
-	struct xen_netif_rx_request req;
-	struct xenvif_rx_meta *meta;
-	unsigned char *data;
-	int head = 1;
-	int old_meta_prod;
-	int gso_type;
-
-	old_meta_prod = npo->meta_prod;
-
-	gso_type = XEN_NETIF_GSO_TYPE_NONE;
-	if (skb_is_gso(skb)) {
-		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
-			gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
-		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
-			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
-	}
-
-	/* Set up a GSO prefix descriptor, if necessary */
-	if ((1 << gso_type) & vif->gso_prefix_mask) {
-		RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
-		meta = npo->meta + npo->meta_prod++;
-		meta->gso_type = gso_type;
-		meta->gso_size = skb_shinfo(skb)->gso_size;
-		meta->size = 0;
-		meta->id = req.id;
-	}
-
-	RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
-	meta = npo->meta + npo->meta_prod++;
-
-	if ((1 << gso_type) & vif->gso_mask) {
-		meta->gso_type = gso_type;
-		meta->gso_size = skb_shinfo(skb)->gso_size;
-	} else {
-		meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
-		meta->gso_size = 0;
-	}
-
-	meta->size = 0;
-	meta->id = req.id;
-	npo->copy_off = 0;
-	npo->copy_gref = req.gref;
-
-	data = skb->data;
-	while (data < skb_tail_pointer(skb)) {
-		unsigned int offset = offset_in_page(data);
-		unsigned int len = PAGE_SIZE - offset;
-
-		if (data + len > skb_tail_pointer(skb))
-			len = skb_tail_pointer(skb) - data;
-
-		xenvif_gop_frag_copy(queue, skb, npo,
-				     virt_to_page(data), len, offset, &head);
-		data += len;
-	}
-
-	for (i = 0; i < nr_frags; i++) {
-		xenvif_gop_frag_copy(queue, skb, npo,
-				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
-				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
-				     skb_shinfo(skb)->frags[i].page_offset,
-				     &head);
-	}
-
-	return npo->meta_prod - old_meta_prod;
-}
-
-/*
- * This is a twin to xenvif_gop_skb.  Assume that xenvif_gop_skb was
- * used to set up the operations on the top of
- * netrx_pending_operations, which have since been done.  Check that
- * they didn't give any errors and advance over them.
- */
-static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
-			    struct netrx_pending_operations *npo)
-{
-	struct gnttab_copy     *copy_op;
-	int status = XEN_NETIF_RSP_OKAY;
-	int i;
-
-	for (i = 0; i < nr_meta_slots; i++) {
-		copy_op = npo->copy + npo->copy_cons++;
-		if (copy_op->status != GNTST_okay) {
-			netdev_dbg(vif->dev,
-				   "Bad status %d from copy to DOM%d.\n",
-				   copy_op->status, vif->domid);
-			status = XEN_NETIF_RSP_ERROR;
-		}
-	}
-
-	return status;
-}
-
-static void xenvif_add_frag_responses(struct xenvif_queue *queue, int status,
-				      struct xenvif_rx_meta *meta,
-				      int nr_meta_slots)
-{
-	int i;
-	unsigned long offset;
-
-	/* No fragments used */
-	if (nr_meta_slots <= 1)
-		return;
-
-	nr_meta_slots--;
-
-	for (i = 0; i < nr_meta_slots; i++) {
-		int flags;
-		if (i == nr_meta_slots - 1)
-			flags = 0;
-		else
-			flags = XEN_NETRXF_more_data;
-
-		offset = 0;
-		make_rx_response(queue, meta[i].id, status, offset,
-				 meta[i].size, flags);
-	}
-}
-
 void xenvif_kick_thread(struct xenvif_queue *queue)
 {
 	wake_up(&queue->wq);
 }
 
-static void xenvif_rx_action(struct xenvif_queue *queue)
-{
-	struct xenvif *vif = queue->vif;
-	s8 status;
-	u16 flags;
-	struct xen_netif_rx_response *resp;
-	struct sk_buff_head rxq;
-	struct sk_buff *skb;
-	LIST_HEAD(notify);
-	int ret;
-	unsigned long offset;
-	bool need_to_notify = false;
-
-	struct netrx_pending_operations npo = {
-		.copy  = queue->grant_copy_op,
-		.meta  = queue->meta,
-	};
-
-	skb_queue_head_init(&rxq);
-
-	while (xenvif_rx_ring_slots_available(queue)
-	       && (skb = xenvif_rx_dequeue(queue)) != NULL) {
-		queue->last_rx_time = jiffies;
-
-		XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo, queue);
-
-		__skb_queue_tail(&rxq, skb);
-	}
-
-	BUG_ON(npo.meta_prod > ARRAY_SIZE(queue->meta));
-
-	if (!npo.copy_prod)
-		goto done;
-
-	BUG_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
-	gnttab_batch_copy(queue->grant_copy_op, npo.copy_prod);
-
-	while ((skb = __skb_dequeue(&rxq)) != NULL) {
-		struct xen_netif_extra_info *extra = NULL;
-
-		if ((1 << queue->meta[npo.meta_cons].gso_type) &
-		    vif->gso_prefix_mask) {
-			resp = RING_GET_RESPONSE(&queue->rx,
-						 queue->rx.rsp_prod_pvt++);
-
-			resp->flags = XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data;
-
-			resp->offset = queue->meta[npo.meta_cons].gso_size;
-			resp->id = queue->meta[npo.meta_cons].id;
-			resp->status = XENVIF_RX_CB(skb)->meta_slots_used;
-
-			npo.meta_cons++;
-			XENVIF_RX_CB(skb)->meta_slots_used--;
-		}
-
-
-		queue->stats.tx_bytes += skb->len;
-		queue->stats.tx_packets++;
-
-		status = xenvif_check_gop(vif,
-					  XENVIF_RX_CB(skb)->meta_slots_used,
-					  &npo);
-
-		if (XENVIF_RX_CB(skb)->meta_slots_used == 1)
-			flags = 0;
-		else
-			flags = XEN_NETRXF_more_data;
-
-		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
-			flags |= XEN_NETRXF_csum_blank | XEN_NETRXF_data_validated;
-		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
-			/* remote but checksummed. */
-			flags |= XEN_NETRXF_data_validated;
-
-		offset = 0;
-		resp = make_rx_response(queue, queue->meta[npo.meta_cons].id,
-					status, offset,
-					queue->meta[npo.meta_cons].size,
-					flags);
-
-		if ((1 << queue->meta[npo.meta_cons].gso_type) &
-		    vif->gso_mask) {
-			extra = (struct xen_netif_extra_info *)
-				RING_GET_RESPONSE(&queue->rx,
-						  queue->rx.rsp_prod_pvt++);
-
-			resp->flags |= XEN_NETRXF_extra_info;
-
-			extra->u.gso.type = queue->meta[npo.meta_cons].gso_type;
-			extra->u.gso.size = queue->meta[npo.meta_cons].gso_size;
-			extra->u.gso.pad = 0;
-			extra->u.gso.features = 0;
-
-			extra->type = XEN_NETIF_EXTRA_TYPE_GSO;
-			extra->flags = 0;
-		}
-
-		if (skb->sw_hash) {
-			/* Since the skb got here via xenvif_select_queue()
-			 * we know that the hash has been re-calculated
-			 * according to a configuration set by the frontend
-			 * and therefore we know that it is legitimate to
-			 * pass it to the frontend.
-			 */
-			if (resp->flags & XEN_NETRXF_extra_info)
-				extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
-			else
-				resp->flags |= XEN_NETRXF_extra_info;
-
-			extra = (struct xen_netif_extra_info *)
-				RING_GET_RESPONSE(&queue->rx,
-						  queue->rx.rsp_prod_pvt++);
-
-			extra->u.hash.algorithm =
-				XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ;
-
-			if (skb->l4_hash)
-				extra->u.hash.type =
-					skb->protocol == htons(ETH_P_IP) ?
-					_XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP :
-					_XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP;
-			else
-				extra->u.hash.type =
-					skb->protocol == htons(ETH_P_IP) ?
-					_XEN_NETIF_CTRL_HASH_TYPE_IPV4 :
-					_XEN_NETIF_CTRL_HASH_TYPE_IPV6;
-
-			*(uint32_t *)extra->u.hash.value =
-				skb_get_hash_raw(skb);
-
-			extra->type = XEN_NETIF_EXTRA_TYPE_HASH;
-			extra->flags = 0;
-		}
-
-		xenvif_add_frag_responses(queue, status,
-					  queue->meta + npo.meta_cons + 1,
-					  XENVIF_RX_CB(skb)->meta_slots_used);
-
-		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, ret);
-
-		need_to_notify |= !!ret;
-
-		npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used;
-		dev_kfree_skb(skb);
-	}
-
-done:
-	if (need_to_notify)
-		notify_remote_via_irq(queue->rx_irq);
-}
-
 void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
 {
 	int more_to_do;
@@ -1951,29 +1384,6 @@ static void push_tx_responses(struct xenvif_queue *queue)
 		notify_remote_via_irq(queue->tx_irq);
 }
 
-static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
-					     u16      id,
-					     s8       st,
-					     u16      offset,
-					     u16      size,
-					     u16      flags)
-{
-	RING_IDX i = queue->rx.rsp_prod_pvt;
-	struct xen_netif_rx_response *resp;
-
-	resp = RING_GET_RESPONSE(&queue->rx, i);
-	resp->offset     = offset;
-	resp->flags      = flags;
-	resp->id         = id;
-	resp->status     = (s16)size;
-	if (st < 0)
-		resp->status = (s16)st;
-
-	queue->rx.rsp_prod_pvt = ++i;
-
-	return resp;
-}
-
 void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
 {
 	int ret;
@@ -2055,170 +1465,6 @@ err:
 	return err;
 }
 
-static void xenvif_queue_carrier_off(struct xenvif_queue *queue)
-{
-	struct xenvif *vif = queue->vif;
-
-	queue->stalled = true;
-
-	/* At least one queue has stalled? Disable the carrier. */
-	spin_lock(&vif->lock);
-	if (vif->stalled_queues++ == 0) {
-		netdev_info(vif->dev, "Guest Rx stalled");
-		netif_carrier_off(vif->dev);
-	}
-	spin_unlock(&vif->lock);
-}
-
-static void xenvif_queue_carrier_on(struct xenvif_queue *queue)
-{
-	struct xenvif *vif = queue->vif;
-
-	queue->last_rx_time = jiffies; /* Reset Rx stall detection. */
-	queue->stalled = false;
-
-	/* All queues are ready? Enable the carrier. */
-	spin_lock(&vif->lock);
-	if (--vif->stalled_queues == 0) {
-		netdev_info(vif->dev, "Guest Rx ready");
-		netif_carrier_on(vif->dev);
-	}
-	spin_unlock(&vif->lock);
-}
-
-static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
-{
-	RING_IDX prod, cons;
-
-	prod = queue->rx.sring->req_prod;
-	cons = queue->rx.req_cons;
-
-	return !queue->stalled && prod - cons < 1
-		&& time_after(jiffies,
-			      queue->last_rx_time + queue->vif->stall_timeout);
-}
-
-static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
-{
-	RING_IDX prod, cons;
-
-	prod = queue->rx.sring->req_prod;
-	cons = queue->rx.req_cons;
-
-	return queue->stalled && prod - cons >= 1;
-}
-
-static bool xenvif_have_rx_work(struct xenvif_queue *queue)
-{
-	return xenvif_rx_ring_slots_available(queue)
-		|| (queue->vif->stall_timeout &&
-		    (xenvif_rx_queue_stalled(queue)
-		     || xenvif_rx_queue_ready(queue)))
-		|| kthread_should_stop()
-		|| queue->vif->disabled;
-}
-
-static long xenvif_rx_queue_timeout(struct xenvif_queue *queue)
-{
-	struct sk_buff *skb;
-	long timeout;
-
-	skb = skb_peek(&queue->rx_queue);
-	if (!skb)
-		return MAX_SCHEDULE_TIMEOUT;
-
-	timeout = XENVIF_RX_CB(skb)->expires - jiffies;
-	return timeout < 0 ? 0 : timeout;
-}
-
-/* Wait until the guest Rx thread has work.
- *
- * The timeout needs to be adjusted based on the current head of the
- * queue (and not just the head at the beginning).  In particular, if
- * the queue is initially empty an infinite timeout is used and this
- * needs to be reduced when a skb is queued.
- *
- * This cannot be done with wait_event_timeout() because it only
- * calculates the timeout once.
- */
-static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
-{
-	DEFINE_WAIT(wait);
-
-	if (xenvif_have_rx_work(queue))
-		return;
-
-	for (;;) {
-		long ret;
-
-		prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
-		if (xenvif_have_rx_work(queue))
-			break;
-		ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
-		if (!ret)
-			break;
-	}
-	finish_wait(&queue->wq, &wait);
-}
-
-int xenvif_kthread_guest_rx(void *data)
-{
-	struct xenvif_queue *queue = data;
-	struct xenvif *vif = queue->vif;
-
-	if (!vif->stall_timeout)
-		xenvif_queue_carrier_on(queue);
-
-	for (;;) {
-		xenvif_wait_for_rx_work(queue);
-
-		if (kthread_should_stop())
-			break;
-
-		/* This frontend is found to be rogue, disable it in
-		 * kthread context. Currently this is only set when
-		 * netback finds out frontend sends malformed packet,
-		 * but we cannot disable the interface in softirq
-		 * context so we defer it here, if this thread is
-		 * associated with queue 0.
-		 */
-		if (unlikely(vif->disabled && queue->id == 0)) {
-			xenvif_carrier_off(vif);
-			break;
-		}
-
-		if (!skb_queue_empty(&queue->rx_queue))
-			xenvif_rx_action(queue);
-
-		/* If the guest hasn't provided any Rx slots for a
-		 * while it's probably not responsive, drop the
-		 * carrier so packets are dropped earlier.
-		 */
-		if (vif->stall_timeout) {
-			if (xenvif_rx_queue_stalled(queue))
-				xenvif_queue_carrier_off(queue);
-			else if (xenvif_rx_queue_ready(queue))
-				xenvif_queue_carrier_on(queue);
-		}
-
-		/* Queued packets may have foreign pages from other
-		 * domains.  These cannot be queued indefinitely as
-		 * this would starve guests of grant refs and transmit
-		 * slots.
-		 */
-		xenvif_rx_queue_drop_expired(queue);
-
-		xenvif_rx_queue_maybe_wake(queue);
-
-		cond_resched();
-	}
-
-	/* Bin any remaining skbs */
-	xenvif_rx_queue_purge(queue);
-
-	return 0;
-}
-
 static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
 {
 	/* Dealloc thread must remain running until all inflight
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
new file mode 100644
index 0000000..03836aa
--- /dev/null
+++ b/drivers/net/xen-netback/rx.c
@@ -0,0 +1,789 @@
+/*
+ * Copyright (c) 2016 Citrix Systems Inc.
+ * Copyright (c) 2002-2005, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "common.h"
+
+#include <linux/kthread.h>
+
+#include <xen/xen.h>
+#include <xen/events.h>
+
+static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
+{
+	RING_IDX prod, cons;
+	struct sk_buff *skb;
+	int needed;
+
+	skb = skb_peek(&queue->rx_queue);
+	if (!skb)
+		return false;
+
+	needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
+	if (skb_is_gso(skb))
+		needed++;
+	if (skb->sw_hash)
+		needed++;
+
+	do {
+		prod = queue->rx.sring->req_prod;
+		cons = queue->rx.req_cons;
+
+		if (prod - cons >= needed)
+			return true;
+
+		queue->rx.sring->req_event = prod + 1;
+
+		/* Make sure event is visible before we check prod
+		 * again.
+		 */
+		mb();
+	} while (queue->rx.sring->req_prod != prod);
+
+	return false;
+}
+
+void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&queue->rx_queue.lock, flags);
+
+	__skb_queue_tail(&queue->rx_queue, skb);
+
+	queue->rx_queue_len += skb->len;
+	if (queue->rx_queue_len > queue->rx_queue_max) {
+		struct net_device *dev = queue->vif->dev;
+
+		netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
+	}
+
+	spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
+}
+
+static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
+{
+	struct sk_buff *skb;
+
+	spin_lock_irq(&queue->rx_queue.lock);
+
+	skb = __skb_dequeue(&queue->rx_queue);
+	if (skb)
+		queue->rx_queue_len -= skb->len;
+
+	spin_unlock_irq(&queue->rx_queue.lock);
+
+	return skb;
+}
+
+static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue)
+{
+	spin_lock_irq(&queue->rx_queue.lock);
+
+	if (queue->rx_queue_len < queue->rx_queue_max) {
+		struct net_device *dev = queue->vif->dev;
+
+		netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id));
+	}
+
+	spin_unlock_irq(&queue->rx_queue.lock);
+}
+
+static void xenvif_rx_queue_purge(struct xenvif_queue *queue)
+{
+	struct sk_buff *skb;
+
+	while ((skb = xenvif_rx_dequeue(queue)) != NULL)
+		kfree_skb(skb);
+}
+
+static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
+{
+	struct sk_buff *skb;
+
+	for (;;) {
+		skb = skb_peek(&queue->rx_queue);
+		if (!skb)
+			break;
+		if (time_before(jiffies, XENVIF_RX_CB(skb)->expires))
+			break;
+		xenvif_rx_dequeue(queue);
+		kfree_skb(skb);
+	}
+}
+
+struct netrx_pending_operations {
+	unsigned int copy_prod, copy_cons;
+	unsigned int meta_prod, meta_cons;
+	struct gnttab_copy *copy;
+	struct xenvif_rx_meta *meta;
+	int copy_off;
+	grant_ref_t copy_gref;
+};
+
+static struct xenvif_rx_meta *get_next_rx_buffer(
+	struct xenvif_queue *queue,
+	struct netrx_pending_operations *npo)
+{
+	struct xenvif_rx_meta *meta;
+	struct xen_netif_rx_request req;
+
+	RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
+
+	meta = npo->meta + npo->meta_prod++;
+	meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
+	meta->gso_size = 0;
+	meta->size = 0;
+	meta->id = req.id;
+
+	npo->copy_off = 0;
+	npo->copy_gref = req.gref;
+
+	return meta;
+}
+
+struct gop_frag_copy {
+	struct xenvif_queue *queue;
+	struct netrx_pending_operations *npo;
+	struct xenvif_rx_meta *meta;
+	int head;
+	int gso_type;
+	int protocol;
+	int hash_present;
+
+	struct page *page;
+};
+
+static void xenvif_setup_copy_gop(unsigned long gfn,
+				  unsigned int offset,
+				  unsigned int *len,
+				  struct gop_frag_copy *info)
+{
+	struct gnttab_copy *copy_gop;
+	struct xen_page_foreign *foreign;
+	/* Convenient aliases */
+	struct xenvif_queue *queue = info->queue;
+	struct netrx_pending_operations *npo = info->npo;
+	struct page *page = info->page;
+
+	WARN_ON(npo->copy_off > MAX_BUFFER_OFFSET);
+
+	if (npo->copy_off == MAX_BUFFER_OFFSET)
+		info->meta = get_next_rx_buffer(queue, npo);
+
+	if (npo->copy_off + *len > MAX_BUFFER_OFFSET)
+		*len = MAX_BUFFER_OFFSET - npo->copy_off;
+
+	copy_gop = npo->copy + npo->copy_prod++;
+	copy_gop->flags = GNTCOPY_dest_gref;
+	copy_gop->len = *len;
+
+	foreign = xen_page_foreign(page);
+	if (foreign) {
+		copy_gop->source.domid = foreign->domid;
+		copy_gop->source.u.ref = foreign->gref;
+		copy_gop->flags |= GNTCOPY_source_gref;
+	} else {
+		copy_gop->source.domid = DOMID_SELF;
+		copy_gop->source.u.gmfn = gfn;
+	}
+	copy_gop->source.offset = offset;
+
+	copy_gop->dest.domid = queue->vif->domid;
+	copy_gop->dest.offset = npo->copy_off;
+	copy_gop->dest.u.ref = npo->copy_gref;
+
+	npo->copy_off += *len;
+	info->meta->size += *len;
+
+	if (!info->head)
+		return;
+
+	/* Leave a gap for the GSO descriptor. */
+	if ((1 << info->gso_type) & queue->vif->gso_mask)
+		queue->rx.req_cons++;
+
+	/* Leave a gap for the hash extra segment. */
+	if (info->hash_present)
+		queue->rx.req_cons++;
+
+	info->head = 0; /* There must be something in this buffer now */
+}
+
+static void xenvif_gop_frag_copy_grant(unsigned long gfn,
+				       unsigned int offset,
+				       unsigned int len,
+				       void *data)
+{
+	unsigned int bytes;
+
+	while (len) {
+		bytes = len;
+		xenvif_setup_copy_gop(gfn, offset, &bytes, data);
+		offset += bytes;
+		len -= bytes;
+	}
+}
+
+/* Set up the grant operations for this fragment. If it's a flipping
+ * interface, we also set up the unmap request from here.
+ */
+static void xenvif_gop_frag_copy(struct xenvif_queue *queue,
+				 struct sk_buff *skb,
+				 struct netrx_pending_operations *npo,
+				 struct page *page, unsigned long size,
+				 unsigned long offset, int *head)
+{
+	struct gop_frag_copy info = {
+		.queue = queue,
+		.npo = npo,
+		.head = *head,
+		.gso_type = XEN_NETIF_GSO_TYPE_NONE,
+		/* xenvif_set_skb_hash() will have either set a s/w
+		 * hash or cleared the hash depending on
+		 * whether the frontend wants a hash for this skb.
+		 */
+		.hash_present = skb->sw_hash,
+	};
+	unsigned long bytes;
+
+	if (skb_is_gso(skb)) {
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
+			info.gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
+		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+			info.gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
+	}
+
+	/* Data must not cross a page boundary. */
+	WARN_ON(size + offset > (PAGE_SIZE << compound_order(page)));
+
+	info.meta = npo->meta + npo->meta_prod - 1;
+
+	/* Skip unused frames from start of page */
+	page += offset >> PAGE_SHIFT;
+	offset &= ~PAGE_MASK;
+
+	while (size > 0) {
+		WARN_ON(offset >= PAGE_SIZE);
+
+		bytes = PAGE_SIZE - offset;
+		if (bytes > size)
+			bytes = size;
+
+		info.page = page;
+		gnttab_foreach_grant_in_range(page, offset, bytes,
+					      xenvif_gop_frag_copy_grant,
+					      &info);
+		size -= bytes;
+		offset = 0;
+
+		/* Next page */
+		if (size) {
+			WARN_ON(!PageCompound(page));
+			page++;
+		}
+	}
+
+	*head = info.head;
+}
+
+/* Prepare an SKB to be transmitted to the frontend.
+ *
+ * This function is responsible for allocating grant operations, meta
+ * structures, etc.
+ *
+ * It returns the number of meta structures consumed. The number of
+ * ring slots used is always equal to the number of meta slots used
+ * plus the number of GSO descriptors used. Currently, we use either
+ * zero GSO descriptors (for non-GSO packets) or one descriptor (for
+ * frontend-side LRO).
+ */
+static int xenvif_gop_skb(struct sk_buff *skb,
+			  struct netrx_pending_operations *npo,
+			  struct xenvif_queue *queue)
+{
+	struct xenvif *vif = netdev_priv(skb->dev);
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+	int i;
+	struct xen_netif_rx_request req;
+	struct xenvif_rx_meta *meta;
+	unsigned char *data;
+	int head = 1;
+	int old_meta_prod;
+	int gso_type;
+
+	old_meta_prod = npo->meta_prod;
+
+	gso_type = XEN_NETIF_GSO_TYPE_NONE;
+	if (skb_is_gso(skb)) {
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
+			gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
+		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
+	}
+
+	/* Set up a GSO prefix descriptor, if necessary */
+	if ((1 << gso_type) & vif->gso_prefix_mask) {
+		RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
+		meta = npo->meta + npo->meta_prod++;
+		meta->gso_type = gso_type;
+		meta->gso_size = skb_shinfo(skb)->gso_size;
+		meta->size = 0;
+		meta->id = req.id;
+	}
+
+	RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
+	meta = npo->meta + npo->meta_prod++;
+
+	if ((1 << gso_type) & vif->gso_mask) {
+		meta->gso_type = gso_type;
+		meta->gso_size = skb_shinfo(skb)->gso_size;
+	} else {
+		meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
+		meta->gso_size = 0;
+	}
+
+	meta->size = 0;
+	meta->id = req.id;
+	npo->copy_off = 0;
+	npo->copy_gref = req.gref;
+
+	data = skb->data;
+	while (data < skb_tail_pointer(skb)) {
+		unsigned int offset = offset_in_page(data);
+		unsigned int len = PAGE_SIZE - offset;
+
+		if (data + len > skb_tail_pointer(skb))
+			len = skb_tail_pointer(skb) - data;
+
+		xenvif_gop_frag_copy(queue, skb, npo,
+				     virt_to_page(data), len, offset, &head);
+		data += len;
+	}
+
+	for (i = 0; i < nr_frags; i++) {
+		xenvif_gop_frag_copy(queue, skb, npo,
+				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
+				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
+				     skb_shinfo(skb)->frags[i].page_offset,
+				     &head);
+	}
+
+	return npo->meta_prod - old_meta_prod;
+}
+
+/* This is a twin to xenvif_gop_skb.  Assume that xenvif_gop_skb was
+ * used to set up the operations on the top of
+ * netrx_pending_operations, which have since been done.  Check that
+ * they didn't give any errors and advance over them.
+ */
+static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
+			    struct netrx_pending_operations *npo)
+{
+	struct gnttab_copy     *copy_op;
+	int status = XEN_NETIF_RSP_OKAY;
+	int i;
+
+	for (i = 0; i < nr_meta_slots; i++) {
+		copy_op = npo->copy + npo->copy_cons++;
+		if (copy_op->status != GNTST_okay) {
+			netdev_dbg(vif->dev,
+				   "Bad status %d from copy to DOM%d.\n",
+				   copy_op->status, vif->domid);
+			status = XEN_NETIF_RSP_ERROR;
+		}
+	}
+
+	return status;
+}
+
+static struct xen_netif_rx_response *make_rx_response(
+	struct xenvif_queue *queue, u16 id, s8 st, u16 offset, u16 size,
+	u16 flags)
+{
+	RING_IDX i = queue->rx.rsp_prod_pvt;
+	struct xen_netif_rx_response *resp;
+
+	resp = RING_GET_RESPONSE(&queue->rx, i);
+	resp->offset     = offset;
+	resp->flags      = flags;
+	resp->id         = id;
+	resp->status     = (s16)size;
+	if (st < 0)
+		resp->status = (s16)st;
+
+	queue->rx.rsp_prod_pvt = ++i;
+
+	return resp;
+}
+
+static void xenvif_add_frag_responses(struct xenvif_queue *queue,
+				      int status,
+				      struct xenvif_rx_meta *meta,
+				      int nr_meta_slots)
+{
+	int i;
+	unsigned long offset;
+
+	/* No fragments used */
+	if (nr_meta_slots <= 1)
+		return;
+
+	nr_meta_slots--;
+
+	for (i = 0; i < nr_meta_slots; i++) {
+		int flags;
+
+		if (i == nr_meta_slots - 1)
+			flags = 0;
+		else
+			flags = XEN_NETRXF_more_data;
+
+		offset = 0;
+		make_rx_response(queue, meta[i].id, status, offset,
+				 meta[i].size, flags);
+	}
+}
+
+static void xenvif_rx_action(struct xenvif_queue *queue)
+{
+	struct xenvif *vif = queue->vif;
+	s8 status;
+	u16 flags;
+	struct xen_netif_rx_response *resp;
+	struct sk_buff_head rxq;
+	struct sk_buff *skb;
+	LIST_HEAD(notify);
+	int ret;
+	unsigned long offset;
+	bool need_to_notify = false;
+
+	struct netrx_pending_operations npo = {
+		.copy  = queue->grant_copy_op,
+		.meta  = queue->meta,
+	};
+
+	skb_queue_head_init(&rxq);
+
+	while (xenvif_rx_ring_slots_available(queue) &&
+	       (skb = xenvif_rx_dequeue(queue)) != NULL) {
+		queue->last_rx_time = jiffies;
+
+		XENVIF_RX_CB(skb)->meta_slots_used =
+			xenvif_gop_skb(skb, &npo, queue);
+
+		__skb_queue_tail(&rxq, skb);
+	}
+
+	WARN_ON(npo.meta_prod > ARRAY_SIZE(queue->meta));
+
+	if (!npo.copy_prod)
+		goto done;
+
+	WARN_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
+	gnttab_batch_copy(queue->grant_copy_op, npo.copy_prod);
+
+	while ((skb = __skb_dequeue(&rxq)) != NULL) {
+		struct xen_netif_extra_info *extra = NULL;
+
+		if ((1 << queue->meta[npo.meta_cons].gso_type) &
+		    vif->gso_prefix_mask) {
+			resp = RING_GET_RESPONSE(&queue->rx,
+						 queue->rx.rsp_prod_pvt++);
+
+			resp->flags = XEN_NETRXF_gso_prefix |
+				      XEN_NETRXF_more_data;
+
+			resp->offset = queue->meta[npo.meta_cons].gso_size;
+			resp->id = queue->meta[npo.meta_cons].id;
+			resp->status = XENVIF_RX_CB(skb)->meta_slots_used;
+
+			npo.meta_cons++;
+			XENVIF_RX_CB(skb)->meta_slots_used--;
+		}
+
+		queue->stats.tx_bytes += skb->len;
+		queue->stats.tx_packets++;
+
+		status = xenvif_check_gop(vif,
+					  XENVIF_RX_CB(skb)->meta_slots_used,
+					  &npo);
+
+		if (XENVIF_RX_CB(skb)->meta_slots_used == 1)
+			flags = 0;
+		else
+			flags = XEN_NETRXF_more_data;
+
+		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+			flags |= XEN_NETRXF_csum_blank |
+				 XEN_NETRXF_data_validated;
+		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+			/* remote but checksummed. */
+			flags |= XEN_NETRXF_data_validated;
+
+		offset = 0;
+		resp = make_rx_response(queue, queue->meta[npo.meta_cons].id,
+					status, offset,
+					queue->meta[npo.meta_cons].size,
+					flags);
+
+		if ((1 << queue->meta[npo.meta_cons].gso_type) &
+		    vif->gso_mask) {
+			extra = (struct xen_netif_extra_info *)
+				RING_GET_RESPONSE(&queue->rx,
+						  queue->rx.rsp_prod_pvt++);
+
+			resp->flags |= XEN_NETRXF_extra_info;
+
+			extra->u.gso.type = queue->meta[npo.meta_cons].gso_type;
+			extra->u.gso.size = queue->meta[npo.meta_cons].gso_size;
+			extra->u.gso.pad = 0;
+			extra->u.gso.features = 0;
+
+			extra->type = XEN_NETIF_EXTRA_TYPE_GSO;
+			extra->flags = 0;
+		}
+
+		if (skb->sw_hash) {
+			/* Since the skb got here via xenvif_select_queue()
+			 * we know that the hash has been re-calculated
+			 * according to a configuration set by the frontend
+			 * and therefore we know that it is legitimate to
+			 * pass it to the frontend.
+			 */
+			if (resp->flags & XEN_NETRXF_extra_info)
+				extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
+			else
+				resp->flags |= XEN_NETRXF_extra_info;
+
+			extra = (struct xen_netif_extra_info *)
+				RING_GET_RESPONSE(&queue->rx,
+						  queue->rx.rsp_prod_pvt++);
+
+			extra->u.hash.algorithm =
+				XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ;
+
+			if (skb->l4_hash)
+				extra->u.hash.type =
+					skb->protocol == htons(ETH_P_IP) ?
+					_XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP :
+					_XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP;
+			else
+				extra->u.hash.type =
+					skb->protocol == htons(ETH_P_IP) ?
+					_XEN_NETIF_CTRL_HASH_TYPE_IPV4 :
+					_XEN_NETIF_CTRL_HASH_TYPE_IPV6;
+
+			*(uint32_t *)extra->u.hash.value =
+				skb_get_hash_raw(skb);
+
+			extra->type = XEN_NETIF_EXTRA_TYPE_HASH;
+			extra->flags = 0;
+		}
+
+		xenvif_add_frag_responses(queue, status,
+					  queue->meta + npo.meta_cons + 1,
+					  XENVIF_RX_CB(skb)->meta_slots_used);
+
+		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, ret);
+
+		need_to_notify |= !!ret;
+
+		npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used;
+		dev_kfree_skb(skb);
+	}
+
+done:
+	if (need_to_notify)
+		notify_remote_via_irq(queue->rx_irq);
+}
+
+static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
+{
+	RING_IDX prod, cons;
+
+	prod = queue->rx.sring->req_prod;
+	cons = queue->rx.req_cons;
+
+	return !queue->stalled &&
+		prod - cons < 1 &&
+		time_after(jiffies,
+			   queue->last_rx_time + queue->vif->stall_timeout);
+}
+
+static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
+{
+	RING_IDX prod, cons;
+
+	prod = queue->rx.sring->req_prod;
+	cons = queue->rx.req_cons;
+
+	return queue->stalled && prod - cons >= 1;
+}
+
+static bool xenvif_have_rx_work(struct xenvif_queue *queue)
+{
+	return xenvif_rx_ring_slots_available(queue) ||
+		(queue->vif->stall_timeout &&
+		 (xenvif_rx_queue_stalled(queue) ||
+		  xenvif_rx_queue_ready(queue))) ||
+		kthread_should_stop() ||
+		queue->vif->disabled;
+}
+
+static long xenvif_rx_queue_timeout(struct xenvif_queue *queue)
+{
+	struct sk_buff *skb;
+	long timeout;
+
+	skb = skb_peek(&queue->rx_queue);
+	if (!skb)
+		return MAX_SCHEDULE_TIMEOUT;
+
+	timeout = XENVIF_RX_CB(skb)->expires - jiffies;
+	return timeout < 0 ? 0 : timeout;
+}
+
+/* Wait until the guest Rx thread has work.
+ *
+ * The timeout needs to be adjusted based on the current head of the
+ * queue (and not just the head at the beginning).  In particular, if
+ * the queue is initially empty an infinite timeout is used and this
+ * needs to be reduced when a skb is queued.
+ *
+ * This cannot be done with wait_event_timeout() because it only
+ * calculates the timeout once.
+ */
+static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
+{
+	DEFINE_WAIT(wait);
+
+	if (xenvif_have_rx_work(queue))
+		return;
+
+	for (;;) {
+		long ret;
+
+		prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
+		if (xenvif_have_rx_work(queue))
+			break;
+		ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
+		if (!ret)
+			break;
+	}
+	finish_wait(&queue->wq, &wait);
+}
+
+static void xenvif_queue_carrier_off(struct xenvif_queue *queue)
+{
+	struct xenvif *vif = queue->vif;
+
+	queue->stalled = true;
+
+	/* At least one queue has stalled? Disable the carrier. */
+	spin_lock(&vif->lock);
+	if (vif->stalled_queues++ == 0) {
+		netdev_info(vif->dev, "Guest Rx stalled");
+		netif_carrier_off(vif->dev);
+	}
+	spin_unlock(&vif->lock);
+}
+
+static void xenvif_queue_carrier_on(struct xenvif_queue *queue)
+{
+	struct xenvif *vif = queue->vif;
+
+	queue->last_rx_time = jiffies; /* Reset Rx stall detection. */
+	queue->stalled = false;
+
+	/* All queues are ready? Enable the carrier. */
+	spin_lock(&vif->lock);
+	if (--vif->stalled_queues == 0) {
+		netdev_info(vif->dev, "Guest Rx ready");
+		netif_carrier_on(vif->dev);
+	}
+	spin_unlock(&vif->lock);
+}
+
+int xenvif_kthread_guest_rx(void *data)
+{
+	struct xenvif_queue *queue = data;
+	struct xenvif *vif = queue->vif;
+
+	if (!vif->stall_timeout)
+		xenvif_queue_carrier_on(queue);
+
+	for (;;) {
+		xenvif_wait_for_rx_work(queue);
+
+		if (kthread_should_stop())
+			break;
+
+		/* This frontend is found to be rogue, disable it in
+		 * kthread context. Currently this is only set when
+		 * netback finds out frontend sends malformed packet,
+		 * but we cannot disable the interface in softirq
+		 * context so we defer it here, if this thread is
+		 * associated with queue 0.
+		 */
+		if (unlikely(vif->disabled && queue->id == 0)) {
+			xenvif_carrier_off(vif);
+			break;
+		}
+
+		if (!skb_queue_empty(&queue->rx_queue))
+			xenvif_rx_action(queue);
+
+		/* If the guest hasn't provided any Rx slots for a
+		 * while it's probably not responsive, drop the
+		 * carrier so packets are dropped earlier.
+		 */
+		if (vif->stall_timeout) {
+			if (xenvif_rx_queue_stalled(queue))
+				xenvif_queue_carrier_off(queue);
+			else if (xenvif_rx_queue_ready(queue))
+				xenvif_queue_carrier_on(queue);
+		}
+
+		/* Queued packets may have foreign pages from other
+		 * domains.  These cannot be queued indefinitely as
+		 * this would starve guests of grant refs and transmit
+		 * slots.
+		 */
+		xenvif_rx_queue_drop_expired(queue);
+
+		xenvif_rx_queue_maybe_wake(queue);
+
+		cond_resched();
+	}
+
+	/* Bin any remaining skbs */
+	xenvif_rx_queue_purge(queue);
+
+	return 0;
+}
-- 
2.1.4

-
-			extra = (struct xen_netif_extra_info *)
-				RING_GET_RESPONSE(&queue->rx,
-						  queue->rx.rsp_prod_pvt++);
-
-			extra->u.hash.algorithm =
-				XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ;
-
-			if (skb->l4_hash)
-				extra->u.hash.type =
-					skb->protocol == htons(ETH_P_IP) ?
-					_XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP :
-					_XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP;
-			else
-				extra->u.hash.type =
-					skb->protocol == htons(ETH_P_IP) ?
-					_XEN_NETIF_CTRL_HASH_TYPE_IPV4 :
-					_XEN_NETIF_CTRL_HASH_TYPE_IPV6;
-
-			*(uint32_t *)extra->u.hash.value =
-				skb_get_hash_raw(skb);
-
-			extra->type = XEN_NETIF_EXTRA_TYPE_HASH;
-			extra->flags = 0;
-		}
-
-		xenvif_add_frag_responses(queue, status,
-					  queue->meta + npo.meta_cons + 1,
-					  XENVIF_RX_CB(skb)->meta_slots_used);
-
-		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, ret);
-
-		need_to_notify |= !!ret;
-
-		npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used;
-		dev_kfree_skb(skb);
-	}
-
-done:
-	if (need_to_notify)
-		notify_remote_via_irq(queue->rx_irq);
-}
-
 void xenvif_napi_schedule_or_enable_events(struct xenvif_queue *queue)
 {
 	int more_to_do;
@@ -1951,29 +1384,6 @@ static void push_tx_responses(struct xenvif_queue *queue)
 		notify_remote_via_irq(queue->tx_irq);
 }
 
-static struct xen_netif_rx_response *make_rx_response(struct xenvif_queue *queue,
-					     u16      id,
-					     s8       st,
-					     u16      offset,
-					     u16      size,
-					     u16      flags)
-{
-	RING_IDX i = queue->rx.rsp_prod_pvt;
-	struct xen_netif_rx_response *resp;
-
-	resp = RING_GET_RESPONSE(&queue->rx, i);
-	resp->offset     = offset;
-	resp->flags      = flags;
-	resp->id         = id;
-	resp->status     = (s16)size;
-	if (st < 0)
-		resp->status = (s16)st;
-
-	queue->rx.rsp_prod_pvt = ++i;
-
-	return resp;
-}
-
 void xenvif_idx_unmap(struct xenvif_queue *queue, u16 pending_idx)
 {
 	int ret;
@@ -2055,170 +1465,6 @@ err:
 	return err;
 }
 
-static void xenvif_queue_carrier_off(struct xenvif_queue *queue)
-{
-	struct xenvif *vif = queue->vif;
-
-	queue->stalled = true;
-
-	/* At least one queue has stalled? Disable the carrier. */
-	spin_lock(&vif->lock);
-	if (vif->stalled_queues++ == 0) {
-		netdev_info(vif->dev, "Guest Rx stalled");
-		netif_carrier_off(vif->dev);
-	}
-	spin_unlock(&vif->lock);
-}
-
-static void xenvif_queue_carrier_on(struct xenvif_queue *queue)
-{
-	struct xenvif *vif = queue->vif;
-
-	queue->last_rx_time = jiffies; /* Reset Rx stall detection. */
-	queue->stalled = false;
-
-	/* All queues are ready? Enable the carrier. */
-	spin_lock(&vif->lock);
-	if (--vif->stalled_queues == 0) {
-		netdev_info(vif->dev, "Guest Rx ready");
-		netif_carrier_on(vif->dev);
-	}
-	spin_unlock(&vif->lock);
-}
-
-static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
-{
-	RING_IDX prod, cons;
-
-	prod = queue->rx.sring->req_prod;
-	cons = queue->rx.req_cons;
-
-	return !queue->stalled && prod - cons < 1
-		&& time_after(jiffies,
-			      queue->last_rx_time + queue->vif->stall_timeout);
-}
-
-static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
-{
-	RING_IDX prod, cons;
-
-	prod = queue->rx.sring->req_prod;
-	cons = queue->rx.req_cons;
-
-	return queue->stalled && prod - cons >= 1;
-}
-
-static bool xenvif_have_rx_work(struct xenvif_queue *queue)
-{
-	return xenvif_rx_ring_slots_available(queue)
-		|| (queue->vif->stall_timeout &&
-		    (xenvif_rx_queue_stalled(queue)
-		     || xenvif_rx_queue_ready(queue)))
-		|| kthread_should_stop()
-		|| queue->vif->disabled;
-}
-
-static long xenvif_rx_queue_timeout(struct xenvif_queue *queue)
-{
-	struct sk_buff *skb;
-	long timeout;
-
-	skb = skb_peek(&queue->rx_queue);
-	if (!skb)
-		return MAX_SCHEDULE_TIMEOUT;
-
-	timeout = XENVIF_RX_CB(skb)->expires - jiffies;
-	return timeout < 0 ? 0 : timeout;
-}
-
-/* Wait until the guest Rx thread has work.
- *
- * The timeout needs to be adjusted based on the current head of the
- * queue (and not just the head at the beginning).  In particular, if
- * the queue is initially empty an infinite timeout is used and this
- * needs to be reduced when a skb is queued.
- *
- * This cannot be done with wait_event_timeout() because it only
- * calculates the timeout once.
- */
-static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
-{
-	DEFINE_WAIT(wait);
-
-	if (xenvif_have_rx_work(queue))
-		return;
-
-	for (;;) {
-		long ret;
-
-		prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
-		if (xenvif_have_rx_work(queue))
-			break;
-		ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
-		if (!ret)
-			break;
-	}
-	finish_wait(&queue->wq, &wait);
-}
-
-int xenvif_kthread_guest_rx(void *data)
-{
-	struct xenvif_queue *queue = data;
-	struct xenvif *vif = queue->vif;
-
-	if (!vif->stall_timeout)
-		xenvif_queue_carrier_on(queue);
-
-	for (;;) {
-		xenvif_wait_for_rx_work(queue);
-
-		if (kthread_should_stop())
-			break;
-
-		/* This frontend is found to be rogue, disable it in
-		 * kthread context. Currently this is only set when
-		 * netback finds out frontend sends malformed packet,
-		 * but we cannot disable the interface in softirq
-		 * context so we defer it here, if this thread is
-		 * associated with queue 0.
-		 */
-		if (unlikely(vif->disabled && queue->id == 0)) {
-			xenvif_carrier_off(vif);
-			break;
-		}
-
-		if (!skb_queue_empty(&queue->rx_queue))
-			xenvif_rx_action(queue);
-
-		/* If the guest hasn't provided any Rx slots for a
-		 * while it's probably not responsive, drop the
-		 * carrier so packets are dropped earlier.
-		 */
-		if (vif->stall_timeout) {
-			if (xenvif_rx_queue_stalled(queue))
-				xenvif_queue_carrier_off(queue);
-			else if (xenvif_rx_queue_ready(queue))
-				xenvif_queue_carrier_on(queue);
-		}
-
-		/* Queued packets may have foreign pages from other
-		 * domains.  These cannot be queued indefinitely as
-		 * this would starve guests of grant refs and transmit
-		 * slots.
-		 */
-		xenvif_rx_queue_drop_expired(queue);
-
-		xenvif_rx_queue_maybe_wake(queue);
-
-		cond_resched();
-	}
-
-	/* Bin any remaining skbs */
-	xenvif_rx_queue_purge(queue);
-
-	return 0;
-}
-
 static bool xenvif_dealloc_kthread_should_stop(struct xenvif_queue *queue)
 {
 	/* Dealloc thread must remain running until all inflight
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
new file mode 100644
index 0000000..03836aa
--- /dev/null
+++ b/drivers/net/xen-netback/rx.c
@@ -0,0 +1,789 @@
+/*
+ * Copyright (c) 2016 Citrix Systems Inc.
+ * Copyright (c) 2002-2005, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "common.h"
+
+#include <linux/kthread.h>
+
+#include <xen/xen.h>
+#include <xen/events.h>
+
+static bool xenvif_rx_ring_slots_available(struct xenvif_queue *queue)
+{
+	RING_IDX prod, cons;
+	struct sk_buff *skb;
+	int needed;
+
+	skb = skb_peek(&queue->rx_queue);
+	if (!skb)
+		return false;
+
+	needed = DIV_ROUND_UP(skb->len, XEN_PAGE_SIZE);
+	if (skb_is_gso(skb))
+		needed++;
+	if (skb->sw_hash)
+		needed++;
+
+	do {
+		prod = queue->rx.sring->req_prod;
+		cons = queue->rx.req_cons;
+
+		if (prod - cons >= needed)
+			return true;
+
+		queue->rx.sring->req_event = prod + 1;
+
+		/* Make sure event is visible before we check prod
+		 * again.
+		 */
+		mb();
+	} while (queue->rx.sring->req_prod != prod);
+
+	return false;
+}
+
+void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&queue->rx_queue.lock, flags);
+
+	__skb_queue_tail(&queue->rx_queue, skb);
+
+	queue->rx_queue_len += skb->len;
+	if (queue->rx_queue_len > queue->rx_queue_max) {
+		struct net_device *dev = queue->vif->dev;
+
+		netif_tx_stop_queue(netdev_get_tx_queue(dev, queue->id));
+	}
+
+	spin_unlock_irqrestore(&queue->rx_queue.lock, flags);
+}
+
+static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
+{
+	struct sk_buff *skb;
+
+	spin_lock_irq(&queue->rx_queue.lock);
+
+	skb = __skb_dequeue(&queue->rx_queue);
+	if (skb)
+		queue->rx_queue_len -= skb->len;
+
+	spin_unlock_irq(&queue->rx_queue.lock);
+
+	return skb;
+}
+
+static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue)
+{
+	spin_lock_irq(&queue->rx_queue.lock);
+
+	if (queue->rx_queue_len < queue->rx_queue_max) {
+		struct net_device *dev = queue->vif->dev;
+
+		netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id));
+	}
+
+	spin_unlock_irq(&queue->rx_queue.lock);
+}
+
+static void xenvif_rx_queue_purge(struct xenvif_queue *queue)
+{
+	struct sk_buff *skb;
+
+	while ((skb = xenvif_rx_dequeue(queue)) != NULL)
+		kfree_skb(skb);
+}
+
+static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
+{
+	struct sk_buff *skb;
+
+	for (;;) {
+		skb = skb_peek(&queue->rx_queue);
+		if (!skb)
+			break;
+		if (time_before(jiffies, XENVIF_RX_CB(skb)->expires))
+			break;
+		xenvif_rx_dequeue(queue);
+		kfree_skb(skb);
+	}
+}
+
+struct netrx_pending_operations {
+	unsigned int copy_prod, copy_cons;
+	unsigned int meta_prod, meta_cons;
+	struct gnttab_copy *copy;
+	struct xenvif_rx_meta *meta;
+	int copy_off;
+	grant_ref_t copy_gref;
+};
+
+static struct xenvif_rx_meta *get_next_rx_buffer(
+	struct xenvif_queue *queue,
+	struct netrx_pending_operations *npo)
+{
+	struct xenvif_rx_meta *meta;
+	struct xen_netif_rx_request req;
+
+	RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
+
+	meta = npo->meta + npo->meta_prod++;
+	meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
+	meta->gso_size = 0;
+	meta->size = 0;
+	meta->id = req.id;
+
+	npo->copy_off = 0;
+	npo->copy_gref = req.gref;
+
+	return meta;
+}
+
+struct gop_frag_copy {
+	struct xenvif_queue *queue;
+	struct netrx_pending_operations *npo;
+	struct xenvif_rx_meta *meta;
+	int head;
+	int gso_type;
+	int protocol;
+	int hash_present;
+
+	struct page *page;
+};
+
+static void xenvif_setup_copy_gop(unsigned long gfn,
+				  unsigned int offset,
+				  unsigned int *len,
+				  struct gop_frag_copy *info)
+{
+	struct gnttab_copy *copy_gop;
+	struct xen_page_foreign *foreign;
+	/* Convenient aliases */
+	struct xenvif_queue *queue = info->queue;
+	struct netrx_pending_operations *npo = info->npo;
+	struct page *page = info->page;
+
+	WARN_ON(npo->copy_off > MAX_BUFFER_OFFSET);
+
+	if (npo->copy_off == MAX_BUFFER_OFFSET)
+		info->meta = get_next_rx_buffer(queue, npo);
+
+	if (npo->copy_off + *len > MAX_BUFFER_OFFSET)
+		*len = MAX_BUFFER_OFFSET - npo->copy_off;
+
+	copy_gop = npo->copy + npo->copy_prod++;
+	copy_gop->flags = GNTCOPY_dest_gref;
+	copy_gop->len = *len;
+
+	foreign = xen_page_foreign(page);
+	if (foreign) {
+		copy_gop->source.domid = foreign->domid;
+		copy_gop->source.u.ref = foreign->gref;
+		copy_gop->flags |= GNTCOPY_source_gref;
+	} else {
+		copy_gop->source.domid = DOMID_SELF;
+		copy_gop->source.u.gmfn = gfn;
+	}
+	copy_gop->source.offset = offset;
+
+	copy_gop->dest.domid = queue->vif->domid;
+	copy_gop->dest.offset = npo->copy_off;
+	copy_gop->dest.u.ref = npo->copy_gref;
+
+	npo->copy_off += *len;
+	info->meta->size += *len;
+
+	if (!info->head)
+		return;
+
+	/* Leave a gap for the GSO descriptor. */
+	if ((1 << info->gso_type) & queue->vif->gso_mask)
+		queue->rx.req_cons++;
+
+	/* Leave a gap for the hash extra segment. */
+	if (info->hash_present)
+		queue->rx.req_cons++;
+
+	info->head = 0; /* There must be something in this buffer now */
+}
+
+static void xenvif_gop_frag_copy_grant(unsigned long gfn,
+				       unsigned int offset,
+				       unsigned int len,
+				       void *data)
+{
+	unsigned int bytes;
+
+	while (len) {
+		bytes = len;
+		xenvif_setup_copy_gop(gfn, offset, &bytes, data);
+		offset += bytes;
+		len -= bytes;
+	}
+}
+
+/* Set up the grant operations for this fragment. If it's a flipping
+ * interface, we also set up the unmap request from here.
+ */
+static void xenvif_gop_frag_copy(struct xenvif_queue *queue,
+				 struct sk_buff *skb,
+				 struct netrx_pending_operations *npo,
+				 struct page *page, unsigned long size,
+				 unsigned long offset, int *head)
+{
+	struct gop_frag_copy info = {
+		.queue = queue,
+		.npo = npo,
+		.head = *head,
+		.gso_type = XEN_NETIF_GSO_TYPE_NONE,
+		/* xenvif_set_skb_hash() will have either set a s/w
+		 * hash or cleared the hash depending on
+		 * whether the frontend wants a hash for this skb.
+		 */
+		.hash_present = skb->sw_hash,
+	};
+	unsigned long bytes;
+
+	if (skb_is_gso(skb)) {
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
+			info.gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
+		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+			info.gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
+	}
+
+	/* Data must not cross a page boundary. */
+	WARN_ON(size + offset > (PAGE_SIZE << compound_order(page)));
+
+	info.meta = npo->meta + npo->meta_prod - 1;
+
+	/* Skip unused frames from start of page */
+	page += offset >> PAGE_SHIFT;
+	offset &= ~PAGE_MASK;
+
+	while (size > 0) {
+		WARN_ON(offset >= PAGE_SIZE);
+
+		bytes = PAGE_SIZE - offset;
+		if (bytes > size)
+			bytes = size;
+
+		info.page = page;
+		gnttab_foreach_grant_in_range(page, offset, bytes,
+					      xenvif_gop_frag_copy_grant,
+					      &info);
+		size -= bytes;
+		offset = 0;
+
+		/* Next page */
+		if (size) {
+			WARN_ON(!PageCompound(page));
+			page++;
+		}
+	}
+
+	*head = info.head;
+}
+
+/* Prepare an SKB to be transmitted to the frontend.
+ *
+ * This function is responsible for allocating grant operations, meta
+ * structures, etc.
+ *
+ * It returns the number of meta structures consumed. The number of
+ * ring slots used is always equal to the number of meta slots used
+ * plus the number of GSO descriptors used. Currently, we use either
+ * zero GSO descriptors (for non-GSO packets) or one descriptor (for
+ * frontend-side LRO).
+ */
+static int xenvif_gop_skb(struct sk_buff *skb,
+			  struct netrx_pending_operations *npo,
+			  struct xenvif_queue *queue)
+{
+	struct xenvif *vif = netdev_priv(skb->dev);
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+	int i;
+	struct xen_netif_rx_request req;
+	struct xenvif_rx_meta *meta;
+	unsigned char *data;
+	int head = 1;
+	int old_meta_prod;
+	int gso_type;
+
+	old_meta_prod = npo->meta_prod;
+
+	gso_type = XEN_NETIF_GSO_TYPE_NONE;
+	if (skb_is_gso(skb)) {
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
+			gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
+		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
+			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
+	}
+
+	/* Set up a GSO prefix descriptor, if necessary */
+	if ((1 << gso_type) & vif->gso_prefix_mask) {
+		RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
+		meta = npo->meta + npo->meta_prod++;
+		meta->gso_type = gso_type;
+		meta->gso_size = skb_shinfo(skb)->gso_size;
+		meta->size = 0;
+		meta->id = req.id;
+	}
+
+	RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
+	meta = npo->meta + npo->meta_prod++;
+
+	if ((1 << gso_type) & vif->gso_mask) {
+		meta->gso_type = gso_type;
+		meta->gso_size = skb_shinfo(skb)->gso_size;
+	} else {
+		meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
+		meta->gso_size = 0;
+	}
+
+	meta->size = 0;
+	meta->id = req.id;
+	npo->copy_off = 0;
+	npo->copy_gref = req.gref;
+
+	data = skb->data;
+	while (data < skb_tail_pointer(skb)) {
+		unsigned int offset = offset_in_page(data);
+		unsigned int len = PAGE_SIZE - offset;
+
+		if (data + len > skb_tail_pointer(skb))
+			len = skb_tail_pointer(skb) - data;
+
+		xenvif_gop_frag_copy(queue, skb, npo,
+				     virt_to_page(data), len, offset, &head);
+		data += len;
+	}
+
+	for (i = 0; i < nr_frags; i++) {
+		xenvif_gop_frag_copy(queue, skb, npo,
+				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
+				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
+				     skb_shinfo(skb)->frags[i].page_offset,
+				     &head);
+	}
+
+	return npo->meta_prod - old_meta_prod;
+}
+
+/* This is a twin to xenvif_gop_skb.  Assume that xenvif_gop_skb was
+ * used to set up the operations on the top of
+ * netrx_pending_operations, which have since been done.  Check that
+ * they didn't give any errors and advance over them.
+ */
+static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
+			    struct netrx_pending_operations *npo)
+{
+	struct gnttab_copy     *copy_op;
+	int status = XEN_NETIF_RSP_OKAY;
+	int i;
+
+	for (i = 0; i < nr_meta_slots; i++) {
+		copy_op = npo->copy + npo->copy_cons++;
+		if (copy_op->status != GNTST_okay) {
+			netdev_dbg(vif->dev,
+				   "Bad status %d from copy to DOM%d.\n",
+				   copy_op->status, vif->domid);
+			status = XEN_NETIF_RSP_ERROR;
+		}
+	}
+
+	return status;
+}
+
+static struct xen_netif_rx_response *make_rx_response(
+	struct xenvif_queue *queue, u16 id, s8 st, u16 offset, u16 size,
+	u16 flags)
+{
+	RING_IDX i = queue->rx.rsp_prod_pvt;
+	struct xen_netif_rx_response *resp;
+
+	resp = RING_GET_RESPONSE(&queue->rx, i);
+	resp->offset     = offset;
+	resp->flags      = flags;
+	resp->id         = id;
+	resp->status     = (s16)size;
+	if (st < 0)
+		resp->status = (s16)st;
+
+	queue->rx.rsp_prod_pvt = ++i;
+
+	return resp;
+}
+
+static void xenvif_add_frag_responses(struct xenvif_queue *queue,
+				      int status,
+				      struct xenvif_rx_meta *meta,
+				      int nr_meta_slots)
+{
+	int i;
+	unsigned long offset;
+
+	/* No fragments used */
+	if (nr_meta_slots <= 1)
+		return;
+
+	nr_meta_slots--;
+
+	for (i = 0; i < nr_meta_slots; i++) {
+		int flags;
+
+		if (i == nr_meta_slots - 1)
+			flags = 0;
+		else
+			flags = XEN_NETRXF_more_data;
+
+		offset = 0;
+		make_rx_response(queue, meta[i].id, status, offset,
+				 meta[i].size, flags);
+	}
+}
+
+static void xenvif_rx_action(struct xenvif_queue *queue)
+{
+	struct xenvif *vif = queue->vif;
+	s8 status;
+	u16 flags;
+	struct xen_netif_rx_response *resp;
+	struct sk_buff_head rxq;
+	struct sk_buff *skb;
+	LIST_HEAD(notify);
+	int ret;
+	unsigned long offset;
+	bool need_to_notify = false;
+
+	struct netrx_pending_operations npo = {
+		.copy  = queue->grant_copy_op,
+		.meta  = queue->meta,
+	};
+
+	skb_queue_head_init(&rxq);
+
+	while (xenvif_rx_ring_slots_available(queue) &&
+	       (skb = xenvif_rx_dequeue(queue)) != NULL) {
+		queue->last_rx_time = jiffies;
+
+		XENVIF_RX_CB(skb)->meta_slots_used =
+			xenvif_gop_skb(skb, &npo, queue);
+
+		__skb_queue_tail(&rxq, skb);
+	}
+
+	WARN_ON(npo.meta_prod > ARRAY_SIZE(queue->meta));
+
+	if (!npo.copy_prod)
+		goto done;
+
+	WARN_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
+	gnttab_batch_copy(queue->grant_copy_op, npo.copy_prod);
+
+	while ((skb = __skb_dequeue(&rxq)) != NULL) {
+		struct xen_netif_extra_info *extra = NULL;
+
+		if ((1 << queue->meta[npo.meta_cons].gso_type) &
+		    vif->gso_prefix_mask) {
+			resp = RING_GET_RESPONSE(&queue->rx,
+						 queue->rx.rsp_prod_pvt++);
+
+			resp->flags = XEN_NETRXF_gso_prefix |
+				      XEN_NETRXF_more_data;
+
+			resp->offset = queue->meta[npo.meta_cons].gso_size;
+			resp->id = queue->meta[npo.meta_cons].id;
+			resp->status = XENVIF_RX_CB(skb)->meta_slots_used;
+
+			npo.meta_cons++;
+			XENVIF_RX_CB(skb)->meta_slots_used--;
+		}
+
+		queue->stats.tx_bytes += skb->len;
+		queue->stats.tx_packets++;
+
+		status = xenvif_check_gop(vif,
+					  XENVIF_RX_CB(skb)->meta_slots_used,
+					  &npo);
+
+		if (XENVIF_RX_CB(skb)->meta_slots_used == 1)
+			flags = 0;
+		else
+			flags = XEN_NETRXF_more_data;
+
+		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+			flags |= XEN_NETRXF_csum_blank |
+				 XEN_NETRXF_data_validated;
+		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
+			/* remote but checksummed. */
+			flags |= XEN_NETRXF_data_validated;
+
+		offset = 0;
+		resp = make_rx_response(queue, queue->meta[npo.meta_cons].id,
+					status, offset,
+					queue->meta[npo.meta_cons].size,
+					flags);
+
+		if ((1 << queue->meta[npo.meta_cons].gso_type) &
+		    vif->gso_mask) {
+			extra = (struct xen_netif_extra_info *)
+				RING_GET_RESPONSE(&queue->rx,
+						  queue->rx.rsp_prod_pvt++);
+
+			resp->flags |= XEN_NETRXF_extra_info;
+
+			extra->u.gso.type = queue->meta[npo.meta_cons].gso_type;
+			extra->u.gso.size = queue->meta[npo.meta_cons].gso_size;
+			extra->u.gso.pad = 0;
+			extra->u.gso.features = 0;
+
+			extra->type = XEN_NETIF_EXTRA_TYPE_GSO;
+			extra->flags = 0;
+		}
+
+		if (skb->sw_hash) {
+			/* Since the skb got here via xenvif_select_queue()
+			 * we know that the hash has been re-calculated
+			 * according to a configuration set by the frontend
+			 * and therefore we know that it is legitimate to
+			 * pass it to the frontend.
+			 */
+			if (resp->flags & XEN_NETRXF_extra_info)
+				extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
+			else
+				resp->flags |= XEN_NETRXF_extra_info;
+
+			extra = (struct xen_netif_extra_info *)
+				RING_GET_RESPONSE(&queue->rx,
+						  queue->rx.rsp_prod_pvt++);
+
+			extra->u.hash.algorithm =
+				XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ;
+
+			if (skb->l4_hash)
+				extra->u.hash.type =
+					skb->protocol == htons(ETH_P_IP) ?
+					_XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP :
+					_XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP;
+			else
+				extra->u.hash.type =
+					skb->protocol == htons(ETH_P_IP) ?
+					_XEN_NETIF_CTRL_HASH_TYPE_IPV4 :
+					_XEN_NETIF_CTRL_HASH_TYPE_IPV6;
+
+			*(uint32_t *)extra->u.hash.value =
+				skb_get_hash_raw(skb);
+
+			extra->type = XEN_NETIF_EXTRA_TYPE_HASH;
+			extra->flags = 0;
+		}
+
+		xenvif_add_frag_responses(queue, status,
+					  queue->meta + npo.meta_cons + 1,
+					  XENVIF_RX_CB(skb)->meta_slots_used);
+
+		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, ret);
+
+		need_to_notify |= !!ret;
+
+		npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used;
+		dev_kfree_skb(skb);
+	}
+
+done:
+	if (need_to_notify)
+		notify_remote_via_irq(queue->rx_irq);
+}
+
+static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
+{
+	RING_IDX prod, cons;
+
+	prod = queue->rx.sring->req_prod;
+	cons = queue->rx.req_cons;
+
+	return !queue->stalled &&
+		prod - cons < 1 &&
+		time_after(jiffies,
+			   queue->last_rx_time + queue->vif->stall_timeout);
+}
+
+static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
+{
+	RING_IDX prod, cons;
+
+	prod = queue->rx.sring->req_prod;
+	cons = queue->rx.req_cons;
+
+	return queue->stalled && prod - cons >= 1;
+}
+
+static bool xenvif_have_rx_work(struct xenvif_queue *queue)
+{
+	return xenvif_rx_ring_slots_available(queue) ||
+		(queue->vif->stall_timeout &&
+		 (xenvif_rx_queue_stalled(queue) ||
+		  xenvif_rx_queue_ready(queue))) ||
+		kthread_should_stop() ||
+		queue->vif->disabled;
+}
+
+static long xenvif_rx_queue_timeout(struct xenvif_queue *queue)
+{
+	struct sk_buff *skb;
+	long timeout;
+
+	skb = skb_peek(&queue->rx_queue);
+	if (!skb)
+		return MAX_SCHEDULE_TIMEOUT;
+
+	timeout = XENVIF_RX_CB(skb)->expires - jiffies;
+	return timeout < 0 ? 0 : timeout;
+}
+
+/* Wait until the guest Rx thread has work.
+ *
+ * The timeout needs to be adjusted based on the current head of the
+ * queue (and not just the head at the beginning).  In particular, if
+ * the queue is initially empty an infinite timeout is used and this
+ * needs to be reduced when a skb is queued.
+ *
+ * This cannot be done with wait_event_timeout() because it only
+ * calculates the timeout once.
+ */
+static void xenvif_wait_for_rx_work(struct xenvif_queue *queue)
+{
+	DEFINE_WAIT(wait);
+
+	if (xenvif_have_rx_work(queue))
+		return;
+
+	for (;;) {
+		long ret;
+
+		prepare_to_wait(&queue->wq, &wait, TASK_INTERRUPTIBLE);
+		if (xenvif_have_rx_work(queue))
+			break;
+		ret = schedule_timeout(xenvif_rx_queue_timeout(queue));
+		if (!ret)
+			break;
+	}
+	finish_wait(&queue->wq, &wait);
+}
+
+static void xenvif_queue_carrier_off(struct xenvif_queue *queue)
+{
+	struct xenvif *vif = queue->vif;
+
+	queue->stalled = true;
+
+	/* At least one queue has stalled? Disable the carrier. */
+	spin_lock(&vif->lock);
+	if (vif->stalled_queues++ == 0) {
+		netdev_info(vif->dev, "Guest Rx stalled");
+		netif_carrier_off(vif->dev);
+	}
+	spin_unlock(&vif->lock);
+}
+
+static void xenvif_queue_carrier_on(struct xenvif_queue *queue)
+{
+	struct xenvif *vif = queue->vif;
+
+	queue->last_rx_time = jiffies; /* Reset Rx stall detection. */
+	queue->stalled = false;
+
+	/* All queues are ready? Enable the carrier. */
+	spin_lock(&vif->lock);
+	if (--vif->stalled_queues == 0) {
+		netdev_info(vif->dev, "Guest Rx ready");
+		netif_carrier_on(vif->dev);
+	}
+	spin_unlock(&vif->lock);
+}
+
+int xenvif_kthread_guest_rx(void *data)
+{
+	struct xenvif_queue *queue = data;
+	struct xenvif *vif = queue->vif;
+
+	if (!vif->stall_timeout)
+		xenvif_queue_carrier_on(queue);
+
+	for (;;) {
+		xenvif_wait_for_rx_work(queue);
+
+		if (kthread_should_stop())
+			break;
+
+		/* This frontend is found to be rogue, disable it in
+		 * kthread context. Currently this is only set when
+		 * netback finds out frontend sends malformed packet,
+		 * but we cannot disable the interface in softirq
+		 * context so we defer it here, if this thread is
+		 * associated with queue 0.
+		 */
+		if (unlikely(vif->disabled && queue->id == 0)) {
+			xenvif_carrier_off(vif);
+			break;
+		}
+
+		if (!skb_queue_empty(&queue->rx_queue))
+			xenvif_rx_action(queue);
+
+		/* If the guest hasn't provided any Rx slots for a
+		 * while it's probably not responsive, drop the
+		 * carrier so packets are dropped earlier.
+		 */
+		if (vif->stall_timeout) {
+			if (xenvif_rx_queue_stalled(queue))
+				xenvif_queue_carrier_off(queue);
+			else if (xenvif_rx_queue_ready(queue))
+				xenvif_queue_carrier_on(queue);
+		}
+
+		/* Queued packets may have foreign pages from other
+		 * domains.  These cannot be queued indefinitely as
+		 * this would starve guests of grant refs and transmit
+		 * slots.
+		 */
+		xenvif_rx_queue_drop_expired(queue);
+
+		xenvif_rx_queue_maybe_wake(queue);
+
+		cond_resched();
+	}
+
+	/* Bin any remaining skbs */
+	xenvif_rx_queue_purge(queue);
+
+	return 0;
+}
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH v2 net-next 2/7] xen-netback: retire guest rx side prefix GSO feature
  2016-10-04  9:29 [PATCH v2 net-next 0/7] xen-netback: guest rx side refactor Paul Durrant
  2016-10-04  9:29 ` [PATCH v2 net-next 1/7] xen-netback: separate guest side rx code into separate module Paul Durrant
  2016-10-04  9:29 ` Paul Durrant
@ 2016-10-04  9:29 ` Paul Durrant
  2016-10-04 10:14   ` David Vrabel
                     ` (3 more replies)
  2016-10-04  9:29 ` Paul Durrant
                   ` (12 subsequent siblings)
  15 siblings, 4 replies; 40+ messages in thread
From: Paul Durrant @ 2016-10-04  9:29 UTC (permalink / raw)
  To: netdev, xen-devel; +Cc: Paul Durrant, Wei Liu

As far as I am aware, only very old Windows network frontends make use
of this style of passing GSO packets from backend to frontend. These
frontends can easily be replaced by the freely available Xen Project
Windows PV network frontend, which uses the 'default' mechanism for
passing GSO packets, the same mechanism used by all Linux frontends.

NOTE: Removal of this feature will not cause breakage in old Windows
      frontends. They simply will no longer receive GSO packets - the
      packets instead being fragmented in the backend.
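
For reference, the two mechanisms differ in how the GSO metadata
reaches the frontend. The slot layouts below are a rough sketch based
on the code paths touched by this patch; slot numbering is purely
illustrative:

  Prefix GSO (retired here), negotiated via feature-gso-tcpv4-prefix /
  feature-gso-tcpv6-prefix:

    slot 0: response flagged XEN_NETRXF_gso_prefix | XEN_NETRXF_more_data,
            with gso_size carried in the response's offset field
    slot 1: first data slot
    slot n: remaining data slots

  Default GSO (kept), negotiated via feature-gso-tcpv4 /
  feature-gso-tcpv6:

    slot 0: first data slot, flagged XEN_NETRXF_extra_info
    slot 1: xen_netif_extra_info of type XEN_NETIF_EXTRA_TYPE_GSO,
            carrying gso_size in u.gso.size
    slot n: remaining data slots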

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
---
Cc: Wei Liu <wei.liu2@citrix.com>
---
 drivers/net/xen-netback/common.h    |  1 -
 drivers/net/xen-netback/interface.c |  4 ++--
 drivers/net/xen-netback/rx.c        | 26 --------------------------
 drivers/net/xen-netback/xenbus.c    | 21 ---------------------
 4 files changed, 2 insertions(+), 50 deletions(-)

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index b38fb2c..0ba5910 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -260,7 +260,6 @@ struct xenvif {
 
 	/* Frontend feature information. */
 	int gso_mask;
-	int gso_prefix_mask;
 
 	u8 can_sg:1;
 	u8 ip_csum:1;
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index fb50c6d..211d542 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -319,9 +319,9 @@ static netdev_features_t xenvif_fix_features(struct net_device *dev,
 
 	if (!vif->can_sg)
 		features &= ~NETIF_F_SG;
-	if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV4))
+	if (~(vif->gso_mask) & GSO_BIT(TCPV4))
 		features &= ~NETIF_F_TSO;
-	if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV6))
+	if (~(vif->gso_mask) & GSO_BIT(TCPV6))
 		features &= ~NETIF_F_TSO6;
 	if (!vif->ip_csum)
 		features &= ~NETIF_F_IP_CSUM;
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index 03836aa..6bd7d6e 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -347,16 +347,6 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
 	}
 
-	/* Set up a GSO prefix descriptor, if necessary */
-	if ((1 << gso_type) & vif->gso_prefix_mask) {
-		RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
-		meta = npo->meta + npo->meta_prod++;
-		meta->gso_type = gso_type;
-		meta->gso_size = skb_shinfo(skb)->gso_size;
-		meta->size = 0;
-		meta->id = req.id;
-	}
-
 	RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
 	meta = npo->meta + npo->meta_prod++;
 
@@ -511,22 +501,6 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
 	while ((skb = __skb_dequeue(&rxq)) != NULL) {
 		struct xen_netif_extra_info *extra = NULL;
 
-		if ((1 << queue->meta[npo.meta_cons].gso_type) &
-		    vif->gso_prefix_mask) {
-			resp = RING_GET_RESPONSE(&queue->rx,
-						 queue->rx.rsp_prod_pvt++);
-
-			resp->flags = XEN_NETRXF_gso_prefix |
-				      XEN_NETRXF_more_data;
-
-			resp->offset = queue->meta[npo.meta_cons].gso_size;
-			resp->id = queue->meta[npo.meta_cons].id;
-			resp->status = XENVIF_RX_CB(skb)->meta_slots_used;
-
-			npo.meta_cons++;
-			XENVIF_RX_CB(skb)->meta_slots_used--;
-		}
-
 		queue->stats.tx_bytes += skb->len;
 		queue->stats.tx_packets++;
 
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index daf4c78..7056404 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -1135,7 +1135,6 @@ static int read_xenbus_vif_flags(struct backend_info *be)
 	vif->can_sg = !!val;
 
 	vif->gso_mask = 0;
-	vif->gso_prefix_mask = 0;
 
 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
 			 "%d", &val) < 0)
@@ -1143,32 +1142,12 @@ static int read_xenbus_vif_flags(struct backend_info *be)
 	if (val)
 		vif->gso_mask |= GSO_BIT(TCPV4);
 
-	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
-			 "%d", &val) < 0)
-		val = 0;
-	if (val)
-		vif->gso_prefix_mask |= GSO_BIT(TCPV4);
-
 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6",
 			 "%d", &val) < 0)
 		val = 0;
 	if (val)
 		vif->gso_mask |= GSO_BIT(TCPV6);
 
-	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6-prefix",
-			 "%d", &val) < 0)
-		val = 0;
-	if (val)
-		vif->gso_prefix_mask |= GSO_BIT(TCPV6);
-
-	if (vif->gso_mask & vif->gso_prefix_mask) {
-		xenbus_dev_fatal(dev, err,
-				 "%s: gso and gso prefix flags are not "
-				 "mutually exclusive",
-				 dev->otherend);
-		return -EOPNOTSUPP;
-	}
-
 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
 			 "%d", &val) < 0)
 		val = 0;
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH v2 net-next 3/7] xen-netback: refactor guest rx
  2016-10-04  9:29 [PATCH v2 net-next 0/7] xen-netback: guest rx side refactor Paul Durrant
                   ` (3 preceding siblings ...)
  2016-10-04  9:29 ` Paul Durrant
@ 2016-10-04  9:29 ` Paul Durrant
  2016-10-04  9:29 ` Paul Durrant
                   ` (10 subsequent siblings)
  15 siblings, 0 replies; 40+ messages in thread
From: Paul Durrant @ 2016-10-04  9:29 UTC (permalink / raw)
  To: netdev, xen-devel; +Cc: David Vrabel, Paul Durrant, Wei Liu

From: David Vrabel <david.vrabel@citrix.com>

Refactor the to-guest (rx) path to:

1. Push responses for completed skbs earlier, reducing latency.

2. Reduce the per-queue memory overhead by greatly reducing the
   maximum number of grant copy ops in each hypercall (from 4352 to
   64).  Each struct xenvif_queue is now only 44 kB instead of 220 kB
   (see the sketch after this list).

3. Make the code more maintainable.
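
The figures in point 2 fall out of constants visible in this series.
As a rough back-of-the-envelope sketch (assuming the usual 4 kB Xen
page and the standard 256-entry rx ring):

  MAX_XEN_SKB_FRAGS  = 65536 / XEN_PAGE_SIZE + 1 = 65536 / 4096 + 1 = 17
  MAX_GRANT_COPY_OPS = MAX_XEN_SKB_FRAGS * XEN_NETIF_RX_RING_SIZE
                     = 17 * 256 = 4352 grant copy ops per queue (old)
  COPY_BATCH_SIZE    = 64 grant copy ops per batch, flushed when full (new)

Replacing the 4352-entry grant_copy_op array and the 256-entry meta
array with the 64-entry rx_copy batch accounts for essentially all of
the quoted ~220 kB to ~44 kB reduction in struct xenvif_queue.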

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
[re-based]
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
---
Cc: Wei Liu <wei.liu2@citrix.com>
---
 drivers/net/xen-netback/common.h |  23 +-
 drivers/net/xen-netback/rx.c     | 654 +++++++++++++++------------------------
 2 files changed, 254 insertions(+), 423 deletions(-)

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 0ba5910..7d12a38 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -91,13 +91,6 @@ struct xenvif_rx_meta {
  */
 #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1)
 
-/* It's possible for an skb to have a maximal number of frags
- * but still be less than MAX_BUFFER_OFFSET in size. Thus the
- * worst-case number of copy operations is MAX_XEN_SKB_FRAGS per
- * ring slot.
- */
-#define MAX_GRANT_COPY_OPS (MAX_XEN_SKB_FRAGS * XEN_NETIF_RX_RING_SIZE)
-
 #define NETBACK_INVALID_HANDLE -1
 
 /* To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating
@@ -133,6 +126,14 @@ struct xenvif_stats {
 	unsigned long tx_frag_overflow;
 };
 
+#define COPY_BATCH_SIZE 64
+
+struct xenvif_copy_state {
+	struct gnttab_copy op[COPY_BATCH_SIZE];
+	RING_IDX idx[COPY_BATCH_SIZE];
+	unsigned int num;
+};
+
 struct xenvif_queue { /* Per-queue data for xenvif */
 	unsigned int id; /* Queue ID, 0-based */
 	char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
@@ -189,12 +190,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
 	unsigned long last_rx_time;
 	bool stalled;
 
-	struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS];
-
-	/* We create one meta structure per ring request we consume, so
-	 * the maximum number is the same as the ring size.
-	 */
-	struct xenvif_rx_meta meta[XEN_NETIF_RX_RING_SIZE];
+	struct xenvif_copy_state rx_copy;
 
 	/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
 	unsigned long   credit_bytes;
@@ -358,6 +354,7 @@ int xenvif_dealloc_kthread(void *data);
 
 irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data);
 
+void xenvif_rx_action(struct xenvif_queue *queue);
 void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
 
 void xenvif_carrier_on(struct xenvif *vif);
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index 6bd7d6e..b0ce4c6 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -26,7 +26,6 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */
-
 #include "common.h"
 
 #include <linux/kthread.h>
@@ -137,464 +136,299 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
 	}
 }
 
-struct netrx_pending_operations {
-	unsigned int copy_prod, copy_cons;
-	unsigned int meta_prod, meta_cons;
-	struct gnttab_copy *copy;
-	struct xenvif_rx_meta *meta;
-	int copy_off;
-	grant_ref_t copy_gref;
-};
-
-static struct xenvif_rx_meta *get_next_rx_buffer(
-	struct xenvif_queue *queue,
-	struct netrx_pending_operations *npo)
+static void xenvif_rx_copy_flush(struct xenvif_queue *queue)
 {
-	struct xenvif_rx_meta *meta;
-	struct xen_netif_rx_request req;
+	unsigned int i;
 
-	RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
+	gnttab_batch_copy(queue->rx_copy.op, queue->rx_copy.num);
 
-	meta = npo->meta + npo->meta_prod++;
-	meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
-	meta->gso_size = 0;
-	meta->size = 0;
-	meta->id = req.id;
+	for (i = 0; i < queue->rx_copy.num; i++) {
+		struct gnttab_copy *op;
 
-	npo->copy_off = 0;
-	npo->copy_gref = req.gref;
+		op = &queue->rx_copy.op[i];
 
-	return meta;
+		/* If the copy failed, overwrite the status field in
+		 * the corresponding response.
+		 */
+		if (unlikely(op->status != GNTST_okay)) {
+			struct xen_netif_rx_response *rsp;
+
+			rsp = RING_GET_RESPONSE(&queue->rx,
+						queue->rx_copy.idx[i]);
+			rsp->status = op->status;
+		}
+	}
+
+	queue->rx_copy.num = 0;
 }
 
-struct gop_frag_copy {
-	struct xenvif_queue *queue;
-	struct netrx_pending_operations *npo;
-	struct xenvif_rx_meta *meta;
-	int head;
-	int gso_type;
-	int protocol;
-	int hash_present;
-
-	struct page *page;
-};
-
-static void xenvif_setup_copy_gop(unsigned long gfn,
-				  unsigned int offset,
-				  unsigned int *len,
-				  struct gop_frag_copy *info)
+static void xenvif_rx_copy_add(struct xenvif_queue *queue,
+			       struct xen_netif_rx_request *req,
+			       unsigned int offset, void *data, size_t len)
 {
-	struct gnttab_copy *copy_gop;
+	struct gnttab_copy *op;
+	struct page *page;
 	struct xen_page_foreign *foreign;
-	/* Convenient aliases */
-	struct xenvif_queue *queue = info->queue;
-	struct netrx_pending_operations *npo = info->npo;
-	struct page *page = info->page;
 
-	WARN_ON(npo->copy_off > MAX_BUFFER_OFFSET);
+	if (queue->rx_copy.num == COPY_BATCH_SIZE)
+		xenvif_rx_copy_flush(queue);
 
-	if (npo->copy_off == MAX_BUFFER_OFFSET)
-		info->meta = get_next_rx_buffer(queue, npo);
+	op = &queue->rx_copy.op[queue->rx_copy.num];
 
-	if (npo->copy_off + *len > MAX_BUFFER_OFFSET)
-		*len = MAX_BUFFER_OFFSET - npo->copy_off;
+	page = virt_to_page(data);
 
-	copy_gop = npo->copy + npo->copy_prod++;
-	copy_gop->flags = GNTCOPY_dest_gref;
-	copy_gop->len = *len;
+	op->flags = GNTCOPY_dest_gref;
 
 	foreign = xen_page_foreign(page);
 	if (foreign) {
-		copy_gop->source.domid = foreign->domid;
-		copy_gop->source.u.ref = foreign->gref;
-		copy_gop->flags |= GNTCOPY_source_gref;
+		op->source.domid = foreign->domid;
+		op->source.u.ref = foreign->gref;
+		op->flags |= GNTCOPY_source_gref;
 	} else {
-		copy_gop->source.domid = DOMID_SELF;
-		copy_gop->source.u.gmfn = gfn;
+		op->source.u.gmfn = virt_to_gfn(data);
+		op->source.domid  = DOMID_SELF;
 	}
-	copy_gop->source.offset = offset;
 
-	copy_gop->dest.domid = queue->vif->domid;
-	copy_gop->dest.offset = npo->copy_off;
-	copy_gop->dest.u.ref = npo->copy_gref;
+	op->source.offset = xen_offset_in_page(data);
+	op->dest.u.ref    = req->gref;
+	op->dest.domid    = queue->vif->domid;
+	op->dest.offset   = offset;
+	op->len           = len;
 
-	npo->copy_off += *len;
-	info->meta->size += *len;
-
-	if (!info->head)
-		return;
-
-	/* Leave a gap for the GSO descriptor. */
-	if ((1 << info->gso_type) & queue->vif->gso_mask)
-		queue->rx.req_cons++;
-
-	/* Leave a gap for the hash extra segment. */
-	if (info->hash_present)
-		queue->rx.req_cons++;
-
-	info->head = 0; /* There must be something in this buffer now */
+	queue->rx_copy.idx[queue->rx_copy.num] = queue->rx.req_cons;
+	queue->rx_copy.num++;
 }
 
-static void xenvif_gop_frag_copy_grant(unsigned long gfn,
-				       unsigned int offset,
-				       unsigned int len,
-				       void *data)
+static unsigned int xenvif_gso_type(struct sk_buff *skb)
 {
-	unsigned int bytes;
-
-	while (len) {
-		bytes = len;
-		xenvif_setup_copy_gop(gfn, offset, &bytes, data);
-		offset += bytes;
-		len -= bytes;
-	}
-}
-
-/* Set up the grant operations for this fragment. If it's a flipping
- * interface, we also set up the unmap request from here.
- */
-static void xenvif_gop_frag_copy(struct xenvif_queue *queue,
-				 struct sk_buff *skb,
-				 struct netrx_pending_operations *npo,
-				 struct page *page, unsigned long size,
-				 unsigned long offset, int *head)
-{
-	struct gop_frag_copy info = {
-		.queue = queue,
-		.npo = npo,
-		.head = *head,
-		.gso_type = XEN_NETIF_GSO_TYPE_NONE,
-		/* xenvif_set_skb_hash() will have either set a s/w
-		 * hash or cleared the hash depending on
-		 * whether the frontend wants a hash for this skb.
-		 */
-		.hash_present = skb->sw_hash,
-	};
-	unsigned long bytes;
-
 	if (skb_is_gso(skb)) {
 		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
-			info.gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
-		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
-			info.gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
-	}
-
-	/* Data must not cross a page boundary. */
-	WARN_ON(size + offset > (PAGE_SIZE << compound_order(page)));
-
-	info.meta = npo->meta + npo->meta_prod - 1;
-
-	/* Skip unused frames from start of page */
-	page += offset >> PAGE_SHIFT;
-	offset &= ~PAGE_MASK;
-
-	while (size > 0) {
-		WARN_ON(offset >= PAGE_SIZE);
-
-		bytes = PAGE_SIZE - offset;
-		if (bytes > size)
-			bytes = size;
-
-		info.page = page;
-		gnttab_foreach_grant_in_range(page, offset, bytes,
-					      xenvif_gop_frag_copy_grant,
-					      &info);
-		size -= bytes;
-		offset = 0;
-
-		/* Next page */
-		if (size) {
-			WARN_ON(!PageCompound(page));
-			page++;
-		}
-	}
-
-	*head = info.head;
-}
-
-/* Prepare an SKB to be transmitted to the frontend.
- *
- * This function is responsible for allocating grant operations, meta
- * structures, etc.
- *
- * It returns the number of meta structures consumed. The number of
- * ring slots used is always equal to the number of meta slots used
- * plus the number of GSO descriptors used. Currently, we use either
- * zero GSO descriptors (for non-GSO packets) or one descriptor (for
- * frontend-side LRO).
- */
-static int xenvif_gop_skb(struct sk_buff *skb,
-			  struct netrx_pending_operations *npo,
-			  struct xenvif_queue *queue)
-{
-	struct xenvif *vif = netdev_priv(skb->dev);
-	int nr_frags = skb_shinfo(skb)->nr_frags;
-	int i;
-	struct xen_netif_rx_request req;
-	struct xenvif_rx_meta *meta;
-	unsigned char *data;
-	int head = 1;
-	int old_meta_prod;
-	int gso_type;
-
-	old_meta_prod = npo->meta_prod;
-
-	gso_type = XEN_NETIF_GSO_TYPE_NONE;
-	if (skb_is_gso(skb)) {
-		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
-			gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
-		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
-			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
-	}
-
-	RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
-	meta = npo->meta + npo->meta_prod++;
-
-	if ((1 << gso_type) & vif->gso_mask) {
-		meta->gso_type = gso_type;
-		meta->gso_size = skb_shinfo(skb)->gso_size;
-	} else {
-		meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
-		meta->gso_size = 0;
-	}
-
-	meta->size = 0;
-	meta->id = req.id;
-	npo->copy_off = 0;
-	npo->copy_gref = req.gref;
-
-	data = skb->data;
-	while (data < skb_tail_pointer(skb)) {
-		unsigned int offset = offset_in_page(data);
-		unsigned int len = PAGE_SIZE - offset;
-
-		if (data + len > skb_tail_pointer(skb))
-			len = skb_tail_pointer(skb) - data;
-
-		xenvif_gop_frag_copy(queue, skb, npo,
-				     virt_to_page(data), len, offset, &head);
-		data += len;
-	}
-
-	for (i = 0; i < nr_frags; i++) {
-		xenvif_gop_frag_copy(queue, skb, npo,
-				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
-				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
-				     skb_shinfo(skb)->frags[i].page_offset,
-				     &head);
-	}
-
-	return npo->meta_prod - old_meta_prod;
-}
-
-/* This is a twin to xenvif_gop_skb.  Assume that xenvif_gop_skb was
- * used to set up the operations on the top of
- * netrx_pending_operations, which have since been done.  Check that
- * they didn't give any errors and advance over them.
- */
-static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
-			    struct netrx_pending_operations *npo)
-{
-	struct gnttab_copy     *copy_op;
-	int status = XEN_NETIF_RSP_OKAY;
-	int i;
-
-	for (i = 0; i < nr_meta_slots; i++) {
-		copy_op = npo->copy + npo->copy_cons++;
-		if (copy_op->status != GNTST_okay) {
-			netdev_dbg(vif->dev,
-				   "Bad status %d from copy to DOM%d.\n",
-				   copy_op->status, vif->domid);
-			status = XEN_NETIF_RSP_ERROR;
-		}
-	}
-
-	return status;
-}
-
-static struct xen_netif_rx_response *make_rx_response(
-	struct xenvif_queue *queue, u16 id, s8 st, u16 offset, u16 size,
-	u16 flags)
-{
-	RING_IDX i = queue->rx.rsp_prod_pvt;
-	struct xen_netif_rx_response *resp;
-
-	resp = RING_GET_RESPONSE(&queue->rx, i);
-	resp->offset     = offset;
-	resp->flags      = flags;
-	resp->id         = id;
-	resp->status     = (s16)size;
-	if (st < 0)
-		resp->status = (s16)st;
-
-	queue->rx.rsp_prod_pvt = ++i;
-
-	return resp;
-}
-
-static void xenvif_add_frag_responses(struct xenvif_queue *queue,
-				      int status,
-				      struct xenvif_rx_meta *meta,
-				      int nr_meta_slots)
-{
-	int i;
-	unsigned long offset;
-
-	/* No fragments used */
-	if (nr_meta_slots <= 1)
-		return;
-
-	nr_meta_slots--;
-
-	for (i = 0; i < nr_meta_slots; i++) {
-		int flags;
-
-		if (i == nr_meta_slots - 1)
-			flags = 0;
+			return XEN_NETIF_GSO_TYPE_TCPV4;
 		else
-			flags = XEN_NETRXF_more_data;
-
-		offset = 0;
-		make_rx_response(queue, meta[i].id, status, offset,
-				 meta[i].size, flags);
+			return XEN_NETIF_GSO_TYPE_TCPV6;
 	}
+	return XEN_NETIF_GSO_TYPE_NONE;
 }
 
-static void xenvif_rx_action(struct xenvif_queue *queue)
-{
-	struct xenvif *vif = queue->vif;
-	s8 status;
-	u16 flags;
-	struct xen_netif_rx_response *resp;
-	struct sk_buff_head rxq;
+struct xenvif_pkt_state {
 	struct sk_buff *skb;
-	LIST_HEAD(notify);
-	int ret;
-	unsigned long offset;
-	bool need_to_notify = false;
+	size_t remaining_len;
+	int frag; /* frag == -1 => skb->head */
+	unsigned int frag_offset;
+	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+	unsigned int extra_count;
+	unsigned int slot;
+};
 
-	struct netrx_pending_operations npo = {
-		.copy  = queue->grant_copy_op,
-		.meta  = queue->meta,
-	};
+static void xenvif_rx_next_skb(struct xenvif_queue *queue,
+			       struct xenvif_pkt_state *pkt)
+{
+	struct sk_buff *skb;
+	unsigned int gso_type;
 
-	skb_queue_head_init(&rxq);
+	skb = xenvif_rx_dequeue(queue);
 
-	while (xenvif_rx_ring_slots_available(queue) &&
-	       (skb = xenvif_rx_dequeue(queue)) != NULL) {
-		queue->last_rx_time = jiffies;
+	queue->stats.tx_bytes += skb->len;
+	queue->stats.tx_packets++;
 
-		XENVIF_RX_CB(skb)->meta_slots_used =
-			xenvif_gop_skb(skb, &npo, queue);
+	/* Reset packet state. */
+	memset(pkt, 0, sizeof(struct xenvif_pkt_state));
 
-		__skb_queue_tail(&rxq, skb);
+	pkt->skb = skb;
+	pkt->remaining_len = skb->len;
+	pkt->frag = -1;
+
+	gso_type = xenvif_gso_type(skb);
+	if ((1 << gso_type) & queue->vif->gso_mask) {
+		struct xen_netif_extra_info *extra;
+
+		extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
+
+		extra->u.gso.type = gso_type;
+		extra->u.gso.size = skb_shinfo(skb)->gso_size;
+		extra->u.gso.pad = 0;
+		extra->u.gso.features = 0;
+		extra->type = XEN_NETIF_EXTRA_TYPE_GSO;
+		extra->flags = 0;
+
+		pkt->extra_count++;
 	}
 
-	WARN_ON(npo.meta_prod > ARRAY_SIZE(queue->meta));
+	if (skb->sw_hash) {
+		struct xen_netif_extra_info *extra;
 
-	if (!npo.copy_prod)
-		goto done;
+		extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_HASH - 1];
 
-	WARN_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
-	gnttab_batch_copy(queue->grant_copy_op, npo.copy_prod);
+		extra->u.hash.algorithm =
+			XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ;
 
-	while ((skb = __skb_dequeue(&rxq)) != NULL) {
-		struct xen_netif_extra_info *extra = NULL;
-
-		queue->stats.tx_bytes += skb->len;
-		queue->stats.tx_packets++;
-
-		status = xenvif_check_gop(vif,
-					  XENVIF_RX_CB(skb)->meta_slots_used,
-					  &npo);
-
-		if (XENVIF_RX_CB(skb)->meta_slots_used == 1)
-			flags = 0;
+		if (skb->l4_hash)
+			extra->u.hash.type =
+				skb->protocol == htons(ETH_P_IP) ?
+				_XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP :
+				_XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP;
 		else
-			flags = XEN_NETRXF_more_data;
+			extra->u.hash.type =
+				skb->protocol == htons(ETH_P_IP) ?
+				_XEN_NETIF_CTRL_HASH_TYPE_IPV4 :
+				_XEN_NETIF_CTRL_HASH_TYPE_IPV6;
 
-		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+		*(uint32_t *)extra->u.hash.value = skb_get_hash_raw(skb);
+
+		extra->type = XEN_NETIF_EXTRA_TYPE_HASH;
+		extra->flags = 0;
+
+		pkt->extra_count++;
+	}
+}
+
+static void xenvif_rx_complete(struct xenvif_queue *queue,
+			       struct xenvif_pkt_state *pkt)
+{
+	int notify;
+
+	/* Complete any outstanding copy ops for this skb. */
+	xenvif_rx_copy_flush(queue);
+
+	/* Push responses and notify. */
+	queue->rx.rsp_prod_pvt = queue->rx.req_cons;
+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, notify);
+	if (notify)
+		notify_remote_via_irq(queue->rx_irq);
+
+	dev_kfree_skb(pkt->skb);
+}
+
+static void xenvif_rx_next_chunk(struct xenvif_queue *queue,
+				 struct xenvif_pkt_state *pkt,
+				 unsigned int offset, void **data,
+				 size_t *len)
+{
+	struct sk_buff *skb = pkt->skb;
+	void *frag_data;
+	size_t frag_len, chunk_len;
+
+	if (pkt->frag == -1) {
+		frag_data = skb->data;
+		frag_len = skb_headlen(skb);
+	} else {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[pkt->frag];
+
+		frag_data = skb_frag_address(frag);
+		frag_len = skb_frag_size(frag);
+	}
+
+	frag_data += pkt->frag_offset;
+	frag_len -= pkt->frag_offset;
+
+	chunk_len = min(frag_len, XEN_PAGE_SIZE - offset);
+	chunk_len = min(chunk_len,
+			XEN_PAGE_SIZE -	xen_offset_in_page(frag_data));
+
+	pkt->frag_offset += chunk_len;
+
+	/* Advance to next frag? */
+	if (frag_len == chunk_len) {
+		pkt->frag++;
+		pkt->frag_offset = 0;
+	}
+
+	*data = frag_data;
+	*len = chunk_len;
+}
+
+static void xenvif_rx_data_slot(struct xenvif_queue *queue,
+				struct xenvif_pkt_state *pkt,
+				struct xen_netif_rx_request *req,
+				struct xen_netif_rx_response *rsp)
+{
+	unsigned int offset = 0;
+	unsigned int flags;
+
+	do {
+		size_t len;
+		void *data;
+
+		xenvif_rx_next_chunk(queue, pkt, offset, &data, &len);
+		xenvif_rx_copy_add(queue, req, offset, data, len);
+
+		offset += len;
+		pkt->remaining_len -= len;
+
+	} while (offset < XEN_PAGE_SIZE && pkt->remaining_len > 0);
+
+	if (pkt->remaining_len > 0)
+		flags = XEN_NETRXF_more_data;
+	else
+		flags = 0;
+
+	if (pkt->slot == 0) {
+		struct sk_buff *skb = pkt->skb;
+
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
 			flags |= XEN_NETRXF_csum_blank |
 				 XEN_NETRXF_data_validated;
 		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
-			/* remote but checksummed. */
 			flags |= XEN_NETRXF_data_validated;
 
-		offset = 0;
-		resp = make_rx_response(queue, queue->meta[npo.meta_cons].id,
-					status, offset,
-					queue->meta[npo.meta_cons].size,
-					flags);
-
-		if ((1 << queue->meta[npo.meta_cons].gso_type) &
-		    vif->gso_mask) {
-			extra = (struct xen_netif_extra_info *)
-				RING_GET_RESPONSE(&queue->rx,
-						  queue->rx.rsp_prod_pvt++);
-
-			resp->flags |= XEN_NETRXF_extra_info;
-
-			extra->u.gso.type = queue->meta[npo.meta_cons].gso_type;
-			extra->u.gso.size = queue->meta[npo.meta_cons].gso_size;
-			extra->u.gso.pad = 0;
-			extra->u.gso.features = 0;
-
-			extra->type = XEN_NETIF_EXTRA_TYPE_GSO;
-			extra->flags = 0;
-		}
-
-		if (skb->sw_hash) {
-			/* Since the skb got here via xenvif_select_queue()
-			 * we know that the hash has been re-calculated
-			 * according to a configuration set by the frontend
-			 * and therefore we know that it is legitimate to
-			 * pass it to the frontend.
-			 */
-			if (resp->flags & XEN_NETRXF_extra_info)
-				extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
-			else
-				resp->flags |= XEN_NETRXF_extra_info;
-
-			extra = (struct xen_netif_extra_info *)
-				RING_GET_RESPONSE(&queue->rx,
-						  queue->rx.rsp_prod_pvt++);
-
-			extra->u.hash.algorithm =
-				XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ;
-
-			if (skb->l4_hash)
-				extra->u.hash.type =
-					skb->protocol == htons(ETH_P_IP) ?
-					_XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP :
-					_XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP;
-			else
-				extra->u.hash.type =
-					skb->protocol == htons(ETH_P_IP) ?
-					_XEN_NETIF_CTRL_HASH_TYPE_IPV4 :
-					_XEN_NETIF_CTRL_HASH_TYPE_IPV6;
-
-			*(uint32_t *)extra->u.hash.value =
-				skb_get_hash_raw(skb);
-
-			extra->type = XEN_NETIF_EXTRA_TYPE_HASH;
-			extra->flags = 0;
-		}
-
-		xenvif_add_frag_responses(queue, status,
-					  queue->meta + npo.meta_cons + 1,
-					  XENVIF_RX_CB(skb)->meta_slots_used);
-
-		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, ret);
-
-		need_to_notify |= !!ret;
-
-		npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used;
-		dev_kfree_skb(skb);
+		if (pkt->extra_count != 0)
+			flags |= XEN_NETRXF_extra_info;
 	}
 
-done:
-	if (need_to_notify)
-		notify_remote_via_irq(queue->rx_irq);
+	rsp->offset = 0;
+	rsp->flags = flags;
+	rsp->id = req->id;
+	rsp->status = (s16)offset;
+}
+
+static void xenvif_rx_extra_slot(struct xenvif_queue *queue,
+				 struct xenvif_pkt_state *pkt,
+				 struct xen_netif_rx_request *req,
+				 struct xen_netif_rx_response *rsp)
+{
+	struct xen_netif_extra_info *extra = (void *)rsp;
+	unsigned int i;
+
+	pkt->extra_count--;
+
+	for (i = 0; i < ARRAY_SIZE(pkt->extras); i++) {
+		if (pkt->extras[i].type) {
+			*extra = pkt->extras[i];
+
+			if (pkt->extra_count != 0)
+				extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
+
+			pkt->extras[i].type = 0;
+			return;
+		}
+	}
+	BUG();
+}
+
+void xenvif_rx_action(struct xenvif_queue *queue)
+{
+	struct xenvif_pkt_state pkt;
+
+	xenvif_rx_next_skb(queue, &pkt);
+
+	do {
+		struct xen_netif_rx_request *req;
+		struct xen_netif_rx_response *rsp;
+
+		req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons);
+		rsp = RING_GET_RESPONSE(&queue->rx, queue->rx.req_cons);
+
+		/* Extras must go after the first data slot */
+		if (pkt.slot != 0 && pkt.extra_count != 0)
+			xenvif_rx_extra_slot(queue, &pkt, req, rsp);
+		else
+			xenvif_rx_data_slot(queue, &pkt, req, rsp);
+
+		queue->rx.req_cons++;
+		pkt.slot++;
+	} while (pkt.remaining_len > 0 || pkt.extra_count != 0);
+
+	xenvif_rx_complete(queue, &pkt);
 }
 
 static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH v2 net-next 3/7] xen-netback: refactor guest rx
  2016-10-04  9:29 [PATCH v2 net-next 0/7] xen-netback: guest rx side refactor Paul Durrant
                   ` (4 preceding siblings ...)
  2016-10-04  9:29 ` [PATCH v2 net-next 3/7] xen-netback: refactor guest rx Paul Durrant
@ 2016-10-04  9:29 ` Paul Durrant
  2016-10-04  9:29 ` [PATCH v2 net-next 4/7] xen-netback: immediately wake tx queue when guest rx queue has space Paul Durrant
                   ` (9 subsequent siblings)
  15 siblings, 0 replies; 40+ messages in thread
From: Paul Durrant @ 2016-10-04  9:29 UTC (permalink / raw)
  To: netdev, xen-devel; +Cc: Paul Durrant, Wei Liu, David Vrabel

From: David Vrabel <david.vrabel@citrix.com>

Refactor the to-guest (rx) path to:

1. Push responses for completed skbs earlier, reducing latency.

2. Reduce the per-queue memory overhead by greatly reducing the
   maximum number of grant copy ops in each hypercall (from 4352 to
   64).  Each struct xenvif_queue is now only 44 kB instead of 220 kB.

3. Make the code more maintainable.
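
For reference, the worst-case figure above falls out of the existing
macros roughly as follows (a sketch assuming a 4 kB Xen page size and a
256-entry rx ring, consistent with the 4352 quoted above):

  /* Old scheme: room for a worst-case skb in every ring slot. */
  #define MAX_XEN_SKB_FRAGS   (65536 / XEN_PAGE_SIZE + 1)   /* 17 with 4 kB pages */
  #define MAX_GRANT_COPY_OPS  (MAX_XEN_SKB_FRAGS * XEN_NETIF_RX_RING_SIZE)
                              /* 17 * 256 = 4352 ops kept per queue */

  /* New scheme: a small fixed batch, flushed whenever it fills. */
  #define COPY_BATCH_SIZE     64   /* ops per gnttab_batch_copy() hypercall */

Most of the 220 kB -> 44 kB reduction in struct xenvif_queue comes from
replacing the per-queue grant_copy_op[MAX_GRANT_COPY_OPS] and
meta[XEN_NETIF_RX_RING_SIZE] arrays with the 64-entry xenvif_copy_state
introduced below.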

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
[re-based]
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
---
Cc: Wei Liu <wei.liu2@citrix.com>
---
 drivers/net/xen-netback/common.h |  23 +-
 drivers/net/xen-netback/rx.c     | 654 +++++++++++++++------------------------
 2 files changed, 254 insertions(+), 423 deletions(-)

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 0ba5910..7d12a38 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -91,13 +91,6 @@ struct xenvif_rx_meta {
  */
 #define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1)
 
-/* It's possible for an skb to have a maximal number of frags
- * but still be less than MAX_BUFFER_OFFSET in size. Thus the
- * worst-case number of copy operations is MAX_XEN_SKB_FRAGS per
- * ring slot.
- */
-#define MAX_GRANT_COPY_OPS (MAX_XEN_SKB_FRAGS * XEN_NETIF_RX_RING_SIZE)
-
 #define NETBACK_INVALID_HANDLE -1
 
 /* To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating
@@ -133,6 +126,14 @@ struct xenvif_stats {
 	unsigned long tx_frag_overflow;
 };
 
+#define COPY_BATCH_SIZE 64
+
+struct xenvif_copy_state {
+	struct gnttab_copy op[COPY_BATCH_SIZE];
+	RING_IDX idx[COPY_BATCH_SIZE];
+	unsigned int num;
+};
+
 struct xenvif_queue { /* Per-queue data for xenvif */
 	unsigned int id; /* Queue ID, 0-based */
 	char name[QUEUE_NAME_SIZE]; /* DEVNAME-qN */
@@ -189,12 +190,7 @@ struct xenvif_queue { /* Per-queue data for xenvif */
 	unsigned long last_rx_time;
 	bool stalled;
 
-	struct gnttab_copy grant_copy_op[MAX_GRANT_COPY_OPS];
-
-	/* We create one meta structure per ring request we consume, so
-	 * the maximum number is the same as the ring size.
-	 */
-	struct xenvif_rx_meta meta[XEN_NETIF_RX_RING_SIZE];
+	struct xenvif_copy_state rx_copy;
 
 	/* Transmit shaping: allow 'credit_bytes' every 'credit_usec'. */
 	unsigned long   credit_bytes;
@@ -358,6 +354,7 @@ int xenvif_dealloc_kthread(void *data);
 
 irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data);
 
+void xenvif_rx_action(struct xenvif_queue *queue);
 void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb);
 
 void xenvif_carrier_on(struct xenvif *vif);
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index 6bd7d6e..b0ce4c6 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -26,7 +26,6 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  * IN THE SOFTWARE.
  */
-
 #include "common.h"
 
 #include <linux/kthread.h>
@@ -137,464 +136,299 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
 	}
 }
 
-struct netrx_pending_operations {
-	unsigned int copy_prod, copy_cons;
-	unsigned int meta_prod, meta_cons;
-	struct gnttab_copy *copy;
-	struct xenvif_rx_meta *meta;
-	int copy_off;
-	grant_ref_t copy_gref;
-};
-
-static struct xenvif_rx_meta *get_next_rx_buffer(
-	struct xenvif_queue *queue,
-	struct netrx_pending_operations *npo)
+static void xenvif_rx_copy_flush(struct xenvif_queue *queue)
 {
-	struct xenvif_rx_meta *meta;
-	struct xen_netif_rx_request req;
+	unsigned int i;
 
-	RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
+	gnttab_batch_copy(queue->rx_copy.op, queue->rx_copy.num);
 
-	meta = npo->meta + npo->meta_prod++;
-	meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
-	meta->gso_size = 0;
-	meta->size = 0;
-	meta->id = req.id;
+	for (i = 0; i < queue->rx_copy.num; i++) {
+		struct gnttab_copy *op;
 
-	npo->copy_off = 0;
-	npo->copy_gref = req.gref;
+		op = &queue->rx_copy.op[i];
 
-	return meta;
+		/* If the copy failed, overwrite the status field in
+		 * the corresponding response.
+		 */
+		if (unlikely(op->status != GNTST_okay)) {
+			struct xen_netif_rx_response *rsp;
+
+			rsp = RING_GET_RESPONSE(&queue->rx,
+						queue->rx_copy.idx[i]);
+			rsp->status = op->status;
+		}
+	}
+
+	queue->rx_copy.num = 0;
 }
 
-struct gop_frag_copy {
-	struct xenvif_queue *queue;
-	struct netrx_pending_operations *npo;
-	struct xenvif_rx_meta *meta;
-	int head;
-	int gso_type;
-	int protocol;
-	int hash_present;
-
-	struct page *page;
-};
-
-static void xenvif_setup_copy_gop(unsigned long gfn,
-				  unsigned int offset,
-				  unsigned int *len,
-				  struct gop_frag_copy *info)
+static void xenvif_rx_copy_add(struct xenvif_queue *queue,
+			       struct xen_netif_rx_request *req,
+			       unsigned int offset, void *data, size_t len)
 {
-	struct gnttab_copy *copy_gop;
+	struct gnttab_copy *op;
+	struct page *page;
 	struct xen_page_foreign *foreign;
-	/* Convenient aliases */
-	struct xenvif_queue *queue = info->queue;
-	struct netrx_pending_operations *npo = info->npo;
-	struct page *page = info->page;
 
-	WARN_ON(npo->copy_off > MAX_BUFFER_OFFSET);
+	if (queue->rx_copy.num == COPY_BATCH_SIZE)
+		xenvif_rx_copy_flush(queue);
 
-	if (npo->copy_off == MAX_BUFFER_OFFSET)
-		info->meta = get_next_rx_buffer(queue, npo);
+	op = &queue->rx_copy.op[queue->rx_copy.num];
 
-	if (npo->copy_off + *len > MAX_BUFFER_OFFSET)
-		*len = MAX_BUFFER_OFFSET - npo->copy_off;
+	page = virt_to_page(data);
 
-	copy_gop = npo->copy + npo->copy_prod++;
-	copy_gop->flags = GNTCOPY_dest_gref;
-	copy_gop->len = *len;
+	op->flags = GNTCOPY_dest_gref;
 
 	foreign = xen_page_foreign(page);
 	if (foreign) {
-		copy_gop->source.domid = foreign->domid;
-		copy_gop->source.u.ref = foreign->gref;
-		copy_gop->flags |= GNTCOPY_source_gref;
+		op->source.domid = foreign->domid;
+		op->source.u.ref = foreign->gref;
+		op->flags |= GNTCOPY_source_gref;
 	} else {
-		copy_gop->source.domid = DOMID_SELF;
-		copy_gop->source.u.gmfn = gfn;
+		op->source.u.gmfn = virt_to_gfn(data);
+		op->source.domid  = DOMID_SELF;
 	}
-	copy_gop->source.offset = offset;
 
-	copy_gop->dest.domid = queue->vif->domid;
-	copy_gop->dest.offset = npo->copy_off;
-	copy_gop->dest.u.ref = npo->copy_gref;
+	op->source.offset = xen_offset_in_page(data);
+	op->dest.u.ref    = req->gref;
+	op->dest.domid    = queue->vif->domid;
+	op->dest.offset   = offset;
+	op->len           = len;
 
-	npo->copy_off += *len;
-	info->meta->size += *len;
-
-	if (!info->head)
-		return;
-
-	/* Leave a gap for the GSO descriptor. */
-	if ((1 << info->gso_type) & queue->vif->gso_mask)
-		queue->rx.req_cons++;
-
-	/* Leave a gap for the hash extra segment. */
-	if (info->hash_present)
-		queue->rx.req_cons++;
-
-	info->head = 0; /* There must be something in this buffer now */
+	queue->rx_copy.idx[queue->rx_copy.num] = queue->rx.req_cons;
+	queue->rx_copy.num++;
 }
 
-static void xenvif_gop_frag_copy_grant(unsigned long gfn,
-				       unsigned int offset,
-				       unsigned int len,
-				       void *data)
+static unsigned int xenvif_gso_type(struct sk_buff *skb)
 {
-	unsigned int bytes;
-
-	while (len) {
-		bytes = len;
-		xenvif_setup_copy_gop(gfn, offset, &bytes, data);
-		offset += bytes;
-		len -= bytes;
-	}
-}
-
-/* Set up the grant operations for this fragment. If it's a flipping
- * interface, we also set up the unmap request from here.
- */
-static void xenvif_gop_frag_copy(struct xenvif_queue *queue,
-				 struct sk_buff *skb,
-				 struct netrx_pending_operations *npo,
-				 struct page *page, unsigned long size,
-				 unsigned long offset, int *head)
-{
-	struct gop_frag_copy info = {
-		.queue = queue,
-		.npo = npo,
-		.head = *head,
-		.gso_type = XEN_NETIF_GSO_TYPE_NONE,
-		/* xenvif_set_skb_hash() will have either set a s/w
-		 * hash or cleared the hash depending on
-		 * whether the the frontend wants a hash for this skb.
-		 */
-		.hash_present = skb->sw_hash,
-	};
-	unsigned long bytes;
-
 	if (skb_is_gso(skb)) {
 		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
-			info.gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
-		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
-			info.gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
-	}
-
-	/* Data must not cross a page boundary. */
-	WARN_ON(size + offset > (PAGE_SIZE << compound_order(page)));
-
-	info.meta = npo->meta + npo->meta_prod - 1;
-
-	/* Skip unused frames from start of page */
-	page += offset >> PAGE_SHIFT;
-	offset &= ~PAGE_MASK;
-
-	while (size > 0) {
-		WARN_ON(offset >= PAGE_SIZE);
-
-		bytes = PAGE_SIZE - offset;
-		if (bytes > size)
-			bytes = size;
-
-		info.page = page;
-		gnttab_foreach_grant_in_range(page, offset, bytes,
-					      xenvif_gop_frag_copy_grant,
-					      &info);
-		size -= bytes;
-		offset = 0;
-
-		/* Next page */
-		if (size) {
-			WARN_ON(!PageCompound(page));
-			page++;
-		}
-	}
-
-	*head = info.head;
-}
-
-/* Prepare an SKB to be transmitted to the frontend.
- *
- * This function is responsible for allocating grant operations, meta
- * structures, etc.
- *
- * It returns the number of meta structures consumed. The number of
- * ring slots used is always equal to the number of meta slots used
- * plus the number of GSO descriptors used. Currently, we use either
- * zero GSO descriptors (for non-GSO packets) or one descriptor (for
- * frontend-side LRO).
- */
-static int xenvif_gop_skb(struct sk_buff *skb,
-			  struct netrx_pending_operations *npo,
-			  struct xenvif_queue *queue)
-{
-	struct xenvif *vif = netdev_priv(skb->dev);
-	int nr_frags = skb_shinfo(skb)->nr_frags;
-	int i;
-	struct xen_netif_rx_request req;
-	struct xenvif_rx_meta *meta;
-	unsigned char *data;
-	int head = 1;
-	int old_meta_prod;
-	int gso_type;
-
-	old_meta_prod = npo->meta_prod;
-
-	gso_type = XEN_NETIF_GSO_TYPE_NONE;
-	if (skb_is_gso(skb)) {
-		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
-			gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
-		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)
-			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
-	}
-
-	RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
-	meta = npo->meta + npo->meta_prod++;
-
-	if ((1 << gso_type) & vif->gso_mask) {
-		meta->gso_type = gso_type;
-		meta->gso_size = skb_shinfo(skb)->gso_size;
-	} else {
-		meta->gso_type = XEN_NETIF_GSO_TYPE_NONE;
-		meta->gso_size = 0;
-	}
-
-	meta->size = 0;
-	meta->id = req.id;
-	npo->copy_off = 0;
-	npo->copy_gref = req.gref;
-
-	data = skb->data;
-	while (data < skb_tail_pointer(skb)) {
-		unsigned int offset = offset_in_page(data);
-		unsigned int len = PAGE_SIZE - offset;
-
-		if (data + len > skb_tail_pointer(skb))
-			len = skb_tail_pointer(skb) - data;
-
-		xenvif_gop_frag_copy(queue, skb, npo,
-				     virt_to_page(data), len, offset, &head);
-		data += len;
-	}
-
-	for (i = 0; i < nr_frags; i++) {
-		xenvif_gop_frag_copy(queue, skb, npo,
-				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
-				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
-				     skb_shinfo(skb)->frags[i].page_offset,
-				     &head);
-	}
-
-	return npo->meta_prod - old_meta_prod;
-}
-
-/* This is a twin to xenvif_gop_skb.  Assume that xenvif_gop_skb was
- * used to set up the operations on the top of
- * netrx_pending_operations, which have since been done.  Check that
- * they didn't give any errors and advance over them.
- */
-static int xenvif_check_gop(struct xenvif *vif, int nr_meta_slots,
-			    struct netrx_pending_operations *npo)
-{
-	struct gnttab_copy     *copy_op;
-	int status = XEN_NETIF_RSP_OKAY;
-	int i;
-
-	for (i = 0; i < nr_meta_slots; i++) {
-		copy_op = npo->copy + npo->copy_cons++;
-		if (copy_op->status != GNTST_okay) {
-			netdev_dbg(vif->dev,
-				   "Bad status %d from copy to DOM%d.\n",
-				   copy_op->status, vif->domid);
-			status = XEN_NETIF_RSP_ERROR;
-		}
-	}
-
-	return status;
-}
-
-static struct xen_netif_rx_response *make_rx_response(
-	struct xenvif_queue *queue, u16 id, s8 st, u16 offset, u16 size,
-	u16 flags)
-{
-	RING_IDX i = queue->rx.rsp_prod_pvt;
-	struct xen_netif_rx_response *resp;
-
-	resp = RING_GET_RESPONSE(&queue->rx, i);
-	resp->offset     = offset;
-	resp->flags      = flags;
-	resp->id         = id;
-	resp->status     = (s16)size;
-	if (st < 0)
-		resp->status = (s16)st;
-
-	queue->rx.rsp_prod_pvt = ++i;
-
-	return resp;
-}
-
-static void xenvif_add_frag_responses(struct xenvif_queue *queue,
-				      int status,
-				      struct xenvif_rx_meta *meta,
-				      int nr_meta_slots)
-{
-	int i;
-	unsigned long offset;
-
-	/* No fragments used */
-	if (nr_meta_slots <= 1)
-		return;
-
-	nr_meta_slots--;
-
-	for (i = 0; i < nr_meta_slots; i++) {
-		int flags;
-
-		if (i == nr_meta_slots - 1)
-			flags = 0;
+			return XEN_NETIF_GSO_TYPE_TCPV4;
 		else
-			flags = XEN_NETRXF_more_data;
-
-		offset = 0;
-		make_rx_response(queue, meta[i].id, status, offset,
-				 meta[i].size, flags);
+			return XEN_NETIF_GSO_TYPE_TCPV6;
 	}
+	return XEN_NETIF_GSO_TYPE_NONE;
 }
 
-static void xenvif_rx_action(struct xenvif_queue *queue)
-{
-	struct xenvif *vif = queue->vif;
-	s8 status;
-	u16 flags;
-	struct xen_netif_rx_response *resp;
-	struct sk_buff_head rxq;
+struct xenvif_pkt_state {
 	struct sk_buff *skb;
-	LIST_HEAD(notify);
-	int ret;
-	unsigned long offset;
-	bool need_to_notify = false;
+	size_t remaining_len;
+	int frag; /* frag == -1 => skb->head */
+	unsigned int frag_offset;
+	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
+	unsigned int extra_count;
+	unsigned int slot;
+};
 
-	struct netrx_pending_operations npo = {
-		.copy  = queue->grant_copy_op,
-		.meta  = queue->meta,
-	};
+static void xenvif_rx_next_skb(struct xenvif_queue *queue,
+			       struct xenvif_pkt_state *pkt)
+{
+	struct sk_buff *skb;
+	unsigned int gso_type;
 
-	skb_queue_head_init(&rxq);
+	skb = xenvif_rx_dequeue(queue);
 
-	while (xenvif_rx_ring_slots_available(queue) &&
-	       (skb = xenvif_rx_dequeue(queue)) != NULL) {
-		queue->last_rx_time = jiffies;
+	queue->stats.tx_bytes += skb->len;
+	queue->stats.tx_packets++;
 
-		XENVIF_RX_CB(skb)->meta_slots_used =
-			xenvif_gop_skb(skb, &npo, queue);
+	/* Reset packet state. */
+	memset(pkt, 0, sizeof(struct xenvif_pkt_state));
 
-		__skb_queue_tail(&rxq, skb);
+	pkt->skb = skb;
+	pkt->remaining_len = skb->len;
+	pkt->frag = -1;
+
+	gso_type = xenvif_gso_type(skb);
+	if ((1 << gso_type) & queue->vif->gso_mask) {
+		struct xen_netif_extra_info *extra;
+
+		extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
+
+		extra->u.gso.type = gso_type;
+		extra->u.gso.size = skb_shinfo(skb)->gso_size;
+		extra->u.gso.pad = 0;
+		extra->u.gso.features = 0;
+		extra->type = XEN_NETIF_EXTRA_TYPE_GSO;
+		extra->flags = 0;
+
+		pkt->extra_count++;
 	}
 
-	WARN_ON(npo.meta_prod > ARRAY_SIZE(queue->meta));
+	if (skb->sw_hash) {
+		struct xen_netif_extra_info *extra;
 
-	if (!npo.copy_prod)
-		goto done;
+		extra = &pkt->extras[XEN_NETIF_EXTRA_TYPE_HASH - 1];
 
-	WARN_ON(npo.copy_prod > MAX_GRANT_COPY_OPS);
-	gnttab_batch_copy(queue->grant_copy_op, npo.copy_prod);
+		extra->u.hash.algorithm =
+			XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ;
 
-	while ((skb = __skb_dequeue(&rxq)) != NULL) {
-		struct xen_netif_extra_info *extra = NULL;
-
-		queue->stats.tx_bytes += skb->len;
-		queue->stats.tx_packets++;
-
-		status = xenvif_check_gop(vif,
-					  XENVIF_RX_CB(skb)->meta_slots_used,
-					  &npo);
-
-		if (XENVIF_RX_CB(skb)->meta_slots_used == 1)
-			flags = 0;
+		if (skb->l4_hash)
+			extra->u.hash.type =
+				skb->protocol == htons(ETH_P_IP) ?
+				_XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP :
+				_XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP;
 		else
-			flags = XEN_NETRXF_more_data;
+			extra->u.hash.type =
+				skb->protocol == htons(ETH_P_IP) ?
+				_XEN_NETIF_CTRL_HASH_TYPE_IPV4 :
+				_XEN_NETIF_CTRL_HASH_TYPE_IPV6;
 
-		if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
+		*(uint32_t *)extra->u.hash.value = skb_get_hash_raw(skb);
+
+		extra->type = XEN_NETIF_EXTRA_TYPE_HASH;
+		extra->flags = 0;
+
+		pkt->extra_count++;
+	}
+}
+
+static void xenvif_rx_complete(struct xenvif_queue *queue,
+			       struct xenvif_pkt_state *pkt)
+{
+	int notify;
+
+	/* Complete any outstanding copy ops for this skb. */
+	xenvif_rx_copy_flush(queue);
+
+	/* Push responses and notify. */
+	queue->rx.rsp_prod_pvt = queue->rx.req_cons;
+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, notify);
+	if (notify)
+		notify_remote_via_irq(queue->rx_irq);
+
+	dev_kfree_skb(pkt->skb);
+}
+
+static void xenvif_rx_next_chunk(struct xenvif_queue *queue,
+				 struct xenvif_pkt_state *pkt,
+				 unsigned int offset, void **data,
+				 size_t *len)
+{
+	struct sk_buff *skb = pkt->skb;
+	void *frag_data;
+	size_t frag_len, chunk_len;
+
+	if (pkt->frag == -1) {
+		frag_data = skb->data;
+		frag_len = skb_headlen(skb);
+	} else {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[pkt->frag];
+
+		frag_data = skb_frag_address(frag);
+		frag_len = skb_frag_size(frag);
+	}
+
+	frag_data += pkt->frag_offset;
+	frag_len -= pkt->frag_offset;
+
+	chunk_len = min(frag_len, XEN_PAGE_SIZE - offset);
+	chunk_len = min(chunk_len,
+			XEN_PAGE_SIZE -	xen_offset_in_page(frag_data));
+
+	pkt->frag_offset += chunk_len;
+
+	/* Advance to next frag? */
+	if (frag_len == chunk_len) {
+		pkt->frag++;
+		pkt->frag_offset = 0;
+	}
+
+	*data = frag_data;
+	*len = chunk_len;
+}
+
+static void xenvif_rx_data_slot(struct xenvif_queue *queue,
+				struct xenvif_pkt_state *pkt,
+				struct xen_netif_rx_request *req,
+				struct xen_netif_rx_response *rsp)
+{
+	unsigned int offset = 0;
+	unsigned int flags;
+
+	do {
+		size_t len;
+		void *data;
+
+		xenvif_rx_next_chunk(queue, pkt, offset, &data, &len);
+		xenvif_rx_copy_add(queue, req, offset, data, len);
+
+		offset += len;
+		pkt->remaining_len -= len;
+
+	} while (offset < XEN_PAGE_SIZE && pkt->remaining_len > 0);
+
+	if (pkt->remaining_len > 0)
+		flags = XEN_NETRXF_more_data;
+	else
+		flags = 0;
+
+	if (pkt->slot == 0) {
+		struct sk_buff *skb = pkt->skb;
+
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
 			flags |= XEN_NETRXF_csum_blank |
 				 XEN_NETRXF_data_validated;
 		else if (skb->ip_summed == CHECKSUM_UNNECESSARY)
-			/* remote but checksummed. */
 			flags |= XEN_NETRXF_data_validated;
 
-		offset = 0;
-		resp = make_rx_response(queue, queue->meta[npo.meta_cons].id,
-					status, offset,
-					queue->meta[npo.meta_cons].size,
-					flags);
-
-		if ((1 << queue->meta[npo.meta_cons].gso_type) &
-		    vif->gso_mask) {
-			extra = (struct xen_netif_extra_info *)
-				RING_GET_RESPONSE(&queue->rx,
-						  queue->rx.rsp_prod_pvt++);
-
-			resp->flags |= XEN_NETRXF_extra_info;
-
-			extra->u.gso.type = queue->meta[npo.meta_cons].gso_type;
-			extra->u.gso.size = queue->meta[npo.meta_cons].gso_size;
-			extra->u.gso.pad = 0;
-			extra->u.gso.features = 0;
-
-			extra->type = XEN_NETIF_EXTRA_TYPE_GSO;
-			extra->flags = 0;
-		}
-
-		if (skb->sw_hash) {
-			/* Since the skb got here via xenvif_select_queue()
-			 * we know that the hash has been re-calculated
-			 * according to a configuration set by the frontend
-			 * and therefore we know that it is legitimate to
-			 * pass it to the frontend.
-			 */
-			if (resp->flags & XEN_NETRXF_extra_info)
-				extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
-			else
-				resp->flags |= XEN_NETRXF_extra_info;
-
-			extra = (struct xen_netif_extra_info *)
-				RING_GET_RESPONSE(&queue->rx,
-						  queue->rx.rsp_prod_pvt++);
-
-			extra->u.hash.algorithm =
-				XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ;
-
-			if (skb->l4_hash)
-				extra->u.hash.type =
-					skb->protocol == htons(ETH_P_IP) ?
-					_XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP :
-					_XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP;
-			else
-				extra->u.hash.type =
-					skb->protocol == htons(ETH_P_IP) ?
-					_XEN_NETIF_CTRL_HASH_TYPE_IPV4 :
-					_XEN_NETIF_CTRL_HASH_TYPE_IPV6;
-
-			*(uint32_t *)extra->u.hash.value =
-				skb_get_hash_raw(skb);
-
-			extra->type = XEN_NETIF_EXTRA_TYPE_HASH;
-			extra->flags = 0;
-		}
-
-		xenvif_add_frag_responses(queue, status,
-					  queue->meta + npo.meta_cons + 1,
-					  XENVIF_RX_CB(skb)->meta_slots_used);
-
-		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, ret);
-
-		need_to_notify |= !!ret;
-
-		npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used;
-		dev_kfree_skb(skb);
+		if (pkt->extra_count != 0)
+			flags |= XEN_NETRXF_extra_info;
 	}
 
-done:
-	if (need_to_notify)
-		notify_remote_via_irq(queue->rx_irq);
+	rsp->offset = 0;
+	rsp->flags = flags;
+	rsp->id = req->id;
+	rsp->status = (s16)offset;
+}
+
+static void xenvif_rx_extra_slot(struct xenvif_queue *queue,
+				 struct xenvif_pkt_state *pkt,
+				 struct xen_netif_rx_request *req,
+				 struct xen_netif_rx_response *rsp)
+{
+	struct xen_netif_extra_info *extra = (void *)rsp;
+	unsigned int i;
+
+	pkt->extra_count--;
+
+	for (i = 0; i < ARRAY_SIZE(pkt->extras); i++) {
+		if (pkt->extras[i].type) {
+			*extra = pkt->extras[i];
+
+			if (pkt->extra_count != 0)
+				extra->flags |= XEN_NETIF_EXTRA_FLAG_MORE;
+
+			pkt->extras[i].type = 0;
+			return;
+		}
+	}
+	BUG();
+}
+
+void xenvif_rx_action(struct xenvif_queue *queue)
+{
+	struct xenvif_pkt_state pkt;
+
+	xenvif_rx_next_skb(queue, &pkt);
+
+	do {
+		struct xen_netif_rx_request *req;
+		struct xen_netif_rx_response *rsp;
+
+		req = RING_GET_REQUEST(&queue->rx, queue->rx.req_cons);
+		rsp = RING_GET_RESPONSE(&queue->rx, queue->rx.req_cons);
+
+		/* Extras must go after the first data slot */
+		if (pkt.slot != 0 && pkt.extra_count != 0)
+			xenvif_rx_extra_slot(queue, &pkt, req, rsp);
+		else
+			xenvif_rx_data_slot(queue, &pkt, req, rsp);
+
+		queue->rx.req_cons++;
+		pkt.slot++;
+	} while (pkt.remaining_len > 0 || pkt.extra_count != 0);
+
+	xenvif_rx_complete(queue, &pkt);
 }
 
 static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
-- 
2.1.4


^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH v2 net-next 4/7] xen-netback: immediately wake tx queue when guest rx queue has space
  2016-10-04  9:29 [PATCH v2 net-next 0/7] xen-netback: guest rx side refactor Paul Durrant
                   ` (6 preceding siblings ...)
  2016-10-04  9:29 ` [PATCH v2 net-next 4/7] xen-netback: immediately wake tx queue when guest rx queue has space Paul Durrant
@ 2016-10-04  9:29 ` Paul Durrant
  2016-10-04 12:48   ` [Xen-devel] " Konrad Rzeszutek Wilk
  2016-10-04 12:48   ` Konrad Rzeszutek Wilk
  2016-10-04  9:29 ` [PATCH v2 net-next 5/7] xen-netback: process guest rx packets in batches Paul Durrant
                   ` (7 subsequent siblings)
  15 siblings, 2 replies; 40+ messages in thread
From: Paul Durrant @ 2016-10-04  9:29 UTC (permalink / raw)
  To: netdev, xen-devel; +Cc: David Vrabel, Paul Durrant, Wei Liu

From: David Vrabel <david.vrabel@citrix.com>

When an skb is removed from the guest rx queue, wake the tx queue
immediately, instead of waiting until after the dequeued skbs have been
processed.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
[re-based]
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
---
Cc: Wei Liu <wei.liu2@citrix.com>
---
 drivers/net/xen-netback/rx.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index b0ce4c6..9548709 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -92,27 +92,21 @@ static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
 	spin_lock_irq(&queue->rx_queue.lock);
 
 	skb = __skb_dequeue(&queue->rx_queue);
-	if (skb)
+	if (skb) {
 		queue->rx_queue_len -= skb->len;
+		if (queue->rx_queue_len < queue->rx_queue_max) {
+			struct netdev_queue *txq;
+
+			txq = netdev_get_tx_queue(queue->vif->dev, queue->id);
+			netif_tx_wake_queue(txq);
+		}
+	}
 
 	spin_unlock_irq(&queue->rx_queue.lock);
 
 	return skb;
 }
 
-static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue)
-{
-	spin_lock_irq(&queue->rx_queue.lock);
-
-	if (queue->rx_queue_len < queue->rx_queue_max) {
-		struct net_device *dev = queue->vif->dev;
-
-		netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id));
-	}
-
-	spin_unlock_irq(&queue->rx_queue.lock);
-}
-
 static void xenvif_rx_queue_purge(struct xenvif_queue *queue)
 {
 	struct sk_buff *skb;
@@ -585,8 +579,6 @@ int xenvif_kthread_guest_rx(void *data)
 		 */
 		xenvif_rx_queue_drop_expired(queue);
 
-		xenvif_rx_queue_maybe_wake(queue);
-
 		cond_resched();
 	}
 
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH v2 net-next 5/7] xen-netback: process guest rx packets in batches
  2016-10-04  9:29 [PATCH v2 net-next 0/7] xen-netback: guest rx side refactor Paul Durrant
                   ` (7 preceding siblings ...)
  2016-10-04  9:29 ` Paul Durrant
@ 2016-10-04  9:29 ` Paul Durrant
  2016-10-04 12:47   ` Konrad Rzeszutek Wilk
  2016-10-04 12:47   ` [Xen-devel] " Konrad Rzeszutek Wilk
  2016-10-04  9:29 ` Paul Durrant
                   ` (6 subsequent siblings)
  15 siblings, 2 replies; 40+ messages in thread
From: Paul Durrant @ 2016-10-04  9:29 UTC (permalink / raw)
  To: netdev, xen-devel; +Cc: David Vrabel, Paul Durrant, Wei Liu

From: David Vrabel <david.vrabel@citrix.com>

Instead of only placing one skb on the guest rx ring at a time, process
a batch of up to 64.  This improves performance by ~10% in some tests.
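
Note that this batch is a different limit from the copy batch added
earlier in the series; as a rough summary of the two (using the names
from these patches):

  /* COPY_BATCH_SIZE - grant copy ops queued per gnttab_batch_copy()
   *                   hypercall; a single skb may contribute several ops.
   * RX_BATCH_SIZE   - skbs placed on the guest rx ring per call to
   *                   xenvif_rx_action(); the loop below also stops early
   *                   if the ring runs out of request slots.
   */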

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
[re-based]
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
---
Cc: Wei Liu <wei.liu2@citrix.com>
---
 drivers/net/xen-netback/rx.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index 9548709..ae822b8 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -399,7 +399,7 @@ static void xenvif_rx_extra_slot(struct xenvif_queue *queue,
 	BUG();
 }
 
-void xenvif_rx_action(struct xenvif_queue *queue)
+void xenvif_rx_skb(struct xenvif_queue *queue)
 {
 	struct xenvif_pkt_state pkt;
 
@@ -425,6 +425,19 @@ void xenvif_rx_action(struct xenvif_queue *queue)
 	xenvif_rx_complete(queue, &pkt);
 }
 
+#define RX_BATCH_SIZE 64
+
+void xenvif_rx_action(struct xenvif_queue *queue)
+{
+	unsigned int work_done = 0;
+
+	while (xenvif_rx_ring_slots_available(queue) &&
+	       work_done < RX_BATCH_SIZE) {
+		xenvif_rx_skb(queue);
+		work_done++;
+	}
+}
+
 static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
 {
 	RING_IDX prod, cons;
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH v2 net-next 6/7] xen-netback: batch copies for multiple to-guest rx packets
  2016-10-04  9:29 [PATCH v2 net-next 0/7] xen-netback: guest rx side refactor Paul Durrant
                   ` (10 preceding siblings ...)
  2016-10-04  9:29 ` [PATCH v2 net-next 6/7] xen-netback: batch copies for multiple to-guest rx packets Paul Durrant
@ 2016-10-04  9:29 ` Paul Durrant
  2016-10-04  9:29 ` [PATCH v2 net-next 7/7] xen/netback: add fraglist support for to-guest rx Paul Durrant
                   ` (3 subsequent siblings)
  15 siblings, 0 replies; 40+ messages in thread
From: Paul Durrant @ 2016-10-04  9:29 UTC (permalink / raw)
  To: netdev, xen-devel; +Cc: David Vrabel, Paul Durrant, Wei Liu

From: David Vrabel <david.vrabel@citrix.com>

Instead of flushing the copy ops when a packet is complete, complete
packets when their copy ops are done.  This improves performance by
reducing the number of grant copy hypercalls.

Latency is still limited by the relatively small size of the copy
batch.
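
Combined with the previous patches, one pass over the ring now looks
roughly like this (an illustrative call-flow sketch using the function
names from this series, not literal code):

  /* xenvif_rx_action()
   *   queue->rx_copy.completed = &completed_skbs;
   *   for up to RX_BATCH_SIZE skbs:
   *     xenvif_rx_skb()
   *       - xenvif_rx_copy_add() queues grant copy ops for each slot;
   *         if op[] fills, xenvif_rx_copy_flush() runs early
   *       - xenvif_rx_complete() advances rsp_prod_pvt over the slots
   *         just written, but only queues the skb on rx_copy.completed
   *   xenvif_rx_copy_flush()
   *     - one gnttab_batch_copy() hypercall, patch up any failed copy
   *       statuses, push responses, notify, free the completed skbs
   */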

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
[re-based]
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
---
Cc: Wei Liu <wei.liu2@citrix.com>
---
 drivers/net/xen-netback/common.h |  1 +
 drivers/net/xen-netback/rx.c     | 27 +++++++++++++++++----------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 7d12a38..cf68149 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -132,6 +132,7 @@ struct xenvif_copy_state {
 	struct gnttab_copy op[COPY_BATCH_SIZE];
 	RING_IDX idx[COPY_BATCH_SIZE];
 	unsigned int num;
+	struct sk_buff_head *completed;
 };
 
 struct xenvif_queue { /* Per-queue data for xenvif */
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index ae822b8..8c8c5b5 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -133,6 +133,7 @@ static void xenvif_rx_queue_drop_expired(struct xenvif_queue *queue)
 static void xenvif_rx_copy_flush(struct xenvif_queue *queue)
 {
 	unsigned int i;
+	int notify;
 
 	gnttab_batch_copy(queue->rx_copy.op, queue->rx_copy.num);
 
@@ -154,6 +155,13 @@ static void xenvif_rx_copy_flush(struct xenvif_queue *queue)
 	}
 
 	queue->rx_copy.num = 0;
+
+	/* Push responses for all completed packets. */
+	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, notify);
+	if (notify)
+		notify_remote_via_irq(queue->rx_irq);
+
+	__skb_queue_purge(queue->rx_copy.completed);
 }
 
 static void xenvif_rx_copy_add(struct xenvif_queue *queue,
@@ -279,18 +287,10 @@ static void xenvif_rx_next_skb(struct xenvif_queue *queue,
 static void xenvif_rx_complete(struct xenvif_queue *queue,
 			       struct xenvif_pkt_state *pkt)
 {
-	int notify;
-
-	/* Complete any outstanding copy ops for this skb. */
-	xenvif_rx_copy_flush(queue);
-
-	/* Push responses and notify. */
+	/* All responses are ready to be pushed. */
 	queue->rx.rsp_prod_pvt = queue->rx.req_cons;
-	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&queue->rx, notify);
-	if (notify)
-		notify_remote_via_irq(queue->rx_irq);
 
-	dev_kfree_skb(pkt->skb);
+	__skb_queue_tail(queue->rx_copy.completed, pkt->skb);
 }
 
 static void xenvif_rx_next_chunk(struct xenvif_queue *queue,
@@ -429,13 +429,20 @@ void xenvif_rx_skb(struct xenvif_queue *queue)
 
 void xenvif_rx_action(struct xenvif_queue *queue)
 {
+	struct sk_buff_head completed_skbs;
 	unsigned int work_done = 0;
 
+	__skb_queue_head_init(&completed_skbs);
+	queue->rx_copy.completed = &completed_skbs;
+
 	while (xenvif_rx_ring_slots_available(queue) &&
 	       work_done < RX_BATCH_SIZE) {
 		xenvif_rx_skb(queue);
 		work_done++;
 	}
+
+	/* Flush any pending copies and complete all skbs. */
+	xenvif_rx_copy_flush(queue);
 }
 
 static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* [PATCH v2 net-next 7/7] xen/netback: add fraglist support for to-guest rx
  2016-10-04  9:29 [PATCH v2 net-next 0/7] xen-netback: guest rx side refactor Paul Durrant
                   ` (11 preceding siblings ...)
  2016-10-04  9:29 ` Paul Durrant
@ 2016-10-04  9:29 ` Paul Durrant
  2016-10-04 10:56   ` David Vrabel
  2016-10-04 10:56   ` [Xen-devel] " David Vrabel
  2016-10-04  9:29 ` Paul Durrant
                   ` (2 subsequent siblings)
  15 siblings, 2 replies; 40+ messages in thread
From: Paul Durrant @ 2016-10-04  9:29 UTC (permalink / raw)
  To: netdev, xen-devel; +Cc: Ross Lagerwall, Paul Durrant, Wei Liu

From: Ross Lagerwall <ross.lagerwall@citrix.com>

This allows full 64K skbuffs (which, with a 1500 MTU ethernet, are
composed of 45 fragments) to be handled by netback for to-guest rx.
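
The traversal order implied by the new frag_iter/frag fields is roughly
(a sketch of the walk done by xenvif_rx_next_chunk() and
xenvif_rx_next_frag(), not literal code):

  /* skb->head                     frag_iter == skb,      frag == -1
   * skb frags[0..nr_frags-1]      frag_iter == skb,      frag >= 0
   * first frag_list skb's head    frag_iter == that skb, frag == -1
   * its frags[0..nr_frags-1], then frag_iter->next, and so on until
   * remaining_len reaches zero.
   */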

Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
[re-based]
Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
---
Cc: Wei Liu <wei.liu2@citrix.com>
---
 drivers/net/xen-netback/interface.c |  2 +-
 drivers/net/xen-netback/rx.c        | 38 ++++++++++++++++++++++++++++---------
 2 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 211d542..4af532a 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -467,7 +467,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	dev->netdev_ops	= &xenvif_netdev_ops;
 	dev->hw_features = NETIF_F_SG |
 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
-		NETIF_F_TSO | NETIF_F_TSO6;
+		NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_FRAGLIST;
 	dev->features = dev->hw_features | NETIF_F_RXCSUM;
 	dev->ethtool_ops = &xenvif_ethtool_ops;
 
diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
index 8c8c5b5..8e9ade6 100644
--- a/drivers/net/xen-netback/rx.c
+++ b/drivers/net/xen-netback/rx.c
@@ -215,7 +215,8 @@ static unsigned int xenvif_gso_type(struct sk_buff *skb)
 struct xenvif_pkt_state {
 	struct sk_buff *skb;
 	size_t remaining_len;
-	int frag; /* frag == -1 => skb->head */
+	struct sk_buff *frag_iter;
+	int frag; /* frag == -1 => frag_iter->head */
 	unsigned int frag_offset;
 	struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX - 1];
 	unsigned int extra_count;
@@ -237,6 +238,7 @@ static void xenvif_rx_next_skb(struct xenvif_queue *queue,
 	memset(pkt, 0, sizeof(struct xenvif_pkt_state));
 
 	pkt->skb = skb;
+	pkt->frag_iter = skb;
 	pkt->remaining_len = skb->len;
 	pkt->frag = -1;
 
@@ -293,20 +295,40 @@ static void xenvif_rx_complete(struct xenvif_queue *queue,
 	__skb_queue_tail(queue->rx_copy.completed, pkt->skb);
 }
 
+static void xenvif_rx_next_frag(struct xenvif_pkt_state *pkt)
+{
+	struct sk_buff *frag_iter = pkt->frag_iter;
+	unsigned int nr_frags = skb_shinfo(frag_iter)->nr_frags;
+
+	pkt->frag++;
+	pkt->frag_offset = 0;
+
+	if (pkt->frag >= nr_frags) {
+		if (frag_iter == pkt->skb)
+			pkt->frag_iter = skb_shinfo(frag_iter)->frag_list;
+		else
+			pkt->frag_iter = frag_iter->next;
+
+		pkt->frag = -1;
+	}
+}
+
 static void xenvif_rx_next_chunk(struct xenvif_queue *queue,
 				 struct xenvif_pkt_state *pkt,
 				 unsigned int offset, void **data,
 				 size_t *len)
 {
-	struct sk_buff *skb = pkt->skb;
+	struct sk_buff *frag_iter = pkt->frag_iter;
 	void *frag_data;
 	size_t frag_len, chunk_len;
 
+	BUG_ON(!frag_iter);
+
 	if (pkt->frag == -1) {
-		frag_data = skb->data;
-		frag_len = skb_headlen(skb);
+		frag_data = frag_iter->data;
+		frag_len = skb_headlen(frag_iter);
 	} else {
-		skb_frag_t *frag = &skb_shinfo(skb)->frags[pkt->frag];
+		skb_frag_t *frag = &skb_shinfo(frag_iter)->frags[pkt->frag];
 
 		frag_data = skb_frag_address(frag);
 		frag_len = skb_frag_size(frag);
@@ -322,10 +344,8 @@ static void xenvif_rx_next_chunk(struct xenvif_queue *queue,
 	pkt->frag_offset += chunk_len;
 
 	/* Advance to next frag? */
-	if (frag_len == chunk_len) {
-		pkt->frag++;
-		pkt->frag_offset = 0;
-	}
+	if (frag_len == chunk_len)
+		xenvif_rx_next_frag(pkt);
 
 	*data = frag_data;
 	*len = chunk_len;
-- 
2.1.4

^ permalink raw reply related	[flat|nested] 40+ messages in thread

* Re: [Xen-devel] [PATCH v2 net-next 2/7] xen-netback: retire guest rx side prefix GSO feature
  2016-10-04  9:29 ` [PATCH v2 net-next 2/7] xen-netback: retire guest rx side prefix GSO feature Paul Durrant
  2016-10-04 10:14   ` David Vrabel
@ 2016-10-04 10:14   ` David Vrabel
  2016-10-04 12:52   ` Konrad Rzeszutek Wilk
  2016-10-04 12:52   ` [Xen-devel] " Konrad Rzeszutek Wilk
  3 siblings, 0 replies; 40+ messages in thread
From: David Vrabel @ 2016-10-04 10:14 UTC (permalink / raw)
  To: Paul Durrant, netdev, xen-devel; +Cc: Wei Liu

On 04/10/16 10:29, Paul Durrant wrote:
> As far as I am aware only very old Windows network frontends make use of
> this style of passing GSO packets from backend to frontend. These
> frontends can easily be replaced by the freely available Xen Project
> Windows PV network frontend, which uses the 'default' mechanism for
> passing GSO packets, which is also used by all Linux frontends.
> 
> NOTE: Removal of this feature will not cause breakage in old Windows
>       frontends. They simply will no longer receive GSO packets - the
>       packets instead being fragmented in the backend.

Reviewed-by: David Vrabel <david.vrabel@citrix.com>

David

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [Xen-devel] [PATCH v2 net-next 7/7] xen/netback: add fraglist support for to-guest rx
  2016-10-04  9:29 ` [PATCH v2 net-next 7/7] xen/netback: add fraglist support for to-guest rx Paul Durrant
  2016-10-04 10:56   ` David Vrabel
@ 2016-10-04 10:56   ` David Vrabel
  1 sibling, 0 replies; 40+ messages in thread
From: David Vrabel @ 2016-10-04 10:56 UTC (permalink / raw)
  To: Paul Durrant, netdev, xen-devel; +Cc: Ross Lagerwall, Wei Liu

On 04/10/16 10:29, Paul Durrant wrote:
> From: Ross Lagerwall <ross.lagerwall@citrix.com>
> 
> This allows full 64K skbuffs (with 1500 mtu ethernet, composed of 45
> fragments) to be handled by netback for to-guest rx.

Reviewed-by: David Vrabel <david.vrabel@citrix.com>

David

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [Xen-devel] [PATCH v2 net-next 5/7] xen-netback: process guest rx packets in batches
  2016-10-04  9:29 ` [PATCH v2 net-next 5/7] xen-netback: process guest rx packets in batches Paul Durrant
  2016-10-04 12:47   ` Konrad Rzeszutek Wilk
@ 2016-10-04 12:47   ` Konrad Rzeszutek Wilk
  2016-10-04 14:02     ` Paul Durrant
                       ` (3 more replies)
  1 sibling, 4 replies; 40+ messages in thread
From: Konrad Rzeszutek Wilk @ 2016-10-04 12:47 UTC (permalink / raw)
  To: Paul Durrant; +Cc: netdev, xen-devel, Wei Liu, David Vrabel

On Tue, Oct 04, 2016 at 10:29:16AM +0100, Paul Durrant wrote:
> From: David Vrabel <david.vrabel@citrix.com>
> 
> Instead of only placing one skb on the guest rx ring at a time, process
> a batch of up-to 64.  This improves performance by ~10% in some tests.

And does it regress latency workloads?

What are those 'some tests' you speak of?

Thanks.
> 
> Signed-off-by: David Vrabel <david.vrabel@citrix.com>
> [re-based]
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> ---
> Cc: Wei Liu <wei.liu2@citrix.com>
> ---
>  drivers/net/xen-netback/rx.c | 15 ++++++++++++++-
>  1 file changed, 14 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
> index 9548709..ae822b8 100644
> --- a/drivers/net/xen-netback/rx.c
> +++ b/drivers/net/xen-netback/rx.c
> @@ -399,7 +399,7 @@ static void xenvif_rx_extra_slot(struct xenvif_queue *queue,
>  	BUG();
>  }
>  
> -void xenvif_rx_action(struct xenvif_queue *queue)
> +void xenvif_rx_skb(struct xenvif_queue *queue)
>  {
>  	struct xenvif_pkt_state pkt;
>  
> @@ -425,6 +425,19 @@ void xenvif_rx_action(struct xenvif_queue *queue)
>  	xenvif_rx_complete(queue, &pkt);
>  }
>  
> +#define RX_BATCH_SIZE 64
> +
> +void xenvif_rx_action(struct xenvif_queue *queue)
> +{
> +	unsigned int work_done = 0;
> +
> +	while (xenvif_rx_ring_slots_available(queue) &&
> +	       work_done < RX_BATCH_SIZE) {
> +		xenvif_rx_skb(queue);
> +		work_done++;
> +	}
> +}
> +
>  static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)
>  {
>  	RING_IDX prod, cons;
> -- 
> 2.1.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [Xen-devel] [PATCH v2 net-next 4/7] xen-netback: immediately wake tx queue when guest rx queue has space
  2016-10-04  9:29 ` Paul Durrant
@ 2016-10-04 12:48   ` Konrad Rzeszutek Wilk
  2016-10-04 13:56     ` Paul Durrant
  2016-10-04 13:56     ` Paul Durrant
  2016-10-04 12:48   ` Konrad Rzeszutek Wilk
  1 sibling, 2 replies; 40+ messages in thread
From: Konrad Rzeszutek Wilk @ 2016-10-04 12:48 UTC (permalink / raw)
  To: Paul Durrant; +Cc: netdev, xen-devel, Wei Liu, David Vrabel

On Tue, Oct 04, 2016 at 02:29:15AM -0700, Paul Durrant wrote:
> From: David Vrabel <david.vrabel@citrix.com>
> 
> When an skb is removed from the guest rx queue, immediately wake the
> tx queue, instead of after processing them.

Please, could the description explain why?

> 
> Signed-off-by: David Vrabel <david.vrabel@citrix.com>
> [re-based]
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> ---
> Cc: Wei Liu <wei.liu2@citrix.com>
> ---
>  drivers/net/xen-netback/rx.c | 24 ++++++++----------------
>  1 file changed, 8 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
> index b0ce4c6..9548709 100644
> --- a/drivers/net/xen-netback/rx.c
> +++ b/drivers/net/xen-netback/rx.c
> @@ -92,27 +92,21 @@ static struct sk_buff *xenvif_rx_dequeue(struct xenvif_queue *queue)
>  	spin_lock_irq(&queue->rx_queue.lock);
>  
>  	skb = __skb_dequeue(&queue->rx_queue);
> -	if (skb)
> +	if (skb) {
>  		queue->rx_queue_len -= skb->len;
> +		if (queue->rx_queue_len < queue->rx_queue_max) {
> +			struct netdev_queue *txq;
> +
> +			txq = netdev_get_tx_queue(queue->vif->dev, queue->id);
> +			netif_tx_wake_queue(txq);
> +		}
> +	}
>  
>  	spin_unlock_irq(&queue->rx_queue.lock);
>  
>  	return skb;
>  }
>  
> -static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue)
> -{
> -	spin_lock_irq(&queue->rx_queue.lock);
> -
> -	if (queue->rx_queue_len < queue->rx_queue_max) {
> -		struct net_device *dev = queue->vif->dev;
> -
> -		netif_tx_wake_queue(netdev_get_tx_queue(dev, queue->id));
> -	}
> -
> -	spin_unlock_irq(&queue->rx_queue.lock);
> -}
> -
>  static void xenvif_rx_queue_purge(struct xenvif_queue *queue)
>  {
>  	struct sk_buff *skb;
> @@ -585,8 +579,6 @@ int xenvif_kthread_guest_rx(void *data)
>  		 */
>  		xenvif_rx_queue_drop_expired(queue);
>  
> -		xenvif_rx_queue_maybe_wake(queue);
> -
>  		cond_resched();
>  	}
>  
> -- 
> 2.1.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [Xen-devel] [PATCH v2 net-next 2/7] xen-netback: retire guest rx side prefix GSO feature
  2016-10-04  9:29 ` [PATCH v2 net-next 2/7] xen-netback: retire guest rx side prefix GSO feature Paul Durrant
                     ` (2 preceding siblings ...)
  2016-10-04 12:52   ` Konrad Rzeszutek Wilk
@ 2016-10-04 12:52   ` Konrad Rzeszutek Wilk
  2016-10-04 13:35     ` Paul Durrant
  2016-10-04 13:35     ` [Xen-devel] " Paul Durrant
  3 siblings, 2 replies; 40+ messages in thread
From: Konrad Rzeszutek Wilk @ 2016-10-04 12:52 UTC (permalink / raw)
  To: Paul Durrant, annie.li, joao.m.martins; +Cc: netdev, xen-devel, Wei Liu

On Tue, Oct 04, 2016 at 10:29:13AM +0100, Paul Durrant wrote:
> As far as I am aware only very old Windows network frontends make use of
> this style of passing GSO packets from backend to frontend. These
> frontends can easily be replaced by the freely available Xen Project
> Windows PV network frontend, which uses the 'default' mechanism for
> passing GSO packets, which is also used by all Linux frontends.

It is not that simple. Some companies have extra juice in their Windows
frontends so can't easily swap over to the Xen Project one.

Either way CC-ing Annie

Also would it make sense to CC the FreeBSD and NetBSD maintainers of
their PV drivers just to make sure? (Or has that been confirmed)

> 
> NOTE: Removal of this feature will not cause breakage in old Windows
>       frontends. They simply will no longer receive GSO packets - the
>       packets instead being fragmented in the backend.

Did you also test this with SuSE/Novell Windows PV drivers?

Thanks.
> 
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> ---
> Cc: Wei Liu <wei.liu2@citrix.com>
> ---
>  drivers/net/xen-netback/common.h    |  1 -
>  drivers/net/xen-netback/interface.c |  4 ++--
>  drivers/net/xen-netback/rx.c        | 26 --------------------------
>  drivers/net/xen-netback/xenbus.c    | 21 ---------------------
>  4 files changed, 2 insertions(+), 50 deletions(-)
> 
> diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
> index b38fb2c..0ba5910 100644
> --- a/drivers/net/xen-netback/common.h
> +++ b/drivers/net/xen-netback/common.h
> @@ -260,7 +260,6 @@ struct xenvif {
>  
>  	/* Frontend feature information. */
>  	int gso_mask;
> -	int gso_prefix_mask;
>  
>  	u8 can_sg:1;
>  	u8 ip_csum:1;
> diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
> index fb50c6d..211d542 100644
> --- a/drivers/net/xen-netback/interface.c
> +++ b/drivers/net/xen-netback/interface.c
> @@ -319,9 +319,9 @@ static netdev_features_t xenvif_fix_features(struct net_device *dev,
>  
>  	if (!vif->can_sg)
>  		features &= ~NETIF_F_SG;
> -	if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV4))
> +	if (~(vif->gso_mask) & GSO_BIT(TCPV4))
>  		features &= ~NETIF_F_TSO;
> -	if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV6))
> +	if (~(vif->gso_mask) & GSO_BIT(TCPV6))
>  		features &= ~NETIF_F_TSO6;
>  	if (!vif->ip_csum)
>  		features &= ~NETIF_F_IP_CSUM;
> diff --git a/drivers/net/xen-netback/rx.c b/drivers/net/xen-netback/rx.c
> index 03836aa..6bd7d6e 100644
> --- a/drivers/net/xen-netback/rx.c
> +++ b/drivers/net/xen-netback/rx.c
> @@ -347,16 +347,6 @@ static int xenvif_gop_skb(struct sk_buff *skb,
>  			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
>  	}
>  
> -	/* Set up a GSO prefix descriptor, if necessary */
> -	if ((1 << gso_type) & vif->gso_prefix_mask) {
> -		RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
> -		meta = npo->meta + npo->meta_prod++;
> -		meta->gso_type = gso_type;
> -		meta->gso_size = skb_shinfo(skb)->gso_size;
> -		meta->size = 0;
> -		meta->id = req.id;
> -	}
> -
>  	RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
>  	meta = npo->meta + npo->meta_prod++;
>  
> @@ -511,22 +501,6 @@ static void xenvif_rx_action(struct xenvif_queue *queue)
>  	while ((skb = __skb_dequeue(&rxq)) != NULL) {
>  		struct xen_netif_extra_info *extra = NULL;
>  
> -		if ((1 << queue->meta[npo.meta_cons].gso_type) &
> -		    vif->gso_prefix_mask) {
> -			resp = RING_GET_RESPONSE(&queue->rx,
> -						 queue->rx.rsp_prod_pvt++);
> -
> -			resp->flags = XEN_NETRXF_gso_prefix |
> -				      XEN_NETRXF_more_data;
> -
> -			resp->offset = queue->meta[npo.meta_cons].gso_size;
> -			resp->id = queue->meta[npo.meta_cons].id;
> -			resp->status = XENVIF_RX_CB(skb)->meta_slots_used;
> -
> -			npo.meta_cons++;
> -			XENVIF_RX_CB(skb)->meta_slots_used--;
> -		}
> -
>  		queue->stats.tx_bytes += skb->len;
>  		queue->stats.tx_packets++;
>  
> diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
> index daf4c78..7056404 100644
> --- a/drivers/net/xen-netback/xenbus.c
> +++ b/drivers/net/xen-netback/xenbus.c
> @@ -1135,7 +1135,6 @@ static int read_xenbus_vif_flags(struct backend_info *be)
>  	vif->can_sg = !!val;
>  
>  	vif->gso_mask = 0;
> -	vif->gso_prefix_mask = 0;
>  
>  	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
>  			 "%d", &val) < 0)
> @@ -1143,32 +1142,12 @@ static int read_xenbus_vif_flags(struct backend_info *be)
>  	if (val)
>  		vif->gso_mask |= GSO_BIT(TCPV4);
>  
> -	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-prefix",
> -			 "%d", &val) < 0)
> -		val = 0;
> -	if (val)
> -		vif->gso_prefix_mask |= GSO_BIT(TCPV4);
> -
>  	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6",
>  			 "%d", &val) < 0)
>  		val = 0;
>  	if (val)
>  		vif->gso_mask |= GSO_BIT(TCPV6);
>  
> -	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6-prefix",
> -			 "%d", &val) < 0)
> -		val = 0;
> -	if (val)
> -		vif->gso_prefix_mask |= GSO_BIT(TCPV6);
> -
> -	if (vif->gso_mask & vif->gso_prefix_mask) {
> -		xenbus_dev_fatal(dev, err,
> -				 "%s: gso and gso prefix flags are not "
> -				 "mutually exclusive",
> -				 dev->otherend);
> -		return -EOPNOTSUPP;
> -	}
> -
>  	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
>  			 "%d", &val) < 0)
>  		val = 0;
> -- 
> 2.1.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [Xen-devel] [PATCH v2 net-next 2/7] xen-netback: retire guest rx side prefix GSO feature
  2016-10-04 12:52   ` [Xen-devel] " Konrad Rzeszutek Wilk
  2016-10-04 13:35     ` Paul Durrant
@ 2016-10-04 13:35     ` Paul Durrant
  2016-10-04 14:24       ` Konrad Rzeszutek Wilk
  2016-10-04 14:24       ` [Xen-devel] " Konrad Rzeszutek Wilk
  1 sibling, 2 replies; 40+ messages in thread
From: Paul Durrant @ 2016-10-04 13:35 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk, annie.li, joao.m.martins
  Cc: netdev, xen-devel, Wei Liu

> -----Original Message-----
> From: Konrad Rzeszutek Wilk [mailto:konrad.wilk@oracle.com]
> Sent: 04 October 2016 13:52
> To: Paul Durrant <Paul.Durrant@citrix.com>; annie.li@oracle.com;
> joao.m.martins@oracle.com
> Cc: netdev@vger.kernel.org; xen-devel@lists.xenproject.org; Wei Liu
> <wei.liu2@citrix.com>
> Subject: Re: [Xen-devel] [PATCH v2 net-next 2/7] xen-netback: retire guest
> rx side prefix GSO feature
> 
> On Tue, Oct 04, 2016 at 10:29:13AM +0100, Paul Durrant wrote:
> > As far as I am aware only very old Windows network frontends make use
> > of this style of passing GSO packets from backend to frontend. These
> > frontends can easily be replaced by the freely available Xen Project
> > Windows PV network frontend, which uses the 'default' mechanism for
> > passing GSO packets, which is also used by all Linux frontends.
> 
> It is not that simple. Some companies have extra juice in their Windows
> frontends so can't easily swap over to the Xen Project one.

Ok, then those frontends will continue to work, but they won't get GSO packets any more. Prefix GSO has never been specified in the canonical netif header and so has been in a limbo state forever so such frontends have always been on borrowed time and only just happened to work against a linux backend. If someone wants to actually specify prefix GSO properly then it could be added back in, but it should not be necessary now that the RX side req<->rsp identity relation is documented (http://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=xen/include/public/io/netif.h;hb=HEAD#l729).
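
For anyone not steeped in netif.h, a rough user-space sketch of the difference
between the two signalling styles is below. The flag values and struct layouts
are simplified stand-ins (the authoritative definitions live in
xen/include/public/io/netif.h), so treat it as illustration rather than
protocol documentation.

/* Illustration only - flag values and layouts are stand-ins, not netif.h. */
#include <stdint.h>
#include <stdio.h>

#define NETRXF_extra_info (1 << 3)  /* placeholder bit values */
#define NETRXF_gso_prefix (1 << 4)
#define NETRXF_more_data  (1 << 5)
#define EXTRA_TYPE_GSO    1

struct rx_response {                /* one rx ring slot's response */
        uint16_t id;
        uint16_t offset;
        uint16_t flags;
        int16_t  status;
};

struct extra_info {                 /* overlays a ring slot */
        uint8_t  type;
        uint8_t  flags;
        uint16_t gso_size;
        uint8_t  gso_type;
};

int main(void)
{
        /* Retired "prefix" style (the code this patch removes): a leading
         * slot that carries no data, with the GSO size smuggled into its
         * offset field. */
        struct rx_response prefix = {
                .offset = 1448,                         /* gso_size */
                .flags  = NETRXF_gso_prefix | NETRXF_more_data,
                .status = 2,                            /* meta slots used */
        };

        /* "Default" style: the first data slot flags that an extra-info
         * slot follows, and that slot carries the GSO metadata. */
        struct rx_response first = { .flags = NETRXF_extra_info,
                                     .status = 1500 };
        struct extra_info gso = { .type = EXTRA_TYPE_GSO,
                                  .gso_size = 1448, .gso_type = 1 /* TCPv4 */ };

        printf("prefix:  flags=%#x offset(gso_size)=%u status=%d\n",
               (unsigned)prefix.flags, (unsigned)prefix.offset, prefix.status);
        printf("default: flags=%#x then extra slot size=%u type=%u\n",
               (unsigned)first.flags, (unsigned)gso.gso_size,
               (unsigned)gso.gso_type);
        return 0;
}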

> 
> Either way CC-ing Annie
> 
> Also would it make sense to CC the FreeBSD and NetBSD maintainers of their
> PV drivers just to make sure? (Or has that been confirmed)
> 

I could do that, but I'd hope that they would be subscribed to xen-devel and will chime in if there's likely to be a problem.

> >
> > NOTE: Removal of this feature will not cause breakage in old Windows
> >       frontends. They simply will no longer receive GSO packets - the
> >       packets instead being fragmented in the backend.
> 
> Did you also test this with SuSE/Novell Windows PV drivers?
> 

No, I don't have copies of these. Internal XenServer testing has not shown up any issues with 'legacy' PV drivers though (which do still have the prefix GSO code in).

  Paul

> Thanks.
> >
> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > ---
> > Cc: Wei Liu <wei.liu2@citrix.com>
> > ---
> >  drivers/net/xen-netback/common.h    |  1 -
> >  drivers/net/xen-netback/interface.c |  4 ++--
> >  drivers/net/xen-netback/rx.c        | 26 --------------------------
> >  drivers/net/xen-netback/xenbus.c    | 21 ---------------------
> >  4 files changed, 2 insertions(+), 50 deletions(-)
> >
> > diff --git a/drivers/net/xen-netback/common.h
> > b/drivers/net/xen-netback/common.h
> > index b38fb2c..0ba5910 100644
> > --- a/drivers/net/xen-netback/common.h
> > +++ b/drivers/net/xen-netback/common.h
> > @@ -260,7 +260,6 @@ struct xenvif {
> >
> >  	/* Frontend feature information. */
> >  	int gso_mask;
> > -	int gso_prefix_mask;
> >
> >  	u8 can_sg:1;
> >  	u8 ip_csum:1;
> > diff --git a/drivers/net/xen-netback/interface.c
> > b/drivers/net/xen-netback/interface.c
> > index fb50c6d..211d542 100644
> > --- a/drivers/net/xen-netback/interface.c
> > +++ b/drivers/net/xen-netback/interface.c
> > @@ -319,9 +319,9 @@ static netdev_features_t
> > xenvif_fix_features(struct net_device *dev,
> >
> >  	if (!vif->can_sg)
> >  		features &= ~NETIF_F_SG;
> > -	if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV4))
> > +	if (~(vif->gso_mask) & GSO_BIT(TCPV4))
> >  		features &= ~NETIF_F_TSO;
> > -	if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV6))
> > +	if (~(vif->gso_mask) & GSO_BIT(TCPV6))
> >  		features &= ~NETIF_F_TSO6;
> >  	if (!vif->ip_csum)
> >  		features &= ~NETIF_F_IP_CSUM;
> > diff --git a/drivers/net/xen-netback/rx.c
> > b/drivers/net/xen-netback/rx.c index 03836aa..6bd7d6e 100644
> > --- a/drivers/net/xen-netback/rx.c
> > +++ b/drivers/net/xen-netback/rx.c
> > @@ -347,16 +347,6 @@ static int xenvif_gop_skb(struct sk_buff *skb,
> >  			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
> >  	}
> >
> > -	/* Set up a GSO prefix descriptor, if necessary */
> > -	if ((1 << gso_type) & vif->gso_prefix_mask) {
> > -		RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++,
> &req);
> > -		meta = npo->meta + npo->meta_prod++;
> > -		meta->gso_type = gso_type;
> > -		meta->gso_size = skb_shinfo(skb)->gso_size;
> > -		meta->size = 0;
> > -		meta->id = req.id;
> > -	}
> > -
> >  	RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
> >  	meta = npo->meta + npo->meta_prod++;
> >
> > @@ -511,22 +501,6 @@ static void xenvif_rx_action(struct xenvif_queue
> *queue)
> >  	while ((skb = __skb_dequeue(&rxq)) != NULL) {
> >  		struct xen_netif_extra_info *extra = NULL;
> >
> > -		if ((1 << queue->meta[npo.meta_cons].gso_type) &
> > -		    vif->gso_prefix_mask) {
> > -			resp = RING_GET_RESPONSE(&queue->rx,
> > -						 queue->rx.rsp_prod_pvt++);
> > -
> > -			resp->flags = XEN_NETRXF_gso_prefix |
> > -				      XEN_NETRXF_more_data;
> > -
> > -			resp->offset = queue-
> >meta[npo.meta_cons].gso_size;
> > -			resp->id = queue->meta[npo.meta_cons].id;
> > -			resp->status = XENVIF_RX_CB(skb)-
> >meta_slots_used;
> > -
> > -			npo.meta_cons++;
> > -			XENVIF_RX_CB(skb)->meta_slots_used--;
> > -		}
> > -
> >  		queue->stats.tx_bytes += skb->len;
> >  		queue->stats.tx_packets++;
> >
> > diff --git a/drivers/net/xen-netback/xenbus.c
> > b/drivers/net/xen-netback/xenbus.c
> > index daf4c78..7056404 100644
> > --- a/drivers/net/xen-netback/xenbus.c
> > +++ b/drivers/net/xen-netback/xenbus.c
> > @@ -1135,7 +1135,6 @@ static int read_xenbus_vif_flags(struct
> backend_info *be)
> >  	vif->can_sg = !!val;
> >
> >  	vif->gso_mask = 0;
> > -	vif->gso_prefix_mask = 0;
> >
> >  	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
> >  			 "%d", &val) < 0)
> > @@ -1143,32 +1142,12 @@ static int read_xenbus_vif_flags(struct
> backend_info *be)
> >  	if (val)
> >  		vif->gso_mask |= GSO_BIT(TCPV4);
> >
> > -	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-
> prefix",
> > -			 "%d", &val) < 0)
> > -		val = 0;
> > -	if (val)
> > -		vif->gso_prefix_mask |= GSO_BIT(TCPV4);
> > -
> >  	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6",
> >  			 "%d", &val) < 0)
> >  		val = 0;
> >  	if (val)
> >  		vif->gso_mask |= GSO_BIT(TCPV6);
> >
> > -	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6-
> prefix",
> > -			 "%d", &val) < 0)
> > -		val = 0;
> > -	if (val)
> > -		vif->gso_prefix_mask |= GSO_BIT(TCPV6);
> > -
> > -	if (vif->gso_mask & vif->gso_prefix_mask) {
> > -		xenbus_dev_fatal(dev, err,
> > -				 "%s: gso and gso prefix flags are not "
> > -				 "mutually exclusive",
> > -				 dev->otherend);
> > -		return -EOPNOTSUPP;
> > -	}
> > -
> >  	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-
> offload",
> >  			 "%d", &val) < 0)
> >  		val = 0;
> > --
> > 2.1.4
> >
> >
> > _______________________________________________
> > Xen-devel mailing list
> > Xen-devel@lists.xen.org
> > https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [Xen-devel] [PATCH v2 net-next 4/7] xen-netback: immediately wake tx queue when guest rx queue has space
  2016-10-04 12:48   ` [Xen-devel] " Konrad Rzeszutek Wilk
@ 2016-10-04 13:56     ` Paul Durrant
  2016-10-04 13:56     ` Paul Durrant
  1 sibling, 0 replies; 40+ messages in thread
From: Paul Durrant @ 2016-10-04 13:56 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: netdev, xen-devel, Wei Liu, David Vrabel

> -----Original Message-----
> From: Konrad Rzeszutek Wilk [mailto:konrad.wilk@oracle.com]
> Sent: 04 October 2016 13:49
> To: Paul Durrant <Paul.Durrant@citrix.com>
> Cc: netdev@vger.kernel.org; xen-devel@lists.xenproject.org; Wei Liu
> <wei.liu2@citrix.com>; David Vrabel <david.vrabel@citrix.com>
> Subject: Re: [Xen-devel] [PATCH v2 net-next 4/7] xen-netback: immediately
> wake tx queue when guest rx queue has space
> 
> On Tue, Oct 04, 2016 at 02:29:15AM -0700, Paul Durrant wrote:
> > From: David Vrabel <david.vrabel@citrix.com>
> >
> > When an skb is removed from the guest rx queue, immediately wake the
> > tx queue, instead of after processing them.
> 
> Please, could the description explain why?
> 

Is it not reasonably obvious that it improves parallelism between filling and draining the queue? I could add a comment if you think it needs spelling out.

  Paul
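
If it helps, the effect shows up even in a toy producer/consumer model (plain
pthreads, nothing netback-specific; QUEUE_MAX and the sleep are arbitrary,
build with -pthread): the consumer wakes the producer from inside the dequeue
critical section, as the patch does, instead of once per processing pass as
the old xenvif_rx_queue_maybe_wake() did.

/* Toy model - plain pthreads, not netback code; constants are arbitrary. */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

#define QUEUE_MAX 4
#define NPACKETS  16

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  can_fill = PTHREAD_COND_INITIALIZER;
static int queue_len;                   /* like queue->rx_queue_len */

static void *producer(void *arg)        /* stands in for the stopped tx side */
{
        (void)arg;
        for (int i = 0; i < NPACKETS; i++) {
                pthread_mutex_lock(&lock);
                while (queue_len >= QUEUE_MAX)  /* "queue stopped" */
                        pthread_cond_wait(&can_fill, &lock);
                queue_len++;
                pthread_mutex_unlock(&lock);
        }
        return NULL;
}

int main(void)
{
        pthread_t t;
        int done = 0;

        pthread_create(&t, NULL, producer, NULL);

        while (done < NPACKETS) {
                pthread_mutex_lock(&lock);
                if (queue_len > 0) {
                        queue_len--;            /* the dequeue */
                        done++;
                        if (queue_len < QUEUE_MAX)
                                pthread_cond_signal(&can_fill); /* wake now */
                }
                pthread_mutex_unlock(&lock);
                usleep(1000);   /* pretend to copy the packet to the guest */
        }

        pthread_join(t, NULL);
        printf("drained %d packets\n", done);
        return 0;
}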

> >
> > Signed-off-by: David Vrabel <david.vrabel@citrix.com> [re-based]
> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > ---
> > Cc: Wei Liu <wei.liu2@citrix.com>
> > ---
> >  drivers/net/xen-netback/rx.c | 24 ++++++++----------------
> >  1 file changed, 8 insertions(+), 16 deletions(-)
> >
> > diff --git a/drivers/net/xen-netback/rx.c
> > b/drivers/net/xen-netback/rx.c index b0ce4c6..9548709 100644
> > --- a/drivers/net/xen-netback/rx.c
> > +++ b/drivers/net/xen-netback/rx.c
> > @@ -92,27 +92,21 @@ static struct sk_buff *xenvif_rx_dequeue(struct
> xenvif_queue *queue)
> >  	spin_lock_irq(&queue->rx_queue.lock);
> >
> >  	skb = __skb_dequeue(&queue->rx_queue);
> > -	if (skb)
> > +	if (skb) {
> >  		queue->rx_queue_len -= skb->len;
> > +		if (queue->rx_queue_len < queue->rx_queue_max) {
> > +			struct netdev_queue *txq;
> > +
> > +			txq = netdev_get_tx_queue(queue->vif->dev,
> queue->id);
> > +			netif_tx_wake_queue(txq);
> > +		}
> > +	}
> >
> >  	spin_unlock_irq(&queue->rx_queue.lock);
> >
> >  	return skb;
> >  }
> >
> > -static void xenvif_rx_queue_maybe_wake(struct xenvif_queue *queue) -
> {
> > -	spin_lock_irq(&queue->rx_queue.lock);
> > -
> > -	if (queue->rx_queue_len < queue->rx_queue_max) {
> > -		struct net_device *dev = queue->vif->dev;
> > -
> > -		netif_tx_wake_queue(netdev_get_tx_queue(dev, queue-
> >id));
> > -	}
> > -
> > -	spin_unlock_irq(&queue->rx_queue.lock);
> > -}
> > -
> >  static void xenvif_rx_queue_purge(struct xenvif_queue *queue)  {
> >  	struct sk_buff *skb;
> > @@ -585,8 +579,6 @@ int xenvif_kthread_guest_rx(void *data)
> >  		 */
> >  		xenvif_rx_queue_drop_expired(queue);
> >
> > -		xenvif_rx_queue_maybe_wake(queue);
> > -
> >  		cond_resched();
> >  	}
> >
> > --
> > 2.1.4
> >
> >
> > _______________________________________________
> > Xen-devel mailing list
> > Xen-devel@lists.xen.org
> > https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 40+ messages in thread

* RE: [Xen-devel] [PATCH v2 net-next 5/7] xen-netback: process guest rx packets in batches
  2016-10-04 12:47   ` [Xen-devel] " Konrad Rzeszutek Wilk
  2016-10-04 14:02     ` Paul Durrant
@ 2016-10-04 14:02     ` Paul Durrant
  2016-10-04 14:51     ` David Vrabel
  2016-10-04 14:51     ` [Xen-devel] " David Vrabel
  3 siblings, 0 replies; 40+ messages in thread
From: Paul Durrant @ 2016-10-04 14:02 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk; +Cc: netdev, xen-devel, Wei Liu, David Vrabel

> -----Original Message-----
> From: Konrad Rzeszutek Wilk [mailto:konrad.wilk@oracle.com]
> Sent: 04 October 2016 13:48
> To: Paul Durrant <Paul.Durrant@citrix.com>
> Cc: netdev@vger.kernel.org; xen-devel@lists.xenproject.org; Wei Liu
> <wei.liu2@citrix.com>; David Vrabel <david.vrabel@citrix.com>
> Subject: Re: [Xen-devel] [PATCH v2 net-next 5/7] xen-netback: process
> guest rx packets in batches
> 
> On Tue, Oct 04, 2016 at 10:29:16AM +0100, Paul Durrant wrote:
> > From: David Vrabel <david.vrabel@citrix.com>
> >
> > Instead of only placing one skb on the guest rx ring at a time,
> > process a batch of up-to 64.  This improves performance by ~10% in some
> tests.

I believe the tests are mainly throughput tests, but David would know the specifics.

> 
> And does it regress latency workloads?
> 

It shouldn't, although I have not run ping-pong tests to verify. If packets are only placed on the vif queue singly though then the batching should have no effect, since rx_action will complete and do the push as before.

  Paul
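
The structure of the batched loop makes that point easy to see; below is a
stand-alone C toy (counters instead of real skbs and ring slots) that mirrors
the shape of the new xenvif_rx_action(): a single queued skb is processed in
one pass exactly as before, while a burst is capped at RX_BATCH_SIZE.

/* Toy model - counters stand in for skbs and ring slots. */
#include <stdio.h>

#define RX_BATCH_SIZE 64

static int queued;      /* skbs waiting on the guest rx queue */
static int ring_slots;  /* free slots on the shared ring */

static int slots_available(void)
{
        return ring_slots > 0;
}

static void rx_skb(void)
{
        queued--;
        ring_slots--;   /* pretend each skb needs one slot */
}

static int rx_action(void)
{
        int work_done = 0;

        while (queued > 0 && slots_available() &&
               work_done < RX_BATCH_SIZE) {
                rx_skb();
                work_done++;
        }
        return work_done;
}

int main(void)
{
        queued = 1;
        ring_slots = 256;
        printf("single skb:   work_done=%d\n", rx_action());           /* 1 */

        queued = 200;
        ring_slots = 256;
        printf("burst of 200: work_done=%d (capped)\n", rx_action());  /* 64 */
        return 0;
}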

> What are those 'some tests' you speak of?
> 
> Thanks.
> >
> > Signed-off-by: David Vrabel <david.vrabel@citrix.com> [re-based]
> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > ---
> > Cc: Wei Liu <wei.liu2@citrix.com>
> > ---
> >  drivers/net/xen-netback/rx.c | 15 ++++++++++++++-
> >  1 file changed, 14 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/net/xen-netback/rx.c
> > b/drivers/net/xen-netback/rx.c index 9548709..ae822b8 100644
> > --- a/drivers/net/xen-netback/rx.c
> > +++ b/drivers/net/xen-netback/rx.c
> > @@ -399,7 +399,7 @@ static void xenvif_rx_extra_slot(struct
> xenvif_queue *queue,
> >  	BUG();
> >  }
> >
> > -void xenvif_rx_action(struct xenvif_queue *queue)
> > +void xenvif_rx_skb(struct xenvif_queue *queue)
> >  {
> >  	struct xenvif_pkt_state pkt;
> >
> > @@ -425,6 +425,19 @@ void xenvif_rx_action(struct xenvif_queue
> *queue)
> >  	xenvif_rx_complete(queue, &pkt);
> >  }
> >
> > +#define RX_BATCH_SIZE 64
> > +
> > +void xenvif_rx_action(struct xenvif_queue *queue) {
> > +	unsigned int work_done = 0;
> > +
> > +	while (xenvif_rx_ring_slots_available(queue) &&
> > +	       work_done < RX_BATCH_SIZE) {
> > +		xenvif_rx_skb(queue);
> > +		work_done++;
> > +	}
> > +}
> > +
> >  static bool xenvif_rx_queue_stalled(struct xenvif_queue *queue)  {
> >  	RING_IDX prod, cons;
> > --
> > 2.1.4
> >
> >
> > _______________________________________________
> > Xen-devel mailing list
> > Xen-devel@lists.xen.org
> > https://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [Xen-devel] [PATCH v2 net-next 2/7] xen-netback: retire guest rx side prefix GSO feature
  2016-10-04 13:35     ` [Xen-devel] " Paul Durrant
  2016-10-04 14:24       ` Konrad Rzeszutek Wilk
@ 2016-10-04 14:24       ` Konrad Rzeszutek Wilk
  2016-10-05 15:30         ` Roger Pau Monné
  2016-10-05 15:30         ` [Xen-devel] " Roger Pau Monné
  1 sibling, 2 replies; 40+ messages in thread
From: Konrad Rzeszutek Wilk @ 2016-10-04 14:24 UTC (permalink / raw)
  To: Paul Durrant, bouyer, roger.pau
  Cc: annie.li, joao.m.martins, netdev, xen-devel, Wei Liu

On Tue, Oct 04, 2016 at 01:35:41PM +0000, Paul Durrant wrote:
> > -----Original Message-----
> > From: Konrad Rzeszutek Wilk [mailto:konrad.wilk@oracle.com]
> > Sent: 04 October 2016 13:52
> > To: Paul Durrant <Paul.Durrant@citrix.com>; annie.li@oracle.com;
> > joao.m.martins@oracle.com
> > Cc: netdev@vger.kernel.org; xen-devel@lists.xenproject.org; Wei Liu
> > <wei.liu2@citrix.com>
> > Subject: Re: [Xen-devel] [PATCH v2 net-next 2/7] xen-netback: retire guest
> > rx side prefix GSO feature
> > 
> > On Tue, Oct 04, 2016 at 10:29:13AM +0100, Paul Durrant wrote:
> > > As far as I am aware only very old Windows network frontends make use
> > > of this style of passing GSO packets from backend to frontend. These
> > > frontends can easily be replaced by the freely available Xen Project
> > > Windows PV network frontend, which uses the 'default' mechanism for
> > > passing GSO packets, which is also used by all Linux frontends.
> > 
> > It is not that simple. Some companies have extra juice in their Windows
> > frontends so can't easily swap over to the Xen Project one.
> 
> Ok, then those frontends will continue to work, but they won't get GSO packets any more. Prefix GSO has never been specified in the canonical netif header and so has been in a limbo state forever so such frontends have always been on borrowed time and only just happened to work against a linux backend. If someone wants to actually specify prefix GSO properly then it could be added back in, but it should not be necessary now that the RX side req<->rsp identity relation is documented (http://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=xen/include/public/io/netif.h;hb=HEAD#l729).
> 
> > 
> > Either way CC-ing Annie
> > 
> > Also would it make sense to CC the FreeBSD and NetBSD maintainers of their
> > PV drivers just to make sure? (Or has that been confirmed)
> > 
> 
> I could do that, but I'd hope that they would be subscribed to xen-devel and will chime in if there's likely to be a problem.

Usually one CCs those folks. I think you are asking me to do
the legwork and find them and CC them here?

CC-ing Roger and Manuel Bouyer.

> 
> > >
> > > NOTE: Removal of this feature will not cause breakage in old Windows
> > >       frontends. They simply will no longer receive GSO packets - the
> > >       packets instead being fragmented in the backend.
> > 
> > Did you also test this with SuSE/Novell Windows PV drivers?
> > 
> 
> No, I don't have copies of these. Internal XenServer testing has not shown up any issues with 'legacy' PV drivers though (which do still have the prefix GSO code in).

You can download these drivers and install them on your guests.
> 
>   Paul
> 
> > Thanks.
> > >
> > > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > > ---
> > > Cc: Wei Liu <wei.liu2@citrix.com>
> > > ---
> > >  drivers/net/xen-netback/common.h    |  1 -
> > >  drivers/net/xen-netback/interface.c |  4 ++--
> > >  drivers/net/xen-netback/rx.c        | 26 --------------------------
> > >  drivers/net/xen-netback/xenbus.c    | 21 ---------------------
> > >  4 files changed, 2 insertions(+), 50 deletions(-)
> > >
> > > diff --git a/drivers/net/xen-netback/common.h
> > > b/drivers/net/xen-netback/common.h
> > > index b38fb2c..0ba5910 100644
> > > --- a/drivers/net/xen-netback/common.h
> > > +++ b/drivers/net/xen-netback/common.h
> > > @@ -260,7 +260,6 @@ struct xenvif {
> > >
> > >  	/* Frontend feature information. */
> > >  	int gso_mask;
> > > -	int gso_prefix_mask;
> > >
> > >  	u8 can_sg:1;
> > >  	u8 ip_csum:1;
> > > diff --git a/drivers/net/xen-netback/interface.c
> > > b/drivers/net/xen-netback/interface.c
> > > index fb50c6d..211d542 100644
> > > --- a/drivers/net/xen-netback/interface.c
> > > +++ b/drivers/net/xen-netback/interface.c
> > > @@ -319,9 +319,9 @@ static netdev_features_t
> > > xenvif_fix_features(struct net_device *dev,
> > >
> > >  	if (!vif->can_sg)
> > >  		features &= ~NETIF_F_SG;
> > > -	if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV4))
> > > +	if (~(vif->gso_mask) & GSO_BIT(TCPV4))
> > >  		features &= ~NETIF_F_TSO;
> > > -	if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV6))
> > > +	if (~(vif->gso_mask) & GSO_BIT(TCPV6))
> > >  		features &= ~NETIF_F_TSO6;
> > >  	if (!vif->ip_csum)
> > >  		features &= ~NETIF_F_IP_CSUM;
> > > diff --git a/drivers/net/xen-netback/rx.c
> > > b/drivers/net/xen-netback/rx.c index 03836aa..6bd7d6e 100644
> > > --- a/drivers/net/xen-netback/rx.c
> > > +++ b/drivers/net/xen-netback/rx.c
> > > @@ -347,16 +347,6 @@ static int xenvif_gop_skb(struct sk_buff *skb,
> > >  			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
> > >  	}
> > >
> > > -	/* Set up a GSO prefix descriptor, if necessary */
> > > -	if ((1 << gso_type) & vif->gso_prefix_mask) {
> > > -		RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++,
> > &req);
> > > -		meta = npo->meta + npo->meta_prod++;
> > > -		meta->gso_type = gso_type;
> > > -		meta->gso_size = skb_shinfo(skb)->gso_size;
> > > -		meta->size = 0;
> > > -		meta->id = req.id;
> > > -	}
> > > -
> > >  	RING_COPY_REQUEST(&queue->rx, queue->rx.req_cons++, &req);
> > >  	meta = npo->meta + npo->meta_prod++;
> > >
> > > @@ -511,22 +501,6 @@ static void xenvif_rx_action(struct xenvif_queue
> > *queue)
> > >  	while ((skb = __skb_dequeue(&rxq)) != NULL) {
> > >  		struct xen_netif_extra_info *extra = NULL;
> > >
> > > -		if ((1 << queue->meta[npo.meta_cons].gso_type) &
> > > -		    vif->gso_prefix_mask) {
> > > -			resp = RING_GET_RESPONSE(&queue->rx,
> > > -						 queue->rx.rsp_prod_pvt++);
> > > -
> > > -			resp->flags = XEN_NETRXF_gso_prefix |
> > > -				      XEN_NETRXF_more_data;
> > > -
> > > -			resp->offset = queue-
> > >meta[npo.meta_cons].gso_size;
> > > -			resp->id = queue->meta[npo.meta_cons].id;
> > > -			resp->status = XENVIF_RX_CB(skb)-
> > >meta_slots_used;
> > > -
> > > -			npo.meta_cons++;
> > > -			XENVIF_RX_CB(skb)->meta_slots_used--;
> > > -		}
> > > -
> > >  		queue->stats.tx_bytes += skb->len;
> > >  		queue->stats.tx_packets++;
> > >
> > > diff --git a/drivers/net/xen-netback/xenbus.c
> > > b/drivers/net/xen-netback/xenbus.c
> > > index daf4c78..7056404 100644
> > > --- a/drivers/net/xen-netback/xenbus.c
> > > +++ b/drivers/net/xen-netback/xenbus.c
> > > @@ -1135,7 +1135,6 @@ static int read_xenbus_vif_flags(struct
> > backend_info *be)
> > >  	vif->can_sg = !!val;
> > >
> > >  	vif->gso_mask = 0;
> > > -	vif->gso_prefix_mask = 0;
> > >
> > >  	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4",
> > >  			 "%d", &val) < 0)
> > > @@ -1143,32 +1142,12 @@ static int read_xenbus_vif_flags(struct
> > backend_info *be)
> > >  	if (val)
> > >  		vif->gso_mask |= GSO_BIT(TCPV4);
> > >
> > > -	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv4-
> > prefix",
> > > -			 "%d", &val) < 0)
> > > -		val = 0;
> > > -	if (val)
> > > -		vif->gso_prefix_mask |= GSO_BIT(TCPV4);
> > > -
> > >  	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6",
> > >  			 "%d", &val) < 0)
> > >  		val = 0;
> > >  	if (val)
> > >  		vif->gso_mask |= GSO_BIT(TCPV6);
> > >
> > > -	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-gso-tcpv6-
> > prefix",
> > > -			 "%d", &val) < 0)
> > > -		val = 0;
> > > -	if (val)
> > > -		vif->gso_prefix_mask |= GSO_BIT(TCPV6);
> > > -
> > > -	if (vif->gso_mask & vif->gso_prefix_mask) {
> > > -		xenbus_dev_fatal(dev, err,
> > > -				 "%s: gso and gso prefix flags are not "
> > > -				 "mutually exclusive",
> > > -				 dev->otherend);
> > > -		return -EOPNOTSUPP;
> > > -	}
> > > -
> > >  	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-
> > offload",
> > >  			 "%d", &val) < 0)
> > >  		val = 0;
> > > --
> > > 2.1.4
> > >
> > >
> > > _______________________________________________
> > > Xen-devel mailing list
> > > Xen-devel@lists.xen.org
> > > https://lists.xen.org/xen-devel
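
For reference, a condensed sketch of the feature negotiation that remains
after the quoted change: only the standard "feature-gso-tcpv4" and
"feature-gso-tcpv6" xenbus keys are read, and the "-prefix" variants are
gone. The helpers read_gso_feature() and read_gso_mask() below are
illustrative, not functions from the patch; the key names, xenbus_scanf()
and GSO_BIT() usage follow the quoted diff.

static int read_gso_feature(struct xenbus_device *dev, const char *key)
{
        int val;

        /* A missing or unreadable key is treated as "not advertised". */
        if (xenbus_scanf(XBT_NIL, dev->otherend, key, "%d", &val) < 0)
                val = 0;

        return !!val;
}

static void read_gso_mask(struct backend_info *be)
{
        struct xenvif *vif = be->vif;
        struct xenbus_device *dev = be->dev;

        vif->gso_mask = 0;

        if (read_gso_feature(dev, "feature-gso-tcpv4"))
                vif->gso_mask |= GSO_BIT(TCPV4);
        if (read_gso_feature(dev, "feature-gso-tcpv6"))
                vif->gso_mask |= GSO_BIT(TCPV6);
}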

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [Xen-devel] [PATCH v2 net-next 5/7] xen-netback: process guest rx packets in batches
  2016-10-04 12:47   ` [Xen-devel] " Konrad Rzeszutek Wilk
                       ` (2 preceding siblings ...)
  2016-10-04 14:51     ` David Vrabel
@ 2016-10-04 14:51     ` David Vrabel
  3 siblings, 0 replies; 40+ messages in thread
From: David Vrabel @ 2016-10-04 14:51 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk, Paul Durrant
  Cc: netdev, Wei Liu, David Vrabel, xen-devel

On 04/10/16 13:47, Konrad Rzeszutek Wilk wrote:
> On Tue, Oct 04, 2016 at 10:29:16AM +0100, Paul Durrant wrote:
>> From: David Vrabel <david.vrabel@citrix.com>
>>
>> Instead of only placing one skb on the guest rx ring at a time, process
>> a batch of up to 64.  This improves performance by ~10% in some tests.
> 
> And does it regress latency workloads?

No, because the loop outside these batches only checks for a fatal
error condition or a disconnection.

> What are those 'some tests' you speak off?

I think it was aggregate intrahost, but I don't remember exactly.

David
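
As a minimal sketch of the batching shape described above (the constant
and function names are suggestive of the refactored rx code but should be
treated as illustrative, not as the exact code from patch 5/7): up to 64
skbs are copied to the guest rx ring per pass, and responses are pushed
and the frontend notified once per batch rather than once per skb.

#define RX_BATCH_SIZE 64

static void xenvif_rx_batch_sketch(struct xenvif_queue *queue)
{
        unsigned int work_done = 0;

        /* Copy up to RX_BATCH_SIZE packets onto the guest rx ring. */
        while (work_done < RX_BATCH_SIZE &&
               xenvif_rx_ring_slots_available(queue) &&
               !skb_queue_empty(&queue->rx_queue)) {
                xenvif_rx_skb(queue);   /* one skb -> ring slots */
                work_done++;
        }

        /* Push responses and send a single notification for the batch. */
        xenvif_rx_complete(queue);
}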

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [Xen-devel] [PATCH v2 net-next 2/7] xen-netback: retire guest rx side prefix GSO feature
  2016-10-04 14:24       ` [Xen-devel] " Konrad Rzeszutek Wilk
  2016-10-05 15:30         ` Roger Pau Monné
@ 2016-10-05 15:30         ` Roger Pau Monné
  2016-10-05 15:40           ` Manuel Bouyer
  1 sibling, 1 reply; 40+ messages in thread
From: Roger Pau Monné @ 2016-10-05 15:30 UTC (permalink / raw)
  To: Konrad Rzeszutek Wilk
  Cc: Paul Durrant, bouyer, annie.li, joao.m.martins, netdev,
	xen-devel, Wei Liu

On Tue, Oct 04, 2016 at 10:24:04AM -0400, Konrad Rzeszutek Wilk wrote:
> On Tue, Oct 04, 2016 at 01:35:41PM +0000, Paul Durrant wrote:
> > > -----Original Message-----
> > > From: Konrad Rzeszutek Wilk [mailto:konrad.wilk@oracle.com]
> > > Sent: 04 October 2016 13:52
> > > To: Paul Durrant <Paul.Durrant@citrix.com>; annie.li@oracle.com;
> > > joao.m.martins@oracle.com
> > > Cc: netdev@vger.kernel.org; xen-devel@lists.xenproject.org; Wei Liu
> > > <wei.liu2@citrix.com>
> > > Subject: Re: [Xen-devel] [PATCH v2 net-next 2/7] xen-netback: retire guest
> > > rx side prefix GSO feature
> > > 
> > > On Tue, Oct 04, 2016 at 10:29:13AM +0100, Paul Durrant wrote:
> > > > As far as I am aware only very old Windows network frontends make use
> > > > of this style of passing GSO packets from backend to frontend. These
> > > > frontends can easily be replaced by the freely available Xen Project
> > > > Windows PV network frontend, which uses the 'default' mechanism for
> > > > passing GSO packets, which is also used by all Linux frontends.
> > > 
> > > It is not that simple. Some companies have extra juice in their Windows
> > > frontends so can't easily swap over to the Xen Project one.
> > 
> > Ok, then those frontends will continue to work, but they won't get GSO packets any more. Prefix GSO has never been specified in the canonical netif header and so has been in a limbo state forever so such frontends have always been on borrowed time and only just happened to work against a linux backend. If someone wants to actually specify prefix GSO properly then it could be added back in, but it should not be necessary now that the RX side req<->rsp identity relation is documented (http://xenbits.xen.org/gitweb/?p=xen.git;a=blob;f=xen/include/public/io/netif.h;hb=HEAD#l729).
> > 
> > > 
> > > Either way CC-ing Annie
> > > 
> > > Also would it make sense to CC the FreeBSD and NetBSD maintainers of their
> > > PV drivers just to make sure? (Or has that been confirmed)
> > > 
> > 
> > I could do that, but I'd hope that they would be subscribed to xen-devel and will chime in if there's likely to be a problem.
> 
> Usually one CCs those folks. I think you are asking me to do
> the legwork and find them and CC them here?
> 
> CC-ing Roger and  Manuel Bouyer.

Thanks. FreeBSD uses the same method as current Linux to both send and
receive GSO packets: an extra slot in the ring, filled with a
netif_extra_info of type XEN_NETIF_EXTRA_TYPE_GSO. The full code can be
found here [0], but AFAICT FreeBSD is not using this prefix mechanism.

Also, IIRC NetBSD doesn't have a Xen GSO implementation [1], but I would let 
Manuel answer that one.

Roger.

[0] http://fxr.watson.org/fxr/source/dev/xen/netfront/netfront.c
[1] https://github.com/jsonn/src/blob/trunk/sys/arch/xen/xen/if_xennet_xenbus.c
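
For completeness, a rough sketch of that 'default' signalling on the
to-guest rx path: instead of a prefix descriptor, the backend consumes one
extra slot and fills it with a netif_extra_info of type
XEN_NETIF_EXTRA_TYPE_GSO. The helper below is illustrative plumbing; the
structure and constant names follow xen/include/public/io/netif.h.

static void fill_gso_extra_sketch(struct xenvif_queue *queue,
                                  struct sk_buff *skb, u8 gso_type)
{
        struct xen_netif_extra_info *extra;

        /* The extra-info slot occupies one response slot on the rx ring. */
        extra = (struct xen_netif_extra_info *)
                RING_GET_RESPONSE(&queue->rx, queue->rx.rsp_prod_pvt++);

        extra->type = XEN_NETIF_EXTRA_TYPE_GSO;
        extra->flags = 0;       /* or XEN_NETIF_EXTRA_FLAG_MORE if chained */
        extra->u.gso.type = gso_type;   /* e.g. XEN_NETIF_GSO_TYPE_TCPV4 */
        extra->u.gso.size = skb_shinfo(skb)->gso_size;
        extra->u.gso.pad = 0;
        extra->u.gso.features = 0;
}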

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2 net-next 2/7] xen-netback: retire guest rx side prefix GSO feature
  2016-10-05 15:30         ` [Xen-devel] " Roger Pau Monné
@ 2016-10-05 15:40           ` Manuel Bouyer
  0 siblings, 0 replies; 40+ messages in thread
From: Manuel Bouyer @ 2016-10-05 15:40 UTC (permalink / raw)
  To: Roger Pau Monné
  Cc: Wei Liu, netdev, annie.li, Paul Durrant, xen-devel, joao.m.martins

On Wed, Oct 05, 2016 at 05:30:26PM +0200, Roger Pau Monné wrote:
> [...]
> Also, IIRC NetBSD doesn't have a Xen GSO implementation [1], but I would let 
> Manuel answer that one.

I confirm, we don't support GSO at this time.

-- 
Manuel Bouyer <bouyer@antioche.eu.org>
     NetBSD: 26 years of experience will always make the difference
--

^ permalink raw reply	[flat|nested] 40+ messages in thread

* Re: [PATCH v2 net-next 0/7] xen-netback: guest rx side refactor
  2016-10-04  9:29 [PATCH v2 net-next 0/7] xen-netback: guest rx side refactor Paul Durrant
                   ` (14 preceding siblings ...)
  2016-10-07  0:38 ` [PATCH v2 net-next 0/7] xen-netback: guest rx side refactor David Miller
@ 2016-10-07  0:38 ` David Miller
  15 siblings, 0 replies; 40+ messages in thread
From: David Miller @ 2016-10-07  0:38 UTC (permalink / raw)
  To: paul.durrant; +Cc: netdev, xen-devel

From: Paul Durrant <paul.durrant@citrix.com>
Date: Tue, 4 Oct 2016 10:29:11 +0100

> This series refactors the guest rx side of xen-netback:
> 
> - The code is moved into its own source module.
> 
> - The prefix variant of GSO handling is retired (since it is no longer
>   in common use, and alternatives exist).
> 
> - The code is then simplified and modifications made to improve
>   performance.
> 
> v2:
> - Rebased onto refreshed net-next

Series applied, thanks.

^ permalink raw reply	[flat|nested] 40+ messages in thread

end of thread, other threads:[~2016-10-07  0:38 UTC | newest]

Thread overview: 40+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-04  9:29 [PATCH v2 net-next 0/7] xen-netback: guest rx side refactor Paul Durrant
2016-10-04  9:29 ` [PATCH v2 net-next 1/7] xen-netback: separate guest side rx code into separate module Paul Durrant
2016-10-04  9:29 ` Paul Durrant
2016-10-04  9:29 ` [PATCH v2 net-next 2/7] xen-netback: retire guest rx side prefix GSO feature Paul Durrant
2016-10-04 10:14   ` David Vrabel
2016-10-04 10:14   ` [Xen-devel] " David Vrabel
2016-10-04 12:52   ` Konrad Rzeszutek Wilk
2016-10-04 12:52   ` [Xen-devel] " Konrad Rzeszutek Wilk
2016-10-04 13:35     ` Paul Durrant
2016-10-04 13:35     ` [Xen-devel] " Paul Durrant
2016-10-04 14:24       ` Konrad Rzeszutek Wilk
2016-10-04 14:24       ` [Xen-devel] " Konrad Rzeszutek Wilk
2016-10-05 15:30         ` Roger Pau Monné
2016-10-05 15:30         ` [Xen-devel] " Roger Pau Monné
2016-10-05 15:40           ` Manuel Bouyer
2016-10-04  9:29 ` Paul Durrant
2016-10-04  9:29 ` [PATCH v2 net-next 3/7] xen-netback: refactor guest rx Paul Durrant
2016-10-04  9:29 ` Paul Durrant
2016-10-04  9:29 ` [PATCH v2 net-next 4/7] xen-netback: immediately wake tx queue when guest rx queue has space Paul Durrant
2016-10-04  9:29 ` Paul Durrant
2016-10-04 12:48   ` [Xen-devel] " Konrad Rzeszutek Wilk
2016-10-04 13:56     ` Paul Durrant
2016-10-04 13:56     ` Paul Durrant
2016-10-04 12:48   ` Konrad Rzeszutek Wilk
2016-10-04  9:29 ` [PATCH v2 net-next 5/7] xen-netback: process guest rx packets in batches Paul Durrant
2016-10-04 12:47   ` Konrad Rzeszutek Wilk
2016-10-04 12:47   ` [Xen-devel] " Konrad Rzeszutek Wilk
2016-10-04 14:02     ` Paul Durrant
2016-10-04 14:02     ` [Xen-devel] " Paul Durrant
2016-10-04 14:51     ` David Vrabel
2016-10-04 14:51     ` [Xen-devel] " David Vrabel
2016-10-04  9:29 ` Paul Durrant
2016-10-04  9:29 ` [PATCH v2 net-next 6/7] xen-netback: batch copies for multiple to-guest rx packets Paul Durrant
2016-10-04  9:29 ` Paul Durrant
2016-10-04  9:29 ` [PATCH v2 net-next 7/7] xen/netback: add fraglist support for to-guest rx Paul Durrant
2016-10-04 10:56   ` David Vrabel
2016-10-04 10:56   ` [Xen-devel] " David Vrabel
2016-10-04  9:29 ` Paul Durrant
2016-10-07  0:38 ` [PATCH v2 net-next 0/7] xen-netback: guest rx side refactor David Miller
2016-10-07  0:38 ` David Miller
