All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] virtio: simplify virtio_ring.
@ 2014-09-03  4:29 Rusty Russell
  2014-09-03  4:29 ` [PATCH 1/3] virtio_net: pass well-formed sgs to virtqueue_add_*() Rusty Russell
                   ` (6 more replies)
  0 siblings, 7 replies; 20+ messages in thread
From: Rusty Russell @ 2014-09-03  4:29 UTC (permalink / raw)
  To: netdev; +Cc: Andy Lutomirski, Michael S. Tsirkin, virtualization, Rusty Russell

I resurrected these patches after prompting from Andy Lutomirski's
recent patches.  I put them on the back-burner because vring_bench
had a 15% slowdown on my laptop: pktgen testing revealed a speedup,
if anything, so I've cleaned them up.

Rusty Russell (3):
  virtio_net: pass well-formed sgs to virtqueue_add_*()
  virtio_ring: assume sgs are always well-formed.
  virtio_ring: unify direct/indirect code paths.

 drivers/net/virtio_net.c     |   5 +-
 drivers/virtio/virtio_ring.c | 185 +++++++++++++++----------------------------
 2 files changed, 69 insertions(+), 121 deletions(-)

-- 
1.9.1

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 1/3] virtio_net: pass well-formed sgs to virtqueue_add_*()
  2014-09-03  4:29 [PATCH 0/3] virtio: simplify virtio_ring Rusty Russell
  2014-09-03  4:29 ` [PATCH 1/3] virtio_net: pass well-formed sgs to virtqueue_add_*() Rusty Russell
@ 2014-09-03  4:29 ` Rusty Russell
  2014-09-03  4:54   ` Andy Lutomirski
                     ` (3 more replies)
  2014-09-03  4:29 ` [PATCH 2/3] virtio_ring: assume sgs are always well-formed Rusty Russell
                   ` (4 subsequent siblings)
  6 siblings, 4 replies; 20+ messages in thread
From: Rusty Russell @ 2014-09-03  4:29 UTC (permalink / raw)
  To: netdev; +Cc: Andy Lutomirski, Michael S. Tsirkin, virtualization, Rusty Russell

This is the only driver which doesn't hand virtqueue_add_inbuf and
virtqueue_add_outbuf a well-formed, well-terminated sg.  Fix it,
so we can make virtio_add_* simpler.

pktgen results:
	modprobe pktgen
	echo 'add_device eth0' > /proc/net/pktgen/kpktgend_0
	echo nowait 1 > /proc/net/pktgen/eth0
	echo count 1000000 > /proc/net/pktgen/eth0
	echo clone_skb 100000 > /proc/net/pktgen/eth0
	echo dst_mac 4e:14:25:a9:30:ac > /proc/net/pktgen/eth0
	echo dst 192.168.1.2 > /proc/net/pktgen/eth0
	for i in `seq 20`; do echo start > /proc/net/pktgen/pgctrl; tail -n1 /proc/net/pktgen/eth0; done

Before:
  746547-793084(786421+/-9.6e+03)pps 346-367(364.4+/-4.4)Mb/sec (346397808-367990976(3.649e+08+/-4.5e+06)bps) errors: 0

After:
  767390-792966(785159+/-6.5e+03)pps 356-367(363.75+/-2.9)Mb/sec (356068960-367936224(3.64314e+08+/-3e+06)bps) errors: 0

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/net/virtio_net.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 59caa06f34a6..31cac511b3c3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -546,8 +546,8 @@ static int add_recvbuf_small(struct receive_queue *rq, gfp_t gfp)
 	skb_put(skb, GOOD_PACKET_LEN);
 
 	hdr = skb_vnet_hdr(skb);
+	sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);
 	sg_set_buf(rq->sg, &hdr->hdr, sizeof hdr->hdr);
-
 	skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);
 
 	err = virtqueue_add_inbuf(rq->vq, rq->sg, 2, skb, gfp);
@@ -563,6 +563,8 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
 	char *p;
 	int i, err, offset;
 
+	sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);
+
 	/* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
 	for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
 		first = get_a_page(rq, gfp);
@@ -899,6 +901,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 	if (vi->mergeable_rx_bufs)
 		hdr->mhdr.num_buffers = 0;
 
+	sg_init_table(sq->sg, MAX_SKB_FRAGS + 2);
 	if (can_push) {
 		__skb_push(skb, hdr_len);
 		num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 1/3] virtio_net: pass well-formed sgs to virtqueue_add_*()
  2014-09-03  4:29 [PATCH 0/3] virtio: simplify virtio_ring Rusty Russell
@ 2014-09-03  4:29 ` Rusty Russell
  2014-09-05 10:40   ` Paolo Bonzini
  2014-09-03  4:29 ` Rusty Russell
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 20+ messages in thread
From: Rusty Russell @ 2014-09-03  4:29 UTC (permalink / raw)
  To: netdev; +Cc: Michael S. Tsirkin, virtualization, Andy Lutomirski

This is the only driver which doesn't hand virtqueue_add_inbuf and
virtqueue_add_outbuf a well-formed, well-terminated sg.  Fix it,
so we can make virtio_add_* simpler.

pktgen results:
	modprobe pktgen
	echo 'add_device eth0' > /proc/net/pktgen/kpktgend_0
	echo nowait 1 > /proc/net/pktgen/eth0
	echo count 1000000 > /proc/net/pktgen/eth0
	echo clone_skb 100000 > /proc/net/pktgen/eth0
	echo dst_mac 4e:14:25:a9:30:ac > /proc/net/pktgen/eth0
	echo dst 192.168.1.2 > /proc/net/pktgen/eth0
	for i in `seq 20`; do echo start > /proc/net/pktgen/pgctrl; tail -n1 /proc/net/pktgen/eth0; done

Before:
  746547-793084(786421+/-9.6e+03)pps 346-367(364.4+/-4.4)Mb/sec (346397808-367990976(3.649e+08+/-4.5e+06)bps) errors: 0

After:
  767390-792966(785159+/-6.5e+03)pps 356-367(363.75+/-2.9)Mb/sec (356068960-367936224(3.64314e+08+/-3e+06)bps) errors: 0

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/net/virtio_net.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 59caa06f34a6..31cac511b3c3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -546,8 +546,8 @@ static int add_recvbuf_small(struct receive_queue *rq, gfp_t gfp)
 	skb_put(skb, GOOD_PACKET_LEN);
 
 	hdr = skb_vnet_hdr(skb);
+	sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);
 	sg_set_buf(rq->sg, &hdr->hdr, sizeof hdr->hdr);
-
 	skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);
 
 	err = virtqueue_add_inbuf(rq->vq, rq->sg, 2, skb, gfp);
@@ -563,6 +563,8 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
 	char *p;
 	int i, err, offset;
 
+	sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);
+
 	/* page in rq->sg[MAX_SKB_FRAGS + 1] is list tail */
 	for (i = MAX_SKB_FRAGS + 1; i > 1; --i) {
 		first = get_a_page(rq, gfp);
@@ -899,6 +901,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 	if (vi->mergeable_rx_bufs)
 		hdr->mhdr.num_buffers = 0;
 
+	sg_init_table(sq->sg, MAX_SKB_FRAGS + 2);
 	if (can_push) {
 		__skb_push(skb, hdr_len);
 		num_sg = skb_to_sgvec(skb, sq->sg, 0, skb->len);
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/3] virtio_ring: assume sgs are always well-formed.
  2014-09-03  4:29 [PATCH 0/3] virtio: simplify virtio_ring Rusty Russell
  2014-09-03  4:29 ` [PATCH 1/3] virtio_net: pass well-formed sgs to virtqueue_add_*() Rusty Russell
  2014-09-03  4:29 ` Rusty Russell
@ 2014-09-03  4:29 ` Rusty Russell
  2014-09-03  4:29 ` Rusty Russell
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 20+ messages in thread
From: Rusty Russell @ 2014-09-03  4:29 UTC (permalink / raw)
  To: netdev; +Cc: Andy Lutomirski, Michael S. Tsirkin, virtualization, Rusty Russell

We used to have several callers which just used arrays.  They're
gone, so we can use sg_next() everywhere, simplifying the code.

On my laptop, this slowed down vring_bench by 15%:

vring_bench before:
	936153354-967745359(9.44739e+08+/-6.1e+06)ns
vring_bench after:
	1061485790-1104800648(1.08254e+09+/-6.6e+06)ns

However, a more realistic test using pktgen on a AMD FX(tm)-8320 saw
a few percent improvement:

pktgen before:
  767390-792966(785159+/-6.5e+03)pps 356-367(363.75+/-2.9)Mb/sec (356068960-367936224(3.64314e+08+/-3e+06)bps) errors: 0

pktgen after:
   787781-796334(793165+/-2.4e+03)pps 365-369(367.5+/-1.2)Mb/sec (365530384-369498976(3.68028e+08+/-1.1e+06)bps) errors: 0

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_ring.c | 68 +++++++++++++-------------------------------
 1 file changed, 19 insertions(+), 49 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 4d08f45a9c29..374399c62080 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -99,28 +99,10 @@ struct vring_virtqueue
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
-static inline struct scatterlist *sg_next_chained(struct scatterlist *sg,
-						  unsigned int *count)
-{
-	return sg_next(sg);
-}
-
-static inline struct scatterlist *sg_next_arr(struct scatterlist *sg,
-					      unsigned int *count)
-{
-	if (--(*count) == 0)
-		return NULL;
-	return sg + 1;
-}
-
 /* Set up an indirect table of descriptors and add it to the queue. */
 static inline int vring_add_indirect(struct vring_virtqueue *vq,
 				     struct scatterlist *sgs[],
-				     struct scatterlist *(*next)
-				       (struct scatterlist *, unsigned int *),
 				     unsigned int total_sg,
-				     unsigned int total_out,
-				     unsigned int total_in,
 				     unsigned int out_sgs,
 				     unsigned int in_sgs,
 				     gfp_t gfp)
@@ -144,7 +126,7 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq,
 	/* Transfer entries from the sg lists into the indirect page */
 	i = 0;
 	for (n = 0; n < out_sgs; n++) {
-		for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 			desc[i].flags = VRING_DESC_F_NEXT;
 			desc[i].addr = sg_phys(sg);
 			desc[i].len = sg->length;
@@ -153,7 +135,7 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq,
 		}
 	}
 	for (; n < (out_sgs + in_sgs); n++) {
-		for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 			desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
 			desc[i].addr = sg_phys(sg);
 			desc[i].len = sg->length;
@@ -186,10 +168,7 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq,
 
 static inline int virtqueue_add(struct virtqueue *_vq,
 				struct scatterlist *sgs[],
-				struct scatterlist *(*next)
-				  (struct scatterlist *, unsigned int *),
-				unsigned int total_out,
-				unsigned int total_in,
+				unsigned int total_sg,
 				unsigned int out_sgs,
 				unsigned int in_sgs,
 				void *data,
@@ -197,7 +176,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct scatterlist *sg;
-	unsigned int i, n, avail, uninitialized_var(prev), total_sg;
+	unsigned int i, n, avail, uninitialized_var(prev);
 	int head;
 
 	START_USE(vq);
@@ -222,13 +201,10 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	}
 #endif
 
-	total_sg = total_in + total_out;
-
 	/* If the host supports indirect descriptor tables, and we have multiple
 	 * buffers, then go indirect. FIXME: tune this threshold */
 	if (vq->indirect && total_sg > 1 && vq->vq.num_free) {
-		head = vring_add_indirect(vq, sgs, next, total_sg, total_out,
-					  total_in,
+		head = vring_add_indirect(vq, sgs, total_sg, 
 					  out_sgs, in_sgs, gfp);
 		if (likely(head >= 0))
 			goto add_head;
@@ -254,7 +230,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 
 	head = i = vq->free_head;
 	for (n = 0; n < out_sgs; n++) {
-		for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 			vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
 			vq->vring.desc[i].addr = sg_phys(sg);
 			vq->vring.desc[i].len = sg->length;
@@ -263,7 +239,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 		}
 	}
 	for (; n < (out_sgs + in_sgs); n++) {
-		for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 			vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
 			vq->vring.desc[i].addr = sg_phys(sg);
 			vq->vring.desc[i].len = sg->length;
@@ -324,29 +300,23 @@ int virtqueue_add_sgs(struct virtqueue *_vq,
 		      void *data,
 		      gfp_t gfp)
 {
-	unsigned int i, total_out, total_in;
+	unsigned int i, total_sg = 0;
 
 	/* Count them first. */
-	for (i = total_out = total_in = 0; i < out_sgs; i++) {
-		struct scatterlist *sg;
-		for (sg = sgs[i]; sg; sg = sg_next(sg))
-			total_out++;
-	}
-	for (; i < out_sgs + in_sgs; i++) {
+	for (i = 0; i < out_sgs + in_sgs; i++) {
 		struct scatterlist *sg;
 		for (sg = sgs[i]; sg; sg = sg_next(sg))
-			total_in++;
+			total_sg++;
 	}
-	return virtqueue_add(_vq, sgs, sg_next_chained,
-			     total_out, total_in, out_sgs, in_sgs, data, gfp);
+	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, data, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
 
 /**
  * virtqueue_add_outbuf - expose output buffers to other end
  * @vq: the struct virtqueue we're talking about.
- * @sgs: array of scatterlists (need not be terminated!)
- * @num: the number of scatterlists readable by other side
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg readable by other side
  * @data: the token identifying the buffer.
  * @gfp: how to do memory allocations (if necessary).
  *
@@ -356,19 +326,19 @@ EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
  */
 int virtqueue_add_outbuf(struct virtqueue *vq,
-			 struct scatterlist sg[], unsigned int num,
+			 struct scatterlist *sg, unsigned int num,
 			 void *data,
 			 gfp_t gfp)
 {
-	return virtqueue_add(vq, &sg, sg_next_arr, num, 0, 1, 0, data, gfp);
+	return virtqueue_add(vq, &sg, num, 1, 0, data, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
 
 /**
  * virtqueue_add_inbuf - expose input buffers to other end
  * @vq: the struct virtqueue we're talking about.
- * @sgs: array of scatterlists (need not be terminated!)
- * @num: the number of scatterlists writable by other side
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg writable by other side
  * @data: the token identifying the buffer.
  * @gfp: how to do memory allocations (if necessary).
  *
@@ -378,11 +348,11 @@ EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
  */
 int virtqueue_add_inbuf(struct virtqueue *vq,
-			struct scatterlist sg[], unsigned int num,
+			struct scatterlist *sg, unsigned int num,
 			void *data,
 			gfp_t gfp)
 {
-	return virtqueue_add(vq, &sg, sg_next_arr, 0, num, 0, 1, data, gfp);
+	return virtqueue_add(vq, &sg, num, 0, 1, data, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 2/3] virtio_ring: assume sgs are always well-formed.
  2014-09-03  4:29 [PATCH 0/3] virtio: simplify virtio_ring Rusty Russell
                   ` (2 preceding siblings ...)
  2014-09-03  4:29 ` [PATCH 2/3] virtio_ring: assume sgs are always well-formed Rusty Russell
@ 2014-09-03  4:29 ` Rusty Russell
  2014-09-03  4:29 ` [PATCH 3/3] virtio_ring: unify direct/indirect code paths Rusty Russell
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 20+ messages in thread
From: Rusty Russell @ 2014-09-03  4:29 UTC (permalink / raw)
  To: netdev; +Cc: Michael S. Tsirkin, virtualization, Andy Lutomirski

We used to have several callers which just used arrays.  They're
gone, so we can use sg_next() everywhere, simplifying the code.

On my laptop, this slowed down vring_bench by 15%:

vring_bench before:
	936153354-967745359(9.44739e+08+/-6.1e+06)ns
vring_bench after:
	1061485790-1104800648(1.08254e+09+/-6.6e+06)ns

However, a more realistic test using pktgen on a AMD FX(tm)-8320 saw
a few percent improvement:

pktgen before:
  767390-792966(785159+/-6.5e+03)pps 356-367(363.75+/-2.9)Mb/sec (356068960-367936224(3.64314e+08+/-3e+06)bps) errors: 0

pktgen after:
   787781-796334(793165+/-2.4e+03)pps 365-369(367.5+/-1.2)Mb/sec (365530384-369498976(3.68028e+08+/-1.1e+06)bps) errors: 0

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_ring.c | 68 +++++++++++++-------------------------------
 1 file changed, 19 insertions(+), 49 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 4d08f45a9c29..374399c62080 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -99,28 +99,10 @@ struct vring_virtqueue
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
-static inline struct scatterlist *sg_next_chained(struct scatterlist *sg,
-						  unsigned int *count)
-{
-	return sg_next(sg);
-}
-
-static inline struct scatterlist *sg_next_arr(struct scatterlist *sg,
-					      unsigned int *count)
-{
-	if (--(*count) == 0)
-		return NULL;
-	return sg + 1;
-}
-
 /* Set up an indirect table of descriptors and add it to the queue. */
 static inline int vring_add_indirect(struct vring_virtqueue *vq,
 				     struct scatterlist *sgs[],
-				     struct scatterlist *(*next)
-				       (struct scatterlist *, unsigned int *),
 				     unsigned int total_sg,
-				     unsigned int total_out,
-				     unsigned int total_in,
 				     unsigned int out_sgs,
 				     unsigned int in_sgs,
 				     gfp_t gfp)
@@ -144,7 +126,7 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq,
 	/* Transfer entries from the sg lists into the indirect page */
 	i = 0;
 	for (n = 0; n < out_sgs; n++) {
-		for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 			desc[i].flags = VRING_DESC_F_NEXT;
 			desc[i].addr = sg_phys(sg);
 			desc[i].len = sg->length;
@@ -153,7 +135,7 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq,
 		}
 	}
 	for (; n < (out_sgs + in_sgs); n++) {
-		for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 			desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
 			desc[i].addr = sg_phys(sg);
 			desc[i].len = sg->length;
@@ -186,10 +168,7 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq,
 
 static inline int virtqueue_add(struct virtqueue *_vq,
 				struct scatterlist *sgs[],
-				struct scatterlist *(*next)
-				  (struct scatterlist *, unsigned int *),
-				unsigned int total_out,
-				unsigned int total_in,
+				unsigned int total_sg,
 				unsigned int out_sgs,
 				unsigned int in_sgs,
 				void *data,
@@ -197,7 +176,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct scatterlist *sg;
-	unsigned int i, n, avail, uninitialized_var(prev), total_sg;
+	unsigned int i, n, avail, uninitialized_var(prev);
 	int head;
 
 	START_USE(vq);
@@ -222,13 +201,10 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	}
 #endif
 
-	total_sg = total_in + total_out;
-
 	/* If the host supports indirect descriptor tables, and we have multiple
 	 * buffers, then go indirect. FIXME: tune this threshold */
 	if (vq->indirect && total_sg > 1 && vq->vq.num_free) {
-		head = vring_add_indirect(vq, sgs, next, total_sg, total_out,
-					  total_in,
+		head = vring_add_indirect(vq, sgs, total_sg, 
 					  out_sgs, in_sgs, gfp);
 		if (likely(head >= 0))
 			goto add_head;
@@ -254,7 +230,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 
 	head = i = vq->free_head;
 	for (n = 0; n < out_sgs; n++) {
-		for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 			vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
 			vq->vring.desc[i].addr = sg_phys(sg);
 			vq->vring.desc[i].len = sg->length;
@@ -263,7 +239,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 		}
 	}
 	for (; n < (out_sgs + in_sgs); n++) {
-		for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
+		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
 			vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
 			vq->vring.desc[i].addr = sg_phys(sg);
 			vq->vring.desc[i].len = sg->length;
@@ -324,29 +300,23 @@ int virtqueue_add_sgs(struct virtqueue *_vq,
 		      void *data,
 		      gfp_t gfp)
 {
-	unsigned int i, total_out, total_in;
+	unsigned int i, total_sg = 0;
 
 	/* Count them first. */
-	for (i = total_out = total_in = 0; i < out_sgs; i++) {
-		struct scatterlist *sg;
-		for (sg = sgs[i]; sg; sg = sg_next(sg))
-			total_out++;
-	}
-	for (; i < out_sgs + in_sgs; i++) {
+	for (i = 0; i < out_sgs + in_sgs; i++) {
 		struct scatterlist *sg;
 		for (sg = sgs[i]; sg; sg = sg_next(sg))
-			total_in++;
+			total_sg++;
 	}
-	return virtqueue_add(_vq, sgs, sg_next_chained,
-			     total_out, total_in, out_sgs, in_sgs, data, gfp);
+	return virtqueue_add(_vq, sgs, total_sg, out_sgs, in_sgs, data, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
 
 /**
  * virtqueue_add_outbuf - expose output buffers to other end
  * @vq: the struct virtqueue we're talking about.
- * @sgs: array of scatterlists (need not be terminated!)
- * @num: the number of scatterlists readable by other side
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg readable by other side
  * @data: the token identifying the buffer.
  * @gfp: how to do memory allocations (if necessary).
  *
@@ -356,19 +326,19 @@ EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
  */
 int virtqueue_add_outbuf(struct virtqueue *vq,
-			 struct scatterlist sg[], unsigned int num,
+			 struct scatterlist *sg, unsigned int num,
 			 void *data,
 			 gfp_t gfp)
 {
-	return virtqueue_add(vq, &sg, sg_next_arr, num, 0, 1, 0, data, gfp);
+	return virtqueue_add(vq, &sg, num, 1, 0, data, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
 
 /**
  * virtqueue_add_inbuf - expose input buffers to other end
  * @vq: the struct virtqueue we're talking about.
- * @sgs: array of scatterlists (need not be terminated!)
- * @num: the number of scatterlists writable by other side
+ * @sg: scatterlist (must be well-formed and terminated!)
+ * @num: the number of entries in @sg writable by other side
  * @data: the token identifying the buffer.
  * @gfp: how to do memory allocations (if necessary).
  *
@@ -378,11 +348,11 @@ EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
  * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
  */
 int virtqueue_add_inbuf(struct virtqueue *vq,
-			struct scatterlist sg[], unsigned int num,
+			struct scatterlist *sg, unsigned int num,
 			void *data,
 			gfp_t gfp)
 {
-	return virtqueue_add(vq, &sg, sg_next_arr, 0, num, 0, 1, data, gfp);
+	return virtqueue_add(vq, &sg, num, 0, 1, data, gfp);
 }
 EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* [PATCH 3/3] virtio_ring: unify direct/indirect code paths.
  2014-09-03  4:29 [PATCH 0/3] virtio: simplify virtio_ring Rusty Russell
                   ` (3 preceding siblings ...)
  2014-09-03  4:29 ` Rusty Russell
@ 2014-09-03  4:29 ` Rusty Russell
  2014-09-04  1:59   ` Andy Lutomirski
  2014-09-05 10:55   ` Paolo Bonzini
  2014-09-05 21:27 ` [PATCH 0/3] virtio: simplify virtio_ring David Miller
  2014-09-05 21:27 ` David Miller
  6 siblings, 2 replies; 20+ messages in thread
From: Rusty Russell @ 2014-09-03  4:29 UTC (permalink / raw)
  To: netdev; +Cc: Michael S. Tsirkin, virtualization, Andy Lutomirski

virtqueue_add() populates the virtqueue descriptor table from the sgs
given.  If it uses an indirect descriptor table, then it puts a single
descriptor in the descriptor table pointing to the kmalloc'ed indirect
table where the sg is populated.

Previously vring_add_indirect() did the allocation and the simple
linear layout.  We replace that with alloc_indirect() which allocates
the indirect table then chains it like the normal descriptor table so
we can reuse the core logic.

This slows down pktgen by less than 1/2 a percent (which uses direct
descriptors), as well as vring_bench, but it's far neater.

vring_bench before:
	1061485790-1104800648(1.08254e+09+/-6.6e+06)ns
vring_bench after:
	1125610268-1183528965(1.14172e+09+/-8e+06)ns

pktgen before:
   787781-796334(793165+/-2.4e+03)pps 365-369(367.5+/-1.2)Mb/sec (365530384-369498976(3.68028e+08+/-1.1e+06)bps) errors: 0

pktgen after:
   779988-790404(786391+/-2.5e+03)pps 361-366(364.35+/-1.3)Mb/sec (361914432-366747456(3.64885e+08+/-1.2e+06)bps) errors: 0

Now, if we make force indirect descriptors by turning off any_header_sg
in virtio_net.c:

pktgen before:
  713773-721062(718374+/-2.1e+03)pps 331-334(332.95+/-0.92)Mb/sec (331190672-334572768(3.33325e+08+/-9.6e+05)bps) errors: 0
pktgen after:
  710542-719195(714898+/-2.4e+03)pps 329-333(331.15+/-1.1)Mb/sec (329691488-333706480(3.31713e+08+/-1.1e+06)bps) errors: 0

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_ring.c | 125 +++++++++++++++++--------------------------
 1 file changed, 50 insertions(+), 75 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 374399c62080..a4ebbffac141 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -99,18 +99,10 @@ struct vring_virtqueue
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
-/* Set up an indirect table of descriptors and add it to the queue. */
-static inline int vring_add_indirect(struct vring_virtqueue *vq,
-				     struct scatterlist *sgs[],
-				     unsigned int total_sg,
-				     unsigned int out_sgs,
-				     unsigned int in_sgs,
-				     gfp_t gfp)
+static struct vring_desc *alloc_indirect(unsigned int total_sg, gfp_t gfp)
 {
-	struct vring_desc *desc;
-	unsigned head;
-	struct scatterlist *sg;
-	int i, n;
+ 	struct vring_desc *desc;
+	unsigned int i;
 
 	/*
 	 * We require lowmem mappings for the descriptors because
@@ -119,51 +111,13 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq,
 	 */
 	gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH);
 
-	desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
-	if (!desc)
-		return -ENOMEM;
-
-	/* Transfer entries from the sg lists into the indirect page */
-	i = 0;
-	for (n = 0; n < out_sgs; n++) {
-		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			desc[i].flags = VRING_DESC_F_NEXT;
-			desc[i].addr = sg_phys(sg);
-			desc[i].len = sg->length;
-			desc[i].next = i+1;
-			i++;
-		}
-	}
-	for (; n < (out_sgs + in_sgs); n++) {
-		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
-			desc[i].addr = sg_phys(sg);
-			desc[i].len = sg->length;
-			desc[i].next = i+1;
-			i++;
-		}
-	}
-	BUG_ON(i != total_sg);
-
-	/* Last one doesn't continue. */
-	desc[i-1].flags &= ~VRING_DESC_F_NEXT;
-	desc[i-1].next = 0;
-
-	/* We're about to use a buffer */
-	vq->vq.num_free--;
-
-	/* Use a single buffer which doesn't continue */
-	head = vq->free_head;
-	vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
-	vq->vring.desc[head].addr = virt_to_phys(desc);
-	/* kmemleak gives a false positive, as it's hidden by virt_to_phys */
-	kmemleak_ignore(desc);
-	vq->vring.desc[head].len = i * sizeof(struct vring_desc);
-
-	/* Update free pointer */
-	vq->free_head = vq->vring.desc[head].next;
+ 	desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
+ 	if (!desc)
+		return NULL;
 
-	return head;
+	for (i = 0; i < total_sg; i++)
+		desc[i].next = i+1;
+	return desc;
 }
 
 static inline int virtqueue_add(struct virtqueue *_vq,
@@ -176,8 +130,10 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct scatterlist *sg;
+	struct vring_desc *desc;
 	unsigned int i, n, avail, uninitialized_var(prev);
 	int head;
+	bool indirect;
 
 	START_USE(vq);
 
@@ -201,18 +157,36 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	}
 #endif
 
+	BUG_ON(total_sg > vq->vring.num);
+	BUG_ON(total_sg == 0);
+
+	head = vq->free_head;
+
 	/* If the host supports indirect descriptor tables, and we have multiple
 	 * buffers, then go indirect. FIXME: tune this threshold */
-	if (vq->indirect && total_sg > 1 && vq->vq.num_free) {
-		head = vring_add_indirect(vq, sgs, total_sg, 
-					  out_sgs, in_sgs, gfp);
-		if (likely(head >= 0))
-			goto add_head;
+	if (vq->indirect && total_sg > 1 && vq->vq.num_free)
+		desc = alloc_indirect(total_sg, gfp);
+	else
+		desc = NULL;
+
+	if (desc) {
+		/* Use a single buffer which doesn't continue */
+		vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
+		vq->vring.desc[head].addr = virt_to_phys(desc);
+		/* avoid kmemleak false positive (hidden by virt_to_phys) */
+		kmemleak_ignore(desc);
+		vq->vring.desc[head].len = total_sg * sizeof(struct vring_desc);
+
+		/* Set up rest to use this indirect table. */
+		i = 0;
+		total_sg = 1;
+		indirect = true;
+	} else {
+		desc = vq->vring.desc;
+		i = head;
+		indirect = false;
 	}
 
-	BUG_ON(total_sg > vq->vring.num);
-	BUG_ON(total_sg == 0);
-
 	if (vq->vq.num_free < total_sg) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
 			 total_sg, vq->vq.num_free);
@@ -228,32 +202,33 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	/* We're about to use some buffers from the free list. */
 	vq->vq.num_free -= total_sg;
 
-	head = i = vq->free_head;
 	for (n = 0; n < out_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
-			vq->vring.desc[i].addr = sg_phys(sg);
-			vq->vring.desc[i].len = sg->length;
+			desc[i].flags = VRING_DESC_F_NEXT;
+			desc[i].addr = sg_phys(sg);
+			desc[i].len = sg->length;
 			prev = i;
-			i = vq->vring.desc[i].next;
+			i = desc[i].next;
 		}
 	}
 	for (; n < (out_sgs + in_sgs); n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
-			vq->vring.desc[i].addr = sg_phys(sg);
-			vq->vring.desc[i].len = sg->length;
+			desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
+			desc[i].addr = sg_phys(sg);
+			desc[i].len = sg->length;
 			prev = i;
-			i = vq->vring.desc[i].next;
+			i = desc[i].next;
 		}
 	}
 	/* Last one doesn't continue. */
-	vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;
+	desc[prev].flags &= ~VRING_DESC_F_NEXT;
 
 	/* Update free pointer */
-	vq->free_head = i;
+	if (indirect)
+		vq->free_head = vq->vring.desc[head].next;
+	else
+		vq->free_head = i;
 
-add_head:
 	/* Set token. */
 	vq->data[head] = data;
 
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] virtio_net: pass well-formed sgs to virtqueue_add_*()
  2014-09-03  4:29 ` Rusty Russell
  2014-09-03  4:54   ` Andy Lutomirski
@ 2014-09-03  4:54   ` Andy Lutomirski
  2014-09-03 10:23   ` Jesper Dangaard Brouer
  2014-09-03 10:23   ` Jesper Dangaard Brouer
  3 siblings, 0 replies; 20+ messages in thread
From: Andy Lutomirski @ 2014-09-03  4:54 UTC (permalink / raw)
  To: Rusty Russell; +Cc: netdev, Michael S. Tsirkin, Linux Virtualization

On Tue, Sep 2, 2014 at 9:29 PM, Rusty Russell <rusty@rustcorp.com.au> wrote:
> This is the only driver which doesn't hand virtqueue_add_inbuf and
> virtqueue_add_outbuf a well-formed, well-terminated sg.  Fix it,
> so we can make virtio_add_* simpler.

OK, I get it now: you're reinitializing the table every time, clearing
old end marks.  There's room to microoptimize this, but it's probably
not worth it.

IOW, looks good to me.

--Andy

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] virtio_net: pass well-formed sgs to virtqueue_add_*()
  2014-09-03  4:29 ` Rusty Russell
@ 2014-09-03  4:54   ` Andy Lutomirski
  2014-09-03  4:54   ` Andy Lutomirski
                     ` (2 subsequent siblings)
  3 siblings, 0 replies; 20+ messages in thread
From: Andy Lutomirski @ 2014-09-03  4:54 UTC (permalink / raw)
  To: Rusty Russell; +Cc: netdev, Linux Virtualization, Michael S. Tsirkin

On Tue, Sep 2, 2014 at 9:29 PM, Rusty Russell <rusty@rustcorp.com.au> wrote:
> This is the only driver which doesn't hand virtqueue_add_inbuf and
> virtqueue_add_outbuf a well-formed, well-terminated sg.  Fix it,
> so we can make virtio_add_* simpler.

OK, I get it now: you're reinitializing the table every time, clearing
old end marks.  There's room to microoptimize this, but it's probably
not worth it.

IOW, looks good to me.

--Andy

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] virtio_net: pass well-formed sgs to virtqueue_add_*()
  2014-09-03  4:29 ` Rusty Russell
                     ` (2 preceding siblings ...)
  2014-09-03 10:23   ` Jesper Dangaard Brouer
@ 2014-09-03 10:23   ` Jesper Dangaard Brouer
  3 siblings, 0 replies; 20+ messages in thread
From: Jesper Dangaard Brouer @ 2014-09-03 10:23 UTC (permalink / raw)
  To: Rusty Russell
  Cc: brouer, netdev, Andy Lutomirski, Michael S. Tsirkin, virtualization

On Wed,  3 Sep 2014 13:59:14 +0930
Rusty Russell <rusty@rustcorp.com.au> wrote:

> pktgen results:
> 	modprobe pktgen
> 	echo 'add_device eth0' > /proc/net/pktgen/kpktgend_0
> 	echo nowait 1 > /proc/net/pktgen/eth0

Maybe your pktgen "nowait" (patch) should be a flag instead?

 echo "flag EXIT_NO_WAIT" > /proc/net/pktgen/eth0

> 	echo count 1000000 > /proc/net/pktgen/eth0
> 	echo clone_skb 100000 > /proc/net/pktgen/eth0
> 	echo dst_mac 4e:14:25:a9:30:ac > /proc/net/pktgen/eth0
> 	echo dst 192.168.1.2 > /proc/net/pktgen/eth0
> 	for i in `seq 20`; do echo start > /proc/net/pktgen/pgctrl; tail -n1 /proc/net/pktgen/eth0; done
> 


-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Sr. Network Kernel Developer at Red Hat
  Author of http://www.iptv-analyzer.org
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] virtio_net: pass well-formed sgs to virtqueue_add_*()
  2014-09-03  4:29 ` Rusty Russell
  2014-09-03  4:54   ` Andy Lutomirski
  2014-09-03  4:54   ` Andy Lutomirski
@ 2014-09-03 10:23   ` Jesper Dangaard Brouer
  2014-09-03 10:23   ` Jesper Dangaard Brouer
  3 siblings, 0 replies; 20+ messages in thread
From: Jesper Dangaard Brouer @ 2014-09-03 10:23 UTC (permalink / raw)
  To: Rusty Russell
  Cc: netdev, Andy Lutomirski, Michael S. Tsirkin, virtualization, brouer

On Wed,  3 Sep 2014 13:59:14 +0930
Rusty Russell <rusty@rustcorp.com.au> wrote:

> pktgen results:
> 	modprobe pktgen
> 	echo 'add_device eth0' > /proc/net/pktgen/kpktgend_0
> 	echo nowait 1 > /proc/net/pktgen/eth0

Maybe your pktgen "nowait" (patch) should be a flag instead?

 echo "flag EXIT_NO_WAIT" > /proc/net/pktgen/eth0

> 	echo count 1000000 > /proc/net/pktgen/eth0
> 	echo clone_skb 100000 > /proc/net/pktgen/eth0
> 	echo dst_mac 4e:14:25:a9:30:ac > /proc/net/pktgen/eth0
> 	echo dst 192.168.1.2 > /proc/net/pktgen/eth0
> 	for i in `seq 20`; do echo start > /proc/net/pktgen/pgctrl; tail -n1 /proc/net/pktgen/eth0; done
> 


-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Sr. Network Kernel Developer at Red Hat
  Author of http://www.iptv-analyzer.org
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] virtio_ring: unify direct/indirect code paths.
  2014-09-03  4:29 ` [PATCH 3/3] virtio_ring: unify direct/indirect code paths Rusty Russell
@ 2014-09-04  1:59   ` Andy Lutomirski
  2014-09-05  2:55     ` Rusty Russell
  2014-09-05 10:55   ` Paolo Bonzini
  1 sibling, 1 reply; 20+ messages in thread
From: Andy Lutomirski @ 2014-09-04  1:59 UTC (permalink / raw)
  To: Rusty Russell; +Cc: netdev, Linux Virtualization, Michael S. Tsirkin

On Tue, Sep 2, 2014 at 9:29 PM, Rusty Russell <rusty@rustcorp.com.au> wrote:
> virtqueue_add() populates the virtqueue descriptor table from the sgs
> given.  If it uses an indirect descriptor table, then it puts a single
> descriptor in the descriptor table pointing to the kmalloc'ed indirect
> table where the sg is populated.
>
> Previously vring_add_indirect() did the allocation and the simple
> linear layout.  We replace that with alloc_indirect() which allocates
> the indirect table then chains it like the normal descriptor table so
> we can reuse the core logic.
>

> +       if (vq->indirect && total_sg > 1 && vq->vq.num_free)
> +               desc = alloc_indirect(total_sg, gfp);
> +       else
> +               desc = NULL;
> +
> +       if (desc) {
> +               /* Use a single buffer which doesn't continue */
> +               vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
> +               vq->vring.desc[head].addr = virt_to_phys(desc);
> +               /* avoid kmemleak false positive (hidden by virt_to_phys) */
> +               kmemleak_ignore(desc);
> +               vq->vring.desc[head].len = total_sg * sizeof(struct vring_desc);
> +
> +               /* Set up rest to use this indirect table. */
> +               i = 0;
> +               total_sg = 1;

This is a little too magical for me.  Would it make sense to add a new
variable for this (total_root_descs or something)?

--Andy

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] virtio_ring: unify direct/indirect code paths.
  2014-09-04  1:59   ` Andy Lutomirski
@ 2014-09-05  2:55     ` Rusty Russell
  0 siblings, 0 replies; 20+ messages in thread
From: Rusty Russell @ 2014-09-05  2:55 UTC (permalink / raw)
  To: Andy Lutomirski; +Cc: netdev, Linux Virtualization, Michael S. Tsirkin

Andy Lutomirski <luto@amacapital.net> writes:
> On Tue, Sep 2, 2014 at 9:29 PM, Rusty Russell <rusty@rustcorp.com.au> wrote:
>> virtqueue_add() populates the virtqueue descriptor table from the sgs
>> given.  If it uses an indirect descriptor table, then it puts a single
>> descriptor in the descriptor table pointing to the kmalloc'ed indirect
>> table where the sg is populated.
>>
>> Previously vring_add_indirect() did the allocation and the simple
>> linear layout.  We replace that with alloc_indirect() which allocates
>> the indirect table then chains it like the normal descriptor table so
>> we can reuse the core logic.
>>
>
>> +       if (vq->indirect && total_sg > 1 && vq->vq.num_free)
>> +               desc = alloc_indirect(total_sg, gfp);
>> +       else
>> +               desc = NULL;
>> +
>> +       if (desc) {
>> +               /* Use a single buffer which doesn't continue */
>> +               vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
>> +               vq->vring.desc[head].addr = virt_to_phys(desc);
>> +               /* avoid kmemleak false positive (hidden by virt_to_phys) */
>> +               kmemleak_ignore(desc);
>> +               vq->vring.desc[head].len = total_sg * sizeof(struct vring_desc);
>> +
>> +               /* Set up rest to use this indirect table. */
>> +               i = 0;
>> +               total_sg = 1;
>
> This is a little too magical for me.  Would it make sense to add a new
> variable for this (total_root_descs or something)?

Agreed, it's a little hacky.

Here's the diff (I actually merged this into the patch, but no point
re-xmitting):

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index a4ebbffac141..6d2b5310c991 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -131,7 +131,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct scatterlist *sg;
 	struct vring_desc *desc;
-	unsigned int i, n, avail, uninitialized_var(prev);
+	unsigned int i, n, avail, descs_used, uninitialized_var(prev);
 	int head;
 	bool indirect;
 
@@ -179,17 +179,18 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 
 		/* Set up rest to use this indirect table. */
 		i = 0;
-		total_sg = 1;
+		descs_used = 1;
 		indirect = true;
 	} else {
 		desc = vq->vring.desc;
 		i = head;
+		descs_used = total_sg;
 		indirect = false;
 	}
 
-	if (vq->vq.num_free < total_sg) {
+	if (vq->vq.num_free < descs_used) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
-			 total_sg, vq->vq.num_free);
+			 descs_used, vq->vq.num_free);
 		/* FIXME: for historical reasons, we force a notify here if
 		 * there are outgoing parts to the buffer.  Presumably the
 		 * host should service the ring ASAP. */
@@ -200,7 +201,7 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	}
 
 	/* We're about to use some buffers from the free list. */
-	vq->vq.num_free -= total_sg;
+	vq->vq.num_free -= descs_used;
 
 	for (n = 0; n < out_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] virtio_net: pass well-formed sgs to virtqueue_add_*()
  2014-09-03  4:29 ` [PATCH 1/3] virtio_net: pass well-formed sgs to virtqueue_add_*() Rusty Russell
@ 2014-09-05 10:40   ` Paolo Bonzini
  2014-09-07  7:20     ` Michael S. Tsirkin
  0 siblings, 1 reply; 20+ messages in thread
From: Paolo Bonzini @ 2014-09-05 10:40 UTC (permalink / raw)
  To: Rusty Russell, netdev; +Cc: virtualization, Andy Lutomirski, Michael S. Tsirkin

Il 03/09/2014 06:29, Rusty Russell ha scritto:
> +	sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);

I think 2 is enough here.  That said...

>  	sg_set_buf(rq->sg, &hdr->hdr, sizeof hdr->hdr);
> -
>  	skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);
>  
>  	err = virtqueue_add_inbuf(rq->vq, rq->sg, 2, skb, gfp);

... skb_to_sgvec will already make the sg well formed, so the
sg_init_table is _almost_ redundant; it is only there to remove
intermediate end marks.  The block layer takes care to remove
them, but skb_to_sgvec doesn't.

If the following patch can be accepted to net/core/skbuff.c, the
sg_init_table in virtnet_alloc_queues will suffice.

Paolo

-------------------- 8< -------------------
From: Paolo Bonzini <pbonzini@redhat.com>
Subject: [PATCH] net: skb_to_sgvec: do not leave intermediate marks in the sgvec

sg_set_buf/sg_set_page will leave the end mark in place in their
argument, which may be in the middle of a scatterlist.  If we
remove the mark before calling them, we can avoid calls to
sg_init_table before skb_to_sgvec.

However, users of skb_to_sgvec_nomark now need to be careful and
possibly restore the mark.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 163b673f9e62..a3108ef1f1c0 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3265,6 +3265,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 	if (copy > 0) {
 		if (copy > len)
 			copy = len;
+		sg_unmark_end(sg);
 		sg_set_buf(sg, skb->data + offset, copy);
 		elt++;
 		if ((len -= copy) == 0)
@@ -3283,6 +3284,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
 
 			if (copy > len)
 				copy = len;
+			sg_unmark_end(&sg[elt]);
 			sg_set_page(&sg[elt], skb_frag_page(frag), copy,
 					frag->page_offset+offset-start);
 			elt++;
@@ -3322,7 +3324,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
  * Scenario to use skb_to_sgvec_nomark:
  * 1. sg_init_table
  * 2. skb_to_sgvec_nomark(payload1)
- * 3. skb_to_sgvec_nomark(payload2)
+ * 3. skb_to_sgvec(payload2)
  *
  * This is equivalent to:
  * 1. sg_init_table
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index a2afa89513a0..9ae5756d9e5f 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -227,6 +227,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
 		*seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
 		sg_set_buf(seqhisg, seqhi, seqhi_len);
 	}
+	sg_mark_end(&sg[nfrags + sglists]);
 	ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
 	ahash_request_set_callback(req, 0, ah_output_done, skb);
 
@@ -395,6 +396,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
 		*seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
 		sg_set_buf(seqhisg, seqhi, seqhi_len);
 	}
+	sg_mark_end(&sg[nfrags + sglists]);
 	ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
 	ahash_request_set_callback(req, 0, ah_input_done, skb);
 
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 72a4930bdc0a..c680d82e43de 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -430,6 +430,8 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
 		*seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
 		sg_set_buf(seqhisg, seqhi, seqhi_len);
 	}
+	sg_mark_end(&sg[nfrags + sglists]);
+
 	ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
 	ahash_request_set_callback(req, 0, ah6_output_done, skb);
 
@@ -608,6 +610,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
 		*seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
 		sg_set_buf(seqhisg, seqhi, seqhi_len);
 	}
+	sg_mark_end(&sg[nfrags + sglists]);
 
 	ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
 	ahash_request_set_callback(req, 0, ah6_input_done, skb);

^ permalink raw reply related	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] virtio_ring: unify direct/indirect code paths.
  2014-09-03  4:29 ` [PATCH 3/3] virtio_ring: unify direct/indirect code paths Rusty Russell
  2014-09-04  1:59   ` Andy Lutomirski
@ 2014-09-05 10:55   ` Paolo Bonzini
  2014-09-08 17:32     ` Andy Lutomirski
  1 sibling, 1 reply; 20+ messages in thread
From: Paolo Bonzini @ 2014-09-05 10:55 UTC (permalink / raw)
  To: Rusty Russell, netdev; +Cc: virtualization, Andy Lutomirski, Michael S. Tsirkin

Il 03/09/2014 06:29, Rusty Russell ha scritto:
> + 	desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
> + 	if (!desc)
> +		return NULL;
>  
> -	return head;
> +	for (i = 0; i < total_sg; i++)
> +		desc[i].next = i+1;
> +	return desc;
>  }

Would it make sense to keep a cache of a few (say) 8- or 16-element
indirect descriptors?  You'd only have to do this ugly (and slowish) for
loop on the first allocation.

Also, since this is mostly an aesthetic patch,

> +	if (indirect)
> +		vq->free_head = vq->vring.desc[head].next;
> +	else
> +		vq->free_head = i;

I'd move the indirect case above, where the vring.desc[head] is actually
allocated.

Paolo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/3] virtio: simplify virtio_ring.
  2014-09-03  4:29 [PATCH 0/3] virtio: simplify virtio_ring Rusty Russell
                   ` (5 preceding siblings ...)
  2014-09-05 21:27 ` [PATCH 0/3] virtio: simplify virtio_ring David Miller
@ 2014-09-05 21:27 ` David Miller
  6 siblings, 0 replies; 20+ messages in thread
From: David Miller @ 2014-09-05 21:27 UTC (permalink / raw)
  To: rusty; +Cc: netdev, luto, mst, virtualization

From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed,  3 Sep 2014 13:59:13 +0930

> I resurrected these patches after prompting from Andy Lutomirski's
> recent patches.  I put them on the back-burner because vring_bench
> had a 15% slowdown on my laptop: pktgen testing revealed a speedup,
> if anything, so I've cleaned them up.

If you want to merge this via your virtio tree that's fine with me.

For virtio_net:

Acked-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 0/3] virtio: simplify virtio_ring.
  2014-09-03  4:29 [PATCH 0/3] virtio: simplify virtio_ring Rusty Russell
                   ` (4 preceding siblings ...)
  2014-09-03  4:29 ` [PATCH 3/3] virtio_ring: unify direct/indirect code paths Rusty Russell
@ 2014-09-05 21:27 ` David Miller
  2014-09-05 21:27 ` David Miller
  6 siblings, 0 replies; 20+ messages in thread
From: David Miller @ 2014-09-05 21:27 UTC (permalink / raw)
  To: rusty; +Cc: netdev, mst, virtualization, luto

From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed,  3 Sep 2014 13:59:13 +0930

> I resurrected these patches after prompting from Andy Lutomirski's
> recent patches.  I put them on the back-burner because vring_bench
> had a 15% slowdown on my laptop: pktgen testing revealed a speedup,
> if anything, so I've cleaned them up.

If you want to merge this via your virtio tree that's fine with me.

For virtio_net:

Acked-by: David S. Miller <davem@davemloft.net>

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] virtio_net: pass well-formed sgs to virtqueue_add_*()
  2014-09-05 10:40   ` Paolo Bonzini
@ 2014-09-07  7:20     ` Michael S. Tsirkin
  2014-10-14  2:21       ` Rusty Russell
  0 siblings, 1 reply; 20+ messages in thread
From: Michael S. Tsirkin @ 2014-09-07  7:20 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: netdev, Andy Lutomirski, virtualization

On Fri, Sep 05, 2014 at 12:40:50PM +0200, Paolo Bonzini wrote:
> Il 03/09/2014 06:29, Rusty Russell ha scritto:
> > +	sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);
> 
> I think 2 is enough here.  That said...
> 
> >  	sg_set_buf(rq->sg, &hdr->hdr, sizeof hdr->hdr);
> > -
> >  	skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);
> >  
> >  	err = virtqueue_add_inbuf(rq->vq, rq->sg, 2, skb, gfp);
> 
> ... skb_to_sgvec will already make the sg well formed, so the
> sg_init_table is _almost_ redundant; it is only there to remove
> intermediate end marks.  The block layer takes care to remove
> them, but skb_to_sgvec doesn't.
> 
> If the following patch can be accepted to net/core/skbuff.c, the
> sg_init_table in virtnet_alloc_queues will suffice.
> 
> Paolo

You will have to post it to netdev as a new topic and Cc
Dave Miller for it to be considered.

> -------------------- 8< -------------------
> From: Paolo Bonzini <pbonzini@redhat.com>
> Subject: [PATCH] net: skb_to_sgvec: do not leave intermediate marks in the sgvec
> 
> sg_set_buf/sg_set_page will leave the end mark in place in their
> argument, which may be in the middle of a scatterlist.  If we
> remove the mark before calling them, we can avoid calls to
> sg_init_table before skb_to_sgvec.
> 
> However, users of skb_to_sgvec_nomark now need to be careful and
> possibly restore the mark.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> 
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 163b673f9e62..a3108ef1f1c0 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -3265,6 +3265,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
>  	if (copy > 0) {
>  		if (copy > len)
>  			copy = len;
> +		sg_unmark_end(sg);
>  		sg_set_buf(sg, skb->data + offset, copy);
>  		elt++;
>  		if ((len -= copy) == 0)
> @@ -3283,6 +3284,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
>  
>  			if (copy > len)
>  				copy = len;
> +			sg_unmark_end(&sg[elt]);
>  			sg_set_page(&sg[elt], skb_frag_page(frag), copy,
>  					frag->page_offset+offset-start);
>  			elt++;
> @@ -3322,7 +3324,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
>   * Scenario to use skb_to_sgvec_nomark:
>   * 1. sg_init_table
>   * 2. skb_to_sgvec_nomark(payload1)
> - * 3. skb_to_sgvec_nomark(payload2)
> + * 3. skb_to_sgvec(payload2)
>   *
>   * This is equivalent to:
>   * 1. sg_init_table
> diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
> index a2afa89513a0..9ae5756d9e5f 100644
> --- a/net/ipv4/ah4.c
> +++ b/net/ipv4/ah4.c
> @@ -227,6 +227,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb)
>  		*seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
>  		sg_set_buf(seqhisg, seqhi, seqhi_len);
>  	}
> +	sg_mark_end(&sg[nfrags + sglists]);
>  	ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
>  	ahash_request_set_callback(req, 0, ah_output_done, skb);
>  
> @@ -395,6 +396,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb)
>  		*seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
>  		sg_set_buf(seqhisg, seqhi, seqhi_len);
>  	}
> +	sg_mark_end(&sg[nfrags + sglists]);
>  	ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
>  	ahash_request_set_callback(req, 0, ah_input_done, skb);
>  
> diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
> index 72a4930bdc0a..c680d82e43de 100644
> --- a/net/ipv6/ah6.c
> +++ b/net/ipv6/ah6.c
> @@ -430,6 +430,8 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)
>  		*seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
>  		sg_set_buf(seqhisg, seqhi, seqhi_len);
>  	}
> +	sg_mark_end(&sg[nfrags + sglists]);
> +
>  	ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
>  	ahash_request_set_callback(req, 0, ah6_output_done, skb);
>  
> @@ -608,6 +610,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)
>  		*seqhi = XFRM_SKB_CB(skb)->seq.input.hi;
>  		sg_set_buf(seqhisg, seqhi, seqhi_len);
>  	}
> +	sg_mark_end(&sg[nfrags + sglists]);
>  
>  	ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);
>  	ahash_request_set_callback(req, 0, ah6_input_done, skb);

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 3/3] virtio_ring: unify direct/indirect code paths.
  2014-09-05 10:55   ` Paolo Bonzini
@ 2014-09-08 17:32     ` Andy Lutomirski
  0 siblings, 0 replies; 20+ messages in thread
From: Andy Lutomirski @ 2014-09-08 17:32 UTC (permalink / raw)
  To: Paolo Bonzini; +Cc: netdev, Linux Virtualization, Michael S. Tsirkin

On Sep 5, 2014 3:55 AM, "Paolo Bonzini" <pbonzini@redhat.com> wrote:
>
> Il 03/09/2014 06:29, Rusty Russell ha scritto:
> > +     desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
> > +     if (!desc)
> > +             return NULL;
> >
> > -     return head;
> > +     for (i = 0; i < total_sg; i++)
> > +             desc[i].next = i+1;
> > +     return desc;
> >  }
>
> Would it make sense to keep a cache of a few (say) 8- or 16-element
> indirect descriptors?  You'd only have to do this ugly (and slowish) for
> loop on the first allocation.
>
> Also, since this is mostly an aesthetic patch,
>
> > +     if (indirect)
> > +             vq->free_head = vq->vring.desc[head].next;
> > +     else
> > +             vq->free_head = i;
>
> I'd move the indirect case above, where the vring.desc[head] is actually
> allocated.
>

Please don't: I'll just have to undo it for the DMA API stuff, since
descriptor setup will be able to fail.

--Andy

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH 1/3] virtio_net: pass well-formed sgs to virtqueue_add_*()
  2014-09-07  7:20     ` Michael S. Tsirkin
@ 2014-10-14  2:21       ` Rusty Russell
  0 siblings, 0 replies; 20+ messages in thread
From: Rusty Russell @ 2014-10-14  2:21 UTC (permalink / raw)
  To: Michael S. Tsirkin, Paolo Bonzini; +Cc: netdev, Andy Lutomirski, virtualization

"Michael S. Tsirkin" <mst@redhat.com> writes:
> On Fri, Sep 05, 2014 at 12:40:50PM +0200, Paolo Bonzini wrote:
>> Il 03/09/2014 06:29, Rusty Russell ha scritto:
>> > +	sg_init_table(rq->sg, MAX_SKB_FRAGS + 2);
>> 
>> I think 2 is enough here.  That said...
>> 
>> >  	sg_set_buf(rq->sg, &hdr->hdr, sizeof hdr->hdr);
>> > -
>> >  	skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);
>> >  
>> >  	err = virtqueue_add_inbuf(rq->vq, rq->sg, 2, skb, gfp);
>> 
>> ... skb_to_sgvec will already make the sg well formed, so the
>> sg_init_table is _almost_ redundant; it is only there to remove
>> intermediate end marks.  The block layer takes care to remove
>> them, but skb_to_sgvec doesn't.

sg_init_table is still needed if CONFIG_DEBUG_SG, so I don't
think it's worth it.

Thanks,
Rusty.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH 3/3] virtio_ring: unify direct/indirect code paths.
  2014-09-11  0:47 [PATCH 0/3] virtio_net/virtio_ring Rusty Russell
@ 2014-09-11  0:47 ` Rusty Russell
  0 siblings, 0 replies; 20+ messages in thread
From: Rusty Russell @ 2014-09-11  0:47 UTC (permalink / raw)
  To: linux-kernel; +Cc: Rusty Russell, netdev, Michael S. Tsirkin

virtqueue_add() populates the virtqueue descriptor table from the sgs
given.  If it uses an indirect descriptor table, then it puts a single
descriptor in the descriptor table pointing to the kmalloc'ed indirect
table where the sg is populated.

Previously vring_add_indirect() did the allocation and the simple
linear layout.  We replace that with alloc_indirect() which allocates
the indirect table then chains it like the normal descriptor table so
we can reuse the core logic.

This slows down pktgen by less than 1/2 a percent (which uses direct
descriptors), as well as vring_bench, but it's far neater.

vring_bench before:
	1061485790-1104800648(1.08254e+09+/-6.6e+06)ns
vring_bench after:
	1125610268-1183528965(1.14172e+09+/-8e+06)ns

pktgen before:
   787781-796334(793165+/-2.4e+03)pps 365-369(367.5+/-1.2)Mb/sec (365530384-369498976(3.68028e+08+/-1.1e+06)bps) errors: 0

pktgen after:
   779988-790404(786391+/-2.5e+03)pps 361-366(364.35+/-1.3)Mb/sec (361914432-366747456(3.64885e+08+/-1.2e+06)bps) errors: 0

Now, if we make force indirect descriptors by turning off any_header_sg
in virtio_net.c:

pktgen before:
  713773-721062(718374+/-2.1e+03)pps 331-334(332.95+/-0.92)Mb/sec (331190672-334572768(3.33325e+08+/-9.6e+05)bps) errors: 0
pktgen after:
  710542-719195(714898+/-2.4e+03)pps 329-333(331.15+/-1.1)Mb/sec (329691488-333706480(3.31713e+08+/-1.1e+06)bps) errors: 0

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_ring.c | 134 ++++++++++++++++++-------------------------
 1 file changed, 55 insertions(+), 79 deletions(-)

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 374399c62080..6d2b5310c991 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -99,18 +99,10 @@ struct vring_virtqueue
 
 #define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
 
-/* Set up an indirect table of descriptors and add it to the queue. */
-static inline int vring_add_indirect(struct vring_virtqueue *vq,
-				     struct scatterlist *sgs[],
-				     unsigned int total_sg,
-				     unsigned int out_sgs,
-				     unsigned int in_sgs,
-				     gfp_t gfp)
+static struct vring_desc *alloc_indirect(unsigned int total_sg, gfp_t gfp)
 {
-	struct vring_desc *desc;
-	unsigned head;
-	struct scatterlist *sg;
-	int i, n;
+ 	struct vring_desc *desc;
+	unsigned int i;
 
 	/*
 	 * We require lowmem mappings for the descriptors because
@@ -119,51 +111,13 @@ static inline int vring_add_indirect(struct vring_virtqueue *vq,
 	 */
 	gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH);
 
-	desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
-	if (!desc)
-		return -ENOMEM;
-
-	/* Transfer entries from the sg lists into the indirect page */
-	i = 0;
-	for (n = 0; n < out_sgs; n++) {
-		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			desc[i].flags = VRING_DESC_F_NEXT;
-			desc[i].addr = sg_phys(sg);
-			desc[i].len = sg->length;
-			desc[i].next = i+1;
-			i++;
-		}
-	}
-	for (; n < (out_sgs + in_sgs); n++) {
-		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
-			desc[i].addr = sg_phys(sg);
-			desc[i].len = sg->length;
-			desc[i].next = i+1;
-			i++;
-		}
-	}
-	BUG_ON(i != total_sg);
-
-	/* Last one doesn't continue. */
-	desc[i-1].flags &= ~VRING_DESC_F_NEXT;
-	desc[i-1].next = 0;
-
-	/* We're about to use a buffer */
-	vq->vq.num_free--;
-
-	/* Use a single buffer which doesn't continue */
-	head = vq->free_head;
-	vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
-	vq->vring.desc[head].addr = virt_to_phys(desc);
-	/* kmemleak gives a false positive, as it's hidden by virt_to_phys */
-	kmemleak_ignore(desc);
-	vq->vring.desc[head].len = i * sizeof(struct vring_desc);
-
-	/* Update free pointer */
-	vq->free_head = vq->vring.desc[head].next;
+ 	desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
+ 	if (!desc)
+		return NULL;
 
-	return head;
+	for (i = 0; i < total_sg; i++)
+		desc[i].next = i+1;
+	return desc;
 }
 
 static inline int virtqueue_add(struct virtqueue *_vq,
@@ -176,8 +130,10 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 {
 	struct vring_virtqueue *vq = to_vvq(_vq);
 	struct scatterlist *sg;
-	unsigned int i, n, avail, uninitialized_var(prev);
+	struct vring_desc *desc;
+	unsigned int i, n, avail, descs_used, uninitialized_var(prev);
 	int head;
+	bool indirect;
 
 	START_USE(vq);
 
@@ -201,21 +157,40 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	}
 #endif
 
+	BUG_ON(total_sg > vq->vring.num);
+	BUG_ON(total_sg == 0);
+
+	head = vq->free_head;
+
 	/* If the host supports indirect descriptor tables, and we have multiple
 	 * buffers, then go indirect. FIXME: tune this threshold */
-	if (vq->indirect && total_sg > 1 && vq->vq.num_free) {
-		head = vring_add_indirect(vq, sgs, total_sg, 
-					  out_sgs, in_sgs, gfp);
-		if (likely(head >= 0))
-			goto add_head;
+	if (vq->indirect && total_sg > 1 && vq->vq.num_free)
+		desc = alloc_indirect(total_sg, gfp);
+	else
+		desc = NULL;
+
+	if (desc) {
+		/* Use a single buffer which doesn't continue */
+		vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
+		vq->vring.desc[head].addr = virt_to_phys(desc);
+		/* avoid kmemleak false positive (hidden by virt_to_phys) */
+		kmemleak_ignore(desc);
+		vq->vring.desc[head].len = total_sg * sizeof(struct vring_desc);
+
+		/* Set up rest to use this indirect table. */
+		i = 0;
+		descs_used = 1;
+		indirect = true;
+	} else {
+		desc = vq->vring.desc;
+		i = head;
+		descs_used = total_sg;
+		indirect = false;
 	}
 
-	BUG_ON(total_sg > vq->vring.num);
-	BUG_ON(total_sg == 0);
-
-	if (vq->vq.num_free < total_sg) {
+	if (vq->vq.num_free < descs_used) {
 		pr_debug("Can't add buf len %i - avail = %i\n",
-			 total_sg, vq->vq.num_free);
+			 descs_used, vq->vq.num_free);
 		/* FIXME: for historical reasons, we force a notify here if
 		 * there are outgoing parts to the buffer.  Presumably the
 		 * host should service the ring ASAP. */
@@ -226,34 +201,35 @@ static inline int virtqueue_add(struct virtqueue *_vq,
 	}
 
 	/* We're about to use some buffers from the free list. */
-	vq->vq.num_free -= total_sg;
+	vq->vq.num_free -= descs_used;
 
-	head = i = vq->free_head;
 	for (n = 0; n < out_sgs; n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
-			vq->vring.desc[i].addr = sg_phys(sg);
-			vq->vring.desc[i].len = sg->length;
+			desc[i].flags = VRING_DESC_F_NEXT;
+			desc[i].addr = sg_phys(sg);
+			desc[i].len = sg->length;
 			prev = i;
-			i = vq->vring.desc[i].next;
+			i = desc[i].next;
 		}
 	}
 	for (; n < (out_sgs + in_sgs); n++) {
 		for (sg = sgs[n]; sg; sg = sg_next(sg)) {
-			vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
-			vq->vring.desc[i].addr = sg_phys(sg);
-			vq->vring.desc[i].len = sg->length;
+			desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
+			desc[i].addr = sg_phys(sg);
+			desc[i].len = sg->length;
 			prev = i;
-			i = vq->vring.desc[i].next;
+			i = desc[i].next;
 		}
 	}
 	/* Last one doesn't continue. */
-	vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;
+	desc[prev].flags &= ~VRING_DESC_F_NEXT;
 
 	/* Update free pointer */
-	vq->free_head = i;
+	if (indirect)
+		vq->free_head = vq->vring.desc[head].next;
+	else
+		vq->free_head = i;
 
-add_head:
 	/* Set token. */
 	vq->data[head] = data;
 
-- 
1.9.1


^ permalink raw reply related	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2014-10-14  2:21 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-03  4:29 [PATCH 0/3] virtio: simplify virtio_ring Rusty Russell
2014-09-03  4:29 ` [PATCH 1/3] virtio_net: pass well-formed sgs to virtqueue_add_*() Rusty Russell
2014-09-05 10:40   ` Paolo Bonzini
2014-09-07  7:20     ` Michael S. Tsirkin
2014-10-14  2:21       ` Rusty Russell
2014-09-03  4:29 ` Rusty Russell
2014-09-03  4:54   ` Andy Lutomirski
2014-09-03  4:54   ` Andy Lutomirski
2014-09-03 10:23   ` Jesper Dangaard Brouer
2014-09-03 10:23   ` Jesper Dangaard Brouer
2014-09-03  4:29 ` [PATCH 2/3] virtio_ring: assume sgs are always well-formed Rusty Russell
2014-09-03  4:29 ` Rusty Russell
2014-09-03  4:29 ` [PATCH 3/3] virtio_ring: unify direct/indirect code paths Rusty Russell
2014-09-04  1:59   ` Andy Lutomirski
2014-09-05  2:55     ` Rusty Russell
2014-09-05 10:55   ` Paolo Bonzini
2014-09-08 17:32     ` Andy Lutomirski
2014-09-05 21:27 ` [PATCH 0/3] virtio: simplify virtio_ring David Miller
2014-09-05 21:27 ` David Miller
2014-09-11  0:47 [PATCH 0/3] virtio_net/virtio_ring Rusty Russell
2014-09-11  0:47 ` [PATCH 3/3] virtio_ring: unify direct/indirect code paths Rusty Russell

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.