All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/3] ringtest: performance optimization
@ 2016-10-06  9:39 Paolo Bonzini
  2016-10-06  9:39 ` [PATCH 1/3] ringtest: use link-time optimization Paolo Bonzini
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Paolo Bonzini @ 2016-10-06  9:39 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: mst

In order to provide more reliable results, use link-time
optimization to inline the ring implementation in the guest/host
threads.  The second and third patch then are a reimplementation of
https://marc.info/?l=kvm&m=147428514903134&w=2 for all ring structures,
saving on code size by removing separate implementations of poll_avail
and poll_used.

Here are the results of the benchmarks:

	0_9		inorder		poll		new
base    0m10.019s       0m8.001s        0m8.724s        0m7.991s
LTO     0m8.063s        0m6.636s        0m7.106s        0m7.201s
havebuf 0m7.969s        0m6.601s        0m7.097s        0m7.054s


Paolo Bonzini (3):
  ringtest: use link-time optimization
  ringtest: commonize implementation of poll_avail/poll_used
  ringtest: poll for new buffers once before updating event index

 tools/virtio/ringtest/Makefile          |  4 +--
 tools/virtio/ringtest/main.c            | 20 ++++++++---
 tools/virtio/ringtest/main.h            |  4 +--
 tools/virtio/ringtest/noring.c          |  6 ++--
 tools/virtio/ringtest/ptr_ring.c        | 22 +++---------
 tools/virtio/ringtest/ring.c            | 18 ++++------
 tools/virtio/ringtest/virtio_ring_0_9.c | 64 ++++++++-------------------------
 7 files changed, 49 insertions(+), 89 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/3] ringtest: use link-time optimization
  2016-10-06  9:39 [PATCH 0/3] ringtest: performance optimization Paolo Bonzini
@ 2016-10-06  9:39 ` Paolo Bonzini
  2016-10-06  9:39 ` [PATCH 2/3] ringtest: commonize implementation of poll_avail/poll_used Paolo Bonzini
  2016-10-06  9:39 ` [PATCH 3/3] ringtest: poll for new buffers once before updating event index Paolo Bonzini
  2 siblings, 0 replies; 4+ messages in thread
From: Paolo Bonzini @ 2016-10-06  9:39 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: mst

By using -flto and -fwhole-program, all functions from the ring implementation
can be treated as static and possibly inlined.  Force this to happen through
the GCC flatten attribute.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/virtio/ringtest/Makefile | 4 ++--
 tools/virtio/ringtest/main.c   | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
index 877a8a4721b6..c012edbdb13b 100644
--- a/tools/virtio/ringtest/Makefile
+++ b/tools/virtio/ringtest/Makefile
@@ -3,8 +3,8 @@ all:
 all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder ptr_ring noring
 
 CFLAGS += -Wall
-CFLAGS += -pthread -O2 -ggdb
-LDFLAGS += -pthread -O2 -ggdb
+CFLAGS += -pthread -O2 -ggdb -flto -fwhole-program
+LDFLAGS += -pthread -O2 -ggdb -flto -fwhole-program
 
 main.o: main.c main.h
 ring.o: ring.c main.h
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
index 147abb452a6c..bda7f0dad981 100644
--- a/tools/virtio/ringtest/main.c
+++ b/tools/virtio/ringtest/main.c
@@ -96,7 +96,7 @@ void set_affinity(const char *arg)
 	assert(!ret);
 }
 
-static void run_guest(void)
+static void __attribute__((__flatten__)) run_guest(void)
 {
 	int completed_before;
 	int completed = 0;
@@ -149,7 +149,7 @@ static void run_guest(void)
 	}
 }
 
-static void run_host(void)
+static void __attribute__((__flatten__)) run_host(void)
 {
 	int completed_before;
 	int completed = 0;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/3] ringtest: commonize implementation of poll_avail/poll_used
  2016-10-06  9:39 [PATCH 0/3] ringtest: performance optimization Paolo Bonzini
  2016-10-06  9:39 ` [PATCH 1/3] ringtest: use link-time optimization Paolo Bonzini
@ 2016-10-06  9:39 ` Paolo Bonzini
  2016-10-06  9:39 ` [PATCH 3/3] ringtest: poll for new buffers once before updating event index Paolo Bonzini
  2 siblings, 0 replies; 4+ messages in thread
From: Paolo Bonzini @ 2016-10-06  9:39 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: mst

Provide new primitives used_empty/avail_empty and
build poll_avail/poll_used on top of it.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/virtio/ringtest/main.c            | 12 +++++++
 tools/virtio/ringtest/main.h            |  4 +--
 tools/virtio/ringtest/noring.c          |  6 ++--
 tools/virtio/ringtest/ptr_ring.c        | 22 +++---------
 tools/virtio/ringtest/ring.c            | 18 ++++------
 tools/virtio/ringtest/virtio_ring_0_9.c | 64 ++++++++-------------------------
 6 files changed, 43 insertions(+), 83 deletions(-)

diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
index bda7f0dad981..ac1269f87394 100644
--- a/tools/virtio/ringtest/main.c
+++ b/tools/virtio/ringtest/main.c
@@ -96,6 +96,12 @@ void set_affinity(const char *arg)
 	assert(!ret);
 }
 
+void poll_used(void)
+{
+	while (used_empty())
+		busy_wait();
+}
+
 static void __attribute__((__flatten__)) run_guest(void)
 {
 	int completed_before;
@@ -149,6 +155,12 @@ static void __attribute__((__flatten__)) run_guest(void)
 	}
 }
 
+void poll_avail(void)
+{
+	while (avail_empty())
+		busy_wait();
+}
+
 static void __attribute__((__flatten__)) run_host(void)
 {
 	int completed_before;
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
index 16917acb0ade..c373ca870322 100644
--- a/tools/virtio/ringtest/main.h
+++ b/tools/virtio/ringtest/main.h
@@ -56,15 +56,15 @@ void alloc_ring(void);
 int add_inbuf(unsigned, void *, void *);
 void *get_buf(unsigned *, void **);
 void disable_call();
+bool used_empty();
 bool enable_call();
 void kick_available();
-void poll_used();
 /* host side */
 void disable_kick();
+bool avail_empty();
 bool enable_kick();
 bool use_buf(unsigned *, void **);
 void call_used();
-void poll_avail();
 
 /* implemented by main */
 extern bool do_sleep;
diff --git a/tools/virtio/ringtest/noring.c b/tools/virtio/ringtest/noring.c
index eda2f4824130..ad47f90aad3f 100644
--- a/tools/virtio/ringtest/noring.c
+++ b/tools/virtio/ringtest/noring.c
@@ -24,8 +24,9 @@ void *get_buf(unsigned *lenp, void **bufp)
 	return "Buffer";
 }
 
-void poll_used(void)
+bool used_empty()
 {
+	return false;
 }
 
 void disable_call()
@@ -54,8 +55,9 @@ bool enable_kick()
 	assert(0);
 }
 
-void poll_avail(void)
+bool avail_empty()
 {
+	return false;
 }
 
 bool use_buf(unsigned *lenp, void **bufp)
diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
index bd2ad1d3b7a9..6f31cf84640a 100644
--- a/tools/virtio/ringtest/ptr_ring.c
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -133,18 +133,9 @@ void *get_buf(unsigned *lenp, void **bufp)
 	return datap;
 }
 
-void poll_used(void)
+bool used_empty()
 {
-	void *b;
-
-	do {
-		if (tailcnt == headcnt || __ptr_ring_full(&array)) {
-			b = NULL;
-			barrier();
-		} else {
-			b = "Buffer\n";
-		}
-	} while (!b);
+	return (tailcnt == headcnt || __ptr_ring_full(&array));
 }
 
 void disable_call()
@@ -173,14 +164,9 @@ bool enable_kick()
 	assert(0);
 }
 
-void poll_avail(void)
+bool avail_empty()
 {
-	void *b;
-
-	do {
-		barrier();
-		b = __ptr_ring_peek(&array);
-	} while (!b);
+	return !__ptr_ring_peek(&array);
 }
 
 bool use_buf(unsigned *lenp, void **bufp)
diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c
index c25c8d248b6b..d9a571bbb22d 100644
--- a/tools/virtio/ringtest/ring.c
+++ b/tools/virtio/ringtest/ring.c
@@ -163,12 +163,11 @@ void *get_buf(unsigned *lenp, void **bufp)
 	return datap;
 }
 
-void poll_used(void)
+bool used_empty()
 {
 	unsigned head = (ring_size - 1) & guest.last_used_idx;
 
-	while (ring[head].flags & DESC_HW)
-		busy_wait();
+	return (ring[head].flags & DESC_HW);
 }
 
 void disable_call()
@@ -180,13 +179,11 @@ void disable_call()
 
 bool enable_call()
 {
-	unsigned head = (ring_size - 1) & guest.last_used_idx;
-
 	event->call_index = guest.last_used_idx;
 	/* Flush call index write */
 	/* Barrier D (for pairing) */
 	smp_mb();
-	return ring[head].flags & DESC_HW;
+	return used_empty();
 }
 
 void kick_available(void)
@@ -213,20 +210,17 @@ void disable_kick()
 
 bool enable_kick()
 {
-	unsigned head = (ring_size - 1) & host.used_idx;
-
 	event->kick_index = host.used_idx;
 	/* Barrier C (for pairing) */
 	smp_mb();
-	return !(ring[head].flags & DESC_HW);
+	return avail_empty();
 }
 
-void poll_avail(void)
+bool avail_empty()
 {
 	unsigned head = (ring_size - 1) & host.used_idx;
 
-	while (!(ring[head].flags & DESC_HW))
-		busy_wait();
+	return !(ring[head].flags & DESC_HW);
 }
 
 bool use_buf(unsigned *lenp, void **bufp)
diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c
index 761866212aac..40b2e7e94e68 100644
--- a/tools/virtio/ringtest/virtio_ring_0_9.c
+++ b/tools/virtio/ringtest/virtio_ring_0_9.c
@@ -194,24 +194,16 @@ void *get_buf(unsigned *lenp, void **bufp)
 	return datap;
 }
 
-void poll_used(void)
+bool used_empty()
 {
+	unsigned short last_used_idx = guest.last_used_idx;
 #ifdef RING_POLL
-	unsigned head = (ring_size - 1) & guest.last_used_idx;
+	unsigned short head = last_used_idx & (ring_size - 1);
+	unsigned index = ring.used->ring[head].id;
 
-	for (;;) {
-		unsigned index = ring.used->ring[head].id;
-
-		if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
-			busy_wait();
-		else
-			break;
-	}
+	return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
 #else
-	unsigned head = guest.last_used_idx;
-
-	while (ring.used->idx == head)
-		busy_wait();
+	return ring.used->idx == last_used_idx;
 #endif
 }
 
@@ -224,22 +216,11 @@ void disable_call()
 
 bool enable_call()
 {
-	unsigned short last_used_idx;
-
-	vring_used_event(&ring) = (last_used_idx = guest.last_used_idx);
+	vring_used_event(&ring) = guest.last_used_idx;
 	/* Flush call index write */
 	/* Barrier D (for pairing) */
 	smp_mb();
-#ifdef RING_POLL
-	{
-		unsigned short head = last_used_idx & (ring_size - 1);
-		unsigned index = ring.used->ring[head].id;
-
-		return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
-	}
-#else
-	return ring.used->idx == last_used_idx;
-#endif
+	return used_empty();
 }
 
 void kick_available(void)
@@ -266,36 +247,21 @@ void disable_kick()
 
 bool enable_kick()
 {
-	unsigned head = host.used_idx;
-
-	vring_avail_event(&ring) = head;
+	vring_avail_event(&ring) = host.used_idx;
 	/* Barrier C (for pairing) */
 	smp_mb();
-#ifdef RING_POLL
-	{
-		unsigned index = ring.avail->ring[head & (ring_size - 1)];
-
-		return (index ^ head ^ 0x8000) & ~(ring_size - 1);
-	}
-#else
-	return head == ring.avail->idx;
-#endif
+	return avail_empty();
 }
 
-void poll_avail(void)
+bool avail_empty()
 {
 	unsigned head = host.used_idx;
 #ifdef RING_POLL
-	for (;;) {
-		unsigned index = ring.avail->ring[head & (ring_size - 1)];
-		if ((index ^ head ^ 0x8000) & ~(ring_size - 1))
-			busy_wait();
-		else
-			break;
-	}
+	unsigned index = ring.avail->ring[head & (ring_size - 1)];
+
+	return ((index ^ head ^ 0x8000) & ~(ring_size - 1));
 #else
-	while (ring.avail->idx == head)
-		busy_wait();
+	return head == ring.avail->idx;
 #endif
 }
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/3] ringtest: poll for new buffers once before updating event index
  2016-10-06  9:39 [PATCH 0/3] ringtest: performance optimization Paolo Bonzini
  2016-10-06  9:39 ` [PATCH 1/3] ringtest: use link-time optimization Paolo Bonzini
  2016-10-06  9:39 ` [PATCH 2/3] ringtest: commonize implementation of poll_avail/poll_used Paolo Bonzini
@ 2016-10-06  9:39 ` Paolo Bonzini
  2 siblings, 0 replies; 4+ messages in thread
From: Paolo Bonzini @ 2016-10-06  9:39 UTC (permalink / raw)
  To: linux-kernel, kvm; +Cc: mst

Updating the event index has a memory barrier and causes more work
on the other side to actually signal the event.  It is unnecessary
if a new buffer has already appeared on the ring, so poll once before
doing the update.

The effect of this on the 0.9 ring implementation is pretty much
invisible, but on the new-style ring it provides a consistent 3%
performance improvement.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/virtio/ringtest/main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
index ac1269f87394..19543799c621 100644
--- a/tools/virtio/ringtest/main.c
+++ b/tools/virtio/ringtest/main.c
@@ -147,7 +147,7 @@ static void __attribute__((__flatten__)) run_guest(void)
 		assert(completed <= bufs);
 		assert(started <= bufs);
 		if (do_sleep) {
-			if (enable_call())
+			if (used_empty() && enable_call())
 				wait_for_call();
 		} else {
 			poll_used();
@@ -172,7 +172,7 @@ static void __attribute__((__flatten__)) run_host(void)
 
 	for (;;) {
 		if (do_sleep) {
-			if (enable_kick())
+			if (avail_empty() && enable_kick())
 				wait_for_kick();
 		} else {
 			poll_avail();
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-10-06  9:39 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-06  9:39 [PATCH 0/3] ringtest: performance optimization Paolo Bonzini
2016-10-06  9:39 ` [PATCH 1/3] ringtest: use link-time optimization Paolo Bonzini
2016-10-06  9:39 ` [PATCH 2/3] ringtest: commonize implementation of poll_avail/poll_used Paolo Bonzini
2016-10-06  9:39 ` [PATCH 3/3] ringtest: poll for new buffers once before updating event index Paolo Bonzini

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.