* [PATCH 0/3] ringtest: performance optimization
@ 2016-10-06 9:39 Paolo Bonzini
2016-10-06 9:39 ` [PATCH 1/3] ringtest: use link-time optimization Paolo Bonzini
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Paolo Bonzini @ 2016-10-06 9:39 UTC (permalink / raw)
To: linux-kernel, kvm; +Cc: mst
In order to provide more reliable results, use link-time
optimization to inline the ring implementation in the guest/host
threads. The second and third patches then reimplement
https://marc.info/?l=kvm&m=147428514903134&w=2 for all ring structures,
saving on code size by removing the separate implementations of poll_avail
and poll_used.
Here are the results of the benchmarks:
          0_9         inorder     poll        new
base      0m10.019s   0m8.001s    0m8.724s    0m7.991s
LTO       0m8.063s    0m6.636s    0m7.106s    0m7.201s
havebuf   0m7.969s    0m6.601s    0m7.097s    0m7.054s
Paolo Bonzini (3):
ringtest: use link-time optimization
ringtest: commonize implementation of poll_avail/poll_used
ringtest: poll for new buffers once before updating event index
tools/virtio/ringtest/Makefile | 4 +--
tools/virtio/ringtest/main.c | 20 ++++++++---
tools/virtio/ringtest/main.h | 4 +--
tools/virtio/ringtest/noring.c | 6 ++--
tools/virtio/ringtest/ptr_ring.c | 22 +++---------
tools/virtio/ringtest/ring.c | 18 ++++------
tools/virtio/ringtest/virtio_ring_0_9.c | 64 ++++++++-------------------------
7 files changed, 49 insertions(+), 89 deletions(-)
--
2.7.4
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/3] ringtest: use link-time optimization
2016-10-06 9:39 [PATCH 0/3] ringtest: performance optimization Paolo Bonzini
@ 2016-10-06 9:39 ` Paolo Bonzini
2016-10-06 9:39 ` [PATCH 2/3] ringtest: commonize implementation of poll_avail/poll_used Paolo Bonzini
2016-10-06 9:39 ` [PATCH 3/3] ringtest: poll for new buffers once before updating event index Paolo Bonzini
2 siblings, 0 replies; 4+ messages in thread
From: Paolo Bonzini @ 2016-10-06 9:39 UTC (permalink / raw)
To: linux-kernel, kvm; +Cc: mst
By using -flto and -fwhole-program, all functions from the ring implementation
can be treated as static and possibly inlined. Force the inlining to happen in
run_guest and run_host through the GCC flatten attribute.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
tools/virtio/ringtest/Makefile | 4 ++--
tools/virtio/ringtest/main.c | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/tools/virtio/ringtest/Makefile b/tools/virtio/ringtest/Makefile
index 877a8a4721b6..c012edbdb13b 100644
--- a/tools/virtio/ringtest/Makefile
+++ b/tools/virtio/ringtest/Makefile
@@ -3,8 +3,8 @@ all:
all: ring virtio_ring_0_9 virtio_ring_poll virtio_ring_inorder ptr_ring noring
CFLAGS += -Wall
-CFLAGS += -pthread -O2 -ggdb
-LDFLAGS += -pthread -O2 -ggdb
+CFLAGS += -pthread -O2 -ggdb -flto -fwhole-program
+LDFLAGS += -pthread -O2 -ggdb -flto -fwhole-program
main.o: main.c main.h
ring.o: ring.c main.h
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
index 147abb452a6c..bda7f0dad981 100644
--- a/tools/virtio/ringtest/main.c
+++ b/tools/virtio/ringtest/main.c
@@ -96,7 +96,7 @@ void set_affinity(const char *arg)
assert(!ret);
}
-static void run_guest(void)
+static void __attribute__((__flatten__)) run_guest(void)
{
int completed_before;
int completed = 0;
@@ -149,7 +149,7 @@ static void run_guest(void)
}
}
-static void run_host(void)
+static void __attribute__((__flatten__)) run_host(void)
{
int completed_before;
int completed = 0;
--
2.7.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/3] ringtest: commonize implementation of poll_avail/poll_used
2016-10-06 9:39 [PATCH 0/3] ringtest: performance optimization Paolo Bonzini
2016-10-06 9:39 ` [PATCH 1/3] ringtest: use link-time optimization Paolo Bonzini
@ 2016-10-06 9:39 ` Paolo Bonzini
2016-10-06 9:39 ` [PATCH 3/3] ringtest: poll for new buffers once before updating event index Paolo Bonzini
2 siblings, 0 replies; 4+ messages in thread
From: Paolo Bonzini @ 2016-10-06 9:39 UTC (permalink / raw)
To: linux-kernel, kvm; +Cc: mst
Provide new primitives used_empty/avail_empty and
build poll_avail/poll_used on top of them.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
tools/virtio/ringtest/main.c | 12 +++++++
tools/virtio/ringtest/main.h | 4 +--
tools/virtio/ringtest/noring.c | 6 ++--
tools/virtio/ringtest/ptr_ring.c | 22 +++---------
tools/virtio/ringtest/ring.c | 18 ++++------
tools/virtio/ringtest/virtio_ring_0_9.c | 64 ++++++++-------------------------
6 files changed, 43 insertions(+), 83 deletions(-)
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
index bda7f0dad981..ac1269f87394 100644
--- a/tools/virtio/ringtest/main.c
+++ b/tools/virtio/ringtest/main.c
@@ -96,6 +96,12 @@ void set_affinity(const char *arg)
assert(!ret);
}
+void poll_used(void)
+{
+ while (used_empty())
+ busy_wait();
+}
+
static void __attribute__((__flatten__)) run_guest(void)
{
int completed_before;
@@ -149,6 +155,12 @@ static void __attribute__((__flatten__)) run_guest(void)
}
}
+void poll_avail(void)
+{
+ while (avail_empty())
+ busy_wait();
+}
+
static void __attribute__((__flatten__)) run_host(void)
{
int completed_before;
diff --git a/tools/virtio/ringtest/main.h b/tools/virtio/ringtest/main.h
index 16917acb0ade..c373ca870322 100644
--- a/tools/virtio/ringtest/main.h
+++ b/tools/virtio/ringtest/main.h
@@ -56,15 +56,15 @@ void alloc_ring(void);
int add_inbuf(unsigned, void *, void *);
void *get_buf(unsigned *, void **);
void disable_call();
+bool used_empty();
bool enable_call();
void kick_available();
-void poll_used();
/* host side */
void disable_kick();
+bool avail_empty();
bool enable_kick();
bool use_buf(unsigned *, void **);
void call_used();
-void poll_avail();
/* implemented by main */
extern bool do_sleep;
diff --git a/tools/virtio/ringtest/noring.c b/tools/virtio/ringtest/noring.c
index eda2f4824130..ad47f90aad3f 100644
--- a/tools/virtio/ringtest/noring.c
+++ b/tools/virtio/ringtest/noring.c
@@ -24,8 +24,9 @@ void *get_buf(unsigned *lenp, void **bufp)
return "Buffer";
}
-void poll_used(void)
+bool used_empty()
{
+ return false;
}
void disable_call()
@@ -54,8 +55,9 @@ bool enable_kick()
assert(0);
}
-void poll_avail(void)
+bool avail_empty()
{
+ return false;
}
bool use_buf(unsigned *lenp, void **bufp)
diff --git a/tools/virtio/ringtest/ptr_ring.c b/tools/virtio/ringtest/ptr_ring.c
index bd2ad1d3b7a9..6f31cf84640a 100644
--- a/tools/virtio/ringtest/ptr_ring.c
+++ b/tools/virtio/ringtest/ptr_ring.c
@@ -133,18 +133,9 @@ void *get_buf(unsigned *lenp, void **bufp)
return datap;
}
-void poll_used(void)
+bool used_empty()
{
- void *b;
-
- do {
- if (tailcnt == headcnt || __ptr_ring_full(&array)) {
- b = NULL;
- barrier();
- } else {
- b = "Buffer\n";
- }
- } while (!b);
+ return (tailcnt == headcnt || __ptr_ring_full(&array));
}
void disable_call()
@@ -173,14 +164,9 @@ bool enable_kick()
assert(0);
}
-void poll_avail(void)
+bool avail_empty()
{
- void *b;
-
- do {
- barrier();
- b = __ptr_ring_peek(&array);
- } while (!b);
+ return !__ptr_ring_peek(&array);
}
bool use_buf(unsigned *lenp, void **bufp)
diff --git a/tools/virtio/ringtest/ring.c b/tools/virtio/ringtest/ring.c
index c25c8d248b6b..d9a571bbb22d 100644
--- a/tools/virtio/ringtest/ring.c
+++ b/tools/virtio/ringtest/ring.c
@@ -163,12 +163,11 @@ void *get_buf(unsigned *lenp, void **bufp)
return datap;
}
-void poll_used(void)
+bool used_empty()
{
unsigned head = (ring_size - 1) & guest.last_used_idx;
- while (ring[head].flags & DESC_HW)
- busy_wait();
+ return (ring[head].flags & DESC_HW);
}
void disable_call()
@@ -180,13 +179,11 @@ void disable_call()
bool enable_call()
{
- unsigned head = (ring_size - 1) & guest.last_used_idx;
-
event->call_index = guest.last_used_idx;
/* Flush call index write */
/* Barrier D (for pairing) */
smp_mb();
- return ring[head].flags & DESC_HW;
+ return used_empty();
}
void kick_available(void)
@@ -213,20 +210,17 @@ void disable_kick()
bool enable_kick()
{
- unsigned head = (ring_size - 1) & host.used_idx;
-
event->kick_index = host.used_idx;
/* Barrier C (for pairing) */
smp_mb();
- return !(ring[head].flags & DESC_HW);
+ return avail_empty();
}
-void poll_avail(void)
+bool avail_empty()
{
unsigned head = (ring_size - 1) & host.used_idx;
- while (!(ring[head].flags & DESC_HW))
- busy_wait();
+ return !(ring[head].flags & DESC_HW);
}
bool use_buf(unsigned *lenp, void **bufp)
diff --git a/tools/virtio/ringtest/virtio_ring_0_9.c b/tools/virtio/ringtest/virtio_ring_0_9.c
index 761866212aac..40b2e7e94e68 100644
--- a/tools/virtio/ringtest/virtio_ring_0_9.c
+++ b/tools/virtio/ringtest/virtio_ring_0_9.c
@@ -194,24 +194,16 @@ void *get_buf(unsigned *lenp, void **bufp)
return datap;
}
-void poll_used(void)
+bool used_empty()
{
+ unsigned short last_used_idx = guest.last_used_idx;
#ifdef RING_POLL
- unsigned head = (ring_size - 1) & guest.last_used_idx;
+ unsigned short head = last_used_idx & (ring_size - 1);
+ unsigned index = ring.used->ring[head].id;
- for (;;) {
- unsigned index = ring.used->ring[head].id;
-
- if ((index ^ guest.last_used_idx ^ 0x8000) & ~(ring_size - 1))
- busy_wait();
- else
- break;
- }
+ return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
#else
- unsigned head = guest.last_used_idx;
-
- while (ring.used->idx == head)
- busy_wait();
+ return ring.used->idx == last_used_idx;
#endif
}
@@ -224,22 +216,11 @@ void disable_call()
bool enable_call()
{
- unsigned short last_used_idx;
-
- vring_used_event(&ring) = (last_used_idx = guest.last_used_idx);
+ vring_used_event(&ring) = guest.last_used_idx;
/* Flush call index write */
/* Barrier D (for pairing) */
smp_mb();
-#ifdef RING_POLL
- {
- unsigned short head = last_used_idx & (ring_size - 1);
- unsigned index = ring.used->ring[head].id;
-
- return (index ^ last_used_idx ^ 0x8000) & ~(ring_size - 1);
- }
-#else
- return ring.used->idx == last_used_idx;
-#endif
+ return used_empty();
}
void kick_available(void)
@@ -266,36 +247,21 @@ void disable_kick()
bool enable_kick()
{
- unsigned head = host.used_idx;
-
- vring_avail_event(&ring) = head;
+ vring_avail_event(&ring) = host.used_idx;
/* Barrier C (for pairing) */
smp_mb();
-#ifdef RING_POLL
- {
- unsigned index = ring.avail->ring[head & (ring_size - 1)];
-
- return (index ^ head ^ 0x8000) & ~(ring_size - 1);
- }
-#else
- return head == ring.avail->idx;
-#endif
+ return avail_empty();
}
-void poll_avail(void)
+bool avail_empty()
{
unsigned head = host.used_idx;
#ifdef RING_POLL
- for (;;) {
- unsigned index = ring.avail->ring[head & (ring_size - 1)];
- if ((index ^ head ^ 0x8000) & ~(ring_size - 1))
- busy_wait();
- else
- break;
- }
+ unsigned index = ring.avail->ring[head & (ring_size - 1)];
+
+ return ((index ^ head ^ 0x8000) & ~(ring_size - 1));
#else
- while (ring.avail->idx == head)
- busy_wait();
+ return head == ring.avail->idx;
#endif
}
--
2.7.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/3] ringtest: poll for new buffers once before updating event index
2016-10-06 9:39 [PATCH 0/3] ringtest: performance optimization Paolo Bonzini
2016-10-06 9:39 ` [PATCH 1/3] ringtest: use link-time optimization Paolo Bonzini
2016-10-06 9:39 ` [PATCH 2/3] ringtest: commonize implementation of poll_avail/poll_used Paolo Bonzini
@ 2016-10-06 9:39 ` Paolo Bonzini
2 siblings, 0 replies; 4+ messages in thread
From: Paolo Bonzini @ 2016-10-06 9:39 UTC (permalink / raw)
To: linux-kernel, kvm; +Cc: mst
Updating the event index requires a memory barrier and causes more work
on the other side to actually signal the event. The update is unnecessary
if a new buffer has already appeared on the ring, so poll once before
doing it.
The effect of this on the 0.9 ring implementation is pretty much
invisible, but on the new-style ring it provides a consistent 3%
performance improvement.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
tools/virtio/ringtest/main.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c
index ac1269f87394..19543799c621 100644
--- a/tools/virtio/ringtest/main.c
+++ b/tools/virtio/ringtest/main.c
@@ -147,7 +147,7 @@ static void __attribute__((__flatten__)) run_guest(void)
assert(completed <= bufs);
assert(started <= bufs);
if (do_sleep) {
- if (enable_call())
+ if (used_empty() && enable_call())
wait_for_call();
} else {
poll_used();
@@ -172,7 +172,7 @@ static void __attribute__((__flatten__)) run_host(void)
for (;;) {
if (do_sleep) {
- if (enable_kick())
+ if (avail_empty() && enable_kick())
wait_for_kick();
} else {
poll_avail();
--
2.7.4
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2016-10-06 9:39 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-06 9:39 [PATCH 0/3] ringtest: performance optimization Paolo Bonzini
2016-10-06 9:39 ` [PATCH 1/3] ringtest: use link-time optimization Paolo Bonzini
2016-10-06 9:39 ` [PATCH 2/3] ringtest: commonize implementation of poll_avail/poll_used Paolo Bonzini
2016-10-06 9:39 ` [PATCH 3/3] ringtest: poll for new buffers once before updating event index Paolo Bonzini
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.