[Qemu-devel] [PATCH v3 0/5] qtest: base64 r/w and faster memset

All of lore.kernel.org
 help / color / mirror / Atom feed

* [Qemu-devel] [PATCH v3 0/5] qtest: base64 r/w and faster memset
@ 2015-05-05 22:22 John Snow
  2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 1/5] qtest: allow arbitrarily long sends John Snow
                   ` (5 more replies)
  0 siblings, 6 replies; 19+ messages in thread
From: John Snow @ 2015-05-05 22:22 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, John Snow, afaerber

Adds new qtest protocol commands for base64 reads and writes,
as well as a proper command for memset instead of faking it
via write.

This improves the ahci-test performance on my machine from about
14 seconds to about ~3.5.

v3:
 - Including a memset optimization.
v2:
 - Resend as non-RFC.

==
For convenience, this branch is available at:
https://github.com/jnsnow/qemu.git branch qtest_protocol
https://github.com/jnsnow/qemu/tree/qtest_protocol

This version is tagged qtest_protocol-v3:
https://github.com/jnsnow/qemu/releases/tag/qtest_protocol-v3
==

John Snow (5):
  qtest: allow arbitrarily long sends
  qtest: Add base64 encoded read/write
  qtest: add memset to qtest protocol
  qtest: precompute hex nibs
  libqos/ahci: Swap memread/write with bufread/write

 qtest.c             | 147 +++++++++++++++++++++++++++++++++++++++++++++-------
 tests/ahci-test.c   |   8 +--
 tests/libqos/ahci.c |   4 +-
 tests/libqtest.c    |  56 ++++++++++++++++----
 tests/libqtest.h    |  49 ++++++++++++++++++
 5 files changed, 230 insertions(+), 34 deletions(-)

-- 
2.1.0

^ permalink raw reply	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH v3 1/5] qtest: allow arbitrarily long sends
  2015-05-05 22:22 [Qemu-devel] [PATCH v3 0/5] qtest: base64 r/w and faster memset John Snow
@ 2015-05-05 22:22 ` John Snow
  2015-05-05 23:22   ` Eric Blake
  2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 2/5] qtest: Add base64 encoded read/write John Snow
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 19+ messages in thread
From: John Snow @ 2015-05-05 22:22 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, John Snow, afaerber

qtest currently has a static buffer of size 1024 that if we
overflow, ignores the additional data silently which leads
to hangs or stream failures.

Use glib's string facilities to allow arbitrarily long data,
but split this off into a new function, qtest_sendf.

Static data can still be sent using qtest_send, which avoids
the malloc/copy overflow.

Signed-off-by: John Snow <jsnow@redhat.com>
---
 qtest.c | 46 ++++++++++++++++++++++++++++------------------
 1 file changed, 28 insertions(+), 18 deletions(-)

diff --git a/qtest.c b/qtest.c
index 8d1e66c..f635a48 100644
--- a/qtest.c
+++ b/qtest.c
@@ -182,21 +182,29 @@ static void qtest_send_prefix(CharDriverState *chr)
             (long) tv.tv_sec, (long) tv.tv_usec);
 }
 
-static void GCC_FMT_ATTR(2, 3) qtest_send(CharDriverState *chr,
-                                          const char *fmt, ...)
+static void do_qtest_send(CharDriverState *chr, const char *str, size_t len)
+{
+    qemu_chr_fe_write_all(chr, (uint8_t *)str, len);
+    if (qtest_log_fp && qtest_opened) {
+        fprintf(qtest_log_fp, "%s", str);
+    }
+}
+
+static void qtest_send(CharDriverState *chr, const char *str)
+{
+    do_qtest_send(chr, str, strlen(str));
+}
+
+static void GCC_FMT_ATTR(2, 3) qtest_sendf(CharDriverState *chr,
+                                           const char *fmt, ...)
 {
     va_list ap;
-    char buffer[1024];
-    size_t len;
+    gchar *buffer;
 
     va_start(ap, fmt);
-    len = vsnprintf(buffer, sizeof(buffer), fmt, ap);
+    buffer = g_strdup_vprintf(fmt, ap);
+    qtest_send(chr, buffer);
     va_end(ap);
-
-    qemu_chr_fe_write_all(chr, (uint8_t *)buffer, len);
-    if (qtest_log_fp && qtest_opened) {
-        fprintf(qtest_log_fp, "%s", buffer);
-    }
 }
 
 static void qtest_irq_handler(void *opaque, int n, int level)
@@ -208,8 +216,8 @@ static void qtest_irq_handler(void *opaque, int n, int level)
         CharDriverState *chr = qtest_chr;
         irq_levels[n] = level;
         qtest_send_prefix(chr);
-        qtest_send(chr, "IRQ %s %d\n",
-                   level ? "raise" : "lower", n);
+        qtest_sendf(chr, "IRQ %s %d\n",
+                    level ? "raise" : "lower", n);
     }
 }
 
@@ -318,7 +326,7 @@ static void qtest_process_command(CharDriverState *chr, gchar **words)
             value = cpu_inl(addr);
         }
         qtest_send_prefix(chr);
-        qtest_send(chr, "OK 0x%04x\n", value);
+        qtest_sendf(chr, "OK 0x%04x\n", value);
     } else if (strcmp(words[0], "writeb") == 0 ||
                strcmp(words[0], "writew") == 0 ||
                strcmp(words[0], "writel") == 0 ||
@@ -375,7 +383,7 @@ static void qtest_process_command(CharDriverState *chr, gchar **words)
             tswap64s(&value);
         }
         qtest_send_prefix(chr);
-        qtest_send(chr, "OK 0x%016" PRIx64 "\n", value);
+        qtest_sendf(chr, "OK 0x%016" PRIx64 "\n", value);
     } else if (strcmp(words[0], "read") == 0) {
         uint64_t addr, len, i;
         uint8_t *data;
@@ -390,7 +398,7 @@ static void qtest_process_command(CharDriverState *chr, gchar **words)
         qtest_send_prefix(chr);
         qtest_send(chr, "OK 0x");
         for (i = 0; i < len; i++) {
-            qtest_send(chr, "%02x", data[i]);
+            qtest_sendf(chr, "%02x", data[i]);
         }
         qtest_send(chr, "\n");
 
@@ -434,7 +442,8 @@ static void qtest_process_command(CharDriverState *chr, gchar **words)
         }
         qtest_clock_warp(qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + ns);
         qtest_send_prefix(chr);
-        qtest_send(chr, "OK %"PRIi64"\n", (int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
+        qtest_sendf(chr, "OK %"PRIi64"\n",
+                    (int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
     } else if (qtest_enabled() && strcmp(words[0], "clock_set") == 0) {
         int64_t ns;
 
@@ -442,10 +451,11 @@ static void qtest_process_command(CharDriverState *chr, gchar **words)
         ns = strtoll(words[1], NULL, 0);
         qtest_clock_warp(ns);
         qtest_send_prefix(chr);
-        qtest_send(chr, "OK %"PRIi64"\n", (int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
+        qtest_sendf(chr, "OK %"PRIi64"\n",
+                    (int64_t)qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL));
     } else {
         qtest_send_prefix(chr);
-        qtest_send(chr, "FAIL Unknown command `%s'\n", words[0]);
+        qtest_sendf(chr, "FAIL Unknown command `%s'\n", words[0]);
     }
 }
 
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH v3 2/5] qtest: Add base64 encoded read/write
  2015-05-05 22:22 [Qemu-devel] [PATCH v3 0/5] qtest: base64 r/w and faster memset John Snow
  2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 1/5] qtest: allow arbitrarily long sends John Snow
@ 2015-05-05 22:22 ` John Snow
  2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 3/5] qtest: add memset to qtest protocol John Snow
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 19+ messages in thread
From: John Snow @ 2015-05-05 22:22 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, John Snow, afaerber

For larger pieces of data that won't need to be debugged and
viewing the hex nibbles is unlikely to be useful, we can encode
data using base64 instead of encoding each byte as %02x, which
leads to some space savings and faster reads/writes.

For now, the default is left as hex nibbles in memwrite() and memread().
For the purposes of making qtest io easier to read and debug, some
callers may want to specify using the old encoding format for small
patches of data where the savings from base64 wouldn't be that profound.

memwrite/memread use a data encoding that takes 2x the size of the original
buffer, but base64 uses "only" (4/3)x, so for larger buffers we can save a
decent amount of time and space.

Signed-off-by: John Snow <jsnow@redhat.com>
---
 qtest.c          | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tests/libqtest.c | 31 +++++++++++++++++++++++++
 tests/libqtest.h | 49 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 149 insertions(+)

diff --git a/qtest.c b/qtest.c
index f635a48..fd85e04 100644
--- a/qtest.c
+++ b/qtest.c
@@ -119,12 +119,21 @@ static bool qtest_opened;
  *  > write ADDR SIZE DATA
  *  < OK
  *
+ *  > b64read ADDR SIZE
+ *  < OK B64_DATA
+ *
+ *  > b64write ADDR SIZE B64_DATA
+ *  < OK
+ *
  * ADDR, SIZE, VALUE are all integers parsed with strtoul() with a base of 0.
  *
  * DATA is an arbitrarily long hex number prefixed with '0x'.  If it's smaller
  * than the expected size, the value will be zero filled at the end of the data
  * sequence.
  *
+ * B64_DATA is an arbitrarily long base64 encoded string.
+ * If the sizes do not match, the data will be truncated.
+ *
  * IRQ management:
  *
  *  > irq_intercept_in QOM-PATH
@@ -182,6 +191,21 @@ static void qtest_send_prefix(CharDriverState *chr)
             (long) tv.tv_sec, (long) tv.tv_usec);
 }
 
+static void GCC_FMT_ATTR(1, 2) qtest_log_send(const char *fmt, ...)
+{
+    va_list ap;
+
+    if (!qtest_log_fp || !qtest_opened) {
+        return;
+    }
+
+    qtest_send_prefix(NULL);
+
+    va_start(ap, fmt);
+    vfprintf(qtest_log_fp, fmt, ap);
+    va_end(ap);
+}
+
 static void do_qtest_send(CharDriverState *chr, const char *str, size_t len)
 {
     qemu_chr_fe_write_all(chr, (uint8_t *)str, len);
@@ -403,6 +427,23 @@ static void qtest_process_command(CharDriverState *chr, gchar **words)
         qtest_send(chr, "\n");
 
         g_free(data);
+    } else if (strcmp(words[0], "b64read") == 0) {
+        uint64_t addr, len;
+        uint8_t *data;
+        gchar *b64_data;
+
+        g_assert(words[1] && words[2]);
+        addr = strtoull(words[1], NULL, 0);
+        len = strtoull(words[2], NULL, 0);
+
+        data = g_malloc(len);
+        cpu_physical_memory_read(addr, data, len);
+        b64_data = g_base64_encode(data, len);
+        qtest_send_prefix(chr);
+        qtest_sendf(chr, "OK %s\n", b64_data);
+
+        g_free(data);
+        g_free(b64_data);
     } else if (strcmp(words[0], "write") == 0) {
         uint64_t addr, len, i;
         uint8_t *data;
@@ -432,6 +473,34 @@ static void qtest_process_command(CharDriverState *chr, gchar **words)
 
         qtest_send_prefix(chr);
         qtest_send(chr, "OK\n");
+    }  else if (strcmp(words[0], "b64write") == 0) {
+        uint64_t addr, len;
+        uint8_t *data;
+        size_t data_len;
+        gsize out_len;
+
+        g_assert(words[1] && words[2] && words[3]);
+        addr = strtoull(words[1], NULL, 0);
+        len = strtoull(words[2], NULL, 0);
+
+        data_len = strlen(words[3]);
+        if (data_len < 3) {
+            qtest_send(chr, "ERR invalid argument size\n");
+            return;
+        }
+
+        data = g_base64_decode_inplace(words[3], &out_len);
+        if (out_len != len) {
+            qtest_log_send("b64write: data length mismatch (told %zu, "
+                           "found %zu)\n",
+                           len, out_len);
+            out_len = MIN(out_len, len);
+        }
+
+        cpu_physical_memory_write(addr, data, out_len);
+
+        qtest_send_prefix(chr);
+        qtest_send(chr, "OK\n");
     } else if (qtest_enabled() && strcmp(words[0], "clock_step") == 0) {
         int64_t ns;
 
diff --git a/tests/libqtest.c b/tests/libqtest.c
index a525dc5..5f57005 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -695,6 +695,37 @@ void qtest_add_data_func(const char *str, const void *data, void (*fn))
     g_free(path);
 }
 
+void qtest_bufwrite(QTestState *s, uint64_t addr, const void *data, size_t size)
+{
+    gchar *bdata;
+
+    bdata = g_base64_encode(data, size);
+    qtest_sendf(s, "b64write 0x%" PRIx64 " 0x%zx ", addr, size);
+    socket_send(s->fd, bdata, strlen(bdata));
+    socket_send(s->fd, "\n", 1);
+    qtest_rsp(s, 0);
+    g_free(bdata);
+}
+
+void qtest_bufread(QTestState *s, uint64_t addr, void *data, size_t size)
+{
+    gchar **args;
+    size_t len;
+
+    qtest_sendf(s, "b64read 0x%" PRIx64 " 0x%zx\n", addr, size);
+    args = qtest_rsp(s, 2);
+
+    g_base64_decode_inplace(args[1], &len);
+    if (size != len) {
+        fprintf(stderr, "bufread: asked for %zu bytes but decoded %zu\n",
+                size, len);
+        len = MIN(len, size);
+    }
+
+    memcpy(data, args[1], len);
+    g_strfreev(args);
+}
+
 void qtest_memwrite(QTestState *s, uint64_t addr, const void *data, size_t size)
 {
     const uint8_t *ptr = data;
diff --git a/tests/libqtest.h b/tests/libqtest.h
index 4b54b5d..ec42031 100644
--- a/tests/libqtest.h
+++ b/tests/libqtest.h
@@ -301,6 +301,17 @@ uint64_t qtest_readq(QTestState *s, uint64_t addr);
 void qtest_memread(QTestState *s, uint64_t addr, void *data, size_t size);
 
 /**
+ * qtest_bufread:
+ * @s: #QTestState instance to operate on.
+ * @addr: Guest address to read from.
+ * @data: Pointer to where memory contents will be stored.
+ * @size: Number of bytes to read.
+ *
+ * Read guest memory into a buffer and receive using a base64 encoding.
+ */
+void qtest_bufread(QTestState *s, uint64_t addr, void *data, size_t size);
+
+/**
  * qtest_memwrite:
  * @s: #QTestState instance to operate on.
  * @addr: Guest address to write to.
@@ -312,6 +323,18 @@ void qtest_memread(QTestState *s, uint64_t addr, void *data, size_t size);
 void qtest_memwrite(QTestState *s, uint64_t addr, const void *data, size_t size);
 
 /**
+ * qtest_bufwrite:
+ * @s: #QTestState instance to operate on.
+ * @addr: Guest address to write to.
+ * @data: Pointer to the bytes that will be written to guest memory.
+ * @size: Number of bytes to write.
+ *
+ * Write a buffer to guest memory and transmit using a base64 encoding.
+ */
+void qtest_bufwrite(QTestState *s, uint64_t addr,
+                    const void *data, size_t size);
+
+/**
  * qtest_memset:
  * @s: #QTestState instance to operate on.
  * @addr: Guest address to write to.
@@ -699,6 +722,19 @@ static inline void memread(uint64_t addr, void *data, size_t size)
 }
 
 /**
+ * bufread:
+ * @addr: Guest address to read from.
+ * @data: Pointer to where memory contents will be stored.
+ * @size: Number of bytes to read.
+ *
+ * Read guest memory into a buffer, receive using a base64 encoding.
+ */
+static inline void bufread(uint64_t addr, void *data, size_t size)
+{
+    qtest_bufread(global_qtest, addr, data, size);
+}
+
+/**
  * memwrite:
  * @addr: Guest address to write to.
  * @data: Pointer to the bytes that will be written to guest memory.
@@ -712,6 +748,19 @@ static inline void memwrite(uint64_t addr, const void *data, size_t size)
 }
 
 /**
+ * bufwrite:
+ * @addr: Guest address to write to.
+ * @data: Pointer to the bytes that will be written to guest memory.
+ * @size: Number of bytes to write.
+ *
+ * Write a buffer to guest memory, transmit using a base64 encoding.
+ */
+static inline void bufwrite(uint64_t addr, const void *data, size_t size)
+{
+    qtest_bufwrite(global_qtest, addr, data, size);
+}
+
+/**
  * qmemset:
  * @addr: Guest address to write to.
  * @patt: Byte pattern to fill the guest memory region with.
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH v3 3/5] qtest: add memset to qtest protocol
  2015-05-05 22:22 [Qemu-devel] [PATCH v3 0/5] qtest: base64 r/w and faster memset John Snow
  2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 1/5] qtest: allow arbitrarily long sends John Snow
  2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 2/5] qtest: Add base64 encoded read/write John Snow
@ 2015-05-05 22:22 ` John Snow
  2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 4/5] qtest: precompute hex nibs John Snow
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 19+ messages in thread
From: John Snow @ 2015-05-05 22:22 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, John Snow, afaerber

Previously, memset was just a frontend to write() and only
stupidly sent the pattern many times across the wire.

Let's not discuss who stupidly wrote it like that in the first place.
(Hint: It was me.)

Signed-off-by: John Snow <jsnow@redhat.com>
---
 qtest.c          | 20 ++++++++++++++++++++
 tests/libqtest.c |  8 +-------
 2 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/qtest.c b/qtest.c
index fd85e04..c1a0493 100644
--- a/qtest.c
+++ b/qtest.c
@@ -125,6 +125,9 @@ static bool qtest_opened;
  *  > b64write ADDR SIZE B64_DATA
  *  < OK
  *
+ *  > memset ADDR SIZE VALUE
+ *  < OK
+ *
  * ADDR, SIZE, VALUE are all integers parsed with strtoul() with a base of 0.
  *
  * DATA is an arbitrarily long hex number prefixed with '0x'.  If it's smaller
@@ -473,6 +476,23 @@ static void qtest_process_command(CharDriverState *chr, gchar **words)
 
         qtest_send_prefix(chr);
         qtest_send(chr, "OK\n");
+    } else if (strcmp(words[0], "memset") == 0) {
+        uint64_t addr, len;
+        uint8_t *data;
+        uint8_t pattern;
+
+        g_assert(words[1] && words[2] && words[3]);
+        addr = strtoull(words[1], NULL, 0);
+        len = strtoull(words[2], NULL, 0);
+        pattern = strtoull(words[3], NULL, 0);
+
+        data = g_malloc(len);
+        memset(data, pattern, len);
+        cpu_physical_memory_write(addr, data, len);
+        g_free(data);
+
+        qtest_send_prefix(chr);
+        qtest_send(chr, "OK\n");
     }  else if (strcmp(words[0], "b64write") == 0) {
         uint64_t addr, len;
         uint8_t *data;
diff --git a/tests/libqtest.c b/tests/libqtest.c
index 5f57005..055aad6 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -741,13 +741,7 @@ void qtest_memwrite(QTestState *s, uint64_t addr, const void *data, size_t size)
 
 void qtest_memset(QTestState *s, uint64_t addr, uint8_t pattern, size_t size)
 {
-    size_t i;
-
-    qtest_sendf(s, "write 0x%" PRIx64 " 0x%zx 0x", addr, size);
-    for (i = 0; i < size; i++) {
-        qtest_sendf(s, "%02x", pattern);
-    }
-    qtest_sendf(s, "\n");
+    qtest_sendf(s, "memset 0x%" PRIx64 " 0x%zx 0x%02x\n", addr, size, pattern);
     qtest_rsp(s, 0);
 }
 
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH v3 4/5] qtest: precompute hex nibs
  2015-05-05 22:22 [Qemu-devel] [PATCH v3 0/5] qtest: base64 r/w and faster memset John Snow
                   ` (2 preceding siblings ...)
  2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 3/5] qtest: add memset to qtest protocol John Snow
@ 2015-05-05 22:22 ` John Snow
  2015-05-06  6:25   ` Markus Armbruster
  2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 5/5] libqos/ahci: Swap memread/write with bufread/write John Snow
  2015-05-06 14:56 ` [Qemu-devel] [PATCH v3 0/5] qtest: base64 r/w and faster memset Paolo Bonzini
  5 siblings, 1 reply; 19+ messages in thread
From: John Snow @ 2015-05-05 22:22 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, John Snow, afaerber

Instead of letting printf and friends do this for us
one byte at a time, fill a buffer ourselves and then
send the entire buffer in one go.

This gives a moderate speed improvement over the old
method.

Signed-off-by: John Snow <jsnow@redhat.com>
---
 qtest.c          | 20 ++++++++++++++++----
 tests/libqtest.c | 17 ++++++++++++++---
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/qtest.c b/qtest.c
index c1a0493..383f9ed 100644
--- a/qtest.c
+++ b/qtest.c
@@ -157,6 +157,8 @@ static bool qtest_opened;
  * NUM=0 even though it is remapped to GSI 2).
  */
 
+static const char *hex = "0123456789abcdef";
+
 static int hex2nib(char ch)
 {
     if (ch >= '0' && ch <= '9') {
@@ -170,6 +172,12 @@ static int hex2nib(char ch)
     }
 }
 
+static inline void byte2hex(uint8_t byte, uint8_t *out)
+{
+    *out++ = hex[byte >> 4];
+    *out = hex[byte & 0x0F];
+}
+
 static void qtest_get_time(qemu_timeval *tv)
 {
     qemu_gettimeofday(tv);
@@ -414,6 +422,7 @@ static void qtest_process_command(CharDriverState *chr, gchar **words)
     } else if (strcmp(words[0], "read") == 0) {
         uint64_t addr, len, i;
         uint8_t *data;
+        uint8_t *enc;
 
         g_assert(words[1] && words[2]);
         addr = strtoull(words[1], NULL, 0);
@@ -422,14 +431,17 @@ static void qtest_process_command(CharDriverState *chr, gchar **words)
         data = g_malloc(len);
         cpu_physical_memory_read(addr, data, len);
 
-        qtest_send_prefix(chr);
-        qtest_send(chr, "OK 0x");
+        enc = g_malloc(2 * len + 1);
         for (i = 0; i < len; i++) {
-            qtest_sendf(chr, "%02x", data[i]);
+            byte2hex(data[i], &enc[i * 2]);
         }
-        qtest_send(chr, "\n");
+        enc[2 * len] = '\0';
+
+        qtest_send_prefix(chr);
+        qtest_sendf(chr, "OK 0x%s\n", enc);
 
         g_free(data);
+        g_free(enc);
     } else if (strcmp(words[0], "b64read") == 0) {
         uint64_t addr, len;
         uint8_t *data;
diff --git a/tests/libqtest.c b/tests/libqtest.c
index 055aad6..1b246c9 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -651,6 +651,8 @@ uint64_t qtest_readq(QTestState *s, uint64_t addr)
     return qtest_read(s, "readq", addr);
 }
 
+static const char *hex = "0123456789abcdef";
+
 static int hex2nib(char ch)
 {
     if (ch >= '0' && ch <= '9') {
@@ -664,6 +666,12 @@ static int hex2nib(char ch)
     }
 }
 
+static inline void byte2hex(uint8_t byte, uint8_t *out)
+{
+    *out++ = hex[byte >> 4];
+    *out = hex[byte & 0x0f];
+}
+
 void qtest_memread(QTestState *s, uint64_t addr, void *data, size_t size)
 {
     uint8_t *ptr = data;
@@ -730,13 +738,16 @@ void qtest_memwrite(QTestState *s, uint64_t addr, const void *data, size_t size)
 {
     const uint8_t *ptr = data;
     size_t i;
+    uint8_t *enc = g_malloc(2 * size + 1);
 
-    qtest_sendf(s, "write 0x%" PRIx64 " 0x%zx 0x", addr, size);
     for (i = 0; i < size; i++) {
-        qtest_sendf(s, "%02x", ptr[i]);
+        byte2hex(ptr[i], &enc[i * 2]);
     }
-    qtest_sendf(s, "\n");
+    enc[2 * size] = '\0';
+
+    qtest_sendf(s, "write 0x%" PRIx64 " 0x%zx 0x%s\n", addr, size, enc);
     qtest_rsp(s, 0);
+    g_free(enc);
 }
 
 void qtest_memset(QTestState *s, uint64_t addr, uint8_t pattern, size_t size)
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [Qemu-devel] [PATCH v3 5/5] libqos/ahci: Swap memread/write with bufread/write
  2015-05-05 22:22 [Qemu-devel] [PATCH v3 0/5] qtest: base64 r/w and faster memset John Snow
                   ` (3 preceding siblings ...)
  2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 4/5] qtest: precompute hex nibs John Snow
@ 2015-05-05 22:22 ` John Snow
  2015-05-06 14:56 ` [Qemu-devel] [PATCH v3 0/5] qtest: base64 r/w and faster memset Paolo Bonzini
  5 siblings, 0 replies; 19+ messages in thread
From: John Snow @ 2015-05-05 22:22 UTC (permalink / raw)
  To: qemu-devel; +Cc: pbonzini, John Snow, afaerber

Where it makes sense, use the new faster primitives.
For generally small reads/writes such as for the PRDT
and FIS packets, stick with the more wasteful but
easier to debug memread/memwrite.

For ahci-test;
With this patch:
real    0m3.675s
user    0m2.582s
sys     0m1.718s

Without this /series/:
real    0m14.171s
user    0m12.072s
sys     0m12.527s

Signed-off-by: John Snow <jsnow@redhat.com>
---
 tests/ahci-test.c   | 8 ++++----
 tests/libqos/ahci.c | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/ahci-test.c b/tests/ahci-test.c
index 7c23bb2..bbcb52a 100644
--- a/tests/ahci-test.c
+++ b/tests/ahci-test.c
@@ -806,7 +806,7 @@ static void ahci_test_io_rw_simple(AHCIQState *ahci, unsigned bufsize,
 
     /* Write some indicative pattern to our buffer. */
     generate_pattern(tx, bufsize, AHCI_SECTOR_SIZE);
-    memwrite(ptr, tx, bufsize);
+    bufwrite(ptr, tx, bufsize);
 
     /* Write this buffer to disk, then read it back to the DMA buffer. */
     ahci_guest_io(ahci, port, write_cmd, ptr, bufsize, sector);
@@ -814,7 +814,7 @@ static void ahci_test_io_rw_simple(AHCIQState *ahci, unsigned bufsize,
     ahci_guest_io(ahci, port, read_cmd, ptr, bufsize, sector);
 
     /*** Read back the Data ***/
-    memread(ptr, rx, bufsize);
+    bufread(ptr, rx, bufsize);
     g_assert_cmphex(memcmp(tx, rx, bufsize), ==, 0);
 
     ahci_free(ahci, ptr);
@@ -950,7 +950,7 @@ static void test_dma_fragmented(void)
     /* Create a DMA buffer in guest memory, and write our pattern to it. */
     ptr = guest_alloc(ahci->parent->alloc, bufsize);
     g_assert(ptr);
-    memwrite(ptr, tx, bufsize);
+    bufwrite(ptr, tx, bufsize);
 
     cmd = ahci_command_create(CMD_WRITE_DMA);
     ahci_command_adjust(cmd, 0, ptr, bufsize, 32);
@@ -967,7 +967,7 @@ static void test_dma_fragmented(void)
     g_free(cmd);
 
     /* Read back the guest's receive buffer into local memory */
-    memread(ptr, rx, bufsize);
+    bufread(ptr, rx, bufsize);
     guest_free(ahci->parent->alloc, ptr);
 
     g_assert_cmphex(memcmp(tx, rx, bufsize), ==, 0);
diff --git a/tests/libqos/ahci.c b/tests/libqos/ahci.c
index 843cf72..e9b8bde 100644
--- a/tests/libqos/ahci.c
+++ b/tests/libqos/ahci.c
@@ -625,13 +625,13 @@ void ahci_io(AHCIQState *ahci, uint8_t port, uint8_t ide_cmd,
     g_assert(ptr);
 
     if (props->write) {
-        memwrite(ptr, buffer, bufsize);
+        bufwrite(ptr, buffer, bufsize);
     }
 
     ahci_guest_io(ahci, port, ide_cmd, ptr, bufsize, sector);
 
     if (props->read) {
-        memread(ptr, buffer, bufsize);
+        bufread(ptr, buffer, bufsize);
     }
 
     ahci_free(ahci, ptr);
-- 
2.1.0

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v3 1/5] qtest: allow arbitrarily long sends
  2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 1/5] qtest: allow arbitrarily long sends John Snow
@ 2015-05-05 23:22   ` Eric Blake
  2015-05-05 23:35     ` John Snow
  0 siblings, 1 reply; 19+ messages in thread
From: Eric Blake @ 2015-05-05 23:22 UTC (permalink / raw)
  To: John Snow, qemu-devel; +Cc: pbonzini, afaerber

[-- Attachment #1: Type: text/plain, Size: 1226 bytes --]

On 05/05/2015 04:22 PM, John Snow wrote:
> qtest currently has a static buffer of size 1024 that if we
> overflow, ignores the additional data silently which leads
> to hangs or stream failures.
> 
> Use glib's string facilities to allow arbitrarily long data,
> but split this off into a new function, qtest_sendf.
> 
> Static data can still be sent using qtest_send, which avoids
> the malloc/copy overflow.

Did you mean 'overhead' instead of 'overflow'?

> 
> Signed-off-by: John Snow <jsnow@redhat.com>
> ---
>  qtest.c | 46 ++++++++++++++++++++++++++++------------------
>  1 file changed, 28 insertions(+), 18 deletions(-)
> 

>          qtest_send_prefix(chr);
> -        qtest_send(chr, "FAIL Unknown command `%s'\n", words[0]);
> +        qtest_sendf(chr, "FAIL Unknown command `%s'\n", words[0]);
>      }

Unrelated - these days, I don't see `this' quoting much any more (except
in m4); most people have moved to 'this' quoting.  We could clean it
while touching this line, but it really doesn't affect correctness
either way.

Reviewed-by: Eric Blake <eblake@redhat.com>

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v3 1/5] qtest: allow arbitrarily long sends
  2015-05-05 23:22   ` Eric Blake
@ 2015-05-05 23:35     ` John Snow
  0 siblings, 0 replies; 19+ messages in thread
From: John Snow @ 2015-05-05 23:35 UTC (permalink / raw)
  To: Eric Blake, qemu-devel; +Cc: pbonzini, afaerber



On 05/05/2015 07:22 PM, Eric Blake wrote:
> On 05/05/2015 04:22 PM, John Snow wrote:
>> qtest currently has a static buffer of size 1024 that if we 
>> overflow, ignores the additional data silently which leads to
>> hangs or stream failures.
>> 
>> Use glib's string facilities to allow arbitrarily long data, but
>> split this off into a new function, qtest_sendf.
>> 
>> Static data can still be sent using qtest_send, which avoids the
>> malloc/copy overflow.
> 
> Did you mean 'overhead' instead of 'overflow'?
> 

Sigh, yes. The fingers run on autopilot without input from the brain.
Will nab it if I spin again.

>> 
>> Signed-off-by: John Snow <jsnow@redhat.com> --- qtest.c | 46
>> ++++++++++++++++++++++++++++------------------ 1 file changed, 28
>> insertions(+), 18 deletions(-)
>> 
> 
>> qtest_send_prefix(chr); -        qtest_send(chr, "FAIL Unknown
>> command `%s'\n", words[0]); +        qtest_sendf(chr, "FAIL
>> Unknown command `%s'\n", words[0]); }
> 
> Unrelated - these days, I don't see `this' quoting much any more
> (except in m4); most people have moved to 'this' quoting.  We could
> clean it while touching this line, but it really doesn't affect
> correctness either way.
> 

If I spin again I'll get it.

> Reviewed-by: Eric Blake <eblake@redhat.com>
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v3 4/5] qtest: precompute hex nibs
  2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 4/5] qtest: precompute hex nibs John Snow
@ 2015-05-06  6:25   ` Markus Armbruster
  2015-05-06 14:12     ` John Snow
  0 siblings, 1 reply; 19+ messages in thread
From: Markus Armbruster @ 2015-05-06  6:25 UTC (permalink / raw)
  To: John Snow; +Cc: pbonzini, qemu-devel, afaerber

John Snow <jsnow@redhat.com> writes:

> Instead of letting printf and friends do this for us
> one byte at a time, fill a buffer ourselves and then
> send the entire buffer in one go.
>
> This gives a moderate speed improvement over the old
> method.

Out of curiosity: how much of the improvement is due to doing our own
buffering instead of printf()'s (assuming the stream is buffered), and
how much is due to doing our own hex formatting instead of printf()'s?

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v3 4/5] qtest: precompute hex nibs
  2015-05-06  6:25   ` Markus Armbruster
@ 2015-05-06 14:12     ` John Snow
  2015-05-06 15:19       ` Markus Armbruster
  0 siblings, 1 reply; 19+ messages in thread
From: John Snow @ 2015-05-06 14:12 UTC (permalink / raw)
  To: Markus Armbruster; +Cc: pbonzini, qemu-devel, afaerber



On 05/06/2015 02:25 AM, Markus Armbruster wrote:
> John Snow <jsnow@redhat.com> writes:
> 
>> Instead of letting printf and friends do this for us
>> one byte at a time, fill a buffer ourselves and then
>> send the entire buffer in one go.
>>
>> This gives a moderate speed improvement over the old
>> method.
> 
> Out of curiosity: how much of the improvement is due to doing our own
> buffering instead of printf()'s (assuming the stream is buffered), and
> how much is due to doing our own hex formatting instead of printf()'s?
> 

Out of ignorance: How would I measure?

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v3 0/5] qtest: base64 r/w and faster memset
  2015-05-05 22:22 [Qemu-devel] [PATCH v3 0/5] qtest: base64 r/w and faster memset John Snow
                   ` (4 preceding siblings ...)
  2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 5/5] libqos/ahci: Swap memread/write with bufread/write John Snow
@ 2015-05-06 14:56 ` Paolo Bonzini
  5 siblings, 0 replies; 19+ messages in thread
From: Paolo Bonzini @ 2015-05-06 14:56 UTC (permalink / raw)
  To: John Snow, qemu-devel; +Cc: afaerber



On 06/05/2015 00:22, John Snow wrote:
> Adds new qtest protocol commands for base64 reads and writes,
> as well as a proper command for memset instead of faking it
> via write.
> 
> This improves the ahci-test performance on my machine from about
> 14 seconds to about ~3.5.
> 
> v3:
>  - Including a memset optimization.
> v2:
>  - Resend as non-RFC.
> 
> ==
> For convenience, this branch is available at:
> https://github.com/jnsnow/qemu.git branch qtest_protocol
> https://github.com/jnsnow/qemu/tree/qtest_protocol
> 
> This version is tagged qtest_protocol-v3:
> https://github.com/jnsnow/qemu/releases/tag/qtest_protocol-v3
> ==
> 
> John Snow (5):
>   qtest: allow arbitrarily long sends
>   qtest: Add base64 encoded read/write
>   qtest: add memset to qtest protocol
>   qtest: precompute hex nibs
>   libqos/ahci: Swap memread/write with bufread/write
> 
>  qtest.c             | 147 +++++++++++++++++++++++++++++++++++++++++++++-------
>  tests/ahci-test.c   |   8 +--
>  tests/libqos/ahci.c |   4 +-
>  tests/libqtest.c    |  56 ++++++++++++++++----
>  tests/libqtest.h    |  49 ++++++++++++++++++
>  5 files changed, 230 insertions(+), 34 deletions(-)
> 

Looks good!

Paolo

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v3 4/5] qtest: precompute hex nibs
  2015-05-06 14:12     ` John Snow
@ 2015-05-06 15:19       ` Markus Armbruster
  2015-05-06 16:18         ` John Snow
  0 siblings, 1 reply; 19+ messages in thread
From: Markus Armbruster @ 2015-05-06 15:19 UTC (permalink / raw)
  To: John Snow; +Cc: pbonzini, qemu-devel, afaerber

John Snow <jsnow@redhat.com> writes:

> On 05/06/2015 02:25 AM, Markus Armbruster wrote:
>> John Snow <jsnow@redhat.com> writes:
>> 
>>> Instead of letting printf and friends do this for us
>>> one byte at a time, fill a buffer ourselves and then
>>> send the entire buffer in one go.
>>>
>>> This gives a moderate speed improvement over the old
>>> method.
>> 
>> Out of curiosity: how much of the improvement is due to doing our own
>> buffering instead of printf()'s (assuming the stream is buffered), and
>> how much is due to doing our own hex formatting instead of printf()'s?
>> 
>
> Out of ignorance: How would I measure?

Heh, well played!

The code before the series uses chr unbuffered:

        for (i = 0; i < len; i++) {
            qtest_send(chr, "%02x", data[i]);
        }

qtest_send() formats into two bytes, passes them to
qemu_chr_fe_write_all(), which writes them to chr.

The chr are typically unbuffered, so this could well produce a series of
two-byte write() system calls.

Adding some buffering will obviously make a difference for larger len.

Whether formatting hex digits by hands can make a difference is not
obvious.

To find out, add just buffering.  Something like this in your patch
instead of byte2hex():

         for (i = 0; i < len; i++) {
-            qtest_sendf(chr, "%02x", data[i]);
+            snprintf(&enc[i * 2], 2, "%02x", data[i]);
         }

If the speedup is pretty much entirely due to buffering (which I
suspect), then your commit message could use a bit of love :)

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v3 4/5] qtest: precompute hex nibs
  2015-05-06 15:19       ` Markus Armbruster
@ 2015-05-06 16:18         ` John Snow
  2015-05-07  6:13           ` Markus Armbruster
  2015-05-07 20:27           ` Eric Blake
  0 siblings, 2 replies; 19+ messages in thread
From: John Snow @ 2015-05-06 16:18 UTC (permalink / raw)
  To: Markus Armbruster; +Cc: pbonzini, qemu-devel, afaerber



On 05/06/2015 11:19 AM, Markus Armbruster wrote:
> John Snow <jsnow@redhat.com> writes:
> 
>> On 05/06/2015 02:25 AM, Markus Armbruster wrote:
>>> John Snow <jsnow@redhat.com> writes:
>>>
>>>> Instead of letting printf and friends do this for us
>>>> one byte at a time, fill a buffer ourselves and then
>>>> send the entire buffer in one go.
>>>>
>>>> This gives a moderate speed improvement over the old
>>>> method.
>>>
>>> Out of curiosity: how much of the improvement is due to doing our own
>>> buffering instead of printf()'s (assuming the stream is buffered), and
>>> how much is due to doing our own hex formatting instead of printf()'s?
>>>
>>
>> Out of ignorance: How would I measure?
> 
> Heh, well played!
> 
> The code before the series uses chr unbuffered:
> 
>         for (i = 0; i < len; i++) {
>             qtest_send(chr, "%02x", data[i]);
>         }
> 
> qtest_send() formats into two bytes, passes them to
> qemu_chr_fe_write_all(), which writes them to chr.
> 
> The chr are typically unbuffered, so this could well produce a series of
> two-byte write() system calls.
> 
> Adding some buffering will obviously make a difference for larger len.
> 
> Whether formatting hex digits by hands can make a difference is not
> obvious.
> 
> To find out, add just buffering.  Something like this in your patch
> instead of byte2hex():
> 
>          for (i = 0; i < len; i++) {
> -            qtest_sendf(chr, "%02x", data[i]);
> +            snprintf(&enc[i * 2], 2, "%02x", data[i]);
>          }
> 
> If the speedup is pretty much entirely due to buffering (which I
> suspect), then your commit message could use a bit of love :)
> 

When you're right, you're right. The difference may not be statistically
meaningful, but with today's current planetary alignment, using
sprintf() to batch the sends instead of my home-rolled nib computation
function, I can eke out a few more tenths of a second.

If there are no objections, I will stage patches 1-3 and 5, and resubmit
a quick v2 of just this single patch, unless you want to go ahead and
say that making the edit will be fine, then I will just edit it before
sending the pullreq.

--js

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v3 4/5] qtest: precompute hex nibs
  2015-05-06 16:18         ` John Snow
@ 2015-05-07  6:13           ` Markus Armbruster
  2015-05-07 17:52             ` John Snow
  2015-05-07 20:27           ` Eric Blake
  1 sibling, 1 reply; 19+ messages in thread
From: Markus Armbruster @ 2015-05-07  6:13 UTC (permalink / raw)
  To: John Snow; +Cc: pbonzini, qemu-devel, afaerber

John Snow <jsnow@redhat.com> writes:

> On 05/06/2015 11:19 AM, Markus Armbruster wrote:
>> John Snow <jsnow@redhat.com> writes:
>> 
>>> On 05/06/2015 02:25 AM, Markus Armbruster wrote:
>>>> John Snow <jsnow@redhat.com> writes:
>>>>
>>>>> Instead of letting printf and friends do this for us
>>>>> one byte at a time, fill a buffer ourselves and then
>>>>> send the entire buffer in one go.
>>>>>
>>>>> This gives a moderate speed improvement over the old
>>>>> method.
>>>>
>>>> Out of curiosity: how much of the improvement is due to doing our own
>>>> buffering instead of printf()'s (assuming the stream is buffered), and
>>>> how much is due to doing our own hex formatting instead of printf()'s?
>>>>
>>>
>>> Out of ignorance: How would I measure?
>> 
>> Heh, well played!
>> 
>> The code before the series uses chr unbuffered:
>> 
>>         for (i = 0; i < len; i++) {
>>             qtest_send(chr, "%02x", data[i]);
>>         }
>> 
>> qtest_send() formats into two bytes, passes them to
>> qemu_chr_fe_write_all(), which writes them to chr.
>> 
>> The chr are typically unbuffered, so this could well produce a series of
>> two-byte write() system calls.
>> 
>> Adding some buffering will obviously make a difference for larger len.
>> 
>> Whether formatting hex digits by hands can make a difference is not
>> obvious.
>> 
>> To find out, add just buffering.  Something like this in your patch
>> instead of byte2hex():
>> 
>>          for (i = 0; i < len; i++) {
>> -            qtest_sendf(chr, "%02x", data[i]);
>> +            snprintf(&enc[i * 2], 2, "%02x", data[i]);
>>          }
>> 
>> If the speedup is pretty much entirely due to buffering (which I
>> suspect), then your commit message could use a bit of love :)
>> 
>
> When you're right, you're right. The difference may not be statistically
> meaningful, but with today's current planetary alignment, using
> sprintf() to batch the sends instead of my home-rolled nib computation
> function, I can eke out a few more tenths of a second.

:)

> If there are no objections, I will stage patches 1-3 and 5, and resubmit
> a quick v2 of just this single patch, unless you want to go ahead and
> say that making the edit will be fine, then I will just edit it before
> sending the pullreq.

Recommend to post the revised patch, with an updated commit message.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v3 4/5] qtest: precompute hex nibs
  2015-05-07  6:13           ` Markus Armbruster
@ 2015-05-07 17:52             ` John Snow
  0 siblings, 0 replies; 19+ messages in thread
From: John Snow @ 2015-05-07 17:52 UTC (permalink / raw)
  To: Markus Armbruster; +Cc: pbonzini, qemu-devel, afaerber



On 05/07/2015 02:13 AM, Markus Armbruster wrote:
> John Snow <jsnow@redhat.com> writes:
> 
>> On 05/06/2015 11:19 AM, Markus Armbruster wrote:
>>> John Snow <jsnow@redhat.com> writes:
>>>
>>>> On 05/06/2015 02:25 AM, Markus Armbruster wrote:
>>>>> John Snow <jsnow@redhat.com> writes:
>>>>>
>>>>>> Instead of letting printf and friends do this for us
>>>>>> one byte at a time, fill a buffer ourselves and then
>>>>>> send the entire buffer in one go.
>>>>>>
>>>>>> This gives a moderate speed improvement over the old
>>>>>> method.
>>>>>
>>>>> Out of curiosity: how much of the improvement is due to doing our own
>>>>> buffering instead of printf()'s (assuming the stream is buffered), and
>>>>> how much is due to doing our own hex formatting instead of printf()'s?
>>>>>
>>>>
>>>> Out of ignorance: How would I measure?
>>>
>>> Heh, well played!
>>>
>>> The code before the series uses chr unbuffered:
>>>
>>>         for (i = 0; i < len; i++) {
>>>             qtest_send(chr, "%02x", data[i]);
>>>         }
>>>
>>> qtest_send() formats into two bytes, passes them to
>>> qemu_chr_fe_write_all(), which writes them to chr.
>>>
>>> The chr are typically unbuffered, so this could well produce a series of
>>> two-byte write() system calls.
>>>
>>> Adding some buffering will obviously make a difference for larger len.
>>>
>>> Whether formatting hex digits by hands can make a difference is not
>>> obvious.
>>>
>>> To find out, add just buffering.  Something like this in your patch
>>> instead of byte2hex():
>>>
>>>          for (i = 0; i < len; i++) {
>>> -            qtest_sendf(chr, "%02x", data[i]);
>>> +            snprintf(&enc[i * 2], 2, "%02x", data[i]);
>>>          }
>>>
>>> If the speedup is pretty much entirely due to buffering (which I
>>> suspect), then your commit message could use a bit of love :)
>>>
>>
>> When you're right, you're right. The difference may not be statistically
>> meaningful, but with today's current planetary alignment, using
>> sprintf() to batch the sends instead of my home-rolled nib computation
>> function, I can eke out a few more tenths of a second.
> 
> :)
> 
>> If there are no objections, I will stage patches 1-3 and 5, and resubmit
>> a quick v2 of just this single patch, unless you want to go ahead and
>> say that making the edit will be fine, then I will just edit it before
>> sending the pullreq.
> 
> Recommend to post the revised patch, with an updated commit message.
> 

Staged patches and dropped patch 4, thanks.

--js

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v3 4/5] qtest: precompute hex nibs
  2015-05-06 16:18         ` John Snow
  2015-05-07  6:13           ` Markus Armbruster
@ 2015-05-07 20:27           ` Eric Blake
  2015-05-08  6:25             ` Markus Armbruster
  1 sibling, 1 reply; 19+ messages in thread
From: Eric Blake @ 2015-05-07 20:27 UTC (permalink / raw)
  To: John Snow, Markus Armbruster; +Cc: pbonzini, qemu-devel, afaerber

[-- Attachment #1: Type: text/plain, Size: 1254 bytes --]

On 05/06/2015 10:18 AM, John Snow wrote:

>> To find out, add just buffering.  Something like this in your patch
>> instead of byte2hex():
>>
>>          for (i = 0; i < len; i++) {
>> -            qtest_sendf(chr, "%02x", data[i]);
>> +            snprintf(&enc[i * 2], 2, "%02x", data[i]);
>>          }
>>
>> If the speedup is pretty much entirely due to buffering (which I
>> suspect), then your commit message could use a bit of love :)
>>
> 
> When you're right, you're right. The difference may not be statistically
> meaningful, but with today's current planetary alignment, using
> sprintf() to batch the sends instead of my home-rolled nib computation
> function, I can eke out a few more tenths of a second.

I'm a bit surprised - making a function call per byte generally executes
more instructions than open-coding the conversion (albeit the branch
prediction in the hardware probably does fairly well over long strings,
since it is a tight and predictable loop).  Remember, sprintf() has to
decode the format string on every call (unless the compiler is smart
enough to open-code what sprintf would do).

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v3 4/5] qtest: precompute hex nibs
  2015-05-07 20:27           ` Eric Blake
@ 2015-05-08  6:25             ` Markus Armbruster
  2015-05-08 16:22               ` John Snow
  0 siblings, 1 reply; 19+ messages in thread
From: Markus Armbruster @ 2015-05-08  6:25 UTC (permalink / raw)
  To: Eric Blake; +Cc: pbonzini, John Snow, qemu-devel, afaerber

Eric Blake <eblake@redhat.com> writes:

> On 05/06/2015 10:18 AM, John Snow wrote:
>
>>> To find out, add just buffering.  Something like this in your patch
>>> instead of byte2hex():
>>>
>>>          for (i = 0; i < len; i++) {
>>> -            qtest_sendf(chr, "%02x", data[i]);
>>> +            snprintf(&enc[i * 2], 2, "%02x", data[i]);
>>>          }
>>>
>>> If the speedup is pretty much entirely due to buffering (which I
>>> suspect), then your commit message could use a bit of love :)
>>>
>> 
>> When you're right, you're right. The difference may not be statistically
>> meaningful, but with today's current planetary alignment, using
>> sprintf() to batch the sends instead of my home-rolled nib computation
>> function, I can eke out a few more tenths of a second.
>
> I'm a bit surprised - making a function call per byte generally executes
> more instructions than open-coding the conversion (albeit the branch
> prediction in the hardware probably does fairly well over long strings,
> since it is a tight and predictable loop).  Remember, sprintf() has to
> decode the format string on every call (unless the compiler is smart
> enough to open-code what sprintf would do).

John's measurements show that the speed difference between snprintf()
and a local copy of formatting code gets thoroughly drowned in noise.

The snprintf() version takes 18 lines less, according to diffstat.  Less
code, same measured performance, what's not to like?

However, if you feel strongly about avoiding snprintf() here, I won't
argue further.  Except for the commit message: it needs to be fixed not
to claim avoiding "printf and friends" makes a speed difference.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v3 4/5] qtest: precompute hex nibs
  2015-05-08  6:25             ` Markus Armbruster
@ 2015-05-08 16:22               ` John Snow
  2015-05-08 19:47                 ` Eric Blake
  0 siblings, 1 reply; 19+ messages in thread
From: John Snow @ 2015-05-08 16:22 UTC (permalink / raw)
  To: Markus Armbruster, Eric Blake; +Cc: pbonzini, qemu-devel, afaerber



On 05/08/2015 02:25 AM, Markus Armbruster wrote:
> Eric Blake <eblake@redhat.com> writes:
> 
>> On 05/06/2015 10:18 AM, John Snow wrote:
>>
>>>> To find out, add just buffering.  Something like this in your patch
>>>> instead of byte2hex():
>>>>
>>>>          for (i = 0; i < len; i++) {
>>>> -            qtest_sendf(chr, "%02x", data[i]);
>>>> +            snprintf(&enc[i * 2], 2, "%02x", data[i]);
>>>>          }
>>>>
>>>> If the speedup is pretty much entirely due to buffering (which I
>>>> suspect), then your commit message could use a bit of love :)
>>>>
>>>
>>> When you're right, you're right. The difference may not be statistically
>>> meaningful, but with today's current planetary alignment, using
>>> sprintf() to batch the sends instead of my home-rolled nib computation
>>> function, I can eke out a few more tenths of a second.
>>
>> I'm a bit surprised - making a function call per byte generally executes
>> more instructions than open-coding the conversion (albeit the branch
>> prediction in the hardware probably does fairly well over long strings,
>> since it is a tight and predictable loop).  Remember, sprintf() has to
>> decode the format string on every call (unless the compiler is smart
>> enough to open-code what sprintf would do).
> 
> John's measurements show that the speed difference between snprintf()
> and a local copy of formatting code gets thoroughly drowned in noise.
> 
> The snprintf() version takes 18 lines less, according to diffstat.  Less
> code, same measured performance, what's not to like?
> 
> However, if you feel strongly about avoiding snprintf() here, I won't
> argue further.  Except for the commit message: it needs to be fixed not
> to claim avoiding "printf and friends" makes a speed difference.
> 

My reasoning was the same as Markus's: the difference was so negligible
that I went with the "less home-rolled code" version.

I already staged this series without the nib functions and submitted the
snprintf version as its own patch with a less disparaging (to printf and
friends) commit message.

Any further micro-optimization is a waste of time to properly benchmark
and split hairs. I already dropped the test from ~14s to ~4s. Good enough.

--js

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] [PATCH v3 4/5] qtest: precompute hex nibs
  2015-05-08 16:22               ` John Snow
@ 2015-05-08 19:47                 ` Eric Blake
  0 siblings, 0 replies; 19+ messages in thread
From: Eric Blake @ 2015-05-08 19:47 UTC (permalink / raw)
  To: John Snow, Markus Armbruster; +Cc: pbonzini, qemu-devel, afaerber

[-- Attachment #1: Type: text/plain, Size: 1772 bytes --]

On 05/08/2015 10:22 AM, John Snow wrote:
>>> I'm a bit surprised - making a function call per byte generally executes
>>> more instructions than open-coding the conversion (albeit the branch
>>> prediction in the hardware probably does fairly well over long strings,
>>> since it is a tight and predictable loop).  Remember, sprintf() has to
>>> decode the format string on every call (unless the compiler is smart
>>> enough to open-code what sprintf would do).
>>
>> John's measurements show that the speed difference between snprintf()
>> and a local copy of formatting code gets thoroughly drowned in noise.
>>
>> The snprintf() version takes 18 lines less, according to diffstat.  Less
>> code, same measured performance, what's not to like?
>>
>> However, if you feel strongly about avoiding snprintf() here, I won't
>> argue further.  Except for the commit message: it needs to be fixed not
>> to claim avoiding "printf and friends" makes a speed difference.
>>
> 
> My reasoning was the same as Markus's: the difference was so negligible
> that I went with the "less home-rolled code" version.
> 
> I already staged this series without the nib functions and submitted the
> snprintf version as its own patch with a less disparaging (to printf and
> friends) commit message.
> 
> Any further micro-optimization is a waste of time to properly benchmark
> and split hairs. I already dropped the test from ~14s to ~4s. Good enough.

Okay, I'm convinced - keep the s[n]printf, on the grounds that it wasn't
the bottleneck.  You are right that premature optimization is not worth
the complexity if it gives no real speed gain.

-- 
Eric Blake   eblake redhat com    +1-919-301-3266
Libvirt virtualization library http://libvirt.org


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 604 bytes --]

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2015-05-08 19:47 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-05-05 22:22 [Qemu-devel] [PATCH v3 0/5] qtest: base64 r/w and faster memset John Snow
2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 1/5] qtest: allow arbitrarily long sends John Snow
2015-05-05 23:22   ` Eric Blake
2015-05-05 23:35     ` John Snow
2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 2/5] qtest: Add base64 encoded read/write John Snow
2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 3/5] qtest: add memset to qtest protocol John Snow
2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 4/5] qtest: precompute hex nibs John Snow
2015-05-06  6:25   ` Markus Armbruster
2015-05-06 14:12     ` John Snow
2015-05-06 15:19       ` Markus Armbruster
2015-05-06 16:18         ` John Snow
2015-05-07  6:13           ` Markus Armbruster
2015-05-07 17:52             ` John Snow
2015-05-07 20:27           ` Eric Blake
2015-05-08  6:25             ` Markus Armbruster
2015-05-08 16:22               ` John Snow
2015-05-08 19:47                 ` Eric Blake
2015-05-05 22:22 ` [Qemu-devel] [PATCH v3 5/5] libqos/ahci: Swap memread/write with bufread/write John Snow
2015-05-06 14:56 ` [Qemu-devel] [PATCH v3 0/5] qtest: base64 r/w and faster memset Paolo Bonzini

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.