All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Emilio G. Cota" <cota@braap.org>
To: QEMU Developers <qemu-devel@nongnu.org>,
	MTTCG Devel <mttcg@listserver.greensocs.com>
Cc: "Alex Bennée" <alex.bennee@linaro.org>,
	"Paolo Bonzini" <pbonzini@redhat.com>,
	"Richard Henderson" <rth@twiddle.net>,
	"Sergey Fedorov" <serge.fdrv@gmail.com>,
	"Alvise Rigo" <a.rigo@virtualopensystems.com>,
	"Peter Maydell" <peter.maydell@linaro.org>
Subject: [Qemu-devel] [RFC 19/30] tests: add atomic_add-bench
Date: Mon, 27 Jun 2016 15:02:05 -0400	[thread overview]
Message-ID: <1467054136-10430-20-git-send-email-cota@braap.org> (raw)
In-Reply-To: <1467054136-10430-1-git-send-email-cota@braap.org>

With this microbenchmark we can measure the overhead of emulating atomic
instructions with a configurable degree of contention.

The benchmark spawns $n threads, each performing $o atomic ops (additions)
in a loop. Each atomic operation targets a counter that sits on its own cache
line (assuming 64-byte lines) and is randomly selected from a range [0, $r).

[ Note: each $foo corresponds to a -foo flag ]

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 tests/.gitignore         |   1 +
 tests/Makefile.include   |   4 +-
 tests/atomic_add-bench.c | 180 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 184 insertions(+), 1 deletion(-)
 create mode 100644 tests/atomic_add-bench.c

diff --git a/tests/.gitignore b/tests/.gitignore
index 840ea39..52488a0 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -1,3 +1,4 @@
+atomic_add-bench
 check-qdict
 check-qfloat
 check-qint
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 6c09962..7421778 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -408,7 +408,8 @@ test-obj-y = tests/check-qint.o tests/check-qstring.o tests/check-qdict.o \
 	tests/test-opts-visitor.o tests/test-qmp-event.o \
 	tests/rcutorture.o tests/test-rcu-list.o \
 	tests/test-qdist.o \
-	tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o
+	tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o \
+	tests/atomic_add-bench.o
 
 $(test-obj-y): QEMU_INCLUDES += -Itests
 QEMU_CFLAGS += -I$(SRC_PATH)/tests
@@ -451,6 +452,7 @@ tests/test-qdist$(EXESUF): tests/test-qdist.o $(test-util-obj-y)
 tests/test-qht$(EXESUF): tests/test-qht.o $(test-util-obj-y)
 tests/test-qht-par$(EXESUF): tests/test-qht-par.o tests/qht-bench$(EXESUF) $(test-util-obj-y)
 tests/qht-bench$(EXESUF): tests/qht-bench.o $(test-util-obj-y)
+tests/atomic_add-bench$(EXESUF): tests/atomic_add-bench.o $(test-util-obj-y)
 
 tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \
 	hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\
diff --git a/tests/atomic_add-bench.c b/tests/atomic_add-bench.c
new file mode 100644
index 0000000..5bbecf6
--- /dev/null
+++ b/tests/atomic_add-bench.c
@@ -0,0 +1,180 @@
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "qemu/host-utils.h"
+#include "qemu/processor.h"
+
+struct thread_info {
+    uint64_t r; /* per-thread xorshift64star() state, seeded at creation */
+} QEMU_ALIGNED(64);
+
+struct count {
+    unsigned long val; /* number of atomic increments that hit this slot */
+} QEMU_ALIGNED(64);
+
+static QemuThread *threads; /* n_threads worker handles */
+static struct thread_info *th_info; /* n_threads per-worker states */
+static unsigned int n_threads = 1; /* -n */
+static unsigned int n_ready_threads; /* workers that have started running */
+static struct count *counts; /* 'range' counters, picked at random */
+static unsigned long n_ops = 10000; /* -o: increments done by each worker */
+static double duration; /* seconds, filled in by run_test() */
+static unsigned int range = 1; /* -r, after rounding by pow2ceil() */
+static bool test_start; /* set by run_test() to release the workers */
+
+static const char commands_string[] = /* option summary shown by -h */
+    " -n = number of threads\n"
+    " -o = number of ops per thread\n"
+    " -r = range (will be rounded up to pow2)";
+
+static void usage_complete(char *argv[])
+{
+    fprintf(stderr, "Usage: %s [options]\n", argv[0]);
+    fprintf(stderr, "options:\n%s\n", commands_string); /* both go to stderr */
+}
+
+/*
+ * One xorshift64* step, see https://en.wikipedia.org/wiki/Xorshift
+ * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only
+ * guaranteed to be >= 32767). Note that an input of 0 always yields 0.
+ */
+static uint64_t xorshift64star(uint64_t x)
+{
+    x ^= x >> 12; /* a */
+    x ^= x << 25; /* b */
+    x ^= x >> 27; /* c */
+    return x * UINT64_C(2685821657736338717);
+}
+
+static void *thread_func(void *arg)
+{
+    struct thread_info *info = arg;
+    unsigned long i;
+
+    atomic_inc(&n_ready_threads); /* tell run_test() this worker is up */
+    while (!atomic_mb_read(&test_start)) {
+        cpu_relax(); /* spin until run_test() sets test_start */
+    }
+
+    for (i = 0; i < n_ops; i++) {
+        unsigned int index;
+
+        info->r = xorshift64star(info->r);
+        index = info->r & (range - 1); /* in [0, range) for power-of-2 range */
+        atomic_inc(&counts[index].val);
+    }
+    return NULL;
+}
+
+static inline
+uint64_t ts_subtract(const struct timespec *a, const struct timespec *b)
+{
+    uint64_t ns; /* elapsed nanoseconds from *a to *b, i.e. b - a */
+
+    ns = (b->tv_sec - a->tv_sec) * 1000000000ULL;
+    ns += (b->tv_nsec - a->tv_nsec); /* may be negative; wraps back mod 2^64 */
+    return ns;
+}
+
+static void run_test(void)
+{
+    unsigned int i;
+    struct timespec ts_start, ts_end;
+
+    while (atomic_read(&n_ready_threads) != n_threads) {
+        cpu_relax(); /* wait until every worker has checked in */
+    }
+    atomic_mb_set(&test_start, true); /* release the spinning workers */
+
+    clock_gettime(CLOCK_MONOTONIC, &ts_start); /* NOTE: taken after release */
+    for (i = 0; i < n_threads; i++) {
+        qemu_thread_join(&threads[i]);
+    }
+    clock_gettime(CLOCK_MONOTONIC, &ts_end);
+    duration = ts_subtract(&ts_start, &ts_end) / 1e9; /* ns -> s */
+}
+
+/* Allocate the shared state and spawn the n_threads joinable workers. */
+static void create_threads(void)
+{
+    unsigned int i;
+
+    threads = g_new(QemuThread, n_threads);
+    th_info = g_new(struct thread_info, n_threads);
+    counts = qemu_memalign(64, sizeof(*counts) * range);
+    /* the allocation is not zeroed, but pr_stats() sums these counters */
+    memset(counts, 0, sizeof(*counts) * range);
+    for (i = 0; i < n_threads; i++) {
+        th_info[i].r = (i + 1) ^ time(NULL);
+        qemu_thread_create(&threads[i], NULL, thread_func, &th_info[i],
+                           QEMU_THREAD_JOINABLE);
+    }
+}
+
+static void pr_params(void)
+{
+    printf("Parameters:\n");
+    printf(" # of threads:      %u\n", n_threads);
+    printf(" n_ops:             %lu\n", n_ops);
+    printf(" ops' range:        %u\n", range); /* as rounded by parse_args() */
+}
+
+static void pr_stats(void)
+{
+    unsigned long long val = 0; /* total increments seen across all slots */
+    unsigned int i;
+    double tx; /* aggregate throughput, in millions of ops per second */
+
+    for (i = 0; i < range; i++) {
+        val += counts[i].val;
+    }
+    assert(val == n_threads * n_ops); /* no increment may have been lost */
+    tx = val / duration / 1e6;
+
+    printf("Results:\n");
+    printf("Duration:            %.2f s\n", duration);
+    printf(" Throughput:         %.2f Mops/s\n", tx);
+    printf(" Throughput/thread:  %.2f Mops/s/thread\n", tx / n_threads);
+}
+
+/* Parse the command line; exits on -h and on invalid options or values. */
+static void parse_args(int argc, char *argv[])
+{
+    long long val;
+    int c;
+
+    while ((c = getopt(argc, argv, "hn:o:r:")) >= 0) {
+        if (c == 'h') {
+            usage_complete(argv);
+            exit(0);
+        } else if (c == '?') {
+            /* unknown option or missing argument, reported by getopt() */
+            usage_complete(argv);
+            exit(1);
+        }
+
+        /* -n and -r must be >= 1: they size the thread and counter arrays */
+        val = atoll(optarg);
+        if (val < 0 || (unsigned long long)val > ULONG_MAX ||
+            (c != 'o' && (val < 1 || val > INT_MAX))) {
+            fprintf(stderr, "fatal: invalid value for -%c: %s\n", c, optarg);
+            exit(1);
+        }
+        if (c == 'n') {
+            n_threads = val;
+        } else if (c == 'o') {
+            n_ops = val;
+        } else {
+            range = pow2ceil(val);
+        }
+    }
+}
+
+int main(int argc, char *argv[])
+{
+    parse_args(argc, argv);
+    pr_params();
+    create_threads(); /* workers start, then spin until run_test() */
+    run_test(); /* returns once every worker has been joined */
+    pr_stats();
+    return 0;
+}
-- 
2.5.0

  parent reply	other threads:[~2016-06-27 19:03 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-06-27 19:01 [Qemu-devel] [RFC 00/30] cmpxchg-based emulation of atomics Emilio G. Cota
2016-06-27 19:01 ` [Qemu-devel] [RFC 01/30] softmmu: add cmpxchg helpers Emilio G. Cota
2016-06-27 20:11   ` Richard Henderson
2016-06-27 21:19     ` Emilio G. Cota
2016-06-27 21:43       ` Richard Henderson
2016-06-27 21:48         ` Peter Maydell
2016-06-27 21:53           ` Richard Henderson
2016-06-27 19:01 ` [Qemu-devel] [RFC 02/30] tcg: add tcg_cmpxchg_lock Emilio G. Cota
2016-06-27 20:07   ` Richard Henderson
2016-06-27 20:41     ` Emilio G. Cota
2016-06-27 21:02       ` Richard Henderson
2016-06-27 19:01 ` [Qemu-devel] [RFC 03/30] cpu_ldst: add cpu_cmpxchg helpers Emilio G. Cota
2016-06-27 19:01 ` [Qemu-devel] [RFC 04/30] target-i386: add cmpxchg helpers Emilio G. Cota
2016-06-27 19:01 ` [Qemu-devel] [RFC 05/30] target-i386: emulate LOCK'ed cmpxchg using " Emilio G. Cota
2016-06-27 19:01 ` [Qemu-devel] [RFC 06/30] target-i386: emulate LOCK'ed cmpxchg8b/16b " Emilio G. Cota
2016-06-27 19:01 ` [Qemu-devel] [RFC 07/30] atomics: add atomic_xor Emilio G. Cota
2016-06-27 19:01 ` [Qemu-devel] [RFC 08/30] atomics: add atomic_op_fetch variants Emilio G. Cota
2016-06-27 19:01 ` [Qemu-devel] [RFC 09/30] softmmu: add atomic helpers Emilio G. Cota
2016-06-27 19:01 ` [Qemu-devel] [RFC 10/30] cpu_ldst: add cpu_atomic helpers Emilio G. Cota
2016-06-27 19:01 ` [Qemu-devel] [RFC 11/30] target-i386: add atomic helpers Emilio G. Cota
2016-06-27 20:27   ` Richard Henderson
2016-06-27 21:39     ` Emilio G. Cota
2016-06-27 19:01 ` [Qemu-devel] [RFC 12/30] target-i386: emulate LOCK'ed OP instructions using " Emilio G. Cota
2016-06-27 19:01 ` [Qemu-devel] [RFC 13/30] target-i386: emulate LOCK'ed INC using atomic helper Emilio G. Cota
2016-06-27 19:02 ` [Qemu-devel] [RFC 14/30] target-i386: emulate LOCK'ed NOT " Emilio G. Cota
2016-06-27 19:02 ` [Qemu-devel] [RFC 15/30] target-i386: emulate LOCK'ed NEG using cmpxchg helper Emilio G. Cota
2016-06-27 19:02 ` [Qemu-devel] [RFC 16/30] target-i386: emulate LOCK'ed XADD using atomic helper Emilio G. Cota
2016-06-27 19:02 ` [Qemu-devel] [RFC 17/30] target-i386: emulate LOCK'ed BTX ops using atomic helpers Emilio G. Cota
2016-06-27 19:02 ` [Qemu-devel] [RFC 18/30] target-i386: emulate XCHG using atomic helper Emilio G. Cota
2016-06-27 19:02 ` Emilio G. Cota [this message]
2016-06-27 19:02 ` [Qemu-devel] [RFC 20/30] target-i386: remove helper_lock() Emilio G. Cota
2016-06-27 19:02 ` [Qemu-devel] [RFC 21/30] target-arm: add cmpxchg helpers Emilio G. Cota
2016-06-27 19:02 ` [Qemu-devel] [RFC 22/30] target-arm: emulate LL/SC using " Emilio G. Cota
2016-06-27 19:02 ` [Qemu-devel] [RFC 23/30] target-arm: add atomic_xchg helper Emilio G. Cota
2016-06-27 19:02 ` [Qemu-devel] [RFC 24/30] target-arm: emulate SWP with " Emilio G. Cota
2016-06-27 19:02 ` [Qemu-devel] [RFC 25/30] helper: add DEF_HELPER_6 Emilio G. Cota
2016-06-27 19:02 ` [Qemu-devel] [RFC 26/30] target-arm: add cmpxchg helpers for aarch64 Emilio G. Cota
2016-06-27 19:02 ` [Qemu-devel] [RFC 27/30] target-arm: emulate aarch64's LL/SC using cmpxchg helpers Emilio G. Cota
2016-06-27 19:02 ` [Qemu-devel] [RFC 28/30] linux-user: remove handling of ARM's EXCP_STREX Emilio G. Cota
2016-06-27 19:02 ` [Qemu-devel] [RFC 29/30] linux-user: remove handling of aarch64's EXCP_STREX Emilio G. Cota
2016-06-27 19:02 ` [Qemu-devel] [RFC 30/30] target-arm: remove EXCP_STREX + cpu_exclusive_{test, info} Emilio G. Cota
2016-06-28  8:45 ` [Qemu-devel] [RFC 00/30] cmpxchg-based emulation of atomics Lluís Vilanova
2016-06-28 15:48   ` Richard Henderson
2016-06-28 19:52   ` Emilio G. Cota

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1467054136-10430-20-git-send-email-cota@braap.org \
    --to=cota@braap.org \
    --cc=a.rigo@virtualopensystems.com \
    --cc=alex.bennee@linaro.org \
    --cc=mttcg@listserver.greensocs.com \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    --cc=serge.fdrv@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.