All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg
@ 2018-09-10 23:27 Emilio G. Cota
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 01/12] cacheinfo: add i/d cache_linesize_log Emilio G. Cota
                   ` (12 more replies)
  0 siblings, 13 replies; 24+ messages in thread
From: Emilio G. Cota @ 2018-09-10 23:27 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée, Murilo Opsfelder Araujo

v1: https://lists.gnu.org/archive/html/qemu-devel/2018-09/msg00237.html

Changes since v1:

- Add Alex's R-b tags
- Introduce atomic64 to just use a spinlock when doing atomic_set/read
  on i64's and u64's if !CONFIG_ATOMIC64, just like the kernel does.
  + Add a benchmark for these types of accesses to tests/
  + Add i/d cacheline_size_log
- Convert qsp to atomic64
- Convert test-rcu-list to atomic64
  + Add comments about what the patch does, as suggested by Murilo
- Convert cpus.c to atomic64
- Always use seqlock_write on cpu_update_icount

There's one checkpatch error, but it's a false positive.

You can fetch this series from:
  https://github.com/cota/qemu/tree/i386-mttcg-v2

Thanks,

		Emilio

^ permalink raw reply	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH v2 01/12] cacheinfo: add i/d cache_linesize_log
  2018-09-10 23:27 [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg Emilio G. Cota
@ 2018-09-10 23:27 ` Emilio G. Cota
  2018-09-11 12:16   ` Richard Henderson
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 02/12] util: add atomic64 Emilio G. Cota
                   ` (11 subsequent siblings)
  12 siblings, 1 reply; 24+ messages in thread
From: Emilio G. Cota @ 2018-09-10 23:27 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée, Murilo Opsfelder Araujo

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 include/qemu/osdep.h | 2 ++
 util/cacheinfo.c     | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h
index a91068df0e..a746a5e531 100644
--- a/include/qemu/osdep.h
+++ b/include/qemu/osdep.h
@@ -570,6 +570,8 @@ extern uintptr_t qemu_real_host_page_size;
 extern intptr_t qemu_real_host_page_mask;
 
 extern int qemu_icache_linesize;
+extern int qemu_icache_linesize_log;
 extern int qemu_dcache_linesize;
+extern int qemu_dcache_linesize_log;
 
 #endif
diff --git a/util/cacheinfo.c b/util/cacheinfo.c
index db5172d07c..57c7d58159 100644
--- a/util/cacheinfo.c
+++ b/util/cacheinfo.c
@@ -7,9 +7,12 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/host-utils.h"
 
 int qemu_icache_linesize = 0;
+int qemu_icache_linesize_log;
 int qemu_dcache_linesize = 0;
+int qemu_dcache_linesize_log;
 
 /*
  * Operating system specific detection mechanisms.
@@ -173,5 +176,7 @@ static void __attribute__((constructor)) init_cache_info(void)
     fallback_cache_info(&isize, &dsize);
 
     qemu_icache_linesize = isize;
+    qemu_icache_linesize_log = 31 - clz32(isize);
     qemu_dcache_linesize = dsize;
+    qemu_dcache_linesize_log = 31 - clz32(dsize);
 }
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH v2 02/12] util: add atomic64
  2018-09-10 23:27 [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg Emilio G. Cota
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 01/12] cacheinfo: add i/d cache_linesize_log Emilio G. Cota
@ 2018-09-10 23:27 ` Emilio G. Cota
  2018-09-11 12:43   ` Richard Henderson
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 03/12] tests: add atomic64-bench Emilio G. Cota
                   ` (10 subsequent siblings)
  12 siblings, 1 reply; 24+ messages in thread
From: Emilio G. Cota @ 2018-09-10 23:27 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée, Murilo Opsfelder Araujo

This introduces read/set accessors for int64_t and uint64_t.

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 include/qemu/atomic.h | 34 ++++++++++++++++++
 util/atomic64.c       | 83 +++++++++++++++++++++++++++++++++++++++++++
 util/cacheinfo.c      |  3 ++
 util/Makefile.objs    |  1 +
 4 files changed, 121 insertions(+)
 create mode 100644 util/atomic64.c

diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index 9ed39effd3..c34c2f78c4 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -450,4 +450,38 @@
     _oldn;                                                              \
 })
 
+/* Abstractions to access atomically (i.e. "once") i64/u64 variables */
+#ifdef CONFIG_ATOMIC64
+static inline int64_t atomic_read_i64(const int64_t *ptr)
+{
+    /* use __nocheck because sizeof(void *) might be < sizeof(u64) */
+    return atomic_read__nocheck(ptr);
+}
+
+static inline uint64_t atomic_read_u64(const uint64_t *ptr)
+{
+    return atomic_read__nocheck(ptr);
+}
+
+static inline void atomic_set_i64(int64_t *ptr, int64_t val)
+{
+    atomic_set__nocheck(ptr, val);
+}
+
+static inline void atomic_set_u64(uint64_t *ptr, uint64_t val)
+{
+    atomic_set__nocheck(ptr, val);
+}
+
+static inline void atomic64_init(void)
+{
+}
+#else /* !CONFIG_ATOMIC64 */
+int64_t  atomic_read_i64(const int64_t *ptr);
+uint64_t atomic_read_u64(const uint64_t *ptr);
+void atomic_set_i64(int64_t *ptr, int64_t val);
+void atomic_set_u64(uint64_t *ptr, uint64_t val);
+void atomic64_init(void);
+#endif /* !CONFIG_ATOMIC64 */
+
 #endif /* QEMU_ATOMIC_H */
diff --git a/util/atomic64.c b/util/atomic64.c
new file mode 100644
index 0000000000..b198a6c9c8
--- /dev/null
+++ b/util/atomic64.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/atomic.h"
+#include "qemu/thread.h"
+
+#ifdef CONFIG_ATOMIC64
+#error This file must only be compiled if !CONFIG_ATOMIC64
+#endif
+
+/*
+ * When !CONFIG_ATOMIC64, we serialize both reads and writes with spinlocks.
+ * We use an array of spinlocks, with padding computed at run-time based on
+ * the host's dcache line size.
+ * We point to the array with a void * to simplify the padding's computation.
+ * Each spinlock is located every lock_size bytes.
+ */
+static void *lock_array;
+static size_t lock_size;
+
+/*
+ * Systems without CONFIG_ATOMIC64 are unlikely to have many cores, so we use a
+ * small array of locks.
+ */
+#define NR_LOCKS 16
+
+static QemuSpin *addr_to_lock(const void *addr)
+{
+    uintptr_t a = (uintptr_t)addr;
+    uintptr_t idx;
+
+    idx = a >> qemu_dcache_linesize_log;
+    idx ^= (idx >> 8) ^ (idx >> 16);
+    idx &= NR_LOCKS - 1;
+    return lock_array + idx * lock_size;
+}
+
+#define GEN_READ(name, type)                    \
+    type name(const type *ptr)                  \
+    {                                           \
+        QemuSpin *lock = addr_to_lock(ptr);     \
+        type ret;                               \
+                                                \
+        qemu_spin_lock(lock);                   \
+        ret = *ptr;                             \
+        qemu_spin_unlock(lock);                 \
+        return ret;                             \
+    }
+
+GEN_READ(atomic_read_i64, int64_t)
+GEN_READ(atomic_read_u64, uint64_t)
+#undef GEN_READ
+
+#define GEN_SET(name, type)                     \
+    void name(type *ptr, type val)              \
+    {                                           \
+        QemuSpin *lock = addr_to_lock(ptr);     \
+                                                \
+        qemu_spin_lock(lock);                   \
+        *ptr = val;                             \
+        qemu_spin_unlock(lock);                 \
+    }
+
+GEN_SET(atomic_set_i64, int64_t)
+GEN_SET(atomic_set_u64, uint64_t)
+#undef GEN_SET
+
+void atomic64_init(void)
+{
+    int i;
+
+    lock_size = ROUND_UP(sizeof(QemuSpin), qemu_dcache_linesize);
+    lock_array = qemu_memalign(qemu_dcache_linesize, lock_size * NR_LOCKS);
+    for (i = 0; i < NR_LOCKS; i++) {
+        QemuSpin *lock = lock_array + i * lock_size;
+
+        qemu_spin_init(lock);
+    }
+}
diff --git a/util/cacheinfo.c b/util/cacheinfo.c
index 57c7d58159..6c6cbdb25d 100644
--- a/util/cacheinfo.c
+++ b/util/cacheinfo.c
@@ -8,6 +8,7 @@
 
 #include "qemu/osdep.h"
 #include "qemu/host-utils.h"
+#include "qemu/atomic.h"
 
 int qemu_icache_linesize = 0;
 int qemu_icache_linesize_log;
@@ -179,4 +180,6 @@ static void __attribute__((constructor)) init_cache_info(void)
     qemu_icache_linesize_log = 31 - clz32(isize);
     qemu_dcache_linesize = dsize;
     qemu_dcache_linesize_log = 31 - clz32(dsize);
+
+    atomic64_init();
 }
diff --git a/util/Makefile.objs b/util/Makefile.objs
index 0e88899011..0820923c18 100644
--- a/util/Makefile.objs
+++ b/util/Makefile.objs
@@ -3,6 +3,7 @@ util-obj-y += bufferiszero.o
 util-obj-y += lockcnt.o
 util-obj-y += aiocb.o async.o aio-wait.o thread-pool.o qemu-timer.o
 util-obj-y += main-loop.o iohandler.o
+util-obj-$(call lnot,$(CONFIG_ATOMIC64)) += atomic64.o
 util-obj-$(CONFIG_POSIX) += aio-posix.o
 util-obj-$(CONFIG_POSIX) += compatfd.o
 util-obj-$(CONFIG_POSIX) += event_notifier-posix.o
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH v2 03/12] tests: add atomic64-bench
  2018-09-10 23:27 [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg Emilio G. Cota
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 01/12] cacheinfo: add i/d cache_linesize_log Emilio G. Cota
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 02/12] util: add atomic64 Emilio G. Cota
@ 2018-09-10 23:27 ` Emilio G. Cota
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 04/12] qsp: use atomic64 accessors Emilio G. Cota
                   ` (9 subsequent siblings)
  12 siblings, 0 replies; 24+ messages in thread
From: Emilio G. Cota @ 2018-09-10 23:27 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée, Murilo Opsfelder Araujo

- With CONFIG_ATOMIC64:
$ tests/atomic64-bench  -n 1
 Throughput:         310.40 Mops/s

- Without:
$ tests/atomic64-bench  -n 1
 Throughput:         149.08 Mops/s

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 tests/atomic64-bench.c | 171 +++++++++++++++++++++++++++++++++++++++++
 tests/Makefile.include |   3 +-
 2 files changed, 173 insertions(+), 1 deletion(-)
 create mode 100644 tests/atomic64-bench.c

diff --git a/tests/atomic64-bench.c b/tests/atomic64-bench.c
new file mode 100644
index 0000000000..71692560ed
--- /dev/null
+++ b/tests/atomic64-bench.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2018, Emilio G. Cota <cota@braap.org>
+ *
+ * License: GNU GPL, version 2 or later.
+ *   See the COPYING file in the top-level directory.
+ */
+#include "qemu/osdep.h"
+#include "qemu/thread.h"
+#include "qemu/host-utils.h"
+#include "qemu/processor.h"
+
+struct thread_info {
+    uint64_t r;
+    uint64_t accesses;
+} QEMU_ALIGNED(64);
+
+struct count {
+    int64_t i64;
+} QEMU_ALIGNED(64);
+
+static QemuThread *threads;
+static struct thread_info *th_info;
+static unsigned int n_threads = 1;
+static unsigned int n_ready_threads;
+static struct count *counts;
+static unsigned int duration = 1;
+static unsigned int range = 1024;
+static bool test_start;
+static bool test_stop;
+
+static const char commands_string[] =
+    " -d = duration in seconds\n"
+    " -n = number of threads\n"
+    " -r = range (will be rounded up to pow2)";
+
+static void usage_complete(char *argv[])
+{
+    fprintf(stderr, "Usage: %s [options]\n", argv[0]);
+    fprintf(stderr, "options:\n%s\n", commands_string);
+}
+
+/*
+ * From: https://en.wikipedia.org/wiki/Xorshift
+ * This is faster than rand_r(), and gives us a wider range (RAND_MAX is only
+ * guaranteed to be >= INT_MAX).
+ */
+static uint64_t xorshift64star(uint64_t x)
+{
+    x ^= x >> 12; /* a */
+    x ^= x << 25; /* b */
+    x ^= x >> 27; /* c */
+    return x * UINT64_C(2685821657736338717);
+}
+
+static void *thread_func(void *arg)
+{
+    struct thread_info *info = arg;
+
+    atomic_inc(&n_ready_threads);
+    while (!atomic_read(&test_start)) {
+        cpu_relax();
+    }
+
+    while (!atomic_read(&test_stop)) {
+        unsigned int index;
+
+        info->r = xorshift64star(info->r);
+        index = info->r & (range - 1);
+        atomic_read_i64(&counts[index].i64);
+        info->accesses++;
+    }
+    return NULL;
+}
+
+static void run_test(void)
+{
+    unsigned int remaining;
+    unsigned int i;
+
+    while (atomic_read(&n_ready_threads) != n_threads) {
+        cpu_relax();
+    }
+    atomic_set(&test_start, true);
+    do {
+        remaining = sleep(duration);
+    } while (remaining);
+    atomic_set(&test_stop, true);
+
+    for (i = 0; i < n_threads; i++) {
+        qemu_thread_join(&threads[i]);
+    }
+}
+
+static void create_threads(void)
+{
+    unsigned int i;
+
+    threads = g_new(QemuThread, n_threads);
+    th_info = g_new(struct thread_info, n_threads);
+    counts = g_malloc0_n(range, sizeof(*counts));
+
+    for (i = 0; i < n_threads; i++) {
+        struct thread_info *info = &th_info[i];
+
+        info->r = (i + 1) ^ time(NULL);
+        info->accesses = 0;
+        qemu_thread_create(&threads[i], NULL, thread_func, info,
+                           QEMU_THREAD_JOINABLE);
+    }
+}
+
+static void pr_params(void)
+{
+    printf("Parameters:\n");
+    printf(" # of threads:      %u\n", n_threads);
+    printf(" duration:          %u\n", duration);
+    printf(" ops' range:        %u\n", range);
+}
+
+static void pr_stats(void)
+{
+    unsigned long long val = 0;
+    double tx;
+    int i;
+
+    for (i = 0; i < n_threads; i++) {
+        val += th_info[i].accesses;
+    }
+    tx = val / duration / 1e6;
+
+    printf("Results:\n");
+    printf("Duration:            %u s\n", duration);
+    printf(" Throughput:         %.2f Mops/s\n", tx);
+    printf(" Throughput/thread:  %.2f Mops/s/thread\n", tx / n_threads);
+}
+
+static void parse_args(int argc, char *argv[])
+{
+    int c;
+
+    for (;;) {
+        c = getopt(argc, argv, "hd:n:r:");
+        if (c < 0) {
+            break;
+        }
+        switch (c) {
+        case 'h':
+            usage_complete(argv);
+            exit(0);
+        case 'd':
+            duration = atoi(optarg);
+            break;
+        case 'n':
+            n_threads = atoi(optarg);
+            break;
+        case 'r':
+            range = pow2ceil(atoi(optarg));
+            break;
+        }
+    }
+}
+
+int main(int argc, char *argv[])
+{
+    parse_args(argc, argv);
+    pr_params();
+    create_threads();
+    run_test();
+    pr_stats();
+    return 0;
+}
diff --git a/tests/Makefile.include b/tests/Makefile.include
index 87c81d1dcc..b12b6a867a 100644
--- a/tests/Makefile.include
+++ b/tests/Makefile.include
@@ -601,7 +601,7 @@ test-obj-y = tests/check-qnum.o tests/check-qstring.o tests/check-qdict.o \
 	tests/test-rcu-tailq.o \
 	tests/test-qdist.o tests/test-shift128.o \
 	tests/test-qht.o tests/qht-bench.o tests/test-qht-par.o \
-	tests/atomic_add-bench.o
+	tests/atomic_add-bench.o tests/atomic64-bench.o
 
 $(test-obj-y): QEMU_INCLUDES += -Itests
 QEMU_CFLAGS += -I$(SRC_PATH)/tests
@@ -656,6 +656,7 @@ tests/test-qht-par$(EXESUF): tests/test-qht-par.o tests/qht-bench$(EXESUF) $(tes
 tests/qht-bench$(EXESUF): tests/qht-bench.o $(test-util-obj-y)
 tests/test-bufferiszero$(EXESUF): tests/test-bufferiszero.o $(test-util-obj-y)
 tests/atomic_add-bench$(EXESUF): tests/atomic_add-bench.o $(test-util-obj-y)
+tests/atomic64-bench$(EXESUF): tests/atomic64-bench.o $(test-util-obj-y)
 
 tests/test-qdev-global-props$(EXESUF): tests/test-qdev-global-props.o \
 	hw/core/qdev.o hw/core/qdev-properties.o hw/core/hotplug.o\
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH v2 04/12] qsp: use atomic64 accessors
  2018-09-10 23:27 [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg Emilio G. Cota
                   ` (2 preceding siblings ...)
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 03/12] tests: add atomic64-bench Emilio G. Cota
@ 2018-09-10 23:27 ` Emilio G. Cota
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 05/12] test-rcu-list: access n_reclaims and n_nodes_removed with atomic64 Emilio G. Cota
                   ` (8 subsequent siblings)
  12 siblings, 0 replies; 24+ messages in thread
From: Emilio G. Cota @ 2018-09-10 23:27 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée, Murilo Opsfelder Araujo

With the seqlock, we either have to use atomics to remain
within defined behaviour (and note that 64-bit atomics aren't
always guaranteed to compile, irrespective of __nocheck), or
drop the atomics and be in undefined behaviour territory.

Fix it by dropping the seqlock and using atomic64 accessors.
This will limit scalability when !CONFIG_ATOMIC64, but those
machines (1) don't have many users and (2) are unlikely to
have many cores.

- With CONFIG_ATOMIC64:
$ tests/atomic_add-bench -n 1 -m -p
 Throughput:         13.00 Mops/s

- Forcing !CONFIG_ATOMIC64:
$ tests/atomic_add-bench -n 1 -m -p
 Throughput:         10.89 Mops/s

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 util/qsp.c | 49 ++++++++-----------------------------------------
 1 file changed, 8 insertions(+), 41 deletions(-)

diff --git a/util/qsp.c b/util/qsp.c
index b0c2575d10..c16af03f6a 100644
--- a/util/qsp.c
+++ b/util/qsp.c
@@ -84,13 +84,6 @@ struct QSPEntry {
     uint64_t n_acqs;
     uint64_t ns;
     unsigned int n_objs; /* count of coalesced objs; only used for reporting */
-#ifndef CONFIG_ATOMIC64
-    /*
-     * If we cannot update the counts atomically, then use a seqlock.
-     * We don't need an associated lock because the updates are thread-local.
-     */
-    QemuSeqLock sequence;
-#endif
 };
 typedef struct QSPEntry QSPEntry;
 
@@ -344,47 +337,16 @@ static QSPEntry *qsp_entry_get(const void *obj, const char *file, int line,
     return qsp_entry_find(&qsp_ht, &orig, hash);
 }
 
-/*
- * @from is in the global hash table; read it atomically if the host
- * supports it, otherwise use the seqlock.
- */
-static void qsp_entry_aggregate(QSPEntry *to, const QSPEntry *from)
-{
-#ifdef CONFIG_ATOMIC64
-    to->ns += atomic_read__nocheck(&from->ns);
-    to->n_acqs += atomic_read__nocheck(&from->n_acqs);
-#else
-    unsigned int version;
-    uint64_t ns, n_acqs;
-
-    do {
-        version = seqlock_read_begin(&from->sequence);
-        ns = atomic_read__nocheck(&from->ns);
-        n_acqs = atomic_read__nocheck(&from->n_acqs);
-    } while (seqlock_read_retry(&from->sequence, version));
-
-    to->ns += ns;
-    to->n_acqs += n_acqs;
-#endif
-}
-
 /*
  * @e is in the global hash table; it is only written to by the current thread,
  * so we write to it atomically (as in "write once") to prevent torn reads.
- * If the host doesn't support u64 atomics, use the seqlock.
  */
 static inline void do_qsp_entry_record(QSPEntry *e, int64_t delta, bool acq)
 {
-#ifndef CONFIG_ATOMIC64
-    seqlock_write_begin(&e->sequence);
-#endif
-    atomic_set__nocheck(&e->ns, e->ns + delta);
+    atomic_set_u64(&e->ns, e->ns + delta);
     if (acq) {
-        atomic_set__nocheck(&e->n_acqs, e->n_acqs + 1);
+        atomic_set_u64(&e->n_acqs, e->n_acqs + 1);
     }
-#ifndef CONFIG_ATOMIC64
-    seqlock_write_end(&e->sequence);
-#endif
 }
 
 static inline void qsp_entry_record(QSPEntry *e, int64_t delta)
@@ -550,7 +512,12 @@ static void qsp_aggregate(struct qht *global_ht, void *p, uint32_t h, void *up)
 
     hash = qsp_entry_no_thread_hash(e);
     agg = qsp_entry_find(ht, e, hash);
-    qsp_entry_aggregate(agg, e);
+    /*
+     * The entry is in the global hash table; read from it atomically (as in
+     * "read once").
+     */
+    agg->ns += atomic_read_u64(&e->ns);
+    agg->n_acqs += atomic_read_u64(&e->n_acqs);
 }
 
 static void qsp_iter_diff(struct qht *orig, void *p, uint32_t hash, void *htp)
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH v2 05/12] test-rcu-list: access n_reclaims and n_nodes_removed with atomic64
  2018-09-10 23:27 [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg Emilio G. Cota
                   ` (3 preceding siblings ...)
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 04/12] qsp: use atomic64 accessors Emilio G. Cota
@ 2018-09-10 23:27 ` Emilio G. Cota
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 06/12] atomic: fix comment s/x64_64/x86_64/ Emilio G. Cota
                   ` (7 subsequent siblings)
  12 siblings, 0 replies; 24+ messages in thread
From: Emilio G. Cota @ 2018-09-10 23:27 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée, Murilo Opsfelder Araujo

To avoid undefined behaviour.

Note that these "atomics" are atomic in the "access once" sense.
The variables are updated by a single thread at a time, so no
"full" atomics are necessary.

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 tests/test-rcu-list.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/tests/test-rcu-list.c b/tests/test-rcu-list.c
index 192bfbf02e..2e6f70bd59 100644
--- a/tests/test-rcu-list.c
+++ b/tests/test-rcu-list.c
@@ -33,8 +33,8 @@
 static QemuMutex counts_mutex;
 static long long n_reads = 0LL;
 static long long n_updates = 0LL;
-static long long n_reclaims = 0LL;
-static long long n_nodes_removed = 0LL;
+static int64_t n_reclaims;
+static int64_t n_nodes_removed;
 static long long n_nodes = 0LL;
 static int g_test_in_charge = 0;
 
@@ -104,7 +104,7 @@ static void reclaim_list_el(struct rcu_head *prcu)
     struct list_element *el = container_of(prcu, struct list_element, rcu);
     g_free(el);
     /* Accessed only from call_rcu thread.  */
-    n_reclaims++;
+    atomic_set_i64(&n_reclaims, n_reclaims + 1);
 }
 
 #if TEST_LIST_TYPE == 1
@@ -232,7 +232,7 @@ static void *rcu_q_updater(void *arg)
     qemu_mutex_lock(&counts_mutex);
     n_nodes += n_nodes_local;
     n_updates += n_updates_local;
-    n_nodes_removed += n_removed_local;
+    atomic_set_i64(&n_nodes_removed, n_nodes_removed + n_removed_local);
     qemu_mutex_unlock(&counts_mutex);
     return NULL;
 }
@@ -286,19 +286,21 @@ static void rcu_qtest(const char *test, int duration, int nreaders)
         n_removed_local++;
     }
     qemu_mutex_lock(&counts_mutex);
-    n_nodes_removed += n_removed_local;
+    atomic_set_i64(&n_nodes_removed, n_nodes_removed + n_removed_local);
     qemu_mutex_unlock(&counts_mutex);
     synchronize_rcu();
-    while (n_nodes_removed > n_reclaims) {
+    while (atomic_read_i64(&n_nodes_removed) > atomic_read_i64(&n_reclaims)) {
         g_usleep(100);
         synchronize_rcu();
     }
     if (g_test_in_charge) {
-        g_assert_cmpint(n_nodes_removed, ==, n_reclaims);
+        g_assert_cmpint(atomic_read_i64(&n_nodes_removed), ==,
+                        atomic_read_i64(&n_reclaims));
     } else {
         printf("%s: %d readers; 1 updater; nodes read: "  \
-               "%lld, nodes removed: %lld; nodes reclaimed: %lld\n",
-               test, nthreadsrunning - 1, n_reads, n_nodes_removed, n_reclaims);
+               "%lld, nodes removed: %"PRIi64"; nodes reclaimed: %"PRIi64"\n",
+               test, nthreadsrunning - 1, n_reads,
+               atomic_read_i64(&n_nodes_removed), atomic_read_i64(&n_reclaims));
         exit(0);
     }
 }
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH v2 06/12] atomic: fix comment s/x64_64/x86_64/
  2018-09-10 23:27 [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg Emilio G. Cota
                   ` (4 preceding siblings ...)
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 05/12] test-rcu-list: access n_reclaims and n_nodes_removed with atomic64 Emilio G. Cota
@ 2018-09-10 23:27 ` Emilio G. Cota
  2018-09-11 12:50   ` Richard Henderson
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 07/12] cpus: initialize timers_state.vm_clock_lock Emilio G. Cota
                   ` (6 subsequent siblings)
  12 siblings, 1 reply; 24+ messages in thread
From: Emilio G. Cota @ 2018-09-10 23:27 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée, Murilo Opsfelder Araujo

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 include/qemu/atomic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index c34c2f78c4..f6993a8fb1 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -98,7 +98,7 @@
  * We'd prefer not want to pull in everything else TCG related, so handle
  * those few cases by hand.
  *
- * Note that x32 is fully detected with __x64_64__ + _ILP32, and that for
+ * Note that x32 is fully detected with __x86_64__ + _ILP32, and that for
  * Sparc we always force the use of sparcv9 in configure.
  */
 #if defined(__x86_64__) || defined(__sparc__)
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH v2 07/12] cpus: initialize timers_state.vm_clock_lock
  2018-09-10 23:27 [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg Emilio G. Cota
                   ` (5 preceding siblings ...)
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 06/12] atomic: fix comment s/x64_64/x86_64/ Emilio G. Cota
@ 2018-09-10 23:27 ` Emilio G. Cota
  2018-09-11 12:50   ` Richard Henderson
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 08/12] cpus: always call seqlock_write in cpu_update_icount Emilio G. Cota
                   ` (5 subsequent siblings)
  12 siblings, 1 reply; 24+ messages in thread
From: Emilio G. Cota @ 2018-09-10 23:27 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée, Murilo Opsfelder Araujo

We forgot to initialize the spinlock introduced in 94377115b2
("cpus: protect TimerState writes with a spinlock", 2018-08-23).
Fix it.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 cpus.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cpus.c b/cpus.c
index 8ee6e5db93..ebc13bac2d 100644
--- a/cpus.c
+++ b/cpus.c
@@ -823,6 +823,7 @@ int cpu_throttle_get_percentage(void)
 void cpu_ticks_init(void)
 {
     seqlock_init(&timers_state.vm_clock_seqlock);
+    qemu_spin_init(&timers_state.vm_clock_lock);
     vmstate_register(NULL, 0, &vmstate_timers, &timers_state);
     throttle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT,
                                            cpu_throttle_timer_tick, NULL);
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH v2 08/12] cpus: always call seqlock_write in cpu_update_icount
  2018-09-10 23:27 [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg Emilio G. Cota
                   ` (6 preceding siblings ...)
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 07/12] cpus: initialize timers_state.vm_clock_lock Emilio G. Cota
@ 2018-09-10 23:27 ` Emilio G. Cota
  2018-09-11 12:53   ` Richard Henderson
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 09/12] cpus: access .qemu_icount with atomic64 Emilio G. Cota
                   ` (4 subsequent siblings)
  12 siblings, 1 reply; 24+ messages in thread
From: Emilio G. Cota @ 2018-09-10 23:27 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée, Murilo Opsfelder Araujo

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 cpus.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/cpus.c b/cpus.c
index ebc13bac2d..38dabb138d 100644
--- a/cpus.c
+++ b/cpus.c
@@ -250,16 +250,12 @@ void cpu_update_icount(CPUState *cpu)
     int64_t executed = cpu_get_icount_executed(cpu);
     cpu->icount_budget -= executed;
 
-#ifndef CONFIG_ATOMIC64
     seqlock_write_lock(&timers_state.vm_clock_seqlock,
                        &timers_state.vm_clock_lock);
-#endif
     atomic_set__nocheck(&timers_state.qemu_icount,
                         timers_state.qemu_icount + executed);
-#ifndef CONFIG_ATOMIC64
     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                          &timers_state.vm_clock_lock);
-#endif
 }
 
 static int64_t cpu_get_icount_raw_locked(void)
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH v2 09/12] cpus: access .qemu_icount with atomic64
  2018-09-10 23:27 [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg Emilio G. Cota
                   ` (7 preceding siblings ...)
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 08/12] cpus: always call seqlock_write in cpu_update_icount Emilio G. Cota
@ 2018-09-10 23:27 ` Emilio G. Cota
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 10/12] cpus: access .qemu_icount_bias " Emilio G. Cota
                   ` (3 subsequent siblings)
  12 siblings, 0 replies; 24+ messages in thread
From: Emilio G. Cota @ 2018-09-10 23:27 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée, Murilo Opsfelder Araujo

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 cpus.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/cpus.c b/cpus.c
index 38dabb138d..f045416c94 100644
--- a/cpus.c
+++ b/cpus.c
@@ -252,8 +252,8 @@ void cpu_update_icount(CPUState *cpu)
 
     seqlock_write_lock(&timers_state.vm_clock_seqlock,
                        &timers_state.vm_clock_lock);
-    atomic_set__nocheck(&timers_state.qemu_icount,
-                        timers_state.qemu_icount + executed);
+    atomic_set_i64(&timers_state.qemu_icount,
+                   timers_state.qemu_icount + executed);
     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                          &timers_state.vm_clock_lock);
 }
@@ -270,8 +270,8 @@ static int64_t cpu_get_icount_raw_locked(void)
         /* Take into account what has run */
         cpu_update_icount(cpu);
     }
-    /* The read is protected by the seqlock, so __nocheck is okay.  */
-    return atomic_read__nocheck(&timers_state.qemu_icount);
+    /* The read is protected by the seqlock, but needs atomic64 to avoid UB */
+    return atomic_read_i64(&timers_state.qemu_icount);
 }
 
 static int64_t cpu_get_icount_locked(void)
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH v2 10/12] cpus: access .qemu_icount_bias with atomic64
  2018-09-10 23:27 [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg Emilio G. Cota
                   ` (8 preceding siblings ...)
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 09/12] cpus: access .qemu_icount with atomic64 Emilio G. Cota
@ 2018-09-10 23:27 ` Emilio G. Cota
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 11/12] target/i386/translate: use thread-local storage in !user-mode Emilio G. Cota
                   ` (2 subsequent siblings)
  12 siblings, 0 replies; 24+ messages in thread
From: Emilio G. Cota @ 2018-09-10 23:27 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée, Murilo Opsfelder Araujo

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 cpus.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/cpus.c b/cpus.c
index f045416c94..5a16a0186c 100644
--- a/cpus.c
+++ b/cpus.c
@@ -277,7 +277,8 @@ static int64_t cpu_get_icount_raw_locked(void)
 static int64_t cpu_get_icount_locked(void)
 {
     int64_t icount = cpu_get_icount_raw_locked();
-    return atomic_read__nocheck(&timers_state.qemu_icount_bias) + cpu_icount_to_ns(icount);
+    return atomic_read_i64(&timers_state.qemu_icount_bias) +
+        cpu_icount_to_ns(icount);
 }
 
 int64_t cpu_get_icount_raw(void)
@@ -450,9 +451,9 @@ static void icount_adjust(void)
                    timers_state.icount_time_shift + 1);
     }
     last_delta = delta;
-    atomic_set__nocheck(&timers_state.qemu_icount_bias,
-                        cur_icount - (timers_state.qemu_icount
-                                      << timers_state.icount_time_shift));
+    atomic_set_i64(&timers_state.qemu_icount_bias,
+                   cur_icount - (timers_state.qemu_icount
+                                 << timers_state.icount_time_shift));
     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                          &timers_state.vm_clock_lock);
 }
@@ -512,8 +513,8 @@ static void icount_warp_rt(void)
             int64_t delta = clock - cur_icount;
             warp_delta = MIN(warp_delta, delta);
         }
-        atomic_set__nocheck(&timers_state.qemu_icount_bias,
-                            timers_state.qemu_icount_bias + warp_delta);
+        atomic_set_i64(&timers_state.qemu_icount_bias,
+                       timers_state.qemu_icount_bias + warp_delta);
     }
     timers_state.vm_clock_warp_start = -1;
     seqlock_write_unlock(&timers_state.vm_clock_seqlock,
@@ -544,8 +545,8 @@ void qtest_clock_warp(int64_t dest)
 
         seqlock_write_lock(&timers_state.vm_clock_seqlock,
                            &timers_state.vm_clock_lock);
-        atomic_set__nocheck(&timers_state.qemu_icount_bias,
-                            timers_state.qemu_icount_bias + warp);
+        atomic_set_i64(&timers_state.qemu_icount_bias,
+                       timers_state.qemu_icount_bias + warp);
         seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                              &timers_state.vm_clock_lock);
 
@@ -616,8 +617,8 @@ void qemu_start_warp_timer(void)
              */
             seqlock_write_lock(&timers_state.vm_clock_seqlock,
                                &timers_state.vm_clock_lock);
-            atomic_set__nocheck(&timers_state.qemu_icount_bias,
-                                timers_state.qemu_icount_bias + deadline);
+            atomic_set_i64(&timers_state.qemu_icount_bias,
+                           timers_state.qemu_icount_bias + deadline);
             seqlock_write_unlock(&timers_state.vm_clock_seqlock,
                                  &timers_state.vm_clock_lock);
             qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH v2 11/12] target/i386/translate: use thread-local storage in !user-mode
  2018-09-10 23:27 [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg Emilio G. Cota
                   ` (9 preceding siblings ...)
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 10/12] cpus: access .qemu_icount_bias " Emilio G. Cota
@ 2018-09-10 23:27 ` Emilio G. Cota
  2018-09-11 12:57   ` Richard Henderson
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 12/12] configure: enable mttcg for i386 and x86_64 Emilio G. Cota
  2018-09-11 13:00 ` [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg Paolo Bonzini
  12 siblings, 1 reply; 24+ messages in thread
From: Emilio G. Cota @ 2018-09-10 23:27 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée, Murilo Opsfelder Araujo

Needed for MTTCG.

Note that in user-mode, code generation is serialized by
mmap_lock, so making these variables per-thread would
just waste TLS space.

Acked-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 target/i386/translate.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/target/i386/translate.c b/target/i386/translate.c
index 1f9d1d9b24..9a6a72e205 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -71,26 +71,34 @@
 
 //#define MACRO_TEST   1
 
+/* we need thread-local storage for mttcg */
+#ifdef CONFIG_USER_ONLY
+#define I386_THREAD
+#else
+#define I386_THREAD __thread
+#endif
+
 /* global register indexes */
-static TCGv cpu_A0;
-static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
+static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
 static TCGv_i32 cpu_cc_op;
 static TCGv cpu_regs[CPU_NB_REGS];
 static TCGv cpu_seg_base[6];
 static TCGv_i64 cpu_bndl[4];
 static TCGv_i64 cpu_bndu[4];
 /* local temps */
-static TCGv cpu_T0, cpu_T1;
+static I386_THREAD TCGv cpu_cc_srcT;
+static I386_THREAD TCGv cpu_A0;
+static I386_THREAD TCGv cpu_T0, cpu_T1;
 /* local register indexes (only used inside old micro ops) */
-static TCGv cpu_tmp0, cpu_tmp4;
-static TCGv_ptr cpu_ptr0, cpu_ptr1;
-static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
-static TCGv_i64 cpu_tmp1_i64;
+static I386_THREAD TCGv cpu_tmp0, cpu_tmp4;
+static I386_THREAD TCGv_ptr cpu_ptr0, cpu_ptr1;
+static I386_THREAD TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
+static I386_THREAD TCGv_i64 cpu_tmp1_i64;
 
 #include "exec/gen-icount.h"
 
 #ifdef TARGET_X86_64
-static int x86_64_hregs;
+static I386_THREAD int x86_64_hregs;
 #endif
 
 typedef struct DisasContext {
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* [Qemu-devel] [PATCH v2 12/12] configure: enable mttcg for i386 and x86_64
  2018-09-10 23:27 [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg Emilio G. Cota
                   ` (10 preceding siblings ...)
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 11/12] target/i386/translate: use thread-local storage in !user-mode Emilio G. Cota
@ 2018-09-10 23:27 ` Emilio G. Cota
  2018-09-11 12:58   ` Richard Henderson
  2018-09-11 13:00 ` [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg Paolo Bonzini
  12 siblings, 1 reply; 24+ messages in thread
From: Emilio G. Cota @ 2018-09-10 23:27 UTC (permalink / raw)
  To: qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Eduardo Habkost, Alex Bennée, Murilo Opsfelder Araujo

Signed-off-by: Emilio G. Cota <cota@braap.org>
---
 configure | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/configure b/configure
index 58862d2ae8..f715252c9f 100755
--- a/configure
+++ b/configure
@@ -7025,12 +7025,14 @@ TARGET_ABI_DIR=""
 
 case "$target_name" in
   i386)
+    mttcg="yes"
     gdb_xml_files="i386-32bit.xml i386-32bit-core.xml i386-32bit-sse.xml"
     target_compiler=$cross_cc_i386
     target_compiler_cflags=$cross_cc_ccflags_i386
   ;;
   x86_64)
     TARGET_BASE_ARCH=i386
+    mttcg="yes"
     gdb_xml_files="i386-64bit.xml i386-64bit-core.xml i386-64bit-sse.xml"
     target_compiler=$cross_cc_x86_64
   ;;
-- 
2.17.1

^ permalink raw reply related	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH v2 01/12] cacheinfo: add i/d cache_linesize_log
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 01/12] cacheinfo: add i/d cache_linesize_log Emilio G. Cota
@ 2018-09-11 12:16   ` Richard Henderson
  2018-09-11 12:57     ` Paolo Bonzini
  0 siblings, 1 reply; 24+ messages in thread
From: Richard Henderson @ 2018-09-11 12:16 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée, Murilo Opsfelder Araujo

On 09/10/2018 04:27 PM, Emilio G. Cota wrote:
> @@ -173,5 +176,7 @@ static void __attribute__((constructor)) init_cache_info(void)
>      fallback_cache_info(&isize, &dsize);
>  
>      qemu_icache_linesize = isize;
> +    qemu_icache_linesize_log = 31 - clz32(isize);
>      qemu_dcache_linesize = dsize;
> +    qemu_dcache_linesize_log = 31 - clz32(dsize);

Either pow2ceil or realize that linesize is already a power of 2 and use ctz32.


r~

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH v2 02/12] util: add atomic64
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 02/12] util: add atomic64 Emilio G. Cota
@ 2018-09-11 12:43   ` Richard Henderson
  2018-09-11 20:43     ` Emilio G. Cota
  0 siblings, 1 reply; 24+ messages in thread
From: Richard Henderson @ 2018-09-11 12:43 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée, Murilo Opsfelder Araujo

On 09/10/2018 04:27 PM, Emilio G. Cota wrote:
> +#define GEN_READ(name, type)                    \
> +    type name(const type *ptr)                  \
> +    {                                           \
> +        QemuSpin *lock = addr_to_lock(ptr);     \
> +        type ret;                               \
> +                                                \
> +        qemu_spin_lock(lock);                   \
> +        ret = *ptr;                             \
> +        qemu_spin_unlock(lock);                 \
> +        return ret;                             \
> +    }
> +
> +GEN_READ(atomic_read_i64, int64_t)
> +GEN_READ(atomic_read_u64, uint64_t)

Is there really a good reason to have two external
functions instead of having one of them inline and
perform a cast?

Is this any better than using libatomic?


r~

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH v2 06/12] atomic: fix comment s/x64_64/x86_64/
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 06/12] atomic: fix comment s/x64_64/x86_64/ Emilio G. Cota
@ 2018-09-11 12:50   ` Richard Henderson
  0 siblings, 0 replies; 24+ messages in thread
From: Richard Henderson @ 2018-09-11 12:50 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée, Murilo Opsfelder Araujo

On 09/10/2018 04:27 PM, Emilio G. Cota wrote:
> Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  include/qemu/atomic.h | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH v2 07/12] cpus: initialize timers_state.vm_clock_lock
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 07/12] cpus: initialize timers_state.vm_clock_lock Emilio G. Cota
@ 2018-09-11 12:50   ` Richard Henderson
  0 siblings, 0 replies; 24+ messages in thread
From: Richard Henderson @ 2018-09-11 12:50 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée, Murilo Opsfelder Araujo

On 09/10/2018 04:27 PM, Emilio G. Cota wrote:
> We forgot to initialize the spinlock introduced in 94377115b2
> ("cpus: protect TimerState writes with a spinlock", 2018-08-23).
> Fix it.
> 
> Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  cpus.c | 1 +
>  1 file changed, 1 insertion(+)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH v2 08/12] cpus: always call seqlock_write in cpu_update_icount
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 08/12] cpus: always call seqlock_write in cpu_update_icount Emilio G. Cota
@ 2018-09-11 12:53   ` Richard Henderson
  2018-09-11 12:55     ` Paolo Bonzini
  0 siblings, 1 reply; 24+ messages in thread
From: Richard Henderson @ 2018-09-11 12:53 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée, Murilo Opsfelder Araujo

On 09/10/2018 04:27 PM, Emilio G. Cota wrote:
>  
> -#ifndef CONFIG_ATOMIC64
>      seqlock_write_lock(&timers_state.vm_clock_seqlock,
>                         &timers_state.vm_clock_lock);
> -#endif
>      atomic_set__nocheck(&timers_state.qemu_icount,
>                          timers_state.qemu_icount + executed);
> -#ifndef CONFIG_ATOMIC64
>      seqlock_write_unlock(&timers_state.vm_clock_seqlock,
>                           &timers_state.vm_clock_lock);
> -#endif

I don't understand this.  Surely you either want atomic64_set,
or an actual atomic64_add, but no extra lock when CONFIG_ATOMIC64.


r~

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH v2 08/12] cpus: always call seqlock_write in cpu_update_icount
  2018-09-11 12:53   ` Richard Henderson
@ 2018-09-11 12:55     ` Paolo Bonzini
  0 siblings, 0 replies; 24+ messages in thread
From: Paolo Bonzini @ 2018-09-11 12:55 UTC (permalink / raw)
  To: Richard Henderson, Emilio G. Cota, qemu-devel
  Cc: Peter Crosthwaite, Eduardo Habkost, Alex Bennée,
	Murilo Opsfelder Araujo

On 11/09/2018 14:53, Richard Henderson wrote:
>>  
>> -#ifndef CONFIG_ATOMIC64
>>      seqlock_write_lock(&timers_state.vm_clock_seqlock,
>>                         &timers_state.vm_clock_lock);
>> -#endif
>>      atomic_set__nocheck(&timers_state.qemu_icount,
>>                          timers_state.qemu_icount + executed);
>> -#ifndef CONFIG_ATOMIC64
>>      seqlock_write_unlock(&timers_state.vm_clock_seqlock,
>>                           &timers_state.vm_clock_lock);
>> -#endif
> I don't understand this.  Surely you either want atomic64_set,
> or an actual atomic64_add, but no extra lock when CONFIG_ATOMIC64.

Even though writes of qemu_icount can safely race with reads in
qemu_icount_raw, qemu_icount is also read by icount_adjust, which runs
in the I/O thread.  Therefore, writes do need protection of the
vm_clock_lock; for simplicity Emilio's patch protects it with both
seqlock+spinlock, which we already do for hosts that lack 64-bit atomics.

The bug actually predated the introduction of vm_clock_lock;
cpu_update_icount would have needed the BQL before the spinlock was
introduced.

The above could have been a commit message, by the way. :)

Paolo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH v2 01/12] cacheinfo: add i/d cache_linesize_log
  2018-09-11 12:16   ` Richard Henderson
@ 2018-09-11 12:57     ` Paolo Bonzini
  0 siblings, 0 replies; 24+ messages in thread
From: Paolo Bonzini @ 2018-09-11 12:57 UTC (permalink / raw)
  To: Richard Henderson, Emilio G. Cota, qemu-devel
  Cc: Peter Crosthwaite, Eduardo Habkost, Alex Bennée,
	Murilo Opsfelder Araujo

On 11/09/2018 14:16, Richard Henderson wrote:
> On 09/10/2018 04:27 PM, Emilio G. Cota wrote:
>> @@ -173,5 +176,7 @@ static void __attribute__((constructor)) init_cache_info(void)
>>      fallback_cache_info(&isize, &dsize);
>>  
>>      qemu_icache_linesize = isize;
>> +    qemu_icache_linesize_log = 31 - clz32(isize);
>>      qemu_dcache_linesize = dsize;
>> +    qemu_dcache_linesize_log = 31 - clz32(dsize);
> 
> Either pow2ceil or realize that linesize is already a power of 2 and use ctz32.

Changed to ctz32.

Paolo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH v2 11/12] target/i386/translate: use thread-local storage in !user-mode
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 11/12] target/i386/translate: use thread-local storage in !user-mode Emilio G. Cota
@ 2018-09-11 12:57   ` Richard Henderson
  0 siblings, 0 replies; 24+ messages in thread
From: Richard Henderson @ 2018-09-11 12:57 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée, Murilo Opsfelder Araujo

On 09/10/2018 04:27 PM, Emilio G. Cota wrote:
> Needed for MTTCG.
> 
> Note that in user-mode, code generation is serialized by
> mmap_lock, so making these variables per-thread would
> just waste TLS space.
> 
> Acked-by: Alex Bennée <alex.bennee@linaro.org>
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---

This is ok for now, but we really really should move these into DisasContext.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH v2 12/12] configure: enable mttcg for i386 and x86_64
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 12/12] configure: enable mttcg for i386 and x86_64 Emilio G. Cota
@ 2018-09-11 12:58   ` Richard Henderson
  0 siblings, 0 replies; 24+ messages in thread
From: Richard Henderson @ 2018-09-11 12:58 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée, Murilo Opsfelder Araujo

On 09/10/2018 04:27 PM, Emilio G. Cota wrote:
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  configure | 2 ++
>  1 file changed, 2 insertions(+)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg
  2018-09-10 23:27 [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg Emilio G. Cota
                   ` (11 preceding siblings ...)
  2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 12/12] configure: enable mttcg for i386 and x86_64 Emilio G. Cota
@ 2018-09-11 13:00 ` Paolo Bonzini
  12 siblings, 0 replies; 24+ messages in thread
From: Paolo Bonzini @ 2018-09-11 13:00 UTC (permalink / raw)
  To: Emilio G. Cota, qemu-devel
  Cc: Peter Crosthwaite, Richard Henderson, Eduardo Habkost,
	Alex Bennée, Murilo Opsfelder Araujo

On 11/09/2018 01:27, Emilio G. Cota wrote:
> v1: https://lists.gnu.org/archive/html/qemu-devel/2018-09/msg00237.html
> 
> Changes since v1:
> 
> - Add Alex's R-b tags
> - Introduce atomic64 to just use a spinlock when doing atomic_set/read
>   on i64's and u64's if !CONFIG_ATOMIC64, just like the kernel does.
>   + Add a benchmark for these types of accesses to tests/
>   + Add i/d cacheline_size_log
> - Convert qsp to atomic64
> - Convert test-rcu-list to atomic64
>   + Add comments about what the patch does, as suggested by Murilo
> - Convert cpus.c to atomic64
> - Always use seqlock_write on cpu_update_icount
> 
> There's one checkpatch error, but it's a false positive.
> 
> You can fetch this series from:
>   https://github.com/cota/qemu/tree/i386-mttcg-v2

Queued 1-10.

Paolo

^ permalink raw reply	[flat|nested] 24+ messages in thread

* Re: [Qemu-devel] [PATCH v2 02/12] util: add atomic64
  2018-09-11 12:43   ` Richard Henderson
@ 2018-09-11 20:43     ` Emilio G. Cota
  0 siblings, 0 replies; 24+ messages in thread
From: Emilio G. Cota @ 2018-09-11 20:43 UTC (permalink / raw)
  To: Richard Henderson
  Cc: qemu-devel, Paolo Bonzini, Peter Crosthwaite, Eduardo Habkost,
	Alex Bennée, Murilo Opsfelder Araujo

On Tue, Sep 11, 2018 at 05:43:38 -0700, Richard Henderson wrote:
> On 09/10/2018 04:27 PM, Emilio G. Cota wrote:
> > +#define GEN_READ(name, type)                    \
> > +    type name(const type *ptr)                  \
> > +    {                                           \
> > +        QemuSpin *lock = addr_to_lock(ptr);     \
> > +        type ret;                               \
> > +                                                \
> > +        qemu_spin_lock(lock);                   \
> > +        ret = *ptr;                             \
> > +        qemu_spin_unlock(lock);                 \
> > +        return ret;                             \
> > +    }
> > +
> > +GEN_READ(atomic_read_i64, int64_t)
> > +GEN_READ(atomic_read_u64, uint64_t)
> 
> Is there really a good reason to have two external
> functions instead of having one of them inline and
> perform a cast?

Not really. I can do a follow-up patch if you want me to.

> Is this any better than using libatomic?

I didn't think of using libatomic. I just checked the source
code and it's quite similar:
- It uses 64 locks instead of 16 ($page_size/$cache_line,
  but these are hard-coded for POSIX as 4096/64, respectively)
- We compute the cacheline size and corresponding padding
  at run-time, which is a little better than the above.
- The locks are implemented as pthread_mutex instead of
  spinlocks. I think spinlocks make more sense here because
  we do not expect huge contention (systems without
  !CONFIG_ATOMIC64 have few cores); for libatomic it makes
  sense to use mutexes because it might be used in many-core
  machines.

So yes, we could have used libatomic. If you feel strongly
about it, I can give it a shot.

Thanks,

		Emilio

^ permalink raw reply	[flat|nested] 24+ messages in thread

end of thread, other threads:[~2018-09-11 20:43 UTC | newest]

Thread overview: 24+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-09-10 23:27 [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg Emilio G. Cota
2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 01/12] cacheinfo: add i/d cache_linesize_log Emilio G. Cota
2018-09-11 12:16   ` Richard Henderson
2018-09-11 12:57     ` Paolo Bonzini
2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 02/12] util: add atomic64 Emilio G. Cota
2018-09-11 12:43   ` Richard Henderson
2018-09-11 20:43     ` Emilio G. Cota
2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 03/12] tests: add atomic64-bench Emilio G. Cota
2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 04/12] qsp: use atomic64 accessors Emilio G. Cota
2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 05/12] test-rcu-list: access n_reclaims and n_nodes_removed with atomic64 Emilio G. Cota
2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 06/12] atomic: fix comment s/x64_64/x86_64/ Emilio G. Cota
2018-09-11 12:50   ` Richard Henderson
2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 07/12] cpus: initialize timers_state.vm_clock_lock Emilio G. Cota
2018-09-11 12:50   ` Richard Henderson
2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 08/12] cpus: always call seqlock_write in cpu_update_icount Emilio G. Cota
2018-09-11 12:53   ` Richard Henderson
2018-09-11 12:55     ` Paolo Bonzini
2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 09/12] cpus: access .qemu_icount with atomic64 Emilio G. Cota
2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 10/12] cpus: access .qemu_icount_bias " Emilio G. Cota
2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 11/12] target/i386/translate: use thread-local storage in !user-mode Emilio G. Cota
2018-09-11 12:57   ` Richard Henderson
2018-09-10 23:27 ` [Qemu-devel] [PATCH v2 12/12] configure: enable mttcg for i386 and x86_64 Emilio G. Cota
2018-09-11 12:58   ` Richard Henderson
2018-09-11 13:00 ` [Qemu-devel] [PATCH v2 00/12] i386 + x86_64 mttcg Paolo Bonzini

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.