* [Qemu-devel] [PATCH 1/5] target/arm: Remove stale comment
From: Pranith Kumar @ 2017-08-29  6:33 UTC
  To: alex.bennee, Peter Maydell, open list:ARM, open list:All patches CC here
  Cc: rth, pbonzini

Remove the comment, which has not been true since MTTCG was introduced.

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
 target/arm/translate-a64.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
index 2200e25be0..f42b155d7d 100644
--- a/target/arm/translate-a64.c
+++ b/target/arm/translate-a64.c
@@ -2012,10 +2012,6 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
     }
     tcg_addr = read_cpu_reg_sp(s, rn, 1);
 
-    /* Note that since TCG is single threaded load-acquire/store-release
-     * semantics require no extra if (is_lasr) { ... } handling.
-     */
-
     if (is_excl) {
         if (!is_store) {
             s->is_ldex = true;
-- 
2.13.0


* [Qemu-devel] [RFC v3 PATCH 2/5] cpus-common: Cache allocated work items
From: Pranith Kumar @ 2017-08-29  6:33 UTC
  To: alex.bennee, Paolo Bonzini, Richard Henderson, Sergey Fedorov,
	open list:All patches CC here

Using heaptrack, I found that quite a few of our temporary allocations
come from work items. Instead of continuously allocating and freeing
them, we can cache the allocated items in a pool and reuse them.

Stats from an ARM64 guest (boot+shutdown):

heaptrack stats(before):
        allocations:            1471317
        leaked allocations:     73824
        temporary allocations:  651293

heaptrack stats(after):
        allocations:            1143130
        leaked allocations:     73693
        temporary allocations:  487342

The speedup is minor and within error margins; however, I think the
patch is still worthwhile. We could also explore atomics instead of
taking a lock for the work item pool.
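
As a rough illustration, a lock-free pool could be built around a
compare-and-swap loop. The sketch below uses C11 atomics for clarity;
a real version would use QEMU's atomic helpers and would also have to
deal with the ABA problem (e.g. with a tagged pointer), so treat it as
a starting point rather than a drop-in replacement:

    #include <stdatomic.h>
    #include <stddef.h>

    typedef struct work_item {
        struct work_item *next;
    } work_item;

    static _Atomic(work_item *) pool_head;

    /* Push a recycled item onto the pool (Treiber-stack style). */
    static void pool_push(work_item *item)
    {
        work_item *old = atomic_load_explicit(&pool_head,
                                              memory_order_relaxed);
        do {
            item->next = old;
        } while (!atomic_compare_exchange_weak_explicit(
                      &pool_head, &old, item,
                      memory_order_release, memory_order_relaxed));
    }

    /* Pop an item, or return NULL if the pool is empty. */
    static work_item *pool_pop(void)
    {
        work_item *old = atomic_load_explicit(&pool_head,
                                              memory_order_acquire);
        while (old != NULL &&
               !atomic_compare_exchange_weak_explicit(
                     &pool_head, &old, old->next,
                     memory_order_acquire, memory_order_acquire)) {
            /* 'old' was reloaded by the failed CAS; retry. */
        }
        return old;
    }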

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
 cpus-common.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 60 insertions(+), 15 deletions(-)

diff --git a/cpus-common.c b/cpus-common.c
index 59f751ecf9..ccf5f50e4e 100644
--- a/cpus-common.c
+++ b/cpus-common.c
@@ -24,6 +24,7 @@
 #include "sysemu/cpus.h"
 
 static QemuMutex qemu_cpu_list_lock;
+static QemuMutex qemu_wi_pool_lock;
 static QemuCond exclusive_cond;
 static QemuCond exclusive_resume;
 static QemuCond qemu_work_cond;
@@ -33,6 +34,49 @@ static QemuCond qemu_work_cond;
  */
 static int pending_cpus;
 
+typedef struct qemu_work_item {
+    struct qemu_work_item *next;
+    run_on_cpu_func func;
+    run_on_cpu_data data;
+    bool free, exclusive, done;
+} qemu_work_item;
+
+typedef struct qemu_wi_pool {
+    qemu_work_item *head;
+    int num_items;
+} qemu_wi_pool;
+
+qemu_wi_pool *wi_free_pool;
+
+static void qemu_init_workitem_pool(void)
+{
+    wi_free_pool = g_malloc0(sizeof(qemu_wi_pool));
+}
+
+static void qemu_wi_pool_insert(qemu_work_item *item)
+{
+    qemu_mutex_lock(&qemu_wi_pool_lock);
+    qemu_work_item *curr = atomic_read(&wi_free_pool->head);
+    item->next = curr;
+    wi_free_pool->head = item;
+    qemu_mutex_unlock(&qemu_wi_pool_lock);
+}
+
+static qemu_work_item *qemu_wi_pool_remove(void)
+{
+    qemu_mutex_lock(&qemu_wi_pool_lock);
+    qemu_work_item *curr = atomic_read(&wi_free_pool->head);
+    if (curr == NULL) {
+        goto out;
+    }
+    wi_free_pool->head = curr->next;
+    curr->next = NULL;
+
+ out:
+    qemu_mutex_unlock(&qemu_wi_pool_lock);
+    return curr;
+}
+
 void qemu_init_cpu_list(void)
 {
     /* This is needed because qemu_init_cpu_list is also called by the
@@ -43,6 +87,9 @@ void qemu_init_cpu_list(void)
     qemu_cond_init(&exclusive_cond);
     qemu_cond_init(&exclusive_resume);
     qemu_cond_init(&qemu_work_cond);
+
+    qemu_init_workitem_pool();
+    qemu_mutex_init(&qemu_wi_pool_lock);
 }
 
 void cpu_list_lock(void)
@@ -106,14 +153,7 @@ void cpu_list_remove(CPUState *cpu)
     qemu_mutex_unlock(&qemu_cpu_list_lock);
 }
 
-struct qemu_work_item {
-    struct qemu_work_item *next;
-    run_on_cpu_func func;
-    run_on_cpu_data data;
-    bool free, exclusive, done;
-};
-
-static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
+static void queue_work_on_cpu(CPUState *cpu, qemu_work_item *wi)
 {
     qemu_mutex_lock(&cpu->work_mutex);
     if (cpu->queued_work_first == NULL) {
@@ -132,7 +172,7 @@ static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
 void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data,
                    QemuMutex *mutex)
 {
-    struct qemu_work_item wi;
+    qemu_work_item wi;
 
     if (qemu_cpu_is_self(cpu)) {
         func(cpu, data);
@@ -156,9 +196,11 @@ void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data,
 
 void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
 {
-    struct qemu_work_item *wi;
+    qemu_work_item *wi = qemu_wi_pool_remove();
 
-    wi = g_malloc0(sizeof(struct qemu_work_item));
+    if (!wi) {
+        wi = g_malloc0(sizeof(qemu_work_item));
+    }
     wi->func = func;
     wi->data = data;
     wi->free = true;
@@ -299,9 +341,11 @@ void cpu_exec_end(CPUState *cpu)
 void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func,
                            run_on_cpu_data data)
 {
-    struct qemu_work_item *wi;
+    qemu_work_item *wi = qemu_wi_pool_remove();
 
-    wi = g_malloc0(sizeof(struct qemu_work_item));
+    if (!wi) {
+        wi = g_malloc0(sizeof(qemu_work_item));
+    }
     wi->func = func;
     wi->data = data;
     wi->free = true;
@@ -312,7 +356,7 @@ void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func,
 
 void process_queued_cpu_work(CPUState *cpu)
 {
-    struct qemu_work_item *wi;
+    qemu_work_item *wi;
 
     if (cpu->queued_work_first == NULL) {
         return;
@@ -343,7 +387,8 @@ void process_queued_cpu_work(CPUState *cpu)
         }
         qemu_mutex_lock(&cpu->work_mutex);
         if (wi->free) {
-            g_free(wi);
+            memset(wi, 0, sizeof(qemu_work_item));
+            qemu_wi_pool_insert(wi);
         } else {
             atomic_mb_set(&wi->done, true);
         }
-- 
2.13.0


* [Qemu-devel] [RFC v3 PATCH 3/5] mttcg: Add tcg target default memory ordering
From: Pranith Kumar @ 2017-08-29  6:33 UTC
  To: alex.bennee, Claudio Fontana, Richard Henderson,
	Andrzej Zaborowski, Aurelien Jarno, Alexander Graf,
	open list:AArch64 target, open list:All patches CC here
  Cc: pbonzini

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
 tcg/aarch64/tcg-target.h | 2 ++
 tcg/arm/tcg-target.h     | 2 ++
 tcg/ia64/tcg-target.h    | 2 ++
 tcg/mips/tcg-target.h    | 2 ++
 tcg/ppc/tcg-target.h     | 2 ++
 tcg/s390/tcg-target.h    | 2 ++
 tcg/sparc/tcg-target.h   | 2 ++
 7 files changed, 14 insertions(+)

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 55a46ac825..b41a248bee 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -117,4 +117,6 @@ static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
     __builtin___clear_cache((char *)start, (char *)stop);
 }
 
+#define TCG_TARGET_DEFAULT_MO (0)
+
 #endif /* AARCH64_TCG_TARGET_H */
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 5ef1086710..a38be15a39 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -134,4 +134,6 @@ static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
     __builtin___clear_cache((char *) start, (char *) stop);
 }
 
+#define TCG_TARGET_DEFAULT_MO (0)
+
 #endif
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 901bb7575d..8f475fe742 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -195,4 +195,6 @@ static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
     asm volatile (";;sync.i;;srlz.i;;");
 }
 
+#define TCG_TARGET_DEFAULT_MO (0)
+
 #endif
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index d75cb63ed3..e9558d15bc 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -206,4 +206,6 @@ static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
     cacheflush ((void *)start, stop-start, ICACHE);
 }
 
+#define TCG_TARGET_DEFAULT_MO (0)
+
 #endif
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 5f4a40a5b4..5a092b038a 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -125,4 +125,6 @@ extern bool have_isa_3_00;
 
 void flush_icache_range(uintptr_t start, uintptr_t stop);
 
+#define TCG_TARGET_DEFAULT_MO (0)
+
 #endif
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 957f0c0afe..dc0e59193c 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -133,6 +133,8 @@ extern uint64_t s390_facilities;
 
 #define TCG_TARGET_EXTEND_ARGS 1
 
+#define TCG_TARGET_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD)
+
 enum {
     TCG_AREG0 = TCG_REG_R10,
 };
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 854a0afd70..4515c9ab48 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -162,6 +162,8 @@ extern bool use_vis3_instructions;
 
 #define TCG_AREG0 TCG_REG_I0
 
+#define TCG_TARGET_DEFAULT_MO (0)
+
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
     uintptr_t p;
-- 
2.13.0


* [Qemu-devel] [RFC v3 PATCH 4/5] mttcg: Implement implicit ordering semantics
From: Pranith Kumar @ 2017-08-29  6:33 UTC
  To: alex.bennee, Richard Henderson, open list:All patches CC here; +Cc: pbonzini

Currently, we cannot use mttcg for running strong memory model guests
on weak memory model hosts due to missing ordering semantics.

We implicitly generate fence instructions for stronger guests if an
ordering mismatch is detected. We generate fences only for the orders
for which fence instructions are necessary, for example a fence is not
necessary between a store and a subsequent load on x86 since its
absence in the guest binary tells that ordering need not be
ensured. Also note that if we find multiple subsequent fence
instructions in the generated IR, we combine them in the TCG
optimization pass.

This patch allows us to boot an x86 guest on ARM64 hosts using mttcg.
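
To make the mechanism concrete, here is roughly how the check plays
out for a guest load when running x86-on-AArch64, assuming x86's
TCG_GUEST_DEFAULT_MO is (TCG_MO_ALL & ~TCG_MO_ST_LD) and AArch64's
TCG_TARGET_DEFAULT_MO is 0 (a hand-traced sketch, not tool output):

    /* tcg_gen_qemu_ld_i32() requests the orderings a load must respect: */
    type = TCG_MO_LD_LD | TCG_MO_ST_LD;

    /* Mask with the guest model: x86 itself permits store->load
     * reordering, so TCG_MO_ST_LD is dropped; TCG_MO_LD_LD remains. */
    type &= TCG_GUEST_DEFAULT_MO;

    /* Mask out what the host already guarantees: AArch64 guarantees
     * nothing by default, so TCG_MO_LD_LD survives ... */
    type &= ~TCG_TARGET_DEFAULT_MO;

    /* ... and a barrier is emitted in front of the load. */
    tcg_gen_mb(TCG_MO_LD_LD | TCG_BAR_SC);

On a host backend that advertised the same ordering mask as the guest,
the second mask would clear every bit and no barrier would be emitted.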

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
 tcg/tcg-op.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 87f673ef49..688d91755b 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -28,6 +28,7 @@
 #include "exec/exec-all.h"
 #include "tcg.h"
 #include "tcg-op.h"
+#include "tcg-mo.h"
 #include "trace-tcg.h"
 #include "trace/mem.h"
 
@@ -2662,8 +2663,20 @@ static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
 #endif
 }
 
+static void tcg_gen_req_mo(TCGBar type)
+{
+#ifdef TCG_GUEST_DEFAULT_MO
+    type &= TCG_GUEST_DEFAULT_MO;
+#endif
+    type &= ~TCG_TARGET_DEFAULT_MO;
+    if (type) {
+        tcg_gen_mb(type | TCG_BAR_SC);
+    }
+}
+
 void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
+    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     memop = tcg_canonicalize_memop(memop, 0, 0);
     trace_guest_mem_before_tcg(tcg_ctx.cpu, tcg_ctx.tcg_env,
                                addr, trace_mem_get_info(memop, 0));
@@ -2672,6 +2685,7 @@ void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 
 void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
+    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     memop = tcg_canonicalize_memop(memop, 0, 1);
     trace_guest_mem_before_tcg(tcg_ctx.cpu, tcg_ctx.tcg_env,
                                addr, trace_mem_get_info(memop, 1));
@@ -2680,6 +2694,7 @@ void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 
 void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
+    tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
         tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
         if (memop & MO_SIGN) {
@@ -2698,6 +2713,7 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 
 void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
 {
+    tcg_gen_req_mo(TCG_MO_LD_ST | TCG_MO_ST_ST);
     if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
         tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
         return;
-- 
2.13.0


* [Qemu-devel] [RFC v3 PATCH 5/5] tcg/softmmu: Increase size of TLB caches
From: Pranith Kumar @ 2017-08-29  6:33 UTC
  To: alex.bennee, Paolo Bonzini, Peter Crosthwaite, Richard Henderson,
	Claudio Fontana, Andrzej Zaborowski, Aurelien Jarno,
	Alexander Graf, Stefan Weil, open list:Overall,
	open list:AArch64 target

This patch increases the number of entries cached in the TLB. I went
over a few architectures to see whether increasing it is problematic.
Only armv6 seems to have a limitation, in that only 8 bits can be used
for indexing these entries. For the other architectures, the number of
TLB entries is increased to 4096 (a 12-bit index). The patch also
doubles the number of victim TLB entries.

Some statistics collected from a build benchmark for various cache
sizes are listed below:

| TLB bits\vTLB entries |             8 |            16  |            32 |
|                     8 | 952.94(+0.0%) | 929.99(+2.4%)  | 919.02(+3.6%) |
|                    10 | 898.92(+5.6%) | 886.13(+7.0%)  | 887.03(+6.9%) |
|                    12 | 878.56(+7.8%) | 873.53(+8.3%)* | 875.34(+8.1%) |

The best combination for this workload came out to be 12 bits for the
TLB and a 16-entry vTLB cache (the starred entry in the table above).
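
For a rough sense of the memory cost (a back-of-the-envelope estimate,
assuming the 32-byte TLB entries of a 64-bit host, i.e.
CPU_TLB_ENTRY_BITS == 5):

    old:  (1 <<  8) entries * 32 bytes =   8 KiB per MMU mode
    new:  (1 << 12) entries * 32 bytes = 128 KiB per MMU mode

so a target with 8 MMU modes would spend roughly 1 MiB of TLB per vCPU.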

Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
 include/exec/cpu-defs.h  | 13 ++++---------
 tcg/aarch64/tcg-target.h |  1 +
 tcg/arm/tcg-target.h     |  1 +
 tcg/i386/tcg-target.h    |  6 ++++++
 tcg/ia64/tcg-target.h    |  1 +
 tcg/mips/tcg-target.h    |  6 ++++++
 tcg/ppc/tcg-target.h     |  1 +
 tcg/s390/tcg-target.h    |  1 +
 tcg/sparc/tcg-target.h   |  1 +
 tcg/tci/tcg-target.h     |  2 ++
 10 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index bc8e7f848d..33b0ac6fe0 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -57,8 +57,8 @@ typedef uint64_t target_ulong;
 #endif
 
 #if !defined(CONFIG_USER_ONLY) && defined(CONFIG_TCG)
-/* use a fully associative victim tlb of 8 entries */
-#define CPU_VTLB_SIZE 8
+/* use a fully associative victim tlb of 16 entries */
+#define CPU_VTLB_SIZE 16
 
 #if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32
 #define CPU_TLB_ENTRY_BITS 4
@@ -87,14 +87,9 @@ typedef uint64_t target_ulong;
  * could be something like 0xC000 (the offset of the last TLB table) plus
  * 0x18 (the offset of the addend field in each TLB entry) plus the offset
  * of tlb_table inside env (which is non-trivial but not huge).
+ * TODO: rewrite this comment
  */
-#define CPU_TLB_BITS                                             \
-    MIN(8,                                                       \
-        TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS -  \
-        (NB_MMU_MODES <= 1 ? 0 :                                 \
-         NB_MMU_MODES <= 2 ? 1 :                                 \
-         NB_MMU_MODES <= 4 ? 2 :                                 \
-         NB_MMU_MODES <= 8 ? 3 : 4))
+#define CPU_TLB_BITS MIN(12, TCG_TARGET_TLB_MAX_INDEX_BITS)
 
 #define CPU_TLB_SIZE (1 << CPU_TLB_BITS)
 
diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index b41a248bee..9f4558cd83 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -15,6 +15,7 @@
 
 #define TCG_TARGET_INSN_UNIT_SIZE  4
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 32
 #undef TCG_TARGET_STACK_GROWSUP
 
 typedef enum {
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index a38be15a39..ebe27991f3 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -60,6 +60,7 @@ extern int arm_arch;
 #undef TCG_TARGET_STACK_GROWSUP
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 8
 
 typedef enum {
     TCG_REG_R0 = 0,
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 73a15f7e80..456d57115c 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -162,6 +162,12 @@ extern bool have_popcnt;
 # define TCG_AREG0 TCG_REG_EBP
 #endif
 
+#if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 28
+#else
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 27
+#endif
+
 static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
 {
 }
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 8f475fe742..35878e20c7 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -28,6 +28,7 @@
 
 #define TCG_TARGET_INSN_UNIT_SIZE 16
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 21
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 32
 
 typedef struct {
     uint64_t lo __attribute__((aligned(16)));
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index e9558d15bc..0c7c5cf64c 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -39,6 +39,12 @@
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
 #define TCG_TARGET_NB_REGS 32
 
+#if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 12
+#else
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 11
+#endif
+
 typedef enum {
     TCG_REG_ZERO = 0,
     TCG_REG_AT,
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 5a092b038a..82e10c9471 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -34,6 +34,7 @@
 #define TCG_TARGET_NB_REGS 32
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 32
 
 typedef enum {
     TCG_REG_R0,  TCG_REG_R1,  TCG_REG_R2,  TCG_REG_R3,
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index dc0e59193c..57f0e22532 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -27,6 +27,7 @@
 
 #define TCG_TARGET_INSN_UNIT_SIZE 2
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 19
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 32
 
 typedef enum TCGReg {
     TCG_REG_R0 = 0,
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 4515c9ab48..378d218923 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -29,6 +29,7 @@
 
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 12
 #define TCG_TARGET_NB_REGS 32
 
 typedef enum {
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 06963288dc..456a4fc4e1 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -40,9 +40,11 @@
 #ifndef TCG_TARGET_H
 #define TCG_TARGET_H
 
+
 #define TCG_TARGET_INTERPRETER 1
 #define TCG_TARGET_INSN_UNIT_SIZE 1
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
+#define TCG_TARGET_TLB_MAX_INDEX_BITS 32
 
 #if UINTPTR_MAX == UINT32_MAX
 # define TCG_TARGET_REG_BITS 32
-- 
2.13.0


* Re: [Qemu-devel] [RFC v3 PATCH 3/5] mttcg: Add tcg target default memory ordering
From: Richard Henderson @ 2017-08-29 14:51 UTC
  To: Pranith Kumar, alex.bennee, Claudio Fontana, Andrzej Zaborowski,
	Aurelien Jarno, Alexander Graf, open list:AArch64 target,
	open list:All patches CC here
  Cc: pbonzini

On 08/28/2017 11:33 PM, Pranith Kumar wrote:
> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
> ---
>  tcg/aarch64/tcg-target.h | 2 ++
>  tcg/arm/tcg-target.h     | 2 ++
>  tcg/ia64/tcg-target.h    | 2 ++
>  tcg/mips/tcg-target.h    | 2 ++
>  tcg/ppc/tcg-target.h     | 2 ++
>  tcg/s390/tcg-target.h    | 2 ++
>  tcg/sparc/tcg-target.h   | 2 ++
>  7 files changed, 14 insertions(+)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~


* Re: [Qemu-devel] [RFC v3 PATCH 4/5] mttcg: Implement implicit ordering semantics
From: Richard Henderson @ 2017-08-29 14:53 UTC
  To: Pranith Kumar, alex.bennee, open list:All patches CC here; +Cc: pbonzini

On 08/28/2017 11:33 PM, Pranith Kumar wrote:
> Currently, we cannot use mttcg for running strong memory model guests
> on weak memory model hosts due to missing ordering semantics.
> 
> We implicitly generate fence instructions for stronger guests if an
> ordering mismatch is detected. We generate fences only for the orders
> for which fence instructions are necessary, for example a fence is not
> necessary between a store and a subsequent load on x86 since its
> absence in the guest binary tells that ordering need not be
> ensured. Also note that if we find multiple subsequent fence
> instructions in the generated IR, we combine them in the TCG
> optimization pass.
> 
> This patch allows us to boot an x86 guest on ARM64 hosts using mttcg.
> 
> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
> ---
>  tcg/tcg-op.c | 16 ++++++++++++++++
>  1 file changed, 16 insertions(+)

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>


r~


* Re: [Qemu-devel] [RFC v3 PATCH 5/5] tcg/softmmu: Increase size of TLB caches
From: Richard Henderson @ 2017-08-29 15:01 UTC
  To: Pranith Kumar, alex.bennee, Paolo Bonzini, Peter Crosthwaite,
	Claudio Fontana, Andrzej Zaborowski, Aurelien Jarno,
	Alexander Graf, Stefan Weil, open list:Overall,
	open list:AArch64 target

On 08/28/2017 11:33 PM, Pranith Kumar wrote:
> + * TODO: rewrite this comment
>   */
> -#define CPU_TLB_BITS                                             \
> -    MIN(8,                                                       \
> -        TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS -  \
> -        (NB_MMU_MODES <= 1 ? 0 :                                 \
> -         NB_MMU_MODES <= 2 ? 1 :                                 \
> -         NB_MMU_MODES <= 4 ? 2 :                                 \
> -         NB_MMU_MODES <= 8 ? 3 : 4))
> +#define CPU_TLB_BITS MIN(12, TCG_TARGET_TLB_MAX_INDEX_BITS)
>  

Ah, no.  This will cause several builds to fail.
You still need to restrict the *total* size of
the TLB to TCG_TARGET_TLB_DISPLACEMENT_BITS.

(That's not a 100% accurate statement, but is close.
See the QEMU_BUILD_BUG_ON in tcg/*/*.c for specifics.)

The upshot is that if a target has 2 MMU modes,
we can allow them to be bigger.  But if it has 8,
we have to make them smaller.

I was expecting you to write

  MIN(MIN(12, TCG_TARGET_TLB_MAX_INDEX_BITS),
      TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS -
      ...)

r~


* Re: [Qemu-devel] [RFC v3 PATCH 5/5] tcg/softmmu: Increase size of TLB caches
From: Richard Henderson @ 2017-08-29 15:03 UTC
  To: Pranith Kumar, alex.bennee, Paolo Bonzini, Peter Crosthwaite,
	Claudio Fontana, Andrzej Zaborowski, Aurelien Jarno,
	Alexander Graf, Stefan Weil, open list:Overall,
	open list:AArch64 target

On 08/28/2017 11:33 PM, Pranith Kumar wrote:
> +#if HOST_LONG_BITS == 32 && TARGET_LONG_BITS == 32
> +#define TCG_TARGET_TLB_MAX_INDEX_BITS 28
> +#else
> +#define TCG_TARGET_TLB_MAX_INDEX_BITS 27
> +#endif
> +

For the record, did it not work to actually write (32 - CPU_TLB_BITS)?  I'm not
fond of repeating the conditions that go into computing CPU_TLB_BITS.


r~


* Re: [Qemu-devel] [RFC v3 PATCH 5/5] tcg/softmmu: Increase size of TLB caches
From: Pranith Kumar @ 2017-08-29 16:23 UTC
  To: Richard Henderson
  Cc: Alex Bennée, Paolo Bonzini, Peter Crosthwaite,
	Claudio Fontana, Andrzej Zaborowski, Aurelien Jarno,
	Alexander Graf, Stefan Weil, open list:Overall,
	open list:AArch64 target

On Tue, Aug 29, 2017 at 11:01 AM, Richard Henderson
<richard.henderson@linaro.org> wrote:
> On 08/28/2017 11:33 PM, Pranith Kumar wrote:
>> + * TODO: rewrite this comment
>>   */
>> -#define CPU_TLB_BITS                                             \
>> -    MIN(8,                                                       \
>> -        TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS -  \
>> -        (NB_MMU_MODES <= 1 ? 0 :                                 \
>> -         NB_MMU_MODES <= 2 ? 1 :                                 \
>> -         NB_MMU_MODES <= 4 ? 2 :                                 \
>> -         NB_MMU_MODES <= 8 ? 3 : 4))
>> +#define CPU_TLB_BITS MIN(12, TCG_TARGET_TLB_MAX_INDEX_BITS)
>>
>
> Ah, no.  This will cause several builds to fail.
> You still need to restrict the *total* size of
> the TLB to TCG_TARGET_TLB_DISPLACEMENT_BITS.
>
> (That's not a 100% accurate statement, but is close.
> See the QEMU_BUILD_BUG_ON in tcg/*/*.c for specifics.)
>
> The upshot is that if a target has 2 MMU modes,
> we can allow them to be bigger.  But if it has 8,
> we have to make them smaller.
>
> I was expecting you to write
>
>   MIN(MIN(12, TCG_TARGET_TLB_MAX_INDEX_BITS),
>       TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS -
>       ...)

I see what you mean. I will fix the blunder and send an updated patch.

Thanks!
-- 
Pranith


* Re: [Qemu-devel] [RFC v3 PATCH 4/5] mttcg: Implement implicit ordering semantics
From: Emilio G. Cota @ 2017-09-02  1:44 UTC
  To: Pranith Kumar
  Cc: alex.bennee, Richard Henderson, open list:All patches CC here, pbonzini

On Tue, Aug 29, 2017 at 02:33:12 -0400, Pranith Kumar wrote:
> Currently, we cannot use mttcg for running strong memory model guests
> on weak memory model hosts due to missing ordering semantics.
> 
> We implicitly generate fence instructions for stronger guests if an

This confused me. By "We implicitly" are we still talking about
the current state (as per the "currently" above?). If not, I'd
rephrase as:

"We cannot use [...].

To fix it, generate fences [...]"

Also, I think you meant s/stronger/weaker/ in the last sentence.

> ordering mismatch is detected. We generate fences only for the orders
> for which fence instructions are necessary, for example a fence is not
> necessary between a store and a subsequent load on x86 since its
> absence in the guest binary tells that ordering need not be
> ensured. Also note that if we find multiple subsequent fence
> instructions in the generated IR, we combine them in the TCG
> optimization pass.

A before/after example of -d out_asm would be great to have here.
> 
> This patch allows us to boot an x86 guest on ARM64 hosts using mttcg.

A test with a simple program that *cannot* work without this patch
would be even better.
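
For instance, a classic message-passing litmus test ought to show it --
something along these lines, compiled for the x86 guest (just a sketch;
a real harness would run the two threads in a loop and count outcomes):

    /* MP litmus test: under x86's memory model, the reader seeing
     * flag == 1 but data == 0 is forbidden.  Without the implicit
     * fences, an x86 guest under MTTCG on a weakly ordered host
     * could observe exactly that. */
    #include <pthread.h>
    #include <stdio.h>

    static volatile int data, flag;

    static void *writer(void *arg)
    {
        data = 42;            /* st data                         */
        flag = 1;             /* st flag: x86 keeps st->st order */
        return NULL;
    }

    static void *reader(void *arg)
    {
        while (flag == 0) {   /* ld flag: spin until published   */
        }
        /* ld data: x86 keeps ld->ld order, so this must print 42 */
        printf("data = %d\n", data);
        return NULL;
    }

    int main(void)
    {
        pthread_t w, r;
        pthread_create(&r, NULL, reader, NULL);
        pthread_create(&w, NULL, writer, NULL);
        pthread_join(w, NULL);
        pthread_join(r, NULL);
        return 0;
    }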

Thanks,

		Emilio


* Re: [Qemu-devel] [PATCH 1/5] target/arm: Remove stale comment
From: Alex Bennée @ 2017-09-05 12:02 UTC
  To: Pranith Kumar; +Cc: Peter Maydell, open list:ARM, qemu-devel, rth, pbonzini


Pranith Kumar <bobby.prani@gmail.com> writes:

> Remove the comment, which has not been true since MTTCG was introduced.

What happened to the cover letter? We seem to have a mix of patches but
no summary of the overall outcome.

>
> Reviewed-by: Richard Henderson <rth@twiddle.net>
> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
> ---
>  target/arm/translate-a64.c | 4 ----
>  1 file changed, 4 deletions(-)
>
> diff --git a/target/arm/translate-a64.c b/target/arm/translate-a64.c
> index 2200e25be0..f42b155d7d 100644
> --- a/target/arm/translate-a64.c
> +++ b/target/arm/translate-a64.c
> @@ -2012,10 +2012,6 @@ static void disas_ldst_excl(DisasContext *s, uint32_t insn)
>      }
>      tcg_addr = read_cpu_reg_sp(s, rn, 1);
>
> -    /* Note that since TCG is single threaded load-acquire/store-release
> -     * semantics require no extra if (is_lasr) { ... } handling.
> -     */
> -
>      if (is_excl) {
>          if (!is_store) {
>              s->is_ldex = true;


--
Alex Bennée


* Re: [Qemu-devel] [RFC v3 PATCH 2/5] cpus-common: Cache allocated work items
From: Alex Bennée @ 2017-09-05 12:28 UTC
  To: Pranith Kumar
  Cc: Paolo Bonzini, Richard Henderson, Sergey Fedorov, qemu-devel


Pranith Kumar <bobby.prani@gmail.com> writes:

> Using heaptrack, I found that quite a few of our temporary allocations
> come from work items. Instead of continuously allocating and freeing
> them, we can cache the allocated items in a pool and reuse them.
>
> Stats from an ARM64 guest (boot+shutdown):
>
> heaptrack stats(before):
>         allocations:            1471317
>         leaked allocations:     73824
>         temporary allocations:  651293
>
> heaptrack stats(after):
>         allocations:            1143130
>         leaked allocations:     73693
>         temporary allocations:  487342
>
> The speedup is minor and within error margins; however, I think the
> patch is still worthwhile. We could also explore atomics instead of
> taking a lock for the work item pool.

When we were doing the original MTTCG work I looked at using GArray for
the work queue, see:

  http://lists.gnu.org/archive/html/qemu-devel/2016-08/msg00367.html

specifically:

  Subject: [PATCH v5 13/13] cpu-exec: replace cpu->queued_work with GArray
  Date: Tue,  2 Aug 2016 18:27:44 +0100
  Message-Id: <1470158864-17651-14-git-send-email-alex.bennee@linaro.org>

which I personally think might yield better results than messing around
with custom allocators and GSlice and the like. You still get the
dynamic sizing of a malloc-based array, and operations like insertion
and iterating through the work queue stay cache friendly.

Once the array has (transparently) reached a reasonable size to service
all allocations in the usual servicing period, the same memory can be
used over and over again ;-)
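
Roughly what I had in mind -- just a sketch with made-up names, not the
old patch itself:

    #include <glib.h>

    typedef struct WorkItem {
        void (*func)(void *opaque);
        void *opaque;
    } WorkItem;

    static GArray *work_queue;

    static void queue_init(void)
    {
        /* not zero-terminated, not pre-cleared; fixed element size */
        work_queue = g_array_new(FALSE, FALSE, sizeof(WorkItem));
    }

    static void queue_push(void (*func)(void *), void *opaque)
    {
        WorkItem wi = { .func = func, .opaque = opaque };
        /* amortised O(1); no malloc once the capacity has grown */
        g_array_append_val(work_queue, wi);
    }

    static void queue_drain(void)
    {
        for (guint i = 0; i < work_queue->len; i++) {
            WorkItem *wi = &g_array_index(work_queue, WorkItem, i);
            wi->func(wi->opaque);
        }
        /* shrink the length but keep the capacity: the same backing
         * memory gets reused on the next round of work items */
        g_array_set_size(work_queue, 0);
    }

(Locking elided -- the real thing would of course hold cpu->work_mutex
around the push and drain.)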

My fondness for arrays is informed by comments by Bjarne Stroustrup:

  https://www.youtube.com/watch?v=YQs6IC-vgmo

Obviously this patch would need to be re-worked given how much the code
has changed since it was merged.

>
> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
> ---
>  cpus-common.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++------------
>  1 file changed, 60 insertions(+), 15 deletions(-)
>
> diff --git a/cpus-common.c b/cpus-common.c
> index 59f751ecf9..ccf5f50e4e 100644
> --- a/cpus-common.c
> +++ b/cpus-common.c
> @@ -24,6 +24,7 @@
>  #include "sysemu/cpus.h"
>
>  static QemuMutex qemu_cpu_list_lock;
> +static QemuMutex qemu_wi_pool_lock;
>  static QemuCond exclusive_cond;
>  static QemuCond exclusive_resume;
>  static QemuCond qemu_work_cond;
> @@ -33,6 +34,49 @@ static QemuCond qemu_work_cond;
>   */
>  static int pending_cpus;
>
> +typedef struct qemu_work_item {
> +    struct qemu_work_item *next;
> +    run_on_cpu_func func;
> +    run_on_cpu_data data;
> +    bool free, exclusive, done;
> +} qemu_work_item;
> +
> +typedef struct qemu_wi_pool {
> +    qemu_work_item *head;
> +    int num_items;
> +} qemu_wi_pool;
> +
> +qemu_wi_pool *wi_free_pool;
> +
> +static void qemu_init_workitem_pool(void)
> +{
> +    wi_free_pool = g_malloc0(sizeof(qemu_wi_pool));
> +}
> +
> +static void qemu_wi_pool_insert(qemu_work_item *item)
> +{
> +    qemu_mutex_lock(&qemu_wi_pool_lock);
> +    qemu_work_item *curr = atomic_read(&wi_free_pool->head);
> +    item->next = curr;
> +    wi_free_pool->head = item;
> +    qemu_mutex_unlock(&qemu_wi_pool_lock);
> +}
> +
> +static qemu_work_item *qemu_wi_pool_remove(void)
> +{
> +    qemu_mutex_lock(&qemu_wi_pool_lock);
> +    qemu_work_item *curr = atomic_read(&wi_free_pool->head);
> +    if (curr == NULL) {
> +        goto out;
> +    }
> +    wi_free_pool->head = curr->next;
> +    curr->next = NULL;
> +
> + out:
> +    qemu_mutex_unlock(&qemu_wi_pool_lock);
> +    return curr;
> +}
> +
>  void qemu_init_cpu_list(void)
>  {
>      /* This is needed because qemu_init_cpu_list is also called by the
> @@ -43,6 +87,9 @@ void qemu_init_cpu_list(void)
>      qemu_cond_init(&exclusive_cond);
>      qemu_cond_init(&exclusive_resume);
>      qemu_cond_init(&qemu_work_cond);
> +
> +    qemu_init_workitem_pool();
> +    qemu_mutex_init(&qemu_wi_pool_lock);
>  }
>
>  void cpu_list_lock(void)
> @@ -106,14 +153,7 @@ void cpu_list_remove(CPUState *cpu)
>      qemu_mutex_unlock(&qemu_cpu_list_lock);
>  }
>
> -struct qemu_work_item {
> -    struct qemu_work_item *next;
> -    run_on_cpu_func func;
> -    run_on_cpu_data data;
> -    bool free, exclusive, done;
> -};
> -
> -static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
> +static void queue_work_on_cpu(CPUState *cpu, qemu_work_item *wi)
>  {
>      qemu_mutex_lock(&cpu->work_mutex);
>      if (cpu->queued_work_first == NULL) {
> @@ -132,7 +172,7 @@ static void queue_work_on_cpu(CPUState *cpu, struct qemu_work_item *wi)
>  void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data,
>                     QemuMutex *mutex)
>  {
> -    struct qemu_work_item wi;
> +    qemu_work_item wi;
>
>      if (qemu_cpu_is_self(cpu)) {
>          func(cpu, data);
> @@ -156,9 +196,11 @@ void do_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data,
>
>  void async_run_on_cpu(CPUState *cpu, run_on_cpu_func func, run_on_cpu_data data)
>  {
> -    struct qemu_work_item *wi;
> +    qemu_work_item *wi = qemu_wi_pool_remove();
>
> -    wi = g_malloc0(sizeof(struct qemu_work_item));
> +    if (!wi) {
> +        wi = g_malloc0(sizeof(qemu_work_item));
> +    }
>      wi->func = func;
>      wi->data = data;
>      wi->free = true;
> @@ -299,9 +341,11 @@ void cpu_exec_end(CPUState *cpu)
>  void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func,
>                             run_on_cpu_data data)
>  {
> -    struct qemu_work_item *wi;
> +    qemu_work_item *wi = qemu_wi_pool_remove();
>
> -    wi = g_malloc0(sizeof(struct qemu_work_item));
> +    if (!wi) {
> +        wi = g_malloc0(sizeof(qemu_work_item));
> +    }
>      wi->func = func;
>      wi->data = data;
>      wi->free = true;
> @@ -312,7 +356,7 @@ void async_safe_run_on_cpu(CPUState *cpu, run_on_cpu_func func,
>
>  void process_queued_cpu_work(CPUState *cpu)
>  {
> -    struct qemu_work_item *wi;
> +    qemu_work_item *wi;
>
>      if (cpu->queued_work_first == NULL) {
>          return;
> @@ -343,7 +387,8 @@ void process_queued_cpu_work(CPUState *cpu)
>          }
>          qemu_mutex_lock(&cpu->work_mutex);
>          if (wi->free) {
> -            g_free(wi);
> +            memset(wi, 0, sizeof(qemu_work_item));
> +            qemu_wi_pool_insert(wi);
>          } else {
>              atomic_mb_set(&wi->done, true);
>          }


--
Alex Bennée


* Re: [Qemu-devel] [PATCH 1/5] target/arm: Remove stale comment
From: Pranith Kumar @ 2017-09-06  0:35 UTC
  To: Alex Bennée
  Cc: Peter Maydell, open list:ARM, qemu-devel, Richard Henderson,
	Paolo Bonzini

Hi Alex,

On Tue, Sep 5, 2017 at 8:02 AM, Alex Bennée <alex.bennee@linaro.org> wrote:
>
> Pranith Kumar <bobby.prani@gmail.com> writes:
>
>> Remove the comment, which has not been true since MTTCG was introduced.
>
> What happened to the cover letter? We seem to have a mix of patches but
> no summary of the overall outcome.
>

These are a bunch of unrelated patches, so there is no theme. I will
include a cover letter saying so from now on.

Thanks,
-- 
Pranith
