From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:55198) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1bjS3Z-0000EI-H5 for qemu-devel@nongnu.org; Mon, 12 Sep 2016 10:18:02 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1bjS3T-0006sL-Ct for qemu-devel@nongnu.org; Mon, 12 Sep 2016 10:17:56 -0400 Received: from mail-wm0-f45.google.com ([74.125.82.45]:37747) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1bjS3T-0006s6-1T for qemu-devel@nongnu.org; Mon, 12 Sep 2016 10:17:51 -0400 Received: by mail-wm0-f45.google.com with SMTP id c131so60369006wmh.0 for ; Mon, 12 Sep 2016 07:17:50 -0700 (PDT) References: <1472935202-3342-1-git-send-email-rth@twiddle.net> <1472935202-3342-7-git-send-email-rth@twiddle.net> From: Alex =?utf-8?Q?Benn=C3=A9e?= In-reply-to: <1472935202-3342-7-git-send-email-rth@twiddle.net> Date: Mon, 12 Sep 2016 15:16:48 +0100 Message-ID: <87twdlw6cv.fsf@linaro.org> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Subject: Re: [Qemu-devel] [PATCH v3 06/34] tcg: Add EXCP_ATOMIC List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Richard Henderson Cc: qemu-devel@nongnu.org Richard Henderson writes: > When we cannot emulate an atomic operation within a parallel > context, this exception allows us to stop the world and try > again in a serial context. 
> > Signed-off-by: Richard Henderson > --- > cpu-exec-common.c | 6 +++++ > cpu-exec.c | 23 +++++++++++++++++++ > cpus.c | 6 +++++ > include/exec/cpu-all.h | 1 + > include/exec/exec-all.h | 1 + > include/qemu-common.h | 1 + > linux-user/main.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++- > tcg/tcg.h | 1 + > translate-all.c | 1 + > 9 files changed, 98 insertions(+), 1 deletion(-) > > diff --git a/cpu-exec-common.c b/cpu-exec-common.c > index 0cb4ae6..767d9c6 100644 > --- a/cpu-exec-common.c > +++ b/cpu-exec-common.c > @@ -77,3 +77,9 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc) > } > siglongjmp(cpu->jmp_env, 1); > } > + > +void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc) > +{ > + cpu->exception_index = EXCP_ATOMIC; > + cpu_loop_exit_restore(cpu, pc); > +} > diff --git a/cpu-exec.c b/cpu-exec.c > index 5d9710a..8d77516 100644 > --- a/cpu-exec.c > +++ b/cpu-exec.c > @@ -225,6 +225,29 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, > } > #endif > > +void cpu_exec_step(CPUState *cpu) > +{ > + CPUArchState *env = (CPUArchState *)cpu->env_ptr; > + TranslationBlock *tb; > + target_ulong cs_base, pc; > + uint32_t flags; > + bool old_tb_flushed; > + > + old_tb_flushed = cpu->tb_flushed; > + cpu->tb_flushed = false; Advanced warning, these disappear soon when the async safe work (plus safe tb flush) patches get merged. 
> + > + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); > + tb = tb_gen_code(cpu, pc, cs_base, flags, > + 1 | CF_NOCACHE | CF_IGNORE_ICOUNT); > + tb->orig_tb = NULL; > + cpu->tb_flushed |= old_tb_flushed; > + /* execute the generated code */ > + trace_exec_tb_nocache(tb, pc); > + cpu_tb_exec(cpu, tb); > + tb_phys_invalidate(tb, -1); > + tb_free(tb); > +} > + > struct tb_desc { > target_ulong pc; > target_ulong cs_base; > diff --git a/cpus.c b/cpus.c > index 84c3520..a01bbbd 100644 > --- a/cpus.c > +++ b/cpus.c > @@ -1575,6 +1575,12 @@ static void tcg_exec_all(void) > if (r == EXCP_DEBUG) { > cpu_handle_guest_debug(cpu); > break; > + } else if (r == EXCP_ATOMIC) { > + /* ??? When we begin running cpus in parallel, we should > + stop all cpus, clear parallel_cpus, and interpret a > + single insn with cpu_exec_step. In the meantime, > + we should never get here. */ > + abort(); Possibly more correct would be: g_assert(parallel_cpus == false); abort(); As mentioned in other patches I was seeing this triggered by TLB_NOTDIRTY writes. 
However I think the fix is fairly simple, see below: > } > } else if (cpu->stop || cpu->stopped) { > if (cpu->unplug) { > diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h > index b6a7059..1f7e9d8 100644 > --- a/include/exec/cpu-all.h > +++ b/include/exec/cpu-all.h > @@ -31,6 +31,7 @@ > #define EXCP_DEBUG 0x10002 /* cpu stopped after a breakpoint or singlestep */ > #define EXCP_HALTED 0x10003 /* cpu is halted (waiting for external event) */ > #define EXCP_YIELD 0x10004 /* cpu wants to yield timeslice to another */ > +#define EXCP_ATOMIC 0x10005 /* stop-the-world and emulate atomic */ > > /* some important defines: > * > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > index d008296..08424ae 100644 > --- a/include/exec/exec-all.h > +++ b/include/exec/exec-all.h > @@ -71,6 +71,7 @@ static inline void cpu_list_lock(void) > void cpu_exec_init(CPUState *cpu, Error **errp); > void QEMU_NORETURN cpu_loop_exit(CPUState *cpu); > void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc); > +void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc); > > #if !defined(CONFIG_USER_ONLY) > void cpu_reloading_memory_map(void); > diff --git a/include/qemu-common.h b/include/qemu-common.h > index 9e8b0bd..f814317 100644 > --- a/include/qemu-common.h > +++ b/include/qemu-common.h > @@ -80,6 +80,7 @@ void tcg_exec_init(unsigned long tb_size); > bool tcg_enabled(void); > > void cpu_exec_init_all(void); > +void cpu_exec_step(CPUState *cpu); > > /** > * Sends a (part of) iovec down a socket, yielding when the socket is full, or > diff --git a/linux-user/main.c b/linux-user/main.c > index f2f4d2f..8d5c948 100644 > --- a/linux-user/main.c > +++ b/linux-user/main.c > @@ -182,13 +182,25 @@ static inline void start_exclusive(void) > } > > /* Finish an exclusive operation. 
*/ > -static inline void __attribute__((unused)) end_exclusive(void) > +static inline void end_exclusive(void) > { > pending_cpus = 0; > pthread_cond_broadcast(&exclusive_resume); > pthread_mutex_unlock(&exclusive_lock); > } > > +static void step_atomic(CPUState *cpu) > +{ > + start_exclusive(); > + > + /* Since we got here, we know that parallel_cpus must be true. */ > + parallel_cpus = false; > + cpu_exec_step(cpu); > + parallel_cpus = true; > + > + end_exclusive(); > +} > + Paolo's safe work patches bring the start/end_exclusive functions into cpu-exec-common so I think after that has been merged this function can also be moved and called directly from the MTTCG loop on an EXCP_ATOMIC exit. > /* Wait for exclusive ops to finish, and begin cpu execution. */ > static inline void cpu_exec_start(CPUState *cpu) > { > @@ -440,6 +452,9 @@ void cpu_loop(CPUX86State *env) > } > } > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > pc = env->segs[R_CS].base + env->eip; > EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n", > @@ -932,6 +947,9 @@ void cpu_loop(CPUARMState *env) > case EXCP_YIELD: > /* nothing to do here for user-mode, just resume guest code */ > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > error: > EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr); > @@ -1131,6 +1149,9 @@ void cpu_loop(CPUARMState *env) > case EXCP_YIELD: > /* nothing to do here for user-mode, just resume guest code */ > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr); > abort(); > @@ -1220,6 +1241,9 @@ void cpu_loop(CPUUniCore32State *env) > } > } > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > goto error; > } > @@ -1492,6 +1516,9 @@ void cpu_loop (CPUSPARCState *env) > } > } > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > printf 
("Unhandled trap: 0x%x\n", trapnr); > cpu_dump_state(cs, stderr, fprintf, 0); > @@ -2040,6 +2067,9 @@ void cpu_loop(CPUPPCState *env) > case EXCP_INTERRUPT: > /* just indicate that signals should be handled asap */ > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > cpu_abort(cs, "Unknown exception 0x%d. Aborting\n", trapnr); > break; > @@ -2713,6 +2743,9 @@ done_syscall: > } > } > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > error: > EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr); > @@ -2799,6 +2832,9 @@ void cpu_loop(CPUOpenRISCState *env) > case EXCP_NR: > qemu_log_mask(CPU_LOG_INT, "\nNR\n"); > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > EXCP_DUMP(env, "\nqemu: unhandled CPU exception %#x - aborting\n", > trapnr); > @@ -2874,6 +2910,9 @@ void cpu_loop(CPUSH4State *env) > queue_signal(env, info.si_signo, &info); > break; > > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > printf ("Unhandled trap: 0x%x\n", trapnr); > cpu_dump_state(cs, stderr, fprintf, 0); > @@ -2939,6 +2978,9 @@ void cpu_loop(CPUCRISState *env) > } > } > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > printf ("Unhandled trap: 0x%x\n", trapnr); > cpu_dump_state(cs, stderr, fprintf, 0); > @@ -3053,6 +3095,9 @@ void cpu_loop(CPUMBState *env) > } > } > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > printf ("Unhandled trap: 0x%x\n", trapnr); > cpu_dump_state(cs, stderr, fprintf, 0); > @@ -3154,6 +3199,9 @@ void cpu_loop(CPUM68KState *env) > } > } > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr); > abort(); > @@ -3389,6 +3437,9 @@ void cpu_loop(CPUAlphaState *env) > case EXCP_INTERRUPT: > /* Just indicate that signals should be handled asap. 
*/ > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > printf ("Unhandled trap: 0x%x\n", trapnr); > cpu_dump_state(cs, stderr, fprintf, 0); > @@ -3516,6 +3567,9 @@ void cpu_loop(CPUS390XState *env) > queue_signal(env, info.si_signo, &info); > break; > > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > fprintf(stderr, "Unhandled trap: 0x%x\n", trapnr); > cpu_dump_state(cs, stderr, fprintf, 0); > @@ -3768,6 +3822,9 @@ void cpu_loop(CPUTLGState *env) > case TILEGX_EXCP_REG_UDN_ACCESS: > gen_sigill_reg(env); > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > fprintf(stderr, "trapnr is %d[0x%x].\n", trapnr, trapnr); > g_assert_not_reached(); > diff --git a/tcg/tcg.h b/tcg/tcg.h > index 1bcabca..00498fc 100644 > --- a/tcg/tcg.h > +++ b/tcg/tcg.h > @@ -700,6 +700,7 @@ struct TCGContext { > }; > > extern TCGContext tcg_ctx; > +extern bool parallel_cpus; > > static inline void tcg_set_insn_param(int op_idx, int arg, TCGArg v) > { > diff --git a/translate-all.c b/translate-all.c > index 0dd6466..f97fc1e 100644 > --- a/translate-all.c > +++ b/translate-all.c > @@ -119,6 +119,7 @@ static void *l1_map[V_L1_SIZE]; > > /* code generation context */ > TCGContext tcg_ctx; > +bool parallel_cpus; So let's add some words to this to distinguish between this and the mttcg enabled flags and its relation to -smp. Something like: parallel_cpus indicates to the front-ends if code needs to be generated taking into account multiple threads of execution. It will be true for linux-user after the first thread clone and in system mode if MTTCG is enabled. On the transition from false->true any code generated while false needs to be invalidated. It may be temporarily set to false when generating non-cached code in an exclusive context. > > /* translation block context */ > #ifdef CONFIG_USER_ONLY -- Alex Bennée