From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:55198) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1bjS3Z-0000EI-H5 for qemu-devel@nongnu.org; Mon, 12 Sep 2016 10:18:02 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1bjS3T-0006sL-Ct for qemu-devel@nongnu.org; Mon, 12 Sep 2016 10:17:56 -0400 Received: from mail-wm0-f45.google.com ([74.125.82.45]:37747) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1bjS3T-0006s6-1T for qemu-devel@nongnu.org; Mon, 12 Sep 2016 10:17:51 -0400 Received: by mail-wm0-f45.google.com with SMTP id c131so60369006wmh.0 for ; Mon, 12 Sep 2016 07:17:50 -0700 (PDT) References: <1472935202-3342-1-git-send-email-rth@twiddle.net> <1472935202-3342-7-git-send-email-rth@twiddle.net> From: Alex =?utf-8?Q?Benn=C3=A9e?= In-reply-to: <1472935202-3342-7-git-send-email-rth@twiddle.net> Date: Mon, 12 Sep 2016 15:16:48 +0100 Message-ID: <87twdlw6cv.fsf@linaro.org> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Subject: Re: [Qemu-devel] [PATCH v3 06/34] tcg: Add EXCP_ATOMIC List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Richard Henderson Cc: qemu-devel@nongnu.org Richard Henderson writes: > When we cannot emulate an atomic operation within a parallel > context, this exception allows us to stop the world and try > again in a serial context. 
> > Signed-off-by: Richard Henderson > --- > cpu-exec-common.c | 6 +++++ > cpu-exec.c | 23 +++++++++++++++++++ > cpus.c | 6 +++++ > include/exec/cpu-all.h | 1 + > include/exec/exec-all.h | 1 + > include/qemu-common.h | 1 + > linux-user/main.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++- > tcg/tcg.h | 1 + > translate-all.c | 1 + > 9 files changed, 98 insertions(+), 1 deletion(-) > > diff --git a/cpu-exec-common.c b/cpu-exec-common.c > index 0cb4ae6..767d9c6 100644 > --- a/cpu-exec-common.c > +++ b/cpu-exec-common.c > @@ -77,3 +77,9 @@ void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc) > } > siglongjmp(cpu->jmp_env, 1); > } > + > +void cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc) > +{ > + cpu->exception_index = EXCP_ATOMIC; > + cpu_loop_exit_restore(cpu, pc); > +} > diff --git a/cpu-exec.c b/cpu-exec.c > index 5d9710a..8d77516 100644 > --- a/cpu-exec.c > +++ b/cpu-exec.c > @@ -225,6 +225,29 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, > } > #endif > > +void cpu_exec_step(CPUState *cpu) > +{ > + CPUArchState *env = (CPUArchState *)cpu->env_ptr; > + TranslationBlock *tb; > + target_ulong cs_base, pc; > + uint32_t flags; > + bool old_tb_flushed; > + > + old_tb_flushed = cpu->tb_flushed; > + cpu->tb_flushed = false; Advanced warning, these disappear soon when the async safe work (plus safe tb flush) patches get merged. 
> + > + cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags); > + tb = tb_gen_code(cpu, pc, cs_base, flags, > + 1 | CF_NOCACHE | CF_IGNORE_ICOUNT); > + tb->orig_tb = NULL; > + cpu->tb_flushed |= old_tb_flushed; > + /* execute the generated code */ > + trace_exec_tb_nocache(tb, pc); > + cpu_tb_exec(cpu, tb); > + tb_phys_invalidate(tb, -1); > + tb_free(tb); > +} > + > struct tb_desc { > target_ulong pc; > target_ulong cs_base; > diff --git a/cpus.c b/cpus.c > index 84c3520..a01bbbd 100644 > --- a/cpus.c > +++ b/cpus.c > @@ -1575,6 +1575,12 @@ static void tcg_exec_all(void) > if (r == EXCP_DEBUG) { > cpu_handle_guest_debug(cpu); > break; > + } else if (r == EXCP_ATOMIC) { > + /* ??? When we begin running cpus in parallel, we should > + stop all cpus, clear parallel_cpus, and interpret a > + single insn with cpu_exec_step. In the meantime, > + we should never get here. */ > + abort(); Possibly more correct would be: g_assert(parallel_cpus == false); abort(); As mentioned in other patches I was seeing this triggered by TLB_NOTDIRTY writes. 
However I think the fix is fairly simple, see below: > } > } else if (cpu->stop || cpu->stopped) { > if (cpu->unplug) { > diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h > index b6a7059..1f7e9d8 100644 > --- a/include/exec/cpu-all.h > +++ b/include/exec/cpu-all.h > @@ -31,6 +31,7 @@ > #define EXCP_DEBUG 0x10002 /* cpu stopped after a breakpoint or singlestep */ > #define EXCP_HALTED 0x10003 /* cpu is halted (waiting for external event) */ > #define EXCP_YIELD 0x10004 /* cpu wants to yield timeslice to another */ > +#define EXCP_ATOMIC 0x10005 /* stop-the-world and emulate atomic */ > > /* some important defines: > * > diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h > index d008296..08424ae 100644 > --- a/include/exec/exec-all.h > +++ b/include/exec/exec-all.h > @@ -71,6 +71,7 @@ static inline void cpu_list_lock(void) > void cpu_exec_init(CPUState *cpu, Error **errp); > void QEMU_NORETURN cpu_loop_exit(CPUState *cpu); > void QEMU_NORETURN cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc); > +void QEMU_NORETURN cpu_loop_exit_atomic(CPUState *cpu, uintptr_t pc); > > #if !defined(CONFIG_USER_ONLY) > void cpu_reloading_memory_map(void); > diff --git a/include/qemu-common.h b/include/qemu-common.h > index 9e8b0bd..f814317 100644 > --- a/include/qemu-common.h > +++ b/include/qemu-common.h > @@ -80,6 +80,7 @@ void tcg_exec_init(unsigned long tb_size); > bool tcg_enabled(void); > > void cpu_exec_init_all(void); > +void cpu_exec_step(CPUState *cpu); > > /** > * Sends a (part of) iovec down a socket, yielding when the socket is full, or > diff --git a/linux-user/main.c b/linux-user/main.c > index f2f4d2f..8d5c948 100644 > --- a/linux-user/main.c > +++ b/linux-user/main.c > @@ -182,13 +182,25 @@ static inline void start_exclusive(void) > } > > /* Finish an exclusive operation. 
*/ > -static inline void __attribute__((unused)) end_exclusive(void) > +static inline void end_exclusive(void) > { > pending_cpus = 0; > pthread_cond_broadcast(&exclusive_resume); > pthread_mutex_unlock(&exclusive_lock); > } > > +static void step_atomic(CPUState *cpu) > +{ > + start_exclusive(); > + > + /* Since we got here, we know that parallel_cpus must be true. */ > + parallel_cpus = false; > + cpu_exec_step(cpu); > + parallel_cpus = true; > + > + end_exclusive(); > +} > + Paolo's safe work patches bring the start/end_exclusive functions into cpu-exec-common so I think after that has been merged this function can also be moved and called directly from the MTTCG loop on an EXCP_ATOMIC exit. > /* Wait for exclusive ops to finish, and begin cpu execution. */ > static inline void cpu_exec_start(CPUState *cpu) > { > @@ -440,6 +452,9 @@ void cpu_loop(CPUX86State *env) > } > } > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > pc = env->segs[R_CS].base + env->eip; > EXCP_DUMP(env, "qemu: 0x%08lx: unhandled CPU exception 0x%x - aborting\n", > @@ -932,6 +947,9 @@ void cpu_loop(CPUARMState *env) > case EXCP_YIELD: > /* nothing to do here for user-mode, just resume guest code */ > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > error: > EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr); > @@ -1131,6 +1149,9 @@ void cpu_loop(CPUARMState *env) > case EXCP_YIELD: > /* nothing to do here for user-mode, just resume guest code */ > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr); > abort(); > @@ -1220,6 +1241,9 @@ void cpu_loop(CPUUniCore32State *env) > } > } > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > goto error; > } > @@ -1492,6 +1516,9 @@ void cpu_loop (CPUSPARCState *env) > } > } > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > printf 
("Unhandled trap: 0x%x\n", trapnr); > cpu_dump_state(cs, stderr, fprintf, 0); > @@ -2040,6 +2067,9 @@ void cpu_loop(CPUPPCState *env) > case EXCP_INTERRUPT: > /* just indicate that signals should be handled asap */ > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > cpu_abort(cs, "Unknown exception 0x%d. Aborting\n", trapnr); > break; > @@ -2713,6 +2743,9 @@ done_syscall: > } > } > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > error: > EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr); > @@ -2799,6 +2832,9 @@ void cpu_loop(CPUOpenRISCState *env) > case EXCP_NR: > qemu_log_mask(CPU_LOG_INT, "\nNR\n"); > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > EXCP_DUMP(env, "\nqemu: unhandled CPU exception %#x - aborting\n", > trapnr); > @@ -2874,6 +2910,9 @@ void cpu_loop(CPUSH4State *env) > queue_signal(env, info.si_signo, &info); > break; > > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > printf ("Unhandled trap: 0x%x\n", trapnr); > cpu_dump_state(cs, stderr, fprintf, 0); > @@ -2939,6 +2978,9 @@ void cpu_loop(CPUCRISState *env) > } > } > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > printf ("Unhandled trap: 0x%x\n", trapnr); > cpu_dump_state(cs, stderr, fprintf, 0); > @@ -3053,6 +3095,9 @@ void cpu_loop(CPUMBState *env) > } > } > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > printf ("Unhandled trap: 0x%x\n", trapnr); > cpu_dump_state(cs, stderr, fprintf, 0); > @@ -3154,6 +3199,9 @@ void cpu_loop(CPUM68KState *env) > } > } > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > EXCP_DUMP(env, "qemu: unhandled CPU exception 0x%x - aborting\n", trapnr); > abort(); > @@ -3389,6 +3437,9 @@ void cpu_loop(CPUAlphaState *env) > case EXCP_INTERRUPT: > /* Just indicate that signals should be handled asap. 
*/ > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > printf ("Unhandled trap: 0x%x\n", trapnr); > cpu_dump_state(cs, stderr, fprintf, 0); > @@ -3516,6 +3567,9 @@ void cpu_loop(CPUS390XState *env) > queue_signal(env, info.si_signo, &info); > break; > > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > fprintf(stderr, "Unhandled trap: 0x%x\n", trapnr); > cpu_dump_state(cs, stderr, fprintf, 0); > @@ -3768,6 +3822,9 @@ void cpu_loop(CPUTLGState *env) > case TILEGX_EXCP_REG_UDN_ACCESS: > gen_sigill_reg(env); > break; > + case EXCP_ATOMIC: > + step_atomic(cs); > + break; > default: > fprintf(stderr, "trapnr is %d[0x%x].\n", trapnr, trapnr); > g_assert_not_reached(); > diff --git a/tcg/tcg.h b/tcg/tcg.h > index 1bcabca..00498fc 100644 > --- a/tcg/tcg.h > +++ b/tcg/tcg.h > @@ -700,6 +700,7 @@ struct TCGContext { > }; > > extern TCGContext tcg_ctx; > +extern bool parallel_cpus; > > static inline void tcg_set_insn_param(int op_idx, int arg, TCGArg v) > { > diff --git a/translate-all.c b/translate-all.c > index 0dd6466..f97fc1e 100644 > --- a/translate-all.c > +++ b/translate-all.c > @@ -119,6 +119,7 @@ static void *l1_map[V_L1_SIZE]; > > /* code generation context */ > TCGContext tcg_ctx; > +bool parallel_cpus; So let's add some words to this to distinguish between this and the mttcg enabled flags and its relation to -smp. Something like: parallel_cpus indicates to the front-ends if code needs to be generated taking into account multiple threads of execution. It will be true for linux-user after the first thread clone and in system mode if MTTCG is enabled. On the transition from false->true any code generated while false needs to be invalidated. It may be temporarily set to false when generating non-cached code in an exclusive context. > > /* translation block context */ > #ifdef CONFIG_USER_ONLY -- Alex Bennée